From 1a76e634c78d27ca5df81621539f64ca7d453eac Mon Sep 17 00:00:00 2001 From: Mike Hibler <mike@flux.utah.edu> Date: Thu, 28 Oct 2004 20:26:36 +0000 Subject: [PATCH] Minor tweaks from a one-day binge of performance analysis. The only meaningful change was to insert a sched_yield() in the frisbee decompressor path. Apparently, the decompressor can run long enough to cause the incoming socket buffer to overflow. I was under the assumption that the decompressor would not run much longer than a single time slice (0.001 seconds, about 8 packets) before its priority would force it to be context switched. But it was running much longer than that! Forcing a periodic yield seems to have taken care of this. One other cause of retransmitted blocks that I saw was where the server was taking a long time to read data from a file (up to 0.25 seconds). This would stall the clients and force them to rerequest blocks (which they do after about 0.10 seconds). We can improve on this by splitting the file reading off to a seperate thread. Most other changes are related to the event logging code. --- os/frisbee.redux/GNUmakefile.in | 2 +- os/frisbee.redux/client.c | 32 +++++++++++++++------ os/frisbee.redux/server.c | 41 ++++++++++++++++----------- os/frisbee.redux/trace.c | 49 ++++++++++++++++++++++----------- os/frisbee.redux/trace.h | 11 ++++---- os/imagezip/imageunzip.c | 18 +++++++++++- 6 files changed, 106 insertions(+), 47 deletions(-) diff --git a/os/frisbee.redux/GNUmakefile.in b/os/frisbee.redux/GNUmakefile.in index b05406217e..a259aedb7b 100644 --- a/os/frisbee.redux/GNUmakefile.in +++ b/os/frisbee.redux/GNUmakefile.in @@ -45,7 +45,7 @@ LDFLAGS = -static #CFLAGS += -DCONDVARS_WORK # Define this to a non-zero value to enable recording of trace data -#CFLAGS += -DNEVENTS=25000 +#CFLAGS += -DNEVENTS=50000 # Turn on client event handling #CFLAGS += -DDOEVENTS diff --git a/os/frisbee.redux/client.c b/os/frisbee.redux/client.c index ae04e3610f..f4ab2e23ae 100644 --- a/os/frisbee.redux/client.c +++ b/os/frisbee.redux/client.c @@ -48,6 +48,7 @@ int redodelay = CLIENT_REQUEST_REDO_DELAY; int idledelay = CLIENT_WRITER_IDLE_DELAY; int startdelay = 0, startat = 0; +int nothreads = 0; int debug = 0; int tracing = 0; char traceprefix[64]; @@ -140,6 +141,7 @@ char *usagestr = " (Half used for network, half for disk).\n" " -I ms The time interval (millisec) between re-requests of a chunk.\n" " -R # The max number of chunks we will request ahead.\n" + " -O Make chunk requests in increasing order (default is random order).\n" "\n"; void @@ -149,17 +151,23 @@ usage() exit(1); } +void (*DiskIdleCallback)(); +static void +WriterIdleCallback(int isidle) +{ + CLEVENT(1, EV_CLIWRSTATUS, isidle, 0, 0, 0); +} + int main(int argc, char **argv) { int ch, mem; char *filename; int zero = 0; - int nothreads = 0; int dostype = -1; int slice = 0; - while ((ch = getopt(argc, argv, "dhp:m:s:i:tbznT:r:E:D:C:W:S:M:R:I:")) != -1) + while ((ch = getopt(argc, argv, "dhp:m:s:i:tbznT:r:E:D:C:W:S:M:R:I:O")) != -1) switch(ch) { case 'd': debug++; @@ -266,6 +274,10 @@ main(int argc, char **argv) redodelay = 0; break; + case 'O': + randomize = 0; + break; + case 'h': case '?': default: @@ -410,6 +422,8 @@ main(int argc, char **argv) if (tracing) { ClientTraceInit(traceprefix); TraceStart(tracing); + if (!nothreads) + DiskIdleCallback = WriterIdleCallback; } PlayFrisbee(); @@ -539,7 +553,7 @@ ClientRecvThread(void *arg) continue; } - CLEVENT(3, EV_CLIGOTPKT, + CLEVENT(BackOff ? 1 : 3, EV_CLIGOTPKT, pstamp.tv_sec, pstamp.tv_usec, 0, 0); #ifdef NEVENTS needstamp = 1; @@ -667,7 +681,7 @@ ChunkerStartup(void) } #endif if (!wasidle) { - CLEVENT(1, EV_CLIWRIDLE, 0, 0, 0, 0); + CLEVENT(1, EV_CLIDCIDLE, 0, 0, 0, 0); if (debug) log("No chunks ready to write!"); } @@ -702,7 +716,7 @@ ChunkerStartup(void) } } - CLEVENT(1, EV_CLIWRSTART, + CLEVENT(1, EV_CLIDCSTART, ChunkBuffer[i].thischunk, wasidle, decompblocks, writeridles); wasidle = 0; @@ -715,7 +729,7 @@ ChunkerStartup(void) */ ChunkBuffer[i].state = CHUNK_EMPTY; chunkcount--; - CLEVENT(1, EV_CLIWRDONE, + CLEVENT(1, EV_CLIDCDONE, ChunkBuffer[i].thischunk, chunkcount, decompblocks, writeridles); } @@ -935,7 +949,8 @@ GotBlock(Packet_t *p) if (lastchunk != -1 && chunk != lastchunk && lastchunk == ChunkBuffer[lastchunkbuf].thischunk && ChunkBuffer[lastchunkbuf].state == CHUNK_FILLING) - CLEVENT(1, EV_CLILCHUNK, lastchunk, lastblock, 0, 0); + CLEVENT(1, EV_CLILCHUNK, lastchunk, lastblock, + ChunkBuffer[lastchunkbuf].blockcount, 0); lastchunkbuf = i; lastchunk = chunk; lastblock = block; @@ -1254,7 +1269,8 @@ PlayFrisbee(void) * the server gets it. All the server does with it is print a * timestamp, and that is not critical to operation. */ - CLEVENT(1, EV_CLILEAVE, myid, estamp.tv_sec, 0, 0); + CLEVENT(1, EV_CLILEAVE, myid, estamp.tv_sec, + (Stats.u.v1.rbyteswritten >> 32), Stats.u.v1.rbyteswritten); #ifdef STATS p->hdr.type = PKTTYPE_REQUEST; p->hdr.subtype = PKTSUBTYPE_LEAVE2; diff --git a/os/frisbee.redux/server.c b/os/frisbee.redux/server.c index 2ea792a79e..8f34658e1e 100644 --- a/os/frisbee.redux/server.c +++ b/os/frisbee.redux/server.c @@ -388,8 +388,9 @@ ClientJoin(Packet_t *p) * Log after we send reply so that we get the packet off as * quickly as possible! */ - log("%s (id %u) joins at %s! %d active clients.", - inet_ntoa(ipaddr), clientid, CurrentTimeString(), activeclients); + log("%s (id %u, image %s) joins at %s! %d active clients.", + inet_ntoa(ipaddr), clientid, filename, + CurrentTimeString(), activeclients); } /* @@ -413,9 +414,9 @@ ClientLeave(Packet_t *p) activeclients--; clients[i].id = 0; clients[i].ip = 0; - log("%s (id %u): leaves at %s, " + log("%s (id %u, image %s): leaves at %s, " "ran for %d seconds. %d active clients", - inet_ntoa(ipaddr), clientid, + inet_ntoa(ipaddr), clientid, filename, CurrentTimeString(), p->msg.leave.elapsed, activeclients); break; @@ -426,8 +427,9 @@ ClientLeave(Packet_t *p) } #else activeclients--; - log("%s (id %u): leaves at %s, ran for %d seconds. %d active clients", - inet_ntoa(ipaddr), clientid, CurrentTimeString(), + log("%s (id %u, image %s): leaves at %s, ran for %d seconds. " + "%d active clients", + inet_ntoa(ipaddr), clientid, filename, CurrentTimeString(), p->msg.leave.elapsed, activeclients); #endif } @@ -453,9 +455,9 @@ ClientLeave2(Packet_t *p) clients[i].id = 0; clients[i].ip = 0; activeclients--; - log("%s (id %u): leaves at %s, " + log("%s (id %u, image %s): leaves at %s, " "ran for %d seconds. %d active clients", - inet_ntoa(ipaddr), clientid, + inet_ntoa(ipaddr), clientid, filename, CurrentTimeString(), p->msg.leave2.elapsed, activeclients); ClientStatsDump(clientid, &p->msg.leave2.stats); @@ -467,8 +469,9 @@ ClientLeave2(Packet_t *p) } #else activeclients--; - log("%s (id %u): leaves at %s, ran for %d seconds. %d active clients", - inet_ntoa(ipaddr), clientid, CurrentTimeString(), + log("%s (id %u, image %s): leaves at %s, ran for %d seconds. " + "%d active clients", + inet_ntoa(ipaddr), clientid, filename, CurrentTimeString(), p->msg.leave2.elapsed, activeclients); #endif } @@ -646,6 +649,7 @@ PlayFrisbee(void) /* If less than zero, exit when last cilent leaves */ if (timeout < 0 && Stats.joins > 0 && activeclients == 0) { + fsleep(2000000); log("Last client left!"); break; } @@ -679,6 +683,10 @@ PlayFrisbee(void) int readbytes; int resends; int thisburst = 0; +#ifdef NEVENTS + struct timeval rstamp; + gettimeofday(&rstamp, 0); +#endif /* * Read blocks of data from disk. @@ -698,7 +706,7 @@ PlayFrisbee(void) DOSTAT(filereads++); DOSTAT(filebytes += cc); EVENT(2, EV_READFILE, mcastaddr, - offset, readbytes, cc, 0); + offset, readbytes, rstamp.tv_sec, rstamp.tv_usec); if (cc != readbytes) fatal("Short read: %d!=%d", cc, readbytes); @@ -715,8 +723,8 @@ PlayFrisbee(void) PacketSend(p, &resends); sendretries += resends; DOSTAT(blockssent++); - EVENT(3, EV_BLOCKMSG, mcastaddr, - chunk, block+j, 0, 0); + EVENT(resends ? 1 : 3, EV_BLOCKMSG, mcastaddr, + chunk, block+j, resends, 0); /* * Completed a burst. Adjust the busrtsize @@ -929,10 +937,11 @@ main(int argc, char **argv) log(" client re-req: %d", Stats.clientlost); log(" 1k blocks sent: %d (%d repeated)", - Stats.blockssent, Stats.blockssent - FileInfo.blocks); + Stats.blockssent, Stats.blockssent ? + (Stats.blockssent-FileInfo.blocks) : 0); log(" file reads: %d (%qu bytes, %qu repeated)", - Stats.filereads, Stats.filebytes, - Stats.filebytes - FileInfo.blocks * BLOCKSIZE); + Stats.filereads, Stats.filebytes, Stats.filebytes ? + (Stats.filebytes - FileInfo.blocks * BLOCKSIZE) : 0); log(" net idle/blocked: %d/%d", Stats.goesidle, nonetbufs); log(" send intvl/missed: %d/%d", Stats.intervals, Stats.missed); diff --git a/os/frisbee.redux/trace.c b/os/frisbee.redux/trace.c index 5350d4ef21..0e61de84a2 100644 --- a/os/frisbee.redux/trace.c +++ b/os/frisbee.redux/trace.c @@ -1,6 +1,6 @@ /* * EMULAB-COPYRIGHT - * Copyright (c) 2002, 2003 University of Utah and the Flux Group. + * Copyright (c) 2002, 2003, 2004 University of Utah and the Flux Group. * All rights reserved. */ @@ -161,8 +161,9 @@ TraceDump(void) ptr->args[2], ptr->args[3]); break; case EV_BLOCKMSG: - fprintf(fd, "send block, %lu[%lu]\n", - ptr->args[0], ptr->args[1]); + fprintf(fd, "sent block, %lu[%lu], retry=%lu\n", + ptr->args[0], ptr->args[1], + ptr->args[2]); break; case EV_WORKENQ: fprintf(fd, "enqueues, %lu(%lu), " @@ -200,9 +201,12 @@ TraceDump(void) ptr->args[0]); break; case EV_READFILE: - fprintf(fd, "readfile, %lu@%lu -> %lu\n", - ptr->args[1], - ptr->args[0], ptr->args[2]); + stamp.tv_sec = ptr->args[2]; + stamp.tv_usec = ptr->args[3]; + timersub(&ptr->tstamp, &stamp, &stamp); + fprintf(fd, "readfile, %lu@%lu, %lu.%03lus\n", + ptr->args[1], ptr->args[0], + stamp.tv_sec, stamp.tv_usec/1000); break; @@ -282,9 +286,11 @@ TraceDump(void) break; case EV_CLILCHUNK: fprintf(fd, "%s: switched from incomplete " - "chunk %lu at block %lu\n", + "chunk %lu at block %lu " + "(%lu blocks to go)\n", inet_ntoa(ptr->srcip), - ptr->args[0], ptr->args[1]); + ptr->args[0], ptr->args[1], + ptr->args[2]); break; case EV_CLIREQ: fprintf(fd, "%s: send REQUEST, %lu[%lu-%lu]\n", @@ -316,33 +322,44 @@ TraceDump(void) inet_ntoa(ptr->srcip), ptr->args[0]); break; case EV_CLILEAVE: - fprintf(fd, "%s: send LEAVE, ID=%lx, time=%lu\n", + { + unsigned long long bytes; + bytes = (unsigned long long)ptr->args[2] << 32; + bytes |= ptr->args[3]; + fprintf(fd, "%s: send LEAVE, ID=%lx, " + "time=%lu, bytes=%qu\n", inet_ntoa(ptr->srcip), - ptr->args[0], ptr->args[1]); + ptr->args[0], ptr->args[1], bytes); break; + } case EV_CLISTAMP: fprintf(fd, "%s: update chunk %lu, stamp %lu.%06lu\n", inet_ntoa(ptr->srcip), ptr->args[0], ptr->args[1], ptr->args[2]); break; - case EV_CLIWRSTART: - fprintf(fd, "%s: writing chunk %lu, " + case EV_CLIDCSTART: + fprintf(fd, "%s: decompressing chunk %lu, " "idle=%lu, (dblock=%lu, widle=%lu)\n", inet_ntoa(ptr->srcip), ptr->args[0], ptr->args[1], ptr->args[2], ptr->args[3]); break; - case EV_CLIWRDONE: - fprintf(fd, "%s: chunk %lu written, " + case EV_CLIDCDONE: + fprintf(fd, "%s: chunk %lu decompressed, " "%lu left, (dblock=%lu, widle=%lu)\n", inet_ntoa(ptr->srcip), ptr->args[0], ptr->args[1], ptr->args[2], ptr->args[3]); break; - case EV_CLIWRIDLE: - fprintf(fd, "%s: disk IDLE\n", + case EV_CLIDCIDLE: + fprintf(fd, "%s: decompressor IDLE\n", inet_ntoa(ptr->srcip)); break; + case EV_CLIWRSTATUS: + fprintf(fd, "%s: writer %s\n", + inet_ntoa(ptr->srcip), + ptr->args[0] ? "IDLE" : "STARTED"); + break; case EV_CLIGOTPKT: stamp.tv_sec = ptr->args[0]; stamp.tv_usec = ptr->args[1]; diff --git a/os/frisbee.redux/trace.h b/os/frisbee.redux/trace.h index 3974c1ceda..bf189a8dbf 100644 --- a/os/frisbee.redux/trace.h +++ b/os/frisbee.redux/trace.h @@ -1,6 +1,6 @@ /* * EMULAB-COPYRIGHT - * Copyright (c) 2002, 2003 University of Utah and the Flux Group. + * Copyright (c) 2002, 2003, 2004 University of Utah and the Flux Group. * All rights reserved. */ @@ -78,9 +78,9 @@ if (evlogging >= (l)) { \ #define EV_CLILEAVE 24 #define EV_CLIREQMSG 25 #define EV_CLISTAMP 26 -#define EV_CLIWRSTART 27 -#define EV_CLIWRDONE 28 -#define EV_CLIWRIDLE 29 +#define EV_CLIDCSTART 27 +#define EV_CLIDCDONE 28 +#define EV_CLIDCIDLE 29 #define EV_CLIBLOCK 30 #define EV_CLISTART 31 #define EV_CLIGOTPKT 32 @@ -92,8 +92,9 @@ if (evlogging >= (l)) { \ #define EV_OVERRUN 38 #define EV_LONGBURST 39 #define EV_DUPCHUNK 40 +#define EV_CLIWRSTATUS 41 -#define EV_MAX 40 +#define EV_MAX 41 extern void ClientTraceInit(char *file); extern void ClientTraceReinit(char *file); diff --git a/os/imagezip/imageunzip.c b/os/imagezip/imageunzip.c index 4cc4121d13..a0935cb78d 100644 --- a/os/imagezip/imageunzip.c +++ b/os/imagezip/imageunzip.c @@ -841,6 +841,10 @@ threadquit(void) pthread_join(child_pid, &ignored); } +#ifdef FRISBEE +extern void (*DiskIdleCallback)(int); +#endif + void * DiskWriter(void *arg) { @@ -852,8 +856,13 @@ DiskWriter(void *arg) pthread_mutex_lock(&writequeue_mutex); if (queue_empty(&writequeue)) { - if (gotone) + if (gotone) { writeridles++; +#ifdef FRISBEE + if (DiskIdleCallback) + (*DiskIdleCallback)(1); +#endif + } do { #ifdef CONDVARS_WORK pthread_cond_wait(&writequeue_cond, @@ -865,6 +874,10 @@ DiskWriter(void *arg) #endif pthread_testcancel(); } while (queue_empty(&writequeue)); +#ifdef FRISBEE + if (DiskIdleCallback) + (*DiskIdleCallback)(0); +#endif } queue_remove_first(&writequeue, wbuf, writebuf_t *, chain); writeinprogress = 1; /* XXX */ @@ -1000,6 +1013,9 @@ inflate_subblock(char *chunkbufp) d_stream.next_out = &wbuf->data[ibleft]; d_stream.avail_out = OUTSIZE - ibleft; +#ifdef FRISBEE + sched_yield(); +#endif /* * Inflate a chunk */ -- GitLab