diff --git a/os/frisbee.redux/decls.h b/os/frisbee.redux/decls.h
index 556c59aaeccfa32df6dbd13c625cf7d15c75fd92..10e8bb325a66cf068da1fe13e691adb242e128f3 100644
--- a/os/frisbee.redux/decls.h
+++ b/os/frisbee.redux/decls.h
@@ -1,6 +1,6 @@
 /*
  * EMULAB-COPYRIGHT
- * Copyright (c) 2000-2004 University of Utah and the Flux Group.
+ * Copyright (c) 2000-2005 University of Utah and the Flux Group.
  * All rights reserved.
  */
 
@@ -282,6 +282,7 @@ typedef struct {
  */
 int ClientNetInit(void);
 int ServerNetInit(void);
+int ServerNetMCKeepAlive(void);	/* 0 = ok or not needed, non-zero = failed */
 unsigned long ClientNetID(void);
 int PacketReceive(Packet_t *p);
 void PacketSend(Packet_t *p, int *resends);
diff --git a/os/frisbee.redux/network.c b/os/frisbee.redux/network.c
index 29747a8a05150b87389a4820c1ca19ab761910a9..58b90ea289385dc2d65a55ab72000ccc38238445 100644
--- a/os/frisbee.redux/network.c
+++ b/os/frisbee.redux/network.c
@@ -1,6 +1,6 @@
 /*
  * EMULAB-COPYRIGHT
- * Copyright (c) 2000-2004 University of Utah and the Flux Group.
+ * Copyright (c) 2000-2005 University of Utah and the Flux Group.
  * All rights reserved.
  */
 
@@ -35,7 +35,7 @@ unsigned long nonetbufs;
 /* Max number of hops multicast hops. */
 #define MCAST_TTL 5
 
-static int sock;
+static int sock = -1;	/* -1 is what ServerNetMCKeepAlive tests for */
 struct in_addr myipaddr;
 static int nobufdelay = -1;
 int broadcast = 0;
@@ -184,6 +184,40 @@ ServerNetInit(void)
 	return 1;
 }
 
+/*
+ * XXX hack. We need a better way to do this!
+ *
+ * Cisco switches without a multicast router defined have an unfortunate
+ * habit of losing our IGMP membership. Dropping and re-adding the group
+ * membership should send a report to remind the switch we are still around.
+ *
+ * Returns 0 if OK or not multicasting, 1 if no socket or a setsockopt fails.
+ */
+int
+ServerNetMCKeepAlive(void)
+{
+	struct ip_mreq mreq;
+
+	if (broadcast || (ntohl(mcastaddr.s_addr) >> 28) != 14)
+		return 0;	/* broadcast, or top nibble is not 1110 */
+
+	if (sock == -1)
+		return 1;	/* sock still has its initial value */
+
+	mreq.imr_multiaddr.s_addr = mcastaddr.s_addr;
+	if (mcastif.s_addr)
+		mreq.imr_interface.s_addr = mcastif.s_addr;
+	else
+		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
+
+	if (setsockopt(sock, IPPROTO_IP, IP_DROP_MEMBERSHIP,
+		       &mreq, sizeof(mreq)) < 0 ||
+	    setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
+		       &mreq, sizeof(mreq)) < 0)
+		return 1;	/* NB: a failed drop skips the re-add */
+	return 0;
+}
+
 /*
  * Look for a packet on the socket. Propogate the errors back to the caller
  * exactly as the system call does. Remember that we set up a socket timeout
diff --git a/os/frisbee.redux/server.c b/os/frisbee.redux/server.c
index 8f34658e1e3808b4bb660d57738cbdf31482a8f8..b3c94089993f915e57b090bc07bf8f346d317727 100644
--- a/os/frisbee.redux/server.c
+++ b/os/frisbee.redux/server.c
@@ -1,6 +1,6 @@
 /*
  * EMULAB-COPYRIGHT
- * Copyright (c) 2000-2004 University of Utah and the Flux Group.
+ * Copyright (c) 2000-2005 University of Utah and the Flux Group.
  * All rights reserved.
  */
 
@@ -34,6 +34,7 @@ int debug = 0;
 int tracing = 0;
 int dynburst = 0;
 int timeout = SERVER_INACTIVE_SECONDS;
+int keepalive = 0;	/* -K value; 0 disables keepalives */
 int readsize = SERVER_READ_SIZE;
 volatile int burstsize = SERVER_BURST_SIZE;
 int maxburstsize = SERVER_DYNBURST_SIZE;
@@ -542,16 +543,39 @@ void *
 ServerRecvThread(void *arg)
 {
 	Packet_t packet, *p = &packet;
+	int idles = 0, kafails = 0;	/* idle receives, keepalive failures */
 	static int gotone;
 
 	if (debug > 1)
 		log("Server pthread starting up ...");
 
+	/*
+	 * Convert the keepalive interval (presumably seconds) into a count of
+	 * PacketReceive timeouts; assumes PKTRCV_TIMEOUT is in microseconds.
+	 */
+	if (keepalive)
+		keepalive = (int)(((unsigned long long)keepalive * 1000000) /
+				  PKTRCV_TIMEOUT);
 	while (1) {
 		pthread_testcancel();
 		if (PacketReceive(p) != 0) {
+			if (keepalive && ++idles > keepalive) {
+				if (ServerNetMCKeepAlive()) {
+					warning("Multicast keepalive failed");
+					if (++kafails > 5) {	/* in a row */
+						warning("too many failures, disabled");
+						keepalive = 0;
+					}
+				} else {
+					kafails = 0;
+					idles = 0;	/* start a new interval */
+					if (debug > 1)
+						log("Ping...");
+				}
+			}
 			continue;
 		}
+		idles = 0;	/* PacketReceive returned 0 */
 		DOSTAT(msgin++);
 
 		if (! PacketValid(p, FileInfo.chunks)) {
@@ -794,7 +818,7 @@ main(int argc, char **argv)
 	off_t fsize;
 	void *ignored;
 
-	while ((ch = getopt(argc, argv, "dhp:m:i:tbDT:R:B:G:L:W:")) != -1)
+	while ((ch = getopt(argc, argv, "dhp:m:i:tbDT:R:B:G:L:W:K:")) != -1)
 		switch(ch) {
 		case 'b':
 			broadcast++;
@@ -836,6 +860,11 @@ main(int argc, char **argv)
 		case 'W':
 			bandwidth = atol(optarg);
 			break;
+		case 'K':	/* multicast keepalive interval */
+			keepalive = atoi(optarg);
+			if (keepalive < 0)
+				keepalive = 0;	/* negative means disabled */
+			break;
 		case 'h':
 		case '?':
 		default:
@@ -849,6 +878,11 @@ main(int argc, char **argv)
 	if (!portnum || ! mcastaddr.s_addr)
 		usage();
 
+	if (timeout > 0 && keepalive > timeout) {
+		warning("keepalive > timeout, disabling keepalive");
+		keepalive = 0;
+	}
+
 	signal(SIGINT, quit);
 	signal(SIGTERM, quit);
 	signal(SIGHUP, reinit);