From b9425e72280b27dd863d6a8e126efaf25642e74a Mon Sep 17 00:00:00 2001 From: Mike Hibler <mike@flux.utah.edu> Date: Wed, 11 May 2005 22:35:33 +0000 Subject: [PATCH] Hack multicast "keep alive" mechanism. The "-K <seconds>" option can be used to force the server to send an IGMP report if it doesn't receive any packets within <seconds> seconds. As long as the server is receiving packets, it won't send the report. What I'm not loving here is that to send a report I have to drop membership in the group (socket option IP_DROP_MEMBERSHIP) and rejoin (IP_ADD_MEMBERSHIP). Simply trying to do an add membership doesn't work because the kernel thinks you are already in the group and errors out. I'm hoping all the up and down activity doesn't make the switch behave any worse than it already does. --- os/frisbee.redux/decls.h | 3 ++- os/frisbee.redux/network.c | 38 ++++++++++++++++++++++++++++++++++++-- os/frisbee.redux/server.c | 38 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 74 insertions(+), 5 deletions(-) diff --git a/os/frisbee.redux/decls.h b/os/frisbee.redux/decls.h index 556c59aaec..10e8bb325a 100644 --- a/os/frisbee.redux/decls.h +++ b/os/frisbee.redux/decls.h @@ -1,6 +1,6 @@ /* * EMULAB-COPYRIGHT - * Copyright (c) 2000-2004 University of Utah and the Flux Group. + * Copyright (c) 2000-2005 University of Utah and the Flux Group. * All rights reserved. */ @@ -282,6 +282,7 @@ typedef struct { */ int ClientNetInit(void); int ServerNetInit(void); +int ServerNetMCKeepAlive(void); unsigned long ClientNetID(void); int PacketReceive(Packet_t *p); void PacketSend(Packet_t *p, int *resends); diff --git a/os/frisbee.redux/network.c b/os/frisbee.redux/network.c index 29747a8a05..58b90ea289 100644 --- a/os/frisbee.redux/network.c +++ b/os/frisbee.redux/network.c @@ -1,6 +1,6 @@ /* * EMULAB-COPYRIGHT - * Copyright (c) 2000-2004 University of Utah and the Flux Group. + * Copyright (c) 2000-2005 University of Utah and the Flux Group. * All rights reserved. 
*/ @@ -35,7 +35,7 @@ unsigned long nonetbufs; /* Max number of hops multicast hops. */ #define MCAST_TTL 5 -static int sock; +static int sock = -1; struct in_addr myipaddr; static int nobufdelay = -1; int broadcast = 0; @@ -184,6 +184,40 @@ ServerNetInit(void) return 1; } +/* + * XXX hack. + * + * Cisco switches without a multicast router defined have an unfortunate + * habit of losing our IGMP membership. This function allows us to send + * a report message to remind the switch we are still around. + * + * We need a better way to do this! + */ +int +ServerNetMCKeepAlive(void) +{ + struct ip_mreq mreq; + + if (broadcast || (ntohl(mcastaddr.s_addr) >> 28) != 14) + return 0; + + if (sock == -1) + return 1; + + mreq.imr_multiaddr.s_addr = mcastaddr.s_addr; + if (mcastif.s_addr) + mreq.imr_interface.s_addr = mcastif.s_addr; + else + mreq.imr_interface.s_addr = htonl(INADDR_ANY); + + if (setsockopt(sock, IPPROTO_IP, IP_DROP_MEMBERSHIP, + &mreq, sizeof(mreq)) < 0 || + setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &mreq, sizeof(mreq)) < 0) + return 1; + return 0; +} + /* * Look for a packet on the socket. Propogate the errors back to the caller * exactly as the system call does. Remember that we set up a socket timeout diff --git a/os/frisbee.redux/server.c b/os/frisbee.redux/server.c index 8f34658e1e..b3c9408999 100644 --- a/os/frisbee.redux/server.c +++ b/os/frisbee.redux/server.c @@ -1,6 +1,6 @@ /* * EMULAB-COPYRIGHT - * Copyright (c) 2000-2004 University of Utah and the Flux Group. + * Copyright (c) 2000-2005 University of Utah and the Flux Group. * All rights reserved. 
*/ @@ -34,6 +34,7 @@ int debug = 0; int tracing = 0; int dynburst = 0; int timeout = SERVER_INACTIVE_SECONDS; +int keepalive = 0; int readsize = SERVER_READ_SIZE; volatile int burstsize = SERVER_BURST_SIZE; int maxburstsize = SERVER_DYNBURST_SIZE; @@ -542,16 +543,39 @@ void * ServerRecvThread(void *arg) { Packet_t packet, *p = &packet; + int idles = 0, kafails = 0; static int gotone; if (debug > 1) log("Server pthread starting up ..."); + /* + * Recalculate keepalive interval in terms of packet receive + * timeouts for simplicity. + */ + if (keepalive) + keepalive = (int)(((unsigned long long)keepalive * 1000000) / + PKTRCV_TIMEOUT); while (1) { pthread_testcancel(); if (PacketReceive(p) != 0) { + if (keepalive && ++idles > keepalive) { + if (ServerNetMCKeepAlive()) { + warning("Multicast keepalive failed"); + if (++kafails > 5) { + warning("too many failures, disabled"); + keepalive = 0; + } + } else { + kafails = 0; + idles = 0; + if (debug > 1) + log("Ping..."); + } + } continue; } + idles = 0; DOSTAT(msgin++); if (! PacketValid(p, FileInfo.chunks)) { @@ -794,7 +818,7 @@ main(int argc, char **argv) off_t fsize; void *ignored; - while ((ch = getopt(argc, argv, "dhp:m:i:tbDT:R:B:G:L:W:")) != -1) + while ((ch = getopt(argc, argv, "dhp:m:i:tbDT:R:B:G:L:W:K:")) != -1) switch(ch) { case 'b': broadcast++; @@ -836,6 +860,11 @@ main(int argc, char **argv) case 'W': bandwidth = atol(optarg); break; + case 'K': + keepalive = atoi(optarg); + if (keepalive < 0) + keepalive = 0; + break; case 'h': case '?': default: @@ -849,6 +878,11 @@ main(int argc, char **argv) if (!portnum || ! mcastaddr.s_addr) usage(); + if (timeout > 0 && keepalive > timeout) { + warning("keepalive > timeout, disabling keepalive"); + keepalive = 0; + } + signal(SIGINT, quit); signal(SIGTERM, quit); signal(SIGHUP, reinit); -- GitLab