Commit b9425e72 authored by Mike Hibler's avatar Mike Hibler

Hack multicast "keep alive" mechanism. The "-K <seconds>" option can be
used to force the server to send an IGMP report if it doesn't receive any
packets within <seconds> seconds.  As long as the server is receiving
packets, it won't send the report.

What I don't love here is that to send a report I have to drop membership
in the group (socket opt IP_DROP_MEMBERSHIP) and rejoin (IP_ADD_MEMBERSHIP).
Simply trying to do an add membership doesn't work because the kernel thinks
you are already in the group and returns an error.  I'm hoping all the up and
down activity doesn't make the switch behave any worse than it already does.
parent e8ba8543
/* /*
* EMULAB-COPYRIGHT * EMULAB-COPYRIGHT
* Copyright (c) 2000-2004 University of Utah and the Flux Group. * Copyright (c) 2000-2005 University of Utah and the Flux Group.
* All rights reserved. * All rights reserved.
*/ */
...@@ -282,6 +282,7 @@ typedef struct { ...@@ -282,6 +282,7 @@ typedef struct {
*/ */
int ClientNetInit(void); int ClientNetInit(void);
int ServerNetInit(void); int ServerNetInit(void);
int ServerNetMCKeepAlive(void);
unsigned long ClientNetID(void); unsigned long ClientNetID(void);
int PacketReceive(Packet_t *p); int PacketReceive(Packet_t *p);
void PacketSend(Packet_t *p, int *resends); void PacketSend(Packet_t *p, int *resends);
......
/* /*
* EMULAB-COPYRIGHT * EMULAB-COPYRIGHT
* Copyright (c) 2000-2004 University of Utah and the Flux Group. * Copyright (c) 2000-2005 University of Utah and the Flux Group.
* All rights reserved. * All rights reserved.
*/ */
...@@ -35,7 +35,7 @@ unsigned long nonetbufs; ...@@ -35,7 +35,7 @@ unsigned long nonetbufs;
/* Max number of multicast hops. */ /* Max number of multicast hops. */
#define MCAST_TTL 5 #define MCAST_TTL 5
static int sock; static int sock = -1;
struct in_addr myipaddr; struct in_addr myipaddr;
static int nobufdelay = -1; static int nobufdelay = -1;
int broadcast = 0; int broadcast = 0;
...@@ -184,6 +184,40 @@ ServerNetInit(void) ...@@ -184,6 +184,40 @@ ServerNetInit(void)
return 1; return 1;
} }
/*
 * XXX hack.
 *
 * Cisco switches without a multicast router defined have an unfortunate
 * habit of losing our IGMP membership. This function allows us to send
 * a report message to remind the switch we are still around.
 *
 * The report is provoked by dropping our membership in the group and
 * immediately rejoining it; simply re-adding a membership we already
 * hold is rejected by the kernel.
 *
 * Returns 0 if the rejoin succeeded, or if there is nothing to do
 * (broadcast mode, or mcastaddr is not a multicast address). Returns
 * non-zero if the socket is still at its initial value of -1 or if the
 * rejoin failed.
 *
 * We need a better way to do this!
 */
int
ServerNetMCKeepAlive(void)
{
	struct ip_mreq mreq;

	/* Nothing to refresh unless we are really using IP multicast. */
	if (broadcast || !IN_MULTICAST(ntohl(mcastaddr.s_addr)))
		return 0;

	if (sock == -1)
		return 1;

	mreq.imr_multiaddr.s_addr = mcastaddr.s_addr;
	mreq.imr_interface.s_addr =
		mcastif.s_addr ? mcastif.s_addr : htonl(INADDR_ANY);

	/*
	 * The result of the drop is deliberately ignored. If a previous
	 * call dropped the membership but then failed to rejoin, we are no
	 * longer a member and the drop fails; bailing out at that point
	 * would mean never rejoining the group. Always attempt the add:
	 * if the drop failed while we are still a member, the add fails
	 * too and we still report the error.
	 */
	(void) setsockopt(sock, IPPROTO_IP, IP_DROP_MEMBERSHIP,
			  &mreq, sizeof(mreq));
	if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
		       &mreq, sizeof(mreq)) < 0)
		return 1;

	return 0;
}
/* /*
* Look for a packet on the socket. Propagate the errors back to the caller * Look for a packet on the socket. Propagate the errors back to the caller
* exactly as the system call does. Remember that we set up a socket timeout * exactly as the system call does. Remember that we set up a socket timeout
......
/* /*
* EMULAB-COPYRIGHT * EMULAB-COPYRIGHT
* Copyright (c) 2000-2004 University of Utah and the Flux Group. * Copyright (c) 2000-2005 University of Utah and the Flux Group.
* All rights reserved. * All rights reserved.
*/ */
...@@ -34,6 +34,7 @@ int debug = 0; ...@@ -34,6 +34,7 @@ int debug = 0;
int tracing = 0; int tracing = 0;
int dynburst = 0; int dynburst = 0;
int timeout = SERVER_INACTIVE_SECONDS; int timeout = SERVER_INACTIVE_SECONDS;
int keepalive = 0;
int readsize = SERVER_READ_SIZE; int readsize = SERVER_READ_SIZE;
volatile int burstsize = SERVER_BURST_SIZE; volatile int burstsize = SERVER_BURST_SIZE;
int maxburstsize = SERVER_DYNBURST_SIZE; int maxburstsize = SERVER_DYNBURST_SIZE;
...@@ -542,16 +543,39 @@ void * ...@@ -542,16 +543,39 @@ void *
ServerRecvThread(void *arg) ServerRecvThread(void *arg)
{ {
Packet_t packet, *p = &packet; Packet_t packet, *p = &packet;
int idles = 0, kafails = 0;
static int gotone; static int gotone;
if (debug > 1) if (debug > 1)
log("Server pthread starting up ..."); log("Server pthread starting up ...");
/*
* Recalculate keepalive interval in terms of packet receive
* timeouts for simplicity.
*/
if (keepalive)
keepalive = (int)(((unsigned long long)keepalive * 1000000) /
PKTRCV_TIMEOUT);
while (1) { while (1) {
pthread_testcancel(); pthread_testcancel();
if (PacketReceive(p) != 0) { if (PacketReceive(p) != 0) {
if (keepalive && ++idles > keepalive) {
if (ServerNetMCKeepAlive()) {
warning("Multicast keepalive failed");
if (++kafails > 5) {
warning("too many failures, disabled");
keepalive = 0;
}
} else {
kafails = 0;
idles = 0;
if (debug > 1)
log("Ping...");
}
}
continue; continue;
} }
idles = 0;
DOSTAT(msgin++); DOSTAT(msgin++);
if (! PacketValid(p, FileInfo.chunks)) { if (! PacketValid(p, FileInfo.chunks)) {
...@@ -794,7 +818,7 @@ main(int argc, char **argv) ...@@ -794,7 +818,7 @@ main(int argc, char **argv)
off_t fsize; off_t fsize;
void *ignored; void *ignored;
while ((ch = getopt(argc, argv, "dhp:m:i:tbDT:R:B:G:L:W:")) != -1) while ((ch = getopt(argc, argv, "dhp:m:i:tbDT:R:B:G:L:W:K:")) != -1)
switch(ch) { switch(ch) {
case 'b': case 'b':
broadcast++; broadcast++;
...@@ -836,6 +860,11 @@ main(int argc, char **argv) ...@@ -836,6 +860,11 @@ main(int argc, char **argv)
case 'W': case 'W':
bandwidth = atol(optarg); bandwidth = atol(optarg);
break; break;
case 'K':
keepalive = atoi(optarg);
if (keepalive < 0)
keepalive = 0;
break;
case 'h': case 'h':
case '?': case '?':
default: default:
...@@ -849,6 +878,11 @@ main(int argc, char **argv) ...@@ -849,6 +878,11 @@ main(int argc, char **argv)
if (!portnum || ! mcastaddr.s_addr) if (!portnum || ! mcastaddr.s_addr)
usage(); usage();
if (timeout > 0 && keepalive > timeout) {
warning("keepalive > timeout, disabling keepalive");
keepalive = 0;
}
signal(SIGINT, quit); signal(SIGINT, quit);
signal(SIGTERM, quit); signal(SIGTERM, quit);
signal(SIGHUP, reinit); signal(SIGHUP, reinit);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment