Commit 42757a0c authored by Leigh B. Stoller's avatar Leigh B. Stoller

Bump remote tmcc timeout to 30 seconds.

Add better timeout handling code to tmcc, which watches for progress
instead of just giving up as soon as the timeout expires. This lets
really slow connections proceed okay, but still times out if nothing
happens at all.

This new tmcc has been installed into the sup trees for WIDE and RON.
parent a8636777
......@@ -43,7 +43,7 @@ use English;
my $vnodeid;
#
# True is running inside a jail. Set just below.
# True if running inside a jail. Set just below.
#
my $injail;
......@@ -227,6 +227,7 @@ my $vname = "";
# Control tmcc error condition and timeout. Dynamic, not lexical!
$tmccdie = 1;
$tmcctimeout = 0;
my $TMCCTIMEO = 30; # Default timeout on remote nodes.
# When on the MFS, we do a much smaller set of stuff.
# Cause of the way the packages are loaded (which I do not understand),
......@@ -352,6 +353,7 @@ sub cleanup_node ($) {
unlink TMDELAY, TMLINKDELAY;
unlink TMMOUNTDB . ".db";
unlink TMSFSMOUNTDB . ".db";
unlink "$VARDIR/db/rtabid";
#
# If scrubbing, remove the password/group file DBs so that we revert
......@@ -2027,7 +2029,7 @@ sub bootsetup()
sub nodeupdate()
{
if (REMOTE()) {
local $tmcctimeout = 10;
local $tmcctimeout = $TMCCTIMEO;
nodeupdateaux();
}
else {
......
......@@ -43,7 +43,7 @@ use English;
my $vnodeid;
#
# True is running inside a jail. Set just below.
# True if running inside a jail. Set just below.
#
my $injail;
......@@ -227,6 +227,7 @@ my $vname = "";
# Control tmcc error condition and timeout. Dynamic, not lexical!
$tmccdie = 1;
$tmcctimeout = 0;
my $TMCCTIMEO = 30; # Default timeout on remote nodes.
# When on the MFS, we do a much smaller set of stuff.
# Cause of the way the packages are loaded (which I do not understand),
......@@ -352,6 +353,7 @@ sub cleanup_node ($) {
unlink TMDELAY, TMLINKDELAY;
unlink TMMOUNTDB . ".db";
unlink TMSFSMOUNTDB . ".db";
unlink "$VARDIR/db/rtabid";
#
# If scrubbing, remove the password/group file DBs so that we revert
......@@ -2027,7 +2029,7 @@ sub bootsetup()
sub nodeupdate()
{
if (REMOTE()) {
local $tmcctimeout = 10;
local $tmcctimeout = $TMCCTIMEO;
nodeupdateaux();
}
else {
......
......@@ -91,6 +91,30 @@ usage()
exit(1);
}
/*
* We cannot let remote nodes hang, but they can be slow. If we get connected
* we give it an extra timeout, and if we make any progress at all, keep
* giving it extra timeouts.
*/
static int connected = 0;	/* Set to 1 by the transport code once it has reached the server */
static int progress = 0;	/* Running total of the byte counts returned by the reads */
static int waitfor = 0;		/* Alarm interval in seconds; 0 means no timeout was requested */

/*
 * SIGALRM handler for the progress-aware timeout.
 *
 * Each time the alarm fires we compare the global progress counter with
 * the value seen at the previous expiry. If we are connected and the
 * counter has advanced, the alarm is re-armed for another waitfor seconds
 * and the interrupted code resumes. Otherwise a message is written to
 * stderr and the process exits with status -1.
 *
 * The handler takes the signal number (unused) so that its type matches
 * what signal() expects; a handler defined with an empty parameter list
 * is called with a mismatched argument count.
 */
static void
tooktoolong(int signo)
{
	static const char	msg[] = "Timed out cause there was no progress!\n";
	static int		lastprogress = 0;

	(void)signo;

	/* If we made progress, keep going (reset timer too) */
	if (connected && progress > lastprogress) {
		lastprogress = progress;
		alarm(waitfor);
		return;
	}

	/*
	 * stdio is not async-signal-safe, so emit the message with write().
	 * A failed write is ignored; we are exiting anyway.
	 */
	if (write(STDERR_FILENO, msg, sizeof(msg) - 1) < 0) {
		/* Nothing useful can be done here. */
	}

	/*
	 * NOTE(review): exit() is kept so that exit-time processing is
	 * unchanged; it is not async-signal-safe either. Confirm that no
	 * caller relies on stdio flushing before switching to _exit().
	 */
	exit(-1);
}
int
main(int argc, char **argv)
......@@ -108,7 +132,6 @@ main(int argc, char **argv)
char *keyfile = NULL;
char *privkey = NULL;
char *proxypath= NULL;
int waitfor = 0;
while ((ch = getopt(argc, argv, "v:s:p:un:t:k:x:l:do:")) != -1)
switch(ch) {
......@@ -276,6 +299,7 @@ main(int argc, char **argv)
* When a timeout is requested, arm an alarm; the SIGALRM handler decides
* whether to keep waiting (progress was made) or to exit.
*/
if (waitfor) {
signal(SIGALRM, tooktoolong);
alarm(waitfor);
}
......@@ -393,6 +417,7 @@ dotcp(char *data, int outfd, struct in_addr serverip)
sleep(10);
}
foundit:
connected = 1;
n = 1;
if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &n, sizeof(n)) < 0) {
......@@ -426,6 +451,7 @@ dotcp(char *data, int outfd, struct in_addr serverip)
}
break;
}
progress += cc;
if (dooutput(outfd, buf, cc) < 0)
goto bad;
}
......@@ -471,9 +497,11 @@ doudp(char *data, int outfd, struct in_addr serverip, int portnum)
fprintf(stderr, "short write (%d != %d)\n", cc, n);
return -1;
}
connected = 1;
cc = recvfrom(sock, buf, sizeof(buf) - 1, 0,
(struct sockaddr *)&client, &length);
progress += cc;
if (cc < 0) {
perror("Reading from socket:");
......@@ -515,6 +543,7 @@ dounix(char *data, int outfd, char *unixpath)
close(sock);
return -1;
}
connected = 1;
/*
* Write the command to the socket and wait for the response.
......@@ -542,6 +571,7 @@ dounix(char *data, int outfd, char *unixpath)
}
break;
}
progress += cc;
if (dooutput(outfd, buf, cc) < 0)
goto bad;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment