Commit 08ce72b6 authored by Leigh B Stoller's avatar Leigh B Stoller

Add an "interruptible" option to TBScriptLock(). When set, each time
through the loop we look to see if signals are pending, and if so we return
early with an error. The caller (libvnode_xen) can use this to avoid really
long waits when the server has said to stop what it's doing. For example, a
vnode setup is waiting for an image lock, but the server comes along and says
to stop setting up. Previously, we would wait for the lock; now we return
early. This is to help with cancellation, where it is nice if the server can
stop a CreateSliver() in its tracks when it is safe to do so.
parent 5bd9ad1a
......@@ -34,7 +34,8 @@ use Exporter;
TBSCRIPTLOCK_OKAY TBSCRIPTLOCK_TIMEDOUT
TBSCRIPTLOCK_IGNORE TBSCRIPTLOCK_FAILED TBSCRIPTLOCK_GLOBALWAIT
TBSCRIPTLOCK_SHAREDLOCK TBSCRIPTLOCK_NONBLOCKING
TBSCRIPTLOCK_WOULDBLOCK
TBSCRIPTLOCK_WOULDBLOCK TBSCRIPTLOCK_INTERRUPTED
TBSCRIPTLOCK_INTERRUPTIBLE
TBTimeStamp TBTimeStampWithDate TBBackGround ReOpenLog
);
......@@ -44,6 +45,7 @@ use English;
use Fcntl ':flock';
use IO::Handle;
use Time::HiRes qw(gettimeofday);
use POSIX qw(:signal_h);
#
# Turn off line buffering on output
......@@ -267,10 +269,12 @@ sub TBSCRIPTLOCK_OKAY() { 0; }
# Status codes. TBScriptLock() returns TBSCRIPTLOCK_TIMEDOUT when it gives
# up waiting, and TBSCRIPTLOCK_INTERRUPTED when the interruptible option is
# set and a signal is found pending while waiting.
sub TBSCRIPTLOCK_TIMEDOUT() { 1; }
# NOTE(review): IGNORE, WOULDBLOCK and FAILED are not used in the visible
# code; presumably they are further TBScriptLock() return values - confirm.
sub TBSCRIPTLOCK_IGNORE() { 2; }
sub TBSCRIPTLOCK_WOULDBLOCK() { 4; }
sub TBSCRIPTLOCK_INTERRUPTED() { 8; }
sub TBSCRIPTLOCK_FAILED() { -1; }
# Option bits that callers OR together and pass as the flags argument of
# TBScriptLock(), which tests each one with a bitwise AND.
sub TBSCRIPTLOCK_GLOBALWAIT() { 0x01; }
sub TBSCRIPTLOCK_SHAREDLOCK() { 0x10; }
# NOTE(review): NONBLOCKING is not tested in the visible code; presumably
# it is another flags bit - confirm.
sub TBSCRIPTLOCK_NONBLOCKING() { 0x20; }
# When set, TBScriptLock() checks for pending signals after each one second
# sleep and returns TBSCRIPTLOCK_INTERRUPTED instead of continuing to wait.
sub TBSCRIPTLOCK_INTERRUPTIBLE(){ 0x40; }
#
# There are two kinds of serialization.
......@@ -288,6 +292,7 @@ sub TBScriptLock($;$$$)
local *LOCK;
my $global = 0;
my $shared = 0;
my $interruptible = 0;
if (!defined($waittime)) {
$waittime = 30;
......@@ -299,6 +304,8 @@ sub TBScriptLock($;$$$)
if (defined($flags) && ($flags & TBSCRIPTLOCK_GLOBALWAIT()));
$shared = 1
if (defined($flags) && ($flags & TBSCRIPTLOCK_SHAREDLOCK()));
$interruptible = 1
if (defined($flags) && ($flags & TBSCRIPTLOCK_INTERRUPTIBLE()));
$lockname = "/var/tmp/testbed_${token}_lockfile";
my $oldmask = umask(0000);
......@@ -310,6 +317,20 @@ sub TBScriptLock($;$$$)
}
umask($oldmask);
#
# Closure used by the wait loops: fetch the set of pending signals and
# report whether any signal numbered 1 through 49 is in it. Prints the
# first pending signal number found and returns 1, otherwise returns 0.
#
my $checkforinterrupt = sub {
    my $pending = POSIX::SigSet->new;
    POSIX::sigpending($pending);

    # XXX SIGRTMIN and SIGRTMAX did not appear to be defined in the POSIX
    # module, so probe a hard-coded range of signal numbers instead.
    foreach my $i (1 .. 49) {
        next
            unless ($pending->ismember($i));

        print "checkForInterrupt: Signal $i is pending\n";
        return 1;
    }
    return 0;
};
if (! $global) {
#
# A plain old lock.
......@@ -329,6 +350,10 @@ sub TBScriptLock($;$$$)
return TBSCRIPTLOCK_TIMEDOUT();
}
sleep(1);
if ($interruptible && &$checkforinterrupt()) {
print STDERR "ScriptLock interrupted by signal!\n";
return TBSCRIPTLOCK_INTERRUPTED();
}
}
# Okay, got the lock. Save the handle. We need it below.
if (defined($lockhandle_ref)) {
......@@ -373,9 +398,13 @@ sub TBScriptLock($;$$$)
return TBSCRIPTLOCK_TIMEDOUT();
}
sleep(1);
if ($interruptible && &$checkforinterrupt()) {
print STDERR "ScriptLock interrupted by signal!\n";
return TBSCRIPTLOCK_INTERRUPTED();
}
}
$count = 0;
my $count = 0;
#
# If we did not get the lock, wait for the process that did to finish.
#
......@@ -397,6 +426,10 @@ sub TBScriptLock($;$$$)
return TBSCRIPTLOCK_TIMEDOUT();
}
sleep(1);
if ($interruptible && &$checkforinterrupt()) {
print STDERR "ScriptLock interrupted by signal!\n";
return TBSCRIPTLOCK_INTERRUPTED();
}
}
}
}
......
......@@ -755,41 +755,44 @@ if (defined(VNCONFIG('SSHDPORT')) && VNCONFIG('SSHDPORT') ne "" &&
# it running in its new context. Still, let's protect it with a timer
# since it might get hung up inside and we do not want to get stuck here.
#
my $childpid = fork();
if ($childpid) {
my $timedout = 0;
local $SIG{ALRM} = sub { kill("TERM", $childpid); $timedout = 1; };
alarm 180
if (!$ISXENVM);
waitpid($childpid, 0);
alarm 0
if (!$ISXENVM);
if (!$ISXENVM) {
my $childpid = fork();
if ($childpid) {
my $timedout = 0;
local $SIG{ALRM} = sub { kill("TERM", $childpid); $timedout = 1; };
alarm 180;
waitpid($childpid, 0);
alarm 0;
#
# If failure then cleanup.
#
if ($? || $timedout) {
MyFatal("$vnodeid container startup ".
($timedout ? "timed out." : "failed."));
#
# If failure then cleanup.
#
if ($? || $timedout) {
MyFatal("$vnodeid container startup ".
($timedout ? "timed out." : "failed."));
}
}
}
else {
#
# We want to call this as clean as possible.
#
$SIG{TERM} = 'DEFAULT';
$SIG{INT} = 'DEFAULT';
$SIG{USR1} = 'DEFAULT';
$SIG{USR2} = 'DEFAULT';
$SIG{HUP} = 'DEFAULT';
POSIX::setsid();
if ($libops{$vmtype}{"vnodeBoot"}->($vnodeid, $vmid,
\%vnconfig, $vnstate->{'private'})) {
print STDERR "*** ERROR: vnodeBoot failed\n";
exit(1);
else {
#
# We want to call this as clean as possible.
#
$SIG{TERM} = 'DEFAULT';
$SIG{INT} = 'DEFAULT';
$SIG{USR1} = 'DEFAULT';
$SIG{USR2} = 'DEFAULT';
$SIG{HUP} = 'DEFAULT';
POSIX::setsid();
if ($libops{$vmtype}{"vnodeBoot"}->($vnodeid, $vmid,
\%vnconfig, $vnstate->{'private'})){
print STDERR "*** ERROR: vnodeBoot failed\n";
exit(1);
}
exit(0);
}
exit(0);
}
elsif (safeLibOp('vnodeBoot', 1, 1)) {
MyFatal("$vnodeid container startup failed.");
}
if (safeLibOp('vnodePostConfig', 1, 1)) {
MyFatal("vnodePostConfig failed");
......
......@@ -84,6 +84,7 @@ use File::Basename;
use File::Path;
use File::Copy;
use File::Temp;
use POSIX qw(:signal_h);
# Pull in libvnode
BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }
......@@ -295,6 +296,10 @@ my $VIFROUTING = ((-e "$ETCDIR/xenvifrouting") ? 1 : 0);
my $TMCD_PORT = 7777;
# Number of concurrent containers set up in parallel. We bump this up
# a bit down in doingThinLVM().
my $MAXCONCURRENT = 3;
#
# Information about the running Xen hypervisor
#
......@@ -336,6 +341,7 @@ sub LookupRouteTable($);
sub FreeRouteTable($);
sub downloadOneImage($$$);
sub captureRunning($);
sub checkForInterrupt();
sub getXenInfo()
{
......@@ -782,8 +788,9 @@ sub rootPreConfigNetwork($$$$)
TBDebugTimeStamp("rootPreConfigNetwork: grabbing global lock $GLOBAL_CONF_LOCK")
if ($lockdebug);
if (TBScriptLock($GLOBAL_CONF_LOCK, 0, 900) != TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get the global lock after a long time!\n";
if (TBScriptLock($GLOBAL_CONF_LOCK,
TBSCRIPTLOCK_INTERRUPTIBLE(), 900) != TBSCRIPTLOCK_OKAY()){
print STDERR "Could not get the global lock!\n";
return -1;
}
TBDebugTimeStamp(" got global lock")
......@@ -863,9 +870,10 @@ sub vnodeCreate($$$$)
my $imagelockname = ImageLockName($imagename);
TBDebugTimeStamp("grabbing image lock $imagelockname shared")
if ($lockdebug);
if (TBScriptLock($imagelockname, TBSCRIPTLOCK_SHAREDLOCK(), 1800)
!= TBSCRIPTLOCK_OKAY()) {
fatal("Could not get $imagelockname lock after a long time!");
if (TBScriptLock($imagelockname,
TBSCRIPTLOCK_INTERRUPTIBLE()|TBSCRIPTLOCK_SHAREDLOCK(),
1800) != TBSCRIPTLOCK_OKAY()) {
fatal("Could not get $imagelockname lock!");
}
TBDebugTimeStamp(" got image lock")
if ($lockdebug);
......@@ -899,10 +907,9 @@ sub vnodeCreate($$$$)
TBScriptUnlock();
TBDebugTimeStamp("grabbing image lock $imagelockname exclusive")
if ($lockdebug);
if (TBScriptLock($imagelockname, undef, 1800)
if (TBScriptLock($imagelockname, TBSCRIPTLOCK_INTERRUPTIBLE(), 1800)
!= TBSCRIPTLOCK_OKAY()) {
fatal("Could not get $imagelockname write lock ".
"after a long time!");
fatal("Could not get $imagelockname write lock!");
}
TBDebugTimeStamp(" got image lock")
if ($lockdebug);
......@@ -918,7 +925,9 @@ sub vnodeCreate($$$$)
TBScriptUnlock();
TBDebugTimeStamp("grabbing image lock $imagelockname shared")
if ($lockdebug);
if (TBScriptLock($imagelockname, TBSCRIPTLOCK_SHAREDLOCK(), 1800)
if (TBScriptLock($imagelockname,
TBSCRIPTLOCK_INTERRUPTIBLE()|
TBSCRIPTLOCK_SHAREDLOCK(), 1800)
!= TBSCRIPTLOCK_OKAY()) {
fatal("Could not get $imagelockname lock back ".
"after a long time!");
......@@ -2099,8 +2108,9 @@ sub vnodePreConfigExpNetwork($$$$)
#
TBDebugTimeStamp("vnodePreConfigExpNetwork: grabbing global lock $GLOBAL_CONF_LOCK")
if ($lockdebug);
if (TBScriptLock($GLOBAL_CONF_LOCK, 0, 900) != TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get the global lock after a long time!\n";
if (TBScriptLock($GLOBAL_CONF_LOCK, TBSCRIPTLOCK_INTERRUPTIBLE(), 900)
!= TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get the global lock!\n";
return -1;
}
TBDebugTimeStamp(" got global lock")
......@@ -2375,6 +2385,8 @@ sub vnodeBoot($$$$)
return 0;
}
$countdown--;
last
if (checkForInterrupt());
}
#
# Tear it down and try again. Use vnodeHalt cause it protects
......@@ -2392,6 +2404,8 @@ sub vnodeBoot($$$$)
TBDebugTimeStamp("Container not gone yet");
}
TBDebugTimeStamp("Container is gone ($i)!");
last
if (checkForInterrupt());
}
return -1;
}
......@@ -3131,7 +3145,8 @@ sub grabGoldenLock($)
TBDebugTimeStamp("grabbing gimage lock $token")
if ($lockdebug);
if (TBScriptLock($token, undef, 900, \$lockref) == TBSCRIPTLOCK_OKAY()) {
if (TBScriptLock($token, TBSCRIPTLOCK_INTERRUPTIBLE(),
900, \$lockref) == TBSCRIPTLOCK_OKAY()) {
TBDebugTimeStamp(" got gimage lock")
if ($lockdebug);
return $lockref;
......@@ -3283,10 +3298,10 @@ sub createImageDisk($$$$)
# And back to a shared lock.
TBDebugTimeStamp("grabbing image lock $imagelockname shared")
if ($lockdebug);
if (TBScriptLock($imagelockname, TBSCRIPTLOCK_SHAREDLOCK(), 1800)
!= TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get $imagelockname lock back ".
"after a long time!\n";
if (TBScriptLock($imagelockname,
TBSCRIPTLOCK_INTERRUPTIBLE()|TBSCRIPTLOCK_SHAREDLOCK(),
1800) != TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get $imagelockname lock back!\n";
return -1;
}
TBDebugTimeStamp(" got image lock")
......@@ -3321,9 +3336,9 @@ sub downloadOneImage($$$)
TBDebugTimeStamp("grabbing image lock $imagelockname exclusive")
if ($lockdebug);
if (TBScriptLock($imagelockname, undef, 1800) != TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get $imagelockname write lock".
"after a long time!\n";
if (TBScriptLock($imagelockname, TBSCRIPTLOCK_INTERRUPTIBLE(), 1800)
!= TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get $imagelockname write lock!\n";
return -1;
}
TBDebugTimeStamp(" got image lock")
......@@ -4230,8 +4245,9 @@ sub createExpBridges($$$)
#
TBDebugTimeStamp("createExpBridges: grabbing global lock $GLOBAL_CONF_LOCK")
if ($lockdebug);
if (TBScriptLock($GLOBAL_CONF_LOCK, 0, 1800) != TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get the global lock after a long time!\n";
if (TBScriptLock($GLOBAL_CONF_LOCK, TBSCRIPTLOCK_INTERRUPTIBLE(),
1800) != TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get the global lock!\n";
return -1;
}
TBDebugTimeStamp(" got global lock")
......@@ -4720,7 +4736,7 @@ sub doingThinLVM()
$usethin = 0;
return 0;
}
$MAXCONCURRENT = 5;
return 1;
}
......@@ -4959,7 +4975,8 @@ sub AllocateIFBs($$$)
TBDebugTimeStamp("AllocateIFBs: grabbing global lock $GLOBAL_CONF_LOCK")
if ($lockdebug);
if (TBScriptLock($GLOBAL_CONF_LOCK, 0, 1800) != TBSCRIPTLOCK_OKAY()) {
if (TBScriptLock($GLOBAL_CONF_LOCK, TBSCRIPTLOCK_INTERRUPTIBLE(),
1800) != TBSCRIPTLOCK_OKAY()) {
print STDERR "Could not get the global lock after a long time!\n";
return -1;
}
......@@ -5407,16 +5424,30 @@ sub RunWithLock($$)
return $status;
}
#
# Report whether a signal is pending for this process. Fetches the pending
# signal set and scans signal numbers 1 through 49; on the first member
# found it prints the signal number and returns 1. Returns 0 if none of
# those signals is pending.
#
# NOTE(review): sigpending() reports signals that are blocked and awaiting
# delivery, so this presumably relies on the caller having signals blocked;
# confirm against the callers.
#
sub checkForInterrupt()
{
    my $pending = POSIX::SigSet->new;
    POSIX::sigpending($pending);

    # XXX SIGRTMIN and SIGRTMAX did not appear to be defined in the POSIX
    # module, so probe a hard-coded range of signal numbers instead.
    foreach my $i (1 .. 49) {
        next
            unless ($pending->ismember($i));

        print "checkForInterrupt: Signal $i is pending\n";
        return 1;
    }
    return 0;
}
#
# We need to control how many simultaneous creates happen at once.
#
my $MAXCONCURRENT = 3;
my $createvnode_lockref;
sub CreateVnodeLock()
{
my $tries = 1000;
while ($tries) {
for (my $i = 0; $i < $MAXCONCURRENT; $i++) {
my $token = "createvnode_${i}";
......@@ -5435,7 +5466,11 @@ sub CreateVnodeLock()
}
print "Still trying to get the create lock at " . time() . "\n"
if (($tries % 60) == 0);
return -1
if (checkForInterrupt());
sleep(4);
return -1
if (checkForInterrupt());
$tries--;
}
TBDebugTimeStamp("Could not get the createvnode lock after a long time!");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment