From 9dfea9fb023abfb8b426e30ffe27a1176c17952d Mon Sep 17 00:00:00 2001 From: "Leigh B. Stoller" Date: Thu, 15 Jan 2004 18:47:40 +0000 Subject: [PATCH] Add "killmode" to frisbeelauncher to kill a running frisbee for an imageid. Uses new slot in the images table (frisbee_pid) to track running frisbee daemon for an image so that it can be killed from create-image (kill before creating new image) and from the web page before deleting an imageid. --- configure | 2 +- configure.in | 2 +- tbsetup/GNUmakefile.in | 4 +- tbsetup/frisbeelauncher.in | 204 +++++++++++++++++++++++++++++------- tbsetup/webfrisbeekiller.in | 27 +++++ utils/create_image.in | 11 ++ www/deleteimageid.php3 | 8 +- www/showstuff.php3 | 19 ++++ 8 files changed, 232 insertions(+), 45 deletions(-) create mode 100644 tbsetup/webfrisbeekiller.in diff --git a/configure b/configure index 1511c9879..746180bc5 100755 --- a/configure +++ b/configure @@ -1429,7 +1429,7 @@ outfiles="$outfiles Makeconf GNUmakefile \ tbsetup/idleswap tbsetup/webidleswap tbsetup/switchmac \ tbsetup/newnode_reboot \ tbsetup/tarfiles_setup tbsetup/webtarfiles_setup \ - tbsetup/fetchtar.proxy \ + tbsetup/fetchtar.proxy tbsetup/webfrisbeekiller \ tbsetup/plab/GNUmakefile tbsetup/plab/libplab.py \ tbsetup/plab/plabslice tbsetup/plab/plabnode tbsetup/plab/plabdaemon \ tbsetup/plab/plabmetrics tbsetup/plab/plabstats \ diff --git a/configure.in b/configure.in index 98d060a1a..c03cec88e 100755 --- a/configure.in +++ b/configure.in @@ -474,7 +474,7 @@ outfiles="$outfiles Makeconf GNUmakefile \ tbsetup/idleswap tbsetup/webidleswap tbsetup/switchmac \ tbsetup/newnode_reboot \ tbsetup/tarfiles_setup tbsetup/webtarfiles_setup \ - tbsetup/fetchtar.proxy \ + tbsetup/fetchtar.proxy tbsetup/webfrisbeekiller \ tbsetup/plab/GNUmakefile tbsetup/plab/libplab.py \ tbsetup/plab/plabslice tbsetup/plab/plabnode tbsetup/plab/plabdaemon \ tbsetup/plab/plabmetrics tbsetup/plab/plabstats \ diff --git a/tbsetup/GNUmakefile.in b/tbsetup/GNUmakefile.in index 88536b41a..9227aa71b 100644 --- a/tbsetup/GNUmakefile.in +++ b/tbsetup/GNUmakefile.in @@ -1,6 +1,6 @@ # # EMULAB-COPYRIGHT -# Copyright (c) 2000-2003 University of Utah and the Flux Group. +# Copyright (c) 2000-2004 University of Utah and the Flux Group. # All rights reserved. # @@ -42,7 +42,7 @@ LIBEXEC_STUFF = rmproj wanlinksolve wanlinkinfo \ webmkgroup websetgroups webmkproj \ spewlogfile staticroutes routecalc wanassign \ webnodereboot webrmuser webidleswap switchmac \ - spewrpmtar webtarfiles_setup + spewrpmtar webtarfiles_setup webfrisbeekiller LIB_STUFF = libtbsetup.pm exitonwarn.pm libtestbed.pm snmpit_intel.pm \ snmpit_cisco.pm snmpit_lib.pm snmpit_apc.pm power_rpc27.pm \ diff --git a/tbsetup/frisbeelauncher.in b/tbsetup/frisbeelauncher.in index 509b6ae17..6ecd071c4 100755 --- a/tbsetup/frisbeelauncher.in +++ b/tbsetup/frisbeelauncher.in @@ -1,16 +1,28 @@ #!/usr/bin/perl -wT - # # EMULAB-COPYRIGHT -# Copyright (c) 2000-2002 University of Utah and the Flux Group. +# Copyright (c) 2000-2002, 2004 University of Utah and the Flux Group. # All rights reserved. # - use Getopt::Std; use POSIX 'setsid'; # For &daemonize use Sys::Syslog; use English; +# +# This also kills a running frisbee. +# +sub usage() +{ + print "Usage: $0 [-d] [-k] \n"; + print "-k: Kill running frisbee.\n"; + print "-d: Print debugging output.\n"; + exit(1); +} +my $optlist = "dk"; +my $debug = 0; +my $killmode = 0; + # Configure variables my $TB = "@prefix@"; my $TBOPS = "@TBOPSEMAIL@"; @@ -36,10 +48,19 @@ my $BASEADDR = "234.5.6"; my $BASEPORT = "3564"; my $LOGFILE = "$TB/log/frisbeelauncher"; -# Process command line options -getopts('d',\%opt); -if (@ARGV != 1) { - exit &usage(); +# +# Parse command arguments. Once we return from getopts, all that should be +# left are the required arguments. +# +%options = (); +if (! getopts($optlist, \%options)) { + usage(); +} +if (defined($options{"d"})) { + $debug = 1; +} +if (defined($options{"k"})) { + $killmode = 1; } $imageid = shift @ARGV; @@ -47,36 +68,113 @@ $imageid = shift @ARGV; # Untaint the argument. # if ($imageid =~ /^([-\@\w\+.]+)$/) { - $imageid = $1; -} else { - die("Invalid image '$imageid' contains illegal characters.\n"); + $imageid = $1; +} +else { + die("Invalid image '$imageid' contains illegal characters.\n"); } - # Grab the filename to give to frisbee my $filename = &get_filename($imageid); # -# Make sure that the user has sufficient permissions, and read the image file -# or bomb out right now. +# Make sure that the user has sufficient permissions. # -if (!TBImageIDAccessCheck($UID,$imageid,TB_IMAGEID_READINFO)) { - die("*** You do not have pemssion to load $imageid on nodes\n"); +if (!TBImageIDAccessCheck($UID, $imageid, + ($killmode ? TB_IMAGEID_DESTROY : + TB_IMAGEID_READINFO))) { + die("*** $0:\n". + " Not enough permission!\n"); } -if (! -R $filename) { - die("*** You do not have permission to read the image file for\n". - "imageid $imageid: $filename\n"); +if (!$killmode && ! -R $filename) { + die("*** You do not have permission to read the image file for\n". + "imageid $imageid: $filename\n"); } # -# Need to lock the tables here, since we are going to mess with the -# busy indicator. +# Need to lock the tables here, so we can lock out anyone else from +# messing with the image (and so we can pick an address atomically). # &lock_tables; # Try to discover if some other process is handling this address -$address = &get_address($imageid); +($address, $pid) = get_address($imageid); + +if ($killmode) { + # + # Killmode. We do some sanity checking cause there is a small race + # inherent in trying to use the pid via the DB without locking (below). + # + # No point in leaving table locked; not going to modify it. + # + unlock_tables(); + + # Nothing running. + exit(0) + if (!$address && !$pid); + + if (!$address && $pid) { + # + # This makes no sense. Just send email. + # + my $mesg = "Inconsistent DB state. PID ($pid) but no load address!"; + + SENDMAIL($TBOPS, + "Frisbee Killer Failed!", + "Imageid: $imageid\n". + $mesg); + + die("*** $0:\n". + " $mesg\n"); + } + if ($address && !$pid) { + # + # Okay, minor problem. It is possible we caught the launcher between + # setting the address and setting the pid. Wait a moment, and then + # try again. If still no pid, bail. + # + sleep(1); + ($address, $pid) = get_address($imageid); + + # Okay, situation resolved itself; other frisbeelauncher bailed. + exit(0) + if (!$address && !$pid); + + # + # Still inconsistent so bail. + # + if ($address && !$pid) { + my $mesg = "Inconsistent DB state. Load address but no PID!"; + + SENDMAIL($TBOPS, + "Frisbee Killer Failed!", + "Imageid: $imageid\n". + $mesg); + + die("*** $0:\n". + " $mesg\n"); + } + } + + # + # Okay, address and pid. We could clear the pid from the DB, + # preventing another killer from thinking it is running, but not + # much point since it is not likely to happen. If it turns out to + # be a problem we can change the way this works. + # + unlock_tables(); + if (! kill('TERM', $pid)) { + SENDMAIL($TBOPS, + "Frisbee Killer Failed!", + "Failed to stop frisbee daemon for $imageid\n". + "Could not kill(TERM) process $pid: $? $!"); + + die("*** $0:\n". + " Failed to stop frisbee daemon for $imageid!\n"); + } + exit(0); +} if ($address && &keepbusy($imageid)) { &unlock_tables; @@ -84,6 +182,19 @@ if ($address && &keepbusy($imageid)) { exit (0); } +# This would be inconsistent. +if ($pid) { + my $mesg = "Inconsistent DB state. No load address but PID ($pid) set!"; + + SENDMAIL($TBOPS, + "Frisbee Startup Failed!", + "Imageid: $imageid\n". + $mesg); + + die("*** $0:\n". + " $mesg\n"); +} + # Pick an address: Die if unsucessful, set address and unlock if sucessful $address = &pick_address; &debug("Picked address $address\n"); @@ -101,6 +212,13 @@ if (TBBackGround($LOGFILE)) { exit(0); } +# Set up a signal handler that will clean up in case we get killed +$SIG{HUP} = $SIG{INT} = $SIG{TERM} = \&cleanup; + +# Set our pid. This happens outside the lock which could lead to races, +# but that is unlikely. Look for it above though. +set_pid($imageid, $PID); + # # Drop root permissions, if we have them # @@ -108,10 +226,6 @@ if ($EUID == 0) { $EUID = $UID; } -# Set up a signal handler that will clean up in case we get killed -$SIG{HUP} = $SIG{INT} = $SIG{TERM} = \&cleanup; -# XXX: Any others we should catch? - # Now, we actually launch Frisbee while (1) { # @@ -140,7 +254,8 @@ while (1) { # Dump early. This will leave the address in # in the DB, so that another one will not start # until the matter is resolved by someone. - # + # + set_pid($imageid, 0); exit(1); } } @@ -148,7 +263,6 @@ while (1) { # Child branch # The database format for address is host:port - however, # we need to give them as seperate arguments to frisbeed. - if ($address =~ /(.*):(.*)/) { my $addr = $1; my $port = $2; @@ -169,16 +283,10 @@ exit(0); # Subroutines ###################################################################### -# Print out a usage mesage -sub usage { - print "Usage: $0 [-d] IMAGEID\n"; - print "-d: Print debugging output\n"; -} - # Only print if -d option was given. Also add $$ on the beginning of the # string, to ease debugging sub debug { - if ($opt{d}) { print "$$: ", @_ }; + if ($debug) { print "$$: ", @_ }; } # Grab the address for the passed-in imageid @@ -186,7 +294,7 @@ sub get_address { my ($imageid) = @_; my $sth = - DBQueryFatal("SELECT imageid,load_address ". + DBQueryFatal("SELECT load_address,frisbee_pid ". "FROM images WHERE imageid='$imageid'"); my @row = $sth->fetchrow; @@ -194,7 +302,7 @@ sub get_address { die "No such imageid: $imageid\n"; } - return $row[1]; + return ($row[0], $row[1]); } # Grab the filename for the passed-in imageid @@ -270,6 +378,22 @@ sub set_address { "WHERE imageid='$imageid'"); } +# Pass in an imageid and a pid. +sub set_pid { + my ($imageid,$pid) = @_; + + DBQueryFatal("UPDATE images SET frisbee_pid=$pid " . + "WHERE imageid='$imageid'"); +} + +# Pass in an imageid and a pid. +sub clear_pid { + my ($imageid) = @_; + + DBQueryFatal("UPDATE images SET frisbee_pid=0 " . + "WHERE imageid='$imageid'"); +} + # Bump the busy indicator to keep the frisbeed going. sub keepbusy($imageid) { my ($imageid) = @_; @@ -313,9 +437,9 @@ sub cleanup { # Clear out the address (and pid) registered to this process sub clear_address { - &debug("Clearing out registered load_address\n"); + &debug("Clearing out registered load_address and pid\n"); # Now, clear out the load_address we had set up - my $address_clear = "UPDATE images SET load_address='',load_busy=0 " . - "WHERE imageid='$imageid'"; - DBQueryFatal($address_clear); + DBQueryFatal("update images set ". + "load_address='',load_busy=0,frisbee_pid=0 ". + "where imageid='$imageid'"); } diff --git a/tbsetup/webfrisbeekiller.in b/tbsetup/webfrisbeekiller.in new file mode 100644 index 000000000..739948580 --- /dev/null +++ b/tbsetup/webfrisbeekiller.in @@ -0,0 +1,27 @@ +#!/usr/bin/perl -w + +# +# EMULAB-COPYRIGHT +# Copyright (c) 2000-2002, 2004 University of Utah and the Flux Group. +# All rights reserved. +# + +use English; + +# +# This gets invoked from the Web interface. Simply a wrapper. +# + +# +# Configure variables +# +my $TB = "@prefix@"; + +# +# Run the real thing, and never return. +# +unshift(@ARGV, "-k"); +exec "$TB/sbin/frisbeelauncher", @ARGV; + +die("*** $0:\n". + " Could not exec program: $!"); diff --git a/utils/create_image.in b/utils/create_image.in index 0d288f1c4..c318cf7a0 100755 --- a/utils/create_image.in +++ b/utils/create_image.in @@ -54,6 +54,7 @@ delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; my $nodereboot = "$TB/bin/node_reboot"; my $createimage = "/usr/local/bin/create-image"; +my $friskiller = "$TB/sbin/frisbeelauncher"; my $osselect = "$TB/bin/os_select"; my $devtype = "ad"; my $devnum = 0; @@ -207,6 +208,16 @@ else { " Bad filename: $filename!\n"); } +# +# Be sure to kill off running frisbee. If a node is trying to load that +# image, well tough. +# +system("$friskiller -k $imageid"); +if ($?) { + die("*** $0:\n". + " Could not kill running frisbee for $imageid!\n"); +} + open(FILE, "> $filename") or die("*** $0:\n". " Could not create $filename: $!\n"); diff --git a/www/deleteimageid.php3 b/www/deleteimageid.php3 index 0a86d1b79..69658703b 100644 --- a/www/deleteimageid.php3 +++ b/www/deleteimageid.php3 @@ -1,7 +1,7 @@ \n"; + echo " + Frisbee pid: + \n"; + + if ($edit && $isadmin) { + echo ""; + } + else { + echo "$frisbee_pid"; + } + echo " + \n"; + if ($edit) { echo " -- GitLab