#!/usr/bin/perl -wT use English; use Getopt::Std; # # Update mounts and accounts and anything else after changing the permissions # for a node. This is intended to be invoked from the web interface after # adding and/or subtracting pids from the experiment pid access list. # # XXX There is an inherent race condition with using this script. What if # nodes are released while it is running? # # The output is all jumbled together since the updates are issued in parallel. # Might be a pain when debugging. # sub usage() { print STDOUT "Usage: node_update [-b] \n". "Update user accounts and NFS mounts on nodes in your project.\n". "Use -b to use batch operation (place in background, send email).\n"; exit(-1); } my $optlist = "be:"; # # Configure variables # my $TB = "@prefix@"; my $TESTMODE = @TESTMODE@; my $TBOPS = "@TBOPSEMAIL@"; my $TBLOGS = "@TBLOGSEMAIL@"; my $ssh = "$TB/bin/sshtb -n"; my $sshremote = "$TB/bin/sshremote -n"; my $expsetup = "$TB/sbin/exports_setup"; my $batchmode = 0; my $maxchildren = 20; # # Load the Testbed support stuff. # use lib "@prefix@/lib"; use libdb; use libtestbed; # un-taint path $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin'; delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; # Turn off line buffering on output $| = 1; # # Parse command arguments. Once we return from getopts, all that should be # left are the required arguments. # %options = (); if (! getopts($optlist, \%options)) { usage(); } if (@ARGV != 2) { usage(); } my $pid = $ARGV[0]; my $eid = $ARGV[1]; if (defined($options{"b"})) { $batchmode = 1; } # # Untaint the arguments. # if ($pid =~ /^([-\@\w]+)$/) { $pid = $1; } else { die("*** Bad data in pid: $pid\n"); } if ($eid =~ /^([-\@\w]+)$/) { $eid = $1; } else { die("*** Bad data in eid: $eid\n"); } my $user_name; my $user_email; my $logname; my %pids = (); my $failed = 0; my $dbuid; # # We don't want to run this script unless its the real version. # That is, it must be setuid root. # if ($EUID != 0) { die("*** $0:\n". " Must be root! Maybe its a development version?\n"); } # # Verify actual user and get his DB uid. # if (! UNIX2DBUID($UID, \$dbuid)) { die("*** $0:\n". " You do not exist in the Emulab Database.\n"); } if (! UserDBInfo($dbuid, \$user_name, \$user_email)) { die("*** $0:\n". " Cannot determine your name and email address.\n"); } # # Verify that this person is allowed to do this. Must be an admin type, # the experiment creator or the project leader. # if ($UID && !TBAdmin()) { my $expt_leader = ExpLeader($pid, $eid); my $proj_leader = ProjLeader($pid); if (!$expt_leader || !$proj_leader) { die("*** $0:\n". " No such Experiment $eid or no such Project $pid\n"); } if ($expt_leader ne $dbuid && $proj_leader ne $dbuid) { die("*** $0:\n". " You must be the experiment creator or the project leader\n"); } } # # We need to lock down the experiment during this. # DBQueryFatal("lock tables experiments write"); if (TBExpLocked($pid, $eid)) { DBQueryWarn("unlock tables"); die("*** $0:\n". " Experiment $pid/$eid is in transition. Please try later!\n"); } # # A sanity check. Lets make sure the experiment is in the swapped in # state so that we are not trying to update nodes that are still booting # or swapping out, etc. # if (ExpState($pid, $eid) ne EXPTSTATE_ACTIVE) { DBQueryWarn("unlock tables"); die("*** $0:\n". " The experiment $pid/$eid must be fully activated first!\n"); } TBLockExp($pid, $eid); DBQueryFatal("unlock tables"); # # Batchmode (as from the web interface) goes to background and reports # later via email. # if ($batchmode) { # # Create a temporary name for a log file. # $logname = `mktemp /tmp/node_update-$pid-$eid.XXXXXX`; chop($logname); if (TBBackGround($logname)) { # # Parent exits normally # print STDOUT "Node Update for $pid/$eid is now in progress.\n". "You will be notified via email when the is complete.\n"; exit(0); } } # # Currently, we just need to update the mount points. The UID change because # of PERL sillyness. # $UID = $EUID; if (system("$expsetup")) { fatal("Exports Setup Failed"); } # Give ops a chance to react. sleep(2); # # Get the list of nodes that need to be "updated." # my @nodes = ExpNodes($pid, $eid); if (! @nodes) { fatal("No Nodes in the experiment"); } # # We want some overlap, but not too much since we could burn up # a lot processes on wedged nodes. Issue a small number in parallel, # and wait once we reach the limit for one to finish, before issuing # the next one. # my $maxpids = 0; foreach my $node ( @nodes ) { while ($maxpids >= $maxchildren) { my $thispid = waitpid(-1, 0); my $thisnode = $pids{$thispid}; if ($?) { $failed++; print STDERR "Update of node $thisnode failed!\n"; } else { print STDOUT "$thisnode updated ...\n"; } delete($pids{$thispid}); $maxpids--; } my $thispid = UpdateNode($node); $pids{$thispid} = $node; $maxpids++; sleep(1); } # # Wait for any remaining children to exit before continuing. # foreach my $thispid ( keys(%pids) ) { my $node = $pids{$thispid}; waitpid($thispid, 0); if ($?) { $failed++; print STDERR "Update of node $node failed!\n"; } else { print STDOUT "$node updated ...\n"; } } TBUnLockExp($pid, $eid); NotifyUser("Node Update Complete", $failed); if (defined($logname)) { unlink($logname); } exit($failed); # # Update a node in a child process. Return the pid to the parent so # that it can wait on all the children later. # sub UpdateNode { my($node) = @_; my($syspid, $mypid); print STDOUT "Updating $node ...\n"; # # We need to know if its a remote or local node, so we know how # to update it. This info needs to be in the DB at some point. # my($isremote) = TBIsNodeRemote($node); $mypid = fork(); if ($mypid) { return $mypid; } # # Run an ssh command in a child process, protected by an alarm to # ensure that the ssh is not hung up forever if the machine is in # some funky state. # $syspid = fork(); # Must change our real UID to root so that ssh will work. $UID = 0; if ($syspid) { local $SIG{ALRM} = sub { kill("TERM", $syspid); }; alarm 15; waitpid($syspid, 0); alarm 0; print STDERR "update of $node returned $?.\n" if $debug; # # If either ssh is not running or it timed out, # send it a ping of death. # if ($? == 256 || $? == 15) { if ($? == 256) { print STDERR "$node is not running sshd.\n" if $debug; } else { print STDERR "$node is wedged.\n" if $debug; } exit(-1); } exit(0); } else { if ($isremote) { exec("$sshremote $node /usr/local/etc/testbed/update"); } else { exec("$ssh $node /etc/testbed/update"); } exit(0); } exit(0); } sub NotifyUser($$) { my($mesg, $iserr) = @_; my($subject, $from, $to, $hdrs); print STDOUT "$mesg\n"; if (! $batchmode) { return; } if ($iserr) { $subject = "Node Update Failed $pid/$eid"; } else { $subject = "Node Update Success $pid/$eid"; } $from = $TBOPS; $hdrs = "Reply-To: $TBOPS"; # # Message goes to user. If a failure, TBOPS also gets it, otherwise # it goes into the logs. # $to = "$user_name <$user_email>"; if ($iserr) { $hdrs = "Cc: $TBOPS\n". "$hdrs"; } else { $hdrs = "Bcc: $TBLOGS\n". "$hdrs"; } # # Send a message to the testbed list. Append the logfile. # SENDMAIL($to, $subject, $mesg, $from, $hdrs, ($logname)); } sub fatal($) { my($mesg) = @_; TBUnLockExp($pid, $eid); NotifyUser($mesg, 1); if (defined($logname)) { unlink($logname); } exit(1); }