#!/usr/bin/perl -w # # EMULAB-COPYRIGHT # Copyright (c) 2000-2009 University of Utah and the Flux Group. # All rights reserved. # use English; # Returns 0 on success. # Returns 1 on non-assign_wrapper failure. # Returns (1 | assign_wrapper's errorcode) on assign_wrapper failure. # Returns |0x40 if update caused a swapout. Icky. # Returns -1 on uncontrolled error (die called). # XXX: handle error cases for update? (backup the db?) # XXX: Shouldn't do idempotent stuff twice for update. # XXX: repush/calc routing for update??? (tbprerun) # XXX: previz for update??? (tbprerun) # XXX: make snmpit faster for update. # # XXX: for update, expt is swapped out on os_setup fail. # (we only recover if assign fails) sub usage() { print STDERR "Usage: $0 {in|out|modify [-reboot] [-eventsys_restart]} ". "[-noswapout] [-genimode] pid eid\n"; exit(-1); } # # Configure variables # my $TBROOT = "@prefix@"; my $TBOPS = "@TBOPSEMAIL@"; my $TBLOGS = "@TBLOGSEMAIL@"; my $MAINSITE = @TBMAINSITE@; my $THISHOMEBASE = "@THISHOMEBASE@"; my $TESTMODE = @TESTMODE@; my $DISABLE_EVENTS = "@DISABLE_EVENT_SCHED@"; my $piper = "$TBROOT/sbin/locpiper"; my $NFSTRACESUPPORT= @NFSTRACESUPPORT@; # Untaint the path $ENV{'PATH'} = "/usr/bin:$TBROOT/libexec:$TBROOT/libexec/ns2ir" . ":$TBROOT/sbin:$TBROOT/bin"; # # Testbed Support libraries # use lib "@prefix@/lib"; use libdb; use libtestbed; use libadminctrl; use libadminmfs; use libtblog; use libArchive; use Experiment; use User; use Lan; #require exitonwarn; # exitonwarn isn't really a module, so just require it # # Actual swap-in and swap-out functions, defined below. # sub doSwapout($); sub doSwapin($); # # Firewall stuff # XXX maybe should be elsewhere # sub FWSETUP() { return 1; } sub FWADDNODES() { return 2; } sub FWDELNODES() { return 3; } sub FWTEARDOWN() { return 4; } sub doFW($$$$); # XXX fixme: should not be hardwired! my $cnetstack = "-S Control"; my $cnetvlanname = "Control"; sub REAL() { return 5; } sub CLEANUP() { return 4; } sub RETRY() { return 3; } sub MODIFY() { return 2; } sub UPDATE() { return 1; } sub MODIFY_RECOVER() { return 0; } # # Grab global enable of linkdelays. # my $enablelinkdelays = TBGetSiteVar("general/linux_endnodeshaping"); # # Turn off line buffering on output # $| = 1; my $updateReboot = 0; my $updateReconfig = 1; my $update_Eventsys_restart = 0; my $elabinelab = 0; my $plabinelab = 0; my $force = 0; my $noswapout = 0; my $genimode = 0; my $errors = 0; my $updatehosed = 0; my $state; my $canceled; my $os_setup_pid; my $nextState; # # First argument is either "in", "out", or "update"; # this value goes into $swapop. # my $swapop = shift; if (!$swapop || (($swapop ne "in") && ($swapop ne "out") && ($swapop ne "modify") && ($swapop ne "update"))) { usage(); } # # Get other arguments. # while ($#ARGV > 1) { $arg = shift; if ($arg eq "-force") { $force = 1; } elsif ($arg eq "-reboot") { $updateReboot = 1; $updateReconfig = 0; } elsif ($arg eq "-noreconfig") { $updateReboot = 0; $updateReconfig = 0; } elsif ($arg eq "-eventsys_restart" && $swapop eq "modify") { $update_Eventsys_restart = 1; } elsif ($arg eq "-noswapout") { $noswapout = 0; } else { usage(); } } if ($#ARGV < 1) { usage(); } my ($pid,$eid) = @ARGV; # # Untaint the arguments. # if ($pid =~ /^([-\@\w.]+)$/) { $pid = $1; } else { die("Tainted argument $pid!\n"); } if ($eid =~ /^([-\@\w.]+)$/) { $eid = $1; } else { die("Tainted argument $eid!\n"); } # # Set Error reporting info # tblog_set_info($pid,$eid,$UID); # # Turn on timestamps # TBDebugTimeStampsOn(); # # Verify user and get his DB uid and other info for later. # my $this_user = User->ThisUser(); if (! defined($this_user)) { tbdie("You ($UID) do not exist!"); } my $user_uid = $this_user->uid(); my $user_name = $this_user->name(); my $user_email = $this_user->email(); # Slowly convert to using Experiment module. my $experiment = Experiment->Lookup($pid, $eid); if (!defined($experiment)) { tbdie("Could not lookup experiment object!") } # # Print starting message. # my $exptidx; TBExptIDX($pid, $eid, \$exptidx); print "Beginning swap-$swapop for $pid/$eid ($exptidx). " . TBTimeStampWithDate() . "\n"; TBDebugTimeStamp("tbswap $swapop started"); # # Get experiment state; verify that experiment exists. # if (! ($state = ExpState($pid, $eid))) { tbdie "No such experiment $pid/$eid"; } # Sanity check the current state. if (!$force) { if ($swapop eq "in") { tbdie("Experiment should be ACTIVATING. Currently $state.") if ($state ne EXPTSTATE_ACTIVATING); } elsif ($swapop eq "out") { tbdie("Experiment should be SWAPPING. Currently $state.") if ($state ne EXPTSTATE_SWAPPING); } elsif ($swapop eq "modify" || $swapop eq "update") { tbdie("Experiment should be MODIFY_RESWAP. Currently $state.") if ($state ne EXPTSTATE_MODIFY_RESWAP); } } # Get elabinelab status. See below. if (! TBExptIsElabInElab($pid, $eid, \$elabinelab)) { tbdie("Could not get elabinelab status for experiment $pid/$eid"); } # and plabinelab status. if (! TBExptIsPlabInElab($pid, $eid, \$plabinelab)) { tbdie("Could not get plabinelab status for experiment $pid/$eid"); } if ($elabinelab && $plabinelab) { tberror "Cannot get my head around Plab in Elab in Elab!\n"; print "Failingly finished swap-$swapop for $pid/$eid. " . TBTimeStamp() . "\n"; TBDebugTimeStamp("tbswap $swapop finished (failed)"); exit(1); } # # See if the experiment is firewalled # my $firewalled = TBExptFirewall($pid, $eid); # # Do actual swapping # if ($swapop eq "out") { # # Swap out # tblog_set_attempt(1); $errors = doSwapout(REAL); } elsif ($swapop eq "update" || $swapop eq "modify") { # # Before swapout, do cursory admission control to see if the # modified experiment will be swappable. assign_wrapper does a # more stringent check using assign. # print STDERR "Checking with Admission Control ...\n"; if (! TBAdmissionControlCheck(undef, $experiment, undef)) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['admission_control_failure']}, "Admission control failure!\n"); print "Failingly finished swap-$swapop for $pid/$eid. " . TBTimeStamp() . "\n"; TBDebugTimeStamp("tbswap $swapop finished (failed)"); exit(1); } # # Update. # tblog_set_attempt(1); # # Phase One -- swap experiment partially out. # print STDERR "Backing up physical state...\n"; $experiment->BackupPhysicalState(); # # Actually, in update mode this is not done cause we are anticipating # adding nodes only. # $errors = ($swapop eq "modify" ? doSwapout(MODIFY) : 0); if (0) { print STDERR "Doing a swapmodswapout on the experiment archive ...\n"; if (libArchive::TBExperimentArchiveSwapModSwapOut($pid, $eid) < 0) { tberror("Failed swapmodswapout on the experiment archive!"); $errors = 1; } } if ($errors) { # # Clean up the mess, leaving the experiment in the SWAPPED state, # print STDERR "Cleaning up after errors.\n"; doSwapout(CLEANUP); $updatehosed = 1; } else { # # Phase Two -- swap experiment back in. # $errors = doSwapin(MODIFY); if ($errors) { # # There were errors; see if we can recover. # my $CanRecover = 1; if ($errors != 7) { print STDERR "Update failure occurred _after_ assign phase; "; $CanRecover = 0; } if ($CanRecover) { print STDERR "Recovering virtual and physical state.\n"; # It is safe to remove the phystate since we know it was # backed up above, and cause we do not know if assign_wrapper # made it to that point before it failed. if ($experiment->RemoveVirtualState($pid, $eid) || $experiment->RestoreVirtualState($pid, $eid) || $experiment->RemovePhysicalState($pid, $eid) || $experiment->RestorePhysicalState($pid,$eid)) { print STDERR "Could not restore backed-up state; "; $CanRecover = 0; } else { print STDERR "Doing a recovery swap-in of old state.\n"; if (doSwapin(MODIFY_RECOVER)) { print STDERR "Could not swap in old physical state; "; $CanRecover = 0; } } } # # Some part of the recovery failed; must swap it out. swapexp # (caller) will then have to do more clean up, hence the special # exit status indicated by $updatehosed. # if (!$CanRecover) { if ($noswapout) { print STDERR "No Recovery, but leaving experiment swapped in.\n"; } else { print STDERR "Recovery aborted! Swapping experiment out.\n"; doSwapout(CLEANUP); $updatehosed = 1; } } else { print STDERR "Update recovery successful.\n"; } } } } elsif ($swapop eq "in") { # # Swap in # my $retries = 2; # # Before real swapin, do cursory admission control. assign_wrapper does # a more stringent check using assign. # print STDERR "Checking with Admission Control ...\n"; if (! TBAdmissionControlCheck(undef, $experiment, undef)) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['admission_control_failure']}, "Admission control failure!\n"); print "Failingly finished swap-$swapop for $pid/$eid. " . TBTimeStamp() . "\n"; TBDebugTimeStamp("tbswap $swapop finished (failed)"); exit(1); } tblog_set_attempt(1); $errors = doSwapin(REAL); # # Attempt a retry if: # a) there were errors, # b) doswapin() indicated (via return code 3) a retry is appropriate, # c) we haven't tried too many times already. # d) The cancelflag has not been set. # e) $TESTMODE == 0. # while ($errors == 3 && $retries && !$canceled && !$TESTMODE) { $retries--; tblog_inc_attempt(); print STDERR "Cleaning up after errors; will try again.\n"; doSwapout(RETRY); print STDERR "Trying again...\n"; $errors = doSwapin(RETRY); } if ($errors || $canceled) { print STDERR "Cleaning up after " . ($canceled ? "cancelation" : "errors") . ".\n"; doSwapout(CLEANUP); } } tblog_set_attempt(0); TBGetCancelFlag($pid, $eid, \$canceled); if ($canceled) { tberror ({type=>'summary', cause=>'canceled', severity=>SEV_IMMEDIATE, error=>['cancel_flag']}, "Experiment swap-$swapop canceled by user."); } # # Write appropriate message and exit. # if ($errors) { print "Failingly finished swap-$swapop for $pid/$eid. ".TBTimeStamp()."\n"; TBDebugTimeStamp("tbswap $swapop finished (failed)"); # Pass out magic value to indicate that update failed! exit(1 | ($updatehosed ? 0x40 : 0)); } print "Successfully finished swap-$swapop for $pid/$eid. " .TBTimeStamp()."\n"; TBDebugTimeStamp("tbswap $swapop finished (succeeded)"); exit(0); ################################# ## # # doSwapout - Swaps experiment out. # # If in REAL or CLEANUP, # this function will free all nodes for the # experiment. # # If in RETRY or UDPATE, # only nodes not in RES_READY will be freed. # # Returns 0 on success, >0 on failure. # ## sub doSwapout($) { my $type = shift; my $swapout_errors = 0; tblog_set_cleanup(1) if $type == CLEANUP; # # wait for os_setup; # this only applies if called after a failed doswapin. # if ($os_setup_pid) { print "Waiting for os_setup to finish\n"; waitpid($os_setup_pid, 0); undef $os_setup_pid; } if (0 && $NFSTRACESUPPORT && $type == REAL) { print "Getting files accessed via NFS.\n"; TBDebugTimeStamp("nfstrace started"); system("nfstrace transfer $pid $eid"); TBDebugTimeStamp("nfstrace finished"); } if (! $TESTMODE) { if (! ($DISABLE_EVENTS || $elabinelab)) { if ($type >= RETRY || ($update_Eventsys_restart && $type == MODIFY) ) { print "Stopping the event system\n"; if (system("eventsys_control stop $pid,$eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['eventsys_stop_failed']}, "Failed to stop the event system."); $swapout_errors = 1; } # # Stop the location piper. # if (-x $piper) { print "Stopping the location piper\n"; if (system("$piper -k $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['piper_stop_failed']}, "Failed to stop location piper."); $swapout_errors = 1; } } } } # # Do teardown of inner elab. We must do this before we teardown the # vlans since the inner control network is a vlan, and we want that # active so inner boss can reboot the inner nodes (avoid power cycle). # if ($elabinelab && $type >= CLEANUP) { print "Tearing down elabinelab. This could take a while.\n"; if (system("elabinelab -k $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['elabinelab_tear_down_failed']}, "Failed to teardown elabinelab!"); $swapout_errors = 1; } } # # Clean up any VLANs in experiment. # # When modifying an elabinelab experiment, leave the vlans intact # so that the inner networks are not suddenly disconnected. # if ($type != MODIFY) { TBDebugTimeStamp("snmpit started"); print STDERR "Removing VLANs.\n"; if (system("snmpit -r $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['vlan_reset_failed']}, "Failed to reset VLANs"); $swapout_errors = 1; } TBDebugTimeStamp("snmpit finished"); } # # Must check for stale vlans that we kept around in the above clause # since they will not be in the lans table anymore. # if ($type == CLEANUP) { my @stale; if (VLan->StaleVlanList($experiment, \@stale) != 0) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['vlan_reset_failed']}, "Failed to get stale VLANs"); $swapout_errors = 1; } if (@stale) { print "Removing stale vlans @stale\n"; system("snmpit -f ". join(" ", map("-o $_", @stale))); if ($?) { tberror({type => 'summary', severity => SEV_SECONDARY, error => ['vlan_reset_failed']}, "Failed to remove stale vlans"); $swapout_errors = 1; } } } } if ($type >= CLEANUP) { # # Undo plab in elab specialness. # No need to worry about VLANs here, as all the special abilities # involve the control network. # if (! $TESTMODE && $plabinelab) { print "Tearing down plabinelab.\n"; if (system("plabinelab -k $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['plabinelab_tear_down_failed']}, "Failed to teardown plabinelab!"); $swapout_errors = 1; } } # # We're not attempting a retry; # # Stop all of the vnodes. # if (! $TESTMODE) { print "Tearing down virtual nodes.\n"; TBDebugTimeStamp("vnode_setup -k started"); if (system("vnode_setup -d -k $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['vnode_tear_down_failed']}, "Failed to tear down vnodes."); $swapout_errors = 1; } TBDebugTimeStamp("vnode_setup finished"); } # # Nodes behind a firewall are treated special. # See undoFWNodes for details. # if ($firewalled && undoFWNodes($pid, $eid)) { tblog_set_cleanup(0); return 1; } # # Perform swapout time admin actions. Right now there is at most # one of these. It isn't really a general mechanism, just a hook # for state saving or data collection during swapout. # A couple of important "fer now" notes: # # We don't do this for firewalled experiments. We need a way # to "tag" the saved disk state to ensure it doesn't get # instantiated outside of a firewall. # # We only do this on REAL swapouts, and not on CLEANUPs. # There are some types of CLEANUPs where we may want to # do this, in particular an invocation caused by a failed # modify operation, where the admin action is to save the # experiment state. So we will need to revisit this. # my %soaction = (); if ($type == REAL && !$firewalled) { TBExptGetSwapoutAction($pid, $eid, \%soaction); } if ($soaction{'command'} && doSwapoutAction($pid, $eid, %soaction)) { tblog_set_cleanup(0); return 1; } # # remove all nodes from the experiment. # (nfree will send them to RES_FREE_DIRTY) # print STDERR "Freeing nodes.\n"; TBDebugTimeStamp("nfree started"); if (system("nfree -a $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['nfree_failed']}, "Could not free nodes."); $swapout_errors = 1; } TBDebugTimeStamp("nfree finished"); # # Since this is an actual swapout, # reset our count of swap out nag emails sent. # DBQueryWarn("update experiments set swap_requests='', ". " sim_reswap_count='0' ". "where eid='$eid' and pid='$pid'"); } else { # # $type == RETRY or $type == MODIFY. # Therefore, don't deallocate nodes which have been successfully # incorporated into the experiment (i.e., are RES_READY). # (nfree will send deallocated nodes to RES_FREE_DIRTY) # my @failedpnodes = (); my @failedvnodes = (); my $db_result = DBQueryFatal("select rv.node_id,n.allocstate,nt.isvirtnode ". " from reserved as rv ". "left join nodes as n on n.node_id = rv.node_id ". "left join node_types as nt on nt.type=n.type ". "where rv.pid='$pid' and rv.eid='$eid' and ". " rv.genisliver_idx is null"); while (my ($node,$allocstate,$isvirt) = $db_result->fetchrow_array) { if ($allocstate ne TBDB_ALLOCSTATE_RES_READY()) { if ($isvirt) { push(@failedvnodes, $node); } else { push(@failedpnodes, $node); } } } # # Tear down failed vnodes. Perhaps not needed? # if (!$TESTMODE && @failedvnodes > 0) { print "Tearing down failed virtual nodes.\n"; TBDebugTimeStamp("vnode_setup -k started"); if (system("vnode_setup -d -k $pid $eid @failedvnodes")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['vnode_tear_down_failed']}, "Failed to tear down vnodes."); $swapout_errors = 1; } TBDebugTimeStamp("vnode_setup -k finished"); } # # Undo plabinelab setup # if (!$TESTMODE && $plabinelab && @failedpnodes > 0) { print "Removing failed nodes from inner plab.\n"; if (system("plabinelab -r $pid $eid @failedpnodes")) { tberror "Failed to remove inner nodes!"; $swapout_errors = 1; } } # # Release all failed nodes. # if (@failedpnodes > 0 || @failedvnodes > 0) { print STDERR "Freeing failed nodes.\n"; TBDebugTimeStamp("nfree started"); # # Specify -x switch so when a physical node gets freed, # any virtual nodes (owned by this experiment) # sitting on top of it are freed as well. # if (system("nfree -x $pid $eid " . join(" ", (@failedpnodes, @failedvnodes)))) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['nfree_failed']}, "Could not free nodes."); $swapout_errors = 1; } TBDebugTimeStamp("nfree finished"); } } if (! $TESTMODE) { # # If the experiment has no Plab dslice nodes left, but still has # a Plab slice, destroy the slice # # Does the slice exist? $db_result = DBQueryFatal("select slicename from plab_slices ". "where pid='$pid' and eid='$eid'"); if ($db_result->numrows) { # Are there any nodes left in the slice? $db_result = DBQueryFatal("select r.node_id ". " from plab_slices as ps". " left join plab_slice_nodes as psn ". " on (ps.slicename=psn.slicename ". " and ps.plc_idx=psn.plc_idx) ". " left join reserved as r ". " on psn.node_id=r.node_id ". " where ps.pid='$pid' and ps.eid='$eid'". " and r.node_id is not NULL"); if (!$db_result->numrows) { print "Tearing down Slices.\n"; TBDebugTimeStamp("plabslice destroy started"); if (system("plabslice destroy $pid $eid")) { tberror "Failed to tear down Slices."; $swapout_errors = 1; } TBDebugTimeStamp("plabslice destroy finished"); } } } if (! $TESTMODE) { # # All of these errors are non-fatal on swapout. We find out about them # via email sent from the individual scripts. # # # Only reset mountpoints if this is an actual swapout, and # not a failed swapin(cleanup), update, or retry. # if ($type == REAL) { print "Resetting mountpoints.\n"; TBDebugTimeStamp("exports started"); if (system("exports_setup")) { tberror({severity => SEV_WARNING, error => ['mountpoint_reset_failed']}, "Failed to reset mountpoints."); } TBDebugTimeStamp("exports finished"); # # Ditto these two. # print "Resetting named maps.\n"; TBDebugTimeStamp("named started"); if (system("named_setup")) { tbwarn "Failed to reset named map."; } TBDebugTimeStamp("named finished"); print "Resetting email lists.\n"; TBDebugTimeStamp("genelists started"); if (system("genelists -t")) { tbwarn "Failed to reset email lists."; } TBDebugTimeStamp("genelists finished"); } } # # Wipe the DB clean except during MODIFY or RETRY. In those # cases, assign_wrapper will reset the DB after reading # the info. # if ( $type >= CLEANUP ) { print STDERR "Resetting DB.\n"; $experiment->RemovePhysicalState(); # Special. $experiment->ClearPortRegistration(); } tblog_set_cleanup(0); return $swapout_errors; } ## # # doSwapin - Swaps experiment in. # # Returns: # 0 - successful swapin # 1 - failed swapin; cleanup required. # 3 - failed swapin; cleanup required; can retry. # 7 - failed swapin; assign failed; no cleanup. ## sub doSwapin($) { my $type = shift; # Just the physnodes ... my @deleted_pnodes = (); # # assign_wrapper does all the virtual to physical mapping # and updating the DB state. # if ($type > MODIFY_RECOVER) { # # Hacky test to allow disabling of linkdelays if the node is going # to run Linux. See sitevar above. # if (! $enablelinkdelays) { $db_result = DBQueryFatal("select distinct e.pid,e.eid,vl.vnode,vn.osname ". " from experiments as e ". "left join virt_lans as vl on vl.pid=e.pid and ". " vl.eid=e.eid ". "left join virt_nodes as vn on vn.pid=e.pid and ". " vn.eid=e.eid and vn.vname=vl.vnode ". "left join os_info as o on o.osname=vn.osname and ". " (o.pid=vl.pid or o.pid='" . TBOPSPID() . "') ". "where (vl.uselinkdelay!=0 or e.uselinkdelays!=0 or ". " e.forcelinkdelays!=0) and ". " (o.os is NULL or o.os='Linux' or ". " o.os='Fedora') and ". " e.pid='$pid' and e.eid='$eid'"); if ($db_result->numrows) { tberror "Endnodeshaping is disabled on Linux Images!"; tberror "You must modify your experiment to swap it in."; return 1; } } print "Mapping to physical reality ...\n"; TBDebugTimeStamp("assign_wrapper started"); # # Pass the -u (update) switch into assign_wrapper, which turns on # update mode. When doing a retry, must also fix the current nodes # to avoid stuff jumping around when simply trying to replace a node # that did not boot. # my $exitcode; my $wrapper = "assign_wrapper -u"; $wrapper .= " -f" if ($type == RETRY); if (system("$wrapper $pid $eid")) { $exitcode = $? >> 8; tberror "Failed ($exitcode) to map to reality."; # Wrapper sets this bit when recovery is possible. if ($exitcode & 64) { # We can recover. tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode); return 7; } else { # No recovery, no retry. tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode); return 1; } } TBDebugTimeStamp("assign_wrapper finished"); print "Mapped to physical reality!\n"; } # Check cancel flag before continuing. No retry, TBGetCancelFlag($pid, $eid, \$canceled); if ($canceled) { tbreport(SEV_IMMEDIATE, 'cancel_flag'); return 1 } # # When doing a modify, we have to compare vlans to determine which # vlans actually changed and need to be deleted, before processing # the new vlans for the experiment. Note that vlans that already # exist on the switches will be left as is by snmpit. # # We must do this before the nfree of deleted nodes in the next section # because the new test in nfree the prevents nodes from accidentally # getting released when they are in a vlan. # if ($type == MODIFY) { my @diff = (); my @same = (); if (Lan->CompareVlansWithSwitches($experiment, \@diff, \@same) != 0) { tberror({type => 'summary', severity => SEV_SECONDARY, error => ['vlan_setup_failed']}, "Failed to compare old vlans"); return 1; } if (@diff) { print "Removing obsolete vlans @diff\n"; system("snmpit -f ". join(" ", map("-o $_", @diff))); if ($?) { tberror({type => 'summary', severity => SEV_SECONDARY, error => ['vlan_setup_failed']}, "Failed to remove old vlans"); return 1; } } } # # Look for any nodes in RES_TEARDOWN. These need to be released, # and if a virtnode, they need to be torn down. We cannot wait for # the virtnodes to go down with the physnode they are hosted on, # so teardown and release the virtnodes first, and then do the # physnodes. # # Errors are fatal; no recovery or retry. # if ($type == MODIFY || $type == UPDATE) { my $allocstate = TBDB_ALLOCSTATE_RES_TEARDOWN(); $db_result = DBQueryFatal("select r.node_id,nt.isvirtnode,nt.isremotenode ". " from reserved as r ". "left join nodes as n on n.node_id=r.node_id ". "left join node_types as nt on nt.type=n.type ". "where r.pid='$pid' and r.eid='$eid' and ". " n.allocstate='$allocstate'"); if ($db_result->numrows) { my @virtnodes = (); my @physnodes = (); print "Tearing down and releasing unused nodes\n"; # First teardown/release virtnodes. while (my ($node,$isvirt,$isrem) = $db_result->fetchrow_array()) { if ($isvirt) { push(@virtnodes, $node); } elsif (!$isrem) { push(@physnodes, $node); } } # See below. @deleted_pnodes = @physnodes; if (@virtnodes) { TBDebugTimeStamp("vnode_setup started"); if (system("vnode_setup -k $pid $eid @virtnodes")) { tberror "Failed to tear down unused virtnodes!\n"; return 1; } TBDebugTimeStamp("vnode_setup finished"); if (system("nfree $pid $eid @virtnodes")) { tberror "Failed to nfree unused virtnodes!\n"; return 1; } } if (@physnodes) { if ($elabinelab) { print "Removing nodes from inner elab.\n"; if (system("elabinelab -r $pid $eid @physnodes")) { tberror "Failed to remove inner nodes!"; return 1; } } elsif ($plabinelab) { print "Removing nodes from inner plab.\n"; if (system("plabinelab -r $pid $eid @physnodes")) { tberror "Failed to remove inner nodes!"; return 1; } } # # If the experiment is firewalled, cleanup the nodes # we are releasing. # if ($firewalled && undoFWNodes($pid, $eid, @deleted_pnodes)) { return 1; } if (system("nfree $pid $eid @physnodes")) { tberror "Failed to nfree unused physnodes!\n"; return 1; } } } } # Exit here if we are testing. if ($TESTMODE) { print "Testing run - Stopping here.\n"; return 0; } # # Handle tarballs - we might need to fetch some from URLs if the user # asked for that. # print "Fetching tarballs and RPMs (if any) ...\n"; TBDebugTimeStamp("tarfiles_setup started"); if (system("tarfiles_setup $pid $eid")) { # # No recovery for now - what would we do? # tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['tarfiles_setup_failed']}, "Failed to set up tarballs."); return 1; } TBDebugTimeStamp("tarfiles_setup finished"); # # If there are any Plab dslice nodes in the experiment, create the # dslice now # if ($type > MODIFY_RECOVER) { # Are there any Plab nodes? First get a list of node types in the exp; # if any are types hosted by any of the PLCs we know about, create # all slices necessary for the experiment in a single plabslice call. $db_result = DBQueryFatal("select nt.type,ppi.plc_name from nodes as n ". " left join node_types as nt on n.type = nt.type ". " left join reserved as r on r.node_id = n.node_id ". " left join plab_plc_info as ppi ". " on nt.type = ppi.node_type". " where r.pid='$pid' and r.eid='$eid'". " and ppi.node_type is not NULL". " group by nt.type"); if ($db_result->numrows) { my $info_str = ""; while (my ($nt,$plc) = $db_result->fetchrow_array()) { $info_str .= " $plc($nt)"; } print "Configuring Slices for $info_str.\n"; TBDebugTimeStamp("plabslice started"); if (system("plabslice create $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['plabslice_create_failed']}, "Failed to configure Slices."); return 3; } TBDebugTimeStamp("plabslice finished"); } } # Check cancel flag before continuing. No retry, TBGetCancelFlag($pid, $eid, \$canceled); if ($canceled) { tbreport(SEV_IMMEDIATE, 'cancel_flag'); return 1 } # # These things need to get started before the nodes come up, so we'll # do them before the os_setup. Everything else can done in parallel with # os_setup. (Actually, these probably can too, since they should finish # long before the nodes reboot, but better safe than sorry) # print "Setting up mountpoints.\n"; TBDebugTimeStamp("mountpoints started"); if (system("exports_setup")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['mountpoints_setup_failed']}, "Failed to setup mountpoints."); return 1; } TBDebugTimeStamp("mountpoints finished"); TBDebugTimeStamp("named started"); print "Setting up named maps.\n"; if (system("named_setup")) { tbwarn "Failed to add node names to named map."; # # This is a non-fatal error. # } TBDebugTimeStamp("named finished"); if ($NFSTRACESUPPORT) { print "Cleaning NFS traces.\n"; TBDebugTimeStamp("nfstrace gc started"); if (system("nfstrace gc $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['nfstrace_setup_failed']}, "Failed to setup nfstrace."); return 1; } TBDebugTimeStamp("nfstrace gc finished"); } # Check cancel flag before continuing. No retry, TBGetCancelFlag($pid, $eid, \$canceled); if ($canceled) { tbreport(SEV_IMMEDIATE, 'cancel_flag'); return 1 } # # Setup any control-net firewall. # This must be done before reloading and rebooting nodes. # if ($firewalled && ($type == REAL || $type == MODIFY) && doFW($pid, $eid, (($type == MODIFY) ? FWADDNODES : FWSETUP), undef)) { return 1; } # # PlabinElab setup. This is currently just tweaking out the dhcpd.conf # file and that must be done before os_setup (i.e., before nodes are # rebooted). # if ($plabinelab && !$TESTMODE && $type > MODIFY_RECOVER) { # for MODIFY and RETRY we pass in the -u to pick up new nodes my $optarg = ($type == REAL ? "" : "-u"); print "Setting up plabinelab.\n"; TBDebugTimeStamp("plabinelab setup started"); if (system("plabinelab $optarg $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['plabinelab_setup_failed']}, "Failed to setup plabinelab!"); return 1; } TBDebugTimeStamp("plabinelab setup finished"); } # # If user specified -reboot to update, # and we are successfully performing the update, # then mark all nodes in experiment so os_setup will reboot them. # We must reboot nodes on a RETRY as well, since assign has been rerun # and may have remapped interfaces on the nodes. # if ($type == RETRY || ($type == MODIFY && ($updateReboot || $updateReconfig))) { my $needreboot = ($type == RETRY || $updateReboot) ? 1 : 0; print STDERR "Marking nodes for ", $needreboot ? "reboot" : "reconfig", ".\n"; $db_result = DBQueryFatal("select r.node_id,n.allocstate from reserved as r ". "left join nodes as n on n.node_id=r.node_id ". "where r.pid='$pid' and r.eid='$eid'"); while (my ($node,$allocstate) = $db_result->fetchrow_array) { # # If the node is INIT_CLEAN, leave it alone. It will still get # rebooted, but will not falsely be tagged as dirty. This is # important for vnodes too, where INIT_CLEAN indicated the vnode # does not even exist yet (plab nodes). # if ($allocstate ne TBDB_ALLOCSTATE_RES_INIT_CLEAN()) { TBSetNodeAllocState($node, ($needreboot ? TBDB_ALLOCSTATE_RES_INIT_DIRTY() : TBDB_ALLOCSTATE_RES_RECONFIG())); } } # Do this only when nodes are to be rebooted. $experiment->ClearPortRegistration() if ($type == MODIFY); } # # Lets run gentopofile again, so we get ltmap right. This will come out # later, most likely. # TBDebugTimeStamp("gentopofile started"); print "Generating ltmap (again) ...\n"; if (system("gentopofile $pid $eid")) { tberror("gentopofile failed!"); return 1; } TBDebugTimeStamp("gentopofile finished"); # XXX fer now hack if (0 && !$firewalled && !$elabinelab && !$plabinelab && ($pid eq "testbed" || $pid eq "tbres")) { DBQueryWarn("update experiments set ". " savedisk=1 where pid='$pid' and eid='$eid'"); } # # Since it'll take a while for the nodes to reboot, we'll start now, and # wait for the os_setup to finish, down below # print "Resetting OS and rebooting.\n"; TBDebugTimeStamp("launching os_setup"); if (!($os_setup_pid = fork())) { exec("os_setup $pid $eid") or return 1; } elsif ($os_setup_pid == -1) { tberror "Fork failed."; return 1; } # # XXX # Don't add any steps between here and the waitpid() call below # without verifying that 1) It's OK for nodes to come up before # the step has completed and 2) It's OK for the command to run in # parallel with os_setup (no DB dependencies, etc.) # print "Setting up VLANs.\n"; TBDebugTimeStamp("snmpit started"); if (system("snmpit -t $pid $eid")) { tberror({type => 'summary', severity => SEV_SECONDARY, error => ['vlan_setup_failed']}, "Failed to set up VLANs."); return 1; } TBDebugTimeStamp("snmpit finished"); # No need to do this except during a real swapin. if ($type == REAL) { print "Setting up email lists.\n"; TBDebugTimeStamp("genelists started"); if (system("genelists -t")) { tbwarn "Failed to update email lists."; # # This is a non-fatal error. # } TBDebugTimeStamp("genelists finished"); } # # Don't clear port counters on MODIFY. # (XXX should clear new nodes' port counters.) if ($type >= RETRY) { print "Clearing port counters.\n"; TBDebugTimeStamp("portstats started"); if (system("portstats -z -a -q $pid $eid")) { tbwarn "Failed to clear port counters."; # # This is a non-fatal error. # } TBDebugTimeStamp("portstats finished"); } # # OK, let's see how that os_setup did # $kid = waitpid($os_setup_pid,0); if ($kid == $os_setup_pid) { undef $os_setup_pid; # Make sure doswapout() doesn't wait for it. if ($CHILD_ERROR) { tberror "Failed to reset OS and reboot nodes."; # # If there is a firewall involved, it could be that the # firewall rules are preventing essential communication, # so don't retry. # # XXX should only do this if the user has specified additional # rules. But right now, I may screw up too! # if ($firewalled) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['os_node_reset_failed', 'firewall']}, "Not retrying, ". "possibly an error in firewall setup or configuration."); return 1; } # # Use returncode from os_setup process to # set global $retry flag, indicating to caller # that it may be beneficial to attempt # a doSwapin() again. # if (($CHILD_ERROR >> 8) == 1) { tbreport(SEV_SECONDARY, 'os_node_reset_failed', 'retry'); return 3; } else { tbreport(SEV_SECONDARY, 'os_node_reset_failed', 'error'); print STDERR "Not retrying due to error type.\n"; return 1; } } } else { undef $os_setup_pid; tberror "Error waiting for os_setup to finish."; return 1; } # # Okay, start the event system now that we know all the nodes have # rebooted (os_setup is done). This only takes a moment (puts itself # in the background), so its not enough of a delay to worry about. # Don't do this during an update, since we didn't kill the # event system previously, so starting it again will fail! # if (! ($DISABLE_EVENTS || $elabinelab)) { # # For the robot testbed, start the location piper *before* the event # system. # if (-x $piper && ($type != MODIFY && $type != MODIFY_RECOVER)) { print "Starting the location piper.\n"; if (system("$piper $pid $eid")) { tberror "Failed to start the location piper."; return 1; } } if (($update_Eventsys_restart || ($type != MODIFY && $type != MODIFY_RECOVER))) { print "Starting the event system.\n"; TBDebugTimeStamp("eventsys_control started"); if (system("eventsys_control start $pid,$eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['eventsys_start_failed']}, "Failed to start the event system."); return 1; } TBDebugTimeStamp("eventsys_control finished"); } } # # Do linktest if user requested it at swapin. # my $query_result = DBQueryFatal("select linktest_level,linktest_pid from experiments ". "where pid='$pid' and eid='$eid'"); my ($linktest_level,$linktest_pid) = $query_result->fetchrow_array(); if ($linktest_level && ($type == REAL || $type == MODIFY)) { if ($linktest_pid) { tbwarn "Linktest is already running! $linktest_pid"; } else { # # Run it. No worries about failures. # my $optarg = "-l $linktest_level -t 600 -m"; print "Starting linktest ... this could take a while!'\n"; if (system("linktest_control $optarg $pid $eid") != 0) { tbwarn "Linktest run returned non-zero status!"; } } } # # ElabinElab setup. This might not be the right place for this! # if ($elabinelab && !$TESTMODE && ($type == REAL || $type == MODIFY)) { my $optarg = ($type == MODIFY ? "-u" : ""); print "Setting up elabinelab. This could take a while!\n"; TBDebugTimeStamp("elabinelab setup started"); if (system("elabinelab $optarg $pid $eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['elabinelab_setup_failed']}, "Failed to setup elabinelab!"); return 1; } TBDebugTimeStamp("ElabInElab setup finished"); } # # Tell the event scheduler to START. # if (! ($DISABLE_EVENTS || $elabinelab)) { if ( $update_Eventsys_restart || ($type != MODIFY && $type != MODIFY_RECOVER) ) { TBDebugTimeStamp("Starting event time"); if (system("tevc -e $pid/$eid now __ns_sequence start")) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['event_time_start_failed']}, "Failed to start event time."); return 1; } } } return 0; } # # Setup and teardown experiment firewall. # # XXX note that right now, we just setup the switch infrastructure # first, and then just let everything else go. Firewalled nodes will # not boot until the firewall is up since the VLAN is isolated til then. # The firewall will boot ok since it still talks to the real control net. # # XXX for tearing down firewalls, we assume that nodes have been "cleansed" # and it is safe to put ports back into the default control net VLAN. # sub doFW($$$$) { my ($pid, $eid, $action, $nodelist) = @_; my ($fwnode, $fwvlanname, $fwvlan, $fwport, $fwvid); my %nodenames; # # See if there is a firewall, fetching node/VLAN info if so. # If not, we are all done. # if (!TBExptFirewall($pid, $eid, \$fwnode, \$fwvid, \$fwvlan)) { return 0; } if ($action == FWSETUP) { $fwvid = TBGetUniqueIndex("cnet_vlanid"); print "Setting up control net firewall.\n"; } else { if ($action == FWADDNODES) { print "Adding nodes to control net firewall.\n"; } elsif ($action == FWDELNODES) { print "Removing nodes from control net firewall.\n"; } else { print "Tearing down control net firewall.\n"; } # Prior setup didn't succeed, nothing to do if (!defined($fwvid)) { return 0; } } # See below. if (defined($nodelist)) { foreach my $nodeid (@$nodelist) { my $node = Node->Lookup($nodeid); if (!defined($node)) { tberror("Could not map $nodeid to its object"); return 1; } $nodenames{$nodeid} = $node; } } # Get current list of reserved nodes. my @allnodes; if (Node->BulkLookup($experiment, \@allnodes) < 0) { tberror("Failed to load reserved nodes"); return 1; } # XXX vlanid in the DB is currently an int, we need a more unique name $fwvlanname = "fw$fwvid"; # # Find all the experiment nodes and their control interface switch ports # my $portlist = ""; foreach my $node (@allnodes) { next if ($node->isremotenode() || $node->isvirtnode()); my $control_iface = Interface->LookupControl($node); if (!defined($control_iface)) { tberror("Could not find control iface object for $node"); return 1; } my $node_id = $node->node_id(); my $cif = $control_iface->iface(); print "$node_id $cif\n"; if ($node_id eq $fwnode) { $fwport = "$node_id:$cif"; } elsif (defined($nodelist)) { print "foo @$nodelist\n"; # Only nodes we are moving in/out of the experiment. $portlist .= " $node_id:$cif" if (exists($nodenames{$node_id})); } else { $portlist .= " $node_id:$cif"; } } if (!defined($fwport)) { tberror "Firewall node '$fwnode' not found in $pid/${eid}!"; return 0; } if ($portlist eq "") { # # We catch this up in swapexp; admin users can specify just a firewall, # but mere users must have at least one firewalled node. Just print # the warning though. # tbwarn "No firewalled nodes in $pid/${eid}!"; } # # XXX hack commands til we nail down the API # my $fwsetupstr1 = "snmpit $cnetstack -m $fwvlanname $pid $eid $portlist"; my $fwsetupstr3 = "snmpit $cnetstack -T $fwport $cnetvlanname "; my $fwtakedownstr0 = "snmpit $cnetstack -e $fwport"; my $fwtakedownstr1 = ($portlist eq "" ? "true" : "snmpit $cnetstack -m $cnetvlanname $pid $eid $portlist"); my $fwtakedownstr2 = "snmpit $cnetstack -o $fwvlanname $pid $eid"; my $fwtakedownstr3 = "snmpit $cnetstack -U $fwport"; my $fwtakedownstr4 = "snmpit $cnetstack -f -m $cnetvlanname $fwport"; if ($action == FWSETUP) { TBDebugTimeStamp("snmpit firewall setup: VLAN"); print "doFW: '$fwsetupstr1'\n"; if (system($fwsetupstr1)) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['fwcnvlan_setup_failed']}, "Failed to setup Firewall control net VLAN."); return 1; } my $vlan = VLan->Lookup($experiment, $fwvlanname); if (!defined($vlan)) { tberror({type => 'secondary', severity => SEV_SECONDARY, error => ['fwcnvlan_setup_failed']}, "Failed to locate vlan object for $fwvlanname"); return 1; } if ($vlan->GetTag(\$fwvlan) != 0) { tberror("No vlan tag associated with $vlan"); goto badsetup; } if ($portlist ne "") { $fwsetupstr3 = "$fwsetupstr3 " . $vlan->id(); TBDebugTimeStamp("snmpit firewall setup: trunk"); print "doFW: '$fwsetupstr3'\n"; if (system($fwsetupstr3)) { tberror "Failed to setup Firewall trunk on port $fwport."; badsetup: print "doFW: '$fwtakedownstr1'\n"; if (system($fwtakedownstr1)) { tberror "Could not return $portlist to Control VLAN!"; return 1; } print "doFW: '$fwtakedownstr2'\n"; if (system($fwtakedownstr2)) { tberror "Could not destroy VLAN $fwvlanname ($fwvlan)!"; return 1; } print "doFW: '$fwtakedownstr3'\n"; if (system($fwtakedownstr3)) { tberror "Could not untrunk $fwport!"; return 1; } print "doFW: '$fwtakedownstr4'\n"; if (system($fwtakedownstr4)) { tberror "Could not move $fwport back to Control lan!"; } return 1; } } TBDebugTimeStamp("snmpit firewall setup done"); # Record VLAN info now that everything is done TBSetExptFirewallVlan($pid, $eid, $fwvid, $fwvlan); return 0; } elsif ($action == FWADDNODES) { my $vlan = VLan->Lookup($experiment, $fwvlanname); if (!defined($vlan)) { tberror "Cannot find vlan object for $fwvlanname"; return 1; } TBDebugTimeStamp("snmpit firewall port addition"); print "doFW: '$fwsetupstr1'\n"; if (system($fwsetupstr1)) { tberror "Failed to add nodes to Firewall control net VLAN."; return 1; } # # Redo the trunk operation since there might not have been # any ports last time, and the vlan would not have existed, # so the trunk would not be setup. # $fwsetupstr3 = "$fwsetupstr3 " . $vlan->id(); print "doFW: '$fwsetupstr3'\n"; if (system($fwsetupstr3)) { tberror "Failed to setup Firewall trunk on port $fwport."; return 1; } TBDebugTimeStamp("snmpit firewall setup done"); } elsif ($action == FWDELNODES) { TBDebugTimeStamp("snmpit firewall port deletion"); print "doFW: '$fwtakedownstr1'\n"; if (system($fwtakedownstr1)) { tberror "Failed to remove nodes from Firewall control net VLAN.\n"; return 1; } TBDebugTimeStamp("snmpit firewall setup done"); } else { TBDebugTimeStamp("snmpit re-enable fw control port: $fwport"); print "doFW: '$fwtakedownstr0'\n"; my $failed = 0; if (system($fwtakedownstr0)) { tberror "Could not re-enable firewall control port $fwport!"; $failed = 1; } # # Do not try to do this if the vlan is already gone. # my $vlan = VLan->Lookup($experiment, $fwvlanname); if (defined($vlan)) { TBDebugTimeStamp("snmpit firewall teardown: VLAN"); print "doFW: '$fwtakedownstr1'\n"; if (system($fwtakedownstr1)) { tberror "Could not return $portlist to Control VLAN!"; return 1; } print "doFW: '$fwtakedownstr2'\n"; if (system($fwtakedownstr2)) { tberror "Could not destroy VLAN $fwvlanname ($fwvlan)!"; return 1; } } TBDebugTimeStamp("snmpit firewall teardown: trunk"); print "doFW: '$fwtakedownstr3'\n"; if (system($fwtakedownstr3)) { tberror "Could not tear down trunk on $fwport!"; $failed = 1; } print "doFW: '$fwtakedownstr4'\n"; if (system($fwtakedownstr4)) { tberror "Could not return $fwport to Control VLAN!"; $failed = 1; } if ($failed) { return 1; } TBDebugTimeStamp("snmpit firewall teardown done"); # Clean VLAN info from DB TBClearExptFirewallVlan($pid, $eid); } return 0; } # # Undo the firewall state for a set of nodes in the indicated experiment. # If no nodes are specified, we remove all nodes and tear down the firewall. # # This function takes care of ensuring that all such nodes have been # neutered prior to being released: # # Change the OSID of all nodes (firewall included) to reboot into # the admin MFS and then power cycle them. We must power cycle to # ensure nodes don't spoof a simple reboot request and pretend to # come up in the MFS. The power "cycle" is actually an "off" # followed by an "on", since true power cycling of large numbers # of nodes may be done in batches to avoid network (or power) # overload on restart. Skewed reboots like this open a window of # vulnerability where nodes rebooted later might be able, before # they are rebooted, to spoof the reload server for nodes that have # just rebooted. So we first turn everyone off, then batched power # ons are safe. # # Note that we take down the firewall while the nodes are turned # off. This is a convenient time while we know no nodes are in # transition. Plus, since we take down the firewall too, other # nodes would not be able to reboot if we left the firewall up. # # BIG SECURITY ASSUMPTION: we are assuming that upon power on, # no node can somehow reboot from the hard disk instead of # from the network. If it does, it is out from behind the # firewall and can wreak havoc. # sub undoFWNodes($$;@) { my ($pid, $eid, @nodes) = @_; my $doall = 0; my $fwerr = 0; my @fwstate = (); my $zap = 1; if (!@nodes || scalar(@nodes) == 0) { @nodes = ExpNodes($pid, $eid, 1, 1); $doall = 1; } # # There has to at least be a firewall node to be interesting # if (@nodes == 0) { return 0; } if ($doall) { print "Taking down experiment firewall.\n"; } else { my $fwnode; TBExptFirewall($pid, $eid, \$fwnode); if (grep {$_ eq $fwnode} @nodes) { tberror "Cannot remove firewall node from an experiment!"; return 1; } print "Removing firewalled nodes from experiment.\n"; } # # At the lowest level of security, we don't do the diskzap dance # unless we have paniced. # # This level ("blue") is used for experiments where we are trying to # protect the inside from outside; i.e., there is no bad stuff inside # to clean up. # my $paniced; my $security_level; if (TBExptGetPanicBit($pid, $eid, \$paniced) && $paniced == 0 && TBExptSecurityLevel($pid, $eid, \$security_level) && $security_level < TBDB_SECLEVEL_ZAPDISK()) { $zap = 0; } # # First turn off all the machines. # If we fail, the firewall in left in place, and some nodes may # be powered off. # if ($zap) { print STDERR "Powering down firewalled nodes.\n"; TBDebugTimeStamp("Powering down nodes"); system("power off @nodes"); if ($?) { tbreport(SEV_SECONDARY, 'power_off_failed'); $fwerr = "Failed to power off all nodes!"; @fwstate = ("Firewall is still in place", "Some nodes may NOT be powered off", "Nodes NOT switched to admin MFS"); goto done; } # # Force all nodes into admin mode. # If we fail, the firewall is left in place. # my %myargs; $myargs{'name'} = "tbswap"; $myargs{'on'} = 1; $myargs{'clearall'} = 1; if (TBAdminMfsSelect(\%myargs, undef, @nodes)) { $fwerr = "Failed to force all nodes into admin mode!"; @fwstate = ("Firewall is still in place", "All nodes are powered off", "Not all nodes have been switched to admin MFS"); goto done; } } # # Once all nodes have been turned off and their DB state changed # to force MFS booting, we can take modify the switch firewall state, # either tearing down the firewall ($doall) entirely or just moving # the indicated nodes out from behind it (!$doall). # # If this fails, we warn and punt. The switch or DB state could # be screwed up at this point. # if ($doall) { if (doFW($pid, $eid, FWTEARDOWN, undef)) { $fwerr = "Failed to tear down firewall!"; @fwstate = ("Firewall may NOT be in place", $zap ? ("All nodes are powered off", "All nodes set to admin mode") : (), "Switch/DB firewall state could be inconsistent!"); goto done; } } else { my @deleted = @nodes; if (doFW($pid, $eid, FWDELNODES, \@deleted)) { $fwerr = "Failed to remove nodes from firewall VLAN!"; @fwstate = ("Nodes may still be in firewall VLAN", $zap ? ("All nodes are powered off", "All nodes set to admin mode") : ()); goto done; } } # # Now we power on the nodes and let them boot into the MFS, # where they will run the disk bootblock zapper. # # If this fails, we power off all the nodes again and get a # little edgy in our error messages to emphasize the gravity # of the situation. Someday we could just move the failed # nodes into a special firewalled holding experiment, and # let the experiment swapout finish, freeing up the nodes that # did succeed. # if ($zap) { print STDERR "Booting nodes into admin MFS and zapping bootblocks.\n"; TBDebugTimeStamp("Booting admin MFS/zapping bootblocks"); my @failed = (); %myargs = (); $myargs{'name'} = "tbswap"; $myargs{'command'} = "sudo /usr/local/bin/diskzap"; $myargs{'poweron'} = 1; $myargs{'retry'} = 1; if (TBAdminMfsRunCmd(\%myargs, \@failed, @nodes)) { foreach my $failed (@failed) { tbreport(SEV_ERROR, 'invalidate_bootblock_failed', $failed); } $fwerr = "Failed to invalidate bootblocks on @failed!"; @fwstate = ("Firewall is NOT in place", "All nodes set to admin mode"); system("power off @nodes"); if ($?) { push(@fwstate, "Some nodes may NOT be powered off"); } else { push(@fwstate, "All nodes are powered off"); } push(@fwstate, "MAKE SURE THESE NODES DO NOT BOOT FROM DISK!"); } } done: # # If we had a failure when tearing down the firewall completely, # we act as though the panic button had been pressed (set panic # bit in DB, disable cnet port, and inform tbops). This hopefully # ensures that everything will get cleaned up correctly. This may # eventually prove to be overkill. # # If we failed while removing some nodes from behind the firewall # we don't get quite so cranky. # if ($fwerr) { my $op; if ($doall) { my ($fwnode, $fwport); if (!TBExptFirewallAndPort($pid, $eid, \$fwnode, \$fwport) || system("snmpit -d ${fwnode}:${fwport}") != 0) { push(@fwstate, "Firewall cnet interface ${fwnode}:${fwport} NOT disabled"); } else { push(@fwstate, "Firewall cnet interface ${fwnode}:${fwport} disabled"); } TBExptSetPanicBit($pid, $eid, 2); $op = "Swapout"; } else { $op = "Modify"; } tberror "$fwerr" . "\nINFORMING $TBOPS!"; if (defined($user_email)) { SENDMAIL("$user_name <$user_email>", "Firewalled experiment $op failed". " for $pid/$eid", "$op of firewalled experiment $pid/$eid". " by $user_uid failed!\n". "Admin intervention required:\n\n$fwerr\n\n". "Current state of @nodes:\n\n". join("\n", @fwstate) . "\n", "$user_name <$user_email>", "Cc: $TBOPS"); } return 1; } return 0; } # # Monitor the progress of swapout image creation. # This is a lot like regular image creation: we make sure that progress # is being made (the image is growing) and abort if not. # sub doSwapoutProgress($%) { my ($mystate, $status) = @_; my $perminute = int(60 / $mystate->{'_interval'}); my $iterations = $mystate->{'_iterations'}; my @running = grep { $status->{$_} eq "none" } keys(%$status); if (@running == 0) { return 0; } # # Make a generic progress report # if ((++$iterations % $perminute) == 0) { print "swapimage: still running after ", $iterations / $perminute, " minutes: @running\n"; } # # Of the nodes still running, determine which are not making progress # and fail them. # my $curtime = time(); foreach my $node (@running) { my $nstate = $mystate->{$node}; my $fname = $nstate->{'name'}; my $fsize = $nstate->{'size'}; my $ftime = $nstate->{'time'}; # XXX if ($ftime == 0) { $ftime = ($curtime - $mystate->{'_interval'}); $nstate->{'time'} = $ftime; } # # If no progress, see if we should timeout # my $cursize = (stat($fname))[7]; if (!defined($cursize) || $cursize == $fsize) { if ($curtime - $ftime > $mystate->{'_timeout'}) { tbwarn("$node: no progress saving state after " . int(($curtime - $ftime) / 60) . " minutes, terminating state save for this node\n"); $status->{$node} = "timeout"; } if (defined($cursize)) { print "$node: imagefile has not grown (still at $cursize bytes) in the last"; } else { print "$node: imagefile does not exist after"; } } else { $nstate->{'size'} = $cursize; $nstate->{'time'} = $curtime; print "$node: imagefile grew from $fsize to $cursize in"; } print " ", int($curtime - $ftime), " seconds\n"; } $mystate->{'_iterations'} = $iterations; return 1; } # # Handle swapout time actions. # Returns zero if successful, non-zero otherwise. # sub doSwapoutAction($$%) { my ($pid, $eid, %soaction) = @_; # swapout disk image related state my $statesavecmd = "/usr/local/bin/create-swapimage"; my $stateinterval = 60; my $statetimeout = (5 * 60); my %state = (); my %nodes = ExpNodeVnames($pid, $eid, 1, 1); my @pnodes = keys(%nodes); if (@pnodes > 0) { print STDERR "Performing swapout admin MFS actions.\n"; TBDebugTimeStamp("Performing swapout actions"); my @failed = (); my %myargs = (); $myargs{'name'} = "tbswap"; $myargs{'command'} = $soaction{'command'}; if (defined($soaction{'timeout'})) { $myargs{'timeout'} = $soaction{'timeout'}; } $myargs{'timestamp'} = 1; # # If this is swapout disk state saving, we do lots o stuff. # if ($soaction{'command'} =~ /^$statesavecmd/) { # # Setup a progress function # $myargs{'pfunc'} = \&doSwapoutProgress; $myargs{'pinterval'} = $stateinterval; $myargs{'pcookie'} = \%state; $state{'_interval'} = $stateinterval; $state{'_timeout'} = $statetimeout; $state{'_iterations'} = 0; my $swapdir = PROJROOT() . "/$pid/exp/$eid/swapinfo"; my @enodes = (); foreach my $node (@pnodes) { my $vname = $nodes{$node}; # # XXX time saving hack. # # If the swap state files don't exist, don't bother to boot # the node into the MFS. Complain about unsaved nodes unless # they are delay nodes. # if (! -r "$swapdir/$vname.part" || ! -r "$swapdir/$vname.sig") { # XXX should do this based on role, not name if ($vname !~ /^tbs?delay\d+$/) { tbwarn "no swap info for $node ($vname), ". "not saving disk state!"; } next; } push(@enodes, $node); $state{$node} = { 'name' => "$swapdir/$vname-swap.ndz", 'time' => 0, 'size' => 0 }; } @pnodes = @enodes; } # # We may have disqualified all the nodes above, so see if there # is still anything to do. # if (@pnodes == 0) { print STDERR "No nodes require state saving.\n"; } elsif (TBAdminMfsRunCmd(\%myargs, \@failed, @pnodes)) { if ($soaction{'isfatal'}) { foreach my $failed (@failed) { tbreport(SEV_ERROR, 'run_command_failed', $soaction{'command'}, $failed); } tberror "Failed to run '" . $soaction{'command'} . "' on @failed!"; return 1; } foreach my $failed (@failed) { tbreport(SEV_WARNING, 'run_command_failed', $soaction{'command'}, $failed); } tbwarn "Failed to run '" . $soaction{'command'} . "' on @failed!"; } } return 0; }