#!/usr/bin/perl -w # # EMULAB-COPYRIGHT # Copyright (c) 2000-2010 University of Utah and the Flux Group. # All rights reserved. # use strict; use English; use Getopt::Std; use POSIX qw(setsid ceil); use POSIX ":sys_wait_h"; # # This function as the main assign loop. It converts the virtual # topology into a top input including LAN and delay translation. It # then snapshots the current testbed physical state and runs assign, # looping a couple times if assign fails. When assign successfully # completes it will interpret the results. Attempt to match any # existing portmap entries and then update the delays and vlans table. # # XXX Internally created nodes (jailhost,delay,sim) are not treated # consistently. Needs more thought. # # Return codes: We catch all errors with the END block below, and if # anyone calls die() (exit value is 255) we add in the CANRECOVER bit. # Failures in assign always cause the caller to stop retrying. # # The CANRECOVER bit indicates 'recoverability' (no db or physical # state was modified by the time the error occurred). This is relevant # to only modify operations (update). # my $WRAPPER_SUCCESS = 0x00; my $WRAPPER_FAILED = 0x01; # Failed (Add other values) my $WRAPPER_FAILED_CANRECOVER = 0x40; # Can recover from update my $WRAPPER_FAILED_FATALLY = 0x80; # Do not use this. # Set this once we modify DB state; forces no recover in fatal(). my $NoRecover = 0; sub usage () { print STDERR "Usage: $0 [-v] [-u [-f] | -n] pid eid\n"; print STDERR " -v - Enables verbose output\n"; print STDERR " -u - Enables update mode\n"; print STDERR " -f - Fix current resources during update mode\n"; print STDERR " -n - Run assign, but do not reserve/modify resources.\n"; print STDERR " -r - Regression mode.\n"; print STDERR " -x - Turn on the prepass\n"; print STDERR " -m - Set the multiplex factor; overrides experiment.\n"; print STDERR " -p - Do a precheck for mapability on an empty testbed - ". "implies -n\n"; print STDERR " -l - Use rspec v2 instead of the text file format\n"; exit($WRAPPER_FAILED); } my $optlist = "dvunfprqczxm:ko:al"; my $verbose = 0; my $debug = 0; my $fixmode = 0; my $updating = 0; my $impotent = 0; my $precheck = 0; my $prepass = 0; my $alloconly = 0; my $outfile; my $mfactor; my $regression = 0; my $noassign = 0; # Only with regression mode, use previous solution. my $noregfree = 0; # Only with regression mode, leave physical state at end. my $usecurrent = 0; # Only with regression mode, use current solution. my $quiet = 0; my $clear = 0; my $warnings = 0; my $maxrun = 3; # Maximum number of times we run assign. my $gotlock = 0; my $userspec = 0; my $vtop; # # Configure variables # my $TB = "@prefix@"; my $DBNAME = "@TBDBNAME@"; my $TBOPS = "@TBOPSEMAIL@"; my $ASSIGN = "$TB/libexec/assign"; my $WRAPPER2 = "$TB/libexec/assign_wrapper2"; my $PTOPGEN = "$TB/libexec/ptopgen"; my $VTOPGEN = "$TB/bin/vtopgen"; my $NFREE = "$TB/bin/nfree"; # # Load the Testbed support stuff. # use lib "@prefix@/lib"; use libdb; use libtestbed; use libtblog; use libvtop; use libadminctrl; use User; # Protos sub fatal(@); sub debug($); sub chat($); sub RunAssign($$); sub FinalizeRegression($); sub AssignLoop(); sub MapperWrapper(); sub PrintSolution(); # un-taint path $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin'; delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; # Turn off line buffering on output $| = 1; # # We want warnings to cause assign_wrapper to exit abnormally. # # FIXME: Is this still needed. "warn" is only used once. Also this # will cause perl internal warnings (such as "Use of uninitialized # value ..." to cause assign_wrapper to fail. -- kevina # $SIG{__WARN__} = sub { tbwarn $_[0];$warnings++; }; # # Parse command arguments. Once we return from getopts, all that should be # left are the required arguments. # my %options = (); if (! getopts($optlist, \%options)) { usage(); } if (@ARGV < 2) { usage(); } if (defined($options{"v"})) { TBDebugTimeStampsOn(); $verbose++; } if (defined($options{"a"})) { $alloconly++; } if (defined($options{"d"})) { $debug++; } if (defined($options{"u"})) { $updating = 1; } if (defined($options{"n"})) { $impotent = 1; } if (defined($options{"f"})) { $fixmode = 1; } if (defined($options{"p"})) { $precheck = 1; } if (defined($options{"x"})) { $prepass = 1; } if (defined($options{"o"})) { $outfile = $options{"o"}; } if (defined($options{"m"})) { $mfactor = $options{"m"}; } if (defined($options{"r"})) { if ($DBNAME eq "tbdb") { fatal("Cannot use regression mode on main DB"); } $regression = 1; $clear = 1; $fixmode = 1; TBDebugTimeStampsOn(); $usecurrent = 1 if (defined($options{"z"})); $noregfree = 1 if (defined($options{"k"})); } if (defined($options{"q"})) { $quiet = 1; } if (defined($options{"c"})) { $clear = 1; } if (defined($options{"l"})) { $userspec = 1; } my $pid = $ARGV[0]; my $eid = $ARGV[1]; my $experiment = Experiment->Lookup($pid, $eid); if (!defined($experiment)) { fatal("Could not lookup experiment object $pid,$eid!") } # # Verify that this person can muck with the experiment. # my $this_user = User->ThisUser(); if (! defined($this_user)) { tbdie("You ($UID) do not exist!"); } if (!TBAdmin() && ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) { fatal("You do not have permission to map this experiment!"); } # multiplex_factor default. $mfactor = $experiment->multiplex_factor() if (!defined($mfactor) && defined($experiment->multiplex_factor())); # # These are the flags to the vtop creation code. # my $vtopflags = 0; $vtopflags |= $libvtop::VTOP_FLAGS_VERBOSE if ($verbose); $vtopflags |= $libvtop::VTOP_FLAGS_QUIET if ($quiet); $vtopflags |= $libvtop::VTOP_FLAGS_UPDATE if ($updating); $vtopflags |= $libvtop::VTOP_FLAGS_FIXNODES if ($fixmode || $usecurrent); $vtopflags |= $libvtop::VTOP_FLAGS_IMPOTENT if ($impotent); $vtopflags |= $libvtop::VTOP_FLAGS_ALLOCONLY if ($alloconly); $vtopflags |= $libvtop::VTOP_FLAGS_REGRESSION if ($regression); MapperWrapper(); if ($regression) { if (0) { $updating = 1; $fixmode = 1; $clear = 0; $vtopflags |= ($libvtop::VTOP_FLAGS_UPDATE|$libvtop::VTOP_FLAGS_FIXNODES); MapperWrapper(); } FinalizeRegression(0); } PrintSolution() if ($outfile); exit(0); sub MapperWrapper() { chat("Starting the new and improved mapper wrapper.\n"); TBDebugTimeStamp("Create libvtop started"); $vtop = libvtop->Create($experiment, $this_user, $vtopflags); if (!defined($vtop)) { fatal("Could not create vtop structure for $experiment"); } TBDebugTimeStamp("Create libvtop ended"); if (!$impotent && ($updating || $clear)) { if ($clear) { chat("Freeing reserved nodes ...\n"); system("export NORELOAD=1; $NFREE -x -a $pid $eid") == 0 or fatal("Could not release nodes."); } chat("Clearing physical state before updating.\n"); $experiment->RemovePhysicalState(); exit(0) if ($clear && !$regression); } TBDebugTimeStamp("vtopgen started"); $vtop->CreateVtop() == 0 or fatal("Could not create vtop for $experiment"); TBDebugTimeStamp("vtopgen finished"); AssignLoop(); return 0; } # # The assign loop. # sub AssignLoop() { my $currentrun = 1; my $canceled = 0; my $tried_precheck = 0; # Admission control counts my %admission_control = (); # XXX plab hack - only run assign once on plab topologies, since # they're easy to map and the physical topology does not change # frequently. if ($vtop->plabcount() && $vtop->plabcount == $vtop->virtnodecount()) { $maxrun = 2; } TBDebugTimeStamp("mapper loop started"); while (1) { chat("Assign run $currentrun\n"); my $prefix = ($debug || $regression ? "$pid-$eid" : "$pid-$eid-$$"); # # When precheck is on, we only do one run in impotent mode and exit. # if ($precheck) { $prefix .= ".empty"; $impotent = 1; chat("Trying assign on an empty testbed.\n"); } # # Serialize with the pool daemon if using shared nodes. # if ((!($impotent || $regression)) && $vtop->sharednodecount()) { while (1) { # # Use a countup/countdown counter, so that multiple mappers # can run, but not while the pool_daemon is running. # my $lock_result = DBQueryFatal("update emulab_locks set value=value+1 ". "where name='pool_daemon' and value>=0"); $gotlock = $lock_result->affectedrows; last if ($gotlock); chat("Waiting for pool daemon lock ...\n"); sleep(10); } } # # RunAssign returns 0 if successful. # returns -1 if failure, but assign says to stop trying. # returns 1 if failure, but assign says to try again. # returns 2 if we made some forward progress. # my $retval = RunAssign($precheck, $prefix); if ($gotlock) { DBQueryFatal("update emulab_locks set value=value-1 ". "where name='pool_daemon'"); $gotlock = 0; } # Success! last if ($retval == 0); if ($retval < 0 || $regression) { # # Failure in assign. # FinalizeRegression(1) if ($regression); fatal({type => 'primary', severity => SEV_ERROR, error => ['unretriable_assign_error']}, "Unretriable error. Giving up."); } # # When precheck is off, we do a precheck run if the first try fails # to find a solution. This avoids looping on an unsolvable topology. # if (!$precheck && !$tried_precheck) { chat("Trying assign on an empty testbed to verify mapability.\n"); my $save_impotent = $impotent; $impotent = 1; my $retval = RunAssign(1, $prefix . ".empty"); if ($retval != 0) { fatal({type=>'extra', cause=>'user', severity=>SEV_ERROR, error=>['infeasible_resource_assignment']}, "This experiment cannot be instantiated on this ". "testbed. You have most likely asked for hardware ". "this testbed does not have, such as nodes of a type ". "it does not contain, or nodes with too many network ". "interfaces. You will need to modify this experiment ". "before it can be swapped in - re-submitting the ". "experiment as-is will always result in failure."); } chat("Assign succeeded on an empty testbed.\n"); $impotent = $save_impotent; $tried_precheck = 1; } if ($currentrun >= $maxrun && $retval != 2) { fatal({type => 'primary', severity => SEV_ERROR, error => ['reached_assign_run_limit']}, "Reached run limit. Giving up."); } chat("Waiting 5 seconds and trying again...\n"); sleep(5); $currentrun++; } GatherAssignStats($pid, $eid, %{ $vtop->exptstats() }) if (! ($impotent || $alloconly)); TBDebugTimeStamp("mapper loop finished"); return 0; } # # The guts of an assign run. # sub RunAssign($$) { my ($precheck, $prefix) = @_; my $typelimitfile = $prefix .".limits"; my $ptopfile = $prefix . ".ptop"; my $vtopfile = $prefix . ".vtop"; if ($userspec) { $ptopfile .= ".xml"; $vtopfile .= ".xml"; } my $assignexitcode = 0; # Debugging hack for regression mode. Avoid really long assign runs. if ($regression && $noassign) { if (! -e "assign.log") { chat("No existing assign results file!\n"); return -1; } chat("Using existing assign results file\n"); goto skiprun; } # # Do admission control test, and gather the info. # my %admission_control; if (!$regression) { if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control)){ tberror("Failed admission control checks!\n"); return -1; } } # # Append this admission control results to ptopgen. # if (scalar(keys(%admission_control))) { open(TYPELIMIT, ">$typelimitfile") or return -1; foreach my $typeclass (keys(%admission_control)) { my $count = $admission_control{$typeclass}; print TYPELIMIT "$typeclass $count\n"; } close(TYPELIMIT); } # # Snapshot physical resources. # # if updating (-u), include any resources that may already be # allocated to experiment in the PTOP results. # my $ptopargs = "-p $pid "; $ptopargs .= "-e $eid " if ($updating); $ptopargs .= "-u " if ($updating && $experiment->elabinelab()); $ptopargs .= "-m $mfactor " if (defined($mfactor)); $ptopargs .= "-v " if ($vtop->virtnodecount()); $ptopargs .= "-r " if ($vtop->remotenodecount()); $ptopargs .= "-S " if ($vtop->simnodecount()); $ptopargs .= "-h " if ($vtop->sharednodecount()); $ptopargs .= "-a " if ($precheck); $ptopargs .= "-c " . $experiment->delay_capacity() . " " if (defined($experiment->delay_capacity())); if ($userspec == 1) { $ptopargs .= "-x -g 2 "; } $ptopargs .= "-l $typelimitfile" if (scalar(keys(%admission_control))); chat("ptopargs: '$ptopargs'\n"); TBDebugTimeStamp("ptopgen started"); system("$PTOPGEN $ptopargs > $ptopfile"); if ($?) { tberror("Failure in ptopgen\n"); return -1; } TBDebugTimeStamp("ptopgen finished"); # # Now generate a vtop file and dump it to a file. # if (! open(VTOPFILE, "> $vtopfile")) { tberror("Could not open $vtopfile: $!\n"); return -1; } my $reslibvtop; if ($userspec == 1) { $reslibvtop = $vtop->PrintRspec(*VTOPFILE); } else { $reslibvtop = $vtop->PrintTop(*VTOPFILE); } if ($reslibvtop != 0) { tberror("Could not print vtop file for $experiment\n"); return -1; } close(VTOPFILE); if (! ($impotent || $regression || $alloconly)) { if ($experiment->Update({"maximum_nodes" => $vtop->maximum_nodes(), "minimum_nodes" => $vtop->minimum_nodes(), "virtnode_count"=> $vtop->virtnodecount() })){ tberror("Could not update min/max/virt nodes for $experiment\n"); return -1; } } # Run assign my $cmd = "assign"; my $args = ""; $args .= "-f rspec " if ($userspec == 1); $args .= "-f text " if ($userspec == 0); $args .= "$ptopfile $vtopfile"; $args = "-P $args" if (!$vtop->sharednodecount()); $args = "-uod -c .75 $args" if ($vtop->virtnodecount() || $vtop->simnodecount()); $args = "-n $args" if ($precheck); $args = "-s 123456 $args" if ($regression); $args .= " PN=1.0" if ($vtop->sharednodecount()); # The prepass speeds up assign on big topos with virtual nodes. if ($prepass) { $cmd = "assign_prepass"; $args = "-m $mfactor $args" if (defined($mfactor)); } chat("assign command: '$cmd $args'\n"); # # Fork a child to run assign. Parent spins watching the cancel flag # and waiting for assign to finish. # TBDebugTimeStamp("assign started"); if (my $childpid = fork()) { while (1) { sleep(2); if (waitpid($childpid, &WNOHANG) == $childpid) { $assignexitcode = $? >> 8; last; } # Check cancel flag. if ($experiment->canceled()) { if ((my $pgrp = getpgrp($childpid)) > 0) { kill('TERM', -$pgrp); waitpid($childpid, 0); tberror({cause => 'canceled', severity => SEV_IMMEDIATE, error => ['cancel_flag']}, "Cancel flag set; aborting assign run!\n"); return -1; } # Loop again to reap child above before exit. } } } else { # # Change our session so the child can get a killpg without killing # the parent. # POSIX::setsid(); exec("nice $WRAPPER2 $cmd $args > assign.log 2>&1"); die("Could not start assign!\n"); } TBDebugTimeStamp("assign finished"); # Check cancel flag before continuing. if ($experiment->canceled()) { tberror({cause => 'canceled', severity => SEV_IMMEDIATE, error => ['cancel_flag']}, "Cancel flag set; aborting assign run!\n"); return -1; } # Check for possible full filesystem ... if (-z "assign.log") { tbnotice("assign.log is zero length! Stopping ...\n"); return -1; } # # Saving up assign.log coz each swapin/modify is different and it # is nice to have every mapping for debugging and archiving # purposes We do not call it .log though, since we do not want it # copied out to the user directory every swapin. See Experiment.pm # system("/bin/cp assign.log ${prefix}.assign"); # # We no longer care what assign has to say when it fails. # Any relevent info was already sent to stderr so just # tell the caller whether we want to keep trying or not. # if ($assignexitcode) { return (($assignexitcode == 1) ? 1 : -1); } # # If we were doing the precheck, go ahead and exit now - there is no # useful information to parse out # if ($precheck) { chat("Precheck succeeded.\n"); return 0; } skiprun: chat("Reading assign results.\n"); if (!open(ASSIGNFP, "assign.log")) { print("Could not open assign logfile! $!\n"); return -1; } # New solution each time. $vtop->ClearSolution(); TBDebugTimeStamp("ReadSolution started"); if ($vtop->ReadTextSolution(*ASSIGNFP) != 0) { print("Could not parse assign logfile! $!\n"); return -1; } close(ASSIGNFP); TBDebugTimeStamp("ReadSolution ended"); if (defined($vtop->genirspec())) { TBDebugTimeStamp("Map Geni Resources Started"); if ($vtop->MapResources() != 0) { print("Could not map external resources! $!\n"); return -1; } TBDebugTimeStamp("Map Geni Resources ended"); } TBDebugTimeStamp("InterpNodes Started"); if ($vtop->InterpNodes() != 0) { print("Could not interpret nodes.\n"); return -1; } TBDebugTimeStamp("InterpNodes ended, AllocNodes Started"); # Check cancel flag before continuing. if ($experiment->canceled()) { tberror({cause => 'canceled', severity => SEV_IMMEDIATE, error => ['cancel_flag']}, "Cancel flag set; aborting assign run!\n"); return -1; } my $retval = $vtop->AllocNodes(); if ($retval != 0) { if ($retval < 1) { print("Could not allocate any nodes.\n"); return -1; } return 1; } TBDebugTimeStamp("AllocNodes ended, InterpLinks Started"); if ($vtop->InterpLinks() != 0) { print("Could not setup links\n"); return -1; } TBDebugTimeStamp("InterpLinks ended, InitializePhysNodes Started"); if ($vtop->InitializePhysNodes() != 0) { print("Could not InitializePhysNodes\n"); return -1; } TBDebugTimeStamp("InitializePhysNodes ended"); if (! ($impotent || $regression || $alloconly)) { TBDebugTimeStamp("ReserveSharedBandwidth started"); if ($experiment->ReserveSharedBandwidth($updating) != 0) { print("Could not reserve shared bandwidth\n"); return -1; } TBDebugTimeStamp("ReserveSharedBandwidth ended"); } return 0; } # # In regression mode we want to save the physical state and then clear # the physical resources. # sub FinalizeRegression($) { my ($error) = @_; my $cwd; chomp($cwd = `/bin/pwd`); if (!$error) { chat("Saving physical state in regression mode\n"); if (system("/bin/rm -rf $pid-$eid.pstate")) { tberror("Could not clean physical state directory\n"); return -1; } if ($experiment->BackupPhysicalState("$cwd/$pid-$eid.pstate", 1) != 0) { print STDERR "Could not save physical state!\n"; exit(1); } # Generate a vtop file with all resources fixed. chat("Generating new vtop file with all resources fixed.\n"); if (! open(VTOPFILE, "> $pid-$eid.fixed")) { tberror("Could not open $pid-$eid.fixed: $!\n"); return -1; } if ($vtop->PrintTop(*VTOPFILE) != 0) { tberror("Could not print fixed vtop file for $experiment\n"); return -1; } close(VTOPFILE); } return 0 if ($noregfree); chat("Removing physical state in regression mode\n"); if ($experiment->RemovePhysicalState() != 0) { print STDERR "Could not remove physical state!\n"; exit(1); } if ($vtop->newreservednodes()) { my @newreservednodes = $vtop->newreservednodes(); system("export NORELOAD=1; $NFREE -x $pid $eid @newreservednodes"); if ($?) { exit(1); } } return 0; } # # Print a solution in rspec format. # sub PrintSolution() { my $output = undef; if ($outfile ne "-") { if (! open(OUTFILE, "> $outfile")) { tberror("Could not open $outfile: $!\n"); return -1; } $output = *OUTFILE; } if ($vtop->PrintSolution($output) != 0) { tberror("Could not print solution for $experiment\n"); return -1; } close($output) if (defined($output)); return 0; } # We will come through here no matter how we exit. END { # Watch for getting here cause of a die()/exit() statement someplace. my $exitcode = $?; # # Do not want to leave this around, it will lock the pool daemon out. # if ($gotlock) { DBQueryFatal("update emulab_locks set value=value-1 ". "where name='pool_daemon'"); $gotlock = 0; } if ($exitcode && $exitcode != $WRAPPER_FAILED) { $exitcode = $WRAPPER_FAILED|$WRAPPER_FAILED_FATALLY; } if ($warnings > 0) { tberror("$warnings warnings.\n"); $exitcode |= $WRAPPER_FAILED; } # Set recover bit if we are going to fail. $exitcode = $exitcode|$WRAPPER_FAILED_CANRECOVER if ($exitcode && !$NoRecover); # And change the exitcode to be what we want it to be. $? = $exitcode; } sub fatal(@) { # # Free any newly reserved nodes (in update mode) so that tbswap knows # it is safe to recover the experiment. If we bypass this and leave # through the END block then NoRecover will still be set and tbswap # will know to swap the experiment out. # if ($updating) { if (defined($vtop)) { my @newreservednodes = $vtop->newreservednodes(); my $NoRecover = $vtop->norecover(); if (@newreservednodes) { $NoRecover = 0 if (system("$NFREE -x $pid $eid @newreservednodes") == 0); } else { # When not updating this is meaningless to tbswap. $NoRecover = 0; } } } tberror(@_); # We next go to the END block above. exit($WRAPPER_FAILED); } sub debug($) { if ($verbose) { print $_[0]; } } sub chat($) { if (! $quiet) { print $_[0]; } }