#!/usr/bin/perl -wT # # EMULAB-COPYRIGHT # Copyright (c) 2000-2011 University of Utah and the Flux Group. # All rights reserved. # use English; use Getopt::Std; use POSIX; # # Run the linktest code from experiment swapin; this script serves as # a wrapper for run_linktest.pl. We store the pid in the DB, and allow # for linktest cancelation from the webpage. This script is only run on # boss; otherwise the user is running run_linktest.pl directly on ops or # on a node, and can terminate linktest directly (via ^C or SIGTERM). # sub usage() { print("Usage: linktest_control [-d] [-t timeout] [-k | -l ] ". "[-o ] \n". "-l - Run linktest at a specific level; defaults to DB value.\n". "-k - Kill a currently running linktest.\n". "-t - Specify timeout in seconds.\n". "-o - Specify output file for linktest results.\n". "-m - Send email to swapper if linktest fails.\n". "-r - Report results only, don't flag errors.\n". "-d - Turn on debugging output.\n"); exit(-1); } my $optlist = "dkl:o:t:mfr"; my $debug = 2; my $cancel = 0; my $sendmail = 0; my $forcerun = 0; my $reportonly = 0; my $timeout; my $level; my $output; my $child_pid; # Child run_linktest process. # # Configure variables # my $TB = "@prefix@"; my $TBOPS = "@TBOPSEMAIL@"; my $CONTROL = "@USERNODE@"; my $TBBASE = "@TBBASE@"; my $SSH = "$TB/bin/sshtb"; # un-taint path $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin'; delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; # # Testbed Support libraries # use lib "@prefix@/lib"; use libdb; use libtestbed; use User; use Experiment; # # Turn off line buffering on output # $| = 1; if ($EUID != 0) { die("*** $0:\n". " Must be root! Maybe its a development version?\n"); } # # Parse command arguments. Once we return from getopts, all that should be # left are the required arguments. # %options = (); if (! getopts($optlist, \%options)) { usage(); } if (defined($options{"d"})) { $debug = 2; } if (defined($options{"m"})) { $sendmail = 1; } if (defined($options{"r"})) { $reportonly = 1; } if (defined($options{"f"})) { $forcerun = 1; } if (defined($options{"k"})) { $cancel = 1; } if (defined($options{"t"})) { $timeout = $options{"t"}; if ($timeout =~ /^(\d*)$/) { $timeout = $1; } else { die("Bad data in timeout: $timeout"); } } if (defined($options{"l"})) { $level = $options{"l"}; if ($level =~ /^(\d*)$/) { $level = $1; } else { die("Bad data in level: $level."); } } if (defined($options{"o"})) { $output = $options{"o"}; # Note different taint check (allow /). if ($output =~ /^([-\w\.\/]+)$/) { $output = $1; } else { die("Bad data in output file: $output\n"); } } if (@ARGV != 2) { usage(); } # Slowly convert to using Experiment module. my $experiment = Experiment->Lookup($ARGV[0], $ARGV[1]); if (!defined($experiment)) { tbdie("Could not lookup experiment object!") } my $pid = $experiment->pid(); my $eid = $experiment->eid(); # # Check state. Only in the active state # if ($experiment->state() ne EXPTSTATE_ACTIVE && $experiment->state() ne EXPTSTATE_ACTIVATING && $experiment->state() ne EXPTSTATE_MODIFY_RESWAP) { die("*** $0:\n". " Experiment $pid/$eid must be active!\n"); } # # Verify user and get his DB uid and other info for later. # my $this_user = User->ThisUser(); if (! defined($this_user)) { tbdie("You ($UID) do not exist!"); } my $dbuid = $this_user->uid(); my $user_name = $this_user->name(); my $user_email = $this_user->email(); # # Check permission. Only people with permission to destroy the experiment # can do this. # if (! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) { die("*** $0:\n". " You do not have permission to start/stop linktest for ". "$pid/$eid!\n"); } # Need the unix_gidname info to pass to ops. my $unix_gidname = $experiment->GetGroup()->unix_name(); my $errlog = $experiment->UserDir() . "/logs/linktest.log"; my $project = $experiment->GetProject(); my $unix_pidname = $project->unix_name(); # # Lets see if there is a linktest running already. # my $linktest_pid = $experiment->linktest_pid(); if (defined($linktest_pid) && $linktest_pid) { if (! kill(0, $linktest_pid) && ($ERRNO == ESRCH)) { $experiment->Update({'linktest_pid' => 0}); if ($cancel) { print("Linktest has already exited on experiment $pid/$eid!\n"); exit(0); } else { print("Clearing stale linktest pid from DB for $pid/$eid!\n"); } } elsif (!$cancel) { die("*** $0:\n". " Linktest is already running on experiment $pid/$eid!\n"); } else { if (! kill('TERM', $linktest_pid)) { SENDMAIL($TBOPS, "Failed to stop linktest daemon for $pid/$eid", "Could not kill(TERM) process $linktest_pid: $? $!"); die("*** $0:\n". " Failed to stop linktest daemon for $pid/$eid!\n"); } exit(0); } } elsif ($cancel) { die("*** $0:\n". " Linktest is not running on experiment $pid/$eid!\n") } my @hosed = (); $experiment->LinkTestCapable(\@hosed); if (@hosed > 0) { print STDERR "*** Nodes running an OSID that does not support linktest:\n"; print STDERR "*** ", join(' ', @hosed), "\n"; if (!$forcerun) { die("*** $0:\n". " Aborting linktest since not all nodes support it!\n"); } } # # Okay, lets run linktest. First set up a handler so that we can catch # a termination signal and kill ourselves off. # sub cleanup() { $experiment->Update({'linktest_pid' => 0}) if (defined($experiment)); } sub handler($) { $SIG{TERM} = 'IGNORE'; $SIG{INT} = 'IGNORE'; if (defined($child_pid)) { kill('HUP', $child_pid); waitpid($child_pid, 0); undef($child_pid); } cleanup(); die("*** $0:\n". " Linktest has been canceled on experiment $pid/$eid!\n") } sub notify() { my $lev = (defined($level) ? $level : $experiment->linktest_level()); my $logname = undef; my $isopen; my $filespec = ""; my $errbody = ""; $experiment->GetLogFile(\$logname, \$isopen); if (defined($logname)) { $filespec = "\n\nfile://$logname\n"; } if (-e $errlog) { $errbody = `/bin/cat $errlog`; } SENDMAIL($TBOPS, "Linktest Failure: $pid/$eid", "Failure in linktest (level $lev); ". "returned non-zero status.\n". "Activity log file at:\n\n". " $TBBASE/spewlogfile.php3?pid=${pid}&eid=${eid}\n". "$filespec\n". "\n". $errbody, "$user_name <$user_email>"); } # # Fork a child to run the actual linktest script. The parent just waits # for child to exit, or to be signaled to terminate the child. # if (defined($output)) { $child_pid = TBBackGround($output); } else { $child_pid = fork(); } if ($child_pid < 0) { die("*** $0:\n". " Linktest could not fork a new process for $pid/$eid!\n"); } if ($child_pid) { # # Parent. # $SIG{TERM} = \&handler; $SIG{INT} = \&handler; $experiment->Update({'linktest_pid' => $$}); waitpid($child_pid, 0); my $exitval = $? >> 8; if (-e $errlog) { system("/bin/cat $errlog"); } cleanup(); notify() if ($exitval && $sendmail); exit($exitval); } # # Child execs an ssh to users.emulab.net where linktest is actually run. # We tell ssh to allocate a tty so that we can kill it with TERM and have # everything die off properly. # # XXX: "-t -t" to ssh is so that kill -TERM carries across to ops. # my @cmdargs = ("$SSH", "-t", "-t", "-F", "/dev/null", "-host", $CONTROL, "exec", "$TB/sbin/linktest.proxy"); push(@cmdargs, ("-d", "$debug")) if ($debug); push(@cmdargs, ("-t", "$timeout")) if (defined($timeout)); push(@cmdargs, "-l"); push(@cmdargs, (defined($level) ? $level : $experiment->linktest_level())); push(@cmdargs, ("-g", $unix_gidname)); push(@cmdargs, ("-p", $unix_pidname)); push(@cmdargs, ("-u", $dbuid)); push(@cmdargs, ("-e", "$pid/$eid")); push(@cmdargs, ("-o", $errlog)); push(@cmdargs, "-r") if ($reportonly); print "Running '@cmdargs'\n" if ($debug); # For sshtb $UID=0; exec(@cmdargs); die("*** $0:\n". " Could not exec run_linktest.pl\n");