#!/usr/bin/perl -w

#
# Copyright (c) 2000-2004, 2007 University of Utah and the Flux Group.
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
#

# For restart: event system restart. clear ready bits and startup status,
#             boot status, port counters.

use English;
use Getopt::Std;

#
# This gets invoked from the Web interface. Restart an active experiment.
#
sub usage()
{
    # Show the one-line synopsis on STDOUT, then terminate the script
    # with a failure status. Never returns to the caller.
    my $synopsis = "Usage: tbrestart <pid> <eid>\n";

    print STDOUT $synopsis;
    exit(-1);
}
# Option specification handed to getopts() below. It is empty, so the
# script accepts no command-line switches, only positional arguments.
my  $optlist = "";

#
# Configure variables
#
# The @NAME@ tokens are placeholders; presumably they are substituted when
# this .in template is processed at configure/build time -- confirm.
# NOTE(review): $DBNAME, $TBOPS and $TBLOGS are not referenced anywhere
# else in the visible part of this script.
#
my $TB     = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

#
# Locals
#
# $nodereboot  Path of the program used to reboot a single node.
# $waitstart   Timestamp taken here, before any node is rebooted; it is
#              handed to TBNodeStateWait() when waiting for nodes later.
# $failed      Count of nodes that did not come back up.
# $state       Experiment state, filled in by the ExpState() check below.
#
my $nodereboot = "$TB/bin/node_reboot";
my $waitstart  = time;
my $failed     = 0;
my $state;

#
# Untaint the path: use a fixed PATH and drop the environment variables
# that can change how a shell started by system() behaves.
#
$ENV{'PATH'} = "/bin:/usr/bin:/sbin:/usr/sbin:$TB/libexec:$TB/sbin:$TB/bin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn on autoflush for the selected output handle, so that every write
# is flushed immediately instead of being held in a buffer.
#
$| = 1;

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
# %options is a lexical rather than an implicit package global; nothing
# else in the visible part of this script reads it.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV != 2) {
    usage();
}

#
# The project and experiment names come straight from the command line and
# are later used to build external commands. Accept only word characters
# and dashes, and keep the captured (checked) copies.
#
my ($pid, $eid);

if ($ARGV[0] =~ /\A([-\w]+)\z/) {
    $pid = $1;
}
else {
    die("*** $0:\n".
	"    Bad data in pid: $ARGV[0]\n");
}
if ($ARGV[1] =~ /\A([-\w]+)\z/) {
    $eid = $1;
}
else {
    die("*** $0:\n".
	"    Bad data in eid: $ARGV[1]\n");
}

# Node list for the experiment; used by the reboot and wait loops below.
my @nodes = ExpNodes($pid, $eid);

print "Beginning restart in for $pid/$eid. " . TBTimeStamp() . "\n";
TBDebugTimeStamp("tbrestart started");

#
# Must be an active experiment to restart!
#
# A false value from ExpState() is treated as "no such experiment"; any
# state other than EXPTSTATE_ACTIVE is rejected as well.
#
$state = ExpState($pid, $eid);
if (! $state) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid\n");
}
if ($state ne EXPTSTATE_ACTIVE) {
    die("*** $0:\n".
	"    Experiment must be active to be restart!\n");
}

#
# Stop the event system.
#
# NOTE(review): $DISABLE_EVENTS is neither declared nor assigned anywhere
# in the visible part of this script. Unless something outside this view
# sets it, it is undef and this branch always runs -- confirm.
#
if (!$DISABLE_EVENTS) {
    print "Stopping the event system.\n";
    TBDebugTimeStamp("eventsys_control started");
    # List form of system(): the command is run without a shell, so the
    # experiment names cannot be reinterpreted as shell syntax.
    if (system("eventsys_control", "stop", "$pid,$eid")) {
	die("*** $0:\n".
	    "    Failed to stop the event system.\n");
    }
    TBDebugTimeStamp("eventsys_control finished");
}

#
# Clearing the portstat counters seems like a good idea.
#
print "Clearing port counters.\n";
TBDebugTimeStamp("portstats started");
# List form of system(): arguments are passed as-is, with no shell involved.
if (system("portstats", "-z", "-a", "-q", $pid, $eid)) {
    #
    # This is a non-fatal error; warn and carry on with the restart.
    #
    print STDERR "*** WARNING: Failed to clear port counters.\n";
}
TBDebugTimeStamp("portstats finished");

#
# Walk the node list. We are going to reboot each one in turn, instead of
# as a group. Why? Cause we need to know when the node is down so that we
# clear/reset state in the DB. We have no idea what the node is doing at
# this point. This is terribly imperfect of course, since there are no
# guarantees, especially since the events are async (a tbreset and isup
# could be in the event queue for a node). The ready bits present the worst
# problem.
#
print "Rebooting all nodes\n";
TBDebugTimeStamp("node reboot started");
foreach my $node ( @nodes ) {
    #
    # Must duplicate a check that would be done in node_reboot if we
    # gave it the entire list. No point in rebooting local jails.
    #
    my ($jailed, $plab);

    if (TBIsNodeVirtual($node, \$jailed, \$plab)) {
	# Virtual node that is neither jailed nor plab: skip it.
	if (! $jailed && ! $plab) {
	    next;
	}
	# Jailed/plab virtual node that is not remote: skip it.
	if (! TBIsNodeRemote($node)) {
	    next;
	}
    }

    # List form of system(): the node name is passed as a single argument
    # and no shell is involved. Any reboot failure aborts the restart.
    if (system($nodereboot, $node)) {
	die("*** $0:\n".
	    "    Failed to reboot node $node!\n");
    }
    # Clears various things including ready bits.
    TBNodeBootReset($node);
}

print STDOUT "Waiting for nodes to come up ...\n";

#
# Wait for each node to reach TBDB_NODESTATE_ISUP, allowing 60*6 seconds
# and passing $waitstart as the reference time. A false return from
# TBNodeStateWait() is what this script treats as "the node came up".
#
# NOTE(review): this loop covers every node in @nodes, including the
# virtual nodes that the reboot loop skipped -- confirm that is intended.
#
foreach my $node ( sort(@nodes) ) {
    if (! TBNodeStateWait($node, $waitstart, (60*6), undef,
			  (TBDB_NODESTATE_ISUP))) {
	print STDOUT "$node is alive and well\n";
	SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
	next;
    }
    # Did not come up: record the failure and keep checking the rest.
    SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
    $failed++;
}
TBDebugTimeStamp("node reboot finished");

# Give up before restarting the event system if any node failed.
if ($failed) {
    die("*** $0:\n".
	"    $failed nodes failed to reboot properly! \n");
}

#
# Start the event system.
#
# NOTE(review): $DISABLE_EVENTS is neither declared nor assigned anywhere
# in the visible part of this script. Unless something outside this view
# sets it, it is undef and this branch always runs -- confirm.
#
if (!$DISABLE_EVENTS) {
    print "Starting the event system.\n";
    TBDebugTimeStamp("eventsys_control started");
    # List form of system(): the command is run without a shell, so the
    # experiment names cannot be reinterpreted as shell syntax.
    if (system("eventsys_control", "start", "$pid,$eid")) {
	die("*** $0:\n".
	    "    Failed to start the event system.\n");
    }
    TBDebugTimeStamp("eventsys_control finished");
}

# All steps succeeded; report completion and exit with success.
print "Restart finished. " . TBTimeStamp() . "\n";
TBDebugTimeStamp("tbrestart finished");
exit(0);