#!/usr/bin/perl -w
#
# Copyright (c) 2010-2014 University of Utah and the Flux Group.
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
#
use strict;
use English;
use Getopt::Std;
use Data::Dumper;

#
# Print the command line synopsis on STDERR and exit with status -1.
# Takes no arguments and never returns.
#
sub usage()
{
    print STDERR "Usage: testbed-control [-n] shutdown|boot|restart\n";
    print STDERR "Use -n option to leave logins disabled after restart.\n";
    exit(-1);
}
# Option string handed to getopts(), and the flag set when -n is given.
my $optlist   = "n";
my $noenable  = 0;
# Set to 1 once the testbed daemons are down (or verified not running);
# Restart() only starts the daemons when this is set.
my $stopped   = 0;
# NOTE(review): $committed is not referenced anywhere else in the code
# visible here -- confirm before removing.
my $committed = 0;

#
# Configure variables
#
my $TB	         = "@prefix@";
my $TBOPS        = "@TBOPSEMAIL@";
my $SETSITEVAR   = "$TB/sbin/setsitevar";
my $STARTUP      = "/usr/local/etc/rc.d/3.testbed.sh";
my $MFRISBEED    = "/usr/local/etc/rc.d/3.mfrisbeed.sh";

# Protos
sub Fatal($);

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off output buffering (autoflush the selected output handle) so
# progress messages show up immediately.
#
$| = 1;

use lib "@prefix@/lib";
use libdb;
use libtestbed;
use libEmulab;
use Image;

#
# Parse command arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
# -n: do not turn web logins back on after a restart.
if (defined($options{"n"})) {
    $noenable = 1;
}
# Exactly one action word is required, and it must be one we know.
usage()
    if (@ARGV != 1);
usage()
    if (! ($ARGV[0] eq "boot" || $ARGV[0] eq "shutdown" ||
	   $ARGV[0] eq "restart"));

#
# Must be root if actually doing this.
#
if ($UID) {
    Fatal("This script must be run as root! Maybe use sudo?");
}

#
# Chicken or Egg. Everything below sets general/testbed_shutdown, so
# create the site variable here if it is not in the DB yet.
#
DBQueryFatal("INSERT INTO sitevariables VALUES ".
	     " ('general/testbed_shutdown',NULL,'0', ".
	     "  'Non-zero value indicates that the testbed is shutdown ".
	     "and scripts should not do anything when they run. ".
	     "DO NOT SET THIS BY HAND!', 0)")
    if (!SiteVarExists("general/testbed_shutdown"));

#
# Bring the testbed back online. Sets general/testbed_shutdown to -1,
# starts the daemons if $stopped is set, clears general/testbed_shutdown
# and, unless -n was given ($noenable), clears web/nologins and
# web/message. Takes no arguments. Dies via Fatal() if a
# general/testbed_shutdown update fails; exits with status 1 if the
# daemons cannot be started or the web interface cannot be re-enabled.
#
sub Restart()
{
    # Do not let the restart get interrupted. Bad.
    $SIG{INT} = 'IGNORE';

    #
    # Need to do this so the rc script knows it is okay to start.
    #  0  - Testbed is enabled.
    #  1  - Testbed is disabled.
    # -1  - Testbed is coming back online, so start up daemons.
    #
    system("$SETSITEVAR general/testbed_shutdown -1");
    if ($?) {
	Fatal("Could not set general/testbed_shutdown to -1");
    }

    if ($stopped) {
	print "Starting up testbed daemons.\n";
	# The second script is not run if the first one fails.
	if (system("$MFRISBEED start") || system("$STARTUP start")) {
	    print "*** Could not restart testbed daemons.\n";
	    exit(1);
	}
	print "\n";
    }

    # Daemons are up (or were never stopped); clear the shutdown flag.
    # NOTE(review): a value of "-" presumably tells setsitevar to reset
    # the variable to its default -- confirm against setsitevar.
    system("$SETSITEVAR general/testbed_shutdown -");
    if ($?) {
	Fatal("Could not clear general/testbed_shutdown");
    }
    if ($noenable) {
	print "Leaving the web interface disabled as directed.\n";
    }
    else {
	print "Turning on the web interface and allowing swaps\n";
	if (system("$SETSITEVAR web/nologins -") ||
	    system("$SETSITEVAR web/message -")) {
	    print "*** Could not re-enable the web interface and swapping!\n";
	    exit(1);
	}
    }
}

if ($ARGV[0] eq "boot") {
    #
    # What if the system is currently started? We do not want to start
    # two copies of things. Look to see if stated and/or bootinfo are
    # running, since these are the two most critical and quickly missed
    # daemons.
    #
    if (CheckDaemonRunning("stated") || CheckDaemonRunning("bootinfo")) {
	Fatal("Testbed appears to be running (stated and/or bootinfo running)");
    }
    # Nothing is running, so tell Restart() it has to start the daemons.
    $stopped = 1;
    Restart();
    #
    # Inform TBOPS.
    #
    SENDMAIL($TBOPS, "Testbed has been restarted!",
	     "Testbed has been restarted ...\n");
    exit(0);
}

#
# Back out of an interrupted shutdown: run Restart() to undo the
# shutdown state, then exit with status 1 so the caller can tell the
# shutdown did not complete. Restart() only starts the daemons when
# $stopped is set.
#
sub Handler()
{
    Restart();
    exit(1);
}
# Ignore ^C while the site variables below are being changed; the
# handler that backs everything out is installed once they are all set.
$SIG{INT} = 'IGNORE';

#
# Stop the testbed before doing the update.
#
print "** Putting the testbed to sleep ...\n";
print "-> Turning off the web interface and disallowing swaps.\n";

# Escape & and < in the message before storing it in web/message.
my $msg = "Testbed going offline; back in a little while";
$msg =~ s|\&|&amp;|g;
$msg =~ s|\<|&lt;|g;
# List form of system(): the message contains spaces and must be
# passed as a single argument.
system("$SETSITEVAR","web/message", $msg);
if ($?) {
    Fatal("Could not update web message");
}
system("$SETSITEVAR web/nologins 1");
if ($?) {
    Fatal("Could not disable web logins and experiment swaps");
}
system("$SETSITEVAR general/testbed_shutdown 1");
if ($?) {
    Fatal("Could not set general/testbed_shutdown");
}

print "-> Waiting a few seconds for testbed to quiet down ...\n";
# From here on a ^C backs the shutdown out via Handler().
$SIG{INT} = \&Handler;
sleep(5);
print "-> Looking for experiments still in transition.\n";

#
# Look for any experiments in transition, and wait for them to stop.
# This could take an arbitrarily long time of course, but it is the only
# safe way to do it. Don't worry about any experiments that are older
# than 24 hours though; they are likely stuck and not going anywhere.
#
while (1) {
    my $query_result =
	DBQueryWarn("select pid,eid,idx,state,expt_locked ".
		    "   from experiments ".
		    "where expt_locked is not null and ".
		    "      DATE_SUB(NOW(),INTERVAL 1 DAY) <= expt_locked");
    if (!$query_result) {
	Fatal("Error getting info from the DB. Stopping.");
    }
    # No experiment locked within the last day; the testbed is quiet.
    last
	if (!$query_result->numrows());

    print "-> Experiments still in transition:\n";
    while (my ($pid,$eid,$idx,$state,$expt_locked) =
	   $query_result->fetchrow_array()) {

	print "  $pid,$eid,$idx -- $state, $expt_locked\n";
    }
    print "-> Waiting 30 seconds before trying again. ^C is okay\n\n";
    sleep(30);
}
print "** Testbed is quiet; stopping testbed daemons ... \n";
# The second stop is skipped if the first one fails; either way we
# give up rather than guess at what is still running.
if (system("$STARTUP stop >/dev/null 2>&1") ||
    system("$MFRISBEED stop >/dev/null 2>&1")) {
    Fatal("Could not stop testbed daemons. Stopping. Best to reboot!");
}
# Daemons are down; a later Restart() must start them again.
$stopped = 1;

#
# Inform TBOPS.
#
SENDMAIL($TBOPS, "Testbed has been shutdown!",
	 "Testbed has been shutdown ...\n");

if ($ARGV[0] eq "restart") {
    print "** Testbed is stopped. Waiting a moment before restarting ...\n";
    sleep(2);
    Restart();
    #
    # Inform TBOPS.
    #
    SENDMAIL($TBOPS, "Testbed has been restarted!",
	     "Testbed has been restarted ...\n");
    exit(0);
}
# Plain "shutdown": leave the daemons stopped and the shutdown flag set.
print "** Testbed is stopped. Safe to power off or reboot\n";
exit(0);

#
# Abort the script with an error message.
#
#   $reason - text describing what went wrong.
#
# Dies with a message made of a "*** <script name>:" banner line and
# the indented reason. The trailing newline stops die() from appending
# the file and line number. Never returns.
#
sub Fatal($)
{
    my $reason = shift;

    die(sprintf("*** %s:\n    %s\n", $0, $reason));
}