#!/usr/bin/perl -w
#
# Copyright (c) 2000-2019 University of Utah and the Flux Group.
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
#
# Core pragmas and modules. English provides the $UID alias and
# Getopt::Std provides getopts(), both used below.
use strict;
use English;
use Getopt::Std;
use POSIX qw(isatty setsid);

#
# Press the panic button. Also invoked from web interface.
#
sub usage()
{
    # Print the command synopsis on STDERR and exit with status -1.
    # Takes no arguments and does not return.
    print(STDERR
	  "Usage: panic [-l level] <pid> <eid>\n".
	  "       panic -r <pid> <eid>\n".
	  "switches and arguments:\n".
	  "-l level - Level 1; reboot nodes into the admin MFS\n".
	  "         - Level 2; disable the control network\n".
	  "         - Level 3; power off all nodes\n".
	  "-r       - Reset panic state (admin people only)\n".
	  "-c       - Clear panic state but do not do anything else\n".
	  "-w       - From web interface, create a log file.\n".
	  "<pid>    - The project the experiment belongs to\n".
	  "<eid>    - The experiment name (id)\n");
    exit(-1);
}
# Option string for getopts(): -r, -w and -c are flags, -l takes a value.
my $optlist = "rl:wc";
# Option state. Each default stays in effect unless its switch is given.
my $reset   = 0;	# -r
my $clear   = 0;	# -c
my $level   = 1;	# -l
my $dolog   = 0;	# -w

# Forward declarations for subs defined further down.
sub fatal($);
# NOTE(review): no definition of DoIt() is visible in this file; confirm
# whether this declaration is still needed.
sub DoIt();
sub PowerMode();

#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    # Terminate the script, reporting $text (second argument) according
    # to the sign of $code (first argument). Never returns.
    my ($code, $text) = @_;

    # Negative code: treat as an unexpected failure and die() with the
    # script name prefixed to the message.
    die("*** $0:\n    $text\n")
	if ($code < 0);

    # Zero or positive: print the message on STDERR and use the code
    # itself as the process exit status.
    print STDERR "$text\n";
    exit($code);
}

#
# Configure variables
#
# NOTE(review): the @...@ tokens look like build-time substitution
# placeholders (the file is a ".in" template); confirm against the build.
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $snmpit = "$TB/bin/snmpit";
my $POWER  = "$TB/bin/power";

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use Firewall;
use Experiment;
use User;
use Logfile;
use libaudit;

# Locals
# Set only when -w is given and a log file could be created.
my $logfile;
    
#
# Untaint the path
# 
$ENV{'PATH'} = '/bin:/usr/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off output buffering (autoflush)
#
$| = 1;

#
# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
# Exactly <pid> and <eid> must remain after the switches.
if (@ARGV != 2) {
    usage();
}
if (defined($options{"c"})) {
    $clear = 1;
}
if (defined($options{"r"})) {
    $reset = 1;
}
if (defined($options{"w"})) {
    $dolog = 1;
}
if (defined($options{"l"})) {
    $level = $options{"l"};
    # Accept only the literal levels 1, 2 or 3. A purely numeric range
    # check lets values such as "2.5" through and emits a warning for
    # non-numeric input before rejecting it.
    usage()
	if ($level !~ /\A[1-3]\z/);
}

# Map the invoking Unix user onto a testbed user object.
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
}
my $user_uid = $this_user->uid();

# Slowly convert to using Experiment module.
my $experiment = Experiment->Lookup($ARGV[0], $ARGV[1]);
if (!defined($experiment)) {
    tbdie("Could not lookup experiment object!");
}
my $pid = $experiment->pid();
my $eid = $experiment->eid();

#
# Verify that this person is allowed to press the panic button.
# Note that any script down the line has to do an admin check also. 
#
# The check is skipped when $UID is 0 or the user is an admin.
#
if ($UID && !$this_user->IsAdmin() &&
    !$experiment->AccessCheck($this_user, TB_EXPT_MODIFY)) {
    die("*** $0:\n".
	"    You do not have permission for this experiment!\n");
}

#
# Get email info for user.
#
my $user_name  = $this_user->name();
my $user_email = $this_user->email();

# Get email for current swapper.
my $swapper        = $experiment->GetSwapper();
# Fail with a clear message instead of a "method on undefined value"
# error if no swapper object comes back.
if (!defined($swapper)) {
    tbdie("Could not lookup swapper for experiment $pid/$eid!");
}
my $swapper_name   = $swapper->name();
my $swapper_email  = $swapper->email();

#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
# the tb scripts. 
#
$experiment->LockTables() == 0
    or fatal("Could not lock experiment tables for $pid/$eid!");

#
# Called from user (via web interface).
#
if ($reset || $clear) {
    # Undoing a panic requires the experiment to be in the paniced state
    # with a non-zero panic value.
    if (! ($experiment->state() eq EXPTSTATE_PANICED &&
	   $experiment->paniced())) {
	print STDERR "Not in panic mode\n";
	exit(0);
    }
    # Undo at the level recorded on the experiment, not the -l value.
    $level = $experiment->paniced();
}
else {
    if ($experiment->state() eq EXPTSTATE_PANICED && $experiment->paniced()) {
	print STDERR "Already in panic mode\n";
	exit(0);
    }
    # A panic may only be started from the active, activating or
    # swapping states.
    ExitWithStatus(1, "Experiment $pid/$eid is not active!\n")
	if (! ($experiment->state() eq EXPTSTATE_ACTIVE ||
	       $experiment->state() eq EXPTSTATE_ACTIVATING ||
	       $experiment->state() eq EXPTSTATE_SWAPPING));
}

if ($clear && $level == 2) {
    fatal("Not allowed to clear panic state for a level 2 panic");
}

#
# Change experiment state and lock it. Unlock tables at same time.
#
$experiment->Lock(($reset || $clear ?
		   EXPTSTATE_ACTIVE : EXPTSTATE_PANICED), 1) == 0
    or fatal("Could not lock $experiment");

# -c only clears the recorded panic bit; nothing else is touched.
if ($clear) {
    $experiment->SetPanicBit(0);
    $experiment->Unlock();
    print "Panic state has been cleared, but nothing else has been done\n";
    exit(0);
}

#
# Deal with level 3.
#
# PowerMode() does its own unlock, reporting and mail; its return value
# becomes the exit status.
#
if ($experiment->paniced() == 3 || $level == 3) {
    exit(PowerMode());
}

# Force level 2 for firewalled experiments.
if ($experiment->IsFirewalled()) {
    $level = 2
	if (!$reset);
}

#
# Coming from the web, use a logfile for spewing.
#
if ($dolog) {
    $logfile = $experiment->CreateLogFile("panic");
    if (defined($logfile)) {
	# We want it to spew to the web.
	$experiment->SetLogFile($logfile);
	# Mark it open since we are going to start using it right away.
	$logfile->Open();

	# Use libaudit to handle the logging and daemonizing.
	my $childpid = LogStart(1, $logfile->filename(),
				LIBAUDIT_NODELETE()|LIBAUDIT_LOGTBOPS());
	# NOTE(review): a true return value presumably means this is the
	# parent of a background child; confirm against libaudit.
	if ($childpid) {
	    exit(0);
	}
    }
}

#
# XXX - At this point a failure is going to leave things in an
# inconsistent state.
#
if (Firewall::Panic($experiment, $level,
		    ($reset ?
		     Firewall::PANIC_CLEAR() : Firewall::PANIC_PANIC()))) {
    fatal("Failure in Firewall::Panic()");
}
$experiment->Unlock();
print "Panic Button has been ". ($reset ? "cleared" : "pressed") . "\n";

#
# Send email notification to user *and* to tbops.
#
SENDMAIL("$user_name <$user_email>",
	 "Quarantine notification for Experiment $pid/$eid",
	 "Experiment $pid/$eid has been ".
	 ($reset ? "released from quarantine" : "quarantined") . "\n",
	 "$user_name <$user_email>",
	 "Cc:  $swapper_name <$swapper_email>\n".
	 "Bcc: $TBOPS");

if (defined($logfile)) {
    # Close up the log file so the webpage stops.
    $experiment->CloseLogFile();
}
exit(0);

#
# Report an unrecoverable failure: mail the invoking user (Cc the swapper,
# Bcc $TBOPS) and then die() with the message.
#
#   $mesg - description of what went wrong.
#
# Does not return. It does not unlock the experiment itself.
#
sub fatal($)
{
    my($mesg) = $_[0];

    # Both -r and -c undo a panic, so report either one as "cleared";
    # anything else was an attempt to set quarantine mode.
    my $action = (($reset || $clear) ? "cleared" : "set");

    #
    # Send a message to the testbed list.
    #
    SENDMAIL("$user_name <$user_email>",
	     "Quarantine Failure for Experiment $pid/$eid",
	     "$user_uid $action".
	       " Quarantine mode for experiment $pid/$eid,\n".
	     "BUT there was a failure!\n\n".
	     "$mesg\n",
	     "$user_name <$user_email>",
	     "Cc:  $swapper_name <$swapper_email>\n".
	     "Bcc: $TBOPS");

    die("*** $0:\n".
	"    $mesg\n");
}

#
# Power mode.
#
sub PowerMode()
{
    # Level 3 handling: run $POWER on every node of the experiment,
    # "off" to panic or "on" when resetting, then record the new panic
    # level, unlock, and send the notification mail.
    #
    # Returns 0 on success. Calls fatal(), which does not return, if the
    # node list is empty or any power command fails.
    my @failures = ();
    my $action   = ($reset ? "on" : "off");
    
    # Need a list of nodes, going to call power individually.
    my @nodes = $experiment->NodeList(1, 0);
    if (!@nodes) {
	$experiment->Unlock();
	fatal("Could not get node list for experiment");
    }
    foreach my $node_id (@nodes) {
	# Tell power to send an event. List form avoids the shell, so the
	# node id is always passed as one literal argument.
	system($POWER, $action, $node_id);
	# $? is non-zero both for a failing exit status and when the
	# command could not be run at all.
	if ($?) {
	    push(@failures, $node_id);
	}
    }
    # Change the mode even if we failed on some nodes, will need to deal
    # with it by hand.
    if ($reset) {
	$experiment->SetPanicBit(0);
    }
    else {
	$experiment->SetPanicBit($level);
    }
    if (@failures) {
	$experiment->Unlock();
	fatal("Could not power $action" .
	      " some nodes: @failures");
    }
    $experiment->Unlock();
    print "Panic Button has been ". ($reset ? "cleared" : "pressed") . "\n";

    #
    # Send email notification to user *and* to tbops.
    #
    SENDMAIL("$user_name <$user_email>",
	     "Quarantine notification for Experiment $pid/$eid",
	     "Experiment $pid/$eid has been ".
	     ($reset ? "released from quarantine" : "quarantined") . "\n",
	     "$user_name <$user_email>",
	     "Cc:  $swapper_name <$swapper_email>\n".
	     "Bcc: $TBOPS");

    return 0;
}