node_reboot.in 14.8 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7 8
# All rights reserved.
#

9 10 11 12
use English;
use Getopt::Std;

#
13 14
# Reboot a node (or nodes). Will power cycle the node as a last resort.
# Use -e option to reboot all nodes in an experiment.
15
#
16 17
# Exit value is 0 if all nodes reboot okay, or the number of nodes
# that could not be rebooted.
18 19 20
#
sub usage()
{
    #
    # Print the command line synopsis on STDOUT and exit with status -1.
    # Does not return.
    #
    print STDOUT
        "Usage: node_reboot [-d] [-f] [-n] [-w] node [node ...]\n" .
        "       node_reboot [-d] [-f] [-n] [-w] -e pid,eid\n" .
        "Use the -d option to turn on debugging\n" .
        "Use the -e option to reboot all the nodes in an experiment\n" .
        "Use the -n option to not wait for nodes to go down\n" .
        "Use the -w option to wait for nodes to come back up\n" .
        "Use the -f option to power cycle (and not wait for nodes to die)\n";
    exit(-1);
}
30 31 32
# The hidden -r option runs this in "realmode", ie don't send an event, but
# really do the work instead.
my $optlist     = "dfe:nwr";

#
# Configure variables. The @...@ tokens are placeholders in this .in
# template.
#
my $TB          = "@prefix@";
my $CLIENT_BIN  = "@CLIENT_BINDIR@";
my $BOSSNODE    = "@BOSSNODE@";
40 41

#
42
# Testbed Support libraries
43
#
44 45 46
use lib "@prefix@/lib";
use libdb;
use libtestbed;
47
use event;
48
use POSIX qw(strftime);
49

Robert Ricci's avatar
Robert Ricci committed
50
# Helper programs and files used by the subroutines below.
my $ssh         = "$TB/bin/sshtb -n";
my $power       = "$TB/bin/power";
my $ipod        = "$TB/sbin/apod";
my $vnodesetup  = "$TB/sbin/vnode_setup";
my $logfile     = "$TB/log/power.log";
my $ping        = "/sbin/ping";

# Per-run state. %pids maps a node to the pid of its reboot child.
my %pids        = ();
my @row;
my @nodes       = ();

# Option flags and the failure counter; everything starts out at zero.
my ($debug, $force, $waitmode, $realmode, $nowait, $failed, $eidmode)
    = (0, 0, 0, 0, 0, 0, 0);
my ($pid, $eid);

# un-taint path
$ENV{'PATH'} = '/bin:/sbin:/usr/bin:/usr/local/bin';
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};

# Autoflush the currently selected output handle after every write.
$| = 1;

#
# We don't want to run this script unless it's the real version.
#
die("Must be root! Maybe its a development version?")
    unless ($EUID == 0);

#
# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"d"})) {
    $debug = 1;
}
if (defined($options{"f"})) {
    $force = 1;
}
if (defined($options{"w"})) {
    $waitmode = 1;
}
if (defined($options{"r"})) {
    $realmode = 1;
}
# -n is ignored when -w is also given.
if (defined($options{"n"}) && !defined($options{"w"})) {
    $nowait = 1;
}
if (defined($options{"e"})) {
    # -e takes the place of the node list; no other arguments allowed.
    if (@ARGV) {
        usage();
    }

    $eidmode = $options{"e"};

    #
    # The argument must be exactly "pid,eid". The pattern is anchored and
    # requires both parts to be non-empty, so stray characters around the
    # pair are rejected instead of being silently dropped. The captures
    # also serve to untaint the values.
    #
    if ($eidmode =~ /^([-\w]+),([-\w]+)$/) {
        $pid = $1;
        $eid = $2;
    }
    else {
        print STDOUT "Invalid argument to -e option: $eidmode\n";
        usage();
    }
}

# XXX Temporary, until we make event sending the default
$realmode=1;
#if ($realmode && $UID && !TBAdmin($UID)) {
#    die("*** You cannot use real mode!\n");
#}

128 129 130 131 132 133
#
# If eidmode, then get the node list out of the DB instead of the command
# line. A proper check is made later, so no need to be fancy about the query.
#
if ($eidmode) {
    #
    # Verify permission to muck with this experiment.
    #
    if ($UID && !TBAdmin($UID) &&
        ! TBExptAccessCheck($UID, $pid, $eid, TB_EXPT_MODIFY)) {
        die("*** You do not have permission to reboot nodes in $pid/$eid!\n");
    }

    # $pid and $eid were restricted to [-\w] characters when -e was parsed.
    my $query_result =
        DBQueryFatal("select node_id from reserved where ".
                     "pid='$pid' and eid='$eid'");

    if ($query_result->numrows == 0) {
        print STDOUT "There are no nodes reserved in pid/eid $pid/$eid\n";
        usage();
    }
    while (my ($node_id) = $query_result->fetchrow_array()) {
        push(@nodes, $node_id);
    }
}
else {
    if (@ARGV == 0) {
        usage();
    }

    # Untaint the nodes.
    foreach my $node ( @ARGV ) {
        if ($node =~ /^([-\@\w]+)$/) {
            $node = $1;
        }
        else {
            die("Bad node name: $node\n");
        }
        if (!TBValidNodeName($node)) {
            die("Node does not exist: $node\n");
        }

        push(@nodes, $node);
    }

    #
    # Verify permission to reboot these nodes.
    #
    if ($UID && !TBAdmin($UID) &&
        ! TBNodeAccessCheck($UID, TB_NODEACCESS_REBOOT, @nodes)) {
        die("You do not have permission to reboot one (or more) ".
            "of the nodes!\n");
    }
}

185
#
186 187 188 189
# VIRTNODE HACK: Virtual nodes are special. We can reboot jailed vnodes,
# but not old style (non-jail) ones. Also, if we are going to reboot the physical
# node that a vnode is on, do not bother with rebooting the vnode since
# it will certainly get rebooted anyway!
190
#
191 192 193
my %realnodes = ();
my %virtnodes = ();

# Sort every requested node into %realnodes or %virtnodes (vnode => host).
foreach my $node ( @nodes ) {
    my ($jailed, $plab);

    # Physical nodes simply map to themselves.
    if (! TBIsNodeVirtual($node, \$jailed, \$plab)) {
        $realnodes{$node} = $node;
        next;
    }

    unless ($jailed || $plab) {
        print "*** Skipping old style (non-jail) virtual node $node ...\n";
        next;
    }

    my $pnode;
    TBPhysNodeID($node, \$pnode)
        or die("*** $0:\n".
               "    No physical node for $node!\n");
    $virtnodes{$node} = $pnode;
}

# A vnode whose host is itself being rebooted needs no separate reboot.
foreach my $node ( keys(%virtnodes) ) {
    my $pnode = $virtnodes{$node};

    next
        unless (defined($realnodes{$pnode}));

    print "*** Dropping $node since its host ($pnode) will reboot ...\n";
    delete($virtnodes{$node});
}

unless (%realnodes || %virtnodes) {
    print "No nodes to reboot. Exiting ...\n";
    exit(0);
}

227 228 229 230 231
#
# By here we've done all the preliminaries... send the event, unless we're
# in realmode.
#

232
my @sortednodes = sort(keys(%realnodes));

unless ($realmode) {
    EventSendFatal(host      => $BOSSNODE,
                   objtype   => TBDB_TBEVENT_COMMAND,
                   eventtype => TBDB_COMMAND_REBOOT,
                   objname   => join(",", @sortednodes));

    #
    # Placeholder: nothing is implemented yet for reporting progress to
    # the user. The intended cases are:
    #   nowait   - return immediately
    #   waitmode - wait for [SHUTDOWN,ISUP]
    #   default  - wait for [SHUTDOWN]
    #
    exit(0);
}

252
#
253 254 255
# Another shark hack. Well, perhaps not. We really don't want 50 nodes
# all rebooting at the same time, PCs *or* sharks. Lets order them
# so that the shelves are grouped together at least, and issue the reboots
Mac Newbold's avatar
Mac Newbold committed
256
# in batches.
257
#
258 259 260 261
while (@sortednodes) {
    my @batch     = ();
    my $lastshelf = 0;

    #
    # Build a batch of at most 8 nodes from the front of the sorted list.
    #
    while (@batch < 8 && @sortednodes > 0) {
        my $node = shift(@sortednodes);
        my $shelf;
        my $unit;

        #
        # The point of this silliness is to stop at each shelf transition:
        # a node on a different shelf goes back on the list and starts the
        # next batch.
        #
        if (IsShelved($node, \$shelf, \$unit)) {
            if ($lastshelf && $lastshelf ne $shelf) {
                unshift(@sortednodes, $node);
                last;
            }
            $lastshelf = $shelf;
        }

        push(@batch, $node);
    }

    if ($force) {
        #
        # In force mode, call the power program for the whole batch, and
        # continue on. We don't wait for them to go down or reboot.
        #
        info("Force mode: power cycle ".join(" ",@batch));
        PowerCycle(@batch);
        if ($?) {
            #
            # $? still holds the raw wait status of the power command run
            # by PowerCycle. Exit with its exit code, but never with 0: a
            # command killed by a signal has a zero exit code in the upper
            # byte and must not be reported as success.
            #
            exit((($? >> 8) & 0xff) || 1);
        }
    }
    else {
        #
        # Fire off a reboot process so that we can overlap them all.
        # We need the pid so we can wait for them all before proceeding.
        #
        foreach my $node ( @batch ) {
            $pids{$node} = RebootNode($node);
        }
    }

    #
    # If there are more nodes to go, then lets pause a bit so that we
    # do not get a flood of machines coming up all at the same exact
    # moment. Batches of shelved nodes get a longer pause.
    #
    if (@sortednodes) {
        print STDOUT "Pausing to give some nodes time to reboot ...\n";
        sleep($lastshelf ? 15 : 10);
    }
}

320
#
321
# Wait for all the reboot children to exit before continuing.
322
#
323
my @needPowercycle = ();
if (! $force) {
    foreach my $node ( sort(keys(%realnodes)) ) {
        my $mypid = $pids{$node};

        #
        # Never hand an undefined pid to waitpid: it would be treated as 0
        # and could reap some other node's child.
        #
        if (!defined($mypid)) {
            $failed++;
            print STDERR "Reboot of node $node failed!\n";
            next;
        }

        waitpid($mypid, 0);
        my $status = $? >> 8;
        if ($status == 2) {
            # Child signaled to us that this node needs a power cycle
            push @needPowercycle, $node;
        } elsif ($?) {
            $failed++;
            print STDERR "Reboot of node $node failed!\n";
        }
        else {
            print STDOUT "$node rebooting ...\n";
        }
    }
}

#
# Power cycle nodes that couldn't be brought down any other way. A non-zero
# result from PowerCycle means the power command failed; since we cannot
# tell which nodes were affected, count the whole set as failed rather
# than silently ignoring the error.
#
if (@needPowercycle) {
    if (PowerCycle(@needPowercycle)) {
        $failed += scalar(@needPowercycle);
        print STDERR "Power cycle of " . join(" ", @needPowercycle) .
            " failed!\n";
    }
}

350
#
351
# Now do vnodes. Do these serially for now (simple).
Mac Newbold's avatar
Mac Newbold committed
352
#
353 354
foreach my $node ( keys(%virtnodes) ) {
    my $pnode = $virtnodes{$node};

    # RebootVNode returns zero on success.
    unless (RebootVNode($node, $pnode)) {
        print STDOUT "$node on $pnode rebooting ...\n";
        next;
    }

    $failed++;
    print STDERR "Reboot of node $node on $pnode failed!\n";
}

# Any failure so far ends the run; the exit status is the failure count.
if ($failed) {
    print STDERR "$failed real nodes could not be rebooted\n";
    exit($failed);
}

#
Mac Newbold's avatar
Mac Newbold committed
371 372
# Wait for nodes to reboot. We wait only once, no reboots.
#
373 374 375 376 377 378 379 380
if ($waitmode) {
    my $waitstart = time();

    print STDOUT "Waiting for nodes to come up ...\n";

    # Wait for events to filter through stated! If we do not wait, then we
    # could see nodes still in ISUP.
    sleep(2);

    foreach my $node ( sort(@nodes) ) {
        # A false return from TBNodeStateWait is treated as "reached ISUP".
        if (TBNodeStateWait($node, TBDB_NODESTATE_ISUP, $waitstart, (60*6))) {
            SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
            $failed++;
        }
        else {
            print STDOUT "$node is alive and well\n";
            SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
        }
    }
}
print "Done. There were $failed failures to reboot.\n";
exit $failed;

#
# Reboot a node in a child process. Return the pid to the parent so
# that it can wait on all the children later.
Mac Newbold's avatar
Mac Newbold committed
398
#
399
sub RebootNode {
    #
    # Argument: $pc, the node to reboot.
    #
    # In the parent this returns the pid of the forked reboot child. The
    # child never returns: it exits 0 once the node has stopped answering
    # pings, or 2 to tell the parent that the node needs a power cycle.
    # Dies if the reboot child cannot be forked.
    #
    my ($pc) = @_;
    my ($syspid, $mypid, $didipod);

    print STDOUT "Rebooting $pc ...\n";

    # Report some activity into last_ext_act
    TBActivityReport($pc);

    $mypid = fork();
    if (!defined($mypid)) {
        #
        # Without this check a failed fork would drop the parent into the
        # child-only code below, which ends by calling exit().
        #
        die("*** $0:\n".
            "    Could not fork a reboot process for $pc: $!\n");
    }
    if ($mypid) {
        return $mypid;
    }
    TBdbfork();

    #
    # See if the machine is pingable. If its not pingable, then we just
    # power cycle the machine rather than wait for ssh to time out.
    #
    if (! DoesPing($pc)) {
        info("$pc appears dead: power cycle");
        print STDERR "$pc appears to be dead. Power cycling ...\n" if $debug;
        # Signal to the parent that the node needs to be power cycled
        exit(2);
    }

    #
    # Machine is pingable at least. Try to reboot it gracefully,
    # or power cycle anyway if that does not work.
    #
    print STDERR "Trying ssh reboot of $pc ...\n" if $debug;

    #
    # Must change our real UID to root so that ssh will work. We save the old
    # UID so that we can restore it after we finish the ssh
    #
    my $oldUID = $UID;
    $UID = 0;

    #
    # Run an ssh command in a child process, protected by an alarm to
    # ensure that the ssh is not hung up forever if the machine is in
    # some funky state.
    #
    $syspid = fork();

    if (!defined($syspid)) {
        #
        # Could not start ssh at all. Do not fall into the exec branch
        # (that would replace this process); go straight to the ipod.
        #
        my $forkerr = "$!";

        info("$pc: fork for ssh failed ($forkerr) ... sending ipod");
        print STDERR "Trying Ping-of-Death on $pc ...\n" if $debug;

        system("$ipod $pc");
        $didipod = 1;
    }
    elsif ($syspid) {
        local $SIG{ALRM} = sub { kill("TERM", $syspid); };
        alarm 20;
        waitpid($syspid, 0);
        alarm 0;

        #
        # The ssh can return non-zero exit status, but still have worked.
        # FreeBSD for example.
        #
        print STDERR "reboot of $pc returned $?.\n" if $debug;

        #
        # If either ssh is not running (exit code 1, wait status 256) or
        # it timed out and was killed by our SIGTERM (wait status 15),
        # send it a ping of death.
        #
        if ($? == 256 || $? == 15) {
            if ($? == 256) {
                print STDERR "$pc is not running sshd.\n" if $debug;
            } else {
                print STDERR "$pc is wedged.\n" if $debug;
            }
            info("$pc: ssh reboot failed ... sending ipod");
            print STDERR "Trying Ping-of-Death on $pc ...\n" if $debug;

            system("$ipod $pc");
            $didipod = 1;
        } else {
            info("$pc: ssh reboot ($?)");
            $didipod = 0;
        }
    }
    else {
        #
        # exec only returns on failure. Exit non-zero in that case so the
        # status logged by the parent does not look like a clean ssh run;
        # the parent still goes on to wait and then tries the ipod.
        #
        exec("$ssh -host $pc /sbin/reboot")
            or print STDERR "*** Could not exec $ssh: $!\n";
        exit(127);
    }

    #
    # Restore the old UID so that scripts run from this point on get the
    # user's real UID
    #
    $UID = $oldUID;

    #
    # Okay, before we power cycle lets really make sure. We wait a while
    # for it to stop responding to pings, and if it never goes silent,
    # punch the power button.
    #
    if (WaitTillDead($pc) == 0) {
        my $state = TBDB_NODESTATE_SHUTDOWN;
        TBSetNodeEventState($pc,$state);
        exit(0);
    }

    #
    # Haven't yet tried an ipod, try that and wait again.
    # This further slows down reboot but is probably worth it
    # since this should be a rare case (reboot says it worked but
    # node doesn't reboot) and is vital if the nodes have no
    # power cycle capability to fall back on.
    #
    if (! $didipod) {
        info("$pc: reboot failed ... sending ipod");
        $UID = 0;
        system("$ipod $pc");
        $UID = $oldUID;
        if (WaitTillDead($pc) == 0) {
            my $state = TBDB_NODESTATE_SHUTDOWN;
            TBSetNodeEventState($pc,$state);
            exit(0);
        }
    }

    info("$pc: ipod failed ... power cycle");
    print STDERR "$pc is still running. Power cycling ...\n" if $debug;
    exit(2);
}

527
#
Mac Newbold's avatar
Mac Newbold committed
528 529
# Reboot a vnode in a child process, and wait for it.
#
530 531 532 533 534 535 536 537 538 539
sub RebootVNode($$) {
    #
    # Arguments: $vnode, the virtual node to reboot, and $pnode, the
    # physical node hosting it (used in messages only).
    #
    # Returns the raw wait status ($?) of the ssh child, so zero means
    # success, or -1 if the child could not be forked.
    #
    my ($vnode, $pnode) = @_;
    my $syspid;

    print STDOUT "Rebooting $vnode on $pnode ...\n";

    #
    # Run an ssh command in a child process, protected by an alarm to
    # ensure that the ssh is not hung up forever if the machine is in
    # some funky state.
    #
    $syspid = fork();

    if (!defined($syspid)) {
        #
        # Do not fall through to the exec below; that would replace the
        # calling script instead of a child.
        #
        print STDERR "*** Could not fork ssh for $vnode on $pnode: $!\n";
        return -1;
    }

    if ($syspid) {
        local $SIG{ALRM} = sub { kill("TERM", $syspid); };
        alarm 20;
        waitpid($syspid, 0);
        alarm 0;
        my $exitstatus = $?;

        #
        # The ssh can return non-zero exit status, but still have worked.
        # FreeBSD for example.
        #
        print STDERR "reboot of $vnode returned $exitstatus.\n" if $debug;

        #
        # Look for setup failure, reported back through ssh.
        #
        if ($exitstatus) {
            if ($exitstatus == 256) {
                print STDERR "$pnode is not running sshd.\n" if $debug;
            }
            elsif ($exitstatus == 15) {
                print STDERR "$pnode is wedged.\n" if $debug;
            }
        }
        return($exitstatus);
    }

    #
    # Child. Must change our real UID to root so that ssh will work.
    #
    $UID = 0;

    #
    # exec only returns on failure; exit non-zero then so the parent does
    # not report a reboot that never started as a success.
    #
    exec("$ssh -host $vnode $CLIENT_BIN/vnodesetup -r -j $vnode")
        or print STDERR "*** Could not exec $ssh: $!\n";
    exit(127);
}

578 579 580 581
#
# Power cycle a PC using the testbed power program.
#
sub PowerCycle {
    #
    # Arguments: the list of nodes to power cycle in one invocation of the
    # power program.
    #
    # Returns 0 on success and non-zero on failure: the command's exit
    # code when it has one, otherwise 1 (killed by a signal) or 255 (could
    # not be started). $? is left holding the raw wait status.
    #
    my @pcs = @_;

    # List form: the node names are passed as arguments, with no shell.
    system($power, "cycle", @pcs);

    return 0
        if ($? == 0);

    # The upper byte alone is 0 for a signal death; never report that as 0.
    return ((($? >> 8) & 0xff) || 1);
}

#
# Wait until a machine stops returning ping packets.
Mac Newbold's avatar
Mac Newbold committed
592
#
593
sub WaitTillDead {
    #
    # Argument: $pc, the node to watch.
    # Returns 0 as soon as the node stops answering pings, or 1 if it is
    # still answering after 30 rounds of DoesPing.
    #
    my ($pc) = @_;

    print STDERR "Waiting for $pc to die off\n" if $debug;

    #
    # Sigh, a long ping results in the script waiting until all the
    # packets are sent from all the pings, before it will exit. So,
    # loop doing a bunch of shorter pings.
    #
    foreach my $attempt (1 .. 30) {
        if (! DoesPing($pc)) {
            print STDERR "$pc is rebooting.\n" if $debug;
            return 0;
        }
    }
    print STDERR "$pc is still alive.\n" if $debug;
    return 1;
}

613 614 615 616
#
# Returns 1 if host is responding to pings, 0 otherwise
#
sub DoesPing {
    #
    # Argument: $pc, the host to ping.
    # Returns 1 if the ping command succeeded (host is answering), 0
    # otherwise.
    #
    my ($pc) = @_;
    my $saveuid;
    my $waitstatus;
    my $status;

    # Run ping with the real UID set to root, then put the old UID back.
    $saveuid = $UID;
    $UID = 0;
    system("$ping -q -i 0.25 -c 8 -t 2 $pc >/dev/null 2>&1");
    $waitstatus = $?;
    $UID = $saveuid;
    $status = $waitstatus >> 8;

    #
    # Returns 0 if any packets are returned. Returns 2 if pingable
    # but no packets are returned. Other non-zero error codes indicate
    # other problems.  Any non-zero return indicates "not pingable" to us.
    #
    # Test the whole wait status, not just the exit code: a ping that was
    # killed by a signal or could not be run has a zero exit code in the
    # upper byte and must not be mistaken for a live host.
    #
    print STDERR "$ping $pc returned $status\n" if $debug;
    if ($waitstatus) {
        return 0;
    }
    return 1;
}
638

639

640 641 642 643 644 645 646 647
sub info($) {
    #
    # Append one timestamped line to the power log ($logfile).
    # Argument: the message text. Logging is best effort: if the log
    # cannot be opened, a warning goes to STDERR and the message is dropped.
    #
    my ($message) = @_;
    my $log;

    # Three-argument open with a lexical handle; check that it worked.
    if (!open($log, ">>", $logfile)) {
        print STDERR "*** Could not open $logfile for append: $!\n";
        return;
    }

    # Print out log entries like this:
    # Sep 20 09:36:00 $message
    print {$log} strftime("%b %e %H:%M:%S", localtime) . " $message\n";
    close($log);
}
648