power.in 16.7 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7 8
# All rights reserved.
#

9 10 11
#
# Testbed Power Control script
#
12
# power [-v n] [-e] <on|off|cycle> <node> [<node>] ...
# power [-v n] status <all | controller-or-node> ...
13 14 15
#
############################################################

16 17 18
#
# Configure variables
#
19 20 21 22 23 24
# Installation layout. The @...@ tokens are placeholders; presumably they
# are substituted when this .in template is configured -- TODO confirm.
my $TB         = "@prefix@";
my $WOL        = "$TB/sbin/whol";

# Where to send mail about problems, and the syslog facility to log to.
my $TBOPS      = "@TBOPSEMAIL@";
my $TBLOG      = "@TBLOGFACIL@";

# When $ELABINELAB is true the real work is handed to an XMLRPC server;
# these are the server name, port and certificate used to reach it.
my $ELABINELAB = @ELABINELAB@;
my $RPCSERVER  = "@OUTERBOSS_NODENAME@";
my $RPCPORT    = "@OUTERBOSS_XMLRPCPORT@";
my $RPCCERT    = "@OUTERBOSS_SSLCERTNAME@";
27

28
use lib "@prefix@/lib";
29
use libdb;
30
use libxmlrpc;
Robert Ricci's avatar
Robert Ricci committed
31
use power_ipmi;
32
use power_rpc27;
33
use power_sgmote;
Timothy Stack's avatar
 
Timothy Stack committed
34
use power_mail;
35
use power_whol;
36
use power_ilo;
37
use snmpit_apc;
38
use libtestbed;
39 40
use User;
use Node;
41
use NodeType;
42
use StateWait;
43
use strict;
44
use English;
45
use Getopt::Std;
46
use POSIX qw(strftime);
47
use Sys::Syslog;
48 49 50 51 52 53 54 55 56

#
# Print the command line synopsis on stdout.
#
# Always returns 1, so callers can write "exit usage();" to leave with a
# non-zero exit status.
#
sub usage() {
    print <<"END";
Usage: $0 [-v n] [-e] <on|off|cycle> <node ...>
       $0 [-v n] status <all | controller-or-node ...>
-e     Suppress sending of event - for use by scripts that have already sent it
-v n   Run with verbosity level n
-h     Print this message
END
    return 1;
}
57

58 59 60
#
# Un-taint path since this gets called from setuid scripts.
#
61
$ENV{PATH} = '/bin:/usr/bin:/usr/local/bin:@prefix@/bin';
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};

#
# Script-wide state, filled in as the command line is processed.
#
my $op         = "";    # Requested operation (on/off/cycle/status).
my @machines   = ();    # Names given on the command line.
my @wolnodes   = ();    # Nodes queued for a wakeonlan after power control.
my $exitval    = 0;     # Running failure count; becomes the exit status.
my $this_user;          # Set from User->ThisUser() for non-root callers.

my $ip         = "";    # IP of a power controller.
my $outlet     = 0;     # Number of an outlet.
my %IPList     = ();    # Machine/ip pairs.
my %OutletList = ();    # Machine/outlet pairs.

# Forward declarations for the prototyped subs defined further down.
sub dostatus(@);
sub dowol(@);
sub logit($);
78

79 80 81 82 83 84 85
#
# Process command-line arguments
#
my %opt = ();
# getopts() returns false (after complaining on stderr) when it sees a
# switch it does not know; treat that as a usage error instead of
# carrying on with a half-parsed command line.
if (!getopts("v:he", \%opt)) {
    exit usage();
}

if ($opt{h}) {
    exit usage();
}

# useful values are 0, 1, 2 and 3
my $verbose = 0;
if ($opt{v}) {
    $verbose = $opt{v};
}
print "VERBOSE ON: Set to level $verbose\n" if $verbose;

# -e: the caller has already sent the state event, so do not send another.
my $sendevent = 1;
if ($opt{e}) {
    $sendevent = 0;
}

#
# Must have at least an op and a machine, so at least 2 ARGV
#
if (@ARGV < 2) {
    exit usage();
}

#
# Read in ARGV. The operation is taken from the capture so that it is
# one of the known words.
#
$op = shift(@ARGV);
if ($op =~ /^(on|off|cycle|status)$/) {
    $op = $1;
} else {
    exit usage();
}

#
# Untaint the arguments. Each name must consist solely of the characters
# allowed in the pattern; the value kept is the regex capture. Anything
# else is rejected outright, since these names are later placed inside
# quoted SQL strings and must not contain quote characters.
#
@machines = ();
foreach my $arg (@ARGV) {
    if ($arg =~ /^([-\@\w.]+)$/) {
        push(@machines, $1);
    } else {
        die("*** $0:\n".
            "    Illegal characters in node name: $arg\n");
    }
}

#
# Lowercase nodenames and remove duplicates
#
my %all_nodes = ();
foreach my $name (@machines) {
    $all_nodes{lc($name)} = 1;
}
@machines = sort keys %all_nodes;

#
# Dump the args
#
print "do \"$op\" to @machines\n" if $verbose > 1;

# Set up syslog
openlog("power", "pid", $TBLOG);

144 145 146 147 148 149 150 151 152 153
#
# Handle the status command which is not a per-node operation and not
# allowed by anyone except admins.
#
if ($op eq "status") {
    if ($UID != 0 && !TBAdmin($UID)) {
        die("Only admins are allowed to query status\n");
    }
    exit(dostatus(@machines));
}

#
# ElabinElab is special: the permission checks are done here, and the
# nodes that pass are handed to the proxy named by the RPC settings,
# which does the actual work. The script exits inside this branch.
#
if ($ELABINELAB) {
    my @permitted = ();

    foreach my $name (@machines) {
        # Root may control anything; everyone else needs power cycle access.
        if ($UID != 0 &&
            !TBNodeAccessCheck($UID, TB_NODEACCESS_POWERCYCLE, $name)) {
            warn "You are not authorized to control $name. Skipping...\n";
            next;
        }
        push(@permitted, $name);
    }

    # Nothing left to do once the unauthorized nodes are dropped.
    unless (@permitted) {
        exit(0);
    }

    logit("$op: @permitted\n");

    my %rpc_config = ("server"  => $RPCSERVER,
                      "verbose" => 1,
                      "cert"    => $RPCCERT,
                      "portnum" => $RPCPORT);
    libxmlrpc::Config(\%rpc_config);

    my %rpc_args = ("op"    => "$op",
                    "nodes" => join(",", @permitted));
    my $rval = libxmlrpc::CallMethod("elabinelab", "power", \%rpc_args);
    exit(-1)
        if (!defined($rval));

    # Only operations that take the node down are reported as a shutdown.
    if ($sendevent && ($op eq "off" || $op eq "cycle")) {
        foreach my $name (@permitted) {
            TBSetNodeEventState($name, TBDB_NODESTATE_SHUTDOWN);
        }
    }
    exit($rval);
}

#
# This script can be run by root. Anyone else must map to a known user;
# $this_user stays undefined for root.
#
if ($UID != 0) {
    $this_user = User->ThisUser();
    die("*** $0:\n".
        "    You ($UID) do not exist!\n")
        if (!defined($this_user));
}
Mac Newbold's avatar
Mac Newbold committed
205

206 207
my %timelimited = ();

#
# Though TBNodeAccessCheck can check all nodes at once, we do it one at
# a time, so that we can get a list of all nodes we have access to. This
# is primarily to preserve the pre-libification behavior of power
#
# %outlets maps a power controller id to a list of
# [node_id, outlet, postwol] triples for the nodes that passed the checks.
#
my %outlets = ();
foreach my $nodeid (@machines) {
    my $node = Node->Lookup($nodeid);
    if (defined($node)) {
        # $this_user is only set for non-root callers, so root skips this.
        if (defined($this_user) &&
            !$node->AccessCheck($this_user, TB_NODEACCESS_POWERCYCLE)) {
            warn "You are not authorized to control $nodeid. Skipping...\n";
            next;
        }
    }
    elsif ($UID != 0 && !TBAdmin($UID)) {
        #
        # We allow root/admins to power cycle a non-existent node
        # (a new node that has not been added yet). Nobody else may,
        # since there is no node to check their access against.
        #
        warn "You are not authorized to control $nodeid. Skipping...\n";
        next;
    }

    #
    # Query DB directly since node might not exist yet.
    #
    my $result =
        DBQueryFatal("select power_id,outlet,UNIX_TIMESTAMP(last_power) ".
                     "  from outlets ".
                     "where node_id='$nodeid'");
    if ($result->num_rows() == 0) {
        warn "No outlets table entry found for $nodeid. Skipping...\n";
        SENDMAIL($TBOPS,
                 "No power outlet for $nodeid",
                 "Unable to power '$op' $nodeid; no outlets table entry!",
                 $TBOPS);
        next;
    }
    my ($power_id, $outlet, $last_power) = $result->fetchrow();

    # NOTE(review): presumably last_power can be NULL for an outlet that
    # has never been used -- confirm against the schema. Treat a missing
    # timestamp as "long ago" rather than comparing against undef.
    $last_power = 0
        if (!defined($last_power));

    #
    # Default power delay to 60 seconds if non-existent node.
    #
    my $power_delay = 60;
    my $postwol     = 0;
    if (defined($node)) {
        $power_delay = $node->NodeTypeInfo()->power_delay();

        if ($node->NodeAttribute("wakeonlan_afterpower", \$postwol) != 0) {
            warn "Could not get wakeonlan_afterpower attr for $nodeid.\n";
            $postwol = 0;
        }
    }
    my $time_ok = (time() - $power_delay > $last_power ? 1 : 0);

    #
    # Check for rate-limiting, and update the last power cycle time
    # if it's been long enough. Root gets to bypass the checks, and
    # we only update the timestamp if it is being turned on or cycled,
    # to allow off then on without waiting (unless the on is too close
    # to a previous on/cycle command)
    #
    if ($op ne "off") {
        if (!($time_ok || ($UID == 0))) {
            warn "$nodeid was power cycled recently. Skipping...\n";
            next;
        } elsif ($power_id ne "mail") {
            DBQueryFatal("update outlets set last_power=CURRENT_TIMESTAMP " .
                         "where node_id = '$nodeid'");
        }
    }

    #
    # Associate this node with the power controller it is attached to
    #
    push @{$outlets{$power_id}}, [$nodeid, $outlet, $postwol];
}

283
print "machines= ",join(" ",@machines),"\n" if $verbose;
print "devices= ", join(" ",keys %outlets),"\n" if $verbose;

# Controller types that are driven through an object with a power()
# method, and the class that implements each.
my %device_class = ("IPMI" => "power_ipmi",
                    "APC"  => "snmpit_apc");

#
# Operate on each power controller in turn. Every controller that cannot
# be looked up, contacted or operated adds one to $exitval.
#
foreach my $power_id (keys %outlets) {

    #
    # Get the list of outlet numbers used on this power controller
    #
    my @outlets = ();
    my @nodes = ();
    my %postwol = ();
    foreach my $node (@{$outlets{$power_id}}) {
        my ($node_id, $outlet, $wol) = @$node;
        push @outlets, $outlet;
        push @nodes, $node_id;
        $postwol{$node_id} = $wol;
    }
    my $nodestr = join(",",@nodes);

    my $type;
    my $IP;
    my $class;

    if ($power_id eq "mail" || $power_id =~ /^whol-/
        || $power_id=~ /^rmcp-/
        || $power_id eq 'drac'
        || $power_id eq 'ilo' || $power_id eq 'ilo2') {
        # These ids are not looked up in the nodes table; the id itself
        # is used as the type.
        $type = $power_id;
        $IP = "";
        $class = "";
    }
    else {
        #
        # Find out some information about this power controller
        #
        my $result = DBQueryFatal("select n.type, i.IP, t.class ".
                "from nodes as n " .
                "left join interfaces as i on n.node_id=i.node_id " .
                "left join node_types as t on n.type=t.type " .
                "where n.node_id='$power_id'");
        if ($result->num_rows() == 0) {
            warn "No entry found for power controller $power_id. Skipping " .
                "$nodestr\n";
            $exitval++;
            next;
        }
        ($type, $IP, $class) = $result->fetchrow();

        # The left joins can leave columns undefined; keep the string
        # comparisons below free of uninitialized-value warnings.
        $type  = "" if (!defined($type));
        $class = "" if (!defined($class));
    }
    # Log now, and not worry about errors. Just want to know we tried.
    logit("$op: @nodes\n");

    #
    # Finally, we look at the controller type and construct the proper type
    # of object
    #
    my $errors = 0;

    # Whether to send the state event for the nodes on THIS controller.
    # Kept per controller so that a "mail" controller does not turn
    # events off for the controllers handled after it.
    my $do_event = $sendevent;

    if (exists($device_class{$type})) {
        my $device = $device_class{$type}->new($IP,$verbose);
        if (!defined $device) {
            warn "Unable to contact controller for $nodestr. Skipping...\n";
            $exitval++;
            next;
        }
        print "Calling device->power($op,@outlets)\n" if $verbose > 1;
        if ($device->power($op,@outlets)) {
            print "Control of $nodestr failed.\n";
            $errors++;
        }
    } elsif ($type =~ /RPC/) {
        if (rpc27ctrl($op,$power_id,@outlets)) {
            print "Control of $nodestr failed.\n";
            $errors++;
        }
    } elsif (($class eq "sg") || ($type eq "garcia")) {
        # XXX: 'garcia' is temporary until stargates are subnodes of
        # garcias
        if (sgmotectrl($op,@nodes)) {
            print "Control of $nodestr failed.\n";
            $errors++;
        }
    } elsif ($type =~ /whol-(\w+)/) {
        my $iface = $1;
        if (wholctrl($op,$iface,@nodes)) {
            print "Control of $nodestr failed.\n";
            $errors++;
        }
    } elsif ($type =~ /rmcp-(\w+)/) {
        require power_rmcp;
        if (power_rmcp::rmcpctrl($1,$op,@nodes)) {
            print "Control of $nodestr failed.\n";
            $errors++;
        }
    } elsif ($type eq 'ilo2' || $type eq 'ilo' || $type eq 'drac') {
        if (iloctrl($type,$op,@nodes)) {
            print "Control of $nodestr failed.\n";
            $errors++;
        }
    } elsif ($type eq "mail") {
        if (mailctrl($op,@nodes)) {
            print "Control of $nodestr failed.\n";
            $errors++;
        }
        $do_event = 0; # power_mail sends this itself.
    } else {
        print "power: Unknown power type '$type'\n";
        $errors++;
    }

    if (!$errors) {
        foreach my $node (@nodes) {
            print "$node now ",($op eq "cycle" ? "rebooting" : $op),"\n";
            if ($do_event) {
                my $state = TBDB_NODESTATE_SHUTDOWN;
                TBSetNodeEventState($node,$state);
            }
            # Remember the nodes that need a wakeonlan after power control.
            push(@wolnodes, $node)
                if ($postwol{$node});
        }
    } else {
        $exitval += $errors;
    }
}
418

419 420 421 422 423 424 425 426 427 428 429 430
#
# Handle the postwol option. This is technically incorrect; we really
# needed to start the wait operation before we turned the node off since
# we could miss the transition by starting to wait afterwards. But, that
# would require a complete reorg of this code and besides, the whole idea
# is that nodes that need wakeonlan are not going to actually come back
# alive on their own.
#
if (@wolnodes && dowol(@wolnodes) != 0) {
    $exitval++;
}

#
# Return 0 on success, and a non-zero failure count otherwise. The exit
# status only holds 8 bits, so clamp the count; otherwise a count that is
# a multiple of 256 would be reported as success.
#
exit($exitval > 255 ? 255 : $exitval);
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457

#
# Sort comparator (uses the sort variables $a and $b). A name is split
# into a prefix and a trailing run of digits; prefixes are compared as
# strings, and equal prefixes are ordered by the number, so that "pc2"
# comes before "pc10". A name with no trailing number (or made up only
# of digits) is compared whole, with its number taken as 0.
#
sub byname() {
    my ($as, $an, $bs, $bn);

    if ($a =~ /(.*[^\d])(\d+)$/) {
        ($as, $an) = ($1, $2);
    } else {
        # Explicit 0 rather than undef, which would warn in the <=> below.
        ($as, $an) = ($a, 0);
    }
    if ($b =~ /(.*[^\d])(\d+)$/) {
        ($bs, $bn) = ($1, $2);
    } else {
        ($bs, $bn) = ($b, 0);
    }
    return $as cmp $bs || $an <=> $bn;
}

#
# Query the given controllers for their status
#
sub dostatus(@) {
    #
    # Arguments are power controller ids and/or node ids, or the single
    # word "all" for every controller. A name that is not a known
    # controller is looked up in the outlets table to find its controller.
    #
    # Prints what each controller reports and returns the number of
    # errors encountered (0 when everything worked).
    #
    my @wanted = @_;
    my %ctrls = ();     # controller node_id => type
    my %IPs = ();       # controller node_id => IP
    my $errors = 0;

    if ($ELABINELAB) {
        warn "Cannot get status from inner elab\n";
        return 1;
    }

    my $doall = (@wanted == 1 && $wanted[0] eq "all");

    # Print the state of every outlet present in a status hash.
    my $show_outlets = sub {
        my ($ctrl, $status) = @_;

        for my $outlet (1..24) {
            my $ostr = "outlet$outlet";
            print "$ctrl Outlet $outlet: ", $status->{$ostr}, "\n"
                if (defined($status->{$ostr}));
        }
    };

    #
    # Fetch all possible power controllers
    #
    my $result = DBQueryFatal("select n.node_id,t.type,i.IP ".
                        "from nodes as n " .
                        "left join node_types as t on n.type=t.type " .
                        "left join interfaces as i on n.node_id=i.node_id " .
                        "where n.role='powerctrl'");
    while (my ($ctrl, $type, $IP) = $result->fetchrow()) {
        $ctrls{$ctrl} = $type;
        $IPs{$ctrl} = $IP;
    }

    @wanted = sort byname keys(%ctrls)
        if ($doall);

    #
    # For anything that was specified that is not a power controller,
    # look it up as a node and discover its controller.
    # XXX this is not very efficient.
    #
    my @nwanted = ();
    for my $node (@wanted) {
        if (defined($ctrls{$node})) {
            push(@nwanted, $node);
            next;
        }

        $result = DBQueryFatal("select power_id,outlet from outlets ".
                               "where node_id='$node'");
        if (!$result || $result->numrows == 0) {
            warn "No such power controller '$node', ignored\n";
            $errors++;
            next;
        }
        # Locals, so that the file-level $outlet is left alone.
        my ($ctrl, $outlet) = $result->fetchrow();
        print "$node is $ctrl outlet $outlet...\n";
        push(@nwanted, $ctrl);
    }

    #
    # Loop through desired controllers getting status
    #
    for my $ctrl (@nwanted) {
        my %status;

        # A controller id that came from the outlets table need not be
        # in %ctrls; use an empty type for it instead of undef.
        my $ctype = (defined($ctrls{$ctrl}) ? $ctrls{$ctrl} : "");

        if ($ctype eq "APC") {
            my $device = snmpit_apc->new($IPs{$ctrl}, $verbose);
            if (!defined $device) {
                warn "Unable to contact controller $ctrl.\n";
                $errors++;
                next;
            }
            print "Calling device->status()\n"
                if $verbose > 1;
            if ($device->status(\%status)) {
                print "Could not get status for $ctrl.\n";
                $errors++;
                next;
            }
            print "$ctrl Current: ", $status{current}, " Amps\n"
                if defined($status{current});
            $show_outlets->($ctrl, \%status);
            print "\n";
        } elsif ($ctype =~ /^RPC/) {
            if (rpc27status($ctrl,\%status)) {
                print "Could not get status for $ctrl.\n";
                $errors++;
                next;
            }
            print "$ctrl Current: ", $status{current}, " Amps\n"
                if defined($status{current});
            print "$ctrl Power: ", $status{power}, " Watts\n"
                if defined($status{power});
            if (defined($status{tempF}) || defined($status{tempC})) {
                # Always shown in Fahrenheit; convert if only tempC is set.
                my $temp = $status{tempF};
                if (!defined($temp)) {
                    $temp = $status{tempC} * 9 / 5 + 32;
                }
                printf "$ctrl Temperature: %.1f F\n", $temp;
            }
            $show_outlets->($ctrl, \%status);
            print "\n";
        } elsif (!$doall) {
            # With "all", controller types we cannot query are skipped
            # silently; when asked for by name it is an error.
            warn "Cannot get status for $ctrl (type " .
                $ctype . ") yet\n";
            $errors++;
        }
    }
    return $errors;
}
570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608

#
# Do a normal wakeonlan after power cycle. This is for laptops that do
# not come back on by themselves (no bios setting to control it either).
#
sub dowol(@)
{
    #
    # Arguments are the ids of the nodes to wake up. Wakeonlan packets
    # are sent to each node's control interface MAC until the node is
    # seen in the TBDB_NODESTATE_PXEBOOTING state or we run out of tries.
    #
    # Returns 0 if every node came up, -1 if anything went wrong for any
    # of them (including nodes whose MAC could not be found).
    #
    my (@nodeids) = @_;
    my %macs      = ();
    my $failures  = 0;

    # XXX Must know the outgoing interface. Using the whol flag. Ick.
    my $query_result =
        DBQueryFatal("select iface from interfaces ".
                     "where node_id='boss' and whol=1");
    if ($query_result->numrows != 1) {
        warn "WOL: Could not get outgoing interface for boss node.\n";
        return -1;
    }
    my ($iface) = $query_result->fetchrow_array();

    #
    # Grab the MACs for all of the nodes.
    #
    foreach my $nodeid (@nodeids) {
        $query_result =
            DBQueryFatal("select mac from interfaces  ".
                         "where node_id='$nodeid' and ".
                         "      role='" . TBDB_IFACEROLE_CONTROL() . "'");

        if ($query_result->numrows != 1) {
            warn "WOL: Could not get control interface MAC for $nodeid.\n";
            # This node cannot be woken; remember that for the result.
            $failures++;
            next;
        }
        my ($mac) = $query_result->fetchrow_array();
        $macs{$nodeid} = $mac;
    }
    @nodeids = keys(%macs);

    # Nobody left to wake, so there is nothing to wait for.
    if (!@nodeids) {
        return ($failures ? -1 : 0);
    }

    print "Doing a plain WOL to @nodeids via interface $iface\n";
    logit("WOL: @nodeids\n");

    #
    # This is going to take an arbitrary length of time; we have no idea
    # how long it takes for the node to initialize itself and get to a
    # point where a wakeonlan packet will do something. So, we use state
    # waiting to find out when it hits pxeboot. Keep sending wol packets
    # until we get there.
    #
    my @states = (TBDB_NODESTATE_PXEBOOTING);
    if (initStateWait(\@states, @nodeids) != 0) {
        print "WOL: Could not initialize state waiting!\n";
        return -1;
    }

    # We have to wait at least a few seconds for the node to transition from
    # off to its sleep mode.
    sleep(15);

    #
    # Loop no more than 15 times (at 10 seconds per loop).
    #
    my $maxloops = 15;

    while (keys(%macs) && $maxloops > 0) {
        foreach my $nodeid (keys(%macs)) {
            my $mac = $macs{$nodeid};

            # Do this a few times since the packet could get lost and
            # it seems to take a couple of packets to kick it.
            for (my $i = 0; $i < 5; $i++) {
                # List form, so that no shell gets to interpret the
                # arguments. Best effort; the exit status is not checked.
                system($WOL, $iface, $mac);
                select(undef, undef, undef, 0.1);
            }
        }
        my @done = ();
        my @fail = ();

        if (waitForState(\@done, \@fail, 10) != 0) {
            print "WOL: waitForState returned non zero!\n";
            endStateWait();
            return -1;
        }
        if (@fail) {
            print "WOL: waitForState failed on @fail!\n";
            foreach my $failed (@fail) {
                # Stop sending to it, but do not report it as a success.
                delete($macs{$failed});
                $failures++;
            }
        }
        foreach my $nodeid (@done) {
            print "$nodeid is alive after wakeonlan.\n";
            delete($macs{$nodeid});
        }

        @nodeids = keys(%macs);
        print "Sending more wol packets to @nodeids ...\n"
            if (@nodeids);
        $maxloops--;
    }
    endStateWait();
    if (@nodeids) {
        print "WOL: @nodeids did not power on after many wakeonlan packets!\n";
        return -1;
    }
    return ($failures ? -1 : 0);
}
675 676 677 678 679 680

#
# Write a message to syslog at "info" priority, prefixed with the login
# name of the invoking user in square brackets.
#
sub logit($)
{
    my ($message) = @_;
    my ($me) = getpwuid($UID);

    # Fall back to the numeric uid if there is no passwd entry for it.
    $me = $UID
        if (!defined($me));

    # The text is passed as an argument to a fixed "%s" format, so that
    # a '%' in the message is logged as is and not treated as a format
    # directive.
    syslog("info", "%s", "[$me] $message");
}

# Runs as the script exits: close the syslog connection.
END { closelog(); }