#!/usr/bin/perl -wT # # EMULAB-COPYRIGHT # Copyright (c) 2000-2010 University of Utah and the Flux Group. # All rights reserved. # # # Testbed Power Control script # # power [on|off|cycle] [] ... # ############################################################ # # Configure variables # my $TB = "@prefix@"; my $TBOPS = "@TBOPSEMAIL@"; my $ELABINELAB = @ELABINELAB@; my $RPCSERVER = "@OUTERBOSS_NODENAME@"; my $RPCPORT = "@OUTERBOSS_XMLRPCPORT@"; my $RPCCERT = "@OUTERBOSS_SSLCERTNAME@"; my $WOL = "$TB/sbin/whol"; my $TBLOG = "@TBLOGFACIL@"; use lib "@prefix@/lib"; use libdb; use libxmlrpc; use power_ipmi; use power_rpc27; use power_sgmote; use power_mail; use power_whol; use power_ilo; use snmpit_apc; use libtestbed; use User; use Node; use NodeType; use StateWait; use strict; use English; use Getopt::Std; use POSIX qw(strftime); use Sys::Syslog; sub usage() { print << "END"; Usage: $0 [-v n] [-e] -e Surpress sending of event - for use by scripts that have already sent it -v n Run with verbosity level n END 1; } # # Un-taint path since this gets called from setuid scripts. # $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:@prefix@/bin'; delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; my $op = ""; #stores operation (on/off/cyc) my @machines = (); #stores machines to operate on my $ip = ""; #stores IP of a power controller my $outlet = 0; #stores number of an outlet my %IPList = (); #holds machine/ip pairs my %OutletList = (); #holds machine/outlet pairs my @wolnodes = (); my $exitval = 0; my $this_user; # Protos sub dostatus(@); sub dowol(@); sub logit($); # # Process command-line arguments # my %opt = (); getopts("v:he",\%opt); if ($opt{h}) { exit usage(); } # useful values are 0, 1, 2 and 3 my $verbose = 0; if ($opt{v}) { $verbose = $opt{v}; } print "VERBOSE ON: Set to level $verbose\n" if $verbose; my $sendevent = 1; if ($opt{e}) { $sendevent = 0; } # # Must have at least an op and a machine, so at least 2 ARGV # if (@ARGV < 2) { exit &usage; } # # Read in ARGV # $op = shift (@ARGV); if ($op =~ /^(on|off|cycle|status)$/) { $op = $1; } else { exit &usage; } # # Untaint the arguments. # @machines = @ARGV; foreach my $n (0..$#ARGV) { $machines[$n] =~ s/^([-\@\w.]+)$/$1/; } # # Lowercase nodenames and remove duplicates # my %all_nodes = (); foreach my $n (0..$#machines) { $all_nodes{"\L$machines[$n]"} = 1; # Lowercase it and use as hash key } @machines= sort keys %all_nodes; # # Dump the args # print "do \"$op\" to @machines\n" if $verbose > 1; # Set up syslog openlog("power", "pid", $TBLOG); # # Handle the status command which is not a per-node operation and not # allowed by anyone except admins. # if ($op eq "status") { die("Only admins are allowed to query status\n") if ($UID != 0 && !TBAdmin($UID)); exit(dostatus(@machines)); } # # ElabinElab is special; Do local permission checks, build up a node list # and then contact the proxy to do the actual work. No perl bindings yet, # so have to use the python client program. # if ($ELABINELAB) { my @nodelist = (); foreach my $node (@machines) { if (!(($UID == 0) || TBNodeAccessCheck($UID, TB_NODEACCESS_POWERCYCLE, $node))) { warn "You are not authorized to control $node. Skipping...\n"; next; } push(@nodelist, $node); } exit(0) if (! @nodelist); logit("$op: @nodelist\n"); libxmlrpc::Config({"server" => $RPCSERVER, "verbose" => 1, "cert" => $RPCCERT, "portnum" => $RPCPORT}); my $rval = libxmlrpc::CallMethod("elabinelab", "power", {"op" => "$op", "nodes" => join(",", @nodelist)}); if (!defined($rval)) { exit(-1); } if ($sendevent && ($op eq "off" || $op eq "cycle")) { foreach my $node (@nodelist) { TBSetNodeEventState($node, TBDB_NODESTATE_SHUTDOWN); } } exit($rval); } # # This script can be run by root. # if ($UID) { $this_user = User->ThisUser(); if (! defined($this_user)) { die("*** $0:\n". " You ($UID) do not exist!\n"); } } my %timelimited = (); # # Though TBNodeAccessCheck can check all nodes at once, we do it one at # a time, so that we can get a list of all nodes we have access to. This # is primarily to preserve the pre-libification behavior of power # my %outlets = (); foreach my $nodeid (@machines) { my $node = Node->Lookup($nodeid); if (defined($node)) { # # We allow root/admins to power cycle a non-existent node # (a new node that has not been added yet). # if (defined($this_user) && !$node->AccessCheck($this_user, TB_NODEACCESS_POWERCYCLE)) { warn "You are not authorized to control $nodeid. Skipping...\n"; next; } } # # Query DB directly since node might not exist yet. # my $result = DBQueryFatal("select power_id,outlet,UNIX_TIMESTAMP(last_power) ". " from outlets ". "where node_id='$nodeid'"); if ($result->num_rows() == 0) { warn "No outlets table entry found for $nodeid. Skipping...\n"; SENDMAIL($TBOPS, "No power outlet for $nodeid", "Unable to power '$op' $nodeid; no outlets table entry!", $TBOPS); next; } my ($power_id, $outlet, $last_power) = $result->fetchrow(); # # Default power delay to 60 seconds if non-existent node. # my $power_delay = 60; my $postwol = 0; if (defined($node)) { $power_delay = $node->NodeTypeInfo()->power_delay(); if ($node->NodeAttribute("wakeonlan_afterpower", \$postwol) != 0) { warn "Could not get wakeonlan_afterpower attr for $nodeid.\n"; $postwol = 0; } } my $time_ok = (time() - $power_delay > $last_power ? 1 : 0); # # Check for rate-limiting, and update the last power cycle time # if it's been long enough. Root gets to bypass the checks, and # we only update the timestamp if it is being turned on or cycled, # to allow off then on without waiting (unless the on is too close # to a previos on/cycle command) # if ( $op ne "off" ) { if (! ($time_ok || ($UID == 0)) ) { warn "$nodeid was power cycled recently. Skipping...\n"; next; } elsif ( $power_id ne "mail" ) { DBQueryFatal("update outlets set last_power=CURRENT_TIMESTAMP " . "where node_id = '$nodeid'"); } } # # Associate this node with the power controller it is attached to # push @{$outlets{$power_id}}, [$nodeid, $outlet, $postwol]; } print "machines= ",join(" ",@machines),"\n" if $verbose; print "devices= ", join(" ",keys %outlets),"\n" if $verbose; foreach my $power_id (keys %outlets) { # # Get the list of outlet numbers used on this power controller # my @outlets = (); my @nodes = (); my %postwol = (); foreach my $node (@{$outlets{$power_id}}) { my ($node_id, $outlet, $wol) = @$node; push @outlets, $outlet; push @nodes, $node_id; $postwol{$node_id} = $wol; } my $nodestr = join(",",@nodes); my $type; my $IP; my $class; if ($power_id eq "mail" || $power_id =~ /^whol-/ || $power_id=~ /^rmcp-/ || $power_id eq 'drac' || $power_id eq 'ilo' || $power_id eq 'ilo2') { $type = $power_id; $IP = ""; $class = ""; } else { # # Find out some information about this power controller # my $result = DBQueryFatal("select n.type, i.IP, t.class ". "from nodes as n " . "left join interfaces as i on n.node_id=i.node_id " . "left join node_types as t on n.type=t.type " . "where n.node_id='$power_id'"); if ($result->num_rows() == 0) { warn "No entry found for power controller $power_id. Skipping " . "$nodestr\n"; $exitval++; next; } ($type, $IP, $class) = $result->fetchrow(); } # Log now, and not worry about errors. Just want to know we tried. logit("$op: @nodes\n"); # # Finally, we look at the controller type and construct the proper type # of object # my $errors = 0; if ($type eq "IPMI") { my $device = new power_ipmi($IP,$verbose); if (!defined $device) { warn "Unable to contact controller for $nodestr. Skipping...\n"; next; } else { print "Calling device->power($op,@outlets)\n" if $verbose > 1; if ($device->power($op,@outlets)) { print "Control of $nodestr failed.\n"; $errors++; } } } elsif ($type eq "APC") { my $device = new snmpit_apc($IP,$verbose); if (!defined $device) { warn "Unable to contact controller for $nodestr. Skipping...\n"; next; } else { print "Calling device->power($op,@outlets)\n" if $verbose > 1; if ($device->power($op,@outlets)) { print "Control of $nodestr failed.\n"; $errors++; } } } elsif ($type =~ "RPC") { if (rpc27ctrl($op,$power_id,@outlets)) { print "Control of $nodestr failed.\n"; $exitval++; } } elsif (($class eq "sg") || ($type eq "garcia")) { # XXX: 'garcia' is temporary until stargates are subnodes of # garcias if (sgmotectrl($op,@nodes)) { print "Control of $nodestr failed.\n"; $exitval++; $errors++; } } elsif ($type =~ /whol-(\w+)/) { my $iface = $1; if (wholctrl($op,$iface,@nodes)) { print "Control of $nodestr failed.\n"; $exitval++; $errors++; } } elsif ($type =~ /rmcp-(\w+)/) { require power_rmcp; if (power_rmcp::rmcpctrl($1,$op,@nodes)) { print "Control of $nodestr failed.\n"; ++$exitval; ++$errors; } } elsif ($type eq 'ilo2' || $type eq 'ilo' || $type eq 'drac') { if (iloctrl($type,$op,@nodes)) { print "Control of $nodestr failed.\n"; ++$exitval; ++$errors; } } elsif ($type eq "mail") { if (mailctrl($op,@nodes)) { print "Control of $nodestr failed.\n"; $exitval++; $errors++; } $sendevent = 0; # power_mail sends this itself. } else { print "power: Unknown power type '$type'\n"; $errors++; } if (!$errors) { foreach my $node (@nodes) { print "$node now ",($op eq "cycle" ? "rebooting" : $op),"\n"; if ($sendevent) { my $state = TBDB_NODESTATE_SHUTDOWN; TBSetNodeEventState($node,$state); } push(@wolnodes, $node) if ($postwol{$node}); } } else { $exitval += $errors; } } # # Handle the postwol option. This is technically incorrect; we really # needed to start the wait operation before we turned the node off since # we could miss the transition by starting to wait afterwards. But, that # would require a complete reorg of this code and besides, the whole idea # that nodes that need wakeonlan are not going to actually come back alive # on their own. # if (@wolnodes) { if (dowol(@wolnodes) != 0) { $exitval++; } } # Return 0 on success. Return non-zero number of nodes that failed. exit $exitval; sub byname() { my ($as, $an, $bs, $bn); if ($a =~ /(.*[^\d])(\d+)$/) { $as = $1; $an = $2; } else { $as = $a; } if ($b =~ /(.*[^\d])(\d+)$/) { $bs = $1; $bn = $2; } else { $bs = $b; } $as cmp $bs || $an <=> $bn; } # # Query the given controllers for their status # sub dostatus(@) { my @wanted = @_; my %ctrls = (); my %IPs = (); my $errors = 0; if ($ELABINELAB) { warn "Cannot get status from inner elab\n"; return 1; } my $doall = (@wanted == 1 && $wanted[0] eq "all"); # # Fetch all possible power controllers # my $result = DBQueryFatal("select n.node_id,t.type,i.IP ". "from nodes as n " . "left join node_types as t on n.type=t.type " . "left join interfaces as i on n.node_id=i.node_id " . "where n.role='powerctrl'"); while (my ($ctrl, $type, $IP) = $result->fetchrow()) { $ctrls{$ctrl} = $type; $IPs{$ctrl} = $IP; } @wanted = sort byname keys(%ctrls) if ($doall); # # For anything that was specified that is not a power controller, # look it up as a node and discover its controller. # XXX this is not very efficient. # my @nwanted = (); for my $node (@wanted) { my $ctrl; if (!defined($ctrls{$node})) { $result = DBQueryFatal("select power_id,outlet from outlets ". "where node_id='$node'"); if (!$result || $result->numrows == 0) { warn "No such power controller '$node', ignored\n"; $errors++; next; } else { ($ctrl, $outlet) = $result->fetchrow(); print "$node is $ctrl outlet $outlet...\n"; } } else { $ctrl = $node; } push(@nwanted, $ctrl); } # # Loop through desired controllers getting status # for my $ctrl (@nwanted) { my %status; if ($ctrls{$ctrl} eq "APC") { my $device = new snmpit_apc($IPs{$ctrl}, $verbose); if (!defined $device) { warn "Unable to contact controller $ctrl.\n"; $errors++; next; } else { print "Calling device->status()\n" if $verbose > 1; if ($device->status(\%status)) { print "Could not get status for $ctrl.\n"; $errors++; next; } } print "$ctrl Current: ", $status{current}, " Amps\n" if defined($status{current}); for my $outlet (1..24) { my $ostr = "outlet$outlet"; print "$ctrl Outlet $outlet: ", $status{$ostr}, "\n" if (defined($status{$ostr})); } print "\n"; } elsif ($ctrls{$ctrl} =~ /^RPC/) { if (rpc27status($ctrl,\%status)) { print "Could not get status for $ctrl.\n"; $errors++; next; } print "$ctrl Current: ", $status{current}, " Amps\n" if defined($status{current}); print "$ctrl Power: ", $status{power}, " Watts\n" if defined($status{power}); if (defined($status{tempF}) || defined($status{tempC})) { my $temp = $status{tempF}; if (!defined($temp)) { $temp = $status{tempC} * 9 / 5 + 32; } printf "$ctrl Temperature: %.1f F\n", $temp; } for my $outlet (1..24) { my $ostr = "outlet$outlet"; print "$ctrl Outlet $outlet: ", $status{$ostr}, "\n" if (defined($status{$ostr})); } print "\n"; } elsif (!$doall) { warn "Cannot get status for $ctrl (type " . $ctrls{$ctrl} . ") yet\n"; $errors++; } } return $errors; } # # Do a normal wakeonlan after power cycle. This is for laptops that do # come back on (no bios setting to control it either). # sub dowol(@) { my (@nodeids) = @_; my %macs = (); # XXX Must know the outgoing interface. Using the whol flag. Ick. my $query_result = DBQueryFatal("select iface from interfaces ". "where node_id='boss' and whol=1"); if ($query_result->numrows != 1) { warn "WOL: Could not get outgoing interface for boss node.\n"; return -1; } my ($iface) = $query_result->fetchrow_array(); # # Grab the MACs for all of the nodes. # foreach my $nodeid (@nodeids) { $query_result = DBQueryFatal("select mac from interfaces ". "where node_id='$nodeid' and ". " role='" . TBDB_IFACEROLE_CONTROL() . "'"); if ($query_result->numrows != 1) { warn "WOL: Could not get control interface MAC for $nodeid.\n"; next; } my ($mac) = $query_result->fetchrow_array(); $macs{$nodeid} = $mac; } @nodeids = keys(%macs); print "Doing a plain WOL to @nodeids via interface $iface\n"; logit("WOL: @nodeids\n"); # # This is going to take an arbitrary length of time; we have no idea # how long it takes for the node to initialize itself and get to a # point where a wakeonlan packet will do something. So, we use state # waiting to find out when it hits pxeboot. Keep sending wol packets # until we get there. # my @states = (TBDB_NODESTATE_PXEBOOTING); if (initStateWait(\@states, @nodeids) != 0) { print "WOL: Could not initialize state waiting!\n"; return -1; } # We have to wait at least a few seconds for the node to transition from # off to its sleep mode. sleep(15); # # Loop no more then 15 times (at 10 seconds per loop). # my $maxloops = 15; while (keys(%macs) && $maxloops > 0) { foreach my $nodeid (keys(%macs)) { my $mac = $macs{$nodeid}; # Do this a few times since the packet could get lost and # it seems to take a couple of packets to kick it. for (my $i = 0; $i < 5; $i++) { system("$WOL $iface $mac"); select(undef, undef, undef, 0.1); } } my @done = (); my @fail = (); if (waitForState(\@done, \@fail, 10) != 0) { print "WOL: waitForState returned non zero!\n"; endStateWait(); return -1; } if (@fail) { print "WOL: waitForState failed on @fail!\n"; foreach my $failed (@fail) { delete($macs{$failed}); } } foreach my $nodeid (@done) { print "$nodeid is alive after wakeonlan.\n"; delete($macs{$nodeid}); } @nodeids = keys(%macs); print "Sending more wol packets to @nodeids ...\n" if (@nodeids); $maxloops--; } endStateWait(); if (@nodeids) { print "WOL: @nodeids did not power on after many wakeonlan packets!\n"; return -1; } return 0; } sub logit($) { my ($message) = @_; my ($me) = getpwuid($UID); syslog("info", "[$me] $message"); } END { closelog(); }