Commit 03478fb9 authored by Leigh Stoller's avatar Leigh Stoller

Power "saving" additions from Barry Trent, who got them from Kevin

Lahey.

Power saving turns off nodes that have been sitting in PXEWAIT (and
are thus free) for more than a set amount of time (see sitevar
general/idlepower_idletime, which defaults to 3600 seconds).

The driver script is tbsetup/idlepower.in and needs to be added to
/etc/crontab at sites that want to do this. Even so, operation is
enabled by the sitevar general/idlepower_enable. Each time it runs, it
checks for nodes that need to be turned off, and then calls power.
Note: This should be a daemon not a cron job.

To be considered for power saving, you must add an attribute to the
node_type_attributes table called 'idlepower_enable', set to 1.

Locally, I hacked up stated and power to make the state transitions
legal so that stated does not whine. I added POWEROFF as a valid
transition from any state, to opmodes NORMAL, NORMALv1, and NORMALv2.
Barry's original patch already had a state transition for PXEKERNEL.
In power, I added code to look at the actual operation, and in the
case of "on", do not send an event if the node is not in POWEROFF,
since a user can foolishly say power on at any time, and if the node is
already on nothing is ever going to change, and the state transition
would be wrong.

node_reboot takes care of powering nodes on when they are in POWEROFF.

Barry on copyright issues:
 "I'm not sure those rights are mine to grant! Remember that this code
 came originally from Kevin Lahey (kml@patheticgeek.net) and
 originated at DETER (although he's apparently not there anymore). I
 don't foresee a problem from our point of view (but I'll double
 check, of course). Shall I try to contact Kevin try to sort this mess
 out, or do you think it's better to coordinate from your end?"
parent a6b87170
......@@ -625,6 +625,14 @@ REPLACE INTO state_transitions VALUES ('SECURELOAD','RELOADSETUP','RELOADING','R
REPLACE INTO state_transitions VALUES ('SECURELOAD','SHUTDOWN','SHUTDOWN','Retry');
REPLACE INTO state_transitions VALUES ('SECURELOAD','SHUTDOWN','GPXEBOOTING','QuoteOK');
REPLACE INTO state_transitions VALUES ('SECURELOAD','SHUTDOWN','SECVIOLATION','QuoteFailed');
REPLACE INTO state_transitions VALUES ('PXEKERNEL','PXEWAIT','POWEROFF','Power Save');
REPLACE INTO state_transitions VALUES ('PXEKERNEL','POWEROFF','SHUTDOWN','Power Recovery');
REPLACE INTO state_transitions VALUES ('NORMAL','*','POWEROFF','Power Off');
REPLACE INTO state_transitions VALUES ('NORMALv1','*','POWEROFF','Power Off');
REPLACE INTO state_transitions VALUES ('NORMALv2','*','POWEROFF','Power Off');
REPLACE INTO state_transitions VALUES ('NORMAL','POWEROFF','SHUTDOWN','Power On');
REPLACE INTO state_transitions VALUES ('NORMALv1','POWEROFF','SHUTDOWN','Power On');
REPLACE INTO state_transitions VALUES ('NORMALv2','POWEROFF','SHUTDOWN','Power On');
--
-- Dumping data for table `state_triggers`
......
......@@ -100,6 +100,8 @@ INSERT INTO sitevariables VALUES ('images/create/maxsize',NULL,'6','Max size (GB
INSERT INTO sitevariables VALUES ('general/testbed_shutdown',NULL,'0','Non-zero value indicates that the testbed is shutdown and scripts should not do anything when they run. DO NOT SET THIS BY HAND!',0);
INSERT INTO sitevariables VALUES ('images/frisbee/maxrate_std',NULL,'72000000','Max bandwidth (Bytes/sec) at which to distribute standard images from the /usr/testbed/images directory.',0);
INSERT INTO sitevariables VALUES ('images/frisbee/maxrate_usr',NULL,'54000000','Max bandwidth (Bytes/sec) at which to distribute user-defined images from the /proj/.../images directory.',0);
INSERT INTO sitevariables VALUES ('general/idlepower_enable',NULL,'0','Enable idle power down to conserve electricity',0);
INSERT INTO sitevariables VALUES ('general/idlepower_idletime',NULL,'3600','Maximum number of seconds idle before a node is powered down to conserve electricity',0);
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
......
#
# Support for idle power down. Contributed by Barry Trent.
#
use strict;
use libdb;
#
# Database update hook for idle power down support.
#
# Installs the state_transitions rows that make POWEROFF a legal state
# (REPLACE, so re-running is harmless) and creates the two idlepower
# sitevariables if they do not already exist. Arguments are accepted
# but not consulted here. Always returns 0; DBQueryFatal is relied on
# to deal with query failures.
#
sub DoUpdate($$$)
{
    my ($dbhandle, $dbname, $version) = @_;

    # Literal VALUES tuples, issued in this order.
    my @transition_rows = (
        " ('PXEKERNEL','PXEWAIT','POWEROFF','Power Save')",
        " ('PXEKERNEL','POWEROFF','SHUTDOWN','Power Recovery')",
        "('NORMAL','*','POWEROFF','Power Off')",
        "('NORMALv1','*','POWEROFF','Power Off')",
        "('NORMALv2','*','POWEROFF','Power Off')",
        "('NORMAL','POWEROFF','SHUTDOWN','Power On')",
        "('NORMALv1','POWEROFF','SHUTDOWN','Power On')",
        "('NORMALv2','POWEROFF','SHUTDOWN','Power On')",
    );
    foreach my $row (@transition_rows) {
        DBQueryFatal("REPLACE INTO state_transitions VALUES " . $row);
    }

    # Global on/off switch; only created when missing so that an
    # existing site setting is left alone.
    if (!TBSiteVarExists("general/idlepower_enable")) {
        DBQueryFatal("INSERT INTO sitevariables VALUES ".
                     " ('general/idlepower_enable',NULL,'0', ".
                     " 'Enable idle power down to conserve electricity',0)");
    }

    # Idle threshold in seconds; likewise only created when missing.
    if (!TBSiteVarExists("general/idlepower_idletime")) {
        DBQueryFatal("INSERT INTO sitevariables VALUES ".
                     " ('general/idlepower_idletime',NULL,'3600', ".
                     " 'Maximum number of seconds idle before a node is ".
                     "powered down to conserve electricity',0)");
    }
    return 0;
}
1;
......@@ -38,7 +38,7 @@ SBIN_STUFF = resetvlans console_setup.proxy sched_reload named_setup \
console_reset db2ns bwconfig frisbeehelper \
rmgroup mkgroup setgroups mkproj modgroups \
exports_setup.proxy vnode_setup eventsys_start \
sfskey_update sfskey_update.proxy rmuser idleswap \
sfskey_update sfskey_update.proxy rmuser idleswap idlepower \
newnode_reboot savelogs.proxy eventsys.proxy \
elabinelab snmpit.proxy panic node_attributes \
nfstrace plabinelab smbpasswd_setup smbpasswd_setup.proxy \
......
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2011 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
use Getopt::Std;
use strict;
#
# This gets invoked from crontab.
#
#
# Print the command synopsis on STDOUT and terminate the script with
# exit status -1. Never returns.
#
sub usage()
{
    my $synopsis = "Usage: idlepower [-n] [-s seconds-of-idle]\n";

    print STDOUT $synopsis;
    exit(-1);
}
# Hidden switch: -r = root mode - used by idlemail
#
# Option letters handed to getopts() below:
#   -d          turn on debug output (prints each selected node and its
#               idle time)
#   -n          no action; report the nodes but exit before calling power
#   -r          allow the script to be run with a real UID of 0
#   -s seconds  idle threshold in seconds (takes an argument)
#
my $optlist = "nrs:d";
#
# Configure variables
#
# NOTE(review): the @...@ tokens look like placeholders filled in when
# this .in file is configured -- confirm against the build system.
#
my $TB = "@prefix@";
my $DBNAME = "@TBDBNAME@";
# Handed to SENDMAIL when the power command fails.
my $TBOPS = "@TBOPSEMAIL@";
# Program invoked (with "off" and the node list) to power nodes down.
my $POWER = "$TB/bin/power";
# Testbed Support libraries
use lib "@prefix@/lib";
use libdb;
use User;
use emutil;
use libtestbed;
# Locals.
my $no_action = 0;      # -n: report only, do not call power
my $seconds_of_idle;    # -s: idle threshold; left undef to use the sitevar
my $rootokay  = 0;      # -r: permit running with a real UID of 0
my $debug     = 0;      # -d: print each candidate node and its idle time

# Untaint the path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
$| = 1;

# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments (there are none for this script).
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
$debug     = 1 if (defined($options{"d"}));
$no_action = 1 if (defined($options{"n"}));
$rootokay  = 1 if (defined($options{"r"}));
if (defined($options{"s"})) {
    #
    # A malformed -s value used to be dropped silently, which meant the
    # script quietly ran with the sitevar threshold instead of the one
    # the operator asked for. Reject it instead. Taking the value from
    # the capture also untaints it.
    #
    if ($options{"s"} =~ /^(\d+)$/) {
        $seconds_of_idle = $1;
    }
    else {
        usage();
    }
}

# This script is setuid, so please do not run it as root. Hard to track
# what has happened.
if ($UID == 0 && !$rootokay) {
    die("*** $0:\n".
        " Please do not run this as root! Its already setuid!\n");
}

# No positional arguments are accepted.
if (@ARGV != 0) {
    usage();
}
#
# Permission check: a non-zero real UID must map to a known user who
# is a testbed administrator. UID 0 skips the lookup entirely.
#
if ($UID) {
    my $this_user = User->ThisUser();

    die("You ($UID) do not exist!\n")
        unless (defined($this_user));

    die("*** $0:\n".
        " Only testbed administrators can issue an idlepower!\n")
        unless ($this_user->IsAdmin());
}

#
# Global enable. A failed lookup is an error; a false value means the
# feature is switched off and there is nothing to do.
#
my $idlepower_enable;
unless (TBGetSiteVar("general/idlepower_enable", \$idlepower_enable)) {
    print "Error getting sitevar 'general/idlepower_enable'\n";
    exit(-1);
}
unless ($idlepower_enable) {
    print "Idle power saving is globally disabled. Exiting ...\n";
    exit(0);
}

#
# The idle threshold comes from the sitevar unless one was already
# supplied on the command line.
#
unless (defined($seconds_of_idle) ||
        TBGetSiteVar("general/idlepower_idletime", \$seconds_of_idle)) {
    print "Error getting sitevar 'general/idlepower_idletime'\n";
    exit(-1);
}
#
# Based on the query in ptopgen ... the idea is to find free nodes
# sitting in PXEWAIT; the idle-time threshold itself is applied when the
# rows are scanned. We only look for nodes in PXEWAIT because we know
# the power off will not scrog the disk. A node that is up and running
# from its disk might very well not reboot nicely.
#
# Note that the node_type_attributes key 'idlepower_enable' determines
# whether a node type is considered for powerdown on idle: types with
# no such attribute row are excluded by the final "is not NULL" test.
#
# Columns returned, in order: node_id, type, phys_nodeid, class,
# issubnode, idle_time (seconds since state_timestamp), reserved flag,
# prereserved flag (of the physical node), eventstate (of the physical
# node).
#
my $pxewait = TBDB_NODESTATE_PXEWAIT();
my $idle_query = join("",
    "select a.node_id,a.type,a.phys_nodeid,t.class,t.issubnode,",
    "(unix_timestamp(now()) - a.state_timestamp)",
    " as idle_time, ",
    "(b.pid is not null and b.eid is not null), ",
    " np.reserved_pid is not null,np.eventstate ",
    "from nodes as a ",
    "left join reserved as b on a.node_id=b.node_id ",
    "left join nodes as np on a.phys_nodeid=np.node_id ",
    "left join node_types as t on t.type=a.type ",
    "left outer join ",
    " (select type,attrvalue ",
    " from node_type_attributes ",
    " where attrkey='idlepower_enable' ",
    " group by type) as idlepower_enabled ",
    " on t.type=idlepower_enabled.type ",
    "where (b.node_id is null and t.class='pc' and ",
    " (np.eventstate='$pxewait')) and ",
    " (a.role='testnode' and t.isremotenode=0) and ",
    " idlepower_enabled.attrvalue is not NULL");
my $result = DBQueryFatal($idle_query);
#
# Scan the results and collect the nodes to power off. A row is skipped
# if the node is a subnode, is reserved, or sits on a prereserved
# physical node, or if it has not been idle for at least
# $seconds_of_idle seconds. The type, phys_nodeid, class and eventstate
# columns are not needed here, so they are discarded.
#
my @nodes;
while (my ($node, undef, undef, undef, $issubnode, $idle_time, $reserved,
           $prereserved) = $result->fetchrow_array) {
    next
        if ($issubnode || $reserved || $prereserved);
    next
        if ($idle_time < $seconds_of_idle);

    print "$node: $idle_time\n"
        if ($debug);
    push(@nodes, $node);
}

#
# Test the list for emptiness rather than comparing $#nodes (the last
# index) against zero; the latter is 0 for a one-element list, so a
# single idle node was previously reported as "no nodes" and left on.
#
if (!@nodes) {
    print "No nodes suitable for powering off.\n";
    exit(0);
}
print "Powering off @nodes\n";

# -n: just report, do not touch anything.
exit(0)
    if ($no_action);

#
# One power invocation for the whole list. On a non-zero status, show
# the output and mail it to testbed ops.
#
my $output = emutil::ExecQuiet("$POWER off @nodes");
if ($?) {
    print $output;
    SENDMAIL($TBOPS,
             "idlepower failed",
             "Failed to power off: @nodes\n\n".
             "Power output:\n".
             "$output\n",
             $TBOPS);
    exit(-1);
}
......@@ -408,6 +408,7 @@ sub nodereboot($$)
# Wait for all the reboot children to exit before continuing.
#
my @needPowercycle = ();
my @needPowerOn = ();
if (scalar(keys(%pids))) {
foreach my $node (sort(keys(%realnodes))) {
my $mypid = $pids{$node};
......@@ -429,6 +430,10 @@ sub nodereboot($$)
# Child signaled to us that this node needs a power cycle
push(@needPowercycle, $node);
}
elsif ($status == 3) {
# Child signaled to us that this node needs to be powered on
push(@needPowerOn, $node);
}
elsif ($mypid != 0 && $?) {
$failed++;
$result->{$node} = -1;
......@@ -455,6 +460,19 @@ sub nodereboot($$)
}
}
#
# Power on nodes that were turned off
#
if (@needPowerOn) {
if (PowerOn(@needPowerOn)) {
tberror "Power on failed for " . join(" ",@needPowerOn);
foreach my $node (@needPowerOn) {
$result->{$node} = -1;
$failed++;
}
}
}
#
# Now do vnodes. Do these serially for now (simple).
#
......@@ -633,6 +651,11 @@ sub RebootNode {
# power cycle the machine rather than wait for ssh to time out.
#
if (! DoesPing($pc, 0)) {
if ($nodestate eq TBDB_NODESTATE_POWEROFF) {
info("$pc powered off: will power on");
tbnotice "$pc powered off; will power on.";
exit(3);
}
info("$pc appears dead: power cycle");
tbnotice "$pc appears dead; will power cycle.";
......@@ -901,6 +924,18 @@ sub PowerCycle {
return $? >> 8;
}
#
# Power on a PC using the testbed power program.
#
sub PowerOn {
    # All nodes are handed to the power program in a single invocation,
    # as one space-separated list.
    my $targets = join(" ", @_);

    system("$power on $targets");

    # Return the upper byte of the wait status left by system().
    return $? >> 8;
}
#
# Wait until a machine stops returning ping packets.
#
......
......@@ -71,6 +71,7 @@ my %OutletList = (); #holds machine/outlet pairs
my @wolnodes = ();
my $exitval = 0;
my $this_user;
my %nodes = ();
# Protos
sub dostatus(@);
......@@ -152,6 +153,17 @@ if ($op eq "status") {
exit(dostatus(@machines));
}
#
# This script can be run by root.
#
if ($UID) {
$this_user = User->ThisUser();
if (! defined($this_user)) {
die("*** $0:\n".
" You ($UID) do not exist!\n");
}
}
#
# ElabinElab is special; Do local permission checks, build up a node list
# and then contact the proxy to do the actual work. No perl bindings yet,
......@@ -160,13 +172,19 @@ if ($op eq "status") {
if ($ELABINELAB) {
my @nodelist = ();
foreach my $node (@machines) {
if (!(($UID == 0) ||
TBNodeAccessCheck($UID, TB_NODEACCESS_POWERCYCLE, $node))) {
warn "You are not authorized to control $node. Skipping...\n";
foreach my $nodeid (@machines) {
my $node = Node->Lookup($nodeid);
if (!defined($node)) {
die("*** $0:\n".
" No such node $nodeid\n");
}
if ($UID && !$node->AccessCheck($this_user,
TB_NODEACCESS_POWERCYCLE)) {
warn "You are not authorized to control $nodeid. Skipping...\n";
next;
}
push(@nodelist, $node);
$nodes{$nodeid} = $node;
push(@nodelist, $nodeid);
}
exit(0)
......@@ -185,25 +203,41 @@ if ($ELABINELAB) {
if (!defined($rval)) {
exit(-1);
}
if ($sendevent && ($op eq "off" || $op eq "cycle")) {
foreach my $node (@nodelist) {
TBSetNodeEventState($node, TBDB_NODESTATE_SHUTDOWN);
if ($sendevent) {
foreach my $node (values(%nodes)) {
my $oldstate = $node->eventstate();
my $newstate;
# This should not happen.
$oldstate = ""
if (!defined($oldstate));
if ($op eq "off") {
$newstate = TBDB_NODESTATE_POWEROFF();
}
elsif ($op eq "cycle") {
$newstate = TBDB_NODESTATE_SHUTDOWN();
}
elsif ($op eq "on") {
#
# What if the node is on? Nothing is going to
# happen and changing the state is wrong.
#
$newstate = TBDB_NODESTATE_SHUTDOWN()
if ($oldstate eq TBDB_NODESTATE_POWEROFF());
}
print "$node: $oldstate, $newstate\n";
#
# Avoid sending duplicate events; annoys stated.
#
$node->SetEventState($newstate)
if (defined($newstate) && $newstate ne $oldstate);
}
}
exit($rval);
}
#
# This script can be run by root.
#
if ($UID) {
$this_user = User->ThisUser();
if (! defined($this_user)) {
die("*** $0:\n".
" You ($UID) do not exist!\n");
}
}
my %timelimited = ();
#
......@@ -224,6 +258,7 @@ foreach my $nodeid (@machines) {
warn "You are not authorized to control $nodeid. Skipping...\n";
next;
}
$nodes{$nodeid} = $node;
}
#
......@@ -419,8 +454,28 @@ foreach my $power_id (keys %outlets) {
foreach my $node (@nodes) {
print "$node now ",($op eq "cycle" ? "rebooting" : $op),"\n";
if ($sendevent) {
my $state = TBDB_NODESTATE_SHUTDOWN;
TBSetNodeEventState($node,$state);
my $oldstate = $node->eventstate();
my $newstate;
if ($op eq "off") {
$newstate = TBDB_NODESTATE_POWEROFF();
}
elsif ($op eq "cycle") {
$newstate = TBDB_NODESTATE_SHUTDOWN();
}
elsif ($op eq "on") {
#
# What if the node is on? Nothing is going to
# happen and changing the state is wrong.
#
$newstate = TBDB_NODESTATE_SHUTDOWN()
if ($oldstate eq TBDB_NODESTATE_POWEROFF());
}
#
# Avoid sending duplicate events; annoys stated.
#
$node->SetEventState($newstate)
if (defined($newstate) && $newstate ne $oldstate);
}
push(@wolnodes, $node)
if ($postwol{$node});
......
<?php
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
# Copyright (c) 2000-2011 University of Utah and the Flux Group.
# All rights reserved.
#
require("defs.php3");
......@@ -151,7 +151,7 @@ function SHOWFREENODES()
($row[0] == TBDB_NODESTATE_PXEWAIT) ||
($row[0] == TBDB_NODESTATE_ALWAYSUP) ||
($row[0] == TBDB_NODESTATE_POWEROFF)) {
$freecounts[$type] = $count;
$freecounts[$type] += $count;
}
}
$output = "";
......
<?php
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
# Copyright (c) 2000-2011 University of Utah and the Flux Group.
# All rights reserved.
#
# Database Constants
......@@ -358,6 +358,7 @@ function TBFreePCs()
" and a.reserved_pid is null ".
" and nt.class = 'pc' and p.pid is null and ".
" (a.eventstate='" . TBDB_NODESTATE_ISUP . "' or ".
" a.eventstate='" . TBDB_NODESTATE_POWEROFF . "' or ".
" a.eventstate='" . TBDB_NODESTATE_PXEWAIT . "') and".
" (p.pid is null)");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment