#!/usr/bin/perl -wT

#
# EMULAB-COPYRIGHT
# Copyright (c) 2005 University of Utah and the Flux Group.
# All rights reserved.
#

# A perl module to power cycle nodes using email to the operators.

package power_mail;

use Exporter;
@ISA = ("Exporter");
@EXPORT = qw( mailctrl );

use lib "@prefix@/lib";
use libdb;
use libtestbed;

# Site-specific values. The @...@ tokens are placeholders in this .in
# template (presumably substituted at build time -- confirm against the
# build system).
my $WWW      = "@WWW@";
my $TBOPS      = "@TBOPSEMAIL@";

# Number of 30-second retries mailctrl() allows per node before giving up.
my $default_tries = 40;
# A node counts as "recently powered" when its outlets.last_power is
# younger than this.
my $time_tolerance = 2 * 60; # seconds
# Window handed to TBNodeEventStateUpdated() when checking whether a node
# has reported a fresh event state.
my $state_update_tolerance = 45; # seconds

# Enable autoflush on the selected output handle so progress messages
# show up immediately instead of sitting in a buffer.
$| = 1;

# usage: mailctrl(cmd, nodes)
# cmd = { "cycle" | "on" | "off" }
# nodes = list of one or more physical node names
#
# Powers nodes "by hand": mails the operators asking them to perform the
# power operation, then polls the database until every node that needed
# attention has had its power time updated (or, for "on"/"cycle", has
# reported a recent event state).  Each node is retried every 30 seconds,
# at most $default_tries times.
#
# Returns 0 on success. Non-zero on failure.
#
# NOTE(review): node names are interpolated directly into SQL; this assumes
# callers only pass validated node ids -- confirm against the callers.
#
sub mailctrl($@) {
    my ($cmd, @nodes) = @_;

    my %actual = ();
    my $open = 1;
    my ($pid, $eid);

    # XXX Hack so that we only send mail if the robotlab is open, which ought
    # to be the only time this script gets run.  Otherwise, no one is around
    # to do anything about it.
    TBGetSiteVar("robotlab/open", \$open);
    if (!$open) {
        print "Lab not open, no operators available to power $cmd nodes.\n";
        return 1;
    }

    # Check to see if we have to send mail first: a node only needs operator
    # attention if it has not been powered within $time_tolerance seconds.
    foreach my $node (@nodes) {
        my $dbres = DBQueryFatal(
                "select (UNIX_TIMESTAMP(NOW()) - UNIX_TIMESTAMP(last_power)) ".
                " < $time_tolerance,r.pid,r.eid from outlets as o ".
                "left join reserved as r on r.node_id=o.node_id ".
                "where o.node_id='$node'");

        if ($dbres->num_rows() == 0) {
            print "Unknown node $node\n";
            next;
        }

        my ($ok, $node_pid, $node_eid) = $dbres->fetchrow();

        if (defined($node_pid) && defined($node_eid)) {
            if ($node_pid eq NODEDEAD_PID() && $node_eid eq NODEDEAD_EID()) {
                print "Can't power nodes that are dead.\n";
                return 1;
            }
            # Remember the experiment for the mail text.  Only reserved
            # nodes update it, so a free node later in the list cannot
            # wipe out what an earlier node told us.
            ($pid, $eid) = ($node_pid, $node_eid);
        }

        if (!$ok) {
            $actual{$node} = 1;
        }
    }

    # Every node was powered recently enough; nothing to ask for.
    if (scalar(keys %actual) == 0) {
        return 0;
    }

    # Work out who swapped the experiment in, for the mail text.
    my $expt        = "unknown";
    my $swapper_uid = "unknown";
    if (defined($pid) && defined($eid)) {
        $expt = "$pid/$eid";

        my $dbres = DBQueryFatal("select expt_swap_uid from experiments" .
                                 " where pid='$pid' and eid='$eid'");
        if ($dbres->num_rows() != 0) {
            my $row = $dbres->fetchrow_hashref();
            if (defined($row->{'expt_swap_uid'})) {
                $swapper_uid = $row->{'expt_swap_uid'};
            }
        }
    }

    print "Sending mail to the operators\n";

    my @emails = _power_mail_recipients(@nodes);

    my $email_body =
        "Someone needs to power $cmd the following nodes:\n" .
        "\t\n" . join(" ",@nodes) . "\n\nfor $expt, " .
        "swapped in by $swapper_uid.\n" .
        "\nAnd update power time through this web page:\n" .
        "\n  https://$WWW/powertime.php3?node_id=" . join(",",@nodes) .
        "\n";
    $email_body .= _power_mail_battery_report(@nodes);
    $email_body .= "\nThe Power Control Dude.\n";

    foreach my $email (@emails) {
        # No trailing newline in the subject: a line break inside a header
        # value would terminate the header section early.
        SENDMAIL($email,
                 "Power $cmd nodes for $expt",
                 $email_body);
    }

    # Now wait for the operators to act on each node that needed it.
    foreach my $node (keys %actual) {
        my $tries = $default_tries;
        my $ok = 0;

        print "Waiting for node $node\n";

        while (!$ok) {
            my $dbres = DBQueryFatal(
                        "select (UNIX_TIMESTAMP(NOW()) - " .
                        "UNIX_TIMESTAMP(last_power)) < $time_tolerance " .
                        "from outlets where node_id='$node'");

            if ($dbres->num_rows() == 0) {
                # The outlet row was there a moment ago and is gone now.
                # Retrying here without sleeping or counting a try would
                # spin on the database forever, so report failure instead.
                print "Unknown node $node\n";
                return 1;
            }

            ($ok) = $dbres->fetchrow();

            if (($cmd eq "on" || $cmd eq "cycle") &&
                TBNodeEventStateUpdated($node, $state_update_tolerance)) {
                # This is something of a hack...  We don't want to wait
                # forever if someone forgets to update the webpage, so we
                # check if the event state was updated recently.  And, we
                # DO NOT send the shutdown event since the thing is already
                # going.
                $ok = 1;
            }
            elsif ($ok) {
                # The operator notified via the web page.
                my $state = TBDB_NODESTATE_SHUTDOWN;
                TBSetNodeEventState($node,$state);
            }
            elsif ($tries == 0) {
                print "No more tries left for $node...\n";
                return 1;
            }
            else {
                $tries -= 1;
                print "Sleeping for 30 seconds.\n";
                sleep(30);
            }
        }
    }

    return 0;
}

#
# Private helper: return the list of addresses to mail about these nodes.
# Uses the distinct email values recorded in location_info for the nodes,
# skipping NULL/empty ones; falls back to $TBOPS when nothing usable is
# found, so that somebody always gets the request.
#
sub _power_mail_recipients {
    my (@nodes) = @_;

    # If we add lots of bots, this will be inefficient, because each bot's
    # name is added to the conditional in the query.
    my $cond_str = join(" OR ", map { "node_id='$_'" } @nodes);

    my $dbres = DBQueryFatal("select email from location_info " .
                             "where ($cond_str) group by email");

    my @emails = ();
    while (my $row = $dbres->fetchrow_hashref()) {
        my $email = $row->{'email'};

        if (defined($email) && $email ne "") {
            push(@emails, $email);
        }
    }

    if (scalar(@emails) == 0) {
        push(@emails, $TBOPS);
    }
    return @emails;
}

#
# Private helper: build the battery-status paragraph of the mail body.
# Reads voltage, percentage and age of the last battery update from the
# nodes table and emits one line per requested node ("no info!!!" when the
# node has no battery data).
#
sub _power_mail_battery_report {
    my (@nodes) = @_;

    my $dbres = DBQueryFatal("select node_id,battery_voltage as v, " .
                             "battery_percentage as p, " .
                             "(UNIX_TIMESTAMP(NOW()) - battery_timestamp)".
                             " as tdelta from nodes " .
                             "where battery_voltage is not NULL");
    my %powinfo = ();
    while (my $row = $dbres->fetchrow_hashref()) {
        $powinfo{$row->{'node_id'}} = { 'v' => $row->{'v'},
                                        'p' => $row->{'p'},
                                        'tdelta' => $row->{'tdelta'}
                                      };
    }

    my $report =
        "\nHere's the last known battery info for these robots.  If \n" .
        "it's been more than 3 days since last power update, or if \n" .
        "the remaining percent is below 50 or the voltage is below 7.5,\n".
        "you should probably replace the battery.\n\n";

    foreach my $bot (@nodes) {
        if (defined($powinfo{$bot})) {
            my $ts = $powinfo{$bot}{'tdelta'};
            my $time_str;
            # Report in days once the data is more than a day old,
            # otherwise in hours.
            if ($ts > (3600*24)) {
                my $tts = sprintf("%.2f",($ts / (3600*24)));
                $time_str =  $tts . " days since last update.";
            }
            else {
                my $tts = sprintf("%.2f",($ts / 3600));
                $time_str = $tts . " hours since last update.";
            }

            $report .=
                $bot . ": " . sprintf("%.2f",$powinfo{$bot}{'p'}) .
                "%, " . sprintf("%.2f",$powinfo{$bot}{'v'}) .
                "V, " . $time_str . "\n";
        }
        else {
            $report .= "$bot: no info!!!\n";
        }
    }

    return $report;
}

1;