power_rpc27.pm.in 12 KB
Newer Older
1
#!/usr/bin/perl -wT
2

Leigh B. Stoller's avatar
Leigh B. Stoller committed
3 4
#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7 8 9
# All rights reserved.
#


10
package power_rpc27;
11

12 13
use Exporter;
@ISA = ("Exporter");
14
@EXPORT = qw( rpc27status rpc27ctrl );
15

16 17 18 19
use Socket;
use IO::Handle;
use lib "@prefix@/lib";
use libdb;
20
use POSIX qw(strftime);
21

22 23 24 25 26
# A little perl module to power cycle something attached to an RPC27.
# Thats a serially controlled, power controller.
#
# XXX The little secretkey handshake is coded in here. If this changes
# in capture, you have to change it here too. Look for the "pack"
27
# statement below.
28 29 30
#
# XXX Note that if someone has already tipped to the power controller 
# in question, this won't work at all.
31 32 33 34

# Turn off line buffering on output
$| = 1;

35 36 37
# Number of times to try sending command in the face of "Input error"
my $ntries = 3;

38
# Set for more output.
39
my $debug = 0;
40 41

# RPC27 Prompt string
42
my $RPC27_PROMPT = 'RPC-\d+>';
43 44 45 46

# RPC help message.  Used as terminator for status fetch.
my $RPC27_HELPMSG = 'Type "Help" for a list of commands';

47
my %RPC27_CMD =
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
  ("cycle"  => "reboot",
   "on"     => "on",
   "off"    => "off");

sub rpc27status {
    my ($controller, $statusp) = @_;

    my($TIP, $i, $insync);

    #
    # Form the connection to the controller via a "tip" line to the
    # capture process. Once we have that, we can just talk to the
    # controller directly.
    #
    if (!($TIP = tipconnect($controller))) {
	print STDERR "*** Could not form TIP connection to $controller\n";
	return 1;
    }

67
    #
68
    # Send the command.  Try again a few times if there is a retryable error.
69
    #
70 71
    my $status;
    for my $try (1..$ntries) {
72
	$status = syncandsend($controller, $TIP, "status", $statusp);
73 74
	last
	    if $status >= 0;
75
    }
76 77
    close($TIP);
    return $status ? 1 : 0;
78
}
79 80

# Main routine.
81
# usage: rpc27ctrl(cmd, controller, outlet)
82
# cmd = { "cycle" | "on" | "off" | "status" }
83 84
# controller = <node_id>
# outlet = int, 1 <= outlet <= 20
85 86 87
#
# Returns 0 on success. Non-zero on failure.
# 
88
sub rpc27ctrl {
89
    my($cmd, $controller, @outlets) = @_;
90

91 92 93 94 95 96 97
    #
    # Check parameters
    #
    if (!defined($RPC27_CMD{$cmd})) {
	print STDERR "*** Undefined command: '$cmd'\n";
	return 1;
    }
98
    if (grep {$_ < 1 || $_ > 20} @outlets) {
99 100 101 102
	print STDERR "*** Invalid outlet '$outlet': Must be 1-20\n";
	return 1;
    }

103
    #
104 105 106
    # Make a comma-seperated strings of all the outlets to reboot. The RPCs
    # have a buffer limit of 31 characters, which limits us to 8 outlets
    # at a time (assuming the longest command and 2-digit outlet numbers)
107
    #
108 109 110 111 112 113 114 115 116 117 118
    my @outlet_strings = ();
    while (@outlets) {
	my @tmp_outlets = ();
	for (my $i = 0; ($i < 8) && (@outlets); $i++) {
	    push @tmp_outlets,shift(@outlets);
	}
	push @outlet_strings, join(",",@tmp_outlets);
    }
    if ($debug) {
	print "outlet_strings: ", join(" ",map("($_)",@outlet_strings)), "\n";
    }
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143

    #
    # Run the rest in a child process, protected by an alarm to ensure that
    # we are not hung up forever if the controller is in some funky state.
    #
    my $syspid = fork();

    if ($syspid) {
	local $SIG{ALRM} = sub { kill("TERM", $syspid); };
	#
	# Give it 30 seconds for initial connect plus time per outlet.
	# Probably too long.
	#
	alarm 30 + (15 * scalar(@outlet_strings));
	waitpid($syspid, 0);
	alarm 0;
	my $exitstatus = $?;

	if ($exitstatus == 15) {
	    print STDERR "*** power: $controller is wedged.\n";
	}
	return($exitstatus);
    }
    TBdbfork();

144
    #
145 146 147
    # Form the connection to the controller via a "tip" line to the
    # capture process. Once we have that, we can just talk to the
    # controller directly.
148
    #
149 150
    if (! ($TIP = tipconnect($controller))) {
	print STDERR "*** Could not form TIP connection to $controller\n";
151
	exit(1);
152
    }
153 154 155

    foreach my $outlet (@outlet_strings) {
	my $command = "$RPC27_CMD{$cmd} $outlet";
156 157 158
	my $status;
	for my $try (1..$ntries) {
	    $status = syncandsend($controller, $TIP, $command, undef);
159
	    #
160 161 162 163 164
	    # Double check that an off or on command really turned the
	    # outlet(s) off or on.  We assume reliable power off in our
	    # security environment and we have seen cases where powering
	    # off doesn't.  Reliable power on also avoid many unnecessary
	    # failures during firewalled experiment swapout.
165
	    #
166
	    if ($status == 0 && ($cmd eq "off" || $cmd eq "on")) {
167 168 169 170 171 172 173 174 175 176 177
		my %stathash;
		for my $stry (1..$ntries) {
		    $status = syncandsend($controller, $TIP, "status",
					  \%stathash);
		    last
			if $status >= 0;
		}
		if ($status == 0) {
		    my @noutlets = ();
		    for my $o (split(",", $outlet)) {
			if (!defined($stathash{"outlet$o"}) ||
178
			    $stathash{"outlet$o"} !~ /^$cmd$/i) {
179 180 181 182 183 184 185 186 187
			    push(@noutlets, $o);
			}
		    }
		    if (@noutlets != 0) {
			$outlet = join(",", @noutlets);
			$command = "$RPC27_CMD{$cmd} $outlet";
			$status = -1;
			if ($try == $ntries) {
			    print STDERR
188
				"*** Failed to turn $cmd $controller $outlet\n";
189 190 191 192
			}
		    }
		} elsif ($status > 0) {
		    print STDERR
193
			"*** Post-$cmd status command failed on $controller\n";
194 195
		}
	    }
196 197
	    last
		if $status >= 0;
198 199 200
	}
	if ($status) {
	    close($TIP);
201
	    exit(1);
202 203 204
	}
    }
    close($TIP);
205
    exit(0);
206 207 208 209 210 211 212 213
}

#
# Sync up with the power controller, and set it a command. $controller is the
# controller name, for error message purposes, $TIP is the connection to
# the controller opened with tipconnect, and $command is the whole command
# (ie. 'reboot 20,40') to send.
#
214 215 216
# Returns 0 if successful, -1 if the caller should try again,
# 1 on an unexpected error.
#
217 218
sub syncandsend($$$) {
    my ($controller,$TIP,$cmd,$statusp) = @_;
219

220
    #
221
    # Send a newline to get the command prompt, and then wait
222 223
    # for it to print out the command prompt. This loop is set for a small
    # number since if it cannot get the prompt quickly, then something has
224 225
    # gone wrong.
    #
226
    my $insync = 0;
227

228
    for (my $i = 0; $i < 20; $i++) {
229 230
	my $line;

231
	if (syswrite($TIP, "\r") == 0) {
232
	    print STDERR
233
		"*** Power control sync write failed ($controller/$outlet)\n";
234 235 236
	    return 1;
	}

237 238 239 240 241 242
	$line = rpc_readline($TIP);
	if (!defined($line)) {
	    print STDERR
		"*** Power control sync read failed ".
		    "($controller/$outlet)\n";
	    return 1;
243 244 245 246 247 248 249 250 251
	}
	if ($debug) {
	    print "Read: $line";
	}
	if ($line =~ /$RPC27_PROMPT/) {
	    if ($debug) {
		print "Matched prompt '$RPC27_PROMPT'!\n";
	    }
	    $insync = 1;
252 253
	    last;
	}
254
    }
255 256
    if (! $insync) {
	print STDERR "*** Could not sync with power controller! ".
257
	    "($controller)\n";
258 259
	return 1;
    }
260

261 262 263
    #
    # These things have an annoying tendency to fail sometimes by losing
    # the command part of the string (e.g., "6" instead of "reboot 6").
264
    # I think we are overrunning the UART with "\r" since we put out
265 266 267 268 269
    # a pair after every line we read that is not a prompt and when the
    # RPC puts out its banner, it is a good 10 lines before we see the
    # prompt.  So allow some time for the RPC to digest everything before
    # we feed it the command.
    #
270
    sleep(1);
271

272
    if ($debug) {
273
	print "Sending '$cmd' to $controller\n";
274 275 276
    }

    # Okay, got a prompt. Send it the string:
277
    if (syswrite($TIP, "$cmd\r") == 0) {
278 279
    	print STDERR "*** Power control write failed ($controller/$outlet)\n";
    	return 1;
280
    }
281

282 283 284 285 286 287
    #
    # Read and parse all the output until the next prompt to ensure that
    # there was no read error.  We also collect status here if desired.
    #
    my %status = ();
    my $gotcmd = 0;
288
    my $gotstatus = 0;
289 290 291 292
    print "Reading output following command\n"
	if ($debug);
    while (my $line = rpc_readline($TIP)) {
	print "Read: $line"
293
	    if ($debug);
294 295 296 297 298 299 300 301 302
	# skip echoed prompt+command
	if ($line =~ /$cmd/) {
	    $gotcmd = 1;
	    next;
	}
	# didn't recognize our command for some reason, return failure
	if ($line =~ /Input error/) {
	    return -1;
	}
303 304
	#
	# Got the following prompt, all done.
305
	#
306 307 308 309
	# XXX the prompt+command does not always get echoed correctly
	# (e.g. "atatus" or "ststus" instead of "status") so we also
	# exit if we parsed any status data.
	#
310 311 312 313 314 315 316
	# XXX RPC28 units split up the power data into columns for outlets
	# 1-10 and then for outlets 11-21, ala:
	#
	#    RPC28:  True RMS Current:     3.9 Amps      :      4.7 Amps
	#
	# hence the unusual REs for power and current.
	#
317
	if (($gotcmd || $gotstatus) && $line =~ $RPC27_PROMPT) {
318 319 320
	    last;
	}
	if ($statusp) {
321 322
	    if ($line =~ /Temperature:\s+(\d+\.\d+) C/) {
		$status{tempC} = $1;
323
		$gotstatus = 1;
324 325
	    } elsif ($line =~ /Average\ Power:\s+(\d+)\ Watts\s+ 
				(Apparent|\:\s+(\d+)\ Watts)/x) {
326
		$status{power} = $1;
327 328 329
		if ($3) {
		    $status{power} += $3;
		}
330
		$gotstatus = 1;
331 332
	    } elsif ($line =~ /True\ RMS\ Current:\s+(\d+\.\d+)\ Amps\s+
				(Maximum|\:\s+(\d+\.\d+)\ Amps)/x) {
333
		$status{current} = $1;
334 335 336
		if ($3) {
		    $status{current} += $3;
		}
337
		$gotstatus = 1;
338 339 340 341 342 343 344
	    }
	    # note the /g, controllers with 20 ports put two outlets per line
	    elsif (my %ohash = $line =~ /Outlet\s+(\d+)\s+:\s+(On|Off)/g) {
		for my $o (keys(%ohash)) {
		    my $outlet = "outlet$o";
		    $status{$outlet} = $ohash{$o};
		}
345
		$gotstatus = 1;
346 347
	    }
	}
348 349 350
    }

    if ($statusp) {
351 352
	%$statusp = %status;
    }
353
    return 0;
354 355
}

356
#
357 358
# Connect up to the capture process. This should probably be in a library
# someplace.
359
#
360
sub tipconnect($) {
361
    my($controller) = $_[0];
362
    my($server, $portnum, $keylen, $keydata, $capreturn);
363 364 365 366 367
    my($inetaddr, $paddr, $proto);
    my(%powerid_row);
    local *TIP;

    my $query_result =
368
      DBQueryWarn("select * from tiplines where node_id='$controller'");
369 370 371

    if ($query_result->numrows < 1) {
	print STDERR "*** No such tipline: $controller\n";
372
	return 0;
373
    }
374 375 376 377 378 379 380 381 382 383 384
    %powerid_row = $query_result->fetchhash();

    $server  = $powerid_row{'server'};
    $portnum = $powerid_row{'portnum'};
    $keylen  = $powerid_row{'keylen'};
    $keydata = $powerid_row{'keydata'};

    if ($debug) {
	print "tipconnect: $server $portnum $keylen $keydata\n";
    }

385 386 387 388 389
    #
    # We have to send over the key. This is a little hokey, since we have
    # to make it look like the C struct.
    #
    my $secretkey = pack("iZ256", $keylen, $keydata);
390
    my $capret    = pack("i", 0);
391

392 393 394 395 396 397 398 399 400 401
    #
    # This stuff from the PERLIPC manpage.
    # 
    if (! ($inetaddr = inet_aton($server))) {
	print STDERR "*** Cannot map $server to IP address\n";
	return 0;
    }
    $paddr    = sockaddr_in($portnum, $inetaddr);
    $proto    = getprotobyname('tcp');

402
    for (my $i = 0; $i < 20; $i++) {
403 404 405 406
	if (! socket(TIP, PF_INET, SOCK_STREAM, $proto)) {
	    print STDERR "*** Cannot create socket.\n";
	    return 0;
	}
407

408 409 410
	if (! connect(TIP, $paddr)) {
	    print STDERR
		"*** Cannot connect to $controller on $server($portnum)\n";
411
	    close(TIP);
412 413 414 415
	    return 0;
	}
	TIP->autoflush(1);

416 417 418 419 420 421 422 423 424
	#
	# While its a fatal error if the connect fails, the write and the
	# read that follows might fail because the tip is currently is
	# active. The handshake writes back a value and then immediately
	# closes the socket, which could manifest itself as a closed
	# connection on this end, even before we get a change to do these.
	# operations. In that case, just go around the loop again. We hope
	# to succeed at some point. 
	# 
425 426 427
	if (! syswrite(TIP, $secretkey)) {
	    print STDERR
		"*** Cannot write to $controller on $server($portnum)\n";
428
	    goto again;
429 430 431 432
	}
	if (! sysread(TIP, $capret, length($capret))) {
	    print STDERR
		"*** Cannot read from $controller on $server($portnum)\n";
433
	    goto again;
434 435 436 437 438 439 440
	}

	my $foo = unpack("i", $capret);
	if ($debug) {
	    print "Capture returned $foo\n";
	}
	if ($foo == 0) {
441 442
	    return(*TIP);
	}
443 444 445 446 447
	
      again:
	close(TIP);

	if ($i && (($i % 5) == 0)) {
448 449
	    printf STDERR
		"*** WARNING: $controller on $server($portnum) is busy\n".
450
		"    Waiting a bit before trying again. Pass $i.\n";
451 452
	}
	sleep(5);
453
    }
454
    
455
    print STDERR
456
	"*** $controller on $server($portnum) was busy for too long\n";
457
    return 0;
458 459
}

460 461 462 463 464 465 466 467 468 469
sub rpc_readline($)
{
    my ($TIP) = @_;
    my $line;

    my $cc = 0;
    while (1) {
	if (sysread($TIP, $line, 1, $cc) == 0) {
	    return undef;
	}
470
	print "got: =$line=\n" if ($debug > 1);
471
	$cc++;
472
	last if ($line =~ /\n/ || $line =~ /$RPC27_PROMPT/ || $cc > 1023);
473 474 475 476
    }
    return $line;
}

477
1;