Commit 2c3851ff authored by David Johnson

vnodesetup used to depend on there being an existing vnodesetup process
running on the node to force it to reboot.  Since PlanetLab has a bit of a
history of bugs/oddities in the initscript process (and thus slivers don't
always boot our stuff correctly), this commit special-cases the plab case
so that even if there's no existing vnodesetup process, we kill all the
sliver processes and rerun vnodesetup.
parent 0e93537b
......@@ -752,6 +752,11 @@ sub RebootVNode($$) {
print STDOUT "reboot ($vnode): Rebooting (on $pnode).\n";
# See if we're a plab node (we must send the '-p' switch to vnodesetup
# so that plab slivers reboot properly if vnodesetup has died).
my ($jailed,$plab) = (0,0);
TBIsNodeVirtual($vnode, \$jailed, \$plab);
#
# Run an ssh command in a child process, protected by an alarm to
# ensure that the ssh is not hung up forever if the machine is in
......@@ -787,12 +792,21 @@ sub RebootVNode($$) {
}
return($exitstatus);
}
my $addargs = "";
if ($plab) {
$addargs .= "-p ";
}
elsif ($jailed) {
$addargs .= "-j ";
}
#
# Must change our real UID to root so that ssh will work.
#
$UID = 0;
exec("$ssh -host $vnode $CLIENT_BIN/vnodesetup -r -j $vnode");
exec("$ssh -host $vnode $CLIENT_BIN/vnodesetup -r $addargs $vnode");
exit(0);
}
......
......@@ -198,7 +198,7 @@ if ($killit || $haltit) {
exit(killvnode());
}
if ($rebootit) {
if (! -e $pidfile) {
if (!$doplab && ! -e $pidfile) {
die("*** $0:\n".
" No pid for $vnodeid manager!\n");
}
......@@ -493,14 +493,20 @@ sub killvnode() {
if ($mpid =~ /^([-\@\w.]+)$/) {
$mpid = $1;
}
else {
elsif (!($doplab && $mpid eq '')) {
die("*** $0:\n".
" Bad data in pid: $mpid!\n");
}
my $sigtosend = ($haltit ? 'TERM' : 'USR1');
if (kill($sigtosend, $mpid) == 0) {
if ($doplab && $mpid eq '') {
# sometimes plab vservers die and don't rerun initscripts, so
# we're left with a dangling pidfile... so just remove the file
# and continue.
unlink($pidfile);
}
elsif (kill($sigtosend, $mpid) == 0) {
print"*** Could not kill($sigtosend) process $mpid: $!\n";
return -1;
}
......@@ -539,14 +545,45 @@ sub rebootvnode() {
if ($mpid =~ /^([-\@\w.]+)$/) {
$mpid = $1;
}
else {
elsif (!($doplab && $mpid eq '')) {
die("*** $0:\n".
" Bad data in pid: $mpid!\n");
}
if (kill('USR2', $mpid) == 0) {
print"*** Could not kill(USR2) process $mpid: $!\n";
if ($doplab && $mpid eq '') {
print "Killing all vserver processes and restarting sliver.\n";
# sometimes plab vservers die and don't rerun initscripts, so
# we're left with a dangling pidfile... so just remove the file
# and continue.
unlink($pidfile);
# manually kill all processes besides us, just in case the pid file
# wasn't written correctly
killvserver();
# restart things ourselves!
my $spid = fork();
if ($spid == 0) {
# become real root so perl doesn't complain about suid
$UID = 0;
close(STDIN);
close(STDOUT);
close(STDERR);
open(STDIN, "< /dev/null") or
die("opening /dev/null for STDIN: $!");
open(STDERR, "> /dev/null")
or die("opening /dev/null for STDERR: $!");
open(STDOUT, "> /dev/null")
or die("opening /dev/null for STDOUT: $!");
exec("$BINDIR/vnodesetup -p $vnodeid");
die("exec reboot vnodesetup failed!");
}
}
elsif (kill('USR2', $mpid) == 0) {
if ($debug) {
print"*** Could not kill(USR2) process $mpid: $!\n";
}
return -1;
}
hackwaitandexit();
}
......@@ -769,5 +806,6 @@ sub hackwaitandexit()
if ($mtime >= $now || $ctime >= $now);
}
}
exit(1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment