Commit d2a1ee62 authored by Leigh B Stoller
Browse files

Be less pedantic about the DB state wrt process id and load address;
try harder to recover instead of bailing.
parent 2b6575d3
......@@ -11,7 +11,7 @@ use POSIX ":sys_wait_h"; # For &WNOHANG
use Sys::Syslog;
use English;
use Socket;
use Errno qw(EADDRINUSE);
use Errno;
#
# This also kills a running frisbee.
......@@ -157,18 +157,17 @@ if ($killmode) {
if (!$address && $pid) {
#
# This makes no sense. Just send email.
# This makes no sense. Send email, but keep going.
#
my $mesg = "Inconsistent DB state. PID ($pid) but no load address!";
my $mesg = "Inconsistent DB state. PID ($pid) but no load address!\n".
"Trying to recover from this unfortunate situation.\n";
SENDMAIL($TBOPS,
"Frisbee Killer Failed!",
"Frisbee Killer Inconsistency",
"Image: $image\n".
$mesg);
Fatal($mesg);
}
if ($address && !$pid) {
elsif ($address && !$pid) {
#
# Okay, minor problem. It is possible we caught the launcher between
# setting the address and setting the pid. Wait a moment, and then
......@@ -200,18 +199,23 @@ if ($killmode) {
}
#
# Okay, address and pid. We could clear the pid from the DB,
# preventing another killer from thinking it is running, but not
# much point since it is not likely to happen. If it turns out to
# be a problem we can change the way this works.
# Okay, address and pid. Kill the process if it's really running. The
# killed process will clear the address and pid, but if no such
# process exists, then clear the DB state here.
#
if (! kill('TERM', $pid)) {
SENDMAIL($TBOPS,
"Frisbee Killer Failed!",
"Failed to stop frisbee daemon for $image\n".
"Could not kill(TERM) process $pid: $? $!");
if (kill(0, $pid) || ! $!{ESRCH}) {
if (! kill('TERM', $pid)) {
SENDMAIL($TBOPS,
"Frisbee Killer Failed!",
"Failed to stop frisbee daemon for $image\n".
"Could not kill(TERM) process $pid: $? $!");
Fatal("Failed to stop frisbee daemon for $image!");
Fatal("Failed to stop frisbee daemon for $image!");
}
}
else {
# The original process is gone, so just clear the DB state.
ClearAddress();
}
exit(0);
}
......@@ -228,16 +232,24 @@ if ($address) {
# This would be inconsistent.
if ($pid) {
my $mesg = "Inconsistent DB state. No load address but PID ($pid) set!";
UnlockTables();
#
# But let's try to recover. If the process is gone, we can just keep
# going and not worry.
#
if (kill(0, $pid) || ! $!{ESRCH}) {
my $mesg = "Inconsistent DB state. No load address but process $pid running!";
UnlockTables();
SENDMAIL($TBOPS,
"Frisbee Startup Failed!",
"Image: $image\n".
$mesg);
Fatal($mesg);
SENDMAIL($TBOPS,
"Frisbee Startup Failure!",
"Image: $image\n".
$mesg);
Fatal($mesg);
}
else {
ClearPid();
$pid = undef;
}
}
#
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment