Commit 8aa8098d authored by Leigh B. Stoller
Browse files

Change the handling for when mysqld goes away (CR_SERVER_LOST ||
CR_SERVER_GONE_ERROR). Instead of bailing, sit and loop trying to
reconnect, given that this is known to be a transient error, and we do
not really want our daemons to go belly up during that brief time when
the watchdog is getting it restarted. The query is then resent.

For the perl version of this change, I was a lot more pedantic since
we use this library from a zillion places. Also, there is some special
handling because of the mysqld watchdog, which would become useless if
the test query hung trying to reconnect to the server forever.

As a side effect of this change, we should see way less email when
mysqld goes catatonic since the new code will just loop instead of
generating tons of errors.

Might actually increase overall robustness. On the other hand, could
end up being a total disaster!
parent c8fb3863
......@@ -224,7 +224,6 @@ use vars qw(@ISA @EXPORT);
TBRobotLabExpt
TBExptContainsNodeCT
);
# Must come after package declaration!
......@@ -234,8 +233,9 @@ use English;
use File::Basename;
use POSIX qw(strftime);
require Mysql;
use vars qw($DBQUERY_MAXTRIES $DBCONN_MAXTRIES @EXPORT_OK @virtualTables
@physicalTables);
use vars qw($DBQUERY_MAXTRIES $DBCONN_MAXTRIES
$DBCONN_EXITONERR $DBQUERY_RECONNECT
@EXPORT_OK @virtualTables @physicalTables);
# Configure variables
my $TB = "@prefix@";
......@@ -269,9 +269,12 @@ else {
# to the DB in the child.
#
my @DB;
$DBQUERY_MAXTRIES = 1;
$DBCONN_MAXTRIES = 5;
@EXPORT_OK = qw($DBQUERY_MAXTRIES $DBCONN_MAXTRIES);
$DBQUERY_MAXTRIES = 1;
$DBQUERY_RECONNECT = 1;
$DBCONN_MAXTRIES = 5;
$DBCONN_EXITONERR = 1;
@EXPORT_OK = qw($DBQUERY_MAXTRIES $DBQUERY_RECONNECT
$DBCONN_EXITONERR $DBCONN_MAXTRIES);
sub TBDBConnect($)
{
......@@ -294,37 +297,50 @@ sub TBDBConnect($)
last;
}
$maxtries--;
sleep(1);
if ($maxtries) {
print STDERR "Cannot connect to DB; trying again in 5 seconds!\n";
sleep(5);
}
}
if (!defined($DB[$dbnum])) {
print STDERR "Cannot connect to DB after several attempts!\n";
# Ensure consistent error value.
# Report the connection retry limit ($DBCONN_MAXTRIES); $DBQUERY_MAXTRIES
# is the per-query retry limit and is unrelated to connecting.
# NOTE(review): assumes the connect loop above counts down from
# $DBCONN_MAXTRIES -- confirm against the start of this sub.
print STDERR
    "Cannot connect to DB after $DBCONN_MAXTRIES attempts!\n";
return -1
if (! $DBCONN_EXITONERR);
exit(-1);
}
$DB[$dbnum]->{'dbh'}->{'PrintError'} = 0;
$Mysql::QUIET = 1;
return 0;
}
TBDBConnect(0);
# Old version. Should be renamed or just eventfork.
#
# Pause briefly, then throw away every handle in @DB and call
# TBDBConnect() for each slot. When $EVENTSYS is set, EventFork() is
# called afterwards. The result of each TBDBConnect() call is ignored.
#
sub TBdbfork()
{
    # select() with no filehandles is just a sub-second sleep.
    select(undef, undef, undef, 0.3);

    my $slot = 0;
    while ($slot < scalar(@DB)) {
        undef($DB[$slot]);
        TBDBConnect($slot);
        $slot++;
    }

    if ($EVENTSYS) {
        EventFork();
    }
}
# New version.
#
# Throw away every handle in @DB and call TBDBConnect() for each slot,
# in index order.
#
# Returns 0 when every TBDBConnect() call returned 0, or -1 as soon as
# one of them returns non-zero (later slots are left untouched).
#
sub TBDBReconnect()
{
    my $slot = 0;

    while ($slot < scalar(@DB)) {
        undef($DB[$slot]);

        if (TBDBConnect($slot) != 0) {
            return -1;
        }
        $slot++;
    }
    return 0;
}
# To avoid keeping a mysql connection around.
#
# Undefine every entry of @DB without reconnecting, then pause briefly.
# Presumably the connection is released when the handle object is
# destroyed -- the handle class is not visible here.
#
sub TBDBDisconnect()
{
for (my $i = 0; $i < @DB; $i++) {
undef($DB[$i]);
}
# select() with no filehandles is just a sub-second sleep.
# NOTE(review): two back-to-back pauses (0.3s then 0.2s) appear here;
# this may be a before/after pair from the diff -- confirm which one
# is intended.
select(undef, undef, undef, 0.3);
select(undef, undef, undef, 0.2);
}
# Create a new DB handle and return the handle number
......@@ -4385,27 +4401,11 @@ sub TBExptGetSwapoutAction($$$) {
# the error string from the DB module. Use DBFatal (below) to print/email
# that string, and then exit.
#
#
# Run a query on the first handle ($DB[0]) and return whatever query()
# returns. When that result is false, $DBErrorString is set to the query
# text plus the handle's errstr before returning.
#
sub DBQueryOld($)
{
    my ($query) = @_;
    my $result  = $DB[0]->query($query);

    # Success: hand the result straight back.
    return $result
        if ($result);

    # Failure: record the query and the error string for the caller.
    my $errmsg = $DB[0]->errstr;
    $DBErrorString = " Query: $query\n" . " Error: " . $errmsg;

    return $result;
}
sub DBQueryN($$)
{
my($dbnum, $query) = @_;
my $maxtries = $DBQUERY_MAXTRIES;
my $result;
my $db = $DB[$dbnum];
# Not really forever :-)
if (!$maxtries) {
......@@ -4413,7 +4413,11 @@ sub DBQueryN($$)
}
while ($maxtries) {
# Get this each time through the loop since we try reconnect below.
my $db = $DB[$dbnum];
$result = $db->query($query);
if (! defined($result)) {
my $err = $db->err;
......@@ -4427,8 +4431,36 @@ sub DBQueryN($$)
last;
}
#
# If we lose the connection to mysqld, try to reconnect and then resend
# the query. 2006 is CR_SERVER_GONE_ERROR and 2013 is CR_SERVER_LOST.
#
if ($db->err == 2006 || $db->err == 2013) {
    # This is really just for the mysqld watchdog daemon, which clears
    # $DBQUERY_RECONNECT so that it handles a lost server itself.
    return undef
        if (! $DBQUERY_RECONNECT);

    my $reconnected;
    {
        # Temporarily override the connect settings: do not exit if the
        # connect fails, and keep trying for a long time. local()
        # restores whatever values the caller had configured when this
        # block is left, instead of overwriting them with fixed values.
        local $DBCONN_EXITONERR = 0;
        local $DBCONN_MAXTRIES  = 1000;

        $reconnected = TBDBReconnect();
    }

    if ($reconnected < 0) {
        $DBErrorString =
            " Query: $query\n".
            " Error: Could not reconnect to mysqld";
        return undef;
    }
    # Reconnected; go around the loop again to resend the query. This
    # skips the $maxtries decrement, so it does not use up a try.
    next;
}
$maxtries--;
DBWarn("mysqld went away. $maxtries tries left", 0);
DBWarn("mysqld went away in process $PID. $maxtries tries left", 0);
sleep(1);
}
return $result;
......
......@@ -59,9 +59,14 @@ if (!$debug) {
exit(0);
}
}
# Reestablish connection in child to avoid transient startup error.
libdb::TBDBReconnect();
# Be careful not to exit on transient error
$libdb::DBQUERY_MAXTRIES = 0;
$libdb::DBQUERY_MAXTRIES = 0;
# Do not let the library attempt reconnect since we have to deal with it.
$libdb::DBQUERY_RECONNECT = 0;
#
# Write our pid into the pid file so we can be killed later.
......@@ -140,7 +145,9 @@ sub TryQuery()
#
$libdb::DBQUERY_MAXTRIES = 3;
$libdb::DBCONN_MAXTRIES = 2;
TBdbfork();
# Must always do this in the child.
libdb::TBDBReconnect();
if (!DBQueryWarn("show full processlist")) {
exit(1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment