All new accounts created on Gitlab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

Commit 461a1fce authored by Mike Hibler

More minor speed ups for stated.

Log would-be mail messages in stated-mail.log rather than actually emailing them.
Fewer regular log messages.
Avoid scanning a list unnecessarily if not in debug mode.
Use mysql to pick out certain osfeatures.
Bug fix: typo would let stated block when sent a SIGUSR1.
parent e18aa6d9
#!/usr/bin/perl -w
#
# Copyright (c) 2000-2012 University of Utah and the Flux Group.
# Copyright (c) 2000-2013 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -51,6 +51,7 @@ my $power = "$TB/bin/power";
my $apod = "$TB/sbin/apod";
my $TBLOG = "@TBLOGFACIL@";
my $LOGFILE = "$TB/log/stated.log";
my $MAILFILE = "$TB/log/stated-mail.log";
$| = 1;
......@@ -68,6 +69,12 @@ use POSIX ":sys_wait_h";
use IO::Poll qw(POLLIN);
use POSIX qw(:errno_h);
#
# Set this non-zero if you want to send mail on abnormal stated events.
# Otherwise we just log them to $MAILFILE.
#
my $SPAMME = 0;
# Set up some notification throttling
my $mailgap = 15; # in seconds
my $lastmail = time() - $mailgap + 2; # Send a digest of startup msgs after 2s.
......@@ -311,7 +318,7 @@ sub process_event_queue() {
!($sigrestart || $sigcleanup || $do_reload || $do_reopen)) {
$lastcount = $event_count;
# Don't block if we got a signal!
if ($wait<=0 || $sigrestart || $sigcleanup || $do_reload || $do_reload){
if ($wait<=0 || $sigrestart || $sigcleanup || $do_reload || $do_reopen) {
PollEvents(0, 0);
} else {
#debug("Using blocking event poll - $wait seconds\n");
......@@ -813,7 +820,6 @@ sub stateTransition($$) {
#
debug("Running $PXEBOOT trigger\n");
if ($mode ne $PXEKERNEL) {
info("$node: Forcing mode transition into $PXEKERNEL!\n");
opModeTransition($node, $PXEKERNEL, 1);
$mode=$PXEKERNEL;
}
......@@ -891,7 +897,6 @@ sub stateTransition($$) {
# supposed to, and that this change matches
# what the node is booting.
#
info("$node: Forcing mode transition out of $PXEKERNEL!\n");
opModeTransition($node, $bootopmode, 1);
$mode=$bootopmode;
}
......@@ -908,14 +913,13 @@ sub stateTransition($$) {
"where node_id='$node'");
my ($nextmode) = $query_result->fetchrow();
if ($nextmode) {
info("$node: Forcing mode transition!\n");
opModeTransition($node, $nextmode, 1);
$mode=$nextmode;
}
}
else {
my $str = "$node is running $bootosid, but in ".
"mode $mode\ninstead of mode $bootopmode!\n";
my $str = "$node running $bootosid in ".
"mode $mode instead of mode $bootopmode!\n";
if ($bootopmode eq "RELOAD") {
#
......@@ -1051,13 +1055,14 @@ sub opModeTransition($$;$) {
my ($node,$newmode,$force) = @_;
if (!defined($force)) { $force = 0; }
my $mode = $nodes{$node}{mode};
info("$node: Mode change to $newmode requested ($force)\n");
info("$node: Mode change $mode => $newmode ".
($force ? "forced" : "requested"). "\n");
# Check for invalid transitions
my ($oldstate, $mode, $nextstate);
my ($oldstate, $nextstate);
$oldstate = $nodes{$node}{state};
$mode = $nodes{$node}{mode};
if (defined($modeTrans{"$mode:$oldstate"}) || $force) {
#
......@@ -1070,9 +1075,11 @@ sub opModeTransition($$;$) {
#debug("translist=$translist\n");
#debug("splitlist=".join(", ",split(/[:,]/,$translist))."\n");
my %trans = split(/[:,]/,$translist);
debug("Valid transitions from $mode/$oldstate are:\n");
foreach my $k (sort keys %trans) {
debug("$k => $trans{$k}\n");
if ($debug) {
print STDERR "Valid transitions from $mode/$oldstate are:\n";
foreach my $k (sort keys %trans) {
print STDERR "$k => $trans{$k}\n";
}
}
if (defined($trans{$newmode})) {
$nextstate=$trans{$newmode};
......@@ -1113,14 +1120,17 @@ sub opModeTransition($$;$) {
sub handleCtrlEvent($$) {
my ($node,$event) = @_;
info("$node: CtrlEvent: $event\n");
#
# stated should use the node objects.
my $nodeobj = Node->Lookup($node);
# XXX but since it largely doesn't, we need to make sure to sync up here.
#
my $nodeobj = Node->LookupSync($node);
if (!defined($nodeobj)) {
notify("handleCtrlEvent: Could not lookup node object for $node!\n");
info("$node: CtrlEvent: could not lookup node object, event ignored!\n");
return;
}
$nodeobj->FlushReserved();
info("CtrlEvent: $node, $event\n");
foreach ($event) {
/^$TBPXERESET$/ && do {
......@@ -1222,9 +1232,9 @@ sub handleCtrlEvent($$) {
#
# Failed too many times, power cycle instead.
#
notify("Node $node has timed out $timedout times in ".
notify("Node $node timed out $timedout times in ".
"$PXEWAKEUP!\n".
"Sending it a reboot command\n");
"Sending it a reboot command.\n");
$optarg .= "-k";
}
my $cmd = "$nodereboot -r $optarg $node";
......@@ -1405,39 +1415,33 @@ sub handleCommand($$;$$) {
sub checkGenISUP($) {
my ($node) = @_;
debug("$node: Checking ISUP Generation\n");
my $r = DBQueryWarn("select osfeatures from nodes as n ".
my $r = DBQueryWarn("select FIND_IN_SET('isup',osfeatures) as isup,".
" FIND_IN_SET('ping',osfeatures) as ping ".
"from nodes as n ".
"left join os_info as o on o.osid=n.osid ".
"where node_id='$node' and osfeatures is not null");
my $osfeatures="";
# If we don't get anything back, assume it has no features.
if ($r->num_rows() > 0) {
($osfeatures) = $r->fetchrow();
}
my @features = split(",",$osfeatures);
# Make sure features I care about are defined
my %can=("ping"=>0, "isup"=>0);
foreach my $f (@features) {
$can{"\L$f"}=1; # make sure it's all lowercase
}
my ($isup,$ping) = $r->fetchrow();
# If os will send ISUP on its own, do nothing here.
if ($can{"isup"}) {
debug("$node: Will send own ISUP\n");
return 0;
}
# If os will send ISUP on its own, do nothing here.
if ($isup) {
debug("$node: Will send own ISUP\n");
return;
}
# If os doesn't support isup but can ping, fork and ping it every
# few seconds and send isup when it pings, or timeout after too long.
if ($can{"ping"}) {
debug("$node: Needs to be pinged - calling eventping\n");
system("$TB/sbin/eventping $node &");
return 0;
# If os doesn't support isup but can ping, fork and ping it every
# few seconds and send isup when it pings, or timeout after too long.
if ($ping) {
info("$node: starting eventping\n");
debug("$node: Needs to be pinged - calling eventping\n");
system("$TB/sbin/eventping $node &");
return;
}
}
# If os doesn't support ping or isup, stated sends ISUP just after
# the node gets to BOOTING (a bit early, but the best we can do)
info("$node: sending ISUP event\n");
debug("$node: OS doesn't ping - sending ISUP\n");
EventSendWarn(host => $BOSSNODE ,
objtype => TBDB_TBEVENT_NODESTATE ,
......@@ -1759,8 +1763,9 @@ sub notify($;$) {
# Use a timestamp, now that we're throttling mail
my $tstamp=strftime("%b %e %H:%M:%S",localtime);
showqueue();
chomp($message);
if (!$checkonly) {
info($message);
info("$message\n");
$mailqueue++;
# Queue up the message
# (The queue is a hash of lists of timestamps, keyed by message
......@@ -1788,7 +1793,7 @@ sub notify($;$) {
my $last = pop @tlist;
$mailbody .= "($count copies from $first to $last)\n";
} else {
$mailbody .= "($count copy at $tlist[0])\n";
$mailbody .= "(1 copy at $tlist[0])\n";
}
$mailbody .= "$sep\n";
}
......@@ -1797,7 +1802,13 @@ sub notify($;$) {
$mailqueue=0;
showqueue();
$lastmail = time;
if (!$debug) {
if (!$SPAMME) {
# Just write to the logfile
if (open(MLOG, ">>$MAILFILE")) {
print MLOG "$mailbody";
close(MLOG);
}
} elsif (!$debug) {
SENDMAIL("Stated List <".$TBOPS.">",
"Stated Messsage",$mailbody,
"Stated Daemon <".$TBOPS.">");
......@@ -1812,8 +1823,8 @@ sub notify($;$) {
sub announce($) {
my $message = shift;
my $tstamp=strftime("%b %e %H:%M:%S",localtime);
notify("ANNOUCEMENT: ".$message."\n\n(Sent to $REALTBOPS)\n");
$mailbody = "\n$message\n\n$tstamp\n";
notify("ANNOUNCEMENT: ".$message."\n(Sent to $REALTBOPS)\n");
$mailbody = "\n$message\n$tstamp\n";
if (!$debug) {
SENDMAIL($REALTBOPS,
"Stated Messsage",$mailbody,
......
#
# Create and maintain stated mail log.
#
use strict;
use libinstall;
# newsyslog configuration file that receives the rotation entry.
my $NEWSYSLOG_CONF = '/etc/newsyslog.conf';
# Log file created by this update, located under $LOGDIR.
my $STATEDMAILLOG  = $LOGDIR . "/stated-mail.log";
#
# Entry point for this update.
#
#   $version - first argument; not referenced by this update.
#   $phase   - "pre" or "post"; selects which group of phases below runs.
#              Any other value runs nothing.
#
# Always returns 0.
#
sub InstallUpdate($$)
{
    my ($version, $phase) = @_;

    if ($phase eq "pre") {
        #
        # Pre-install: create the mail log file with mode 640.
        # NOTE(review): DoneIfExists presumably ends the phase early when
        # the file is already there -- confirm against libinstall.
        #
        Phase "stated-mail", "Creating stated-mail.log", sub {
            DoneIfExists($STATEDMAILLOG);
            CreateFileFatal($STATEDMAILLOG);
            ExecQuietFatal(join(" ", $CHMOD, "640", $STATEDMAILLOG));
        };

        #
        # Pre-install: back up the newsyslog config and append an entry
        # for the mail log to it.
        #
        Phase "newsyslog", "Updating $NEWSYSLOG_CONF", sub {
            DoneIfEdited($NEWSYSLOG_CONF);
            BackUpFileFatal($NEWSYSLOG_CONF);

            # Entry text: log path, then the fixed newsyslog fields, then
            # the pid file and signal number.
            my $entry = join(" ",
                             $STATEDMAILLOG,
                             "640 9 1000 * Z",
                             "/var/run/stated.pid 31");
            AppendToFileFatal($NEWSYSLOG_CONF, $entry);
        };
    }
    elsif ($phase eq "post") {
        #
        # Post-install: send USR1 to stated; the pid-file check comes first.
        #
        Phase "stated", "Restarting stated", sub {
            my $pidfile = "$VARRUN/stated.pid";

            DoneIfDoesntExist($pidfile);
            SignalDaemon("stated", 'USR1');
        };
    }

    return 0;
}
1;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment