Commit ad2f2355 authored by Robert Ricci

Add another part to node_status - move plab phys nodes that have not
checked in for a while into hwdown.

Adding another query to do this was much simpler than trying to make the one
monster query already in this script get what we wanted.
parent 62e61fd9
......@@ -42,6 +42,7 @@ if (($UID != 0) && (!TBAdmin())) {
}
#
# Part 1: Set the node_status.status column for nodes.
# The idea is simple; any nodes that have not reported in (isalive in
# tmcd) within the last XX seconds are moved to the down category.
#
......@@ -89,19 +90,6 @@ while (my ($node,$status,$pid,$remote,$isvirt,$isplab,$timediff) =
}
}
#
# If the plab management sliver hasn't checked in for a couple of
# hours, move the pnode to hwdown.
#
if ($isplab && !$isvirt) {
if ($timediff > 7200) {
MarkNodeDown($node);
TBSetNodeLogEntry($node, $UID, TB_DEFAULT_NODELOGTYPE(),
"'Moved to hwdown; ".
"$node has not reported in for a long time.'");
}
}
#
# Repeat the time check to avoid dropping a node that just came up.
#
......@@ -113,3 +101,25 @@ while (my ($node,$status,$pid,$remote,$isvirt,$isplab,$timediff) =
"where node_id='$node'");
}
}
#
# Part 2: Push nodes into hwdown that have been down for quite a while.
# For now, we only do this for planetlab physnodes, but we may want to do
# it for others, too.
#
# Threshold, in seconds, for the age check in the query below.
# NOTE(review): the "* 60" suggests the site variable is stored in
# minutes -- confirm against the sitevar definition.
my $timeout = TBGetSiteVar("watchdog/isalive/dead_time") * 60;

#
# Select every node of type 'pcplabphys' whose node_status timestamp is
# more than $timeout seconds old, skipping rows whose reservation is
# already NODEDEAD_PID/NODEDEAD_EID.
#
# The age is computed with unix_timestamp() on both operands. Subtracting
# the raw values (now() - ns.status_timestamp) makes MySQL do arithmetic
# on their YYYYMMDDHHMMSS numeric forms, which does not yield a number of
# seconds and so cannot be meaningfully compared against $timeout.
#
# NOTE(review): reserved is left-joined, so a node with no reserved row
# has NULL r.pid/r.eid; the final !(...) predicate then evaluates to NULL
# and the row is dropped. Confirm whether unreserved nodes are meant to
# be skipped here.
#
$query_result =
    DBQueryFatal("SELECT n.node_id from nodes as n " .
                 "left join node_status as ns on ns.node_id=n.node_id ".
                 "left join reserved as r on r.node_id=n.node_id " .
                 "where n.type='pcplabphys' " .
                 " and (unix_timestamp(now()) - " .
                 "      unix_timestamp(ns.status_timestamp)) > $timeout " .
                 " and !(r.pid='" . NODEDEAD_PID . "' and r.eid='" .
                 NODEDEAD_EID . "')");

# Each returned row is a single node_id: mark that node down and record
# the reason in its node log.
while (my ($node) = $query_result->fetchrow_array) {
    MarkPhysNodeDown($node);
    TBSetNodeLogEntry($node, $UID, TB_DEFAULT_NODELOGTYPE(),
                      "'Moved to hwdown by node_status; ".
                      "$node has not reported in for a long time.'");
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment