Commit 8348ddf2 authored by Leigh Stoller's avatar Leigh Stoller

Add some code that prevents a node from going into hwdown when the image

being loaded is a user image.
parent 1993ac8c
......@@ -28,7 +28,7 @@ use vars qw(@ISA @EXPORT);
PROJMEMBERTRUST_ROOT PROJMEMBERTRUST_GROUPROOT
PROJMEMBERTRUST_PROJROOT
PROJROOT GROUPROOT USERROOT TBOPSPID
PROJROOT GROUPROOT USERROOT TBOPSPID
PLABMOND_PID PLABMOND_EID PLABHOLDING_PID PLABHOLDING_EID
TBTrustConvert TBMinTrust TBGrpTrust TBProjTrust
......@@ -152,7 +152,7 @@ use vars qw(@ISA @EXPORT);
TBValidNodeLogType TBValidNodeName TBSetNodeLogEntry
TBSetSchedReload MapNodeOSID TBLockExp TBUnLockExp TBSetExpSwapTime
TBUnixGroupList TBOSID TBOSMaxConcurrent TBOSCountInstances
TBResolveNextOSID
TBResolveNextOSID TBOsidToPid
TBOSLoadMaxOkay TBImageLoadMaxOkay TBImageID ExpSwapper
TBdbfork VnameToNodeid TBExpLocked
TBIsNodeRemote TBExptSetLogFile TBExptClearLogFile TBExptGetLogFile
......@@ -1922,6 +1922,27 @@ sub TBOSID ($$) {
return $row[0];
}
#
# Return pid of an osid (internal name).
#
# usage: TBOsidToPid(char *osid, \$pid)
#        osid  - the internal osid string to look up in os_info.
#        \$pid - reference to a scalar that receives the pid on success;
#                it is left untouched when 0 is returned.
#
#        returns 1 if osid is valid; store pid into return arg.
#        returns 0 if osid is not valid (an undefined or empty osid is
#                  treated as not valid, without querying the database).
#
sub TBOsidToPid ($$) {
    my ($osid, $ppid) = @_;

    # Callers may hand us the result of a hash lookup that came up empty.
    # Treat that as "not valid" up front rather than interpolating undef
    # into the query below (which warns and looks up osid='').
    return 0
	if (!defined($osid) || $osid eq "");

    # NOTE(review): $osid is interpolated directly into the SQL text;
    # presumably callers only pass osids that came from the database or
    # were already checked -- confirm, or quote the value here.
    my $query_result =
	DBQueryFatal("select pid from os_info where osid='$osid'");

    # No matching row means the osid is unknown.
    if (! $query_result->num_rows) {
	return 0;
    }

    # Only the first column of the first row (the pid) is of interest.
    my ($pid) = $query_result->fetchrow_array();
    $$ppid = $pid;
    return 1;
}
#
# Returns the maximum number of concurrent instantiations of an image.
#
......
......@@ -692,8 +692,22 @@ while ( @nodelist ) {
next;
}
# Reserve it to down experiment.
MarkNodeDown($node);
#
# If the user has picked a standard image and it fails to boot,
# something is wrong, so reserve it to the hwdown experiment. If the
# image belongs to the user, then we assume it's the image at fault,
# and allow it to be returned to the pool (the caller, tbswap, will end
# up doing the nfree on nodes with a DOWN allocstate).
#
my $pidofosid;
if (! TBOsidToPid($osids{$node}, \$pidofosid) ||
$pidofosid eq TBOPSPID()) {
MarkNodeDown($node);
TBSetNodeLogEntry($node, $dbuid, TB_DEFAULT_NODELOGTYPE(),
"'Moved to hwdown by os_setup; ".
"failed to boot image for osid " . $osids{$node} .
" in $pid/$eid'");
}
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_DOWN() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_DOWN();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment