Commit 979442f3 authored by Kevin Atkinson's avatar Kevin Atkinson

Add code to os_setup to log information about Image usage.

Enough information is logged so that, at any point in time,
it is possible to tell what images are being used.  After
collecting some stats for a while I hope to use this data to
evaluate various strategies for preloading disks with images
other than the default.

Although not its primary purpose, enough information is
collected to be able to get a snapshot of node usage at any
point in time.  This includes which nodes are being used and by
whom, as in which experiments and thus which projects.

NOTE: For a while you might see a few of these warnings,
  *** WARNING: os_setup:
  ***   could not find previous state (rsrcidx=484084) in image_history
  ***   table, won't be able to determine newly allocated nodes
if someone does a swapmod to an experiment that was swapped in
before this commit was installed.  This is because os_setup uses
previous information in the table to determine newly allocated
nodes.  This warning can safely be ignored in this case, and should
go away over time.
parent f188c44d
......@@ -1452,6 +1452,89 @@ if ($failed || $failedvnodes || $failedplab) {
tbwarn $summary;
}
# No retry if vnodes failed. Indicates a fatal problem.
# An ordinary node/plab failure (1) wins over the no-retry case (-1);
# with neither, the script exits cleanly (0).
my $exit_code = ($failed || $failedplab)                 ?  1
              : ($failedvnodes || $canceled || $noretry) ? -1
              :                                             0;
#
# If not failing for any reason, record some stats.
#
# Writes one image_history row for every node collected from %osids and
# %reloads that is also present in %nodes.  Everything runs inside an eval
# so that a problem while logging is reported with tbwarn and never alters
# $exit_code.
#
if ($exit_code == 0) {
    eval {
        my ($exptidx, $state) =
            DBQuerySingleFatal("select idx,state from experiments ".
                               " where pid='$pid' and eid='$eid'");
        # Fail with a readable message instead of a malformed follow-up query.
        die "no experiments row found for $pid/$eid\n"
            unless defined $exptidx;

        my ($rsrcidx, $lastrsrc) =
            DBQuerySingleFatal("select rsrcidx,lastrsrc from experiment_stats ".
                               " where exptidx=$exptidx");
        die "no experiment_stats row found for exptidx=$exptidx\n"
            unless defined $rsrcidx;

        # An undefined session id is stored as NULL rather than producing a
        # malformed INSERT.  NOTE(review): assumes image_history.log_session
        # is nullable -- confirm against the schema.
        my $log_session = tblog_session();
        $log_session = 'NULL' unless defined $log_session;

        #
        # On a swapmod, the rows logged under the previous resource record
        # tell us which nodes the experiment already held.
        #
        my %prev_alloc;
        my $cant_find_prev_alloc = 0;
        if (defined($state) && $state eq 'modify_reswap') {
            die "lastrsrc not set during swapmod" unless defined $lastrsrc;
            my $db_result =
                DBQueryFatal("select node_id from image_history where rsrcidx = $lastrsrc");
            if ($db_result->numrows() < 1) {
                tbwarn("could not find previous state (rsrcidx=$lastrsrc) ".
                       "in image_history table, won't be able to determine ".
                       "newly allocated nodes");
                $cant_find_prev_alloc = 1;
            }
            # List assignment: the loop ends when no row is left, not when a
            # node_id happens to evaluate as false.
            while (my ($prev_node) = $db_result->fetchrow) {
                $prev_alloc{$prev_node} = 1;
            }
        }

        #
        # Per node: [osid, imageid].  The imageid slot is filled in only for
        # nodes that appear in %reloads.
        #
        my %todo;
        foreach my $node_id ( keys(%osids) ) {
            $todo{$node_id} = [$osids{$node_id}];
        }
        foreach my $imageid ( keys(%reloads) ) {
            foreach my $node_id ( @{ $reloads{$imageid} } ) {
                $todo{$node_id}[1] = $imageid;
            }
        }

        foreach my $node_id ( keys(%todo) ) {
            next unless defined $nodes{$node_id};

            my ($osid, $imageid) = @{ $todo{$node_id} };
            # A node listed only in %reloads has no osid; store NULL instead
            # of emitting an empty token that breaks the INSERT and aborts
            # logging for every remaining node.
            # NOTE(review): assumes image_history.osid is nullable -- confirm.
            $osid    = 'NULL' unless defined $osid;
            $imageid = 0      unless defined $imageid;

            # NULL means "unknown": the previous allocation could not be found.
            my $newly_alloc = exists $prev_alloc{$node_id} ? 0 : 1;
            $newly_alloc = 'NULL' if $cant_find_prev_alloc;

            my ($node_history_id)
                = DBQuerySingleFatal("select max(history_id) ".
                                     " from node_history where node_id = '$node_id'");
            # max() yields NULL when the node has no history rows yet.
            # NOTE(review): assumes node_history_id is nullable -- confirm.
            $node_history_id = 'NULL' unless defined $node_history_id;

            my ($osname, $req_type, $phys_type)
                = DBQuerySingleFatal("select v.osname, v.type, n.type ".
                                     "  from reserved as r ".
                                     "    left join virt_nodes as v using (vname, exptidx) ".
                                     "    left join nodes as n using (node_id) ".
                                     "where r.node_id = '$node_id'");
            # The left joins may come back empty; keep storing '' as before,
            # just without the uninitialized-value warnings.
            $req_type  = '' unless defined $req_type;
            $phys_type = '' unless defined $phys_type;
            my $req_os = $osname ? 1 : 0;

            DBQueryFatal("insert into image_history ".
                         "(stamp, node_history_id, node_id, ".
                         " action, newly_alloc, rsrcidx, log_session, ".
                         " req_type, phys_type, req_os, osid, imageid) ".
                         "values(UNIX_TIMESTAMP(), ".
                         "       $node_history_id, '$node_id', 'os_setup', ".
                         "       $newly_alloc, $rsrcidx, ".
                         "       $log_session, '$req_type', '$phys_type', ".
                         "       $req_os, $osid, $imageid)");
        }
    };
    if ($@) {
        tbwarn "Unable to log image usage to image_history table.\n", $@;
    }
}
#
# If not failing for any reason, save off swap state.
#
......@@ -1460,7 +1543,7 @@ if ($failed || $failedvnodes || $failedplab) {
# appropriate info to enable disk state saving at swapout.
#
my $swapstate;
if (!($failedvnodes || $canceled || $noretry || $failed || $failedplab) &&
if ($exit_code == 0 &&
TBExptGetSwapState($pid, $eid, \$swapstate) && $swapstate) {
TBDebugTimeStamp("Stashing image signatures");
osload_setupswapinfo($pid, $eid);
......@@ -1468,12 +1551,7 @@ if (!($failedvnodes || $canceled || $noretry || $failed || $failedplab) &&
}
TBDebugTimeStamp("os_setup finished");
# No retry if vnodes failed. Indicates a fatal problem.
exit(-1)
if ($failedvnodes || $canceled || $noretry);
exit(1)
if ($failed || $failedplab);
exit 0;
exit($exit_code);
#
# Map an OSID to an image for a node type.
......@@ -1694,9 +1772,3 @@ sub os_setup_one($$$;$)
return 1;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment