os_setup.in 18 KB
Newer Older
1
#!/usr/bin/perl -w
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2011 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
5 6
# All rights reserved.
#
7
use English;
8
use Getopt::Std;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
9
require 'ctime.pl';
10
use POSIX ":sys_wait_h";
11
use Data::Dumper;
12

13
#
14 15 16 17
# Reboot the nodes in an experiment. The nodes table will already contain
# all the information. This script deals with possible disk reloading,
# rebooting, and waiting for nodes to come back alive before allowing
# experiment creation to continue.
18
#
19
# usage: os_setup [-d] [-i] <pid> <eid>
20
#
Chad Barb's avatar
 
Chad Barb committed
21 22 23 24 25
# errorcode:  0 - all reboots succeeded.
#             1 - some/all reboots failed; retry may help.
#            -1 - failure; retry is inappropriate.
#

26 27
#
# Print a usage summary on STDERR and exit with -1.
# The options shown match $optlist: -d sets $debug, -i sets $impotent.
#
sub usage()
{
    print STDERR "Usage: os_setup [-d] [-i] <pid> <eid>\n";
    exit(-1);
}
31 32 33
my $optlist  = "id";
my $debug    = 1;
my $impotent = 0;
34 35 36 37 38

#
# Configure variables
#
my $TB		= "@prefix@";
39
my $TBOPS       = "@TBOPSEMAIL@";
40 41 42 43 44 45 46

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
$| = 1; 
47

48 49 50 51 52
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
53
use libossetup;
54 55
use libreboot;
use libosload;
56
use libtestbed;
Kevin Atkinson's avatar
Kevin Atkinson committed
57
use libtblog;
58
use NodeType;
59
use Experiment;
60
use Image;
61
use OSinfo;
62
use User;
63
use Node;
64

65 66
# Is this needed any longer?
my $dolastload  = 0;
Kevin Atkinson's avatar
 
Kevin Atkinson committed
67

68
TBDebugTimeStampsOn();
Kevin Atkinson's avatar
 
Kevin Atkinson committed
69

70 71 72 73 74 75 76 77 78 79 80
#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
%options = ();
usage()
    if (! getopts($optlist, \%options));

# Exactly two positional arguments (pid and eid) must remain.
usage()
    if (@ARGV != 2);

$debug = 1
    if (defined($options{"d"}));
$impotent = 1
    if (defined($options{"i"}));

88
#
89
# Verify user and get his DB uid and other info for later.
90
#
91 92 93
my $this_user = User->ThisUser();
die_noretry("You ($UID) do not exist!")
    if (! defined($this_user));

my $user_uid      = $this_user->uid();
my $user_name     = $this_user->name();
my $user_email    = $this_user->email();
my $user_email_to = "$user_name <$user_email>";

#
# Find the experiment named on the command line and make sure the
# caller is allowed to modify it.
#
my $experiment = Experiment->Lookup($ARGV[0], $ARGV[1]);
die_noretry("Could not find experiment object")
    if (!defined($experiment));
die_noretry("You do not have permission to swap this experiment!")
    if (!$experiment->AccessCheck($this_user, TB_EXPT_MODIFY));

my $pid = $experiment->pid();
my $eid = $experiment->eid();

TBDebugTimeStamp("os_setup started");

#
# All of the nodes in the experiment; nothing to do if there are none.
#
my @nodelist = $experiment->NodeList(0, 1);
if (! @nodelist) {
    tbinfo("No nodes in experiment. Exiting ...\n");
    exit(0);
}

#
# The "structure" that gets handed to the type specific modules.
#
my $MyStruct = libossetup->New($this_user, $experiment, @nodelist);
die_noretry("Could not create local data structure!")
    if (!defined($MyStruct));

$MyStruct->debug($debug);
$MyStruct->impotent($impotent);
$MyStruct->noretry(0);
$MyStruct->dolastload($dolastload);

#
# Stash whether the experiment is firewalled, and if it is, the
# firewall that IsFirewalled() hands back through its reference argument.
#
$MyStruct->firewalled($experiment->IsFirewalled());

if ($MyStruct->firewalled()) {
    my $firewall;
    my $firewalled = $experiment->IsFirewalled(\$firewall);
    $MyStruct->firewall($firewall);
}

#
# Same for the ElabinElab setting.
#
$MyStruct->elabinelab($experiment->elabinelab());

#
# Same for PlabinElab; the PLC node is stashed only when it applies.
#
my $plcnode;
if (! TBExptPlabInElabPLC($pid, $eid, \$plcnode)) {
    $MyStruct->plabinelab(0);
}
else {
    $MyStruct->plabinelab(1);
    $MyStruct->plcnode($plcnode);
}

164
#
165 166
# First pass to check that all local files exist. This should probably
# happen earlier in the swap path.
167
#
168 169 170
foreach my $node (@nodelist) {
    #
    # The default and the next boot path get exactly the same checks,
    # default first. A path that is undefined or empty is skipped.
    #
    foreach my $accessor ('def_boot_path', 'next_boot_path') {
	my $path = $node->$accessor();

	next
	    if (!defined($path) || $path eq "");

	# Drop a leading "IP:" if present; only the path part is checked.
	if ($path =~ /^[0-9\.]+:(\/.*)$/) {
	    $path = $1;
	}

	#
	# Path must begin with $TFTP.
	# NOTE(review): $TFTP is not declared anywhere in this file;
	# presumably one of the testbed libraries provides it - confirm.
	#
	if (! ($path =~ /^\/$TFTP\//)) {
	    die_noretry("File $path for node $node must reside in $TFTP");
	}

	if (! -f $path) {
	    die_noretry("File $path for node $node does not exist!");
	}
    }

    #
    # XXX - Ditto for RPMs; rpms() is a colon separated list of files.
    #
    foreach my $rpm (split(":", $node->rpms())) {
	next
	    if (-f $rpm);

	die_noretry({type => 'primary', severity => SEV_ERROR,
		     error => ['file_not_found', 'rpm', $rpm, $node]},
		    "RPM $rpm for node $node does not exist!");
    }

    #
    # XXX - Ditto for tarfiles; each colon separated entry is
    # "<dir> <tarfile>" and only the tarfile is checked.
    #
    foreach my $tarspec (split(":", $node->tarballs())) {
	my (undef, $tar) = split(" ", $tarspec);

	next
	    if (-f $tar);

	die_noretry({type => 'primary', severity => SEV_ERROR,
		     error => ['file_not_found', 'tar', $tar, $node]},
		    "Tarfile $tar for node $node does not exist!");
    }
}
238

239
#
240 241
# First pass through to let type/class specific modules see what is
# going on and mark nodes as needed.
242
#
243 244 245 246 247
foreach my $node (@nodelist) {
    my $node_id   = $node->node_id();
    my $type      = $node->type();
    my $class     = $node->class();

    # Not sure where to put this.
    $node->_issharednode(defined($node->sharing_mode()) &&
			$node->sharing_mode() eq 'using_shared_local');

    # Not sure where to put this.
    $node->_iseinenode($MyStruct->elabinelab() &&
		      defined($node->inner_elab_role()) &&
		      $node->inner_elab_role() eq 'node');

    #
    # Look for type specific module first. Eventually this should be more
    # dynamic in how the modules are loaded/defined, perhaps specified on
    # a per-type basis in the DB.
    #
    my $object = $MyStruct->TypeLookup($node);
    if (!defined($object)) {
	$object = $MyStruct->NewType($type);
    }
    if (!defined($object)) {
	#
	# Otherwise use the class.
	#
	$object = $MyStruct->NewType($class);
    }
    #
    # Test the object itself rather than $@: there is no eval here, so
    # $@ says nothing reliable about whether a module was found, and an
    # undefined object must never reach the method calls below.
    #
    if (!defined($object)) {
	die_noretry("No type/class specific setup module for $node");
    }
    print STDERR "Adding $node_id to type object " . $object->type() . "\n"
	if ($debug);
    $object->AddNode($node);
}

281 282 283 284
#
# Keep asking the type/class objects for nodes that want work, light
# those up in parallel, wait for them and send out email, until nobody
# volunteers, the swap is canceled, or $MyStruct reports an abort.
#
while (1) {
    my $objects    = $MyStruct->OperationList();
    my @volunteers = ();
    my @nodes      = ();

    #
    # Pick up a cancel request, unless one has already been recorded.
    #
    if (! $MyStruct->canceled()) {
	if (my $flag = $experiment->canceled()) {
	    $MyStruct->canceled($flag);

	    tbnotice({cause => 'canceled', severity => SEV_IMMEDIATE,
		      error => ['cancel_flag']},
		     "Swap canceled; will terminate os_setup early!");
	    last;
	}
    }

    #
    # Start each pass with empty inform lists, so that email goes out
    # in batches as things fail.
    #
    $MyStruct->ClearInformLists();

    #
    # Collect volunteers: one [object, nodes] pair per object that
    # offered anything, plus a flat list of every node offered.
    #
    foreach my $typeobj (@{ $objects }) {
	print "Asking $typeobj for volunteers\n"
	    if ($debug);

	my @offered = $typeobj->Volunteers();
	next
	    if (! @offered);

	print "$typeobj returns volunteers: @offered\n"
	    if ($debug);
	push(@nodes, @offered);
	push(@volunteers, [$typeobj, \@offered]);
    }
    last
	if (!@nodes);

    #
    # Light up the nodes in parallel, one ParRun job per volunteer pair.
    # A job yields -1 when LightUpNodes returns true, else 0.
    #
    my @results = ();
    my $lightup = sub {
	my ($typeobj, $noderef) = @{ $_[0] };
	my @batch = @{ $noderef };

	print STDERR "Lighting up nodes: @batch\n"
	    if ($debug);
	return -1
	    if ($typeobj->LightUpNodes(@batch));
	return 0;
    };
    print STDERR "Lighting up nodes in parallel ...\n";

    if (ParRun({"maxwaittime" => 99999},
	       \@results, $lightup, @volunteers)) {
	$MyStruct->die_noretry("*** LightUpNodes: Internal error\n");
    }

    #
    # Check the exit codes, which line up with @volunteers by position.
    # An error at this phase is unusual, and we want to turn off retry.
    #
    foreach my $i (0 .. $#results) {
	next
	    if ($results[$i] == 0);

	my @batch = @{ $volunteers[$i]->[1] };

	print STDERR "*** Error lighting up nodes: @batch\n"
	    if ($debug);
	$MyStruct->noretry(1);

	#
	# Mark every node of the failed batch as down so that we do
	# not wait for them.
	#
	foreach my $node (@batch) {
	    $node->SetAllocState(TBDB_ALLOCSTATE_DOWN());
	}
    }

    # And wait.
    print STDERR "Waiting for nodes ...\n";
    $MyStruct->WaitForNodes(@nodes);

    #
    # Fire off email for this batch.
    #
    $MyStruct->InformTBopsFatal();
    $MyStruct->InformTBopsWarn();
    $MyStruct->InformUser();

    if ($MyStruct->aborted()) {
	print STDERR "Aborting os_setup cause of fatal errors.\n";
	last;
    }
}

390 391 392
########################################################################
# All of this stuff is for summary reporting. I did not touch it, as
# the code is simply awful. 
393
#
394
# Global variables needed for the summary
395
#
396 397 398 399
my $users_fault;
my %tally;
my %total;
my $summary = '';
Kevin Atkinson's avatar
 
Kevin Atkinson committed
400

Kevin Atkinson's avatar
 
Kevin Atkinson committed
401 402 403 404
#
# Given a reference to a hash of failure counts, set the failed_fatal
# and failed_nonfatal entries to 0 when they are not defined. The hash
# is modified in place; entries that are already defined are left alone.
#
sub add_defaults($) {
    my ($counts) = @_;

    foreach my $key ('failed_fatal', 'failed_nonfatal') {
	if (!defined($counts->{$key})) {
	    $counts->{$key} = 0;
	}
    }
}
Kevin Atkinson's avatar
 
Kevin Atkinson committed
406 407 408 409 410 411 412 413 414 415

#
# Append a "(... non-fatal)" note to $line and return the result.
# The remaining arguments are a hash with failed and failed_nonfatal
# counts. Nothing is appended unless failed_nonfatal is greater than
# zero; the note reads "all" when every failure was non-fatal and
# "<nonfatal>/<failed>" otherwise.
#
sub add_non_fatal($%) {
    my ($line, %d) = @_;

    return $line
	unless ($d{failed_nonfatal} > 0);

    my $count;
    if ($d{failed_nonfatal} == $d{failed}) {
	$count = "all";
    }
    else {
	$count = "$d{failed_nonfatal}/$d{failed}";
    }
    return $line . " ($count non-fatal)";
}

Kevin Atkinson's avatar
 
Kevin Atkinson committed
418
#
# Return a one line, space separated list of the failed nodes, each
# written as "<vname>(<node>)". The first argument is the maximum line
# length; the rest is a hash whose failed_fatal_list and
# failed_nonfatal_list entries are array references of nodes (either
# may be missing). Fatal failures are listed first, then non-fatal
# ones, each group sorted by vname. When the full list is longer than
# the maximum, it is cut off at a node boundary and "..." is appended.
#
# NOTE(review): %vname is a package variable that nothing in this file
# fills in, so the vname part comes out empty unless it is set
# elsewhere - confirm where it is supposed to come from.
#
sub list_failed_nodes ($%) {
    # Lookups in %vname may hit undefined entries; keep -w quiet here.
    local $^W = 0;
    my ($max_length,%d) = @_;
    my $byvname = sub { $vname{$a} cmp $vname{$b} };

    #
    # Each sort is parenthesized so that it consumes only its own list.
    # Unparenthesized, the first sort swallows the result of the second
    # and the two groups get mixed together.
    #
    my @nodes = ((sort $byvname @{ $d{failed_fatal_list}    || [] }),
		 (sort $byvname @{ $d{failed_nonfatal_list} || [] }));
    @nodes = map {"$vname{$_}($_)"} @nodes;
    my $line = join ' ', @nodes;
    if (length($line) > $max_length) {
	$line = '';
	# Leave room for the trailing "...".
	$max_length -= 4;
	my $length = 0;
	foreach (@nodes) {
	    $length += length($_) + 1;
	    last if $length > $max_length;
	    $line .= "$_ ";
	}
	$line .= "..." if $length > $max_length;
    }
    return $line;
}

#
# Attach the list of failed nodes (as built by list_failed_nodes from
# the trailing hash arguments) to $line and return the result, newline
# terminated. The list goes on the same line after ": " when the whole
# thing fits in 78 columns; otherwise it goes on a line of its own,
# indented by $indent spaces.
#
sub add_failed_nodes ($$%) {
    my ($line, $indent, %d) = @_;
    my $failed = list_failed_nodes(78 - $indent, %d);
    my $fits   = (length($line) + 2 + length($failed) <= 78);

    return "$line: $failed\n"
	if ($fits);
    return "$line:\n" . (' ' x $indent) . "$failed\n";
}

Kevin Atkinson's avatar
 
Kevin Atkinson committed
450 451 452
#
# First gather stats
#
453 454 455
#
# Report each failed node and count it in %tally, keyed by what failed
# and by osid, both across all node types and per node type.
#
foreach my $id (keys(%{ $MyStruct->failedlist() })) {
    my $node   = $MyStruct->node($id);
    my $osinfo = $node->_bootosinfo();
    my $osid   = $osinfo->osid();
    my $type   = $node->type();
    my ($what, $fatal) = @{ $MyStruct->failedlist()->{$id} };

    my ($error_type, $severity);

    if ($what eq 'boot') {
	$error_type = 'node_boot_failed';
    }
    elsif ($what eq 'reload') {
	$error_type = 'node_load_failed';
    }

    if ($fatal eq 'fatal') {
	$severity = SEV_ERROR;
    }
    elsif ($fatal eq 'nonfatal') {
	$severity = SEV_WARNING;
    }

    # Only combinations recognized above are reported.
    tbreport($severity, $error_type, $node, $type, $osinfo)
	if (defined($error_type) && defined($severity));

    $tally{$what}{$osid} = {}
	if (!defined($tally{$what}{$osid}));
    my $t = $tally{$what}{$osid};

    foreach my $bucket ($t->{any_type}, $t->{by_type}{$type}) {
	$bucket->{failed}++;
	$bucket->{"failed_${fatal}"}++;
    }
    foreach my $bucket ($t->{any_type}, $t->{by_type}{$type}) {
	push(@{ $bucket->{"failed_${fatal}_list"} }, $id);
    }
}

#
# Count every node that has a boot osinfo in %total, keyed by osid.
#
foreach my $id (keys(%{ $MyStruct->nodelist() })) {
    my $node   = $MyStruct->node($id);
    my $osinfo = $node->_bootosinfo();

    # Was not setup to do anything, so ignore.
    next
	if (!defined($osinfo));

    my $osid = $osinfo->osid();
    my $type = $node->type();

    $total{$osid}{any_type}++;
    $total{$osid}{by_type}{$type}++;
}

#
# Now report any failed nodes in a concise summary
#
#
# Reload failures take precedence: when there are any, only those are
# summarized. Otherwise boot failures are summarized per osid, and
# broken down by node type when more than one type is involved.
#
if (defined $tally{reload}) {

    $users_fault = 0;

    foreach my $osid (sort keys %{$tally{reload}}) {
	my $osinfo = OSinfo->Lookup($osid);
	my $osname = $osinfo->osname();

	my %d     = %{$tally{reload}{$osid}{any_type}};
	my $total = $total{$osid}{any_type};

	my $line = sprintf("%d/%d nodes failed to load the os \"%s\"",
			   $d{failed}, $total, $osname);

	$summary .= add_failed_nodes($line, 2, %d);
    }

}
elsif (defined $tally{boot}) {

    # Assume the user is at fault until something below says otherwise.
    $users_fault = 1;

    foreach my $osid (sort keys %{$tally{boot}}) {
	my $osinfo     = OSinfo->Lookup($osid);
	my $osname     = $osinfo->osname();
	my $user_image = ($osinfo->pid() eq TBOPSPID() ? 0 : 1);

	add_defaults($tally{boot}{$osid}{any_type});
	my %d       = %{$tally{boot}{$osid}{any_type}};
	my %d_t     = %{$tally{boot}{$osid}{by_type}};
	my $total   = $total{$osid}{any_type};
	my %total_t = %{$total{$osid}{by_type}};

	# Most failures first; ties are broken by type name.
	my @node_types = sort {
	    ($d_t{$b}{failed} <=> $d_t{$a}{failed}) || ($a cmp $b)
	} keys %d_t;

	#
	# Not the user's fault when this is a system image, or when
	# some nodes of a type did manage to boot it.
	#
	$users_fault = 0
	    if (!$user_image);
	foreach my $type (@node_types) {
	    $users_fault = 0
		if ($d_t{$type}{failed} < $total_t{$type});
	}

	my $line = sprintf("%d/%d %s with a %s osid of \"%s\" failed to boot",
			   $d{failed}, $total,
			   @node_types == 1 ? "$node_types[0]'s" : "nodes",
			   $user_image ? "user" : "system",
			   $osname);
	$line = add_non_fatal($line, %d);

	if (@node_types == 1) {
	    # A single type: the node list goes straight on this line.
	    $summary .= add_failed_nodes($line, 2, %{$d_t{$node_types[0]}});
	    next;
	}

	# Several types: one header line, then one line per type.
	$summary .= "$line:\n";

	foreach my $type (@node_types) {
	    add_defaults($d_t{$type});
	    my %d     = %{$d_t{$type}};
	    my $total = $total_t{$type};

	    if ($d{failed} > 0) {
		$line = sprintf("  %d/%d %s with this os failed to boot",
				$d{failed}, $total,
				"${type}'s");
		$line = add_non_fatal($line, %d);
		$line = add_failed_nodes($line, 4, %d);
	    }
	    else {
		$line = sprintf("  %d %s with this os successfully booted.\n",
				$total,
				$total_t{$type} == 1 ? "$type" : "${type}'s");
	    }
	    $summary .= $line;
	}
    }
}

#
# An error when $MyStruct counts failed nodes, otherwise just a warning
# when there is something to say.
#
if (my $count = $MyStruct->failed()) {
    tberror ({type=>'summary', cause=>($users_fault ? 'user' : 'unknown')},
	     "There were $count failed nodes.\n\n", $summary);
}
elsif ($summary) {
    tbwarn($summary);
}
599

600 601 602
# Look to see if anyone set the no retry flag.
# -1 when canceled or retry was turned off, 1 when nodes failed, else 0.
my $exit_code;
if ($experiment->canceled() || $MyStruct->noretry()) {
    $exit_code = -1;
}
elsif ($MyStruct->failed()) {
    $exit_code = 1;
}
else {
    $exit_code = 0;
}

#
# If not failing for any reason, record some stats
#
if ($exit_code == 0) {
    # Record some stats on the OS requested and the images loaded to
    # the image_history table. Put it in an eval block to catch any
    # errors so they are non-fatal.
    use strict;
    eval {
	my ($exptidx, $state) =
	    DBQuerySingleFatal("select idx,state from experiments ".
			       "  where pid='$pid' and eid='$eid'");
	my ($rsrcidx,$lastrsrc) =
	    DBQuerySingleFatal("select rsrcidx,lastrsrc from experiment_stats ".
			       "  where exptidx=$exptidx");
	my $log_session = tblog_session();
	my %prev_alloc;
	my $cant_find_prev_alloc = 0;

	#
	# On a swapmod, the nodes logged under the previous rsrcidx are
	# the ones that were already allocated.
	#
	if ($state eq 'modify_reswap') {
	    die_noretry("lastrsrc not set during swapmod")
		if (!defined($lastrsrc));

	    my $db_result =
		DBQueryFatal("select node_id from image_history where rsrcidx = $lastrsrc");
	    if ($db_result->numrows() < 1) {
		tbwarn("could not find previous state (rsrcidx=$lastrsrc) ".
		       "in image_history table, won't be able to determine ".
		       "newly allocated nodes");
		$cant_find_prev_alloc = 1;
	    }
	    while (my $n = $db_result->fetchrow) {
		$prev_alloc{$n} = 1;
	    }
	}

	my %todo;

	# Collect the list of nodes and store the osid requested
	# and the imageid loaded (if any).
	foreach my $object (@{$MyStruct->OperationList()}) {
	    my $members = $object->{NODES};

	    foreach my $node_id (keys %{$members}) {
		my $node    = $members->{$node_id};
		my $osinfo  = $node->_bootosinfo();
		my $osid    = $osinfo->osid();
		my $op      = $node->_setupoperation();
		my $imageid = 0;

		# Only set the imageid if the disk needed a reload
		if ($op == $RELOAD) {
		    $imageid = $node->_loadimage()->imageid();
		}
		$todo{$node_id} = [$osid, $imageid];
	    }
	}

	foreach my $node_id (keys(%todo)) {
	    my ($osid, $imageid) = @{$todo{$node_id}};

	    # NULL when the previous allocation could not be determined.
	    my $newly_alloc;
	    if ($cant_find_prev_alloc) {
		$newly_alloc = 'NULL';
	    }
	    elsif (exists($prev_alloc{$node_id})) {
		$newly_alloc = 0;
	    }
	    else {
		$newly_alloc = 1;
	    }

	    my ($node_history_id)
		= DBQuerySingleFatal("select max(history_id) ".
				     "  from node_history where node_id = '$node_id'");
	    if (! defined $node_history_id) {
		tbwarn("No node_history for $node_id, can't log image usage for node.");
		next;
	    }

	    my ($erole, $osname, $req_type, $phys_type)
		= DBQuerySingleFatal("select r.erole, v.osname, v.type, n.type ".
				     "  from reserved as r ".
				     "    left join virt_nodes as v using (vname, exptidx) ".
				     "    left join nodes as n using (node_id) ".
				     "where r.node_id = '$node_id'");

	    # NULL when there is no osname at all, else whether it is set.
	    my $req_os;
	    if (!defined($osname)) {
		$req_os = 'NULL';
	    }
	    elsif ($osname) {
		$req_os = 1;
	    }
	    else {
		$req_os = 0;
	    }
	    $erole = 'delay' if $erole eq 'delaynode';
	    $req_type = $erole unless defined $req_type;

	    # Impotent mode does everything except the actual insert.
	    if (!$impotent) {
		DBQueryFatal("insert into image_history ".
			     "(stamp, node_history_id, node_id, ".
			     " action, newly_alloc, rsrcidx, log_session, ".
			     " req_type, phys_type, req_os, osid, imageid) ".
			     "values(UNIX_TIMESTAMP(), ".
			     "       $node_history_id, '$node_id', 'os_setup', ".
			     "       $newly_alloc, $rsrcidx, ".
			     "       $log_session, '$req_type', '$phys_type', ".
			     "       $req_os, $osid, $imageid)");
	    }
	}
    };
    if ($@) {
	tbwarn("Unable to log image usage to image_history table.\n", $@);
    }
}

699 700 701 702 703 704 705 706
#
# If not failing for any reason, save off swap state.
#
# For all nodes in the experiment that are booting from the disk,
# figure out the image from which they are booting and stash away the
# appropriate info to enable disk state saving at swapout.
#
my $swapstate;

# Only on success, and only when a true swap state is handed back.
if ($exit_code == 0) {
    if (TBExptGetSwapState($pid, $eid, \$swapstate) && $swapstate) {
	TBDebugTimeStamp("Stashing image signatures");
	osload_setupswapinfo($pid, $eid);
	TBDebugTimeStamp("Finished stashing image signatures");
    }
}
TBDebugTimeStamp("os_setup finished");

exit($exit_code);