os_setup.in 16.7 KB
Newer Older
1
#!/usr/bin/perl -w
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2
#
3
# Copyright (c) 2000-2014 University of Utah and the Flux Group.
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
23
#
24
use English;
25
use Getopt::Std;
26
use POSIX ":sys_wait_h";
27
use Data::Dumper;
28

29
#
30
31
32
33
# Reboot the nodes in an experiment. The nodes table will already contain
# all the information. This script deals with possible disk reloading,
# rebooting, and waiting for nodes to come back alive before allowing
# experiment creation to continue.
34
#
35
# usage: os_setup [-d] [-i] <pid> <eid>
36
#
Chad Barb's avatar
   
Chad Barb committed
37
38
39
40
41
# errorcode:  0 - all reboots succeeded.
#             1 - some/all reboots failed; retry may help.
#            -1 - failure; retry is inappropriate.
#

42
43
#
# Print the command line synopsis on STDERR and exit with -1.
#
# Both switches accepted by the option parser ($optlist is "id") are
# listed: -d sets $debug and -i sets $impotent.
#
sub usage()
{
    print STDERR "Usage: os_setup [-d] [-i] <pid> <eid>\n";
    exit(-1);
}
47
48
49
# Option letters handed to getopts, followed by the default value of the
# two flags those options control.
my ($optlist, $debug, $impotent) = ("id", 1, 0);

#
# Configure variables
#
my $TB    = "@prefix@";
my $TBOPS = "@TBOPSEMAIL@";

# un-taint path: fixed search path, and drop the shell related variables.
$ENV{'PATH'} = join(":", '/bin', '/usr/bin', '/usr/local/bin');
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};

# Flush the selected output handle after every write.
$OUTPUT_AUTOFLUSH = 1;
63

64
65
66
67
68
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
69
use libossetup;
70
71
use libreboot;
use libosload;
72
use libtestbed;
Kevin Atkinson's avatar
Kevin Atkinson committed
73
use libtblog;
74
use NodeType;
75
use Experiment;
76
use Image;
77
use OSinfo;
78
use User;
79
use Node;
80

81
82
# Is this needed any longer?
my $dolastload  = 0;

TBDebugTimeStampsOn();

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments (exactly two: <pid> <eid>).
#
#   -d  sets $debug
#   -i  sets $impotent
#
# %options is a lexical rather than an undeclared package global.
#
my %options = ();

usage()
    if (!getopts($optlist, \%options) || @ARGV != 2);

$debug    = 1
    if (defined($options{"d"}));
$impotent = 1
    if (defined($options{"i"}));

104
#
# Verify user and get his DB uid and other info for later.
#
my $this_user = User->ThisUser();
die_noretry("You ($UID) do not exist!")
    unless (defined($this_user));

# Identity fields of the invoking user, plus a "Name <address>" string.
my ($user_uid, $user_name, $user_email) =
    ($this_user->uid(), $this_user->name(), $this_user->email());
my $user_email_to = $user_name . " <" . $user_email . ">";

116
117
118
#
# Check permission.
#
# The two command line arguments name the experiment; the invoking user
# needs TB_EXPT_MODIFY access to it.
#
my $experiment = Experiment->Lookup(@ARGV[0, 1]);

die_noretry("Could not find experiment object")
    unless (defined($experiment));
die_noretry("You do not have permission to swap this experiment!")
    unless ($experiment->AccessCheck($this_user, TB_EXPT_MODIFY));

my ($pid, $eid) = ($experiment->pid(), $experiment->eid());

TBDebugTimeStamp("os_setup started");

131
#
# List of all nodes in the experiment. Nothing to do when it is empty.
#
my @nodelist = $experiment->NodeList(0, 1);
unless (@nodelist) {
    tbinfo("No nodes in experiment. Exiting ...\n");
    exit(0);
}

#
# Create this "structure" to pass around to the type specific modules,
# and seed it with the flags established so far.
#
my $MyStruct = libossetup->New($this_user, $experiment, @nodelist);
die_noretry("Could not create local data structure!")
    unless (defined($MyStruct));

$MyStruct->debug($debug);
$MyStruct->impotent($impotent);
$MyStruct->noretry(0);
$MyStruct->dolastload($dolastload);

#
# See if the experiment is firewalled and stash for later. When it is,
# a second call hands back the firewall itself through the reference.
#
$MyStruct->firewalled($experiment->IsFirewalled());

if ($MyStruct->firewalled()) {
    my $firewall;
    my $firewalled = $experiment->IsFirewalled(\$firewall);

    $MyStruct->firewall($firewall);
}

#
# Ditto ElabinElab.
#
$MyStruct->elabinelab($experiment->elabinelab());

#
# Ditto PlabinElab. The PLC node is only recorded when the lookup
# succeeds.
#
my $plcnode;
my $isplabinelab = (TBExptPlabInElabPLC($pid, $eid, \$plcnode) ? 1 : 0);

$MyStruct->plabinelab($isplabinelab);
$MyStruct->plcnode($plcnode)
    if ($isplabinelab);

#
# External node management means that someone else is going to be
# answering DHCP after nodes reboot. For nodes in PXEWAIT, we need
# to make sure they are really rebooted and not just told to check
# in with bootinfo again.
#
$MyStruct->realreboot(1)
    if (FeatureEnabled("ExternalNodeManagement", $experiment->GetSwapper(),
		       $experiment->GetProject(), $experiment));

191
#
# First pass to check that all local files exist. This should probably
# happen earlier in the swap path.
#
# For every node the boot paths, RPMs and tarballs are tested with -f;
# the first problem found is handed to die_noretry().
#
foreach my $node (@nodelist) {
    #
    # The default and the next boot path get exactly the same checks, so
    # walk the two accessors instead of repeating the code.
    #
    foreach my $accessor ("def_boot_path", "next_boot_path") {
	my $path = $node->$accessor();

	next
	    if (!defined($path) || $path eq "");

	# Drop a leading "<ip>:" so that only the file part is checked.
	if ($path =~ /^([0-9\.]+):(\/.*)$/) {
	    $path = $2;
	}

	# Path must begin with $TFTP
	# NOTE(review): $TFTP is not declared in the visible part of this
	# file; presumably it is provided elsewhere -- confirm. Also note
	# that the pattern puts its own "/" in front of $TFTP.
	if (! ($path =~ /^\/$TFTP\//)) {
	    die_noretry("File $path for node $node must reside in $TFTP");
	}

	if (! -f $path) {
	    die_noretry("File $path for node $node does not exist!");
	}
    }

    #
    # XXX - Ditto for RPMs (a colon separated list of files).
    #
    foreach my $rpm (split(":", $node->rpms())) {
	if (! -f $rpm) {
	    die_noretry({type => 'primary', severity => SEV_ERROR,
			 error => ['file_not_found', 'rpm', $rpm, $node]},
			"RPM $rpm for node $node does not exist!");
	}
    }

    #
    # XXX - Ditto for tarfiles. Each colon separated entry is a
    # "<dir> <tarfile>" pair; only the tarfile is tested.
    #
    foreach my $tarspec (split(":", $node->tarballs())) {
	my ($dir, $tar) = split(" ", $tarspec);

	if (! -f $tar) {
	    die_noretry({type => 'primary', severity => SEV_ERROR,
			 error => ['file_not_found', 'tar', $tar, $node]},
			"Tarfile $tar for node $node does not exist!");
	}
    }
}
265

266
#
# First pass through to let type/class specific modules see what is
# going on and mark nodes as needed.
#
foreach my $node (@nodelist) {
    my $node_id   = $node->node_id();
    my $type      = $node->type();
    my $class     = $node->class();

    # Not sure where to put this.
    $node->_issharednode(defined($node->sharing_mode()) &&
			$node->sharing_mode() eq 'using_shared_local');

    # Not sure where to put this.
    $node->_iseinenode($MyStruct->elabinelab() &&
		      defined($node->inner_elab_role()) &&
		      $node->inner_elab_role() eq 'node');

    #
    # Look for type specific module first. Eventually this should be more
    # dynamic in how the modules are loaded/defined, perhaps specified on
    # a per-type basis in the DB.
    #
    # Order of preference: an object already known for this node, then a
    # new one for the node type, then a new one for the node class. Each
    # step is judged by the object it returns (not by $@), so an undefined
    # object can never reach the method calls below.
    #
    my $object = $MyStruct->TypeLookup($node);

    $object = $MyStruct->NewType($type)
	if (!defined($object));
    $object = $MyStruct->NewType($class)
	if (!defined($object));
    die_noretry("No type/class specific setup module for $node")
	if (!defined($object));

    print STDERR "Adding $node_id to type object " . $object->type() . "\n"
	if ($debug);
    $object->AddNode($node);
}

308
309
310
311
#
# Main loop: keep asking the type objects for volunteers, light those
# nodes up in parallel and wait for them, until nobody volunteers, the
# swap gets canceled, or the structure reports an abort.
#
while (1) {
    my $objects    = $MyStruct->OperationList();
    my @volunteers = ();
    my @nodes      = ();

    #
    # Do not bother if we got canceled. The experiment's flag is copied
    # into the structure the first time it is seen.
    #
    unless ($MyStruct->canceled()) {
	if (my $flag = $experiment->canceled()) {
	    $MyStruct->canceled($flag);

	    tbnotice({cause => 'canceled', severity => SEV_IMMEDIATE,
		      error => ['cancel_flag']},
		     "Swap canceled; will terminate os_setup early!");
	    last;
	}
    }

    #
    # Clear the inform lists, since we want to send email in batches
    # as things fail.
    #
    $MyStruct->ClearInformLists();

    #
    # Go through and ask each one for volunteers. Objects that offer
    # nothing are skipped; the rest are remembered as [object, nodes].
    #
    foreach my $object (@{ $objects }) {
	print "Asking $object for volunteers\n"
	    if ($debug);

	my @takers = $object->Volunteers();

	print "$object returns volunteers: @takers\n"
	    if ($debug && @takers);

	if (@takers) {
	    push(@nodes, @takers);
	    push(@volunteers, [$object, \@takers]);
	}
    }
    last
	unless (@nodes);

    #
    # Light up the nodes in parallel. The worker gets one [object, nodes]
    # pair and returns -1 when LightUpNodes() reports a problem, else 0.
    #
    my @results = ();
    my $worker  = sub {
	my ($pair) = @_;
	my ($object, $noderef) = @{ $pair };
	my @batch = @{ $noderef };

	print STDERR "Lighting up nodes: @batch\n"
	    if ($debug);

	return ($object->LightUpNodes(@batch) ? -1 : 0);
    };
    print STDERR "Lighting up nodes in parallel ...\n";

    $MyStruct->die_noretry("*** LightUpNodes: Internal error\n")
	if (ParRun({"maxwaittime" => 99999},
		   \@results, $worker, @volunteers));

    #
    # Check the exit codes. An error at this phase is unusual, and
    # we want to turn off retry.
    #
    foreach my $idx (0 .. $#results) {
	next
	    if ($results[$idx] == 0);

	my @failed = @{ $volunteers[$idx]->[1] };

	print STDERR "*** Error lighting up nodes: @failed\n"
	    if ($debug);
	$MyStruct->noretry(1);

	#
	# Make sure all the nodes are marked as down so that
	# we do not wait for them.
	#
	foreach my $node (@failed) {
	    $node->SetAllocState(TBDB_ALLOCSTATE_DOWN());
	}
    }

    # And wait.
    print STDERR "Waiting for nodes ...\n";
    $MyStruct->WaitForNodes(@nodes);

    #
    # Fire off email for this batch.
    #
    $MyStruct->InformTBopsFatal();
    $MyStruct->InformTBopsWarn();
    $MyStruct->InformUser();

    if ($MyStruct->aborted()) {
	print STDERR "Aborting os_setup cause of fatal errors.\n";
	last;
    }
}

417
418
419
########################################################################
# All of this stuff is for summary reporting. I did not touch it, as
# the code is simply awful. 
420
#
421
# Global variables needed for the summary
422
#
423
424
425
426
# Set by the report code below; selects the 'user' versus 'unknown'
# cause that is passed to tberror().
my $users_fault;
# Failure counters and node lists, filled in as
# $tally{<what>}{<osid>}{any_type} and $tally{<what>}{<osid>}{by_type}{<type>}.
my %tally;
# Node counts per osid: $total{<osid>}{any_type} and
# $total{<osid>}{by_type}{<type>}.
my %total;
# Text of the failure summary, built up one line at a time.
my $summary = '';
Kevin Atkinson's avatar
   
Kevin Atkinson committed
427

Kevin Atkinson's avatar
   
Kevin Atkinson committed
428
429
430
431
#
# Make sure both per-severity failure counters exist in the given hash
# reference; a counter that is not defined yet is set to 0, one that is
# already defined is left alone. The hash is modified in place and the
# return value is not meaningful.
#
sub add_defaults($) {
    my ($counts) = @_;

    foreach my $field ("failed_fatal", "failed_nonfatal") {
	$counts->{$field} = 0
	    if (!defined($counts->{$field}));
    }
}
Kevin Atkinson's avatar
   
Kevin Atkinson committed
433
434
435
436
437
438
439
440
441
442

#
# Append a " (<n> non-fatal)" remark to a summary line.
#
# Arguments: the line, then a hash with the "failed" and
# "failed_nonfatal" counters. The line comes back unchanged unless
# failed_nonfatal is greater than zero; the remark reads "all" when every
# failure was non-fatal and "<nonfatal>/<failed>" otherwise.
#
sub add_non_fatal($%) {
    my ($text, %stats) = @_;

    return $text
	unless ($stats{failed_nonfatal} > 0);

    my $share = "$stats{failed_nonfatal}/$stats{failed}";
    $share = "all"
	if ($stats{failed_nonfatal} == $stats{failed});

    return $text . " ($share non-fatal)";
}

Kevin Atkinson's avatar
   
Kevin Atkinson committed
445
#
# Build a one-line, space separated list of failed nodes, each written as
# "<vname>(<node id>)".
#
# Arguments: the maximum line length, then a hash that may carry the
# array references "failed_fatal_list" and "failed_nonfatal_list".
#
# The fatal failures come first and the non-fatal ones after them, each
# group sorted by vname. Each sort is parenthesized: without that the
# first sort takes the second one as part of its own list and the two
# groups end up merged. A missing list counts as empty.
#
# When the full list is longer than the maximum, whole entries are kept
# while they fit in (maximum - 4) characters and "..." is appended.
#
# NOTE(review): %vname is a global that is not filled in anywhere in the
# visible part of this file; warnings are switched off locally, so a
# missing name just renders as "(<node id>)" -- confirm where it is set.
#
sub list_failed_nodes ($%) {
    local $^W = 0;
    my ($max_length,%d) = @_;
    my $byvname = sub { $vname{$a} cmp $vname{$b} };
    my @nodes = ((sort $byvname @{ $d{failed_fatal_list}    || [] }),
		 (sort $byvname @{ $d{failed_nonfatal_list} || [] }));
    @nodes = map {"$vname{$_}($_)"} @nodes;
    my $line = join ' ', @nodes;
    if (length($line) > $max_length) {
	$line = '';
	# Leave room for the trailing "...".
	$max_length -= 4;
	my $length = 0;
	foreach (@nodes) {
	    $length += length($_) + 1;
	    last if $length > $max_length;
	    $line .= "$_ ";
	}
	$line .= "..." if $length > $max_length;
    }
    return $line;
}

#
# Finish a summary line by attaching the list of failed nodes.
#
# Arguments: the line so far, the indent to use for a continuation line,
# and the counter/list hash that is passed on to list_failed_nodes().
# When line, ": " and node list fit in 78 columns they share one line;
# otherwise the node list goes on its own line, indented. The result
# always ends with a newline.
#
sub add_failed_nodes ($$%) {
    my ($heading, $margin, %stats) = @_;
    my $width    = 78;
    my $nodetext = list_failed_nodes($width - $margin, %stats);
    my $fits     = (length($heading) + 2 + length($nodetext) <= $width);

    return ($fits
	    ? "$heading: $nodetext\n"
	    : "$heading:\n" . (' ' x $margin) . "$nodetext\n");
}

Kevin Atkinson's avatar
   
Kevin Atkinson committed
477
478
479
#
# First gather stats
#
# Each entry of the failed list is a [what, fatal] pair. Known
# combinations are reported through tbreport(), and every entry is
# counted in %tally under $tally{what}{osid}, once for all node types
# together and once for the type of the node.
#
foreach my $node_id (keys(%{ $MyStruct->failedlist() })) {
    my $node   = $MyStruct->node($node_id);
    my $osinfo = $node->_bootosinfo();
    my $osid   = $osinfo->osid();
    my $type   = $node->type();
    my ($what, $fatal) = @{ $MyStruct->failedlist()->{$node_id} };

    my ($severity, $error_type);

    if ($fatal eq 'fatal') {
	$severity = SEV_ERROR;
    } elsif ($fatal eq 'nonfatal') {
	$severity = SEV_WARNING;
    }

    if ($what eq 'boot') {
	$error_type = 'node_boot_failed';
    } elsif ($what eq 'reload') {
	$error_type = 'node_load_failed';
    }

    # Anything outside the known values is tallied but not reported.
    tbreport($severity, $error_type, $node, $type, $osinfo)
	if (defined($error_type) && defined($severity));

    my $stats = ($tally{$what}{$osid} ||= {});

    foreach my $bucket ($stats->{any_type} ||= {},
			$stats->{by_type}{$type} ||= {}) {
	$bucket->{failed}++;
	$bucket->{"failed_${fatal}"}++;
	push(@{ $bucket->{"failed_${fatal}_list"} }, $node_id);
    }
}
517
518
519
520
521
522
523
524
#
# Count the nodes per osid, overall and per node type, so the summary
# can say "<failed>/<total>".
#
foreach my $node_id (keys(%{ $MyStruct->nodelist() })) {
    my $node   = $MyStruct->node($node_id);
    my $osinfo = $node->_bootosinfo();

    # Was not setup to do anything, so ignore.
    next
	unless (defined($osinfo));

    my $counts = ($total{$osinfo->osid()} ||= {});

    $counts->{any_type}++;
    $counts->{by_type}{$node->type()}++;
}

#
# Now report any failed nodes in a concise summary
#
# Reload failures take precedence: when there are any, boot failures are
# not summarized at all.
#
if (defined($tally{reload})) {

    $users_fault = 0;

    foreach my $osid (sort keys %{ $tally{reload} }) {
	my $osname  = OSinfo->Lookup($osid)->osname();
	my %stats   = %{ $tally{reload}{$osid}{any_type} };
	my $heading = sprintf("%d/%d nodes failed to load the os \"%s\"",
			      $stats{failed}, $total{$osid}{any_type},
			      $osname);

	$summary .= add_failed_nodes($heading, 2, %stats);
    }

}
elsif (defined($tally{boot})) {

    #
    # Starts out as the user's fault; cleared below as soon as a system
    # image is involved or some node of a type did boot the image.
    #
    $users_fault = 1;

    foreach my $osid (sort keys %{ $tally{boot} }) {
	my $osinfo     = OSinfo->Lookup($osid);
	my $osname     = $osinfo->osname();
	my $user_image = ($osinfo->pid() ne TBOPSPID() ? 1 : 0);

	my $boot = $tally{boot}{$osid};
	add_defaults($boot->{any_type});

	my %all        = %{ $boot->{any_type} };
	my %by_type    = %{ $boot->{by_type} };
	my $os_total   = $total{$osid}{any_type};
	my %type_total = %{ $total{$osid}{by_type} };

	# Most failures first, ties broken by type name.
	my @node_types = sort {
	    $by_type{$b}{failed} <=> $by_type{$a}{failed} || $a cmp $b
	} keys %by_type;

	$users_fault = 0
	    if (!$user_image);
	$users_fault = 0
	    if (grep { $by_type{$_}{failed} < $type_total{$_} } @node_types);

	my $who   = (@node_types == 1 ? "$node_types[0]'s" : "nodes");
	my $owner = ($user_image ? "user" : "system");
	my $line  = sprintf("%d/%d %s with a %s osid of \"%s\" failed to boot",
			    $all{failed}, $os_total, $who, $owner, $osname);
	$line = add_non_fatal($line, %all);

	# A single node type fits on the heading line itself.
	if (@node_types == 1) {
	    $summary .= add_failed_nodes($line, 2,
					 %{ $by_type{$node_types[0]} });
	    next;
	}

	# Otherwise one indented line per node type.
	$summary .= "$line:\n";

	foreach my $type (@node_types) {
	    add_defaults($by_type{$type});

	    my %stats = %{ $by_type{$type} };
	    my $count = $type_total{$type};

	    if ($stats{failed} > 0) {
		my $detail = sprintf("  %d/%d %s with this os failed to boot",
				     $stats{failed}, $count,
				     "${type}'s");
		$detail   = add_non_fatal($detail, %stats);
		$summary .= add_failed_nodes($detail, 4, %stats);
	    }
	    else {
		$summary .= sprintf("  %d %s with this os successfully booted.\n",
				    $count,
				    $count == 1 ? "$type" : "${type}'s");
	    }
	}
    }
}
619
#
# Hand the summary to the log: as an error when nodes failed, else as a
# warning when there is any summary text at all.
#
my $failed_count = $MyStruct->failed();

if ($failed_count) {
    my $cause = ($users_fault ? 'user' : 'unknown');

    tberror({type => 'summary', cause => $cause},
	    "There were $failed_count failed nodes.\n\n", $summary);
}
elsif ($summary) {
    tbwarn($summary);
}
626

627
628
629
# Look to see if anyone set the no retry flag.
# -1: canceled or retry turned off; 1: some nodes failed; 0: all fine.
my $exit_code = 0;
if ($experiment->canceled() || $MyStruct->noretry()) {
    $exit_code = -1;
}
elsif ($MyStruct->failed()) {
    $exit_code = 1;
}
630
631
632
633
634

#
# If not failing for any reason, record some stats
#
if ($exit_code == 0) {
    # Record some stats on the OS requested and the images loaded to
    # the image_history table. This runs inside an eval so that any
    # error is non-fatal: it is reported with tbwarn() and the exit
    # code stays as it is.
    eval {
	my %todo;

	# Collect the list of nodes and store the osid requested
	# and the imageid loaded (if any).
	foreach my $object (@{$MyStruct->OperationList()}) {
	    foreach my $node_id (keys %{$object->{NODES}}) {
		my $node = $object->{NODES}{$node_id};
		my $osinfo = $node->_bootosinfo();
		my $op = $node->_setupoperation();
		my $image;

		# Only set the imageid if the disk needed a reload
		# NOTE(review): $RELOAD is not declared in the visible
		# part of this file; presumably imported -- confirm.
		if ($op == $RELOAD) {
		    $image = $node->_loadimage();
		}
		$todo{$node_id} = [$osinfo, $image];
	    }
	}

	Image->RecordImageHistory($experiment, 'os_setup',
				  tblog_session(), $impotent, \%todo);
    };
    if ($@) {
	tbwarn("Could not record image history: $@");
    }
}

659
660
661
662
663
664
665
666
#
# If not failing for any reason, save off swap state.
#
# For all nodes in the experiment that are booting from the disk,
# figure out the image from which they are booting and stash away the
# appropriate info to enable disk state saving at swapout.
#
my $swapstate;

if ($exit_code == 0) {
    # Only when the swap state lookup works and the state is set.
    if (TBExptGetSwapState($pid, $eid, \$swapstate) && $swapstate) {
	TBDebugTimeStamp("Stashing image signatures");
	osload_setupswapinfo($pid, $eid);
	TBDebugTimeStamp("Finished stashing image signatures");
    }
}

TBDebugTimeStamp("os_setup finished");

exit($exit_code);