os_setup.in 41.3 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2
3
4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2006 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6
7
# All rights reserved.
#
8
use English;
9
use Getopt::Std;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
10
require 'ctime.pl';
11

12
#
13
14
15
16
# Reboot the nodes in an experiment. The nodes table will already contain
# all the information. This script deals with possible disk reloading,
# rebooting, and waiting for nodes to come back alive before allowing
# experiment creation to continue.
17
#
18
# TODO: Reload disk images.
19
#
20
# usage: os_setup [-d] <pid> <eid>
21
#
Chad Barb's avatar
   
Chad Barb committed
22
23
24
25
26
# errorcode:  0 - all reboots succeeded.
#             1 - some/all reboots failed; retry may help.
#            -1 - failure; retry is inappropriate.
#

27
28
sub usage()
{
29
    print STDERR "Usage: os_setup [-d] <pid> <eid>\n";
30
31
    exit(-1);
}
32
my  $optlist = "d";
33
34
35
36
37
38

#
# Configure variables
#
my $TB		= "@prefix@";
my $DBNAME	= "@TBDBNAME@";
39
my $TBOPS       = "@TBOPSEMAIL@";
40
my $TESTMODE    = @TESTMODE@;
41
my $TFTP	= "/tftpboot";
42

43
44
45
46
47
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
48
49
use libreboot;
use libosload;
50
use libtestbed;
Kevin Atkinson's avatar
Kevin Atkinson committed
51
use libtblog;
52
use libArchive;
53
use Template;
54
use NodeType;
55

Leigh B. Stoller's avatar
Leigh B. Stoller committed
56
57
TBDebugTimeStampsOn();

58
my $vnode_setup = "$TB/sbin/vnode_setup";
59
my $osselect    = "$TB/bin/os_select";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
60
my $nodereboot  = "$TB/bin/node_reboot";
61
my $elab_setup  = "$TB/sbin/elabinelab";
62
my $dbg		= 0;
63
my $failed      = 0;
64
my $noretry     = 0;
65
my $failedvnodes= 0;
66
my $failedplab  = 0;
67
my $canceled    = 0;
68
69
my %nodes       = ();
my %vnodes      = ();
70
71
72
my %vnodephosts = ();
my %vnode2pnode = ();
my %pnodevcount = ();
73
my %plabvnodes  = ();
74
my %osids       = ();
75
my %canfail     = ();
76
77
78
my %bios_waittime   = ();	# Indexed by node_type.
my %reboot_waittime = ();	# Indexed by osid.
my %node_types  = ();		# Indexed by node_id.
Kevin Atkinson's avatar
   
Kevin Atkinson committed
79
my %vname = ();                 # Indexed by node_id.
80

Kevin Atkinson's avatar
   
Kevin Atkinson committed
81
82
83
84
85
86
#
# This variable keeps track of the failed nodes of all types.
#   values = 'UNKNOWN', 'RELOAD', 'BOOT', 'OTHER'
my %failed_nodes = (); 
my %failed_nonfatal_nodes = ();

87
#
88
89
# Ah, Frisbee works so lets do auto reloading for nodes that do not have
# the proper OS loaded on it. This will be a hash of lists; for each
90
# imageid, a list of the nodes to pass to os_load for that imageid.
91
#
92
93
my %reloads     = ();
my %reboots	= ();
94
my %reconfigs	= ();
95
my %rebooted    = ();
96
my $doautoload  = 1;
97
my $dolastload  = 1;
98

99
100
101
# Protos
sub SetupReload($$$);
sub FirewallSetup($);
102
sub os_setup_one($$$);
103
				  
104
105
106
107
108
109
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

$| = 1; #Turn off line buffering on output

Kevin Atkinson's avatar
   
Kevin Atkinson committed
110
111
112
113
114
115
116
117
118
119
120
121

#
# Used to die with a -1 return code, to indicate to caller (tbswap)
# that the failure is not likely to be fixed with another attempt.
#
sub die_noretry($)
{
    # Report the message as a testbed error, then exit with status -1.
    my ($mesg) = @_;

    tberror($mesg);
    exit(-1);
}

122
123
124
125
126
127
128
129
130
131
132
#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV != 2) {
    usage();
}
133
134
135
136
if (defined($options{"d"})) {
    $dbg = 1;
}

137
138
139
140
141
142
143
144
145
146
my $pid = $ARGV[0];
my $eid = $ARGV[1];

#
# Untaint args.
#
if ($pid =~ /^([-\@\w]+)$/) {
    $pid = $1;
}
else {
Chad Barb's avatar
   
Chad Barb committed
147
    die_noretry("Bad data in pid: $pid.");
148
149
150
151
152
}
if ($eid =~ /^([-\@\w]+)$/) {
    $eid = $1;
}
else {
Chad Barb's avatar
   
Chad Barb committed
153
    die_noretry("Bad data in eid: $eid.");
154
155
}

156
#
157
# Figure out who called us. Only root, people with admin status
158
159
# in the DB, or the owner of the experiment can run this script.
#
160
161
if ($UID && !TBAdmin($UID) &&
    !TBExptAccessCheck($UID, $pid, $eid, TB_EXPT_MODIFY)) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
162
    die_noretry("You do not have permission to swap this experiment!");
163
164
}

165
166
167
168
169
170
#
# Verify user and get his DB uid.
# XXX - copied from elsewhere: is this translation needed anymore?
#
my $dbuid;
if (! UNIX2DBUID($UID, \$dbuid)) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
171
    tbdie("You do not exist in the Emulab Database.");
172
173
174
175
176
177
178
}

#
# Get email info for user, in case we have to alert them about failures
#
my ($user_name,$user_email);
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
179
    tbdie("Cannot determine your name and email address.");
180
181
182
}
my $user_email_to = "$user_name <$user_email>";

183
184
TBDebugTimeStamp("os_setup started");

185
186
187
188
189
#
# See if the experiment is firewalled
#
my $firewall;
my $firewalled = TBExptFirewall($pid, $eid, \$firewall);
190
my $firewallimageid;
191
192
193
194
195
196
197
198
199
200

#
# Ditto ElabinElab.
#
my $elabinelab;
if (! TBExptIsElabInElab($pid, $eid, \$elabinelab)) {
    die("*** $0:\n".
	"    Could not get elabinelab status for experiment $pid/$eid\n");
}

201
202
203
204
205
206
207
208
209
210
#
# Ditto PlabinElab.
#
my $plabinelab = 0;
my $plcnode;
my $plcimageid;
if (TBExptPlabInElabPLC($pid, $eid, \$plcnode)) {
    $plabinelab = 1;
}

211
#
212
# Get the set of nodes, as well as the nodes table information for them.
213
#
214
my $db_result =
Kevin Atkinson's avatar
   
Kevin Atkinson committed
215
    DBQueryFatal("select n.*,l.pid,r.vname from reserved as r ".
216
		 "left join nodes as n on n.node_id=r.node_id ".
217
218
		 "left join last_reservation as l on n.node_id=l.node_id ".
		 "where r.pid='$pid' and r.eid='$eid'");
219

220
if ($db_result->numrows < 1) {
221
    print "There are no nodes in experiment '$eid' in project '$pid'.\n";
Chad Barb's avatar
   
Chad Barb committed
222
    exit 0;
223
224
}

225
while (my %row = $db_result->fetchhash()) {
226
227
228
229
230
    my $node      = $row{'node_id'};
    my $osid      = $row{'def_boot_osid'};
    my $type      = $row{'type'};
    my $jailnode  = $row{'jailflag'};
    my $failmode  = $row{'failureaction'};
Kevin Atkinson's avatar
   
Kevin Atkinson committed
231
    my $vname     = $row{'vname'};
232
233
234
235
236
237
238
    my $typeinfo  = NodeType->Lookup($type);
    my $class     = $typeinfo->class();
    my $subnode   = $typeinfo->issubnode();
    my $virtnode  = $typeinfo->isvirtnode();
    my $imageable = $typeinfo->imageable();
    my $plabnode  = $typeinfo->isplabdslice();
    my $bios_wait = $typeinfo->bios_waittime();
239
    my $bootpath  = 0;
240

241
    #
242
243
    # VIRTNODE HACK: Virtual nodes are special. Jailed vnodes can do quite
    # a bit, and so run them through the checks below.
244
    #
245
    if ($virtnode) {
246
	$vnodes{$node} = ($jailnode || $plabnode);
247
	$plabvnodes{$node} = $plabnode;
248
	if (! $jailnode && ! $plabnode) {
249
250
251
	    next;
	}
    }
Timothy Stack's avatar
   
Timothy Stack committed
252
    elsif ($subnode && !$imageable) {
253
	print "Will skip subnode $node ISUP wait.\n";
254
    }
255
    else {
Chad Barb's avatar
   
Chad Barb committed
256
257
	my $nodeAllocState;
	TBGetNodeAllocState( $node, \$nodeAllocState );
258
	$nodes{$node}  = $node;
Chad Barb's avatar
   
Chad Barb committed
259
	$nodeAllocStates{$node} = $nodeAllocState;
260
261
262
263
264
265
266
	if ($nodeAllocState eq TBDB_ALLOCSTATE_RES_RECONFIG()) {
	    # Terrible use of state machine.
	    $reconfigs{$node} = 1;
	}
	elsif ($nodeAllocState ne TBDB_ALLOCSTATE_RES_READY()) {
	    # only reboot node if assign_wrapper just pulled it into expt.
	    # (i.e. it isn't ALLOCSTATE_RES_READY)
Chad Barb's avatar
   
Chad Barb committed
267
268
	    $reboots{$node} = 1;
	}
269
    }
270
271
272
    $osids{$node}         = $osid;
    $bios_waittime{$type} = $bios_wait;
    $node_types{$node}    = $type;
Kevin Atkinson's avatar
   
Kevin Atkinson committed
273
    $vname{$node}         = $vname;
274

275
    #
276
277
    # Make sure the files specified in the paths exist. We mount the
    # user tftp directory on boss node, so we can ignore the IP address,
278
    # and just check the path directly.
279
280
281
282
    #
    if (defined($row{'def_boot_path'})) {
	my $path = $row{'def_boot_path'};

283
284
285
286
287
288
289
290
291
292
293
	if ($path ne "") {
	    my $ip   = 0;

	    # Split out IP address if it exists.
	    if ($path =~ /^([0-9\.]+):(\/.*)$/) {
		$ip   = $1;
		$path = $2;
	    }

	    # Path must begin with $TFTP
	    # $TFTP already starts with "/", so match it literally (\Q..\E)
	    # at the very start of the path. One optional extra leading
	    # slash is tolerated so that "/$TFTP/..." spellings still pass.
	    if ($path !~ m{^/?\Q$TFTP\E/}) {
		die_noretry("File $path for node $node must reside in $TFTP");
	    }

297
	    if (! -f $path) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
298
		die_noretry("File $path for node $node does not exist!");
299
	    }
300
	    $bootpath = 1;
301
	}
302
303
304
305
    }
    if (defined($row{'next_boot_path'})) {
	my $path = $row{'next_boot_path'};

306
307
308
309
310
311
312
313
314
315
316
	if ($path ne "") {
	    my $ip   = 0;

	    # Split out IP address if it exists.
	    if ($path =~ /^([0-9\.]+):(\/.*)$/) {
		$ip   = $1;
		$path = $2;
	    }

	    # Path must begin with $TFTP
	    # $TFTP already starts with "/", so match it literally (\Q..\E)
	    # at the very start of the path. One optional extra leading
	    # slash is tolerated so that "/$TFTP/..." spellings still pass.
	    if ($path !~ m{^/?\Q$TFTP\E/}) {
		die_noretry("File $path for node $node must reside in $TFTP");
	    }

320
	    if (! -f $path) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
321
		die_noretry("File $path for node $node does not exist!");
322
323
	    }
	}
324
325
    }

326
327
328
329
    #
    # XXX - Ditto for RPMs.
    #
    foreach my $rpm (split(":", $row{'rpms'})) {
330
	if (! -f $rpm) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
331
	    die_noretry("RPM $rpm for node $node does not exist!");
332
333
	}
    }
334

335
336
337
338
339
    #
    # XXX - Ditto for tarfiles.
    #
    foreach my $tarspec (split(":", $row{'tarballs'})) {
	my ($dir, $tar) = split(" ", $tarspec);
340

341
	if (! -f $tar) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
342
	    die_noretry("Tarfile $tar for node $node does not exist!");
343
344
	}
    }
345
346
347
348
349

    #
    # If there is a path specified, then we don't worry anymore about it.
    # The user must know what is going on. The OSID might have a path
    # associated with it, which means the same thing; we don't worry about
350
    # it.
351
    #
Timothy Stack's avatar
   
Timothy Stack committed
352
    if (!$bootpath && !$jailnode && !$plabnode && $imageable) {
353
354
355
	#
	# These checks are not necessary if the front end and web page
	# are doing the right thing, but lets be careful anyway.
356
	#
357
	if (! $osid) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
358
	    die_noretry("$node has no bootpath and no def_boot_osid set!");
359
360
361
362
363
364
365
	}

	#
	# Grab the info for this OSID. This is part of the image check.
	#
	my $osid_result =
	    DBQueryFatal("select * from os_info where osid='$osid'");
366

367
	if ($osid_result->numrows == 0) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
368
	    die_noretry("No such OSID $osid is defined!");
369
	}
370

371
372
373
374
	my %osid_row   = $osid_result->fetchhash();

	#
	# If there is an actual path, its an OSKit kernel not an image.
375
	#
376
	if (! defined($osid_row{'path'}) || $osid_row{'path'} eq "") {
377
	    #
378
	    # Not an OSKit kernel.
379
	    # Make sure this OSID is actually loaded on the machine.
380
	    #
381
382
	    my $p_result =
		DBQueryFatal("select * from partitions ".
383
384
			     "where node_id='$node' and osid='$osid'".
			     "order by partition");
385
386

	    #
387
	    # If not loaded, then see if the user was looking for the generic
388
389
	    # name of the OS that is loaded.
	    #
390
391
	    if ($p_result->numrows == 0) {
		#
392
		# Check to see if a non specific version specified.
393
		#
394
395
396
397
		if (! defined($osid_row{'version'}) ||
		    $osid_row{'version'} eq "") {

		    #
398
		    # A non-specific version. There needs to be a way to
399
		    # map it to another osid.
400
401
		    #
		    if (!defined($osid_row{'nextosid'})) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
402
			die_noretry("No mapping can be made for $osid ($node)!");
403
		    }
404
405
406

		    my $nextosid = TBResolveNextOSID($osid, $pid, $eid);
		    if (!defined($nextosid)) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
407
			die_noretry("No mapping can be made for $osid ($node)!");
408
		    }
409
		
410
411
412
		    #
		    # See if the nextosid is already on the disk. If not,
		    # it needs to be loaded.
413
		    #
414
		    my $o_result =
415
416
417
418
419
420
421
			DBQueryFatal("select osid from partitions as p ".
				     "where p.node_id='$node' and ".
				     "      p.osid='$nextosid'");

		    if (! $o_result->numrows) {
			#
			# User wants a specific version of an OS, but its not
422
			# loaded on the machine.
423
424
425
			#
			print "Mapping $osid on $node to $nextosid ".
			    "and setting up a reload.\n";
426

427
428
			SetupReload($node, $nextosid, $type);
			$osids{$node} = $nextosid;
429
430
		    }
		    else {
431
			#
432
433
			# Already loaded.
			#
434
			print "Mapping $osid on $node to $nextosid.\n";
435
436
437

			if ($dolastload &&
			    defined($row{'pid'}) && $row{'pid'} ne $pid) {
438
			    SetupReload($node, $nextosid, $type);
439
440
			}
			else {
441
			    system("$osselect $nextosid $node") and
Kevin Atkinson's avatar
   
Kevin Atkinson committed
442
443
				die_noretry("Could not set boot OS to ".
					    "$nextosid for $node");
444
			}
445
			$osids{$node} = $nextosid;
446
		    }
447
448
		}
		else {
449
450
		    #
		    # User wants a specific version of an OS, but its not
451
		    # loaded on the machine.
452
		    #
453
454
455
456
457
		    SetupReload($node, $osid, $type);
		}
	    }
	    else {
		#
458
		# OSID is loaded, but might need to be cleaned.
459
460
461
462
		#
		if ($dolastload &&
		    defined($row{'pid'}) && $row{'pid'} ne $pid) {
		    SetupReload($node, $osid, $type);
463
		}
464
465
466
	    }
	}
    }
467

468
    #
469
    # Set the canfail bit.
470
    #
471
    $canfail{$node} = (($failmode eq NODEFAILMODE_FATAL()) ? 0 : 1);
472

473
474
475
476
477
478
479
480
481
    #
    # Set the reboot waittime from the osid now that we have it
    # finalized.
    #
    $osid = $osids{$node};
    if (!exists($reboot_waittime{$osid})) {
	$reboot_waittime{$osid} = TBOSIDRebootWaittime($osid);
    }

482
    print STDERR "$node - $osids{$node} - $canfail{$node}\n"
483
	if $dbg;
484
}
485

486
#
487
# Collect some info about vnodes.
488
489
490
491
492
493
494
495
496
497
#
foreach my $vnode (keys(%vnodes)) {
    my $jailed = $vnodes{$vnode};
    my $pnode;

    if (! $jailed) {
	next;
    }

    if (! TBPhysNodeID($vnode, \$pnode)) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
498
	die_noretry("Cannot determine phys_nodeid for $vnode!");
499
    }
500
501
502
503
504
505
506
507
508
509

    #
    # Count up the number of jailed nodes on this pnode, and add the
    # mapping. We use this below for determining how long to wait for
    # a particular vnode.
    #
    $pnodevcount{$pnode} = 0
	if (!defined($pnodevcount{$pnode}));
    $pnodevcount{$pnode}++;
    $vnode2pnode{$vnode} = $pnode;
510
511

    if (!exists($nodes{$pnode})) {
512
513
514
515
516
517
	#
	# Typical on remote nodes; we do not allocate the underlying
	# phys node to the experiment.
	#
	next;
    }
518

519
    # Nothing else to do for local jail nodes at this time ...
520
521
}

522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
#
# Setup the firewall first.  Once it is up we can continue with the
# remaining nodes.
#
# There is very little point in setting up the other nodes at the same time
# as they will not be able to PXE boot until the firewall is up.  We could
# fire them off a little early in hopes of overlapping any BIOS boot time
# with the last stages of the firewall setup, but it probably isn't worth
# the complexity (and would not work with nodes for which "reboot" means
# "fall out of PXEWAIT and boot").
#
# Note that we formerly did just do them all at once and let the nodes
# continually PXE-timeout and reboot until the firewall came up.  But that
# can actually take longer than what we do now, if a node happened to
# timeout and reboot just as the firewall came up (i.e., we would have to
# wait an extra BIOS-reboot cycle, which can be 90 seconds or more).
#
if ($firewalled) {
    my $node = $firewall;

    TBDebugTimeStamp("rebooting/reloading firewall");
    if (!FirewallSetup($node)) {
	# Two sentences; keep the separating space after the period.
	tbwarn "Firewall node $node failed to boot. ".
	    "This has been reported to testbed-ops.";

	# XXX do we need to set NODEBOOTSTATUS_FAILED here?

	#
	# We assume that firewall node images are "standard" here,
	# and whine to tbops.
	#
	MarkNodeDown($node);
	TBSetNodeLogEntry($node, $dbuid, TB_DEFAULT_NODELOGTYPE(),
			  "'Moved to hwdown by os_setup; ".
			  "failed to boot image for osid " . $osids{$node} .
			  " in $pid/$eid'");
	SENDMAIL($TBOPS, "1 node is down",
		 "Node:\n".
		 "  $node\n".
		 "in pid/eid $pid/$eid appears to be dead.\n\n".
		 "The node has been taken out of the pool until this matter ".
		 "is resolved.\n");

	# Count the failure and skip all remaining setup.
	$failed++;
	$failed_nodes{$node} = 'UNKNOWN';
	goto tballdone;
    }

    #
    # Check for cancelation.  Firewall setup may have taken awhile.
    #
    if (!$canceled) {
	TBGetCancelFlag($pid, $eid, \$canceled);
	if ($canceled) {
	    tbnotice "Swap canceled; will terminate os_setup early!";
	    goto tballdone;
	}
    }

    #
    # remove it from the nodelist
    #
    delete $nodes{$node};
}

587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
#
# Likewise, setup a PLC node before other plabinelab nodes.
# XXX right now, we setup PLC before ANY other node, whether it is
# part of the inner plab or not.
#
if ($plabinelab) {
    my $node = $plcnode;

    TBDebugTimeStamp("rebooting/reloading PLC node");
    if (!os_setup_one($node, $plcimageid, "PLC")) {
	# Two sentences; end the first with a period and a space.
	tbwarn "PLC node $node failed to boot. ".
	    "This has been reported to testbed-ops.";
	SENDMAIL($TBOPS, "1 node is down",
		 "Node:\n".
		 "  $node\n".
		 "in pid/eid $pid/$eid failed to boot after loading OS.\n\n".
		 "The nodes have been freed.\n");

	# Count the failure and skip all remaining setup.
	$failed++;
	$failed_nodes{$node} = 'UNKNOWN';
	goto tballdone;
    }

    #
    # Check for cancelation.  PLC setup may have taken awhile.
    #
    if (!$canceled) {
	TBGetCancelFlag($pid, $eid, \$canceled);
	if ($canceled) {
	    tbnotice "Swap canceled; will terminate os_setup early!";
	    goto tballdone;
	}
    }

    #
    # remove it from the nodelist
    #
    delete $nodes{$node};
}

626
#
627
# We need to issue the reboots and the reloads in parallel.
628
#
629
TBDebugTimeStamp("rebooting/reloading nodes started");
630
if (!$TESTMODE) {
631
    my @children = ();
632

633
    foreach my $imageid ( keys(%reloads) ) {
634
	my @nodelist = @{ $reloads{$imageid} };
635

636
	foreach my $node (@nodelist) {
637
638
	    TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_RELOAD() );
	    $nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_RELOAD();
639
	    # No point in reboot/reconfig obviously, since node will reboot!
640
	    delete $reboots{$node};
641
	    delete $reconfigs{$node};
642
	    $rebooted{$node} = 1;
643
644
	}

645
646
647
648
649
650
651
652
653
654
655
	my %reload_args     = ();
	my $reload_failures = {};

	$reload_args{'debug'}     = $dbg;
	$reload_args{'asyncmode'} = 1;
	$reload_args{'imageid'}   = $imageid;
	$reload_args{'nodelist'}  = [ @nodelist ];

	my $pid = osload(\%reload_args, $reload_failures);
	push(@children, [ $pid, \&osload_wait,
			  [ @nodelist ], $reload_failures ]);
656
657
658
	sleep(5);
    }

659
660
661
    #
    # Fire off the reboots.
    # 
662
    if (keys(%reboots)) {
Chad Barb's avatar
   
Chad Barb committed
663
664
	foreach my $node (keys(%reboots)) {
	    if ($nodeAllocStates{$node} eq TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
Chad Barb's avatar
   
Chad Barb committed
665
		TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_REBOOT_CLEAN());
Chad Barb's avatar
   
Chad Barb committed
666
667
		$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_REBOOT_CLEAN();
	    } else {
Chad Barb's avatar
   
Chad Barb committed
668
		TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_REBOOT_DIRTY());
Chad Barb's avatar
   
Chad Barb committed
669
670
		$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_REBOOT_DIRTY();
	    }
671
672
	    # See below, needed for vnode_setup.
	    $rebooted{$node} = 1;
Chad Barb's avatar
   
Chad Barb committed
673
674
	}

675
676
677
678
679
680
681
682
683
684
685
686
687
	my @nodelist        = keys(%reboots);
	my %reboot_args     = ();
	my $reboot_failures = {};

	$reboot_args{'debug'}     = $dbg;
	$reboot_args{'waitmode'}  = 0;
	$reboot_args{'asyncmode'} = 1;
	$reboot_args{'nodelist'}  = [ @nodelist ];

	my $pid = nodereboot(\%reboot_args, $reboot_failures);
	push(@children, [ $pid, \&nodereboot_wait,
			  [ @nodelist ], $reboot_failures ]);
	sleep(2);
688
689
    }

690
    #
691
    # Fire off the reconfigs.
692
693
    #
    if (keys(%reconfigs)) {
694
695
696
697
698
699
700
701
702
703
704
705
706
	my @nodelist        = keys(%reconfigs);
	my %reboot_args     = ();
	my $reboot_failures = {};

	$reboot_args{'debug'}     = $dbg;
	$reboot_args{'waitmode'}  = 0;
	$reboot_args{'asyncmode'} = 1;
	$reboot_args{'reconfig'}  = 1;
	$reboot_args{'nodelist'}  = [ @nodelist ];

	my $pid = nodereboot(\%reboot_args, $reboot_failures);
	push(@children, [ $pid, \&nodereboot_wait,
			  [ @nodelist ], $reboot_failures ]);
707
708
    }

709
710
711
712
713
714
715
716
    #
    # Wait for all of the children to exit. We look at the $pid to know if
    # command failed/ended immediately; otherwise we need to wait on it.
    # For any failures, record the node failures for later so that we do
    # not wait for them needlessly.
    #
    while (@children) {
	my ($pid, $waitfunc, $listref, $hashref) = @{ pop(@children) };
717

718
719
720
	# This is not likely to happen.
	next
	    if ($pid == 0);
721

722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
	# A positive pid is a child that must be waited on; a false return
	# from its wait function means there is nothing to record for it.
	next
	    if ($pid > 0 && !$waitfunc->($pid));

	#
	# Failure. A negative pid means the whole node list failed;
	# otherwise pick out the nodes flagged true in the return hash.
	#
	my @nodelist = (($pid < 0)
			? @{ $listref }
			: grep { $hashref->{$_} } keys(%{ $hashref }));
743

744
745
746
747
748
749
	#
	# These errors are unusual enough that we do not want to retry
	# or keep going even if canfail is set. Better to stop and let
	# someone look at what happened.
	#
	$noretry = 1;
750

751
	foreach my $node (@nodelist) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
752
	    tbnotice "Not waiting for $node since its reload/reboot failed!";
753
	    $failed++;
Kevin Atkinson's avatar
   
Kevin Atkinson committed
754
	    $failed_nodes{$node} = 'UNKNOWN';
755
	    delete($nodes{$node});
756
757
758

	    TBSetNodeAllocState($node, TBDB_ALLOCSTATE_DOWN());
	    $nodeAllocStates{$node} = TBDB_ALLOCSTATE_DOWN();
759
760
761
	}
    }
}
762
763
TBDebugTimeStamp("rebooting/reloading finished");

764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
#
# XXX declare the inner plab nodes as UP since we won't be hearing from
# them again (they are talking only to their PLC).
#
if ($plabinelab) {
    my @inner_nodes = ();
    TBExptPlabInElabNodes($pid, $eid, \@inner_nodes);

    foreach my $inner (@inner_nodes) {
	# Only nodes still on the wait list need to be marked.
	next
	    unless (exists($nodes{$inner}));

	tbnotice("Not waiting for emulated plab node $inner");

	# Mark the node booted, ready and up, then stop tracking it.
	SetNodeBootStatus($inner, NODEBOOTSTATUS_OKAY);
	TBSetNodeAllocState($inner, TBDB_ALLOCSTATE_RES_READY());
	$nodeAllocStates{$inner} = TBDB_ALLOCSTATE_RES_READY();
	TBSetNodeEventState($inner, TBDB_NODESTATE_ISUP());
	delete($nodes{$inner});
    }
}

783
784
785
786
787
#
# Remaining nodes we need to wait for. Why do we wait in the face of errors
# above? So that they enter a reasonably known state before we try to tear
# things down. Otherwise we could end up power cycling nodes a lot more often.
# This should probably be handled in other ways, say via stated or the alloc
788
# state machine.
789
#
790
791
my @nodelist = keys(%nodes);

792
#
793
794
795
# Now lets wait for them to come back alive. Set up a retry list though
# so that we can give each node at least one second chance. Avoids pointless
# experiment failures.
796
#
797
if (@nodelist) {
798
    print "Waiting for local testbed nodes to finish rebooting ...\n";
799
}
800

801
802
803
804
805
806
807
# Every node on the wait list starts with one retry and a wait clock
# that begins now.
my %retries   = map { ($_ => 1) } @nodelist;
my %waitstart = map { ($_ => time) } @nodelist;

808
809
810
811
812
813
814
815
#
# List of nodes to inform the user and testbed-ops about in the event
# of failures.  We coalesce the nodes here so we only sent one message.
#
my @informuser = ();
my @informtbopswarn = ();
my @informtbopsfatal = ();

816
817
818
819
TBDebugTimeStamp("Local node waiting started");
while ( @nodelist ) {
    my $node   = shift(@nodelist);
    my $wstart = $waitstart{$node};
820
    my $actual_state;
821
822
823
824
825
826
827
828
    # Default wait is seven minutes. When both the BIOS wait for the
    # node type and the reboot wait for the OSID are known, wait for
    # twice their sum instead.
    my $waittime  = 60 * 7;
    my $bios_wait = $bios_waittime{$node_types{$node}};

    if (defined($bios_wait)) {
	my $reboot_wait = $reboot_waittime{$osids{$node}};

	$waittime = ($bios_wait + $reboot_wait) * 2
	    if (defined($reboot_wait));
    }
829

830
    if (!TBNodeStateWait($node, $wstart, $waittime, \$actual_state,
831
832
			 (TBDB_NODESTATE_TBFAILED, TBDB_NODESTATE_ISUP))) {
	if ($actual_state eq TBDB_NODESTATE_TBFAILED) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
833
	    tbwarn "$node reported a TBFAILED event; not retrying";
834
835
836
	    $retries{$node} = 0;
	    goto tbfailed;
	}
837
838
839
	print "$node is alive and well\n";
	SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
	TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_READY() );
840
	$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
841
	next;
842
843
    }

844
845
846
847
848
849
850
851
    #
    # Check for cancelation. Do not want to retry the reboots if the
    # swap was canceled.
    #
    if (!$canceled) {
	TBGetCancelFlag($pid, $eid, \$canceled);

	if ($canceled) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
852
	    tbnotice "Swap canceled; will terminate os_setup early!";
853
854
855
	}
    }

856
    if ($retries{$node} && !($canceled || $noretry)) {
857
	$retries{$node} -= 1;
858

Kevin Atkinson's avatar
   
Kevin Atkinson committed
859
	tbnotice "Rebooting $node and waiting again ...";
860

861
862
863
864
865
866
867
868
	if (system("$nodereboot $node") == 0) {
	    push(@nodelist, $node);
	    $waitstart{$node} = time;
	    next;
	}
	# Fall through on failure.
    }

Kevin Atkinson's avatar
   
Kevin Atkinson committed
869
    tbwarn "$node may be down. This has been reported to testbed-ops.";
870

871
872
873
  tbfailed:
    SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);

874
    if ($canfail{$node} && !($canceled || $noretry)) {
875
	push(@informuser, $node);
Kevin Atkinson's avatar
   
Kevin Atkinson committed
876
	$failed_nonfatal_nodes{$node} = 'UNKNOWN';
Kevin Atkinson's avatar
   
Kevin Atkinson committed
877
	tbnotice "Continuing with experiment setup anyway ...";
878
	next;
879
    }
880

881
882
    #
    # If the user has picked a standard image and it fails to boot,
883
    # something is wrong, so reserve it to hwdown experiment. If the
884
885
886
887
888
889
890
891
892
893
894
895
    # image belongs to the user, then we assume its the image at fault,
    # and allow it to be returned to the pool (the caller, tbswap, will end
    # up doing the nfree on nodes with a DOWN allocstate).
    #
    my $pidofosid;
    if (! TBOsidToPid($osids{$node}, \$pidofosid) ||
	$pidofosid eq TBOPSPID()) {
	MarkNodeDown($node);
	TBSetNodeLogEntry($node, $dbuid, TB_DEFAULT_NODELOGTYPE(),
			  "'Moved to hwdown by os_setup; ".
			  "failed to boot image for osid " . $osids{$node} .
			  " in $pid/$eid'");
896
897
898
	push(@informtbopsfatal, $node);
    } else {
	push(@informtbopswarn, $node);
899
    }
900
    TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_DOWN() );
901
    $nodeAllocStates{$node} = TBDB_ALLOCSTATE_DOWN();
902
903

    $failed++;
Kevin Atkinson's avatar
   
Kevin Atkinson committed
904
    $failed_nodes{$node} = 'UNKNOWN';
905
}
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944

#
# Spam time!  Send mail to the user and testbed-ops about failures.
#
# Nonfatal failures: tell the user, with a Cc to testbed-ops.
my $count = scalar(@informuser);
if ($count) {
    my $deadnodes = join(" ", @informuser);

    SENDMAIL($user_email_to, "$count nodes are down",
	     "Nodes:\n".
	     "  $deadnodes\n".
	     "in pid/eid $pid/$eid appear to be dead.\n\n".
	     "Your experiment will continue to run since these failures\n".
	     "are nonfatal, although you might encounter other problems\n".
	     "if your experiment depends explicitly on these nodes.\n".
	     "You should terminate this experiment if it cannot ".
	     "tolerate these failures.\n\n".
	     "Testbed Operations has also been notified.\n\n".
	     "Thanks\n".
	     "Testbed Operations\n",
	     0,
	     "Cc: $TBOPS");
}

# Nodes that were marked down (moved to hwdown).
$count = scalar(@informtbopsfatal);
if ($count) {
    my $deadnodes = join(" ", @informtbopsfatal);

    SENDMAIL($TBOPS, "$count nodes are down",
	     "Nodes:\n".
	     "  $deadnodes\n".
	     "in pid/eid $pid/$eid appear to be dead.\n\n".
	     "The nodes have been taken out of the pool until this matter ".
	     "is resolved.\n");
}

# Nodes that failed to boot but were not marked down.
$count = scalar(@informtbopswarn);
if ($count) {
    my $deadnodes = join(" ", @informtbopswarn);

    SENDMAIL($TBOPS, "$count nodes are down",
	     "Nodes:\n".
	     "  $deadnodes\n".
	     "in pid/eid $pid/$eid failed to boot after loading OS.\n\n".
	     "The nodes have been freed.\n");
}

945
TBDebugTimeStamp("Local node waiting finished");
946

947
948
949
950
951
#
# Now deal with virtual nodes.
#
# We do this in a sub script since nodes are not owned by the user
# and so must be setuid root so that ssh will work.
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
#
my @vnodelist = keys(%vnodes);

#
# Set the allocstate for the local vnodes that were successfully rebooted
# and came to ISUP above. These do not need to be setup again! We move
# them to RES_READY, so vnode_setup will ignore them. If they fail to
# hit ISUP, we will move them to DOWN so that vnode_setup will ignore
# them again, in the teardown phase.
#
# Note, we do this even if there were failures above, since the teardown
# phase is going to happen, and we want vnode_setup to know which nodes
# came up with phynodes okay (need to be torndown) and which ones never
# had the chance (no need to teardown). Think swapmod, which does teardown
# in the ACTIVATING state.
#
foreach my $vnode (@vnodelist) {
    my $pnode  = $vnode2pnode{$vnode};

971
    # Default retry count.
972
    $retries{$vnode} = 0;
973

974
    # Remote node, always does setup.
975
976
    next
	if (!exists($nodes{$pnode}));
977
    
978
979
    # Pnode was neither rebooted nor reconfigured, so leave allocstate alone
    # for vnode_setup (has to be done).
980
    next
981
	if (!exists($rebooted{$pnode}) && !exists($reconfigs{$pnode}));
982
983
984
985
986
987
988

    if ($nodeAllocStates{$pnode} eq TBDB_ALLOCSTATE_RES_READY()) {
	TBSetNodeAllocState($vnode, TBDB_ALLOCSTATE_RES_READY());
	$nodeAllocStates{$vnode} = TBDB_ALLOCSTATE_RES_READY();
    }
}

#
# Reset the failure lists. See above.
#
@informuser = ();
@informtbopswarn = ();
@informtbopsfatal = ();

#
# XXX - Don't bother if something above failed. A waste of time and
# usually leads to cascading errors.
#
if ($canceled && @vnodelist) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
1001
    tbnotice "Skipping virtual node setup since swapin was canceled!";
1002
1003
}
elsif ($failed && @vnodelist) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
1004
1005
    tbnotice "Skipping virtual node setup since there were previous ".
	"failures!";
1006
1007
}
elsif (@vnodelist) {
Kirk Webb's avatar
Kirk Webb committed
1008
    my $vnode_setup_args = ""; # add any generic args here.
1009
    my @retry_list = ();
Kirk Webb's avatar
Kirk Webb committed
1010

Leigh B. Stoller's avatar
Leigh B. Stoller committed
1011
    TBDebugTimeStamp("Setting up virtual nodes");
1012
    print "Setting up virtual testbed nodes ...\n";
1013

Kirk Webb's avatar
Kirk Webb committed
1014
1015
1016
1017
1018
1019
1020
1021
    # If there are any plab vnodes, we have to adjust batching and timeouts
    # accordingly.
    if (grep($_, values(%plabvnodes))) {
        my $plabnumbatch = TBGetSiteVar("plab/setup/vnode_batch_size");
        my $plabwait     = TBGetSiteVar("plab/setup/vnode_wait_time");        
        $vnode_setup_args .= " -n $plabnumbatch -w $plabwait ";
    }

1022
  retry:
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1023
    TBDebugTimeStamp("Setting up virtual nodes");
Kirk Webb's avatar
Kirk Webb committed
1024
    system("$vnode_setup $vnode_setup_args $pid $eid");
1025
    if ($?) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
1026
	die_noretry("Vnode setup failed!");
1027
    }
1028
1029
    print "Waiting for virtual testbed nodes to finish setting up ...\n";
    TBDebugTimeStamp("Virtual node waiting started");
1030

1031
1032
1033
    foreach my $node (@vnodelist) {
	$waitstart{$node} = time;
    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1034
    @vnodelist = sort(@vnodelist);
1035

1036
1037
1038
    while ( @vnodelist ) {
	my $node   = shift(@vnodelist);
	my $pnode  = $vnode2pnode{$node};
1039
	my $islocal= exists($nodes{$pnode});
1040
1041
	my $wstart = $waitstart{$node};
	my $curallocstate;
1042
	my $actual_state;
1043

Kirk Webb's avatar
   
Kirk Webb committed
1044
1045
1046
1047
1048
1049
1050
1051
1052
        #
        # Base the maxwait for vnodes on the reboot_waittime field for
        # their respective OSIDs, with some slop time that scales up
        # as a function of the number of vnodes on the parent pnode.
        #
        my $osid        = $osids{$node};
        my $reboot_time = $reboot_waittime{$osid};
	my $maxwait     = $reboot_time + (40 * $pnodevcount{$pnode});

1053
1054
1055
	TBGetNodeAllocState($node, \$curallocstate);

	#
1056
	# See if vnode_setup already determined the node was dead.
1057
1058
1059
1060
	#
	if ($curallocstate ne TBDB_ALLOCSTATE_DOWN() &&
	    $curallocstate ne TBDB_ALLOCSTATE_DEAD()) {

1061
1062
1063
1064
1065
	    if (!TBNodeStateWait($node, $wstart, $maxwait, \$actual_state,
				 (TBDB_NODESTATE_TBFAILED,
				  TBDB_NODESTATE_ISUP))) {

		if ($actual_state eq TBDB_NODESTATE_TBFAILED) {
Kevin Atkinson's avatar
   
Kevin Atkinson committed
1066
		    tbwarn "$node reported a TBFAILED event.";
1067
1068
		    goto vtbfailed;
		}
1069
		print "$node is alive and well\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1070
1071
		TBDebugTimeStamp("Virtual node $node setup ISUP");
		
1072
1073
1074
		# Might have already been set above.
		TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY);
		SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
1075
		next;
1076
	    }
1077

1078
	  vtbfailed:
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1079
	    TBDebugTimeStamp("Virtual node $node setup FAILED");
1080
1081
	    SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
	    TBSetNodeAllocState($node, TBDB_ALLOCSTATE_DOWN());
1082
1083
1084
1085
1086
1087
1088
1089

	    #
	    # If a local node, lets retry since jail setup appears to be
	    # rather flaky.