os_setup.in 14.4 KB
Newer Older
1
2
#!/usr/bin/perl -wT
use English;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
3
require 'ctime.pl';
4

5
#
6
# TODO: Delta stuff.
7
#       Reload disk images.
8
#       Finish up local OS spec stuff. Kinda hacky right now. Does not deal
9
10
#       with command lines or disk images. Only does netboot type stuff and
#	hardwires the path to /tftpboot/proj/$pid/$imagepaths{$os}
11
12
#

13
14
15
#
# Parse an IR file and determine what OS has been requested on each node.
# Do the database magic to make it so. Only root, admin types, or the
16
17
# owners of the nodes in an experiment may do this. The machines are then
# rebooted (or power cycled).
18
19
20
# 
# usage: os_setup <pid> <eid> <ir_file>
#
21
22
23
24
25
26
27

#
# Configure variables
#
# Installation prefix and database name. The @...@ tokens are template
# placeholders (this is a .in file) -- presumably substituted at
# configure time; TODO confirm.
my $TB		= "@prefix@";
my $DBNAME	= "@TBDBNAME@";

# External commands and the address that problem reports are mailed to.
my $ssh		= "ssh -n -q";
my $power	= "$TB/bin/power";
my $ping	= "/sbin/ping";
my $mail        = "/usr/bin/mail";
my $tbops       = "testbed-ops\@flux.cs.utah.edu";

# Set to 1 to get progress chatter on STDERR.
my $dbg		= 0;

# Tables filled in while parsing the IR file.
my %imagepaths  = ();	# local image id -> path
my %imageparts  = ();	# local image id -> partition spec
my %nodeos      = ();	# node -> requested OS
my %nodepath    = ();	# node -> boot path
my %nodepart    = ();	# node -> partition (local partition images only)
my %waitfor     = ();	# node -> 1 if we wait for it to come back alive
my %pids	= ();	# node -> pid of the child rebooting it

# Remember who invoked us; used when reporting a dead node by mail.
my $SAVEUID	= $UID;
my @row;

#
# This stuff is BOGUS! Quick hack for paper deadline to make Jay happy.
#
my $doreloading = 0;
my $NETDISK     = "/tftpboot/netdisk";
my $PAPERADDR	= "boss.emulab.net";
my $IMAGE       = "/usr/testbed/images/wd0-all.ndz";
my $RELOADCMD   = "${PAPERADDR}:${IMAGE} wd0";
my %reload      = ();	# node -> 1 if its disk should be reloaded
    
54
55
56
57
58
59
60
61
62
63
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

$| = 1; # Flush output after every write so progress shows up promptly

#
# Set up for querying the database. Everything below needs the handle,
# so give up right away if the connection could not be made.
#
use Mysql;
my $DB = Mysql->connect("localhost", $DBNAME, "script", "none");
if (!$DB) {
    die("os_setup: Could not connect to database '$DBNAME'\n");
}
65
66
67

#
# Exactly three arguments are required: project id, experiment id and
# the path of the IR file.
#
if (@ARGV != 3) {
    die("Usage: os_setup <pid> <eid> <ir_file>\n".
	"Sets node OS configuration from a .ir file.\n");
}
my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $ir  = $ARGV[2];

#
# pid and eid are pasted into single-quoted SQL string literals below, so
# refuse anything that could terminate or escape such a literal.
#
foreach my $arg ($pid, $eid) {
    if ($arg =~ /['\\]/) {
	die("os_setup: Illegal characters in pid/eid argument: $arg\n");
    }
}

#
# Figure out who called us. Only root, tbroot, people with admin status
# in the DB, or the owner of the experiment can run this script.
#
78
79
80
81
82
83
84

#
# The experiment must exist. Its head uid is fetched here and compared
# against the caller below.
#
$db_result = $DB->query("select expt_head_uid from experiments ".
			"where eid='$eid' and pid='$pid'");
if (!$db_result) {
    die("os_setup: Database query for experiment $pid/$eid failed.\n");
}
if ($db_result->numrows < 1) {
    die("There is no experiment '$eid' in project '$pid'.\n");
}

#
# Root may always proceed. Anyone else must either be the experiment
# head or be flagged as an admin in the users table.
#
if ($UID != 0) {
    my ($me) = getpwuid($UID)
	or die "$UID not in passwd file";

    @row = $db_result->fetchrow_array();
    my $head = defined($row[0]) ? $row[0] : "";

    if ($head ne "$me") {
	print STDERR "Checking for admin status ...\n" if $dbg;
	$db_result = $DB->query("select admin from users where uid='$me'");

	# A failed query or a user with no row both count as "not admin".
	@row = $db_result ? $db_result->fetchrow_array() : ();
	if (!defined($row[0]) || $row[0] != 1) {
	    die("os_setup: You must be root or a TB administrator\n");
	}
    }
}

#
# Open up the ir file.
#
#
# Open the IR file for reading. The explicit "<" mode keeps special
# characters in a user-supplied name (leading ">", trailing "|", ...)
# from being interpreted as an open mode. The handle IN stays open; the
# parsing loops that follow continue reading from it.
#
open(IN, "<", $ir) || die("Couldn't open $ir: $!\n");

#
# Look for the start of the OS section. Exit if not found
#
my $ossection=0;

while (<IN>) {
    next
	unless (/^start os/i);

    $ossection=1;
    print STDERR "Start OS section...\n" if $dbg;
    last;
}
if (! $ossection) {
    die("No OS section in $ir\n");
}

126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#
# Parse the images table to see what the local images are.
# 
#
# Each line between "start images" and "end images" is expected to be
#	<id> <path> <partition>
# and is recorded in %imagepaths and %imageparts, keyed by id.
#
while (<IN>) {
    if ( /^start images/i ) {
	print STDERR "Start IMAGES section...\n" if $dbg;
	next;
    }
    elsif ( /^end images/i ) {
	print STDERR "End IMAGES section...\n" if $dbg;
	last;
    }
    my ($id,$path,$part) = split();

    # Blank lines carry no image spec; do not create an empty-key entry.
    next
	unless (defined($id));

    if ($dbg) {
	print STDERR join(" ", map { defined($_) ? $_ : "" }
			  ($id, $path, $part)), "\n";
    }
    $imagepaths{$id} = $path;
    $imageparts{$id} = $part;
}

144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#
# Okay, parse the OS section.
# 
# Search the NODES section for OS labels. Consult the database to make
# sure the node specified in the IR file is really in the pid/eid that
# was given on the command line. 
# 
#
# Each line between "start nodes" and "end nodes" is expected to be
#	<node> <os>
# For every node: verify it is reserved to $pid/$eid, decide whether it
# needs a disk reload, and work out the boot path (or partition) and
# whether we should wait for it to come back after the reboot.
#
while (<IN>) {
    if ( /^start nodes/i ) {
	print STDERR "Start NODES section...\n" if $dbg;
	next;
    }
    elsif ( /^end nodes/i ) {
	print STDERR "End NODES section...\n" if $dbg;
	last;
    }
    my ($node,$os) = split();

    # Skip blank lines; insist on both fields otherwise.
    next
	unless (defined($node));
    if (!defined($os)) {
	die("No OS specified for node '$node' in IR file.\n");
    }

    #
    # Both values end up inside quoted SQL literals, and the node name is
    # later handed to external commands. The node pattern is the same one
    # used to untaint node names before waiting for them to come alive.
    #
    if ($node !~ /^[a-zA-Z0-9_\-]+$/) {
	die("Illegal node name '$node' in IR file.\n");
    }
    if ($os =~ /['\\]/) {
	die("Illegal OS name for node '$node' in IR file.\n");
    }

    print STDERR "$node $os\n" if $dbg;
    $nodeos{$node} = $os;

    #
    # The node must be reserved, and reserved to this experiment.
    #
    $db_result = $DB->query("select pid,eid from reserved ".
			    "where node_id='$node'");
    if ($db_result->numrows < 1) {
	die("There is no node '$node' reserved in the DB.\n");
    }
    @row = $db_result->fetchrow_array();
    if ($row[0] ne "$pid" ||
	$row[1] ne "$eid") {
	die("Node '$node' pid/eid mismatch: $pid/$eid ... $row[0]/$row[1]\n");
    }

    #
    # BOGUS
    #
    # Reload the disk when the node was last used by a different project.
    # The lookup is only needed (and only done) when reloading is enabled.
    #
    $reload{$node} = 0;
    if ($doreloading) {
	$db_result = $DB->query("select pid from last_reservation ".
				"where node_id='$node'");
	if ($db_result->numrows) {
	    @row = $db_result->fetchrow_array();
	    if ($row[0] ne $pid) {
		$reload{$node} = 1;
	    }
	}
    }

    #
    # First see if the OS spec is a local one from the images table. We
    # leave it up to the user to make sure the OS is capable of running
    # on the node! This local image stuff is pretty hacky right now. I'm
    # only going to allow netboot type stuff and stuff on partition 4.
    #
    if ($imagepaths{$os}) {
	my $part = defined($imageparts{$os}) ? $imageparts{$os} : "";

	if ($part eq "4") {
	    $nodepart{$node} = 4;
	    $waitfor{$node}  = 0;
	    next;
	}
	if ($part eq "mb") {
	    #
	    # Okay, now it gets really bad. I'm going to form the tftpboot
	    # path right here, since local OS specs are not in the database
	    # disk_images table.
	    #
	    $nodepath{$node} = "/tftpboot/proj/$pid/$imagepaths{$os}";
	    $waitfor{$node}  = 0;
	    next;
	}
	die("Improper local OS spec: $os. ".
	    "Partition can currently only be 4 or 'mb' (multiboot)");
    }

    #
    # Check to make sure that the OS spec is valid by checking the database.
    # Cross check type from nodes table against image_ids for that type in
    # the disk_images table.
    #
    $db_result = $DB->query("select ".
			    "disk_images.image_id,disk_images.img_path ".
			    "from disk_images left join nodes ".
			    "on nodes.type=disk_images.type ".
			    "and disk_images.image_id='$os' ".
                            "where nodes.node_id='$node'");
    if ($db_result->numrows < 1) {
	die("Improper image specification $node:$os in IR file.\n");
    }
    @row = $db_result->fetchrow_array();
    $nodepath{$node} = $row[1];

    #
    # See if the OS that is going to be booted supports a ping feature.
    # This is the only way we can tell if the machine has come back alive.
    # If not, then we effectively do not support the os for anything at all.
    #
    $waitfor{$node} = (OSFeatureSupported($os, "ping") ? 1 : 0);
}

#
# Lifted right out of delay_setup.
# 
#
# For every node: record the new default OS in the nodes table and then
# start a child process that reboots it. Nodes that use a local
# partition image are updated but not rebooted.
#
foreach my $node ( keys %nodeos ) {
    my $pc	= $node;
    my $os	= $nodeos{$node};
    # Partition-image nodes have no boot path; store an empty one.
    my $path    = (defined($nodepath{$node}) ? $nodepath{$node} : "");

    print STDOUT "Changing default OS for $pc to $os:$path ".
	         "and rebooting ...\n";

    #
    # database goo. Reset the OS stuff.
    #
    my $sth = $DB->query("update nodes set ".
			 "def_boot_image_id='$os',def_boot_path='$path' ".
			 "where node_id='$pc'");
    if (!$sth) {
	die("Database update failed. Aborted...\n");
    }

    if ($nodepart{$node}) {
	#
	# At this point we would perhaps want to do an os_load, but I'm
	# not ready to do that. Just skip the node and let the user deal
	# with it.
	#
	print STDOUT "Skipping $node. You will need to load the OS.\n";
	next;
    }

    #
    # BOGUS!
    #
    # Arrange for the next boot to reload the disk. A failure here is
    # reported but does not stop the reboot.
    #
    if ($reload{$pc}) {
	$sth = $DB->query("update nodes set ".
			  "next_boot_path='$NETDISK',".
			  "next_boot_cmd_line='$RELOADCMD' ".
			  "where node_id='$pc'");
	if (!$sth) {
	    print STDERR "WARNING: Could not set up disk reload for $pc\n";
	}
    }

    #
    # Fire off a reboot process so that we can overlap them all.
    # We need the pid so we can wait for them all before proceeding.
    #
    $pids{$pc} = RebootNode($pc);
}
294

295
296
297
298
299
#
# Wait for all the reboot children to exit before continuing.
#
#
# Reap the reboot child of each node. Nodes that were skipped above have
# no entry in %pids; calling waitpid() for them would wait on some
# arbitrary child instead, so they are passed over.
#
foreach my $node ( keys %nodeos ) {
    my $pc	= $node;
    my $mypid   = $pids{$pc};

    next
	unless (defined($mypid));

    waitpid($mypid, 0);
    if ($?) {
	die("Reboot of node $pc failed!");
    }

    # ctime() output is printed as-is, ahead of the status line.
    my $t = ctime(time);
    print STDOUT "$t";
    print STDOUT "$pc rebooting ...\n";
}

my $t = ctime(time);
print STDOUT "$t";

print STDOUT "Waiting for testbed nodes to finish rebooting ...\n";

# Start of the waiting period; WaitTillAlive measures elapsed time from it.
my $waitstart = time;

318
319
320
321
#
# Now lets wait for them to come back alive.
#
#
# Wait for each node to answer pings again. The first node that does not
# come back is re-reserved to testbed/down, reported to testbed-ops by
# mail, and the script then dies.
#
foreach my $node ( keys %nodeos ) {
    #
    # Untaint the node name. Only use the capture when the match actually
    # succeeded; otherwise $1 would be left over from an earlier match.
    #
    my $pc;
    if ($node =~ /^([a-zA-Z0-9_\-]*)$/) {
	$pc = $1;
    }
    else {
	print STDERR "WARNING: Not waiting for oddly named node $node\n";
	next;
    }

    #
    # Don't bother to wait for nodes that are running foreign OSs since
    # we are not going to deal with them anyway later in the process.
    #
    if (!$waitfor{$pc}) {
	print STDOUT "Not waiting for $pc to come alive. Foreign OS.\n";
	next;
    }

    if (WaitTillAlive($pc) == 0) {
	my $t = ctime(time);
	print STDOUT "$t";

	print STDOUT "$pc is alive and well\n";
	next;
    }

    print STDOUT "$pc may be down. This has been reported to testbed-ops.\n";
    print STDOUT "Please end this experiment, and try again.\n";

    # Reserve it to testbed down

    my $cmd = "update reserved set pid='testbed',eid='down' ".
      "where eid='$eid' and pid='$pid' and node_id='$pc'";
    print "Using '$cmd'\n" if $dbg;
    $db_result = $DB->query($cmd);
    if (!$db_result) {
	print STDERR "WARNING: Couldn't change reservation:".
	    $DB->errmsg."\n";
    }
    # NOTE(review): the rest of this file calls numrows, not num_rows --
    # confirm this method exists on the statement handle.
    elsif ($db_result->num_rows < 1 ) {
	print STDERR "WARNING: Couldn't change reservation!\n";
    }

    # Send mail to testbed-ops about it
    if (open(MAIL,"| $mail -s \"TESTBED: $pc down?\" $tbops")) {
	print MAIL "User ".getpwuid($SAVEUID)." was running expt. $eid\n";
	print MAIL "in proj. $pid using ir file /proj/$pid/exp/$eid/tbdata/$ir\n";
	print MAIL "but $pc appears to be unresponsive.\n";
	print MAIL "\nPlease look into this matter. $pc has been reserved to\n";
	print MAIL "the testbed/down experiment until this has been resolved.\n\n";
	print MAIL "Thanks,\nTestbed Operations\ntestbed-ops\@flux.cs.utah.edu\n";
	close(MAIL);
    }
    else {
	print STDERR "WARNING: Couldn't send mail to $tbops: $!\n";
    }

    die("Oops, $pc did not come back alive!\n");
}

print STDOUT "OS Setup Done!\n";
exit 0;

#
# Power cycle a PC using the testbed power program.
#
#
# usage: PowerCycle(pc)
#
# Runs "$power cycle <pc>". A failure is only reported as a warning on
# STDERR; the caller is not told about it.
#
sub PowerCycle {
    my ($pc) = @_;

    # List form: the node name is passed as a single argument and never
    # goes through a shell.
    if (system($power, "cycle", $pc) != 0) {
	print STDERR "WARNING: Could not power cycle $pc. Skipping ...\n";
    }
}

384
#
# usage: WaitTillAlive(pc)
#
# Ping a node repeatedly until it answers. Returns 0 as soon as a reply
# is seen, 1 if the node has not answered by the time we give up.
#
# The time limit is 150 seconds measured from $waitstart, plus another
# 150 seconds when the node is marked in %reload. At least five ping
# rounds, and at most 120, are made.
#
sub WaitTillAlive {
    my ($pc) = @_;

    my $maxwait = 150;
    if ($reload{$pc}) {
	$maxwait += 150;
    }

    print STDERR "Waiting for $pc to come alive\n" if $dbg;
    #
    # Sigh, a long ping results in the script waiting until all the
    # packets are sent from all the pings, before it will exit. So,
    # loop doing a bunch of shorter pings.
    #
    my $lasttime = ( (time - $waitstart) > 60 ? 61 : (time - $waitstart));

    for (my $i = 0; $i < 120; $i++) {
	if (!open(PING, "$ping -c 3 -t 4 $pc 2>&1 |")) {
	    print STDERR "WARNING: Could not run $ping: $!\n";
	    last;
	}

	#
	# Read until a reply line or the summary line shows up. Stopping
	# at end of file as well keeps us from spinning forever when ping
	# produces neither (e.g. it could not be started).
	#
	while (defined(my $line = <PING>)) {
	    if ($line =~ /bytes from/) {
		print STDERR "Yep, $pc alive and well\n" if $dbg;
		close(PING);
		return 0;
	    }
	    last
		if ($line =~ /transmitted, (\d*) packets received/);
	}
	close(PING);

	my $curtime = time - $waitstart;
	print "Waited ",$curtime," seconds...\n" if $dbg;

	# Say something each time the elapsed time crosses a minute mark.
	if ( $curtime % 60 < $lasttime % 60 ) {
	  print STDERR "Still waiting for $pc - its been ",
	  (int ($curtime/60))," min.\n";
	}
	$lasttime = $curtime;
	if ($i > 3 && $curtime > $maxwait) { last; }
    }
    print STDERR "$pc is not responding. Better check into it.\n" if $dbg;
    return 1;
}

423
424
425
426
427
428
429
#
# Reboot a node in a child process. Return the pid to the parent so
# that it can wait on all the children later.
# 
#
# usage: RebootNode(pc)
#
# Forks a child that reboots the node and returns the child's pid to the
# caller, which is expected to waitpid() on it. Dies in the parent if
# the child cannot be forked.
#
# The child never returns; it always exits. What it does:
#   1. Ping the node. No replies: power cycle it and exit.
#   2. Otherwise run "/sbin/reboot" on the node via ssh, in a grandchild
#      guarded by a 60 second alarm. Killed by the alarm: power cycle.
#   3. Otherwise watch the node with WaitTillDead(); if it keeps
#      answering pings, power cycle it.
#
sub RebootNode {
    my ($pc) = @_;

    my $t = ctime(time);
    print STDOUT "$t";

    print STDOUT "Rebooting $pc ...\n";

    my $mypid = fork();
    if (!defined($mypid)) {
	# Without this check a failed fork would send the parent down the
	# child path below, ending the whole script at the first exit().
	die("Could not fork a reboot process for $pc: $!\n");
    }
    if ($mypid) {
	return $mypid;
    }

    #
    # See if the machine is pingable. If its not pingable, then
    # we just power cycle the machine rather than wait for a bunch
    # of ssh/rsh commands to time out.
    #
    print STDERR "Pinging $pc ... \n" if $dbg;
    if (! -e $ping) {
	die("PING command $ping not found!\n");
    }

    # Number of replies taken from ping's summary line. If the summary
    # never shows up the node is treated as not answering.
    my $received = 0;
    if (open(PING, "$ping -c 4 -t 4 $pc 2>&1 |")) {
	while (defined(my $line = <PING>)) {
	    if ($line =~ /transmitted, (\d*) packets received/) {
		$received = $1
		    if ($1 ne "");
		last;
	    }
	}
	close(PING);
    }
    print STDERR "Got back $received ping packets from $pc.\n" if $dbg;

    #
    # Power cycle if the machine is dead. It will come back up with the
    # proper OS, cause we modified the database above.
    #
    if ( $received == 0 ) {
	print STDERR "$pc appears to be dead. Power cycling ...\n" if $dbg;
	PowerCycle($pc);
	exit(0);
    }

    #
    # Machine is pingable at least. Try to reboot it gracefully,
    # or power cycle anyway if that does not work. To this, we must
    # change our real UID to root so that ssh will work.
    #
    print STDERR "Rebooting $pc with ssh command ...\n" if $dbg;

    #
    # Run an ssh command in a child process, protected by an alarm to
    # ensure that the ssh is not hung up forever if the machine is in
    # some funky state.
    #
    my $syspid = fork();
    if (!defined($syspid)) {
	print STDERR "WARNING: Could not fork ssh for $pc: $!. ".
	    "Power cycling ...\n";
	PowerCycle($pc);
	exit(0);
    }
    if ($syspid) {
	local $SIG{ALRM} = sub { kill("TERM", $syspid); };
	alarm 60;
	waitpid($syspid, 0);
	alarm 0;

	#
	# If ssh times out, just punch the button.
	# (A wait status of 15 means the ssh was killed by SIGTERM.)
	#
	if ($? == 15) {
	    print STDERR "$pc appears to be wedged. Power cycling ...\n"
		if $dbg;
	    PowerCycle($pc);
	    exit(0);
	}
    }
    else {
	$UID = 0;
	exec("$ssh $pc /sbin/reboot");
	# Only reached when the exec itself failed.
	exit(1);
    }

    #
    # Okay, before we power cycle lets really make sure. On FreeBSD, it might
    # have rebooted, but since the connection is terminated, system returns
    # an error status. So, lets ping it again and if its pingable, the
    # reboot must have failed. If it is not pingable, I assume that the
    # reboot really worked, and the exit value can be ignored.
    #
    my $exit_value = $? >> 8;
    print STDERR "reboot returned $exit_value. Lets make sure it dies\n"
	if $dbg;

    if (WaitTillDead($pc) == 0) {
	exit(0);
    }

    print STDERR "$pc appears to still be running. Power cycling ...\n"
	if $dbg;
    PowerCycle($pc);
    exit(0);
}

524
525
526
527
528
529
530
531
532
#
# usage: WaitTillDead(pc)
#
# Ping a node for up to 15 rounds, waiting for it to stop answering.
# Returns 0 as soon as a round reports zero packets received, 1 if the
# node was still answering (or ping never gave a verdict) in every round.
#
sub WaitTillDead {
    my ($pc) = @_;

    print STDERR "Waiting for $pc to die off\n" if $dbg;
    #
    # Sigh, a long ping results in the script waiting until all the
    # packets are sent from all the pings, before it will exit. So,
    # loop doing a bunch of shorter pings.
    #
    for (my $i = 0; $i < 15; $i++) {
	next
	    unless (open(PING, "$ping -c 4 -t 4 $pc 2>&1 |"));

	#
	# Look for the summary line, stopping at end of file so that
	# missing ping output cannot hang us here.
	#
	my $received;
	while (defined(my $line = <PING>)) {
	    if ($line =~ /transmitted, (\d*) packets received/) {
		$received = $1;
		last;
	    }
	}
	close(PING);

	if (defined($received) && $received == 0) {
	    print STDERR "Good, $pc must have rebooted.\n" if $dbg;
	    return 0;
	}
    }
    print STDERR "$pc is still alive.\n" if $dbg;
    return 1;
}
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565

#
# usage: OSFeatureSupported(os, feature)
#
# Looks up the osfeatures column of disk_images for the given image id
# and treats it as a comma separated list. Returns 1 if the feature is
# in that list, 0 if not (or if the image has no row or no feature list).
#
# NOTE(review): $os is interpolated into the SQL text as-is; callers are
# presumably expected to pass a sane image id -- confirm.
#
sub OSFeatureSupported {
    my ($os, $feature) = @_;

    # Use a private handle so the caller's query result is left alone.
    my $result = $DB->query("select osfeatures from disk_images ".
			    "where image_id='$os'");

    if (!$result || $result->numrows < 1) {
	return 0;
    }

    # Fetch in list context and take the single selected column.
    my ($osfeatures) = $result->fetchrow_array();
    if (!defined($osfeatures)) {
	return 0;
    }

    foreach my $osfeature (split(',', $osfeatures)) {
	return 1
	    if ($feature eq $osfeature);
    }
    return 0;
}