#!/usr/bin/perl -wT
use English;
require 'ctime.pl';

#
# TODO: Delta stuff.
#       Reload disk images.
#       Finish up local OS spec stuff. Kinda hacky right now. Does not deal
#       with command lines or disk images. Only does netboot type stuff and
#       hardwires the path to /tftpboot/proj/$pid/$imagepaths{$os}
#

#
# Parse an IR file and determine what OS has been requested on each node.
# Do the database magic to make it so. Only root, admin types, or the
# owner of the experiment may do this. The machines are then
# rebooted (or power cycled).
#
# usage: os_setup <pid> <eid> <ir_file>
#

#
# Configure variables
#
my $TB		= "@prefix@";
my $DBNAME	= "@TBDBNAME@";

my $ssh		= "ssh -n -q";
my $power	= "$TB/bin/power";
my $ping	= "/sbin/ping";
my $mail        = "/usr/bin/mail";
my $tbops       = "testbed-ops\@flux.cs.utah.edu";
my $dbg		= 0;
my %imagepaths  = ();		# local image id -> path (from the IR file)
my %imageparts  = ();		# local image id -> partition spec
my %nodeos      = ();		# node -> requested OS id
my %nodepath    = ();		# node -> boot path
my %nodepart    = ();		# node -> partition (local images only)
my %waitfor     = ();		# node -> 1 if we should wait for it to ping
my %pids	= ();		# node -> pid of its reboot child
my $SAVEUID	= $UID;
my @row;

#
# This stuff is BOGUS! Quick hack for paper deadline to make Jay happy.
#
my $doreloading = 0;
my $forcereload = 0;
my $NETDISK     = "/tftpboot/netdisk";
my $PAPERADDR	= "boss.emulab.net";
my $IMAGE       = "/usr/testbed/images/wd0-all.ndz";
my $RELOADCMD   = "${PAPERADDR}:${IMAGE} wd0";
my %reload      = ();

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

$| = 1; # Autoflush output (no buffering on the selected handle)

#
# Set up for querying the database.
#
use Mysql;
my $DB = Mysql->connect("localhost", $DBNAME, "script", "none")
    or die("os_setup: Could not connect to database $DBNAME\n");

if (@ARGV != 3) {
    die("Usage: os_setup <pid> <eid> <ir_file>\n".
	"Sets node OS configuration from a .ir file.\n");
}

#
# pid and eid are interpolated into SQL statements below, and this script
# runs in taint mode and later sets $UID to 0. Accept only a conservative
# character set and keep the untainted captures.
#
my ($pid) = ($ARGV[0] =~ /^([-\w]+)\z/)
    or die("os_setup: Illegal characters in pid '$ARGV[0]'\n");
my ($eid) = ($ARGV[1] =~ /^([-\w]+)\z/)
    or die("os_setup: Illegal characters in eid '$ARGV[1]'\n");

# The IR file name is only ever opened for reading and quoted in messages.
my $ir  = $ARGV[2];

#
# Figure out who called us. Only root, people with admin status in the DB,
# or the owner (expt_head_uid) of the experiment can run this script.
#
$db_result = $DB->query("select expt_head_uid from experiments ".
			"where eid='$eid' and pid='$pid'");
if (!$db_result) {
    die("os_setup: Could not query the experiments table.\n");
}
if ($db_result->numrows < 1) {
    die("There is no experiment '$eid' in project '$pid'.\n");
}

if ($UID != 0) {
    my ($me) = getpwuid($UID)
	or die "$UID not in passwd file";

    @row = $db_result->fetchrow_array();
    if (!defined($row[0]) || $row[0] ne $me) {
	print STDERR "Checking for admin status ...\n" if $dbg;
	$db_result = $DB->query("select admin from users where uid='$me'");
	@row = $db_result ? $db_result->fetchrow_array() : ();

	# A failed query, a missing users row, or a NULL admin column all
	# mean "not an administrator".
	if (!defined($row[0]) || $row[0] != 1) {
	    die("os_setup: You must be root or a TB administrator\n");
	}
    }
}

#
# Open up the ir file. The three-argument form makes sure the user-supplied
# name is only ever treated as a file to read, never as a mode or a pipe.
#
open(IN, '<', $ir)
    or die("Couldn't open $ir\n");

#
# Look for the start of the OS section. Exit if not found
#
my $ossection = 0;

while (<IN>) {
    if (/^start os/i) {
	$ossection = 1;
	print STDERR "Start OS section...\n" if $dbg;
	last;
    }
}
if (!$ossection) {
    die("No OS section in $ir\n");
}

#
# Parse the images table to see what the local images are. Every line
# between "start images" and "end images" is split on whitespace into
# <id> <path> <partition>.
#
while (<IN>) {
    if (/^start images/i) {
	print STDERR "Start IMAGES section...\n" if $dbg;
	next;
    }
    elsif (/^end images/i) {
	print STDERR "End IMAGES section...\n" if $dbg;
	last;
    }
    my ($id, $path, $part) = split();

    # A blank line yields no fields at all; there is nothing to record.
    next unless defined($id);

    print STDERR "$id $path $part\n" if $dbg;
    $imagepaths{$id} = $path;
    $imageparts{$id} = $part;
}

#
# Okay, parse the OS section.
#
# Search the NODES section for OS labels. Consult the database to make
# sure the node specified in the IR file is really in the pid/eid that
# was given on the command line.
#
# For every node this fills in %nodeos, %reload, %waitfor and either
# %nodepart (local image on partition 4) or %nodepath (everything else).
#
while (<IN>) {
    if (/^start nodes/i) {
	print STDERR "Start NODES section...\n" if $dbg;
	next;
    }
    elsif (/^end nodes/i) {
	print STDERR "End NODES section...\n" if $dbg;
	last;
    }
    my ($node, $os) = split();

    # Blank line: nothing to do.
    next unless defined($node);

    #
    # The IR file is user supplied and both fields end up in SQL. Node
    # names must match the same pattern that is used further down to
    # untaint them before they are handed to ping/ssh.
    #
    if ($node !~ /^[a-zA-Z0-9_\-]+$/) {
	die("Illegal node name '$node' in IR file.\n");
    }
    if (!defined($os)) {
	die("Improper image specification $node: in IR file.\n");
    }
    # Escape quote and backslash so the OS id is safe inside '...' in SQL.
    (my $sql_os = $os) =~ s/(['\\])/\\$1/g;

    print STDERR "$node $os\n" if $dbg;
    $nodeos{$node} = $os;

    $db_result = $DB->query("select pid,eid from reserved ".
			    "where node_id='$node'");
    if ($db_result->numrows < 1) {
	die("There is no node '$node' reserved in the DB.\n");
    }
    @row = $db_result->fetchrow_array();
    if ($row[0] ne "$pid" ||
	$row[1] ne "$eid") {
	die("Node '$node' pid/eid mismatch: $pid/$eid ... $row[0]/$row[1]\n");
    }

    #
    # BOGUS
    # Reload the disk when forced, or (if reloading is enabled) when the
    # node was last reserved by a different project. The lookup is only
    # issued when its answer can matter.
    #
    $reload{$node} = $forcereload;
    if ($doreloading) {
	$db_result = $DB->query("select pid from last_reservation ".
				"where node_id='$node'");
	if ($db_result->numrows) {
	    @row = $db_result->fetchrow_array();
	    if ($row[0] ne $pid) {
		$reload{$node} = 1;
	    }
	}
    }

    #
    # First see if the OS spec is a local one from the images table. We
    # leave it up to the user to make sure the OS is capable of running
    # on the node! This local image stuff is pretty hacky right now. I'm
    # only going to allow netboot type stuff and stuff on partition 4.
    #
    if ($imagepaths{$os}) {
	my $part = defined($imageparts{$os}) ? $imageparts{$os} : "";

	if ($part eq "4") {
	    $nodepart{$node} = 4;
	    $waitfor{$node}  = 0;
	    next;
	}
	if ($part eq "mb") {
	    #
	    # Okay, now it gets really bad. I'm going to form the tftpboot
	    # path right here, since local OS specs are not in the database
	    # disk_images table.
	    #
	    # NOTE(review): the path component comes from the IR file and is
	    # not checked for ".." here - confirm whether that matters.
	    #
	    $nodepath{$node} = "/tftpboot/proj/$pid/$imagepaths{$os}";
	    $waitfor{$node}  = 0;
	    next;
	}
	die("Improper local OS spec: $os. ".
	    "Partition can currently only be 4 or 'mb' (multiboot)");
    }

    #
    # Check to make sure that the OS spec is valid by checking the database.
    # Cross check type from nodes table against image_ids for that type in
    # the disk_images table.
    #
    $db_result = $DB->query("select ".
			    "disk_images.image_id,disk_images.img_path ".
			    "from disk_images left join nodes ".
			    "on nodes.type=disk_images.type ".
			    "and disk_images.image_id='$sql_os' ".
                            "where nodes.node_id='$node'");
    if ($db_result->numrows < 1) {
	die("Improper image specification $node:$os in IR file.\n");
    }
    @row = $db_result->fetchrow_array();
    $nodepath{$node} = $row[1];

    #
    # See if the OS that is going to be booted supports a ping feature.
    # This is the only way we can tell if the machine has come back alive.
    # If not, then we effectively do not support the os for anything at all.
    #
    $waitfor{$node} = OSFeatureSupported($os, "ping") ? 1 : 0;
}

#
# Lifted right out of delay_setup.
#
# For every node: record the new default boot image/path in the nodes
# table, then (unless the node uses a local partition image) start a
# reboot child and remember its pid in %pids.
#
foreach my $node ( keys %nodeos ) {
    my $pc	= $node;
    my $os	= $nodeos{$node};
    # Partition-based local images have no boot path recorded.
    my $path    = defined($nodepath{$node}) ? $nodepath{$node} : "";

    print STDOUT "Changing default OS for $pc to $os:$path ".
	         "and rebooting ...\n";

    # Both values can originate in the IR file; escape them for SQL.
    (my $sql_os   = $os)   =~ s/(['\\])/\\$1/g;
    (my $sql_path = $path) =~ s/(['\\])/\\$1/g;

    #
    # database goo. Reset the OS stuff.
    #
    $sth = $DB->query("update nodes set ".
		      "def_boot_image_id='$sql_os',".
		      "def_boot_path='$sql_path' ".
		      "where node_id='$pc'");
    # Same test as before (undef or numeric zero is a failure), minus the
    # uninitialized-value warning when query() returns undef.
    if (!defined($sth) || $sth == 0) {
	die("Database update failed. Aborted...\n");
    }

    if ($nodepart{$node}) {
	#
	# At this point we would perhaps want to do an os_load, but I'm
	# not ready to do that. Just skip the node and let the user deal
	# with it.
	#
	# Plain print: the message is data, not a printf format.
	#
	print STDOUT "Skipping $node. You will need to load the OS.\n";
	next;
    }

    #
    # BOGUS!
    #
    if ($reload{$pc}) {
	$sth = $DB->query("update nodes set ".
			  "next_boot_path='$NETDISK',".
			  "next_boot_cmd_line='$RELOADCMD' ".
			  "where node_id='$pc'");
	if (!defined($sth)) {
	    print STDERR "WARNING: Could not schedule disk reload of $pc\n";
	}
    }

    #
    # Fire off a reboot process so that we can overlap them all.
    # We need the pid so we can wait for them all before proceeding.
    #
    $pids{$pc} = RebootNode($pc);
}

#
# Wait for all the reboot children to exit before continuing.
#
foreach my $node ( keys %nodeos ) {
    my $pc	= $node;
    my $mypid   = $pids{$pc};

    #
    # A node without an entry in %pids never had a reboot child started.
    # Calling waitpid() with an undefined pid would act like pid 0 and
    # reap some other node's child, making that node look like a failure.
    #
    next unless defined($mypid);

    waitpid($mypid, 0);
    if ($?) {
	die("Reboot of node $pc failed!");
    }
    print STDOUT "$pc rebooting ...\n";
}

print STDOUT "Waiting for testbed nodes to finish rebooting ...\n";

# Start of the wait; WaitTillAlive() measures elapsed time against this.
my $waitstart = time;

#
# Now lets wait for them to come back alive.
#
foreach my $node ( keys %nodeos ) {
    # Untaint the node name; refuse to go on with a stale capture.
    my ($pc) = ($node =~ /^([a-zA-Z0-9_\-]*)$/)
	or die("os_setup: Illegal node name '$node'\n");

    #
    # Don't bother to wait for nodes that are running foreign OSs since
    # we are not going to deal with them anyway later in the process.
    #
    if (!$waitfor{$pc}) {
	print STDOUT "Not waiting for $pc to come alive. Foreign OS.\n";
	next;
    }

    if (WaitTillAlive($pc) == 0) {
	print STDOUT "$pc is alive and well\n";
	next;
    }

    print STDOUT "$pc may be down. This has been reported to testbed-ops.\n";
    print STDOUT "Please end this experiment, and try again.\n";

    # Reserve it to testbed down

    my $cmd = "update reserved set pid='testbed',eid='down' ".
      "where eid='$eid' and pid='$pid' and node_id='$pc'";
    print "Using '$cmd'\n" if $dbg;

    #
    # Check the query result before calling methods on it; previously a
    # failed query left the return value of print() in $db_result.
    # NOTE(review): the rest of the file calls numrows, not num_rows -
    # confirm which spelling Mysql.pm expects for an update.
    #
    $db_result = $DB->query($cmd);
    if (!$db_result) {
	print STDERR "WARNING: Couldn't change reservation:".
	    $DB->errmsg."\n";
    }
    elsif (ref($db_result) && $db_result->num_rows < 1) {
	print STDERR "WARNING: Couldn't change reservation!\n";
    }

    # Send mail to testbed-ops about it
    if (open(MAIL,"| $mail -s \"TESTBED: $pc down?\" $tbops")) {
	print MAIL "User ".getpwuid($SAVEUID)." was running expt. $eid\n";
	print MAIL "in proj. $pid using ir file /proj/$pid/exp/$eid/tbdata/$ir\n";
	print MAIL "but $pc appears to be unresponsive.\n";
	print MAIL "\nPlease look into this matter. $pc has been reserved to\n";
	print MAIL "the testbed/down experiment until this has been resolved.\n\n";
	print MAIL "Thanks,\nTestbed Operations\ntestbed-ops\@flux.cs.utah.edu\n";
	close(MAIL);
    }
    else {
	print STDERR "WARNING: Could not send mail to $tbops: $!\n";
    }

    die("Oops, $pc did not come back alive!\n");
}

print STDOUT "OS Setup Done!\n";
exit 0;

#
# Power cycle a PC using the testbed power program ($power).
#
#   $pc - name of the node to cycle.
#
# A failure is only reported as a warning on STDERR; callers do not get
# a meaningful return value.
#
sub PowerCycle {
    my ($pc) = @_;

    # List form of system(): the arguments never pass through a shell.
    if (system($power, "cycle", $pc) != 0) {
	print STDERR "WARNING: Could not power cycle $pc. Skipping ...\n";
    }
}

#
# Ping a node until it answers or we run out of patience.
#
#   $pc - name of the node to wait for.
#
# Returns 0 as soon as a ping reply ("bytes from") is seen, 1 if the node
# never answered. Gives up after 200 ping rounds, or once more than
# $maxwait seconds have passed since $waitstart (150, plus 350 when the
# node is being reloaded) and at least five rounds have been tried.
#
sub WaitTillAlive {
    my ($pc) = @_;

    my $maxwait = 150;
    if ($reload{$pc}) {
	$maxwait += 350;
    }

    print STDERR "Waiting for $pc to come alive\n" if $dbg;
    #
    # Sigh, a long ping results in the script waiting until all the
    # packets are sent from all the pings, before it will exit. So,
    # loop doing a bunch of shorter pings.
    #
    my $lasttime = ( (time - $waitstart) > 60 ? 61 : (time - $waitstart));
    for (my $i = 0; $i < 200; $i++) {
	open(PING, "$ping -c 3 -t 4 $pc 2>&1 |")
	    or die("Could not run $ping: $!\n");

	#
	# Read until a reply or the summary line shows up. Stop at end of
	# file as well: ping can exit without printing a summary, and the
	# old read loop would then spin forever.
	#
	my $alive   = 0;
	my $summary = 0;
	while (defined(my $line = <PING>)) {
	    if ($line =~ /bytes from/) {
		$alive = 1;
		last;
	    }
	    if ($line =~ /transmitted, (\d*) packets received/) {
		$summary = 1;
		last;
	    }
	}
	close(PING);

	if ($alive) {
	    print STDERR "Yep, $pc alive and well\n" if $dbg;
	    return 0;
	}
	# ping quit without a summary; do not burn through the rounds.
	sleep(1) if !$summary;

	my $curtime = time - $waitstart;
	print "Waited ",$curtime," seconds...\n" if $dbg;
	# Report once each time the elapsed time crosses a minute boundary.
	if ( $curtime % 60 < $lasttime % 60 ) {
	  print STDERR "Still waiting for $pc - its been ",
	  (int ($curtime/60))," min.\n";
	}
	$lasttime = $curtime;
	if ($i > 3 && $curtime > $maxwait) { last; }
    }
    print STDERR "$pc is not responding. Better check into it.\n" if $dbg;
    return 1;
}

#
# Reboot a node in a child process. Return the pid to the parent so
# that it can wait on all the children later.
#
#   $pc - name of the node to reboot.
#
# The parent returns the child's pid (and dies if the fork fails). The
# child never returns: it exits 0 after rebooting or power cycling the
# node, or dies (non-zero exit) if it cannot run ping or fork ssh.
#
sub RebootNode {
    my ($pc) = @_;

    print STDOUT "Rebooting $pc ...\n";

    my $mypid = fork();
    if (!defined($mypid)) {
	# Without this check a failed fork made the parent itself run the
	# child code below and exit.
	die("Could not fork a reboot process for $pc: $!\n");
    }
    if ($mypid) {
	return $mypid;
    }

    #
    # See if the machine is pingable. If its not pingable, then
    # we just power cycle the machine rather than wait for a bunch
    # of ssh/rsh commands to time out.
    #
    print STDERR "Pinging $pc ... \n" if $dbg;
    if (! -e $ping) {
	die("PING command $ping not found!\n");
    }
    open(PING, "$ping -c 4 -t 4 $pc 2>&1 |")
	or die("Could not run $ping: $!\n");

    #
    # Pull the received-packet count out of the summary line. If ping
    # exits without printing one, count that as zero replies instead of
    # reading past end of file forever.
    #
    my $received = 0;
    while (defined(my $line = <PING>)) {
	if ($line =~ /transmitted, (\d*) packets received/) {
	    $received = length($1) ? $1 : 0;
	    last;
	}
    }
    close(PING);
    print STDERR "Got back $received ping packets from $pc.\n" if $dbg;

    #
    # Power cycle if the machine is dead. It will come back up with the
    # proper OS, cause we modified the database above.
    #
    if ( $received == 0 ) {
	print STDERR "$pc appears to be dead. Power cycling ...\n" if $dbg;
	PowerCycle($pc);
	exit(0);
    }

    #
    # Machine is pingable at least. Try to reboot it gracefully,
    # or power cycle anyway if that does not work. To do this, we must
    # change our real UID to root so that ssh will work.
    #
    print STDERR "Rebooting $pc with ssh command ...\n" if $dbg;

    #
    # Run an ssh command in a child process, protected by an alarm to
    # ensure that the ssh is not hung up forever if the machine is in
    # some funky state.
    #
    my $syspid = fork();
    if (!defined($syspid)) {
	die("Could not fork ssh for $pc: $!\n");
    }
    if ($syspid) {
	local $SIG{ALRM} = sub { kill("TERM", $syspid); };
	alarm 60;
	waitpid($syspid, 0);
	alarm 0;

	#
	# If ssh times out, just punch the button. A wait status of 15
	# means ssh was killed by the SIGTERM sent from the alarm handler.
	#
	if ($? == 15) {
	    print STDERR "$pc appears to be wedged. Power cycling ...\n"
		if $dbg;
	    PowerCycle($pc);
	    exit(0);
	}
    }
    else {
	$UID = 0;
	exec("$ssh $pc /sbin/reboot")
	    or print STDERR "Could not exec $ssh: $!\n";
	# Only reached when exec fails; the parent then checks with ping
	# and power cycles if the node is still up.
	exit(1);
    }

    #
    # Okay, before we power cycle lets really make sure. On FreeBSD, it might
    # have rebooted, but since the connection is terminated, system returns
    # an error status. So, lets ping it again and if its pingable, the
    # reboot must have failed. If it is not pingable, I assume that the
    # reboot really worked, and the exit value can be ignored.
    #
    my $exit_value = $? >> 8;
    print STDERR "reboot returned $exit_value. Lets make sure it dies\n"
	if $dbg;

    if (WaitTillDead($pc) == 0) {
	exit(0);
    }

    print STDERR "$pc appears to still be running. Power cycling ...\n"
	if $dbg;
    PowerCycle($pc);
    exit(0);
}

#
# Ping a node until it stops answering.
#
#   $pc - name of the node to watch.
#
# Runs up to 15 short ping rounds. Returns 0 as soon as a round gets no
# replies (the node went down), 1 if every round still got replies.
#
sub WaitTillDead {
    my ($pc) = @_;

    print STDERR "Waiting for $pc to die off\n" if $dbg;
    #
    # Sigh, a long ping results in the script waiting until all the
    # packets are sent from all the pings, before it will exit. So,
    # loop doing a bunch of shorter pings.
    #
    for (my $i = 0; $i < 15; $i++) {
	open(PING, "$ping -c 4 -t 4 $pc 2>&1 |")
	    or die("Could not run $ping: $!\n");

	#
	# Find the summary line. End of file without one counts as zero
	# replies, so a ping that exits early cannot hang us here.
	#
	my $received = 0;
	while (defined(my $line = <PING>)) {
	    if ($line =~ /transmitted, (\d*) packets received/) {
		$received = length($1) ? $1 : 0;
		last;
	    }
	}
	# Close every round so the ping child is reaped before returning.
	close(PING);

	if ( $received == 0 ) {
	    print STDERR "Good, $pc must have rebooted.\n" if $dbg;
	    return 0;
	}
    }
    print STDERR "$pc is still alive.\n" if $dbg;
    return 1;
}

#
# Check whether an OS image advertises a feature.
#
#   $os      - image_id to look up in the disk_images table.
#   $feature - feature name to look for (e.g. "ping").
#
# Returns 1 if $feature is one of the comma separated entries in the
# image's osfeatures column, 0 otherwise (unknown image, NULL column, or
# feature not listed). Dies if the query itself fails.
#
sub OSFeatureSupported {
    my ($os, $feature) = @_;

    # Escape quote and backslash so the id is safe inside '...' in SQL.
    (my $sql_os = $os) =~ s/(['\\])/\\$1/g;

    # Use a private result handle; the caller keeps its own query result
    # in the global $db_result.
    my $result = $DB->query("select osfeatures from disk_images ".
			    "where image_id='$sql_os'");
    if (!$result) {
	die("os_setup: Could not query osfeatures for '$os'\n");
    }
    if ($result->numrows < 1) {
	return 0;
    }

    # Fetch in list context and take the single selected column.
    my ($features) = $result->fetchrow_array();
    return 0 unless defined($features);

    foreach my $osfeature (split(',', $features)) {
	if ($feature eq $osfeature) {
	    return 1;
	}
    }
    return 0;
}