elabinelab.in 48.4 KB
Newer Older
1
#!/usr/bin/perl -w
2
3
#
# EMULAB-COPYRIGHT
Mike Hibler's avatar
Mike Hibler committed
4
# Copyright (c) 2004-2011 University of Utah and the Flux Group.
5
6
7
8
9
10
11
12
# All rights reserved.
#
# TODO: ntpinfo table.
#       Current source directory? From where?
#
use English;
use Getopt::Std;

13
14
15
16
# Load the Testbed support stuff.
use lib "@prefix@/lib";
use libdb;
use libtestbed;
17
use libtblog;
18
use Experiment;
19
use User;
20
use Lan;
21

22
23
24
25
26
#
# Do things necessary for setting up inner elab experiment. 
#
#
# Print the command line synopsis on STDOUT and exit with status -1.
#
sub usage()
{
    print STDOUT
	"Usage: elabinelab [-d] [-g] [-u] pid eid\n",
	"       elabinelab [-d] [-k | -f] pid eid\n",
	"       elabinelab [-d] -r pid eid [node ...]\n";

    exit(-1);
}
33
# Option letters handed to getopts().
my $optlist  = "dgkfurP";

# Debugging starts out on, so -d currently changes nothing. While it is
# set, a failed server build exits with status 0 instead of -1.
my $debug    = 1;

#
# Everything else starts out off:
#   $verbose   - mail the server build logs once all servers are built
#   $killmode  - (-k) call TearDownEmulab() and exit
#   $fwboot    - (-f) presetup pass; open up the firewall and exit
#   $dbgooonly - (-g) exit right after DumpDBGoo()
#   $update    - (-u) call UpdateEmulab() and exit
#   $remove    - (-r) call RemoveNodes() and exit
#
my ($verbose, $killmode, $fwboot, $dbgooonly, $update, $remove) = (0) x 6;
41

42
43
44
45
46
47
48
49
#
# XXX experimental speed hacks.
#     $inparallel    reboots all server in parallel (rather than serially)
#		     after setup
#     $restartnodes  uses a new bootinfo RESTART command to quickly move
#		     inner nodes from control of outer boss to inner boss
#		     avoiding all node reboots
#
50
my $inparallel = 1;
51
my $restartnodes = 0;
52

53
54
sub DumpDBGoo();

55
56
57
58
59
60
#
# Configure variables
#
# Site settings; the @...@ tokens are presumably substituted when this
# .in file is configured -- TODO confirm against the build.
my $TB		= "@prefix@";
my $TBOPS       = "@TBOPSEMAIL@";
my $CONTROL	= "@USERNODE@";
my $DBNAME      = "@TBDBNAME@";
my $TBOPSPID    = TBOPSPID();

# Programs under bin.
my $SSH		= "$TB/bin/sshtb";
my $SCP		= "/usr/bin/scp";
my $nodereboot  = "$TB/bin/node_reboot";
my $snmpit      = "$TB/bin/snmpit";

# Programs under sbin.
my $noderestart	= "$TB/sbin/bootinfosend -R";
my $makeconf    = "$TB/sbin/dhcpd_makeconf";
my $nodewait    = "$TB/sbin/node_statewait";
70
71
72

# Protos
sub TearDownEmulab();
73
74
sub RemoveNodes();
sub UpdateEmulab();
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

# Sanitize the environment: drop the variables that change how a shell
# starts up, and pin the search path.
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin';

#
# Flush output after every write.
#
$| = 1;

#
# Refuse to run without root as the effective uid.
#
die("*** $0:\n".
    "    Must be root! Maybe its a development version?\n")
    if ($EUID != 0);

# Locals
# Real uid of the caller; $UID is switched to root around ssh calls and
# put back to this value afterwards.
my $SAVEUID     = $UID;

# Experiment directories.
my ($workdir, $expdir);

# Inner role of every reserved node that has one, and the nodes filling
# the individual server roles (fs and router are optional).
my %noderoles	= ();
my ($opsnode, $bossnode, $fsnode, $routernode);

# Nodes whose inner role is 'node'.
my @expnodes    = ();

my $query_result;

# Experiment to start inside the inner emulab (if any) and its NS file.
my ($inner_experiment, $inner_nsfile);
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

#
# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments.
#
%options = ();
usage()
    if (! getopts($optlist, \%options));

# Every option letter simply switches its flag variable on.
my %flagfor = ("g" => \$dbgooonly,
	       "d" => \$debug,
	       "k" => \$killmode,
	       "f" => \$fwboot,
	       "u" => \$update,
	       "r" => \$remove,
	       "P" => \$inparallel);

foreach my $letter (keys(%flagfor)) {
    ${ $flagfor{$letter} } = 1
	if (defined($options{$letter}));
}
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#
# Both pid and eid are required. Anything after them stays in @ARGV
# (the -r form of the command takes a list of nodes).
#
if (@ARGV < 2) {
    usage();
}
my ($pid,$eid) = @ARGV;

#
# Untaint the arguments.
#
if ($pid =~ /^([-\w]+)$/) {
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
if ($eid =~ /^([-\w]+)$/) {
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}
156
157
158
159
160
# Map pid/eid to the experiment object; nothing works without it.
my $experiment = Experiment->Lookup($pid, $eid);
die("*** $0:\n".
    "    Could not map $pid/$eid to its object!\n")
    if (!defined($experiment));

# Per-experiment directories.
$workdir = TBExptWorkDir($pid, $eid);
$expdir  = PROJROOT() . "/$pid/exp/$eid";

# One build log per inner server, kept in the work directory.
my ($fslogfile, $opslogfile, $bosslogfile) =
    map { "$workdir/${_}node.log" } ("fs", "ops", "boss");

169
170
171
#
# Verify user and get his DB uid.
#
172
173
174
my $this_user = User->ThisUser();
tbdie("You ($UID) do not exist!")
    if (! defined($this_user));

# Identity used for mail and for starting the inner experiment.
my ($user_uid, $user_name, $user_email) =
    ($this_user->uid(), $this_user->name(), $this_user->email());
179

Leigh B. Stoller's avatar
Leigh B. Stoller committed
180
TBDebugTimeStampsOn();
181

182
183
184
185
#
# Get elabinelab status to make sure, and to see if we need to fire off
# an experiment inside once its setup.
#
186
187
188
189
190
my ($elabinelab, $elabinelab_eid,
    $elabinelab_nosetup, $elabinelab_singlenet) =
    ($experiment->elabinelab(),
     $experiment->elabinelab_eid(),
     $experiment->elabinelab_nosetup(),
     $experiment->elabinelab_singlenet());

# Not an elabinelab experiment; there is nothing for us to do.
if (!$elabinelab) {
    exit(0);
}

#
# See if the experiment is firewalled; $firewall is filled in by the call.
#
my $firewall;
my $firewalled = TBExptFirewall($pid, $eid, \$firewall);

200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#
# Presetup; turn off firewall.
#
if ($fwboot) {
    # Only a firewalled experiment has rules to open up.
    if ($firewalled) {
	print "Turning off firewall rules on $firewall\n";
	# Real uid root for the ssh.
	$UID = 0;
	system("$SSH -host $firewall ipfw add 1 allow all from any to any") == 0
	    or die("*** $0:\n".
		   "    Error turning off firewall rules ($firewall)!\n");
    }
    exit(0);
}

217
218
219
220
221
#
# If we are going to start an inner experiment, grab the stuff we need
# from the DB and save it. 
#
if (defined($elabinelab_eid)) {
    # The inner experiment must already exist in the DB ...
    $inner_experiment = Experiment->Lookup($pid, $elabinelab_eid);
    if (!defined($inner_experiment)) {
	die("*** $0:\n".
	    "    No such experiment in DB for $pid/$elabinelab_eid\n");
    }

    # ... and must have a non-empty NS file stored with it.
    if ($inner_experiment->GetNSFile(\$inner_nsfile) != 0) {
	die("*** $0:\n".
	    "    Could not get NS file for $inner_experiment\n");
    }
    if (!defined($inner_nsfile) || $inner_nsfile eq "") {
	die("*** $0:\n".
	    "    No nsfile in DB for $inner_experiment");
    }
}

#
# Get the role for each node.
#
$query_result =
    DBQueryFatal("select r.node_id,r.inner_elab_role from reserved as r ".
		 "where r.pid='$pid' and r.eid='$eid'");
while (my ($node_id,$role) = $query_result->fetchrow_array()) {
    # Nodes without an inner role (like the firewall node) are skipped.
    if (!defined($role)) {
	next;
    }
    $noderoles{$node_id} = $role;

    # Remember which node fills each server role; plain 'node' entries
    # are collected as the experimental nodes.
    if ($role eq 'boss' || $role eq 'boss+router') {
	$bossnode = $node_id;
    }
    elsif ($role eq 'router') {
	$routernode = $node_id;
    }
    elsif ($role eq 'ops' || $role eq 'ops+fs') {
	$opsnode = $node_id;
    }
    elsif ($role eq 'fs') {
	$fsnode = $node_id;
    }
    elsif ($role eq 'node') {
	push(@expnodes, $node_id);
    }
}

#
# Tear down an inner emulab.
# 
# The -k, -r and -u modes each hand off to their own routine and exit
# with its return value; -k wins over -r, which wins over -u.
exit(TearDownEmulab())
    if ($killmode);
exit(RemoveNodes())
    if ($remove);
exit(UpdateEmulab())
    if ($update);
272
273
274
275
276
277

#
# Get elabinelab info. If this is a container for an actual experiment,
# then need to fire off the experiment once the inner emulab is ready to
# go.
# 
Leigh B. Stoller's avatar
Leigh B. Stoller committed
278
TBDebugTimeStamp("Dumping DB state");
DumpDBGoo();

# With -g the dump is all that was asked for.
if ($dbgooonly) {
    exit(0);
}

#
# Real uid root from here on, for the SSH and SCP calls below.
#
$UID = 0;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
287

288
#
289
290
# The firewall should be off at this point; called from os_setup with -f.
# 
291

Leigh B. Stoller's avatar
Leigh B. Stoller committed
292
293
294
#
# This is temporary. I think I will switch this over to grabbing the latest
# version from the web server.
295
#
296
# XXX ugh, copy over a newer mkextrafs.pl as well (one that supports -2).
297
# XXX ughII, we only copy over a FreeBSD version, this will break a Linux boss.
298
#
299
# An rc.mkelab in the experiment directory overrides the installed one.
my $mkelab = "$TB/etc/rc.mkelab";
if (-e "$expdir/rc.mkelab") {
    $mkelab = "$expdir/rc.mkelab";
}
# mkextrafs.pl is only sent along when it is installed.
my $mkextrafs = "";
if (-e "$TB/etc/mkextrafs.pl") {
    $mkextrafs = "$TB/etc/mkextrafs.pl";
}

#
# Without a boss and an ops node there is nowhere to copy to; an undefined
# name would turn the scp below into a local copy to ":/tmp".
#
if (!defined($bossnode) || !defined($opsnode)) {
    die("*** $0:\n".
	"    No boss and/or ops node reserved to $pid/$eid!\n");
}

my @servers = ($bossnode, $opsnode);
push(@servers, $fsnode)
    if (defined($fsnode));

print "Copying $mkelab $mkextrafs to " . join("/", @servers) . "\n";
foreach my $server (@servers) {
    system("$SCP $mkelab $mkextrafs ${server}:/tmp");
    #
    # Only complain here; the build step that follows for each server
    # reports (and mails) the failure if the script never arrived.
    #
    if ($?) {
	print STDERR "*** $0:\n".
	             "    Could not copy $mkelab to $server (status $?)\n";
    }
}
315

316
317
318
#
# Build one inner server: run /tmp/rc.mkelab on the node over ssh, with
# all of its output captured in the given logfile. When that fails, the
# real uid is put back, the log is mailed to the user (with a Cc: $TBOPS
# header) and the script exits; the exit status is 0 while $debug is set
# and -1 otherwise.
#
my $buildserver = sub {
    my ($role, $node, $logfile) = @_;

    TBDebugTimeStamp("Setting up ${role}node");
    print "Setting up ${role}node on $node\n";
    system("$SSH -host $node /tmp/rc.mkelab -s -d > $logfile 2>&1");
    return
	if (!$?);

    $UID = $SAVEUID;
    SENDMAIL("$user_name <$user_email>",
	     "ElabInElab Failure: $pid/$eid",
	     "Error building the $role node ($node)",
	     $TBOPS,
	     "Cc: $TBOPS",
	     ($logfile));
    print STDERR "*** $0:\n".
	         "    Error building the ${role}node ($node)!\n";
    exit(($debug ? 0 : -1));
};

# fs first (when there is a separate fs node), then ops, then boss.
$buildserver->("fs", $fsnode, $fslogfile)
    if (defined($fsnode));
$buildserver->("ops", $opsnode, $opslogfile);
$buildserver->("boss", $bossnode, $bosslogfile);

364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
if ($verbose) {
    # Send these log files off now so that we can look at them. The fs
    # log and name lead the list when there is a separate fs node.
    my @logs  = ($opslogfile, $bosslogfile);
    my $roles = "ops/boss";
    my $names = "$opsnode/$bossnode";

    if (defined($fsnode)) {
	unshift(@logs, $fslogfile);
	$roles = "fs/$roles";
	$names = "$fsnode/$names";
    }
    SENDMAIL("$user_name <$user_email>",
	     "ElabInElab Setup Log: $pid/$eid",
	     "Logs for building $roles ($names)",
	     $TBOPS,
	     "Cc: $TBOPS",
	     @logs);
}
383
$UID  = $SAVEUID;

#
# When setup is skipped, jump before the effective uid is dropped. Nothing
# after skipsetup gives it back, and the firewall step there runs ssh after
# setting the real uid to root, like every other ssh call in this script.
#
goto skipsetup
    if ($elabinelab_nosetup);

# Run as real user for the next few scripts, which are setuid.
$EUID = $UID;
390

391
#
392
393
394
395
396
# Restart DHCPD, but first mark the nodes as being ready to boot inside
# the inner emulab, so that dhcpd_makeconf knows what nodes to change
# the entries for.
#
DBQueryFatal("update reserved set inner_elab_boot=1 ".
	     "where pid='$pid' and eid='$eid'");

print "Regenerating DHCPD config file and restarting daemon.\n";
system("$makeconf -i -r") == 0
    or die("*** $0:\n".
	   "    Failed to reconfig/restart DHCPD.\n");

406
407
408
409
410
411
412
if ($inparallel) {
    # One node_reboot call for all of the servers.
    my $nodes = join(" ", $bossnode, $opsnode,
		     (defined($fsnode) ? ($fsnode) : ()));

    print "Rebooting servers ($nodes).\n";
    TBDebugTimeStamp("Rebooting servers");
    system("$nodereboot -w $nodes") == 0
	or die("*** $0:\n".
	       "    Error rebooting the servers ($nodes)!\n");
}
else {
    # Reboot the servers one at a time, waiting for each to come back
    # before moving on: fs (if there is one), then ops, then boss.
    my @servers = (["ops", $opsnode], ["boss", $bossnode]);
    unshift(@servers, ["fs", $fsnode])
	if (defined($fsnode));

    foreach my $server (@servers) {
	my ($role, $node) = @{ $server };

	print "Rebooting ${role}node ($node).\n";
	TBDebugTimeStamp("Rebooting ${role}node");
	system("$nodereboot -w $node") == 0
	    or die("*** $0:\n".
		   "    Error rebooting the ${role}node ($node)!\n");
    }
}
# Back to root as the effective uid.
$EUID = 0;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
446

447
448
449
# Reboot the experimental nodes. They will come up inside the inner elab.
# DO NOT WAIT! They are not going to report ISUP from this point on. 
if (@expnodes) {
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
    #
    # First we try the magic pxeboot restart.
    # The nodes should still be in PXEWAIT, so we send them a restart
    # to make them re-DHCP.  This should get them quickly reparented to
    # the inner boss.
    #
    # If this doesn't work, we fall back on rebooting the nodes.
    #
    # Fast path, taken only when $restartnodes is set. Any failure in here
    # jumps to the rebootnodes label; success jumps past the reboot code
    # to the restartworked label.
    if ($restartnodes) {
	TBDebugTimeStamp("Redirecting experimental nodes to inner boss");
	# Status of the last restart command run; nonzero means it failed.
	my $stat = 0;
	# Run as real user again.
	$EUID = $UID;
	foreach my $node (@expnodes) {
	    $stat = system("$noderestart $node");
	    # Give up on the first node that fails.
	    last if ($stat);
	}
	# Effective uid back to root.
	$EUID = 0;
	if ($stat) {
	    tbwarn("Node restart failed ($stat), falling back to reboot.");
	    goto rebootnodes;
	}

	#
	# Ssh into inner boss and use a utility script to determine
	# when the nodes have reported in and are in PXEWAIT (part of the
	# inner elab). Note the short timeout, since this operation should
	# be virtually instantaneous.
	#
	print "Waiting for nodes to restart and join the inner emulab.\n";
	TBDebugTimeStamp("Waiting for inner nodes to restart");
	# Real uid root just for the ssh; restored right after it returns.
	$UID  = 0;
	$stat = system("$SSH -host $bossnode ".
		       "/usr/testbed/sbin/node_statewait -t 15 -a");
	$UID  = $SAVEUID;
	if ($stat) {
	    tbwarn("Error ($stat) waiting for nodes to restart, falling back to reboot.");
	    goto rebootnodes;
	}

	# The restart worked; skip the reboot code below.
	goto restartworked;
    }

rebootnodes:
    print "Rebooting inner experimental nodes.\n";
    TBDebugTimeStamp("Rebooting experimental nodes");

    # node_reboot is run with the real user as effective uid, then the
    # effective uid goes back to root.
    $EUID = $UID;
    system("$nodereboot -b @expnodes") == 0
	or die("*** $0:\n".
	       "    Error rebooting the expnodes (@expnodes)!\n");
    $EUID = 0;

    #
    # The wait is done on the inner boss: ssh in (as real root) and let a
    # utility script there tell us when the nodes have rebooted and are
    # in PXEWAIT (part of the inner elab).
    #
    $UID  = 0;

    print "Waiting for nodes to reboot and join the inner emulab.\n";
    TBDebugTimeStamp("Waiting for inner nodes to reboot");
    if (system("$SSH -host $bossnode /usr/testbed/sbin/node_statewait -t 180 -a")) {
	print STDERR "*** $0:\n".
	             "    Error waiting for inner nodes to join!\n";
	exit(($debug ? 0 : -1));
    }
    $UID  = $SAVEUID;
521

522
restartworked:
523
524
525
526
527
528
529
530
    #
    # To avoid confusion later (with swapmod, which wants them to be ISUP),
    # and so the web interface does not show the nodes as down, set the 
    # state to ISUP.
    #
    foreach my $node (@expnodes) {
	TBSetNodeEventState($node, TBDB_NODESTATE_ISUP());
    }
531
}
532
533

#
534
535
536
537
538
# Fire off inner elab experiment.
# 
if (defined($elabinelab_eid)) {
    # Name of the NS file on the inner boss.
    # Formatted to make batchexp happy.
    my $nsfilename = "/tmp/$pid-$elabinelab_eid-$$.nsfile";

    #
    # Write NS file to temp file so we can send it over. File::Temp picks
    # an unpredictable name and creates the file exclusively, so a file
    # or symlink planted in /tmp cannot be written through. UNLINK also
    # removes the file when we die part way through.
    #
    require File::Temp;
    my $nstmp = eval {
	File::Temp->new(TEMPLATE => "elabinelab-XXXXXXXX",
			DIR      => "/tmp",
			SUFFIX   => ".ns",
			UNLINK   => 1);
    };
    if (!defined($nstmp)) {
	die("*** $0:\n".
	    "    Could not write ns code to tmp file!\n");
    }
    my $localns = $nstmp->filename();

    print {$nstmp} $inner_nsfile, "\n"
	or die("*** $0:\n".
	       "    Could not write ns code to tmp file!\n");
    # Write errors can surface as late as the close.
    close($nstmp)
	or die("*** $0:\n".
	       "    Could not write ns code to tmp file!\n");

    #
    # Copy the file over.
    #
    $UID = 0;
    print "Sending NS file to inner bossnode ($bossnode).\n";
    system("cat $localns | $SSH -host $bossnode '(cat > $nsfilename)'");
    if ($?) {
	die("*** $0:\n".
	    "    Could not copy ns code to inner boss ($bossnode)!\n");
    }

    #
    # Now run batchexp on the node as the user. If firewalled, experiment
    # must start async (cause we have to turn the firewall back on).
    #
    my $optarg = ($firewalled ? "" : "-w");

    print "Starting experiment $pid/$elabinelab_eid on inner emulab.\n";
    TBDebugTimeStamp("Starting inner experiment");
    system("$SSH -host $bossnode ".
	   " 'sudo -u $user_uid /usr/testbed/bin/batchexp ".
	   "  -q -i $optarg -S \"ElabInElab Experiment\" ".
	   "  -L \"ElabInElab ElabInElab\" -E \"ElabInElab Experiment\" ".
	   "  -p $pid -e $elabinelab_eid $nsfilename'");
    #
    # Report a failure but keep going; the firewall still has to be
    # turned back on below.
    #
    if ($?) {
	print STDERR "*** $0:\n".
	             "    batchexp for $pid/$elabinelab_eid on $bossnode ".
		     "returned status $?\n";
    }

    $UID = $SAVEUID;
    # Done with the local copy; dropping the object removes the file.
    undef($nstmp);
}
578
skipsetup:
579

580
#
581
582
583
584
585
586
# Turn the firewall back on.
#
# XXX If this fails, we have to do something much stronger! We do not want
# nodes coming up and starting something if the firewall is not active.
# Maybe hit the panic button from here (turning off the control network).
#
587
588
589
590
591
592
#
if ($firewalled) {
    print "Turning firewall back on\n";
    $UID = 0;

    # Delete rule 1 on the firewall node; one retry, then give up.
    my $fwcmd = "$SSH -host $firewall ipfw delete 1";

    if (system($fwcmd) != 0) {
	print STDERR "*** Error turning back on firewall rules ($firewall)!\n".
		     "    Will retry again.\n";
	if (system($fwcmd) != 0) {
	    die("*** $0:\n".
		"    Error turning back on firewall rules! Retry failed.\n");
	}
    }
    $UID = $SAVEUID;
}

604
605
606
TBDebugTimeStamp("ElabInElab setup done");
exit(0);

607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
#
# Dump parts of the DB that are needed for inner elab to run. The idea
# is to create a set of files named by the table name. Note that mysqld
# cannot write to the project tree cause of directory permissions. Put the
# files into the workdir for now, and then copy them over.
#
sub DumpDBGoo()
{
    my $statedir = "$workdir/elabinelab";

    if (-d $statedir) {
	system("rm -rf $statedir");
    }
    mkdir($statedir, 0777) or
	die("*** $0:\n".
	    "    Could not mkdir $statedir\n");
    
    chmod(0777, $statedir) or
	die("*** $0:\n".
	    "    Could not chmod $statedir\n");

    #
    # These tables are dumped completely.
    #
631
    my @FULLTABLES = ("node_types", "node_type_attributes", "interface_types",
632
633
		      "interface_capabilities",
		      "switch_paths", "switch_stack_types", "switch_stacks",
Timothy Stack's avatar
   
Timothy Stack committed
634
635
		      "node_type_features", "node_types_auxtypes", "osid_map",
		      "os_boot_cmd");
636
637
638

    #
    # These tables are dumped by role (node/ops). For each one dump the table
639
640
    # as is, unless its the fs or ops node. For those we want to change the
    # node_id to "fs" or "ops" and their type to ops.
641
    #
642
643
644
    my @NODETABLES = ("node_auxtypes", "node_status", "nodes", 
		      "node_rusage", "node_hostkeys", "node_activity",
		      "interface_state");
645
646
647
648

    #
    # These tables are dumped by project ID.
    #
649
    my @PROJTABLES = ("projects", "groups");
650
651

    #
652
    # These tables are dumped by user ID (for the project members).
653
    #
654
    my @USERTABLES = ("users", "user_pubkeys");
655
656
657

    foreach my $table (@FULLTABLES) {
	unlink("$statedir/$table");
658
659
660
661
662
	DBQueryWarn("create temporary table temp_${table} ".
		    "select t.* from $table as t")
	    or die("*** $0:\n".
		   "    Could not dump table $table\n");

663
664
	if ($table eq "node_type_attributes") {
	    my $attributes_result =
665
		DBQueryFatal("select type,attrvalue from temp_${table} ".
666
667
668
669
			     "where attrkey='delay_capacity'");

	    while (my ($ntype,$value) = $attributes_result->fetchrow_array()) {
		my $newvalue = $value - 1;
670
671
672

		next
		    if ($newvalue < 0);
673
674
675
		
		DBQueryFatal("update temp_${table} set ".
			     "   attrvalue='$newvalue' ".
676
677
			     "where type='$ntype' and ".
			     "      attrkey='delay_capacity'");
678
	    }
679
680
681
	}

	DBQueryWarn("select * from temp_$table ".
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
		    "into outfile '$statedir/$table'")
	    or die("*** $0:\n".
		   "    Could not dump table $table\n");
    }

    foreach my $table (@NODETABLES) {
	unlink("$statedir/$table");
	#
	# Create a temporary table.
	#
	DBQueryWarn("create temporary table temp_${table} ".
		    "select t.* from reserved as r ".
		    "left join $table as t on t.node_id=r.node_id ".
		    "left join virt_nodes as v on v.vname=r.vname and ".
		    "     v.pid=r.pid and v.eid=r.eid ".
		    "where r.pid='$pid' and r.eid='$eid' and ".
		    "      t.node_id is not null and ".
Mike Hibler's avatar
Mike Hibler committed
699
		    "      v.inner_elab_role in ('node','fs','ops','ops+fs')")
700
701
702
	    or die("*** $0:\n".
		   "    Could not create temporary table temp_$table\n");
	#
703
704
	# Rename the fs and ops node in each table. For the nodes table,
	# there is a bunch of other stuff to do.
705
	#
706
707
708
	DBQueryFatal("update temp_${table} set node_id='fs' ".
		     "where node_id='$fsnode'")
	    if (defined($fsnode));
709
710
711
712
713
714
	DBQueryFatal("update temp_${table} set node_id='ops' ".
		     "where node_id='$opsnode'");

	if ($table eq "nodes") {
	    DBQueryFatal("update temp_${table} set ".
			 " type='ops', ".
715
			 " phys_nodeid=node_id, ".
716
717
			 " role='ctrlnode', ".
			 " op_mode='OPSNODEBSD' ".
718
			 "where node_id in ('fs','ops')");
719
720
721
722
723
724
725

	    # Also add the nodes that correspond to the "trunk" wires.
	    DBQueryFatal("insert into temp_${table} ".
			 "select distinct n.* from wires as w ".
			 "left join nodes as n on w.node_id1=n.node_id or ".
			 "     w.node_id2=n.node_id ".
			 "where w.type='Trunk'");
726
727
728
729

	    # Clear any node reservations on the inside
	    DBQueryFatal("update temp_${table} set ".
			 " reserved_pid=null where reserved_pid is not null");
730
731
732
733
734
735
736
737
738
739

	    # Put the inner nodes into "limbo" so they DTRT when restarted
	    if ($restartnodes) {
		DBQueryFatal("update temp_${table} set".
			     "  op_mode='PXEKERNEL',next_op_mode='',".
			     "  eventstate='". TBDB_NODESTATE_PXELIMBO . "',".
			     "  temp_boot_osid=NULL,next_boot_osid=NULL,".
			     "  osid=NULL".
			     " where role='testnode'");
	    }
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
	}
    
	DBQueryWarn("select * from temp_$table ".
		    "into outfile '$statedir/$table'")
	    or die("*** $0:\n".
		   "    Could not dump table $table\n");
    }

    foreach my $table (@PROJTABLES) {
	unlink("$statedir/$table");
	DBQueryWarn("select * from $table ".
		    "where pid='$pid' ".
		    "into outfile '$statedir/$table'")
	    or die("*** $0:\n".
		   "    Could not dump table $table\n");
    }
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
    #
    # Cleared versions of the project/group stats tables.
    #
    unlink("$statedir/project_stats");
    DBQueryFatal("create temporary table temp_project_stats ".
		 "like project_stats");
    DBQueryFatal("insert into temp_project_stats (pid,pid_idx) ".
		 "select pid,pid_idx from project_stats ".
		 "where pid='$pid'");
    DBQueryFatal("select * from temp_project_stats ".
		 "into outfile '$statedir/project_stats'");

    unlink("$statedir/group_stats");
    DBQueryFatal("create temporary table temp_group_stats ".
		 "like group_stats");
    DBQueryFatal("insert into temp_group_stats ".
		 "  (pid,pid_idx,gid,gid_idx,gid_uuid) ".
		 "select pid,pid_idx,gid,gid_idx,gid_uuid ".
		 "   from group_stats ".
		 "where pid='$pid'");
    DBQueryFatal("select * from temp_group_stats ".
		 "into outfile '$statedir/group_stats'");
778

779
780
781
782
783
784
785
786
787
788
789
790
    #
    # Special case the group and user policy tables. Not sure what to
    # really do about this; should there be any restrictions inside the
    # inner elab?
    #
    unlink("$statedir/group_policies");
    DBQueryWarn("select * from group_policies ".
		"where pid='$pid' or pid='+' or pid='-' ".
		"into outfile '$statedir/group_policies'")
	or die("*** $0:\n".
	       "    Could not dump table group_policies\n");

791
792
    foreach my $table (@USERTABLES) {
	unlink("$statedir/$table");
793
794

	DBQueryWarn("create temporary table temp_$table ".
795
		    "select distinct t.* from group_membership as gm ".
796
797
		    "left join users as u on u.uid_idx=gm.uid_idx ".
		    "left join $table as t on t.uid_idx=u.uid_idx ".
798
799
		    "where (gm.pid='$pid' or ".
		    "       gm.pid='" . TBOPSPID() . "') and gm.gid=gm.pid ".
800
		    " and t.uid_idx is not NULL and ".
801
802
803
804
		    " u.status='" . USERSTATUS_ACTIVE() . "'")
	    or die("*** $0:\n".
		   "    Could not create table temp_$table\n");

805
806
807
808
809
	# Clean up ... these are created in the inner elab.
	DBQueryFatal("delete from temp_${table} ".
		     "where uid='elabman' or uid='elabckup' or ".
		     "      uid='operator'");

810
	if ($table eq "users") {
811
	    my $creator_uid = $experiment->creator();
812
813
814
	    
	    DBQueryFatal("update temp_${table} set ".
			 " admin=1 ".
815
			 "where uid='$creator_uid'");
816
817

	    #
818
	    # Save time; force all other users to start out
819
820
821
822
823
	    # frozen since most users in the project do not ever
	    # actually log in. 
	    #
	    DBQueryFatal("update temp_${table} set ".
			 " status='" . USERSTATUS_FROZEN() . "' ".
824
			 "where uid!='$creator_uid'");
825
826
827
	}

	DBQueryWarn("select * from temp_$table ".
828
829
		    "into outfile '$statedir/$table'")
	    or die("*** $0:\n".
830
		   "    Could not dump table temp_$table\n");
831
    }
832
833
834
835
836
837
838
839
840
841
842
843
    #
    # We want a cleared stats table, so do it here.
    #
    DBQueryFatal("create temporary table temp_user_stats ".
		 "like user_stats");
    DBQueryFatal("insert into temp_user_stats ".
		 "  (uid,uid_idx,uid_uuid) ".
		 "select uid,uid_idx,uid_uuid from temp_users");
    DBQueryWarn("select * from temp_user_stats ".
		"into outfile '$statedir/user_stats'")
	or die("*** $0:\n".
	       "    Could not dump table temp_user_stats\n");
844
845
846

    # The group_membership is also special.
    DBQueryWarn("select gm.* from group_membership as gm ".
847
		"left join users as u on u.uid_idx=gm.uid_idx ".
848
		"where (gm.pid='$pid' or ".
849
		"       (gm.pid='" . TBOPSPID() . "' and gm.pid=gm.gid)) and ".
850
		" u.status='" . USERSTATUS_ACTIVE() . "' and ".
851
852
		" gm.uid!='elabman' and gm.uid!='elabckup' and ".
		" gm.uid!='operator' ".
853
854
855
856
857
		"into outfile '$statedir/group_membership'")
	or die("*** $0:\n".
	       "    Could not dump table group_membership\n");

    #
Mike Hibler's avatar
Mike Hibler committed
858
    # Initial images; note that these images are not going to exist inside!
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
    # 
    DBQueryWarn("select * from images ".
		"where pid='$pid' or (pid='$TBOPSPID' and global=1) ".
		"into outfile '$statedir/images'")
	or die("*** $0:\n".
	       "    Could not dump table images\n");
	    
    DBQueryWarn("create temporary table temp_os_info ".
		"select * from os_info ".
		"where pid='$pid' or (pid='$TBOPSPID' and shared=1)")
	or die("*** $0:\n".
	       "    Could not create table temp_os_info\n");

    # Ack. The MFS paths have a hardcoded "boss" in them, but that is going
    # to resolve incorrectly to an inner control IP, which will not work
    # from the pxeboot kernel since it uses the outer control network.
    # Just remove the host spec; pxeboot will do the right thing.
876
    my $query_result =
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
	DBQueryFatal("select osid,path from temp_os_info ".
		     "where path like '%:%'");
    
    while (my ($osid,$hostpath) = $query_result->fetchrow_array()) {
	my ($host,$path) = $hostpath =~ /^(.*):(.*)$/;

	DBQueryFatal("update temp_os_info set path='$path' where osid='$osid'");
    }

    DBQueryWarn("select * from temp_os_info ".
		"into outfile '$statedir/os_info'")
	or die("*** $0:\n".
	       "    Could not dump table os_info\n");
	    
    DBQueryWarn("select o.* from osidtoimageid as o ".
		"left join images as i on i.imageid=o.imageid ".
		"where i.pid='$pid' or (i.pid='$TBOPSPID' and i.global=1) ".
		"into outfile '$statedir/osidtoimageid'")
	or die("*** $0:\n".
	       "    Could not dump table osidtoimageid\n");
897
898
899
900
901
902
903
904
905
	    
    # Subosids.  Only take the mapping to parents for those children that
    # are in the e-in-e pid or are in emulab-ops and are shared.
    DBQueryWarn("select osm.* from os_submap as osm ".
		"left join os_info as osi on osm.osid=osi.osid ".
		"where osi.pid='$pid' or (osi.pid='$TBOPSPID' and osi.shared=1) ".
		"into outfile '$statedir/os_submap'")
	or die("*** $0:\n".
	       "    Could not dump table os_submap\n");
906
907
908
909
910
911
912
913
914
915
916
917

    #
    # interfaces table. Need to tag the interfaces being used as the control
    # network, with the proper tag so they do not say they experimental
    # interfaces in the inner emulab. Use a temp table again.
    #
    DBQueryWarn("create temporary table temp_interfaces ".
		"select t.* from reserved as r ".
		"left join interfaces as t on t.node_id=r.node_id ".
		"left join virt_nodes as v on v.vname=r.vname and ".
		"     v.pid=r.pid and v.eid=r.eid ".
		"where r.pid='$pid' and r.eid='$eid' and ".
Mike Hibler's avatar
Mike Hibler committed
918
		"      v.inner_elab_role in ('node','ops','fs','ops+fs')")
919
920
921
	or die("*** $0:\n".
	       "    Could not create temporary table temp_interfaces\n");

922
    if (! $elabinelab_singlenet) {
923
924
925
926
927
928
929
	# First, mark the real control network as "other" to avoid it being
	# thought of as the control network!.
	DBQueryWarn("update temp_interfaces ".
		    "set role='" . TBDB_IFACEROLE_OUTER_CONTROL() . "' " .
		    "where role='" . TBDB_IFACEROLE_CONTROL() . "'")
	    or die("*** $0:\n".
		   "    Could not delete control ifaces from temp_interfaces\n");
930

931
932
933
934
935
936
	DBQueryWarn("update temp_interfaces set ".
		    " role='" . TBDB_IFACEROLE_CONTROL() . "' " .
		    "where IP!='' and role='" . TBDB_IFACEROLE_EXPERIMENT() . "'")
	    or die("*** $0:\n".
		   "    Could not update roles in temp_interfaces\n");
    }
937

938
939
940
941
942
943
944
    # And rename the fs/ops nodes as above.
    if (defined($fsnode)) {
	DBQueryWarn("update temp_interfaces set node_id='fs' ".
		    "where node_id='$fsnode'")
	    or die("*** $0:\n".
		   "    Could not fs node_id in temp_interfaces\n");
    }
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
    DBQueryWarn("update temp_interfaces set node_id='ops' ".
		"where node_id='$opsnode'")
	or die("*** $0:\n".
	       "    Could not ops node_id in temp_interfaces\n");

    # Also add the interfaces that correspond to the "trunk" wires.
    DBQueryFatal("insert into temp_interfaces ".
		 "select distinct i.* from wires as w ".
		 "left join interfaces as i on w.node_id1=i.node_id or ".
		 "     w.node_id2=i.node_id ".
		 "where w.type='Trunk'");

    DBQueryWarn("select * from temp_interfaces ".
		"into outfile '$statedir/interfaces'")
	or die("*** $0:\n".
	       "    Could not dump table interfaces\n");

    # And the wires table. Strip out the control wires; not needed.
    DBQueryWarn("create temporary table temp_wires ".
		"select t.* from reserved as r ".
		"left join virt_nodes as v on v.vname=r.vname and ".
		"     v.pid=r.pid and v.eid=r.eid ".
967
		"left join wires as t on t.node_id1=r.node_id ".
968
		($elabinelab_singlenet == 0 ? " and t.type='Node' " : " ") .
969
		"where r.pid='$pid' and r.eid='$eid' and ".
Mike Hibler's avatar
Mike Hibler committed
970
		"      v.inner_elab_role in ('node','ops','fs','ops+fs') ")
971
972
973
	or die("*** $0:\n".
	       "    Could not create temporary table temp_wires\n");

974
975
976
977
978
979
980
    # And rename the fs/ops node as above.
    if (defined($fsnode)) {
	DBQueryWarn("update temp_wires set node_id1='fs' ".
		    "where node_id1='$fsnode'")
	    or die("*** $0:\n".
		   "    Could not fs node_id in temp_wires\n");
    }
981
982
983
984
985
    DBQueryWarn("update temp_wires set node_id1='ops' ".
		"where node_id1='$opsnode'")
	or die("*** $0:\n".
	       "    Could not ops node_id in temp_wires\n");

986
    if (! $elabinelab_singlenet) {
987
988
989
990
991
992
993
994
995
996
997
	# But we need to take out the wires that are being used as the
	# inner control network, or at least mark them as Control.
	$query_result =
	    DBQueryWarn("select node_id,card,port from temp_interfaces ".
			"where role='" . TBDB_IFACEROLE_CONTROL() . "' ");

	while (my ($node_id,$card,$port) = $query_result->fetchrow_array()) {
	    DBQueryWarn("update temp_wires set type='Control' ".
			"where node_id1='$node_id' and card1=$card and ".
			"      port1=$port");
	}
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
    }
    # Okay, now add the "trunk" wires in without any alteration.
    DBQueryWarn("insert into temp_wires ".
		"select * from wires where type='Trunk'") 
	or die("*** $0:\n".
	       "    Could not add trunk lines to temp_wires\n");

    DBQueryWarn("select * from temp_wires ".
		"into outfile '$statedir/wires'")
	or die("*** $0:\n".
	       "    Could not dump table wires\n");

    #
1011
1012
    # Ack, we need to create a reservation for the fs and ops nodes,
    # or else they will look free and it will not be able to check in.
1013
1014
1015
1016
1017
1018
    #
    DBQueryWarn("create temporary table temp_reserved ".
		"select r.* from reserved as r ".
		"left join virt_nodes as v on v.vname=r.vname and ".
		"     v.pid=r.pid and v.eid=r.eid ".
		"where r.pid='$pid' and r.eid='$eid' ".
Mike Hibler's avatar
Mike Hibler committed
1019
		"      and v.inner_elab_role in ('fs','ops','ops+fs')")
1020
1021
	or die("*** $0:\n".
	       "    Could not create temporary table temp_reserved\n");
1022
1023
1024
1025
    if (defined($fsnode)) {
	DBQueryWarn("update temp_reserved set ".
		    "   node_id='fs', ".
		    "   pid='$TBOPSPID', ".
1026
1027
		    "   eid='opsnodes', ".
		    "   exptidx=1 ".
1028
1029
1030
1031
		    "where node_id='$fsnode'")
	    or die("*** $0:\n".
		   "    Could not update temporary table temp_reserved\n");
    }
1032
1033
1034
    DBQueryWarn("update temp_reserved set ".
		"   node_id='ops', ".
		"   pid='$TBOPSPID', ".
1035
1036
		"   eid='opsnodes', ".
		"   exptidx=1 ".
1037
1038
1039
1040
1041
1042
1043
1044
		"where node_id='$opsnode'")
	or die("*** $0:\n".
	       "    Could not update temporary table temp_reserved\n");
    DBQueryWarn("select * from temp_reserved ".
		"into outfile '$statedir/reserved'")
	or die("*** $0:\n".
	       "    Could not dump table reserved\n");

Mike Hibler's avatar
Mike Hibler committed
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
    # Copy tiplines table for all nodes so web form gives us a console icon!
    DBQueryWarn("select t.tipname,t.node_id,'',0,0,NULL ".
		"from reserved as r ".
		"left join virt_nodes as v on v.vname=r.vname and ".
		"     v.pid=r.pid and v.eid=r.eid ".
		"left join tiplines as t on t.node_id=r.node_id ".
		"where r.pid='$pid' and r.eid='$eid' and ".
		"      v.inner_elab_role='node' ".
		"into outfile '$statedir/tiplines'")
	or die("*** $0:\n".
	       "    Could not dump table tiplines\n");

1057
1058
1059
1060
1061
    #
    # Dump the DB schema too, so we can check in the inner Elab that this data
    # is compatible with the sql/database-create.sql schema file there, *before*
    # loading it into the db.  Added/removed columns would misalign row data.
    #
Russ Fish's avatar
Russ Fish committed
1062
1063
1064
1065
1066
1067
1068
1069
1070
    my $schemafile = "$expdir/outer_db_schema";
    system("rm -f $schemafile")
	if (-f $schemafile);
    my $isvers5     = system("mysql -V | egrep -q -s 'Distrib 5.'") == 0;
    my $extraopts   = ($isvers5 ? "--skip-quote-names" : "");
    #
    # XXX: Requires that mysqldump be in caller's $PATH - probably an OK
    # assumption, but maybe not always
    #
1071
1072
    my $mysqldump   = "mysqldump -d $extraopts $DBNAME " .
	"@FULLTABLES @NODETABLES @PROJTABLES @USERTABLES";
Russ Fish's avatar
Russ Fish committed
1073
    system("$mysqldump 2> /dev/null > $schemafile");
1074

1075
    #
1076
    # Tar up the directory and send it over to (real) ops.
1077
1078
1079
    #
    $UID = 0;
    system("tar cf - -C $statedir . | ".
Kirk Webb's avatar
   
Kirk Webb committed
1080
	   "   gzip | $SSH -F /dev/null -host $CONTROL ".
1081
	   "   '(cat > $expdir/dbstate.tar.gz)'");
1082
1083
1084
1085
1086
1087
1088
1089
    if ($?) {
	die("*** $0:\n".
	    "    Could not create dbstate.tar.gz\n");
    }
    $UID = $SAVEUID;
    return 0;
}

1090
#
# Tear down an inner Emulab as cleanly as possible to avoid power cycling
# nodes.
#
# Operates on the experiment named by the file-level $pid/$eid, using the
# file-level $bossnode, $opsnode, $fsnode, @expnodes, $firewalled and
# $SAVEUID variables.
#
# Always returns 0. Dies only if the DHCPD config file cannot be
# regenerated; failures talking to the inner boss are reported on STDERR
# and are otherwise non-fatal.
#
sub TearDownEmulab()
{
    # Paths as they exist on the *inner* boss; used only in the ssh commands.
    my $tbdir      = "/usr/testbed";
    my $wap        = "$tbdir/sbin/withadminprivs";
    my $nodereboot = "$tbdir/bin/node_reboot";

    #
    # We want to rebuild the DHCPD file so that when we reboot the inner nodes
    # they come back to the outer emulab. We cannot just free the nodes, cause
    # then the reload daemon might beat us to it, and end up power cycling the
    # nodes, and that would be bad. So, munge the DB and clear the "role" slot
    # for inner nodes.
    #
    DBQueryFatal("update reserved set inner_elab_role=NULL,inner_elab_boot=0 ".
		 "where pid='$pid' and eid='$eid'");

    #
    # XXX Failure at this point will leave things in an inconsistent state
    # cause we have just munged the reserved table. Since we were trying
    # to swap out the experiment, I think this will be okay. Wait and see.
    #
    return 0
	if (!defined($bossnode));

    #
    # Now regen the DHCPD file.
    #
    # Run as real user since script is setuid.
    $EUID = $UID;

    print "Regenerating DHCPD config file and restarting daemon.\n";
    system("$makeconf -i -r");
    if ($?) {
	die("*** $0:\n".
	    "    Failed to reconfig/restart DHCPD.\n");
    }
    $EUID = 0;

    #
    # Kill inner vlans table entries; this is the table that maps
    # inner to outer vlans. We do not care about that anymore since
    # all of the vlans are going to be torn down (using the outer
    # ids).
    #
    DBQueryFatal("delete from elabinelab_vlans ".
		 "where pid='$pid' and eid='$eid'");

    #
    # If firewalled, just return now since all nodes will be powered
    # off anyway.
    #
    if ($firewalled) {
	print "Skipping clean shutdown since experiment is firewalled.\n";
	return 0;
    }

    #
    # When the nodes reboot, we want them to do something reasonable. We
    # have no idea what is loaded on the disk, so they should go into an
    # MFS and wait, but then a bunch of nodes will all try to load the big
    # MFS at once, and that could wreak havoc. So, clear the boot osids
    # so they go into PXEWAIT. I could use os_select, but clearing all the
    # OSIDs for a node is apparently a bad thing and generates warnings and
    # emails. Why is that? So just clear the DB state until I figure out
    # why that is.
    #
    DBQueryFatal("update nodes set ".
		 "  def_boot_osid=NULL,next_boot_osid=NULL,".
		 "  temp_boot_osid=NULL ".
		 "where " .
		    join(" or ", map("node_id='$_'",
				     ($bossnode, $opsnode,
				      defined($fsnode) ? $fsnode : (),
				      @expnodes))));

    #
    # SSH in and kill the inner DHCPD daemon so that it does not reply
    # to rebooting nodes along the inner control network.
    #
    $UID = 0;

    print "Killing DHCPD on inner boss ($bossnode)\n";
    system("$SSH -host $bossnode /usr/local/etc/rc.d/2.dhcpd.sh stop");
    if ($?) {
	#
	# This error is non-fatal. If DHCPD cannot be killed, then the inner
	# boss is scrogged or never set up properly. Just return and let
	# the nodes get power cycled (if need be). At some point we need a
	# state machine to control this setup stuff.
	#
	print STDERR "*** $0:\n".
	             "    Could not stop DHCPD on inner bossnode ($bossnode)!\n".
		     "    Continuing anyway; outer boss will use power cycle.\n";
	# Put the real uid back before bailing, as every other exit taken
	# after the "$UID = 0" above does.
	$UID = $SAVEUID;
	return 0;
    }

    #
    # Now we ask inner boss to reboot all of the testnodes. Maybe need an
    # option to node_reboot, but for now just pass them on the command line.
    #
    if (! @expnodes) {
	$UID = $SAVEUID;
	return 0;
    }

    print "Asking inner boss ($bossnode) to reboot inner nodes\n";
    system("$SSH -host $bossnode $wap $nodereboot -b @expnodes");
    if ($?) {
	#
	# This error is non-fatal; Outer boss will just resort to power cycle.
	#
	print STDERR "*** $0:\n".
	             "    Could not reboot some inner nodes!\n".
		     "    Continuing anyway; outer boss will use power cycle.\n";
    }
    $UID = $SAVEUID;

    #
    # Now we wait for them to reach PXEWAIT. Again, use our utility script
    # instead of stated stuff.
    #
    # NOTE(review): unlike the DHCPD regen step above, $EUID is not set
    # back to 0 after this; confirm the caller does not need it.
    #
    $EUID = $UID;
    print "Waiting for inner nodes to reach PXEWAIT\n";
    system("$nodewait @expnodes");
    if ($?) {
	#
	# This error is non-fatal; Outer boss will just resort to power cycle.
	#
	print STDERR "*** $0:\n".
	             "    Some machines did not reboot properly!\n".
		     "    Continuing anyway; outer boss will use power cycle.\n";
    }
    return 0;
}
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237

#
# Remove nodes from an inner Emulab.
# 
sub RemoveNodes()
{
    my $tbdir      = "/usr/testbed";
    my $wap        = "$tbdir/sbin/withadminprivs";
    my $nodereboot = "$tbdir/bin/node_reboot";
    my $deletenode = "$tbdir/sbin/deletenode";
1238
    my $creator    = $experiment->creator();
1239
    my @nodes	   = ();
1240
    my $paniced    = 0;
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253

    #
    # If firewalled, check to see if paniced. Right now that means the nodes
    # are going to be powered off, so need to do the clean shutdown dance.
    # 
    if ($firewalled) {
	TBExptGetPanicBit($pid, $eid, \$paniced);
    }

    #
    # Actually, this should not even happen; a paniced experiment cannot be
    # modified at all.
    #
1254
1255
    if ($paniced) {
	print "A paniced experiment cannot be modified! What happened?\n";
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
	return -1;
    }

    #
    # Grab the list of nodes. We want to clear the reserved table bits so
    # that we can regen the DHCPD file. 
    #
    shift(@ARGV);	# pid
    shift(@ARGV);	# eid

    foreach my $node (@ARGV) {
	# Untaint the nodes.
	if ($node =~ /^([-\w]+)$/) {
	    $node = $1;
	}
	else {
	    die("*** Tainted node name: $node\n");
	}
	push(@nodes, $node);
    }
    return 0
	if (!@nodes);

    #
    # Grab the vlans table. We need to find any ports used by the nodes
    # getting deleted, and move them back to the default vlan. 
    #
1283
1284
    my @delmembers = ();
    my @todelete   = ();
1285
1286
    
    my $query_result =
1287
1288
	DBQueryWarn("select inner_id,outer_id from elabinelab_vlans ".
		    "where pid='$pid' and eid='$eid'");
1289
1290
1291
    return -1
	if (!$query_result);

1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
    while (my ($inner_id,$outer_id) = $query_result->fetchrow_array()) {
	my $vlan = VLan->Lookup($outer_id);
	if (!defined($vlan)) {
	    print STDERR "*** No such vlan $outer_id ($inner_id)\n";
	    return -1;
	}
	my @members;

	if ($vlan->MemberList(\@members) != 0) {
	    print STDERR "*** Unable to load members for $vlan\n";
	    return -1;
	}
	my $id         = $outer_id;
	my $changed    = 0;
1306

1307
1308
1309
	foreach my $member (@members) {
	    my $node;
	    my $iface;
1310

1311
1312
1313
	    if ($member->GetNodeIface(\$node, \$iface) != 0) {
		print STDERR "Missing attributes for $member in $vlan\n";
		return -1;
1314
	    }
1315
1316
1317
1318
1319
1320
	    my $nodeid = $node->node_id();
	    
	    # See if this node is in the list of nodes to be deleted,
	    if (grep {$_ eq $nodeid} @nodes) {
		push(@todelete, "$nodeid:$iface");
		push(@delmembers, $member);
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
		$changed = 1;
	    }
	}
    }

    # Remove ports from the vlans.
    if (@todelete) {
	print "Removing ports from deleted nodes: @todelete\n";
	system("$snmpit -m default @todelete");
	if ($?) {
	    return -1;
	}
    }
    # Only if the above succeeds, do we update the vlans table.
1335
1336
    foreach my $member (@delmembers) {
	$member->Delete() == 0
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
	    or return -1;
    }
    
    #
    # We want to rebuild the DHCPD file so that when we reboot the inner nodes
    # they come back to the outer emulab. We cannot just free the nodes, cause
    # then the reload daemon might beat us to it, and end up power cycling the
    # nodes, and that would be bad. So, munge the DB and clear the "role" and
    # boot slots for nodes about to be released (by tbswap).
    #
    DBQuery