swapexp.in 22.9 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2
3
4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6
7
8
# All rights reserved.
#

9
10
11
12
use English;
use Getopt::Std;

#
Chad Barb's avatar
Chad Barb committed
13
# This gets invoked from the Web interface.
Chad Barb's avatar
   
Chad Barb committed
14
# Swap an experiment in, swap it out, restart or modify.
15
#
Chad Barb's avatar
Chad Barb committed
16

17
18
#
# Print the command line synopsis on STDOUT, then exit with status -1.
#
sub usage()
{
    my @synopsis = (
	"Usage: swapexp [-b] [-i | -a | -f] [-r] ",
	"<-s in | out | restart | modify | pause> <pid> <eid> [<nsfile>]\n",
    );

    print STDOUT join("", @synopsis);
    exit(-1);
}
23
my  $optlist = "biafrs:";
24

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my $status  = shift;
    my $message = shift;

    # A negative status is an unexpected failure: raise it via die().
    die("*** $0:\n    $message\n")
	if ($status < 0);

    # Otherwise report the message on STDERR and hand the status back
    # as the exit code.
    print STDERR "$message\n";
    exit($status);
}

49
50
51
52
53
54
#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
55
my $TBINFO = "$TB/expinfo";
56
my $TBDOCBASE = "@TBDOCBASE@";
57
58
59
60
61
62
63
64
65

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

my $tbdir    = "$TB/bin/";
66
my $tbdata   = "tbdata";
67
my $batch    = 0;
68
my $idleswap = 0;
69
70
my $autoswap = 0;
my $force    = 0;
Chad Barb's avatar
Chad Barb committed
71
my $reboot   = 0;
72
my $errorstat= -1;
73
my $modifyHosed = 0;
Chad Barb's avatar
   
Chad Barb committed
74

75
76
77
78
79
my $inout;
my $logname;
my $dbuid;
my $user_name;
my $user_email;
80
my @allnodes;
81
my @row;
82
my $action;
83
84
85
my $nextswapstate;
my $tempswapstate;

Chad Barb's avatar
   
Chad Barb committed
86

87
88
89
#
# Untaint the path
# 
90
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
91
92
93
94
95
96
97
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

98
99
100
101
102
103
104
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

105
106
107
108
109
110
111
112
#
# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
113
114
115
if (defined($options{"i"})) {
    $idleswap = 1;
}
116
117
118
119
120
121
if (defined($options{"a"})) {
    $autoswap = 1;
}
if (defined($options{"f"})) {
    $force = 1;
}
122
123
124
if (defined($options{"b"})) {
    $batch = 1;
}
Chad Barb's avatar
   
Chad Barb committed
125
126
127
if (defined($options{"r"})) {
    $reboot = 1;
}
128
129
130
if (defined($options{"s"})) {
    $inout = $options{"s"};

Chad Barb's avatar
Chad Barb committed
131
132
133
    if ($inout ne "out"     &&
	$inout ne "in"      &&
	$inout ne "restart" &&
134
	$inout ne "pause"   &&
Chad Barb's avatar
   
Chad Barb committed
135
	$inout ne "modify") {
136
137
138
139
140
141
142
	usage();
    }
}
else {
    usage();
}

Chad Barb's avatar
   
Chad Barb committed
143
144
145
146
147
148
if (@ARGV != (($inout eq "modify") ? 3 : 2)) {
    usage();
}
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#
# Untaint the arguments.
#
# Both identifiers get the same treatment: accept only word characters
# plus '-', '@' and '.', keep the captured copy, and die on anything else.
foreach my $argref (\$pid, \$eid) {
    die("Tainted argument ${$argref}!\n")
	unless (${$argref} =~ /^([-\@\w.]+)$/);

    ${$argref} = $1;
}
164
my $repfile = "$eid.report";
165
166
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
167
168
169
170
171
172
173
174
175
176
177
my $tempnsfile;
my $modnsfile;

if ($inout eq "modify") {
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
    if ($tempnsfile =~ /^([-\w.\/]+)$/) {
	$tempnsfile = $1;
178
179
    }
    else {
180
181
182
183
	die("Tainted nsfile name: $tempnsfile");
    }
    $modnsfile = "$eid-modify.ns";
}
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201

#
# Verify user and get his DB uid.
#
UNIX2DBUID($UID, \$dbuid)
    or die("*** $0:\n".
	   "    You do not exist in the Emulab Database.\n");

#
# Look up the name and email address recorded for that DB uid.
#
UserDBInfo($dbuid, \$user_name, \$user_email)
    or die("*** $0:\n".
	   "    Cannot determine your name and email address.\n");
#
Chad Barb's avatar
   
Chad Barb committed
202
# Verify that this person can muck with the experiment.
203
204
205
206
207
# Note that any script down the line has to do an admin check also. 
#
if ($UID && !TBAdmin($UID) &&
    !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
Chad Barb's avatar
   
Chad Barb committed
208
	"    You do not have permission to swap or modify this experiment!\n");
209
210
}

211
212
213
214
# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

215
216
217
218
219
#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
Chad Barb's avatar
Chad Barb committed
220
# the tb scripts.
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

if (! $query_result->numrows) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid exists!\n");
}
my %hashrow = $query_result->fetchhash();
my $expt_head_login = $hashrow{'expt_head_uid'};
my $estate          = $hashrow{'state'};
my $expt_path       = $hashrow{'path'};
my $isbatchexpt     = $hashrow{'batchmode'};
my $ebatchstate     = $hashrow{'batchstate'};
237
my $cancelflag  = $hashrow{'canceled'};
238
239
240
241
242
243
244
245
246
247
my $swappablebit= $hashrow{'swappable'};
my $idleswapbit = $hashrow{'idleswap'};
my $autoswapbit = $hashrow{'autoswap'};
my $swappablestr= ( $swappablebit ? "Yes" : "No" );
my $idleswapstr = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr = ( $autoswapbit ? "Yes" : "No" );
my $noswap      = $hashrow{'noswap_reason'};
my $noidleswap  = $hashrow{'noidleswap_reason'};
my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0;
248

249
250
if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
251
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
252
253
254
    $idleswaptime = $idleswap_time / 60.0;
}

255
256
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
257
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
258
259
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");
260

261
if (! chdir($workdir)) {
262
    die("*** $0:\n".
263
	"    Could not chdir to $workdir: $!\n");
264
265
}

266
#
267
# Batchmode.
268
#
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
if ($isbatchexpt) {
    #
    # When coming from the daemon, sanity check the batch state.
    #
    if ($batch) {
	if ($inout eq "in") {
	    die("*** $0:\n".
		"    Batch experiment $pid/$eid is not in the proper state!\n".
		"    Currently $ebatchstate, but should be ACTIVATING\n")
		if ($ebatchstate ne BATCHSTATE_ACTIVATING);
	}
	elsif ($inout eq "out") {
	    die("*** $0:\n".
		"    Batch experiment $pid/$eid is not in the proper state!\n".
		"    Currently $ebatchstate, but should be TERMINATING\n")
		if ($ebatchstate ne BATCHSTATE_TERMINATING);
	}
	else {
	    die("*** $0:\n".
		"    Improper request from batch daemon for $pid/$eid!\n");
	}
    }
    else {
	#
	# User is requesting that a batch either be injected or paused.
	# Sanity check the state, but otherwise let the batch daemon
	# handle it.
296
297
298
299
	#
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
	    if ($cancelflag);

300
	if ($inout eq "in") {
301
302
303
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be PAUSED to\n".
			   "swap in. Currently $ebatchstate.")
304
305
306
307
		if ($ebatchstate ne BATCHSTATE_PAUSED);
	    TBSetBatchState($pid, $eid, BATCHSTATE_POSTED);
	}
	elsif ($inout eq "out") {
308
309
310
311
312
313
314
315
316
317
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be RUNNING or\n".
			   "ACTIVATING to swap out. Currently $ebatchstate.")
		if ($ebatchstate ne BATCHSTATE_RUNNING &&
		    $ebatchstate ne BATCHSTATE_ACTIVATING);

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    # 
318
319
320
	    TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELSWAP);
	}
	elsif ($inout eq "pause") {
321
322
323
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be POSTED to\n".
			   "pause. Currently $ebatchstate.")
324
		if ($ebatchstate ne BATCHSTATE_POSTED);
325
326
327
328
329

	    #
	    # If the batchstate is POSTED, we can just set it to PAUSED
	    # since the batch_daemon is locked out from messing with it.
	    #
330
	    TBSetBatchState($pid, $eid, BATCHSTATE_PAUSED);
331
	}
332
	elsif ($inout eq "modify") {
333
334
335
336
337
338
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be PAUSED or\n".
			   "RUNNING to modify. Currently $ebatchstate.")
		if ($ebatchstate ne BATCHSTATE_PAUSED &&
		    $ebatchstate ne BATCHSTATE_RUNNING);
	    #
339
	    # Otherwise, proceed with the modify. The experiment will be
340
341
	    # locked below, and so it cannot be injected or otherwise messed
	    # with since its state is going to be changed before we unlock
342
343
344
345
	    # the experiments table. The batch daemon will leave it alone
	    # until the modify is done. If the modify fails and cannot recover
	    # it is going to get swapped out; that is okay since the batch
	    # daemon does not keep state internally. 
346
	    #
347
348
	    goto doit;
	}
349
350
	else {
	    die("*** $0:\n",
351
		"    Operation $inout not allowed on a batch experiment!\n");
352
	}
353
354
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid state has been changed.\n");
355
    }
356
  doit:
357
}
358
359
360
361
362
363
364
365
366
367
else {
    #
    # If the cancel flag is set, then user must wait for that to clear before
    # we can do anything else.
    #
    ExitWithStatus(1,
		   "Experiment $pid/$eid has its cancel flag set!.\n".
		   "You must wait for that to clear before you can swap or\n".
		   "or modify the experiment.\n")
	if ($cancelflag);
368

369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
    #
    # Check the state for the various operations.
    #
    #
    # Unless -f was given, refuse operations that do not make sense for
    # the current batch state. Expected problems are reported through
    # ExitWithStatus(); a request with no case below is a fatal error.
    #
    if (!$force) {
        SWITCH: for ($inout) {
	    /^in$/i && do {
		# Swapin is only allowed from the PAUSED state.
		if ($ebatchstate ne BATCHSTATE_PAUSED()) {
		    ExitWithStatus(1,
				   "Experiment $pid/$eid is not swapped out!");
		}
		last SWITCH;
	    };
	    /^out$/i && do {
		if ($ebatchstate eq BATCHSTATE_PAUSED()) {
		    ExitWithStatus(1,
				   "Experiment $pid/$eid is swapped out!");
		}
		elsif ($ebatchstate ne BATCHSTATE_RUNNING() &&
		       $ebatchstate ne BATCHSTATE_ACTIVATING()) {
		    ExitWithStatus(1,
				   "Experiment $pid/$eid is not swapped in!");
		}

		if ($ebatchstate eq BATCHSTATE_ACTIVATING()) {
		    #
		    # All we can do is set the cancel flag and hope that
		    # it gets noticed. We do not wait. 
		    # 
		    TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELSWAP);

		    ExitWithStatus(0,
				   "Experiment $pid/$eid swapin has been  ".
				   "marked for cancelation.\n".
				   "You will receive email when the original ".
				   "swap request has finished.");
		}
		last SWITCH;
	    };
	    /^restart$/i && do {
		# Restart is only allowed while RUNNING.
		if ($ebatchstate ne BATCHSTATE_RUNNING()) {
		    ExitWithStatus(1,
				   "Experiment $pid/$eid is not swapped in!");
		}
		last SWITCH;
	    };
	    /^modify$/i && do {
		# Modify is allowed from either stable state.
		if ($ebatchstate ne BATCHSTATE_RUNNING() &&
		    $ebatchstate ne BATCHSTATE_PAUSED()) {
		    ExitWithStatus(1,
				   "Experiment $pid/$eid is in transition!");
		}
		last SWITCH;
	    };
	    #
	    # Name the requested operation ($inout). $action is not
	    # assigned until after these checks have run, so it cannot
	    # be used in the message here.
	    #
	    die("*** $0:\n".
		"    Missing state check for action: $inout\n");
	}
    }
426
427
}

428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
#
# Each operation gets a temporary state, which the experiment is locked
# into while the operation runs, and a next state, which it is unlocked
# into once the operation succeeds.
#
SWITCH: for ($inout) {
    /^in$/i && do {
	$tempswapstate = BATCHSTATE_ACTIVATING();
	$nextswapstate = BATCHSTATE_RUNNING();
	last SWITCH;
    };
    /^out$/i && do {
	$tempswapstate = BATCHSTATE_TERMINATING();
	$nextswapstate = BATCHSTATE_PAUSED();
	last SWITCH;
    };
    /^restart$/i && do {
	$tempswapstate = BATCHSTATE_RUNNING_BUSY();
	$nextswapstate = BATCHSTATE_RUNNING();
	last SWITCH;
    };
    /^modify$/i && do {
	# A modify returns the experiment to whatever state it was in.
	$tempswapstate = BATCHSTATE_MODIFYING();
	$nextswapstate = $ebatchstate;
	last SWITCH;
    };
    #
    # Name the requested operation ($inout). $action has not been
    # assigned yet at this point, so it cannot be used in the message.
    #
    die("*** $0:\n".
	"    Missing state check for action: $inout\n");
}
457
458
 
# Update idleswap_timeout to whatever the current value is.
459
if ($inout ne "out") {
460
461
462
    DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
		 "where eid='$eid' and pid='$pid'");
}
463

464
465
# Lock the record, set the intermediate state, and unlock the table.
TBLockExp($pid, $eid, $tempswapstate);
466
467
468
469
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
470
471
472
473
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
474
475
#

476
477
478
479
480
481
482
483
484
if ($inout eq "in") {
    $action = "swapped in";
}
if ($inout eq "out") {
    $action = "swapped out";
}
if ($inout eq "restart") {
    $action = "restarted";
}
Chad Barb's avatar
   
Chad Barb committed
485
486
487
if ($inout eq "modify") {
    $action = "modified";
}
488

489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
#
# Get email address of the experiment head, which may be different than
# the person who is actually swapping or modifying the experiment, since
# it's polite to let the original creator know what's going on.
#
my $expt_head_name;
my $expt_head_email;

if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    print STDERR "*** WARNING: ".
	         "Could not determine name/email for $expt_head_login.\n";
    $expt_head_name  = "TBOPS";
    $expt_head_email = $TBOPS;
}

504
505
506
507
508
509
510
511
512
513
514
515
#
# Before going to background, we have to copy out the NS file!
#
if ($inout eq "modify") {
    # Replace any stale copy left over from an earlier modify.
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
	#
	# The experiment was locked into its temporary state above. Put
	# it back into the state we found it in before bailing out;
	# a plain die() would leave it locked with nothing left running
	# to finish or undo the operation.
	#
	TBUnLockExp($pid, $eid, $ebatchstate);
	die("*** $0:\n".
	    "    Could not copy $tempnsfile to $modnsfile");
    }
    # Group writable, matching the umask set for the other work files.
    chmod(0664, "$modnsfile");
}

516
517
518
519
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
    # Create the log file and record it for the experiment, so the
    # output produced from here on is captured.
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
    TBExptSetLogFile($pid, $eid, $logname);
    TBExptOpenLogFile($pid, $eid);

    if (TBBackGround($logname)) {
	#
	# Parent exits normally, after telling the user what to expect.
	#
	print "Experiment $pid/$eid is now being $action.\n".
	    "You will be notified via email when this is done.\n";
	exit(0);
    }
}

534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
#
# Gather stats; start clock ticking
#
if ($inout eq "in") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "out") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "modify") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0,
		    TBDB_STATS_FLAGS_START);
}

550
551
552
#
# Remove old report file since its contents are going to be invalid.
#
553
if ($inout ne "restart" && -e $repfile) {
554
555
556
    unlink("$repfile");
}

557
558
559
560
#
# Sanity check states in case someone changes something.
#
if ($inout eq "out") {
561
    if ($force || $idleswap) { $arg = "-force"; } else { $arg = ""; }
Chad Barb's avatar
   
Chad Barb committed
562
    print STDOUT "Running 'tbswap out' with arguments: $pid $eid\n";
563
    if (system("$tbdir/tbswap out $arg $pid $eid") != 0) {
564
	$errorstat = $? >> 8;
565
	fatal("tbswap out failed!");
566
    }
Chad Barb's avatar
Chad Barb committed
567

568
569
    $estate = ExpState($pid,$eid);
    if ($estate ne EXPTSTATE_SWAPPED) {
570
	fatal("Experiment is in the wrong state: $estate");
571
572
    }
}
573
elsif ($inout eq "in") {
Chad Barb's avatar
   
Chad Barb committed
574
575
    print STDOUT "Running 'tbswap in' with arguments: $pid $eid\n";
    if (system("$tbdir/tbswap in $pid $eid") != 0) {
576
	$errorstat = $? >> 8;
577
	fatal("tbswap in failed!");
578
    }
Chad Barb's avatar
Chad Barb committed
579

580
581
    $estate = ExpState($pid,$eid);
    if ($estate ne EXPTSTATE_ACTIVE) {
582
	fatal("Experiment is in the wrong state: $estate");
583
    }
584

585
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
Chad Barb's avatar
Chad Barb committed
586
}
Chad Barb's avatar
   
Chad Barb committed
587
elsif ($inout eq "modify") {
Chad Barb's avatar
Chad Barb committed
588
589
    my $modifyError = "";

590
591
592
    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

Chad Barb's avatar
Chad Barb committed
593
    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
594
    if (TBExptBackupVirtualState($pid, $eid)) {
595
	fatal("Could not backup experiment state; cannot safely continue!");
Chad Barb's avatar
Chad Barb committed
596
    }
597
598
599
    # Must deal with the prerender explicitly since it runs background.
    system("prerender -r $pid $eid");
    
Chad Barb's avatar
Chad Barb committed
600
601
602
603
604
    TBExptRemoveVirtualState($pid, $eid);

    #
    # Rerun tbprerun if modifying.
    #
605
    if (system("$tbdir/tbprerun -m $pid $eid $modnsfile") != 0) {
Chad Barb's avatar
Chad Barb committed
606
607
608
	$modifyError = "tbprerun failed!";
    }

Chad Barb's avatar
   
Chad Barb committed
609
610
611
612
    #
    # If experiment is currently swapped out, no need to do an update 
    # after modifying it.
    #
Chad Barb's avatar
Chad Barb committed
613
    if (! $modifyError && $estate eq EXPTSTATE_ACTIVE) {
Chad Barb's avatar
   
Chad Barb committed
614
	print STDOUT "Running 'tbswap update' with arguments: $pid $eid\n";
Chad Barb's avatar
Chad Barb committed
615
616
617
618

	my $rebootSwitch = "";
	if ($reboot) {
	    $rebootSwitch = "-reboot";
Chad Barb's avatar
   
Chad Barb committed
619
620
	}

Chad Barb's avatar
Chad Barb committed
621
622
623
	if (system("$tbdir/tbswap update $rebootSwitch $pid $eid") != 0) {
	    $errorstat = $? >> 8;
	    $modifyError = "tbswap update failed!";
Chad Barb's avatar
   
Chad Barb committed
624
	}
Chad Barb's avatar
   
Chad Barb committed
625

626
627
628
629
630
631
	#
	# See what state tbswap left it in. It might have swapped it out
	# or restored it, if there was an error. 
	# 
	$estate = ExpState($pid, $eid);
	
Chad Barb's avatar
Chad Barb committed
632
633
	if (! $modifyError) {
	    if ($estate ne EXPTSTATE_ACTIVE) {
634
		$modifyHosed = 1;
Chad Barb's avatar
Chad Barb committed
635
		fatal("Experiment is in the wrong state: $estate!");
Chad Barb's avatar
   
Chad Barb committed
636
	    }
Chad Barb's avatar
Chad Barb committed
637
638
	    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
	}
639
640
641
642
	elsif ($estate ne EXPTSTATE_ACTIVE) {
	    # Was active, now its not! tbswap was not able to recover.
	    $modifyHosed = 1;
	}
Chad Barb's avatar
Chad Barb committed
643
644
645
    }

    if ($modifyError) {
646
	print STDOUT "Modify Error: $modifyError\n";
Chad Barb's avatar
Chad Barb committed
647
	print STDOUT "Recovering experiment state...\n";
648
	
649
650
	# Must deal with the prerender explicitly since it runs background.
	system("prerender -r $pid $eid");
651
	TBExptRemoveVirtualState($pid, $eid);
652
	
653
654
	if (TBExptRestoreVirtualState($pid, $eid) == 0) {
	    TBExptClearBackupState($pid, $eid);
655
656
	    # Must deal with the prerender explicitly since it runs background.
	    system("prerender -t $pid $eid");
657
658
659
660
661
	    fatal("Update aborted; old state restored.");
	}
	else {
	    $modifyHosed = 1;
	    fatal("Experiment state could not be restored!");
Chad Barb's avatar
Chad Barb committed
662
	}
Chad Barb's avatar
   
Chad Barb committed
663
    }
664
    TBExptClearBackupState($pid, $eid);
665
}
Chad Barb's avatar
   
Chad Barb committed
666
else { # $inout eq "restart" assumed.
667
668
    print STDOUT "Running tbrestart with arguments: $pid $eid\n";
    if (system("$tbdir/tbrestart $pid $eid") != 0) {
669
	fatal("tbrestart failed!");
670
671
    }
}
672

673
674
675
676
677
678
679
680
681
682
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
683
system("cp -Rfp $workdir/ $userdir/tbdata/");
684

685
686
687
688
#
# Gather stats. 
#
if ($inout eq "in") {
689
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0);
690
691
}
elsif ($inout eq "out") {
692
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
693
		    ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0));
694
695
}
elsif ($inout eq "modify") {
696
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0);
697
698
}

699
700
701
702
703
704
#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
705
706
# In batch mode, just exit without sending email or unlocking. The
# batch daemon will take care of that and setting the proper state. 
707
708
709
710
711
#
if ($batch) {
    exit(0);
}

Chad Barb's avatar
   
Chad Barb committed
712
713
714
715
716
#
# HACK! if successful, put new NS file in DB.
#

if ($inout eq "modify") {
    $nsdata_string = `cat $modnsfile`;

    #
    # Backticks hand back an empty string, not undef, when cat runs but
    # cannot read the file, so the exit status must be checked as well.
    # Without that, a failed read would replace the stored NS file with
    # an empty one.
    #
    if ($? == 0 && defined($nsdata_string)) {
	$nsdata_string = DBQuoteSpecial($nsdata_string);

	# Drop the old copy first, then store the new one.
	DBQueryWarn("delete from nsfiles WHERE eid='$eid' and pid='$pid'");
	DBQueryWarn("insert into nsfiles (pid, eid, nsfile) ".
		    "VALUES('$pid', '$eid', $nsdata_string)");
    }
    else {
	print "Warning!! Could not read nsfile '$modnsfile'!\n";
    }
}

730
731
732
733
734
735
736
#
# Clear the log file so the web page stops spewing. 
#
if (defined($logname)) {
    TBExptCloseLogFile($pid, $eid);
}

737
738
739
740
741
742
743
744
745
746
747
748
749
#
# Must unlock before exit.
#
TBUnLockExp($pid, $eid, $nextswapstate);

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above). 
#
TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELCLEAR);

print "Swap Success!\n";

750
751
752
753
#
# Send email notification to user.
#
my $message =
754
755
    "Experiment $eid in project $pid has been ";

756
if ($inout eq "out" && ($idleswap || $autoswap || $force) ) {
757
    $message .= "forcibly swapped out by\nEmulab";
758
759
760
761
762
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	  "(See also the Idle-Swap info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($autoswap) {
763
764
	$message .= " because it exceeded its Maximum Duration.\n".
	  "(See also the Max. Duration info in \n".
765
766
767
768
769
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($force) {
	$message .= ". (See also our Node Usage Policies in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    }
770
771
772
773
774
}
else {
    $message .= "$action.\n";
}

775
776
777
778
779
if ($inout eq "in") {
    # Add the swap settings...
    $message .="\nCurrent swap settings:\n$swapsettings";
}

780
781
$message .=
    "\n".
782
783
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";
784
785

SENDMAIL("$user_name <$user_email>",
786
	 "Experiment $pid/$eid \u$action",
787
	 $message,
788
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
789
790
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
791
792
	 (($inout eq "restart") ? ($logname) :
	  (($repfile, $logname), (defined($modnsfile) ? ($modnsfile) : ()))));
793
794
795
796
797
798

exit 0;

#
# Failure path once the experiment has been locked: report the error,
# record failure stats, unlock the experiment back to its original
# state, mail the log to the user, and exit with $errorstat. When
# $modifyHosed is set, the experiment is torn down completely instead.
#
sub fatal($)
{
    my ($mesg) = @_;
    my $whoami = "$user_name <$user_email>";

    print STDOUT "*** $0:\n    $mesg\n";

    #
    # Record the failure for the operations that keep swap stats.
    #
    my $stattype;
    if ($inout eq "in") {
	$stattype = TBDB_STATS_SWAPIN;
    }
    elsif ($inout eq "out") {
	$stattype = TBDB_STATS_SWAPOUT;
    }
    elsif ($inout eq "modify") {
	$stattype = TBDB_STATS_SWAPMODIFY;
    }
    GatherSwapStats($pid, $eid, $dbuid, $stattype, $errorstat)
	if (defined($stattype));

    #
    # A hosed modify means the experiment gets terminated entirely.
    #
    if ($modifyHosed) {
	# $estate still holds the last state read by the main line.
	if ($estate eq EXPTSTATE_ACTIVE) {
	    print "Running 'tbswap out' with arguments: $pid $eid\n";
	    system("$tbdir/tbswap out -force $pid $eid") == 0
		or print "tbswap out failed!\n";
	}

	print "Running tbend with arguments: -force $pid $eid\n";
	system("$tbdir/tbend -force $pid $eid") == 0
	    or print "tbend failed!\n";

	# The original state no longer applies; unlock into PAUSED below.
	$ebatchstate = BATCHSTATE_PAUSED;
    }

    # Let the user see the log files.
    system("/bin/cp -Rfp $workdir/ $userdir/tbdata");

    #
    # The batch daemon does its own mail, unlocking and state changes,
    # so in batch mode we are done here.
    #
    exit($errorstat)
	if ($batch);

    #
    # Close the log file so the web page stops spewing.
    #
    TBExptCloseLogFile($pid, $eid)
	if (defined($logname));

    # Unlock, restoring the original state (or PAUSED if hosed).
    TBUnLockExp($pid, $eid, $ebatchstate);

    #
    # The cancel flag may only be cleared after the state change above.
    #
    TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELCLEAR);

    #
    # Mail the failure to the user, copying the experiment head and
    # testbed ops, with the log (and the modify NS file, if any) attached.
    #
    SENDMAIL($whoami,
	     "Swap ${inout} Failure: $pid/$eid",
	     $mesg,
	     ($idleswap ? $TBOPS : $whoami),
	     "Cc:  $expt_head_name <$expt_head_email>\n".
	     "Cc:  $TBOPS",
	     (($logname), (defined($modnsfile) ? ($modnsfile) : ())));

    if ($modifyHosed) {
	#
	# Refresh the user copy of the work directory, set both
	# directories aside for post-mortem debugging, then destroy
	# the experiment.
	#
	foreach my $cmd ("/bin/cp -Rfp $workdir/ $userdir/tbdata",
			 "/bin/rm -rf  ${workdir}-failed",
			 "/bin/mv -f   $workdir ${workdir}-failed",
			 "/bin/rm -rf  ${userdir}-failed",
			 "/bin/mv -f   $userdir ${userdir}-failed") {
	    system($cmd);
	}
	TBExptDestroy($pid, $eid);
    }

    exit($errorstat);
}