swapexp.in 28.3 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2
3
4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6
7
8
# All rights reserved.
#

9
10
use English;
use Getopt::Std;
11
use POSIX qw(isatty setsid);
12
13

#
Chad Barb's avatar
Chad Barb committed
14
# This gets invoked from the Web interface.
Chad Barb's avatar
   
Chad Barb committed
15
# Swap an experiment in, swap it out, restart or modify.
16
#
Chad Barb's avatar
Chad Barb committed
17

18
19
sub usage()
{
20
    print(STDERR
21
	  "Usage: swapexp [-q] [-b | -w] [-i | -a | -f] [-r] [-e]\n".
22
23
24
25
	  "               <-s in | out | restart | modify | pause>\n".
	  "               <pid> <eid> [<nsfile>]\n".
	  "switches and arguments:\n".
	  "-w       - wait for non-batchmode experiment swap/modify\n".
26
	  "-q       - be less chatty\n".
27
28
29
30
31
32
	  "-r       - reboot nodes when doing a modify experiment\n".
	  "-e       - restart event scheduler when doing a modify experiment\n".
	  "-s <op>  - Operation to perform; one of those listed above\n".
	  "<pid>    - The project the experiment belongs to\n".
	  "<eid>    - The experiment name (id)\n".
	  "<nsfile> - Optional NS file to parse for experiment modify\n");
33
34
    exit(-1);
}
35
my  $optlist = "biafres:wq";
36

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;
    
    if ($status < 0) {
	die("*** $0:\n".
	    "    $message\n");
    }
    else {
	print STDERR "$message\n";
    }
    exit($status);
}

61
62
63
64
65
66
#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
67
my $TBINFO = "$TB/expinfo";
68
my $TBDOCBASE = "@TBDOCBASE@";
69
my $TBBASE = "@TBBASE@";
70
71
72
73
74
75
76
77
78

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

my $tbdir    = "$TB/bin/";
79
my $tbdata   = "tbdata";
80
my $batch    = 0;
81
my $idleswap = 0;
82
83
my $autoswap = 0;
my $force    = 0;
Chad Barb's avatar
Chad Barb committed
84
my $reboot   = 0;
85
my $waitmode = 0;
86
my $quiet    = 0;
87
my $eventsys_restart   = 0;
88
my $errorstat= -1;
89
my $modifyHosed = 0;
Chad Barb's avatar
   
Chad Barb committed
90

91
92
93
94
95
my $inout;
my $logname;
my $dbuid;
my $user_name;
my $user_email;
96
my @allnodes;
97
my @row;
98
my $action;
99
my $nextswapstate;
100
my $termswapstate;
Chad Barb's avatar
   
Chad Barb committed
101

102
103
104
#
# Untaint the path
# 
105
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
106
107
108
109
110
111
112
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

113
114
115
116
117
118
119
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

120
121
122
123
124
125
126
127
#
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
128
129
130
if (defined($options{"i"})) {
    $idleswap = 1;
}
131
132
133
if (defined($options{"w"})) {
    $waitmode = 1;
}
134
135
136
137
138
139
if (defined($options{"a"})) {
    $autoswap = 1;
}
if (defined($options{"f"})) {
    $force = 1;
}
140
141
142
if (defined($options{"b"})) {
    $batch = 1;
}
Chad Barb's avatar
   
Chad Barb committed
143
144
145
if (defined($options{"r"})) {
    $reboot = 1;
}
146
147
148
if (defined($options{"e"})) {
    $eventsys_restart = 1;
}
149
150
151
if (defined($options{"q"})) {
    $quiet = 1;
}
152
153
154
if (defined($options{"s"})) {
    $inout = $options{"s"};

Chad Barb's avatar
Chad Barb committed
155
156
157
    if ($inout ne "out"     &&
	$inout ne "in"      &&
	$inout ne "restart" &&
158
	$inout ne "pause"   &&
Chad Barb's avatar
   
Chad Barb committed
159
	$inout ne "modify") {
160
161
162
163
164
165
166
	usage();
    }
}
else {
    usage();
}

167
168
169
170
171
usage()
    if (($waitmode && $batch) ||
	($inout ne "modify" && @ARGV != 2) ||
	(($waitmode || $batch) && ($idleswap || $autoswap || $force)));

172
173
174
175
176
if ($eventsys_restart && $inout ne "modify") {
    print STDOUT "Usage: swapexp: -e (eventsys_restart) can be used ".
                 "only with -s modify\n";
    usage();
}
Chad Barb's avatar
   
Chad Barb committed
177
178
179
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

180
181
182
#
# Untaint the arguments.
#
183
if ($pid =~ /^([-\w\.]+)$/) {
184
185
186
187
188
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
189
if ($eid =~ /^([-\w\.]+)$/) {
190
191
192
193
194
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}
195
my $repfile = "$eid.report";
196
197
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
198
199
200
my $tempnsfile;
my $modnsfile;

Leigh B. Stoller's avatar
Leigh B. Stoller committed
201
if ($inout eq "modify" && @ARGV > 2) {
202
203
204
205
206
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
207
    if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
208
	$tempnsfile = $1;
209
210
    }
    else {
211
212
213
214
215
216
217
218
219
220
	die("Tainted nsfile name: $tempnsfile\n");
    }
    #
    # Called from ops interactively. Make sure NS file in /proj or /users.
    #
    # Use realpath to resolve any symlinks.
    #
    my $translated = `realpath $tempnsfile`;
    if ($translated =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
221
    }
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
    else {
	die("Tainted nsfile returned by realpath: $translated\n");
    }

    #
    # The file must reside in /proj, /groups, or /users. Since this script
    # runs as the caller, regular file permission checks ensure its a file
    # the user is allowed to use. /tmp/$guid-$nsref.nsfile also allowed
    # since this script is invoked directly from web interface, which generates
    # a name that should not be guessable, so as long as it looks to be in
    # proper format, we accept it. 
    #
    if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile/) &&
	! ($tempnsfile =~ /^\/var\/tmp\/php\w+/) &&
	! ($tempnsfile =~ /^\/proj/) &&
	! ($tempnsfile =~ /^\/groups/) &&
	! ($tempnsfile =~ /^\/users/)) {
	die("$tempnsfile does not resolve to an appropriate directory!\n");
    }

    if (! -f $tempnsfile || -z $tempnsfile || ! -r $tempnsfile) {
243
244
245
	die("*** $0:\n".
	    "    $tempnsfile does not look like an NS file!\n");
    }
246
247
    $modnsfile = "$eid-modify.ns";
}
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265

#
# Verify user and get his DB uid.
#
if (! UNIX2DBUID($UID, \$dbuid)) {
    die("*** $0:\n".
	"    You do not exist in the Emulab Database.\n");
}

#
# Get email info for user.
#
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
    die("*** $0:\n".
	"    Cannot determine your name and email address.\n");
}

#
Chad Barb's avatar
   
Chad Barb committed
266
# Verify that this person can muck with the experiment.
267
268
269
270
271
# Note that any script down the line has to do an admin check also. 
#
if ($UID && !TBAdmin($UID) &&
    !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
Chad Barb's avatar
   
Chad Barb committed
272
	"    You do not have permission to swap or modify this experiment!\n");
273
274
}

275
276
277
278
# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

279
280
281
282
283
284
285
286
287
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    $SIG{TERM} = 'IGNORE';
    $SIG{QUIT} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}

288
289
290
291
292
#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
Chad Barb's avatar
Chad Barb committed
293
# the tb scripts.
294
295
296
297
298
299
300
301
302
303
304
305
306
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

if (! $query_result->numrows) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid exists!\n");
}
my %hashrow = $query_result->fetchhash();
my $expt_head_login = $hashrow{'expt_head_uid'};
my $estate          = $hashrow{'state'};
307
my $batchstate      = $hashrow{'batchstate'};
308
my $expt_path       = $hashrow{'path'};
309
my $expt_locked     = $hashrow{'expt_locked'};
310
my $isbatchexpt     = $hashrow{'batchmode'};
311
my $canceled        = $hashrow{'canceled'};
312
my $linktest_level  = $hashrow{'linktest_level'};
313
314
315
316
317
318
319
320
321
322
my $swappablebit= $hashrow{'swappable'};
my $idleswapbit = $hashrow{'idleswap'};
my $autoswapbit = $hashrow{'autoswap'};
my $swappablestr= ( $swappablebit ? "Yes" : "No" );
my $idleswapstr = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr = ( $autoswapbit ? "Yes" : "No" );
my $noswap      = $hashrow{'noswap_reason'};
my $noidleswap  = $hashrow{'noidleswap_reason'};
my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0;
323

324
325
if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
326
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
327
328
329
    $idleswaptime = $idleswap_time / 60.0;
}

330
331
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
332
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
333
334
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");
335

336
if (! chdir($workdir)) {
337
    die("*** $0:\n".
338
	"    Could not chdir to $workdir: $!\n");
339
340
}

341
#
342
343
344
# This script is called from the batch daemon.
# 
if ($batch) {
345
    #
346
347
348
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be in proper state for the
    # operation requested. 
349
    #
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);
    
    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());

    if ($inout eq "in") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be QUEUED.\n")
	    if ($estate ne EXPTSTATE_QUEUED);
	
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid has been canceled! Aborting.\n")
	    if ($canceled);
    }
    elsif ($inout eq "out") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be ACTIVE.\n")
	    if ($estate ne EXPTSTATE_ACTIVE);
374
375
    }
    else {
376
377
378
379
380
381
	die("*** $0:\n".
	    "    Improper request from batch daemon for $pid/$eid!\n");
    }
}
else {
    if ($isbatchexpt) {
382
383
384
385
	#
	# User is requesting that a batch either be injected or paused.
	# Sanity check the state, but otherwise let the batch daemon
	# handle it.
386
387
	#
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
388
	    if ($canceled);
389

390
	if ($inout eq "in") {
391
	    ExitWithStatus(1,
392
393
394
395
			   "Batch experiment $pid/$eid must be SWAPPED to\n".
			   "QUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_SWAPPED);
	    SetExpState($pid, $eid, EXPTSTATE_QUEUED);
396
397
	}
	elsif ($inout eq "out") {
398
	    ExitWithStatus(1,
399
400
401
402
			   "Batch experiment $pid/$eid must be ACTIVE or\n".
			   "ACTIVATING to swap out. Currently $estate.")
		if ($estate ne EXPTSTATE_ACTIVE &&
		    $estate ne EXPTSTATE_ACTIVATING);
403
404
405
406
407

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    # 
408
	    TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
409
410
	}
	elsif ($inout eq "pause") {
411
	    ExitWithStatus(1,
412
413
414
			   "Batch experiment $pid/$eid must be QUEUED to\n".
			   "DEQUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_QUEUED);
415
416

	    #
417
418
419
420
	    # XXX. The batch daemon might already have the experiment, but
	    # not have shipped it off to startexp. Change the state
	    # anyway. The error will be noticed later when startexp dies,
	    # and the batch daemon gets the error back. This sucks.
421
	    #
422
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
423
	}
424
	elsif ($inout eq "modify") {
425
	    ExitWithStatus(1,
426
427
428
429
430
431
			   "Batch experiment $pid/$eid must be SWAPPED or\n".
			   "ACTIVE to modify. Currently $estate.")
		if (($estate ne EXPTSTATE_SWAPPED &&
		     $estate ne EXPTSTATE_ACTIVATING) ||
		    $batchstate != BATCHSTATE_UNLOCKED());

432
	    #
433
	    # Otherwise, proceed with the modify. The experiment will be
434
435
	    # locked below, and so it cannot be injected or otherwise messed
	    # with since its state is going to be changed before we unlock
436
437
438
439
	    # the experiments table. The batch daemon will leave it alone
	    # until the modify is done. If the modify fails and cannot recover
	    # it is going to get swapped out; that is okay since the batch
	    # daemon does not keep state internally. 
440
	    #
441
442
	    goto doit;
	}
443
444
	else {
	    die("*** $0:\n",
445
		"    Operation $inout not allowed on a batch experiment!\n");
446
	}
447
448
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid state has been changed.\n");
449
      doit:
450
    }
451
452
453
454
455
456
457
458
459
460
    else {
	#
	# If the cancel flag is set, then user must wait for that to
	# clear before we can do anything else.
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid has its cancel flag set!.\n".
		       "You must wait for that to clear before you can swap\n".
		       "or modify the experiment.\n")
	    if ($canceled);
461

462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
	#
	# Check the state for the various operations.
	#
	if (!$force) {
	  SWITCH: for ($inout) {
	      /^in$/i && do {
		  if ($estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped out!");
		  }
		  last SWITCH;
	      };
	      /^out$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_ACTIVATING()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in ".
				     "or activating!\n");
		  }
		  
		  if ($estate eq EXPTSTATE_ACTIVATING()) {
		      #
		      # All we can do is set the cancel flag and hope that
		      # it gets noticed. We do not wait. 
		      # 
		      TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
		      
		      ExitWithStatus(0,
				     "Experiment $pid/$eid swapin has been  ".
				     "marked for cancelation.\n".
				     "You will receive email when the original ".
				     "swap request has finished.");
		  }
		  last SWITCH;
	      };
	      /^restart$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in!");
		  }
		  last SWITCH;
	      };
	      /^modify$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid must be ACTIVE or\n".
				     "SWAPPED to modify!\n");
		  }
		  last SWITCH;
	      };
	      die("*** $0:\n".
		  "    Missing state check for action: $action\n");
	  }
516
517
	}
    }
518
519
}

520
521
522
523
524
525
526
#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
SWITCH: for ($inout) {
    /^in$/i && do {
527
	$nextswapstate = EXPTSTATE_ACTIVATING();
528
529
530
	last SWITCH;
    };
    /^out$/i && do {
531
	$nextswapstate = EXPTSTATE_SWAPPING();
532
533
534
	last SWITCH;
    };
    /^restart$/i && do {
535
	$nextswapstate = EXPTSTATE_RESTARTING();
536
537
538
	last SWITCH;
    };
    /^modify$/i && do {
539
540
	$nextswapstate = (($estate eq EXPTSTATE_SWAPPED()) ?
			  EXPTSTATE_MODIFY_PARSE() : EXPTSTATE_MODIFY_REPARSE());
541
542
	last SWITCH;
    };
543
    die("*** $0:\n".
544
	"    Missing state check for action: $action\n");
545
}
546
547
 
# Update idleswap_timeout to whatever the current value is.
548
if ($inout ne "out") {
549
550
551
    DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
		 "where eid='$eid' and pid='$pid'");
}
552

553
554
555
556
557
558
559
#
# On a failure, we go back to this swapstate. Might be modified below.
# 
$termswapstate = $estate;

# Lock the record, set the nextstate, and unlock the table.
TBLockExp($pid, $eid, $nextswapstate);
560
561
562
563
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
564
565
566
567
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
568
569
#

570
571
572
573
574
575
576
577
578
if ($inout eq "in") {
    $action = "swapped in";
}
if ($inout eq "out") {
    $action = "swapped out";
}
if ($inout eq "restart") {
    $action = "restarted";
}
Chad Barb's avatar
   
Chad Barb committed
579
580
581
if ($inout eq "modify") {
    $action = "modified";
}
582

583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
#
my $expt_head_name;
my $expt_head_email;

if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    print STDERR "*** WARNING: ".
	         "Could not determine name/email for $expt_head_login.\n";
    $expt_head_name  = "TBOPS";
    $expt_head_email = $TBOPS;
}

598
599
600
#
# Before going to background, we have to copy out the NS file!
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
601
if ($inout eq "modify" && defined($modnsfile)) {
602
603
604
605
606
607
608
609
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
	die("*** $0:\n".
	    "    Could not copy $tempnsfile to $modnsfile");
    }
    chmod(0664, "$modnsfile");
}

610
611
612
613
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
614
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
615
    TBExptSetLogFile($pid, $eid, $logname);
616
    TBExptOpenLogFile($pid, $eid);
Chad Barb's avatar
Chad Barb committed
617

618
619
620
621
622
    if (my $childpid = TBBackGround($logname)) {
	#
	# Parent exits normally, except if in waitmode. 
	#
	if (!$waitmode) {
623
624
625
	    print("Experiment $pid/$eid is now being $action.\n".
		  "You will be notified via email when the this is done.\n")
		if (! $quiet);
626
627
	    exit(0);
	}
628
629
630
631
632
633
634
635
	print("Waiting for experiment $eid to finish its swap${action}\n")
	    if (! $quiet);
	    
	if (isatty(STDIN) && !$quiet) {
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
	}
636
637
638
639
640
641
642
643
644
645
646
647
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

648
	#
649
	# Wait until child exits or until user gets bored and types ^C.
650
	#
651
652
	waitpid($childpid, 0);
	
653
654
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
655
	exit($? >> 8);
656
657
658
    }
}

659
660
661
662
663
664
665
666
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
#
# Gather stats; start clock ticking
#
if ($inout eq "in") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "out") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "modify") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0,
		    TBDB_STATS_FLAGS_START);
}

683
684
685
#
# Remove old report file since its contents are going to be invalid.
#
686
if ($inout ne "restart" && -e $repfile) {
687
688
689
    unlink("$repfile");
}

690
691
692
693
#
# Sanity check states in case someone changes something.
#
if ($inout eq "out") {
694
695
696
697
    my $optarg = (($force || $idleswap) ? "-force" : "");
    
    print STDOUT "Running 'tbswap out $optarg $pid $eid'\n";
    if (system("$tbdir/tbswap out $optarg $pid $eid") != 0) {
698
	$errorstat = $? >> 8;
699
	fatal("tbswap out failed!");
700
    }
701
    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
702
}
703
elsif ($inout eq "in") {
704
    print STDOUT "Running 'tbswap in $pid $eid'\n";
Chad Barb's avatar
   
Chad Barb committed
705
    if (system("$tbdir/tbswap in $pid $eid") != 0) {
706
	$errorstat = $? >> 8;
707
	fatal("tbswap in failed!");
708
    }
709
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
710

711
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
Chad Barb's avatar
Chad Barb committed
712
}
Chad Barb's avatar
   
Chad Barb committed
713
elsif ($inout eq "modify") {
Chad Barb's avatar
Chad Barb committed
714
    my $modifyError = "";
715
    my $oldstate    = $estate;
Chad Barb's avatar
Chad Barb committed
716

717
718
719
    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

Chad Barb's avatar
Chad Barb committed
720
    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
721
    if (TBExptBackupVirtualState($pid, $eid)) {
722
	fatal("Could not backup experiment state; cannot safely continue!");
Chad Barb's avatar
Chad Barb committed
723
724
725
    }

    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
726
727
    # Rerun tbprerun if modifying, but only if new NS file provided.
    # Yep, we allow reswap without changing the NS file. For Shashi and SIM. 
Chad Barb's avatar
Chad Barb committed
728
    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
729
730
731
732
733
    if (defined($modnsfile)) {
	print STDOUT "Running 'tbprerun $pid $eid $modnsfile'\n";
	if (system("$tbdir/tbprerun $pid $eid $modnsfile") != 0) {
	    $modifyError = "tbprerun failed!";
	}
Chad Barb's avatar
Chad Barb committed
734
735
    }

Chad Barb's avatar
   
Chad Barb committed
736
    #
737
    # Our next state depends on whether the experiment was active or swapped.
Chad Barb's avatar
   
Chad Barb committed
738
    #
739
740
741
    if (! $modifyError) {
	if ($estate eq EXPTSTATE_SWAPPED) {
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
Chad Barb's avatar
   
Chad Barb committed
742
	}
743
744
745
746
	else {
	    SetExpState($pid, $eid, EXPTSTATE_MODIFY_RESWAP);
	    
	    my $optarg = ($reboot ? "-reboot" : "");
747
	    $optarg .= ($eventsys_restart ? " -eventsys_restart" : "");
748
749
750
751
752
753

	    print STDOUT "Running 'tbswap update $optarg $pid $eid'\n";
	    if (system("$tbdir/tbswap update $optarg $pid $eid") != 0) {
		$errorstat = $? >> 8;
		$modifyError = "tbswap update failed!";
	    }
Chad Barb's avatar
   
Chad Barb committed
754

755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
	    #
	    # See what tbswap did. It might have swapped it out if there
	    # was an error. 
	    # 
	    if (! $modifyError) {
		SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
		$estate = EXPTSTATE_ACTIVE;
	    }
	    elsif ($errorstat & 0x40) {
		#
		# Icky. Magic return code that says tbswap swapped it out.
		# We do not want tbswap to muck with states anymore, so
		# need to know what it did. At some point we should clean
		# up the exit reporting! Anyway, fatal() needs to know the
		# the right state to go back to (no longer ACTIVE).
		#
		$estate = EXPTSTATE_SWAPPED;
		$termswapstate = EXPTSTATE_SWAPPED;
                # Old accounting info.
		TBSetExpSwapTime($pid, $eid);
Chad Barb's avatar
   
Chad Barb committed
775
	    }
776
	}
Chad Barb's avatar
Chad Barb committed
777
778
779
    }

    if ($modifyError) {
780
	print STDOUT "Modify Error: $modifyError\n";
Chad Barb's avatar
Chad Barb committed
781
	print STDOUT "Recovering experiment state...\n";
782
	
783
784
	# Must deal with the prerender explicitly since it runs background.
	system("prerender -r $pid $eid");
785
	TBExptRemoveVirtualState($pid, $eid);
786
	
787
	if (TBExptRestoreVirtualState($pid, $eid) == 0) {
788
789
	    # Must deal with the prerender explicitly since it runs background.
	    system("prerender -t $pid $eid");
790
791
792
793
794
	    fatal("Update aborted; old state restored.");
	}
	else {
	    $modifyHosed = 1;
	    fatal("Experiment state could not be restored!");
Chad Barb's avatar
Chad Barb committed
795
	}
Chad Barb's avatar
   
Chad Barb committed
796
    }
797
    
798
    TBExptClearBackupState($pid, $eid);
799
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
800
}
Chad Barb's avatar
   
Chad Barb committed
801
else { # $inout eq "restart" assumed.
802
    print STDOUT "Running 'tbrestart $pid $eid'\n";
803
    if (system("$tbdir/tbrestart $pid $eid") != 0) {
804
	fatal("tbrestart failed!");
805
    }
806
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
807
}
808

809
810
811
812
813
814
815
816
817
818
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
819
system("cp -Rfp $workdir/ $userdir/tbdata/");
820

821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
#
# Deal with linktest. If requested, swapping in or modifying, and experiment
# is indeed active, then run it!
#
# XXX - linktest uses files from $userdir/tbdata/, so the above cp must
#       happen first!
#
if ($linktest_level && ExpState($pid, $eid) eq EXPTSTATE_ACTIVE) {
    #
    # Run it. No worries about failures.
    #
    my $output = "linktest.$$";
    my $optarg = "-l $linktest_level -o $output";
    
    print STDOUT "Running 'run_linktest.pl $optarg -e $pid/$eid'\n";
    if (system("$tbdir/run_linktest.pl $optarg -e $pid/$eid") != 0) {
	system("cat $output")
	    if (-r $output);
	
	print STDERR "*** WARNING: ".
	             "Linktest run returned non-zero status!\n";
	
	SENDMAIL("$user_name <$user_email>",
		 "Linktest Failure: $pid/$eid",
		 "Failure in linktest (level $linktest_level); ".
		 "returned non-zero status",
		 "$user_name <$user_email>",
		 "Cc: $expt_head_name <$expt_head_email>\n".
		 "Cc: $TBOPS",
		 ($output));
    }
}

854
855
856
857
#
# Gather stats. 
#
if ($inout eq "in") {
858
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0);
859
860
}
elsif ($inout eq "out") {
861
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
862
		    ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0));
863
864
}
elsif ($inout eq "modify") {
865
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0);
866
}
867
868
# Old accounting info.
TBSetExpSwapTime($pid, $eid);
869

870
871
872
873
874
875
#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
876
# In batch mode, just exit without sending email or unlocking. The
877
# batch daemon will take care of that.
878
879
880
881
882
#
if ($batch) {
    exit(0);
}

883
884
885
886
887
888
889
#
# Clear the log file so the web page stops spewing. 
#
if (defined($logname)) {
    TBExptCloseLogFile($pid, $eid);
}

890
891
892
#
# Must unlock before exit.
#
893
TBUnLockExp($pid, $eid);
894
895
896
897
898

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above). 
#
899
TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
900
901
902

print "Swap Success!\n";

903
904
905
906
#
# Send email notification to user.
#
my $message =
907
908
    "Experiment $eid in project $pid has been ";

909
if ($inout eq "out" && ($idleswap || $autoswap || $force) ) {
910
    $message .= "forcibly swapped out by\nEmulab";
911
912
913
914
915
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	  "(See also the Idle-Swap info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($autoswap) {
916
917
	$message .= " because it exceeded its Maximum Duration.\n".
	  "(See also the Max. Duration info in \n".
918
919
920
921
922
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($force) {
	$message .= ". (See also our Node Usage Policies in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    }
923
924
925
926
927
}
else {
    $message .= "$action.\n";
}

928
929
930
931
932
if ($inout eq "in") {
    # Add the swap settings...
    $message .="\nCurrent swap settings:\n$swapsettings";
}

933
934
$message .=
    "\n".
935
936
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";
937
938

SENDMAIL("$user_name <$user_email>",
939
	 "Experiment $pid/$eid \u$action",
940
	 $message,
941
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
942
943
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
944
945
	 (($inout eq "restart") ? ($logname) :
	  (($repfile, $logname), (defined($modnsfile) ? ($modnsfile) : ()))));
946
947
948
949
950
951

exit 0;

sub fatal($)
{
    my($mesg) = $_[0];
Chad Barb's avatar
Chad Barb committed
952

953
954
    print STDOUT "*** $0:\n".
	         "    $mesg\n";
955

956
957
958
959
960
961
962
963
964
965
966
967
968
    #
    # Gather stats. 
    #
    if ($inout eq "in") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, $errorstat);
    }
    elsif ($inout eq "out") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $errorstat);
    }
    elsif ($inout eq "modify") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, $errorstat);
    }

969
970
971
972
973
974
975
    #
    # Clear backup state since not needed anymore; experiment is toast. 
    # 
    if ($inout eq "modify") {
	TBExptClearBackupState($pid, $eid);
    }

Chad Barb's avatar
   
Chad Barb committed
976
    #
977
    # If hosed, we entirely terminate the experiment.
Chad Barb's avatar
   
Chad Barb committed
978
    #
979
    if ($modifyHosed) {
Chad Barb's avatar
   
Chad Barb committed
980
	#
981
	# Note: $estate is indeed still set appropriately!
Chad Barb's avatar
   
Chad Barb committed
982
983
	#
	if ($estate eq EXPTSTATE_ACTIVE) {
984
	    print "Running 'tbswap out -force $pid $eid'\n";
Chad Barb's avatar
   
Chad Barb committed
985
986
987
988
	    if (system("$tbdir/tbswap out -force $pid $eid") != 0) {
		print "tbswap out failed!\n";
	    }
	}
Chad Barb's avatar
Chad Barb committed
989

990
	print "Running 'tbend -force $pid $eid'\n";
Chad Barb's avatar
   
Chad Barb committed
991
992
993
	if (system("$tbdir/tbend -force $pid $eid") != 0) {
	    print "tbend failed!\n";
	}
994
	# Must override since we are so badly hosed. 
995
	$termswapstate = EXPTSTATE_TERMINATED;
Chad Barb's avatar
   
Chad Barb committed
996
997
    }

998
999
1000
    # Copy over the log files so the user can see them.
    system("/bin/cp -Rfp $workdir/ $userdir/tbdata");

1001
1002
1003
    # Set proper state, which is typically the way we came in.
    SetExpState($pid, $eid, $termswapstate);

1004
    #
1005
    # In batch mode, exit without sending the email or unlocking. The
1006
    # batch daemon will take care of that.
1007
1008
    #
    if ($batch) {
1009
	exit($errorstat);
1010
1011
    }

1012
    #
Chad Barb's avatar
Chad Barb committed
1013
    # Clear the log file so the web page stops spewing.
1014
1015
1016
1017
1018
    #
    if (defined($logname)) {
	TBExptCloseLogFile($pid, $eid);
    }

1019
1020
    # Unlock and reset state to its terminal value.
    TBUnLockExp($pid, $eid);
1021
1022
1023
1024
1025

    #
    # Clear the cancel flag now that the operation is complete. Must be done
    # after we change the experiment state (above).
    #
1026
    TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
1027

1028
1029
1030
1031
    #
    # Send a message to the testbed list. Append the logfile.
    #
    SENDMAIL("$user_name <$user_email>",
1032
	     "Swap ${inout} Failure: $pid/$eid",
1033
1034
1035
1036
1037
1038
	     $mesg . "\n\n" .
	     "Please look at the log below to see what happened. If the error\n".
	     "resulted from a lack of resources (not enough nodes), you can\n".
	     "use this web page to get a summary of free nodes:\n\n".
	     "  ${TBBASE}/nodecontrol_list.php3\n".
	     "\n".
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1039
	     "Please do not try again until you see enough nodes free.",
1040
	     ($idleswap ? $TBOPS : "$user_name <$user_email>"),
1041
	     "Cc:  $expt_head_name <$expt_head_email>\n".
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1042
	     "Cc:  $TBOPS",
1043
	     (($logname), (defined($modnsfile) ? ($modnsfile) : ())));
1044

Leigh B. Stoller's avatar
Leigh B. Stoller committed
1045
    if ($modifyHosed) {
Chad Barb's avatar
   
Chad Barb committed
1046
1047
1048
1049
1050
1051
1052
1053
1054
	#
	# Copy off the workdir to the user directory, Then back up both of
	# them for post-mortem debugging.
	#
	system("/bin/cp -Rfp $workdir/ $userdir/tbdata");
	system("/bin/rm -rf  ${workdir}-failed");
	system("/bin/mv -f   $workdir ${workdir}-failed");
	system("/bin/rm -rf  ${userdir}-failed");
	system("/bin/mv -f   $userdir ${userdir}-failed");
Chad Barb's avatar
Chad Barb committed
1055
	TBExptDestroy($pid, $eid);
Chad Barb's avatar
   
Chad Barb committed
1056
1057
    }

1058
    exit($errorstat);
1059
}