swapexp.in 29.1 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2
3
4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6
7
8
# All rights reserved.
#

9
10
use English;
use Getopt::Std;
11
use POSIX qw(isatty setsid);
12
13

#
Chad Barb's avatar
Chad Barb committed
14
# This gets invoked from the Web interface.
Chad Barb's avatar
   
Chad Barb committed
15
# Swap an experiment in, swap it out, restart or modify.
16
#
Chad Barb's avatar
Chad Barb committed
17

18
19
sub usage()
{
20
    print(STDERR
21
	  "Usage: swapexp [-q] [-b | -w] [-i | -a | -f] [-r] [-e]\n".
22
23
24
25
	  "               <-s in | out | restart | modify | pause>\n".
	  "               <pid> <eid> [<nsfile>]\n".
	  "switches and arguments:\n".
	  "-w       - wait for non-batchmode experiment swap/modify\n".
26
	  "-q       - be less chatty\n".
27
28
29
30
31
32
	  "-r       - reboot nodes when doing a modify experiment\n".
	  "-e       - restart event scheduler when doing a modify experiment\n".
	  "-s <op>  - Operation to perform; one of those listed above\n".
	  "<pid>    - The project the experiment belongs to\n".
	  "<eid>    - The experiment name (id)\n".
	  "<nsfile> - Optional NS file to parse for experiment modify\n");
33
34
    exit(-1);
}
35
my  $optlist = "biafres:wq";
36

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;
    
    if ($status < 0) {
	die("*** $0:\n".
	    "    $message\n");
    }
    else {
	print STDERR "$message\n";
    }
    exit($status);
}

61
62
63
64
65
66
#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
67
my $TBINFO = "$TB/expinfo";
68
my $TBDOCBASE = "@TBDOCBASE@";
69
my $TBBASE = "@TBBASE@";
70
71
72
73
74
75
76
77
78

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

my $tbdir    = "$TB/bin/";
79
my $tbdata   = "tbdata";
80
my $batch    = 0;
81
my $idleswap = 0;
82
83
my $autoswap = 0;
my $force    = 0;
Chad Barb's avatar
Chad Barb committed
84
my $reboot   = 0;
85
my $waitmode = 0;
86
my $quiet    = 0;
87
my $eventsys_restart   = 0;
88
my $errorstat= -1;
89
90
my $modifyHosed   = 0;
my $modifySwapped = 0;
Chad Barb's avatar
   
Chad Barb committed
91

92
93
94
95
96
my $inout;
my $logname;
my $dbuid;
my $user_name;
my $user_email;
97
my @allnodes;
98
my @row;
99
my $action;
100
my $nextswapstate;
101
my $termswapstate;
Chad Barb's avatar
   
Chad Barb committed
102

103
104
105
#
# Untaint the path
# 
106
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
107
108
109
110
111
112
113
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

114
115
116
117
118
119
120
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

121
122
123
124
125
126
127
128
#
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
129
130
131
if (defined($options{"i"})) {
    $idleswap = 1;
}
132
133
134
if (defined($options{"w"})) {
    $waitmode = 1;
}
135
136
137
138
139
140
if (defined($options{"a"})) {
    $autoswap = 1;
}
if (defined($options{"f"})) {
    $force = 1;
}
141
142
143
if (defined($options{"b"})) {
    $batch = 1;
}
Chad Barb's avatar
   
Chad Barb committed
144
145
146
if (defined($options{"r"})) {
    $reboot = 1;
}
147
148
149
if (defined($options{"e"})) {
    $eventsys_restart = 1;
}
150
151
152
if (defined($options{"q"})) {
    $quiet = 1;
}
153
154
155
if (defined($options{"s"})) {
    $inout = $options{"s"};

Chad Barb's avatar
Chad Barb committed
156
157
158
    if ($inout ne "out"     &&
	$inout ne "in"      &&
	$inout ne "restart" &&
159
	$inout ne "pause"   &&
Chad Barb's avatar
   
Chad Barb committed
160
	$inout ne "modify") {
161
162
163
164
165
166
167
	usage();
    }
}
else {
    usage();
}

168
169
170
171
172
usage()
    if (($waitmode && $batch) ||
	($inout ne "modify" && @ARGV != 2) ||
	(($waitmode || $batch) && ($idleswap || $autoswap || $force)));

173
174
175
176
177
if ($eventsys_restart && $inout ne "modify") {
    print STDOUT "Usage: swapexp: -e (eventsys_restart) can be used ".
                 "only with -s modify\n";
    usage();
}
Chad Barb's avatar
   
Chad Barb committed
178
179
180
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

181
182
183
#
# Untaint the arguments.
#
184
if ($pid =~ /^([-\w\.]+)$/) {
185
186
187
188
189
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
190
if ($eid =~ /^([-\w\.]+)$/) {
191
192
193
194
195
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}
196
my $repfile = "$eid.report";
197
198
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
199
200
201
my $tempnsfile;
my $modnsfile;

Leigh B. Stoller's avatar
Leigh B. Stoller committed
202
if ($inout eq "modify" && @ARGV > 2) {
203
204
205
206
207
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
208
    if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
209
	$tempnsfile = $1;
210
211
    }
    else {
212
213
214
215
216
217
218
219
220
221
	die("Tainted nsfile name: $tempnsfile\n");
    }
    #
    # Called from ops interactively. Make sure NS file in /proj or /users.
    #
    # Use realpath to resolve any symlinks.
    #
    my $translated = `realpath $tempnsfile`;
    if ($translated =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
222
    }
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
    else {
	die("Tainted nsfile returned by realpath: $translated\n");
    }

    #
    # The file must reside in /proj, /groups, or /users. Since this script
    # runs as the caller, regular file permission checks ensure its a file
    # the user is allowed to use. /tmp/$guid-$nsref.nsfile also allowed
    # since this script is invoked directly from web interface, which generates
    # a name that should not be guessable, so as long as it looks to be in
    # proper format, we accept it. 
    #
    if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile/) &&
	! ($tempnsfile =~ /^\/var\/tmp\/php\w+/) &&
	! ($tempnsfile =~ /^\/proj/) &&
	! ($tempnsfile =~ /^\/groups/) &&
	! ($tempnsfile =~ /^\/users/)) {
	die("$tempnsfile does not resolve to an appropriate directory!\n");
    }

    if (! -f $tempnsfile || -z $tempnsfile || ! -r $tempnsfile) {
244
245
246
	die("*** $0:\n".
	    "    $tempnsfile does not look like an NS file!\n");
    }
247
248
    $modnsfile = "$eid-modify.ns";
}
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266

#
# Verify user and get his DB uid.
#
if (! UNIX2DBUID($UID, \$dbuid)) {
    die("*** $0:\n".
	"    You do not exist in the Emulab Database.\n");
}

#
# Get email info for user.
#
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
    die("*** $0:\n".
	"    Cannot determine your name and email address.\n");
}

#
Chad Barb's avatar
   
Chad Barb committed
267
# Verify that this person can muck with the experiment.
268
269
270
271
272
# Note that any script down the line has to do an admin check also. 
#
if ($UID && !TBAdmin($UID) &&
    !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
Chad Barb's avatar
   
Chad Barb committed
273
	"    You do not have permission to swap or modify this experiment!\n");
274
275
}

276
277
278
279
# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

280
281
282
283
284
285
286
287
288
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    $SIG{TERM} = 'IGNORE';
    $SIG{QUIT} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}

289
290
291
292
293
#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
Chad Barb's avatar
Chad Barb committed
294
# the tb scripts.
295
296
297
298
299
300
301
302
303
304
305
306
307
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

if (! $query_result->numrows) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid exists!\n");
}
my %hashrow = $query_result->fetchhash();
my $expt_head_login = $hashrow{'expt_head_uid'};
my $estate          = $hashrow{'state'};
308
my $batchstate      = $hashrow{'batchstate'};
309
my $expt_path       = $hashrow{'path'};
310
my $expt_locked     = $hashrow{'expt_locked'};
311
my $isbatchexpt     = $hashrow{'batchmode'};
312
my $canceled        = $hashrow{'canceled'};
313
my $linktest_level  = $hashrow{'linktest_level'};
314
315
316
317
318
319
320
321
322
323
my $swappablebit= $hashrow{'swappable'};
my $idleswapbit = $hashrow{'idleswap'};
my $autoswapbit = $hashrow{'autoswap'};
my $swappablestr= ( $swappablebit ? "Yes" : "No" );
my $idleswapstr = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr = ( $autoswapbit ? "Yes" : "No" );
my $noswap      = $hashrow{'noswap_reason'};
my $noidleswap  = $hashrow{'noidleswap_reason'};
my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0;
324

325
326
if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
327
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
328
329
330
    $idleswaptime = $idleswap_time / 60.0;
}

331
332
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
333
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
334
335
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");
336

337
if (! chdir($workdir)) {
338
    die("*** $0:\n".
339
	"    Could not chdir to $workdir: $!\n");
340
341
}

342
#
343
344
345
# This script is called from the batch daemon.
# 
if ($batch) {
346
    #
347
348
349
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be in proper state for the
    # operation requested. 
350
    #
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);
    
    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());

    if ($inout eq "in") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be QUEUED.\n")
	    if ($estate ne EXPTSTATE_QUEUED);
	
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid has been canceled! Aborting.\n")
	    if ($canceled);
    }
    elsif ($inout eq "out") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be ACTIVE.\n")
	    if ($estate ne EXPTSTATE_ACTIVE);
375
376
    }
    else {
377
378
379
380
381
382
	die("*** $0:\n".
	    "    Improper request from batch daemon for $pid/$eid!\n");
    }
}
else {
    if ($isbatchexpt) {
383
384
385
386
	#
	# User is requesting that a batch either be injected or paused.
	# Sanity check the state, but otherwise let the batch daemon
	# handle it.
387
388
	#
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
389
	    if ($canceled);
390

391
	if ($inout eq "in") {
392
	    ExitWithStatus(1,
393
394
395
396
			   "Batch experiment $pid/$eid must be SWAPPED to\n".
			   "QUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_SWAPPED);
	    SetExpState($pid, $eid, EXPTSTATE_QUEUED);
397
398
	}
	elsif ($inout eq "out") {
399
	    ExitWithStatus(1,
400
401
402
403
			   "Batch experiment $pid/$eid must be ACTIVE or\n".
			   "ACTIVATING to swap out. Currently $estate.")
		if ($estate ne EXPTSTATE_ACTIVE &&
		    $estate ne EXPTSTATE_ACTIVATING);
404
405
406
407
408

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    # 
409
	    TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
410
411
	}
	elsif ($inout eq "pause") {
412
	    ExitWithStatus(1,
413
414
415
			   "Batch experiment $pid/$eid must be QUEUED to\n".
			   "DEQUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_QUEUED);
416
417

	    #
418
419
420
421
	    # XXX. The batch daemon might already have the experiment, but
	    # not have shipped it off to startexp. Change the state
	    # anyway. The error will be noticed later when startexp dies,
	    # and the batch daemon gets the error back. This sucks.
422
	    #
423
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
424
	}
425
	elsif ($inout eq "modify") {
426
	    ExitWithStatus(1,
427
428
429
430
431
432
			   "Batch experiment $pid/$eid must be SWAPPED or\n".
			   "ACTIVE to modify. Currently $estate.")
		if (($estate ne EXPTSTATE_SWAPPED &&
		     $estate ne EXPTSTATE_ACTIVATING) ||
		    $batchstate != BATCHSTATE_UNLOCKED());

433
	    #
434
	    # Otherwise, proceed with the modify. The experiment will be
435
436
	    # locked below, and so it cannot be injected or otherwise messed
	    # with since its state is going to be changed before we unlock
437
438
439
440
	    # the experiments table. The batch daemon will leave it alone
	    # until the modify is done. If the modify fails and cannot recover
	    # it is going to get swapped out; that is okay since the batch
	    # daemon does not keep state internally. 
441
	    #
442
443
	    goto doit;
	}
444
445
	else {
	    die("*** $0:\n",
446
		"    Operation $inout not allowed on a batch experiment!\n");
447
	}
448
449
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid state has been changed.\n");
450
      doit:
451
    }
452
453
454
455
456
457
458
459
460
461
    else {
	#
	# If the cancel flag is set, then user must wait for that to
	# clear before we can do anything else.
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid has its cancel flag set!.\n".
		       "You must wait for that to clear before you can swap\n".
		       "or modify the experiment.\n")
	    if ($canceled);
462

463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
	#
	# Check the state for the various operations.
	#
	if (!$force) {
	  SWITCH: for ($inout) {
	      /^in$/i && do {
		  if ($estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped out!");
		  }
		  last SWITCH;
	      };
	      /^out$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_ACTIVATING()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in ".
				     "or activating!\n");
		  }
		  
		  if ($estate eq EXPTSTATE_ACTIVATING()) {
		      #
		      # All we can do is set the cancel flag and hope that
		      # it gets noticed. We do not wait. 
		      # 
		      TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
		      
		      ExitWithStatus(0,
				     "Experiment $pid/$eid swapin has been  ".
				     "marked for cancelation.\n".
				     "You will receive email when the original ".
				     "swap request has finished.");
		  }
		  last SWITCH;
	      };
	      /^restart$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in!");
		  }
		  last SWITCH;
	      };
	      /^modify$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid must be ACTIVE or\n".
				     "SWAPPED to modify!\n");
		  }
		  last SWITCH;
	      };
	      die("*** $0:\n".
		  "    Missing state check for action: $action\n");
	  }
517
518
	}
    }
519
520
}

521
522
523
524
525
526
527
#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
SWITCH: for ($inout) {
    /^in$/i && do {
528
	$nextswapstate = EXPTSTATE_ACTIVATING();
529
530
531
	last SWITCH;
    };
    /^out$/i && do {
532
	$nextswapstate = EXPTSTATE_SWAPPING();
533
534
535
	last SWITCH;
    };
    /^restart$/i && do {
536
	$nextswapstate = EXPTSTATE_RESTARTING();
537
538
539
	last SWITCH;
    };
    /^modify$/i && do {
540
541
	$nextswapstate = (($estate eq EXPTSTATE_SWAPPED()) ?
			  EXPTSTATE_MODIFY_PARSE() : EXPTSTATE_MODIFY_REPARSE());
542
543
	last SWITCH;
    };
544
    die("*** $0:\n".
545
	"    Missing state check for action: $action\n");
546
}
547
548
 
# Update idleswap_timeout to whatever the current value is.
549
if ($inout ne "out") {
550
551
552
    DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
		 "where eid='$eid' and pid='$pid'");
}
553

554
555
556
557
558
559
560
#
# On a failure, we go back to this swapstate. Might be modified below.
# 
$termswapstate = $estate;

# Lock the record, set the nextstate, and unlock the table.
TBLockExp($pid, $eid, $nextswapstate);
561
562
563
564
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
565
566
567
568
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
569
570
#

571
572
573
574
575
576
577
578
579
if ($inout eq "in") {
    $action = "swapped in";
}
if ($inout eq "out") {
    $action = "swapped out";
}
if ($inout eq "restart") {
    $action = "restarted";
}
Chad Barb's avatar
   
Chad Barb committed
580
581
582
if ($inout eq "modify") {
    $action = "modified";
}
583

584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
#
my $expt_head_name;
my $expt_head_email;

if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    print STDERR "*** WARNING: ".
	         "Could not determine name/email for $expt_head_login.\n";
    $expt_head_name  = "TBOPS";
    $expt_head_email = $TBOPS;
}

599
600
601
#
# Before going to background, we have to copy out the NS file!
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
602
if ($inout eq "modify" && defined($modnsfile)) {
603
604
605
606
607
608
609
610
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
	die("*** $0:\n".
	    "    Could not copy $tempnsfile to $modnsfile");
    }
    chmod(0664, "$modnsfile");
}

611
612
613
614
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
615
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
616
    TBExptSetLogFile($pid, $eid, $logname);
617
    TBExptOpenLogFile($pid, $eid);
Chad Barb's avatar
Chad Barb committed
618

619
620
621
622
623
    if (my $childpid = TBBackGround($logname)) {
	#
	# Parent exits normally, except if in waitmode. 
	#
	if (!$waitmode) {
624
625
626
	    print("Experiment $pid/$eid is now being $action.\n".
		  "You will be notified via email when the this is done.\n")
		if (! $quiet);
627
628
	    exit(0);
	}
629
630
631
632
633
634
635
636
	print("Waiting for experiment $eid to finish its swap${action}\n")
	    if (! $quiet);
	    
	if (isatty(STDIN) && !$quiet) {
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
	}
637
638
639
640
641
642
643
644
645
646
647
648
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

649
	#
650
	# Wait until child exits or until user gets bored and types ^C.
651
	#
652
653
	waitpid($childpid, 0);
	
654
655
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
656
	exit($? >> 8);
657
    }
658
    TBdbfork();
659
660
}

661
662
663
664
665
666
667
668
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
#
# Gather stats; start clock ticking
#
if ($inout eq "in") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "out") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "modify") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0,
		    TBDB_STATS_FLAGS_START);
}

685
686
687
#
# Remove old report file since its contents are going to be invalid.
#
688
if ($inout ne "restart" && -e $repfile) {
689
690
691
    unlink("$repfile");
}

692
693
694
695
#
# Sanity check states in case someone changes something.
#
if ($inout eq "out") {
696
697
698
699
    my $optarg = (($force || $idleswap) ? "-force" : "");
    
    print STDOUT "Running 'tbswap out $optarg $pid $eid'\n";
    if (system("$tbdir/tbswap out $optarg $pid $eid") != 0) {
700
	$errorstat = $? >> 8;
701
	fatal("tbswap out failed!");
702
    }
703
    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
704
}
705
elsif ($inout eq "in") {
706
    print STDOUT "Running 'tbswap in $pid $eid'\n";
Chad Barb's avatar
   
Chad Barb committed
707
    if (system("$tbdir/tbswap in $pid $eid") != 0) {
708
	$errorstat = $? >> 8;
709
	fatal("tbswap in failed!");
710
    }
711
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
712

713
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
Chad Barb's avatar
Chad Barb committed
714
}
Chad Barb's avatar
   
Chad Barb committed
715
elsif ($inout eq "modify") {
Chad Barb's avatar
Chad Barb committed
716
    my $modifyError = "";
717
    my $oldstate    = $estate;
Chad Barb's avatar
Chad Barb committed
718

719
720
721
    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

Chad Barb's avatar
Chad Barb committed
722
    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
723
    if (TBExptBackupVirtualState($pid, $eid)) {
724
	fatal("Could not backup experiment state; cannot safely continue!");
Chad Barb's avatar
Chad Barb committed
725
726
727
    }

    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
728
729
    # Rerun tbprerun if modifying, but only if new NS file provided.
    # Yep, we allow reswap without changing the NS file. For Shashi and SIM. 
Chad Barb's avatar
Chad Barb committed
730
    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
731
732
733
734
735
    if (defined($modnsfile)) {
	print STDOUT "Running 'tbprerun $pid $eid $modnsfile'\n";
	if (system("$tbdir/tbprerun $pid $eid $modnsfile") != 0) {
	    $modifyError = "tbprerun failed!";
	}
Chad Barb's avatar
Chad Barb committed
736
737
    }

Chad Barb's avatar
   
Chad Barb committed
738
    #
739
    # Our next state depends on whether the experiment was active or swapped.
Chad Barb's avatar
   
Chad Barb committed
740
    #
741
742
743
    if (! $modifyError) {
	if ($estate eq EXPTSTATE_SWAPPED) {
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
Chad Barb's avatar
   
Chad Barb committed
744
	}
745
746
747
748
	else {
	    SetExpState($pid, $eid, EXPTSTATE_MODIFY_RESWAP);
	    
	    my $optarg = ($reboot ? "-reboot" : "");
749
	    $optarg .= ($eventsys_restart ? " -eventsys_restart" : "");
750
751
752
753
754
755

	    print STDOUT "Running 'tbswap update $optarg $pid $eid'\n";
	    if (system("$tbdir/tbswap update $optarg $pid $eid") != 0) {
		$errorstat = $? >> 8;
		$modifyError = "tbswap update failed!";
	    }
Chad Barb's avatar
   
Chad Barb committed
756

757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
	    #
	    # See what tbswap did. It might have swapped it out if there
	    # was an error. 
	    # 
	    if (! $modifyError) {
		SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
		$estate = EXPTSTATE_ACTIVE;
	    }
	    elsif ($errorstat & 0x40) {
		#
		# Icky. Magic return code that says tbswap swapped it out.
		# We do not want tbswap to muck with states anymore, so
		# need to know what it did. At some point we should clean
		# up the exit reporting! Anyway, fatal() needs to know the
		# the right state to go back to (no longer ACTIVE).
		#
773
774
775
		# XXX This errorstat (0x40) is important to testbed_stats!
		# We should probably put in a swapout record instead.
		#
776
777
		$estate = EXPTSTATE_SWAPPED;
		$termswapstate = EXPTSTATE_SWAPPED;
778
		$modifySwapped = 1;
779
780
                # Old accounting info.
		TBSetExpSwapTime($pid, $eid);
Chad Barb's avatar
   
Chad Barb committed
781
	    }
782
	}
Chad Barb's avatar
Chad Barb committed
783
784
785
    }

    if ($modifyError) {
786
	print STDOUT "Modify Error: $modifyError\n";
Chad Barb's avatar
Chad Barb committed
787
	print STDOUT "Recovering experiment state...\n";
788
	
789
790
	# Must deal with the prerender explicitly since it runs background.
	system("prerender -r $pid $eid");
791
	TBExptRemoveVirtualState($pid, $eid);
792
	
793
	if (TBExptRestoreVirtualState($pid, $eid) == 0) {
794
795
	    # Must deal with the prerender explicitly since it runs background.
	    system("prerender -t $pid $eid");
796
797
798
799
800
	    fatal("Update aborted; old state restored.");
	}
	else {
	    $modifyHosed = 1;
	    fatal("Experiment state could not be restored!");
Chad Barb's avatar
Chad Barb committed
801
	}
Chad Barb's avatar
   
Chad Barb committed
802
    }
803
    
804
    TBExptClearBackupState($pid, $eid);
805
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
806
}
Chad Barb's avatar
   
Chad Barb committed
807
else { # $inout eq "restart" assumed.
808
    print STDOUT "Running 'tbrestart $pid $eid'\n";
809
    if (system("$tbdir/tbrestart $pid $eid") != 0) {
810
	fatal("tbrestart failed!");
811
    }
812
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
813
}
814

815
816
817
818
819
820
821
822
823
824
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
825
system("cp -Rfp $workdir/ $userdir/tbdata/");
826

827
828
829
830
831
832
833
834
835
836
837
838
839
840
#
# Deal with linktest. If requested, swapping in or modifying, and experiment
# is indeed active, then run it!
#
# XXX - linktest uses files from $userdir/tbdata/, so the above cp must
#       happen first!
#
if ($linktest_level && ExpState($pid, $eid) eq EXPTSTATE_ACTIVE) {
    #
    # Run it. No worries about failures.
    #
    my $output = "linktest.$$";
    my $optarg = "-l $linktest_level -o $output";
    
841
842
    print STDOUT "Running 'linktest_control $optarg $pid $eid'\n";
    if (system("$TB/sbin/linktest_control $optarg $pid $eid") != 0) {
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
	system("cat $output")
	    if (-r $output);
	
	print STDERR "*** WARNING: ".
	             "Linktest run returned non-zero status!\n";
	
	SENDMAIL("$user_name <$user_email>",
		 "Linktest Failure: $pid/$eid",
		 "Failure in linktest (level $linktest_level); ".
		 "returned non-zero status",
		 "$user_name <$user_email>",
		 "Cc: $expt_head_name <$expt_head_email>\n".
		 "Cc: $TBOPS",
		 ($output));
    }
}

860
861
862
863
#
# Gather stats. 
#
if ($inout eq "in") {
864
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0);
865
866
}
elsif ($inout eq "out") {
867
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
868
		    ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0));
869
870
}
elsif ($inout eq "modify") {
871
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0);
872
}
873
874
# Old accounting info.
TBSetExpSwapTime($pid, $eid);
875

876
877
878
879
880
881
#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
882
# In batch mode, just exit without sending email or unlocking. The
883
# batch daemon will take care of that.
884
885
886
887
888
#
if ($batch) {
    exit(0);
}

889
890
891
892
893
894
895
#
# Clear the log file so the web page stops spewing. 
#
if (defined($logname)) {
    TBExptCloseLogFile($pid, $eid);
}

896
897
898
#
# Must unlock before exit.
#
899
TBUnLockExp($pid, $eid);
900
901
902
903
904

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above). 
#
905
TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
906
907
908

print "Swap Success!\n";

909
910
911
912
#
# Send email notification to user.
#
my $message =
913
914
    "Experiment $eid in project $pid has been ";

915
if ($inout eq "out" && ($idleswap || $autoswap || $force) ) {
916
    $message .= "forcibly swapped out by\nEmulab";
917
918
919
920
921
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	  "(See also the Idle-Swap info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($autoswap) {
922
923
	$message .= " because it exceeded its Maximum Duration.\n".
	  "(See also the Max. Duration info in \n".
924
925
926
927
928
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($force) {
	$message .= ". (See also our Node Usage Policies in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    }
929
930
931
932
933
}
else {
    $message .= "$action.\n";
}

934
935
936
937
938
if ($inout eq "in") {
    # Add the swap settings...
    $message .="\nCurrent swap settings:\n$swapsettings";
}

939
940
$message .=
    "\n".
941
942
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";
943
944

SENDMAIL("$user_name <$user_email>",
945
	 "Experiment $pid/$eid \u$action",
946
	 $message,
947
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
948
949
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
950
951
	 (($inout eq "restart") ? ($logname) :
	  (($repfile, $logname), (defined($modnsfile) ? ($modnsfile) : ()))));
952
953
954
955
956
957

exit 0;

sub fatal($)
{
    my($mesg) = $_[0];
Chad Barb's avatar
Chad Barb committed
958

959
960
    print STDOUT "*** $0:\n".
	         "    $mesg\n";
961

962
963
964
965
966
967
968
969
970
971
    #
    # Gather stats. 
    #
    if ($inout eq "in") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, $errorstat);
    }
    elsif ($inout eq "out") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $errorstat);
    }
    elsif ($inout eq "modify") {
972
973
974
975
976
977
	#
	# If the modify fails, and the experiment is swapped out, then
	# insert a record for that since that is different then a modify
	# that fails, but results in the experiment being restored the
	# way it was. 
	#
978
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, $errorstat);
979
980
981
982
	
	if ($modifySwapped) {
	    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0);
	}
983
984
    }

985
986
987
988
989
990
991
    #
    # Clear backup state since not needed anymore; experiment is toast. 
    # 
    if ($inout eq "modify") {
	TBExptClearBackupState($pid, $eid);
    }

Chad Barb's avatar
   
Chad Barb committed
992
    #
993
    # If hosed, we entirely terminate the experiment.
Chad Barb's avatar
   
Chad Barb committed
994
    #
995
    if ($modifyHosed) {
996
997
	my $stat = 0;
	    
Chad Barb's avatar
   
Chad Barb committed
998
	#
999
	# Note: $estate is indeed still set appropriately!
Chad Barb's avatar
   
Chad Barb committed
1000
1001
	#
	if ($estate eq EXPTSTATE_ACTIVE) {
1002
	    print "Running 'tbswap out -force $pid $eid'\n";
Chad Barb's avatar
   
Chad Barb committed
1003
1004
	    if (system("$tbdir/tbswap out -force $pid $eid") != 0) {
		print "tbswap out failed!\n";
1005
		$stat = $? >> 8;
Chad Barb's avatar
   
Chad Barb committed
1006
	    }
1007
	    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $stat);
Chad Barb's avatar
   
Chad Barb committed
1008
	}
Chad Barb's avatar
Chad Barb committed
1009

1010
	$stat = 0;
1011
	print "Running 'tbend -force $pid $eid'\n";
Chad Barb's avatar
   
Chad Barb committed
1012
1013
	if (system("$tbdir/tbend -force $pid $eid") != 0) {
	    print "tbend failed!\n";
1014
	    $stat = $? >> 8;
Chad Barb's avatar
   
Chad Barb committed
1015
	}
1016
1017
1018
1019
1020
1021
	
	#
	# Okay, we are going to destroy the experiment below.
	# 
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_TERMINATE, 0);
	
1022
	# Must override since we are so badly hosed. 
1023
	$termswapstate = EXPTSTATE_TERMINATED;
Chad Barb's avatar
   
Chad Barb committed
1024
1025
    }

1026
1027
1028
    # Copy over the log files so the user can see them.
    system("/bin/cp -Rfp $workdir/ $userdir/tbdata");

1029
1030
1031
    # Set proper state, which is typically the way we came in.
    SetExpState($pid, $eid, $termswapstate);

1032
    #
1033
    # In batch mode, exit without sending the email or unlocking. The
1034
    # batch daemon will take care of that.
1035
1036
    #
    if ($batch) {
1037
	exit($errorstat);
1038
1039
    }

1040
    #
Chad Barb's avatar
Chad Barb committed
1041
    # Clear the log file so the web page stops spewing.
1042
1043
1044
1045
1046
    #
    if (defined($logname)) {
	TBExptCloseLogFile($pid, $eid);
    }

1047
1048
    # Unlock and reset state to its terminal value.
    TBUnLockExp($pid, $eid);
1049
1050
1051
1052
1053

    #
    # Clear the cancel flag now that the operation is complete. Must be done
    # after we change the experiment state (above).
    #
1054
    TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
1055

1056
1057
1058
1059
    #
    # Send a message to the testbed list. Append the logfile.
    #
    SENDMAIL("$user_name <$user_email>",
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
	 "Swap ${inout} Failure: $pid/$eid",
	 $mesg . "\n\n" .
	 "Please look at the log below to see what happened. If the error\n".
	 "resulted from a lack of free nodes, you can use this web page to\n".
	 "get a summary of free nodes:\n\n".
	 "  ${TBBASE}/nodecontrol_list.php3\n".
	 "\n".
	 "Please do not try again until you see enough nodes free. Or, you\n".
	 "can use the batch system to swap your experiment in when enough\n".
	 "nodes are free:\n\n".
	 "  ${TBDOCBASE}/tutorial/docwrapper.php3?docname=tutorial.html".
	     "#BatchMode\n",
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Cc:  $TBOPS",
	 (($logname), (defined($modnsfile) ? ($modnsfile) : ())));
1076

Leigh B. Stoller's avatar
Leigh B. Stoller committed
1077
    if ($modifyHosed) {
Chad Barb's avatar
   
Chad Barb committed
1078
1079
1080
1081
1082
1083
1084
1085
1086
	#
	# Copy off the workdir to the user directory, Then back up both of
	# them for post-mortem debugging.
	#
	system("/bin/cp -Rfp $workdir/ $userdir/tbdata");
	system("/bin/rm -rf  ${workdir}-failed");
	system("/bin/mv -f   $workdir ${workdir}-failed");
	system("/bin/rm -rf  ${userdir}-failed");
	system("/bin/mv -f   $userdir ${userdir}-failed");
Chad Barb's avatar
Chad Barb committed
1087
	TBExptDestroy($pid, $eid);
Chad Barb's avatar
   
Chad Barb committed
1088
1089
    }

1090
    exit($errorstat);
1091
}