create_image.in 26.3 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2
#
3
# Copyright (c) 2000-2013 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
23
#
24
use strict;
25 26
use English;
use Getopt::Std;
27
use POSIX qw(setsid :sys_wait_h);
28
use File::Basename;
29

30
#
31
# Image Creation Tuneables.
32 33
#
# $maxwait	max wall clock time to allow, progress or not
34 35 36
#		Empirically we have observed about 1.6MB/sec on a pc850
#		for a Windows image (the slowest to create), so figuring
#		1.5MB/sec for a 6GB max image works out to around 72 minutes.
37 38 39 40
# $idlewait	max time to wait between periods of progress
# $checkwait	time between progress checks (must be int div of $idlewait)
# $reportwait	time between progress reports (must be multiple of $checkwait)
#
41 42 43 44
# $maximagesize	max size in bytes of an image.  This should really be in the
#		DB (per-testbed, per-project, per-user, per-something), and
#		not hardwired here.  In the meantime, we set this big and let
#		disk quotas do the dirty work of limiting size.
45
#
46
my $maxwait      = (72 * 60);
47 48 49
my $idlewait     = ( 8 * 60);
my $reportwait   = ( 2 * 60);
my $checkwait    = 15;
50 51
my $maximagesize = (6 * 1024**3); # 6GB; may be overridden by a site variable

52 53 54 55 56 57 58 59
#
# Create a disk image.
#
# XXX: Device file should come from DB.
#      Start/count slice computation is not generalized at all.
#
#
# Print the command line synopsis on STDERR and exit with status -1.
# Every switch accepted by $optlist is listed here.
#
sub usage()
{
    print(STDERR
	  "Usage: create_image [-wsNdfe] [-p <pid>] <imagename> <node>\n" .
	  "switches and arguments:\n".
	  "-w          - wait for image to be fully created\n".
	  "-s          - use ssh instead of frisbee uploader\n".
	  "-N          - use NFS (if available) instead of frisbee uploader\n".
	  "-d          - turn on debugging output\n".
	  "-f          - run in the foreground (no logfile; overrides -w)\n".
	  "-e          - do not send email when image creation succeeds\n".
	  "-p <pid>    - project ID of the image; defaults to system project\n".
	  "<imagename> - imagename to use\n".
	  "<node>      - nodeid to create the image from\n");
    exit(-1);
}
71
my $optlist  = "p:wsNdfe";
72
my $waitmode = 0;
73 74 75 76
my $usessh   = 0;
my $usenfs   = 0;
my $usefup   = 1;
my $noemail  = 0;
77 78 79 80 81 82

#
# Configure variables
#
my $TB		= "@prefix@";
my $TBOPS       = "@TBOPSEMAIL@";
83
my $TBLOGS      = "@TBLOGSEMAIL@";
84
my $BOSSIP	= "@BOSSNODE_IP@";
85
my $CONTROL     = "@USERNODE@";
86
my $NONFS	= @NOSHAREDFS@;
87 88 89 90 91 92 93

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
94
use libadminmfs;
95
use Experiment;
96
use Node;
97 98
use User;
use Image;
99
use Logfile;
100 101 102 103 104 105 106 107 108 109 110 111

#
# Turn off line buffering on output
#
$| = 1;

#
# Untaint the path
# 
$ENV{'PATH'} = "/bin:/sbin:/usr/bin:";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

Leigh B Stoller's avatar
Leigh B Stoller committed
112 113 114 115 116 117 118 119
#
# We don't want to run this script unless its the real version.
#
if ($EUID != 0) {
    die("*** $0:\n".
	"    Must be setuid! Maybe its a development version?\n");
}

120 121
sub cleanup();
sub fatal($);
122
sub check_progress($$);
123
sub run_with_ssh($$);
124

125 126
my $nodereboot	= "$TB/bin/node_reboot";
my $createimage = "/usr/local/bin/create-image";
127
my $reboot_prep = "@CLIENT_BINDIR@/reboot_prepare";
128
my $EC2SNAP     = "$TB/sbin/ec2import.proxy";
129
my $friskiller  = "$TB/sbin/frisbeehelper";
130
my $osselect    = "$TB/bin/os_select";
131
my $checkquota  = "$TB/sbin/checkquota";
132
my $imagehash	= "$TB/bin/imagehash";
133
my $SHA1	= "/sbin/sha1";
134
my $SCP		= "/usr/bin/scp";
135 136 137 138
my $def_devtype	= "ad";
my $def_devnum	= 0;
my $devtype;
my $devnum;
139
my $device;
140
my $mereuser    = 0;
Leigh B Stoller's avatar
Leigh B Stoller committed
141
my $debug       = 1;
142
my $foreground  = 0;
143
my $imagepid    = TB_OPSPID;
144 145
my $logfile;
my $oldlogfile;
Kirk Webb's avatar
 
Kirk Webb committed
146
my $needcleanup = 0;
147
my $needunlock  = 0;
148
my $isvirtnode  = 0;
149
my $isec2node   = 0;
150
my $onsharednode= 0;
151
my $didbackup   = 0;
152 153 154
my $node_id;
my $node;
my ($experiment,$pid);
155 156 157 158 159

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
160
my %options = ();
161 162 163
if (! getopts($optlist, \%options)) {
    usage();
}
164 165 166
if (defined($options{"w"})) {
    $waitmode = 1;
}
167 168 169
if (defined($options{"e"})) {
    $noemail = 1;
}
170
if (defined($options{"s"})) {
171 172 173 174 175 176 177 178 179 180 181
    $usessh = 1;
    $usefup = $usenfs = 0;
}
if (defined($options{"N"})) {
    if (!$NONFS) {
	$usenfs = 1;
	$usefup = $usessh = 0;
    } else {
	print STDERR "NFS not available, cannot use -N\n";
	exit(1);
    }
182 183 184 185
}
if (defined($options{"d"})) {
    $debug = 1;
}
186 187
if (defined($options{"f"})) {
    $foreground = 1;
Leigh B Stoller's avatar
Leigh B Stoller committed
188
    $waitmode = 0;
189
}
190
if (@ARGV != 2) {
191 192 193
    usage();
}

194
my $imagename  = $ARGV[0];
195
my $target     = $ARGV[1];
196

197 198 199 200 201 202 203
#
# There is no reason to run as root unless we are taking a snapshot
# of a VM on a shared node. In that case we will flip back when
# we do the ssh over.
#
$EUID = $UID;

204 205 206
#
# Untaint the arguments.
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
207
if ($imagename =~ /^([-\w\.\+]+)$/) {
208 209 210
    $imagename = $1;
}
else {
211 212
    die("*** $0:\n".
	"    Bad data in $imagename.\n");
213 214 215 216 217
}
    
if (defined($options{"p"})) {
    $imagepid = $options{"p"};
	
218
    if ($imagepid =~ /^([-\w\.]+)$/) {
219 220 221
	$imagepid = $1;
    }
    else {
222 223
	die("*** $0:\n".
	    "    Bad data in $imagepid.\n");
224
    }
225 226
}

227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
#
# Reset default values from site variables if they exist.
#
my $tmp;
if (TBGetSiteVar("images/create/maxwait", \$tmp)) {
    $maxwait = $tmp * 60;
}
if (TBGetSiteVar("images/create/idlewait", \$tmp)) {
    $idlewait = $tmp * 60;
}
if (TBGetSiteVar("images/create/maxsize", \$tmp)) {
    $maximagesize = $tmp * 1024**3;
}
$idlewait = $maxwait
    if ($maxwait < $idlewait);
$reportwait = $idlewait
    if ($idlewait < $reportwait);
$checkwait = $reportwait
    if ($reportwait < $checkwait);

247
#
248
# Verify user and get his DB uid and other info for later.
249
#
250 251 252
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
253
}
254 255 256
my $user_uid   = $this_user->uid();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
257

258 259 260 261
#
# Before doing anything else, check for overquota ... lets not waste
# our time. Make sure user sees the error by exiting with 1.
#
262
if (system("$checkquota $user_uid") != 0) {
263
    die("*** $0:\n".
264 265
	"    You are over your disk quota on $CONTROL; ".
	"please login there and cleanup!\n");
266
}
267 268
if ($UID && ! $this_user->IsAdmin()) {
    $mereuser = 1;
269 270
}

271 272 273 274 275
#
# Grab the imageid description from the DB. We do a permission check, but
# mostly to avoid hard to track errors that would result if the user picked
# the wrong one (which is likely to happen no matter what I do).
#
276 277
my $image = Image->Lookup($imagepid, $imagename);
if (!defined($image)) {
278 279
    die("*** $0:\n".
	"    No such image descriptor $imagename in project $imagepid!\n");
280
}
281
my $imageid = $image->imageid();
282

283
if ($mereuser &&
284
    ! $image->AccessCheck($this_user, TB_IMAGEID_ACCESS)) {
285 286
    die("*** $0:\n".
	"    You do not have permission to use imageid $imageid!\n");
287 288
}

289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
#
# Is it a local node or a remote EC2 node (need to generalize). 
#
if ($target =~ /^.*@.*$/) {
    if ($target =~ /^([-\w\@\+\.]+)$/) {
	$target = $1;
    }
    else {
	die("*** $0:\n".
	    "    Bad data in $target\n");
    }

    $isec2node = 1;
    $usefup = $usessh = 0;
    $pid = $image->pid();
}
else {
    if ($target =~ /^([-\w]+)$/) {
	$node_id = $1;
    }
    else {
	die("*** $0:\n".
	    "    Bad data in $target\n");
    }

    # Check node and permission
    $node = Node->Lookup($node_id);
    if (!defined($node)) {
	die("*** $0:\n".
	    "    Invalid node name $node_id!\n");
    }
    $isvirtnode   = $node->isvirtnode();
    $onsharednode = $node->sharing_mode()
	if ($isvirtnode);

    if (! $node->AccessCheck($this_user, TB_NODEACCESS_LOADIMAGE)) {
	die("*** $0:\n".
	    "    You do not have permission to create an image from $node\n");
    }

    #
    # We need the project id for test below. The target directory for the
    # output file has to be the node project directory, since that is the
    # directory that is going to be NFS mounted by default.
    #
    $experiment = $node->Reservation();
    if (!defined($experiment)) {
	die("*** $0:\n".
	    "    Could not map $node to its experiment object!\n");
    }
    $pid = $experiment->pid();

    #
    # To avoid blowing a cavernous hole ("allow all TCP ports to boss")
    # in the per-experiment firewall, we don't use the frisbee uploader if
    # the node is firewalled.
    # 
    if ($usefup && $experiment->IsFirewalled()) {
	print "*** WARNING: $node_id is firewalled, not using Frisbee uploader\n";
	$usefup = 0;
	if ($NONFS) {
	    $usenfs = 0;
	    $usessh = 1;
	} else {
	    $usenfs = 1;
	    $usessh = 0;
	}
    }
}

359
#
360 361
# Make sure that the directory exists and is writeable for the user.
# We test this by creating the file. Its going to get wiped anyway.
362
#
363 364
my $filename = $image->path();
my $isglobal = $image->global();
365
my $usepath = 0;
366 367

#
368
# Redirect pathname for global images.
369
#
370
if ($isglobal && ($filename =~ /^\/usr\/testbed/)) {
Leigh B. Stoller's avatar
Leigh B. Stoller committed
371
    $filename = PROJROOT() . "/$pid/images/" . basename($filename);
372
    print "*** WARNING: Writing global descriptor to $filename instead!\n";
373 374 375 376 377 378 379 380
    #
    # XXX the Emulab config of the master server doesn't know this trick
    # so when it tries to lookup imageid emulab-ops/<whatever> it would
    # still map to /usr/testbed and fail because it cannot update images
    # outside of /{users,group,proj}. So we skirt the issue by passing
    # it the full path constructed here rather than the imageid.
    #
    $usepath = 1;
381
}
382

383 384 385 386 387 388 389 390
#
# Make sure real path is someplace that makes sense; remember that the
# image is created on the nodes, and it NFS mounts directories on ops.
# Writing the image to anyplace else is just going to break things.
#
# Use realpath to resolve any symlinks.
#
my $translated = `realpath $filename`;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
391
if ($translated =~ /^([-\w\.\/\+]+)$/) {
392 393 394 395 396 397
    $filename = $1;
}
else {
    die("*** $0:\n".
	"    Bad data returned by realpath: $translated\n");
}
398 399 400 401 402
# Make sure not a directory.
if (-d $filename) {
    die("*** $0:\n".
	"    $filename is a directory! Must be a plain file.\n");
}
403 404

#
405
# The file must reside in an allowed directory. Since this script
406 407 408
# runs as the caller, regular file permission checks ensure its a file
# the user is allowed to use. 
#
409
if (! TBValidUserDir($filename, 0)) {
410 411 412 413
    die("*** $0:\n".
	"    $filename does not resolve to an allowed directory!\n");
}

414 415 416 417 418 419 420 421 422
#
# Before we do anything destructive, we lock the descriptor.
#
if ($image->Lock()) {
    die("*** $0:\n".
	"    Image is locked, try again later!\n");
}
$needunlock = 1;

423 424 425 426
#
# Be sure to kill off running frisbee. If a node is trying to load that
# image, well tough. 
#
427 428
system("$friskiller -k $imageid");
if ($?) {
429
    fatal("Could not kill running frisbee for $imageid!");
430
}
431

432
if (-e $filename) {
433 434 435 436 437 438 439 440 441 442 443
    #
    # Back it up in case of failure. Note that the frisbee upload server
    # does this, so we do it only for the ssh/nfs case.
    #
    if (!$usefup) {
	system("/bin/mv -f $filename ${filename}.bak");
	if ($?) {
	    fatal("Could not back up $filename");
	}
	$didbackup = 1;
    }
444 445
}

446
#
447 448 449 450 451 452 453
# We want to truncate the file (we backed it up above), which also
# confirms the user can really create a new file.
#
# XXX The problem is that frisbee upload server does this too, which
# is why we have a lot of zero length backup files. So, in uploader
# mode, make sure the user can create the tmp file that the uploader
# uses.
454
#
455 456 457 458 459
# Create (or truncate) the file the image will be written into; in frisbee
# uploader mode that is the ".tmp" sibling rather than the image itself.
# Three-argument open with a lexical handle, so the path is never parsed
# for a mode and no global bareword handle is left behind.
$tmp = $filename . ($usefup ? ".tmp" : "");
open(my $truncfh, ">", $tmp) or
    fatal("Could not create $tmp: $!");
close($truncfh) or
    fatal("Could not truncate $tmp: $!");
460

461
if (! ($isvirtnode || $isec2node)) {
462 463 464 465 466 467 468 469 470 471 472 473
    #
    # Get the disktype for this node
    #
    $node->disktype(\$devtype);
    $node->bootdisk_unit(\$devnum);

    $devtype = $def_devtype
	if (!defined($devtype));
    $devnum = $def_devnum
	if (!defined($devnum));
    $device = "/dev/${devtype}${devnum}";
}
474

475 476 477 478
#
# Record when this image was updated, so that we can figure out which
# revision of the testbed image it was based off.
#
479 480 481
# Makes no sense to do this when writing a global image to a different path.
# We need a better way to make new images live.
#
Leigh B Stoller's avatar
Leigh B Stoller committed
482
$image->MarkUpdate($this_user) == 0 or
483
    fatal("Could not mark the update time in $image");
484
    
485 486 487 488 489 490
#
# Okay, we want to build up a command line that will run the script on
# the client node. We use the imageid description to determine what
# slice (or perhaps the entire disk) is going to be zipped up. We do not
# allow arbitrary combos of course. 
#
491 492
my $startslice;
my $loadlength;
493 494
my $command    = "$createimage ";

495 496 497
if ($usefup) {
    my $id = $usepath ? $filename : ($image->pid() . "/$imagename");
    $command .= " -S $BOSSIP -F $id";
498
}
499

500 501 502 503
if ($isec2node) {
    $command = "$EC2SNAP ";
}
elsif ($isvirtnode) {
504 505 506 507 508 509 510 511 512 513
    $command .= " $node_id";
}
else {
    $startslice = $image->loadpart();
    $loadlength = $image->loadlength();

    if ($startslice || $loadlength == 1) {
	$command .= " -s $startslice";
    }
    $command .= " $device";
514
}
515 516

if ($usefup || $usessh) {
517 518 519
    $command .= " -";
} else {
    $command .= " $filename";
520 521 522 523 524
}

#
# Go to the background since this is going to take a while.
# 
Leigh B Stoller's avatar
Leigh B Stoller committed
525
if (! $foreground) {
526 527
    $logfile = Logfile->Create((defined($experiment) ?
				$experiment->gid_idx() : $image->gid_idx()));
528 529 530 531
    fatal("Could not create a logfile")
	if (!defined($logfile));
    # Mark it open since we are going to start using it right away.
    $logfile->Open();
532

533 534
    # Logfile becomes the current spew.
    $image->SetLogFile($logfile);
535

536
    if (my $childpid = TBBackGround($logfile->filename())) {
537 538
	#
	# Parent exits normally, except if in waitmode. 
539
	#
540
	if (!$waitmode) {
541
	    print("Your image from $target is being created\n".
542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560
		  "You will be notified via email when the image has been\n".
		  "completed, and you can load the image on another node.\n");
	    exit(0);
	}
	print("Waiting for image creation to complete\n");
	print("You may type ^C at anytime; you will be notified via email;\n".
	      "later; you will not actually interrupt image creation.\n");
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

561
	#
562 563 564 565 566 567
	# Wait until child exits or until user gets bored and types ^C.
	#
	waitpid($childpid, 0);
	
	print("Done. Exited with status: $?\n");
	exit($? >> 8);
568
    }
569 570
}

571
#
572 573 574
# New process group since we get called from the web interface,
# and so the child does not get zapped if the user types ^C
# in waitmode. 
575
#
Leigh B Stoller's avatar
Leigh B Stoller committed
576
if (! $foreground) {
577 578 579
    POSIX::setsid();
}

Kirk Webb's avatar
 
Kirk Webb committed
580 581 582 583 584
#
# From here on out, we should take care to clean up the DB, and
# reboot the source node.
#
$needcleanup = 1;
585

586
# Clear the bootlog; see below.
587 588
$node->ClearBootLog()
    if (defined($node));
589

590 591 592 593 594 595 596 597 598
# check_progress state
my $runticks	 = 0;
my $maxticks	 = int($maxwait / $checkwait);
my $reportticks  = int($reportwait / $checkwait);
my $idleticks    = 0;
my $maxidleticks = int($idlewait / $checkwait);
my $lastsize     = 0;
my $result;

599
#
600
# We can skip a lot of the stuff below for virtnodes and ec2 nodes.
601
#
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619
if ($isec2node) {
    my $safe_target = User::escapeshellarg($target);
    
    my $cmd = "$TB/bin/sshtb -host $CONTROL $EC2SNAP -u $user_uid ".
	"$safe_target $pid $user_uid $imageid $filename";
    print STDERR "About to: '$cmd'\n" if (1 || $debug);

    my $SAVEUID	= $UID;
    $EUID = $UID = 0;

    system($cmd);
    fatal("'$cmd' failed")
	if ($?);

    $EUID = $UID = $SAVEUID;
    goto ec2done;
}
elsif ($isvirtnode) {
620
    #
621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641
    # XEN creates a problem; the physical host cannot actually
    # execute a command inside the guest, but we need to run
    # reboot_prepare and reboot it. FreeBSD creates an additional
    # problem in that shutdown has to run to invoke prepare; reboot
    # does not run it, and a shutdown from outside the VM has the
    # same effect; prepare does not run. What a pain. 
    #
    my $SAVEUID	= $UID;
    $EUID = $UID = 0;

    my $cmd = "$TB/bin/sshtb -n -o ConnectTimeout=10 ".
	"-host $node_id $reboot_prep";
    print STDERR "About to: '$cmd'\n" if ($debug);
    system($cmd);
    fatal("'$cmd' failed")
	if ($?);

    $EUID = $UID = $SAVEUID;

    #
    # Now execute command and wait.
642 643 644 645 646 647 648 649 650
    #
    if ($NONFS) {
	$result = run_with_ssh($command, $filename);
    } else {
	$result = run_with_ssh($command, undef);
    }
    goto done;
}

651
#
Russ Fish's avatar
typo.  
Russ Fish committed
652
# Reboot into admin mode and run the command.
653 654
# Note that without a shared FS, we just boot the node into the admin MFS
# and run the command via SSH, capturing the output.
655 656 657 658
#
my $me           = $0;
my %args         = ();
$args{'name'}    = $me;
659
$args{'prepare'} = 1;
660

661
if ($usessh) {
662 663 664 665 666 667 668 669 670
    #
    # Put the node in admin mode...
    #
    $args{'on'} = 1;
    $args{'clearall'} = 0;
    if (TBAdminMfsSelect(\%args, undef, $node_id)) {
	$result = "setupfailed";
	goto done;
    }
671

672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
    #
    # ...boot it...
    #
    $args{'reboot'} = 1;
    $args{'retry'} = 0;
    $args{'wait'} = 1;
    my @failed = ();
    if (TBAdminMfsBoot(\%args, \@failed, $node_id)) {
	$result = "setupfailed";
	goto done;
    }

    #
    # ...execute command and wait!
    #
687 688
    $result = run_with_ssh($command, $filename);
    if ($result eq "setupfailed") {
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
	goto done;
    }
} else {
    $args{'command'} = $command;
    $args{'timeout'} = $maxwait + $checkwait;
    $args{'pfunc'}     = \&check_progress;
    $args{'pinterval'} = $checkwait;

    my $retry = 1;
    while ($retry) {
	$retry = 0;
	if (TBAdminMfsRunCmd(\%args, undef, $node_id)) {
	    $result = "setupfailed"
		if (!defined($result));
	}
704 705 706
    }
}

707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725
#
# XXX woeful backward compat hack.
# The old client-side script will not recognize the -S and -F options
# we pass in and will exit(-1).  We detect that here and retry without
# those options, falling back on ssh or NFS.
#
if ($usefup && $result eq "255") {
    print STDERR "MFS does not support frisbee upload, falling back on ",
                 $NONFS ? "ssh" : "nfs", "...\n";

    $command = "$createimage ";
    if ($startslice || $loadlength == 1) {
	$command .= " -s $startslice";
    }
    $command .= " $device";
    if ($usessh) {
	$command .= " -";
    } else {
	$command .= " $filename";
    }
726 727 728 729 730 731 732 733

    # reset state for check_progress
    $usefup = 0;
    $runticks = 0;
    $idleticks = 0;
    $lastsize = 0;
    $result = undef;

734 735 736 737 738 739 740
    if ($NONFS) {
	$result = run_with_ssh($command, $filename);
    } else {
	$result = run_with_ssh($command, undef);
    }
}

741 742
done:

743 744 745 746 747 748 749
# Grab boot log now. Node will reboot and possibly erase it. We should
# probably come up with a better way to handle this.
my $bootlog;
if ($node->GetBootLog(\$bootlog)) {
    $bootlog = undef;
}

Kirk Webb's avatar
 
Kirk Webb committed
750 751
if (! cleanup()) {
    fatal("Problem encountered while cleaning up!\n");
752 753 754
}

#
Kirk Webb's avatar
 
Kirk Webb committed
755 756
# If we timed out, if the result code was bad, or if the image size
# grew too large.
757
#
758 759
if ($result eq "setupfailed") {
    fatal("FAILED: Node setup failed ... \n");
760
}
761 762
if ($result eq "timeout") {
    fatal("FAILED: Timed out generating image ... \n");
763
}
764
if ($result eq "toobig") {
Kirk Webb's avatar
 
Kirk Webb committed
765 766
    fatal("FAILED: Maximum image size ($maximagesize bytes) exceeded ... \n");
}
767
if ($result != 0) {
Mike Hibler's avatar
Mike Hibler committed
768
    fatal("FAILED: Returned error code $result generating image ... \n");
769
}
770

771
ec2done:
772 773 774 775 776
#
# Everything worked, create the hash signature file.
#
my $sigdir;
($sigdir = $filename) =~ s/^(.*)\/[^\/]+$/$1\/sigs/;
777
mkdir($sigdir, 0770)
778 779 780 781 782
    if (! -d "$sigdir");

my $sigfilename;
($sigfilename = $filename) =~ s/^(.*)(\/[^\/]+$)/$1\/sigs$2.sig/;
my $swmsg = "";
783 784
if (! -x $imagehash ||
    system("$imagehash -c -o $sigfilename $filename") != 0) {
785 786 787 788 789 790 791
    warn("Could not create swapout signature file\n");
    $swmsg = "WARNING: could not create swapout signature file $sigfilename\n".
	     "       You will not be able to save disk state for this image\n";
} else {
    print("Swapout signature file created\n");
}

792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821
#
# Hash the file itself since we really want an integrity check
# on the image file.
#
# The digest parsed out of the sha1 output is stored in the image's DB
# record, unless a global image was redirected to another path. The raw
# sha1 output is also written next to the image as "<image>.sha1".
#
my $hashfile = "${filename}.sha1";
my $filehash = `$SHA1 $filename`;
if ($?) {
    fatal("Could not generate sha1 hash of $filename");
}
if ($filehash =~ /^SHA1.*= (\w*)$/) {
    if ($isglobal && $usepath) {
	print "*** WARNING: Not updating SHA1 in DB record since the ".
	    "image was written to /proj!\n";
	print "    See $hashfile instead\n";
    }
    else {
	$image->SetHash($1) == 0
	    or fatal("Failed to set the hash for $image");
    }
}
else {
    fatal("Could not parse the sha1 hash: '$filehash'");
}
unlink($hashfile)
    if (-e $hashfile);
open(my $hashfh, ">", $hashfile) or
    fatal("Could not open $hashfile for writing: $!");
print $hashfh $filehash;
# Close the handle itself (not the filename) and check the result, since
# buffered write errors only show up at close time.
close($hashfh) or
    fatal("Could not close $hashfile: $!");

822
print "Image creation succeeded.\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
823
print "Image written to $filename.\n";
824
#      "Final size: " . (stat($filename))[7] . " bytes.\n";
Kirk Webb's avatar
 
Kirk Webb committed
825

826
# Append bootlog (which has prepare output)
827
if (defined($bootlog)) {
828 829
    print "\n\n";
    print "------------------ Prepare Output ----------------\n";
830
    print "$bootlog\n";
831 832
}

833
SENDMAIL("$user_name <$user_email>",
834 835
	 "Image Creation on $target Completed: $pid/$imagename",
	 "Image creation on $target has completed. As you requested, the\n".
836
	 "image has been written to $filename.\n".
837 838
	 "You may now os_load this image on other nodes in your experiment.\n".
	 "$swmsg",
839 840
	 "$user_name <$user_email>",
	 "Bcc: $TBLOGS",
841
	 defined($logfile) ? ($logfile->filename()) : ()) if (!$noemail);
842

843
if (defined($logfile)) {
844
    # Close up the log file so the webpage stops.
845
    $logfile->Close();
846
    $image->ClearLogFile();
847
}
848
$image->Unlock();
849 850 851 852
exit 0;

#
# Take the source node out of admin mode and reboot it.
# Returns 1 on success (or when there is nothing to do), 0 on failure.
#
sub cleanup ()
{
    # Clear the flag first so that fatal() will not call us a second time.
    $needcleanup = 0;

    #
    # Nothing to do for virtual and EC2 nodes; the clientside script
    # rebooted the container.
    #
    return 1
	if ($isvirtnode || $isec2node);

    #
    # Turn admin mode back off ...
    #
    my %select_args = ('name'     => $me,
		       'on'       => 0,
		       'clearall' => 0);
    if (TBAdminMfsSelect(\%select_args, undef, $node_id)) {
	print("*** $me:\n".
	      "    Could not turn admin mode off for $node_id!\n");
	return 0;
    }

    #
    # ... and reboot back to the old OS, without waiting for the node.
    #
    my %boot_args = ('name'   => $me,
		     'on'     => 0,
		     'reboot' => 1,
		     'wait'   => 0);
    if (TBAdminMfsBoot(\%boot_args, undef, $node_id)) {
	print("*** $me:\n".
	      "    Failed to reboot $node_id on cleanup!\n");
	return 0;
    }

    return 1;
}

#
# Report a fatal error and exit with status -1. In order: print the
# message, run cleanup() if it is still pending, mail the failure, close
# the logfile, unlock the image descriptor and restore the backed up image.
#
sub fatal($)
{
    my ($mesg) = @_;

    print "$mesg\n";

    # cleanup() resets $needcleanup itself, so it runs at most once.
    if ($needcleanup) {
	print "Encountered problems cleaning up!\n"
	    if (!cleanup());
    }

    #
    # Mail the failure to the user with a Cc to $TBOPS. The logfile name
    # is passed along only when a logfile was created.
    #
    my @logfiles = ();
    push(@logfiles, $logfile->filename())
	if (defined($logfile));

    SENDMAIL("$user_name <$user_email>",
	     "Image Creation Failure on $target: $pid/$imagename",
	     $mesg,
	     "$user_name <$user_email>",
	     "Cc: $TBOPS",
	     @logfiles);

    # Close up the log file so the webpage stops.
    if (defined($logfile)) {
	$logfile->Close();
	$image->ClearLogFile();
    }

    if ($needunlock) {
	$image->Unlock();
    }

    # Put the previous image file back if we moved it aside earlier.
    system("/bin/mv -f ${filename}.bak $filename")
	if ($didbackup);

    exit(-1);
}

923 924 925 926 927 928 929 930 931 932 933
#
# Check progress of image creation by periodically checking the image size.
#
# Called every $checkwait seconds.
# Reports progress every $reportwait seconds.
# Gives up after $idlewait seconds without a size change.
#
# The first argument is ignored. The second ($statusp) is either undef or
# a reference to a hash indexed by node id; an entry other than "none" is
# taken as the final status of the command on that node.
#
# Returns 1 to keep waiting and 0 to stop. Whenever 0 is returned, $result
# has been set to the status, "timeout" or "toobig".
#
sub check_progress($$)
{
    my (undef, $statusp) = @_;

    if ($runticks == 0) {
	print "$node_id: started image capture, ".
	    "waiting up to " . int($maxwait/60) . " minutes\n";
    }

    #
    # XXX frisbee uploader uploads into a temporary file and then moves
    # it into place. So track that tmp file here.
    #
    my $fname = $filename;
    if ($usefup) {
	$fname .= ".tmp";
    }

    #
    # Command has finished for better or worse, record status and finish.
    #
    if (defined($statusp) && $statusp->{$node_id} ne "none") {
	$result = $statusp->{$node_id};
	print "$node_id: image capture has completed: status='$result'\n";
	return 0;
    }

    #
    # Has run too long
    #
    $runticks++;
    if ($runticks >= $maxticks) {
	$result = "timeout";
	print "$node_id: image capture has completed: timeout\n";
	return 0;
    }

    #
    # See if imagezip on the node is making progress.  If not, we need to
    # check the idle timer and timeout if we have taken too long.
    #
    # Also, check to see if the (somewhat arbitrary) maximum filesize has 
    # been exceeded.
    #
    my $cursize = (stat($fname))[7];
    if ($usefup && !defined($cursize)) {
	#
	# XXX avoid an ugly race.
	# When done, frisuploadd moves foo.tmp -> foo
	# If we didn't find foo.tmp, try foo now.
	#
	$fname =~ s/\.tmp$//;
	$cursize = (stat($fname))[7];
    }
    #
    # stat can still fail (file not there). Treat that as size zero, which
    # is what undef already meant in the numeric comparisons below, so we
    # avoid uninitialized-value warnings and a blank size in the report.
    #
    $cursize = 0
	if (!defined($cursize));

    if ($cursize > $maximagesize) {
	$result = "toobig";
	print "$node_id: image capture has completed: image too big\n";
	return 0;
    }
    if ($cursize == $lastsize) {
	$idleticks++;
	if ($idleticks >= $maxidleticks) {
	    $result = "timeout";
	    print "$node_id: image capture has completed: idle timeout\n";
	    return 0;
	}
    } else {
	$idleticks = 0;
    }
    $lastsize = $cursize;
    
    if (($runticks % $reportticks) == 0) {
	my $curtdiff = int($runticks * $checkwait / 60);
	print "$node_id: still waiting ...".
	    " it has been ". $curtdiff ." minutes.".
	    " Current image size: $cursize bytes.\n";
    }

    return 1;
}
1010 1011 1012 1013 1014

#
# Run a command on the node with sshtb in a forked child, and poll
# check_progress() every $checkwait seconds until the child is done.
#
# Arguments:
#   $cmd    - command line to run on the node
#   $output - local file to redirect the command output into, or undef
#
# Returns the exit code of the ssh (or the signal number if it died from a
# signal), -1 if waitpid fails, "setupfailed" if the fork fails, "timeout"
# after $maxwait + $checkwait seconds, or whatever check_progress() left
# in $result when it said to stop.
#
# Side effect: for a virtual node, the global $node_id is replaced with
# the id of its physical host.
#
sub run_with_ssh($$)
{
    my ($cmd,$output) = @_;
    my $stat = undef;

    $node_id = $node->phys_nodeid()
	if ($isvirtnode);

    $cmd = "$TB/bin/sshtb -n -host $node_id $cmd";
    if (defined($output)) {
	$cmd .= " > $output";
    }
    print STDERR "About to: '$cmd' as uid ".
	($onsharednode ? 0 : $UID) . "\n" if ($debug);

    #
    # fork returns undef on failure, never a negative number. Testing for
    # "< 0" let a failed fork fall into the child branch below, where the
    # parent would run the command itself and then exit.
    #
    my $mypid = fork();
    if (!defined($mypid)) {
	return "setupfailed";
    }

    #
    # If this is a virtnode on a shared node, we want to flip
    # back to root so that we run the ssh as root. 
    #
    # NOTE(review): this runs in the parent as well as in the child, so
    # the parent also stays root afterwards. Confirm that is intended.
    #
    if ($onsharednode) {
	$EUID = $UID = 0;
    }

    #
    # Child. Just do it.
    #
    if ($mypid == 0) {
	my $stat = 0;
	if (system($cmd)) {
	    $stat = $?;
	}
	if ($stat & 127) {
	    # died with a signal, return the signal
	    exit($stat & 127);
	}
	exit($stat >> 8);
    }

    #
    # Parent.  Wait for ssh to finish, reporting periodic progress
    # as TBAdminMfsRunCmd would do.
    #
    my $endtime = time() + $maxwait + $checkwait;
    while (1) {
	my $kid = waitpid($mypid, &WNOHANG);
	# ssh finished
	if ($kid == $mypid) {
	    $stat = $?;
	    if ($stat & 127) {
		# died with a signal, return the signal
		$stat = $stat & 127;
	    } else {
		# else return the exit code
		$stat = $stat >> 8;
	    }
	    last;
	}

	# huh?
	if ($kid == -1) {
	    $stat = -1;
	    last;
	}

	# check on progress
	if (!check_progress(undef, undef)) {
	    $stat = $result;
	    last;
	}

	# wait for awhile
	sleep($checkwait);
	if (time() >= $endtime) {
	    $stat = "timeout";
	    last;
	}
    }

    return $stat;
}
1094