#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2011 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
use Getopt::Std;
9
use POSIX qw(setsid :sys_wait_h);
10
use File::Basename;
11

12
#
13
# Image Creation Tuneables.
14 15
#
# $maxwait	max wall clock time to allow, progress or not
16 17 18
#		Empirically we have observed about 1.6MB/sec on a pc850
#		for a Windows image (the slowest to create), so figuring
#		1.5MB/sec for a 6GB max image works out to around 72 minutes.
19 20 21 22
# $idlewait	max time to wait between periods of progress
# $checkwait	time between progress checks (must be int div of $idlewait)
# $reportwait	time between progress reports (must be multiple of $checkwait)
#
23 24 25 26
# $maximagesize	max size in bytes of an image.  This should really be in the
#		DB (per-testbed, per-project, per-user, per-something), and
#		not hardwired here.  In the meantime, we set this big and let
#		disk quotas do the dirty work of limiting size.
27
#
28
my $maxwait      = (72 * 60);
29 30 31
my $idlewait     = ( 8 * 60);
my $reportwait   = ( 2 * 60);
my $checkwait    = 15;
32 33
my $maximagesize = (6 * 1024**3); # 6GB

34 35 36 37 38 39 40 41
#
# Create a disk image.
#
# XXX: Device file should come from DB.
#      Start/count slice computation is not generalized at all.
#
sub usage()
{
    #
    # Print the command line synopsis on STDERR and exit with a failure
    # status. Does not return.
    #
    my @synopsis = (
	"Usage: create_image [-wsN] [-p <pid>] <imagename> <node>",
	"switches and arguments:",
	"-w          - wait for image to be fully created",
	"-s          - use ssh instead of frisbee uploader",
	"-N          - use NFS (if available) instead of frisbee uploader",
	"-p <pid>    - project ID of the image; defaults to system project",
	"<imagename> - imagename to use",
	"<node>      - nodeid to create the image from",
    );

    print(STDERR join("\n", @synopsis), "\n");
    exit(-1);
}
53
# Switches understood by getopts(); only -p takes an argument.
my $optlist = "p:wsNd";

# Option state. The frisbee uploader is the default transport; -s and -N
# switch to ssh or NFS respectively, so exactly one of the three is set.
my $waitmode = 0;
my ($usessh, $usenfs, $usefup) = (0, 0, 1);

#
# Values filled in by configure.
#
my $TB      = "@prefix@";
my $BOSSIP  = "@BOSSNODE_IP@";
my $CONTROL = "@USERNODE@";
my $TBOPS   = "@TBOPSEMAIL@";
my $TBLOGS  = "@TBLOGSEMAIL@";
my $NONFS   = @NOSHAREDFS@;
68 69 70 71 72 73 74

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
75
use libadminmfs;
76
use Experiment;
77
use Node;
78 79
use User;
use Image;
80
use Logfile;
81 82 83 84 85 86 87 88 89 90 91 92

#
# Turn off line buffering on output
#
$OUTPUT_AUTOFLUSH = 1;

#
# Taint mode: drop the environment variables that influence how a shell
# behaves and install a fixed PATH.
#
foreach my $envvar ('IFS', 'CDPATH', 'ENV', 'BASH_ENV') {
    delete($ENV{$envvar});
}
$ENV{'PATH'} = "/bin:/sbin:/usr/bin:";

93 94
# Forward declarations for the subroutines defined at the end of the file.
sub cleanup();
sub fatal($);
sub check_progress($$);
sub run_with_ssh($$);

# The imaging script that is run on the node itself.
my $createimage = "/usr/local/bin/create-image";

# Testbed helper programs.
my $nodereboot  = "$TB/bin/node_reboot";
my $osselect    = "$TB/bin/os_select";
my $imagehash   = "$TB/bin/imagehash";
my $friskiller  = "$TB/sbin/frisbeehelper";
my $checkquota  = "$TB/sbin/checkquota";

# Disk device to image; the defaults apply when the node does not say.
my ($def_devtype, $def_devnum) = ("ad", 0);
my ($devtype, $devnum);

# Run state.
my $imagepid    = TB_OPSPID;
my $mereuser    = 0;
my $debug       = 0;
my $needcleanup = 0;
my ($logfile, $oldlogfile);
114 115 116 117 118

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
119
my %options = ();
getopts($optlist, \%options)
    or usage();

$waitmode = 1
    if (defined($options{"w"}));

# -s and -N each select a single transport and turn the other two off.
if (defined($options{"s"})) {
    ($usessh, $usefup, $usenfs) = (1, 0, 0);
}
if (defined($options{"N"})) {
    if ($NONFS) {
	print STDERR "NFS not available, cannot use -N\n";
	exit(1);
    }
    ($usenfs, $usefup, $usessh) = (1, 0, 0);
}

# Debug mode stays in the foreground, so it overrides -w.
if (defined($options{"d"})) {
    ($debug, $waitmode) = (1, 0);
}

# Exactly <imagename> and <node> must remain.
usage()
    unless (@ARGV == 2);

147
my ($imagename, $node_id) = @ARGV[0, 1];

#
# Untaint the arguments: each must consist solely of the characters allowed
# by its pattern, and is replaced by the captured (untainted) copy.
#
$node_id =~ /^([-\w]+)$/
    or die("*** $0:\n".
	   "    Bad data in $node_id\n");
$node_id = $1;

$imagename =~ /^([-\w\.\+]+)$/
    or die("*** $0:\n".
	   "    Bad data in $imagename.\n");
$imagename = $1;

# An explicit -p replaces the default image project.
if (defined($options{"p"})) {
    $imagepid = $options{"p"};

    $imagepid =~ /^([-\w\.]+)$/
	or die("*** $0:\n".
	       "    Bad data in $imagepid.\n");
    $imagepid = $1;
}

181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
#
# Reset default values from site variables if they exist.
#
my $tmp;

# The time limits are scaled by 60 (presumably minutes to seconds) and the
# size limit by 1024**3 to get bytes.
$maxwait = $tmp * 60
    if (TBGetSiteVar("images/create/maxwait", \$tmp));
$idlewait = $tmp * 60
    if (TBGetSiteVar("images/create/idlewait", \$tmp));
$maximagesize = $tmp * 1024**3
    if (TBGetSiteVar("images/create/maxsize", \$tmp));

# Keep the intervals ordered: checkwait <= reportwait <= idlewait <= maxwait.
if ($idlewait > $maxwait) {
    $idlewait = $maxwait;
}
if ($reportwait > $idlewait) {
    $reportwait = $idlewait;
}
if ($checkwait > $reportwait) {
    $checkwait = $reportwait;
}

201
#
202
# Verify user and get his DB uid and other info for later.
203
#
204 205 206
my $this_user = User->ThisUser();
tbdie("You ($UID) do not exist!")
    unless (defined($this_user));

my $user_uid   = $this_user->uid();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();

# The node must exist.
my $node = Node->Lookup($node_id);
defined($node)
    or die("*** $0:\n".
	   "    Invalid node name $node_id!\n");

# Root and testbed admins skip the permission checks; everyone else is a
# "mere user" and needs load-image access to the node.
unless (!$UID || $this_user->IsAdmin()) {
    $mereuser = 1;

    $node->AccessCheck($this_user, TB_NODEACCESS_LOADIMAGE)
	or die("*** $0:\n".
	       "    You do not have permission to create an image from $node\n");
}

227 228 229 230
#
# Before doing anything else, check for overquota ... lets not waste
# our time. Make sure user sees the error by exiting with 1.
#
231
# A non-zero status from checkquota is treated as being over quota.
system("$checkquota $user_uid") == 0
    or die("*** $0:\n".
	   "    You are over your disk quota on $CONTROL; ".
	   "please login there and cleanup!\n");

237 238 239 240 241
#
# We need the project id for test below. The target directory for the
# output file has to be the node project directory, since that is the
# directory that is going to be NFS mounted by default.
#
242 243
my $experiment = $node->Reservation();
defined($experiment)
    or die("*** $0:\n".
	   "    Could not map $node to its experiment object!\n");

# Project and experiment IDs of the experiment holding the node.
my $pid = $experiment->pid();
my $eid = $experiment->eid();
249 250 251 252 253 254

#
# Grab the imageid description from the DB. We do a permission check, but
# mostly to avoid hard to track errors that would result if the user picked
# the wrong one (which is likely to happen no matter what I do).
#
255 256
my $image = Image->Lookup($imagepid, $imagename);
defined($image)
    or die("*** $0:\n".
	   "    No such image descriptor $imagename in project $imagepid!\n");

my $imageid = $image->imageid();

# Only mere users are subject to the image access check.
if ($mereuser) {
    $image->AccessCheck($this_user, TB_IMAGEID_ACCESS)
	or die("*** $0:\n".
	       "    You do not have permission to use imageid $imageid!\n");
}

#
269 270
# Make sure that the directory exists and is writeable for the user.
# We test this by creating the file. Its going to get wiped anyway.
271
#
272 273
my $filename = $image->path();
my $isglobal = $image->global();
my $usepath  = 0;

#
# A global image whose path is under /usr/testbed is written into the
# images directory of the node's project instead.
#
if ($isglobal && ($filename =~ /^\/usr\/testbed/)) {
    my $leafname = basename($filename);

    $filename = PROJROOT() . "/$pid/images/" . $leafname;
    print "*** WARNING: Writing global descriptor to $filename instead!\n";

    #
    # XXX the Emulab config of the master server doesn't know this trick,
    # so when it tries to look up imageid emulab-ops/<whatever> it would
    # still map to /usr/testbed and fail because it cannot update images
    # outside of /{users,groups,proj}. So we skirt the issue by passing
    # it the full path constructed here rather than the imageid.
    #
    $usepath = 1;
}
291

292 293 294 295 296 297 298 299
#
# Make sure real path is someplace that makes sense; remember that the
# image is created on the nodes, and it NFS mounts directories on ops.
# Writing the image to anyplace else is just going to break things.
#
# Use realpath to resolve any symlinks.
#
my $translated = qx{realpath $filename};

# Accept (and untaint) the resolved path only if it is made up of the
# expected characters.
$translated =~ /^([-\w\.\/\+]+)$/
    or die("*** $0:\n".
	   "    Bad data returned by realpath: $translated\n");
$filename = $1;

# The target has to be a plain file, not a directory.
die("*** $0:\n".
    "    $filename is a directory! Must be a plain file.\n")
    if (-d $filename);

#
# The file must live in one of the allowed directories. Since this script
# runs as the caller, ordinary file permissions take care of whether the
# user may actually use the file.
#
TBValidUserDir($filename, 0)
    or die("*** $0:\n".
	   "    $filename does not resolve to an allowed directory!\n");

323 324 325 326
#
# Be sure to kill off running frisbee. If a node is trying to load that
# image, well tough. 
#
327 328 329 330 331
system("$friskiller -k $imageid");
if ($?) {
    die("*** $0:\n".
	"    Could not kill running frisbee for $imageid!\n");
}

#
# Remove any previous copy of the image and create an empty file in its
# place. Creating the file here checks that the directory exists and is
# writable by the caller before the node is touched.
#
if (-e $filename) {
    unlink($filename) or
	die("*** $0:\n".
	    "    Could not delete $filename: $!\n");
}

# Three-argument open on a lexical handle: the path is never parsed for a
# mode or for surrounding whitespace, and no global handle is left around.
open(my $imagefh, ">", $filename) or
    die("*** $0:\n".
	"    Could not create $filename: $!\n");
close($imagefh) or
    die("*** $0:\n".
	"    Could not truncate $filename: $!\n");
345

346 347 348
#
# Get the disktype for this node
#
349 350
$node->disktype(\$devtype);
$node->bootdisk_unit(\$devnum);

# Fall back on the defaults for anything the node did not supply.
if (!defined($devtype)) {
    $devtype = $def_devtype;
}
if (!defined($devnum)) {
    $devnum = $def_devnum;
}
my $device = "/dev/" . $devtype . $devnum;

#
# Record when this image was updated, so that we can figure out which
# revision of the testbed image it was based off.
#
if ($image->MarkUpdateTime() != 0) {
    die("*** $0:\n".
	"    Could not mark the update time in $image\n");
}
    
366 367 368 369 370 371
#
# Okay, we want to build up a command line that will run the script on
# on the client node. We use the imageid description to determine what
# slice (or perhaps the entire disk) is going to be zipped up. We do not
# allow arbitrary combos of course. 
#
372 373
my $startslice = $image->loadpart();
my $loadlength = $image->loadlength();
my $command    = "$createimage ";

# The frisbee uploader needs the server address and the image to upload,
# named either by full path or as <pid>/<imagename>.
if ($usefup) {
    my $id;
    if ($usepath) {
	$id = $filename;
    }
    else {
	$id = $image->pid() . "/$imagename";
    }
    $command .= " -S $BOSSIP -F $id";
}

# Restrict the image to one slice when the descriptor asks for it.
$command .= " -s $startslice"
    if ($startslice || $loadlength == 1);

$command .= " $device";

# The uploader and ssh are given "-" as the output; otherwise the node is
# given the image filename itself.
$command .= ($usefup || $usessh) ? " -" : " $filename";

#
# Go to the background since this is going to take a while.
# 
395
unless ($debug) {
    $logfile = Logfile->Create($experiment->gid_idx());
    if (!defined($logfile)) {
	fatal("Could not create a logfile");
    }
    # Mark it open since we are going to start using it right away.
    $logfile->Open();

    # The new logfile becomes the current spew; remember the old one.
    $experiment->SetLogFile($logfile, \$oldlogfile);

    my $childpid = TBBackGround($logfile->filename());
    if ($childpid) {
	#
	# Parent. Exit right away unless the user asked to wait.
	#
	if (!$waitmode) {
	    print("Your image from $node_id is being created\n".
		  "You will be notified via email when the image has been\n".
		  "completed, and you can load the image on another node.\n");
	    exit(0);
	}
	print("Waiting for image creation to complete\n".
	      "You may type ^C at anytime; you will be notified via email;\n".
	      "later; you will not actually interrupt image creation.\n");

	# Give child a chance to run.
	select(undef, undef, undef, 0.25);

	#
	# Back to default signal handling, so the user can kill this
	# process without stopping the child.
	#
	foreach my $signame ('TERM', 'INT', 'QUIT') {
	    $SIG{$signame} = 'DEFAULT';
	}

	# Block until the child exits (or the user gets bored and types ^C).
	waitpid($childpid, 0);
	my $status = $?;

	print("Done. Exited with status: $status\n");
	exit($status >> 8);
    }
}

440 441 442 443 444 445 446 447
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
# New session, so an interrupt sent to the waiting parent does not reach us.
POSIX::setsid()
    if ($waitmode);

#
# From this point on a failure must clean up the DB and reboot the source
# node; fatal() checks this flag.
#
$needcleanup = 1;

# Clear the bootlog; it is read back after a successful run.
$node->ClearBootLog();
456

457 458 459 460 461 462 463 464 465
# check_progress state
# State shared with check_progress(); one tick is $checkwait seconds.
my ($runticks, $idleticks, $lastsize) = (0, 0, 0);
my $maxticks     = int($maxwait / $checkwait);
my $maxidleticks = int($idlewait / $checkwait);
my $reportticks  = int($reportwait / $checkwait);
my $result;

#
# Reboot into admin mode and run the command.
# Note that without a shared FS, we just boot the node into the admin MFS
# and run the command via SSH, capturing the output.
#
my $me   = $0;
my %args = ('name'    => $me,
	    'prepare' => 1);

476
if (!$usessh) {
    #
    # Hand the command to the admin MFS code, with check_progress() as
    # the progress function and $checkwait as its interval.
    #
    @args{'command', 'timeout', 'pfunc', 'pinterval'} =
	($command, $maxwait + $checkwait, \&check_progress, $checkwait);

    # check_progress() may already have recorded a result; keep it if so.
    if (TBAdminMfsRunCmd(\%args, undef, $node_id) && !defined($result)) {
	$result = "setupfailed";
    }
}
else {
    #
    # Put the node in admin mode...
    #
    @args{'on', 'clearall'} = (1, 0);
    if (TBAdminMfsSelect(\%args, undef, $node_id)) {
	$result = "setupfailed";
	goto done;
    }

    #
    # ...boot it...
    #
    @args{'reboot', 'retry', 'wait'} = (1, 0, 1);
    my @failed = ();
    if (TBAdminMfsBoot(\%args, \@failed, $node_id)) {
	$result = "setupfailed";
	goto done;
    }

    #
    # ...execute command and wait!
    #
    $result = run_with_ssh($command, $filename);
    goto done
	if ($result eq "setupfailed");
}

522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
#
# XXX woeful backward compat hack.
# The old client-side script will not recognize the -S and -F options
# we pass in and will exit(-1).  We detect that here and retry without
# the uploader: over ssh with the image streamed back on stdout when
# there is no shared filesystem, otherwise with the node writing the
# image file itself.
#
if ($usefup && defined($result) && $result eq "255") {
    print STDERR "MFS does not support frisbee upload, falling back on ",
                 $NONFS ? "ssh" : "nfs", "...\n";

    $command = "$createimage ";
    if ($startslice || $loadlength == 1) {
	$command .= " -s $startslice";
    }
    $command .= " $device";

    #
    # Pick the output to match how the command is run below. $usessh is
    # never set when $usefup is, so the choice has to be made on $NONFS:
    # without a shared FS the node writes to stdout, which run_with_ssh
    # redirects into $filename on this side.
    #
    if ($NONFS) {
	$command .= " -";
    } else {
	$command .= " $filename";
    }

    # reset state for check_progress
    $usefup = 0;
    $runticks = 0;
    $idleticks = 0;
    $lastsize = 0;
    $result = undef;

    if ($NONFS) {
	$result = run_with_ssh($command, $filename);
    } else {
	$result = run_with_ssh($command, undef);
    }
}

556 557
done:

# Take the node out of admin mode before looking at how the run went.
cleanup()
    or fatal("Problem encountered while cleaning up!\n");

#
# Fail if setup failed, if we timed out, if the image grew too large, or
# if the command returned a non-zero status. fatal() does not return.
#
foreach my $failure (
    ["setupfailed", "FAILED: Node setup failed ... \n"],
    ["timeout",     "FAILED: Timed out generating image ... \n"],
    ["toobig",
     "FAILED: Maximum image size ($maximagesize bytes) exceeded ... \n"]) {
    my ($code, $mesg) = @{$failure};

    fatal($mesg)
	if ($result eq $code);
}
fatal("FAILED: Returned error code $result generating image ... \n")
    if ($result != 0);
578

579 580 581 582 583
#
# Everything worked, create the hash signature file.
#
# The signature goes in a "sigs" directory alongside the image, in a file
# named after the image with ".sig" appended.
my $sigdir = $filename;
$sigdir =~ s/^(.*)\/[^\/]+$/$1\/sigs/;
if (! -d "$sigdir") {
    mkdir($sigdir, 0770);
}

my $sigfilename = $filename;
$sigfilename =~ s/^(.*)(\/[^\/]+$)/$1\/sigs$2.sig/;

# A missing signature is only a warning; it is passed on in the email.
my $swmsg = "";
if (-x $imagehash &&
    system("$imagehash -c -o $sigfilename $filename") == 0) {
    print("Swapout signature file created\n");
}
else {
    warn("Could not create swapout signature file\n");
    $swmsg = "WARNING: could not create swapout signature file $sigfilename\n".
	     "       You will not be able to save disk state for this image\n";
}

599
print "Image creation succeeded.\n",
      "Image written to $filename.\n";

# Append bootlog (which has prepare output)
my $bootlog;
if ($node->GetBootLog(\$bootlog) == 0) {
    print "\n\n",
	  "------------------ Prepare Output ----------------\n",
	  "$bootlog\n";
}

# Mail the user, attaching the logfile when there is one.
my $recipient = "$user_name <$user_email>";
SENDMAIL($recipient,
	 "Image Creation on $node_id Completed: $pid/$imagename",
	 "Image creation on $node_id has completed. As you requested, the\n".
	 "image has been written to $filename.\n".
	 "You may now os_load this image on other nodes in your experiment.\n".
	 "$swmsg",
	 $recipient,
	 "Bcc: $TBLOGS",
	 defined($logfile) ? ($logfile->filename()) : ());

if (defined($logfile)) {
    # Close up the log file so the webpage stops.
    $logfile->Close();

    # And restore the original logfile as current spew.
    if (defined($oldlogfile)) {
	$experiment->SetLogFile($oldlogfile);
    }
    $logfile->Delete(1);
}
exit 0;

#
# Take the node out of admin mode and reboot it, without waiting for the
# reboot to finish. Clears $needcleanup first so that fatal() will not
# call us again. Returns 1 on success, 0 if either step failed.
#
sub cleanup ()
{
    $needcleanup = 0;

    # Turn admin mode back off...
    my %select_args = ('name'     => $me,
		       'on'       => 0,
		       'clearall' => 0);
    if (TBAdminMfsSelect(\%select_args, undef, $node_id)) {
	print("*** $me:\n".
	      "    Could not turn admin mode off for $node_id!\n");
	return 0;
    }

    # ...and reboot back to the old OS.
    my %boot_args = ('name'   => $me,
		     'on'     => 0,
		     'reboot' => 1,
		     'wait'   => 0);
    if (TBAdminMfsBoot(\%boot_args, undef, $node_id)) {
	print("*** $me:\n".
	      "    Failed to reboot $node_id on cleanup!\n");
	return 0;
    }

    return 1;
}

#
# Report a fatal error and exit with status -1. Does not return.
#
#   $mesg  - text that is printed and used as the body of the failure mail.
#
# Runs cleanup() if $needcleanup is set, mails the user (Cc: $TBOPS) with
# the logfile attached when there is one, then closes and removes the
# logfile.
#
sub fatal($)
{
    my ($mesg) = @_;

    print "$mesg\n";

    if ($needcleanup && !cleanup()) {
        print "Encountered problems cleaning up!\n";
    }

    #
    # Send a message to the testbed list.
    #
    SENDMAIL("$user_name <$user_email>",
	     "Image Creation Failure on $node_id: $pid/$imagename",
	     $mesg,
	     "$user_name <$user_email>",
	     "Cc: $TBOPS",
	     defined($logfile) ? ($logfile->filename()) : ());

    if (defined($logfile)) {
	# Grab the name while the logfile object is still intact.
	my $logname = $logfile->filename();

	# Close up the log file so the webpage stops.
	$logfile->Close();
	# And restore the original logfile as current spew.
	$experiment->SetLogFile($oldlogfile)
	    if (defined($oldlogfile));
	$logfile->Delete();

	#
	# This was mailed so no longer needed. A method call is not
	# interpolated inside a string, so the name must be passed as a
	# plain value for the file to actually be removed.
	#
	unlink($logname)
	    if (defined($logname));
    }
    exit(-1);
}

695 696 697 698 699 700 701 702 703 704 705
#
# Check progress of image creation by periodically checking the image size.
#
# Called every $checkwait seconds.
# Reports progress every $reportwait seconds.
# Gives up after $idlewait seconds without a size change.
#
#
# Progress callback for image creation.
#
#   first argument - ignored.
#   $statusp       - optional hash ref of per-node status; an entry for
#                    $node_id other than "none" means the command finished.
#
# Returns 1 to keep waiting. Returns 0 when the wait is over, with $result
# set to the command status, "timeout" or "toobig".
#
sub check_progress($$)
{
    my (undef, $statusp) = @_;

    if ($runticks == 0) {
	print "$node_id: started image capture, ".
	    "waiting up to " . int($maxwait/60) . " minutes\n";
    }

    #
    # XXX frisbee uploader uploads into a temporary file and then moves
    # it into place. So track that tmp file here.
    #
    my $fname = $filename;
    if ($usefup) {
	$fname .= ".tmp";
    }

    #
    # Command has finished for better or worse, record status and finish.
    #
    if (defined($statusp) && $statusp->{$node_id} ne "none") {
	$result = $statusp->{$node_id};
	print "$node_id: image capture has completed: status='$result'\n";
	return 0;
    }

    #
    # Has run too long
    #
    $runticks++;
    if ($runticks >= $maxticks) {
	$result = "timeout";
	print "$node_id: image capture has completed: timeout\n";
	return 0;
    }

    #
    # See if imagezip on the node is making progress.  If not, we need to
    # check the idle timer and timeout if we have taken too long.
    #
    # Also, check to see if the (somewhat arbitrary) maximum filesize has
    # been exceeded.
    #
    my $cursize = (stat($fname))[7];
    if ($usefup && !defined($cursize)) {
	#
	# XXX avoid an ugly race.
	# When done, frisuploadd moves foo.tmp -> foo
	# If we didn't find foo.tmp, try foo now.
	#
	$fname =~ s/\.tmp$//;
	$cursize = (stat($fname))[7];
    }

    #
    # stat() yields nothing if the file is not there (yet). Count that as
    # an empty image so the comparisons and the report below work on a
    # number instead of undef.
    #
    $cursize = 0
	if (!defined($cursize));

    if ($cursize > $maximagesize) {
	$result = "toobig";
	print "$node_id: image capture has completed: image too big\n";
	return 0;
    }
    if ($cursize == $lastsize) {
	$idleticks++;
	if ($idleticks >= $maxidleticks) {
	    $result = "timeout";
	    print "$node_id: image capture has completed: idle timeout\n";
	    return 0;
	}
    } else {
	$idleticks = 0;
    }
    $lastsize = $cursize;

    # Periodic report, once every $reportticks ticks.
    if (($runticks % $reportticks) == 0) {
	my $curtdiff = int($runticks * $checkwait / 60);
	print "$node_id: still waiting ...".
	    " it has been ". $curtdiff ." minutes.".
	    " Current image size: $cursize bytes.\n";
    }

    return 1;
}
782 783 784 785 786 787

#
# Run a command on $node_id via sshtb and wait for it, calling
# check_progress() every $checkwait seconds.
#
#   $cmd    - command line to run on the node.
#   $output - local file to redirect the command's stdout into, or undef
#             for no redirection.
#
# Returns the command's exit code (or the signal number if it died from a
# signal), "setupfailed" if the fork failed, -1 if the child could not be
# waited for, "timeout" after $maxwait + $checkwait seconds, or whatever
# check_progress() left in $result when it ended the wait.
#
# NOTE(review): when the wait ends on a timeout or a check_progress()
# failure, the ssh child is left running and is not reaped here.
#
sub run_with_ssh($$)
{
    my ($cmd,$output) = @_;
    my $stat = undef;

    $cmd = "$TB/bin/sshtb -n -host $node_id $cmd";
    if (defined($output)) {
	$cmd .= " > $output";
    }
    print STDERR "About to: '$cmd' as uid $UID\n" if ($debug);

    #
    # fork() returns undef on failure, not a negative number. It has to
    # be tested with defined(), or a failed fork compares equal to 0 and
    # the parent falls into the child code below.
    #
    my $mypid = fork();
    if (!defined($mypid)) {
	return "setupfailed";
    }

    #
    # Child. Just do it.
    #
    if ($mypid == 0) {
	my $stat = 0;
	if (system($cmd)) {
	    $stat = $?;
	}
	if ($stat & 127) {
	    # died with a signal, return the signal
	    exit($stat & 127);
	}
	exit($stat >> 8);
    }

    #
    # Parent.  Wait for ssh to finish, reporting periodic progress
    # as TBAdminMfsRunCmd would do.
    #
    my $endtime = time() + $maxwait + $checkwait;
    while (1) {
	my $kid = waitpid($mypid, WNOHANG());
	# ssh finished
	if ($kid == $mypid) {
	    $stat = $?;
	    if ($stat & 127) {
		# died with a signal, return the signal
		$stat = $stat & 127;
	    } else {
		# else return the exit code
		$stat = $stat >> 8;
	    }
	    last;
	}

	# huh?
	if ($kid == -1) {
	    $stat = -1;
	    last;
	}

	# check on progress
	if (!check_progress(undef, undef)) {
	    $stat = $result;
	    last;
	}

	# wait for awhile
	sleep($checkwait);
	if (time() >= $endtime) {
	    $stat = "timeout";
	    last;
	}
    }

    return $stat;
}