#!/usr/bin/perl -wT
#
# Copyright (c) 2000-2013 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
# This file is part of the Emulab network testbed software.
#
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this file. If not, see .
#
# }}}
#
use strict;
use English;
use Getopt::Std;
use POSIX qw(setsid :sys_wait_h);
use File::Basename;
#
# Image Creation Tuneables.
#
# $maxwait max wall clock time to allow, progress or not
# Empirically we have observed about 1.6MB/sec on a pc850
# for a Windows image (the slowest to create), so figuring
# 1.5MB/sec for a 6GB max image works out to around 72 minutes.
# $idlewait max time to wait between periods of progress
# $checkwait time between progress checks (must be int div of $idlewait)
# $reportwait time between progress reports (must be multiple of $checkwait)
#
# $maximagesize max size in bytes of an image. This should really be in the
# DB (per-testbed, per-project, per-user, per-something), and
# not hardwired here. In the meantime, we set this big and let
# disk quotas do the dirty work of limiting size.
#
my $maxwait = (72 * 60);
my $idlewait = ( 8 * 60);
my $reportwait = ( 2 * 60);
my $checkwait = 15;
my $maximagesize = (6 * 1024**3); # 20GB
#
# Create a disk image.
#
# XXX: Device file should come from DB.
# Start/count slice computation is not generalized at all.
#
sub usage()
{
print(STDERR
"Usage: create_image [-wsN] [-p ] \n" .
"switches and arguments:\n".
"-w - wait for image to be fully created\n".
"-s - use ssh instead of frisbee uploader\n".
"-N - use NFS (if available) instead of frisbee uploader\n".
"-p - project ID of the image; defaults to system project\n".
" - imagename to use\n".
" - nodeid to create the image from\n");
exit(-1);
}
my $optlist = "p:wsNdfe";
my $waitmode = 0;
my $usessh = 0;
my $usenfs = 0;
my $usefup = 1;
my $noemail = 0;
#
# Configure variables
#
my $TB = "@prefix@";
my $TBOPS = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
my $BOSSIP = "@BOSSNODE_IP@";
my $CONTROL = "@USERNODE@";
my $NONFS = @NOSHAREDFS@;
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use libadminmfs;
use Experiment;
use Node;
use User;
use Image;
use Logfile;
#
# Turn off line buffering on output
#
$| = 1;
#
# Untaint the path
#
$ENV{'PATH'} = "/bin:/sbin:/usr/bin:";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
#
# We don't want to run this script unless its the real version.
#
if ($EUID != 0) {
die("*** $0:\n".
" Must be setuid! Maybe its a development version?\n");
}
sub cleanup();
sub fatal($);
sub check_progress($$);
sub run_with_ssh($$);
my $nodereboot = "$TB/bin/node_reboot";
my $createimage = "/usr/local/bin/create-image";
my $reboot_prep = "@CLIENT_BINDIR@/reboot_prepare";
my $EC2SNAP = "$TB/sbin/ec2import.proxy";
my $friskiller = "$TB/sbin/frisbeehelper";
my $osselect = "$TB/bin/os_select";
my $checkquota = "$TB/sbin/checkquota";
my $imagehash = "$TB/bin/imagehash";
my $SHA1 = "/sbin/sha1";
my $SCP = "/usr/bin/scp";
my $def_devtype = "ad";
my $def_devnum = 0;
my $devtype;
my $devnum;
my $device;
my $mereuser = 0;
my $debug = 1;
my $foreground = 0;
my $imagepid = TB_OPSPID;
my $logfile;
my $oldlogfile;
my $needcleanup = 0;
my $needunlock = 0;
my $isvirtnode = 0;
my $isec2node = 0;
my $onsharednode= 0;
my $didbackup = 0;
my $node_id;
my $node;
my ($experiment,$pid);
#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
usage();
}
if (defined($options{"w"})) {
$waitmode = 1;
}
if (defined($options{"e"})) {
$noemail = 1;
}
if (defined($options{"s"})) {
$usessh = 1;
$usefup = $usenfs = 0;
}
if (defined($options{"N"})) {
if (!$NONFS) {
$usenfs = 1;
$usefup = $usessh = 0;
} else {
print STDERR "NFS not available, cannot use -N\n";
exit(1);
}
}
if (defined($options{"d"})) {
$debug = 1;
}
if (defined($options{"f"})) {
$foreground = 1;
$waitmode = 0;
}
if (@ARGV != 2) {
usage();
}
my $imagename = $ARGV[0];
my $target = $ARGV[1];
#
# There is no reason to run as root unless we are taking a snapshot
# of a VM on a shared node. In that case we will flip back when
# we do the ssh over.
#
$EUID = $UID;
#
# Untaint the arguments.
#
if ($imagename =~ /^([-\w\.\+]+)$/) {
$imagename = $1;
}
else {
die("*** $0:\n".
" Bad data in $imagename.\n");
}
if (defined($options{"p"})) {
$imagepid = $options{"p"};
if ($imagepid =~ /^([-\w\.]+)$/) {
$imagepid = $1;
}
else {
die("*** $0:\n".
" Bad data in $imagepid.\n");
}
}
#
# Reset default values from site variables if they exist.
#
my $tmp;
if (TBGetSiteVar("images/create/maxwait", \$tmp)) {
$maxwait = $tmp * 60;
}
if (TBGetSiteVar("images/create/idlewait", \$tmp)) {
$idlewait = $tmp * 60;
}
if (TBGetSiteVar("images/create/maxsize", \$tmp)) {
$maximagesize = $tmp * 1024**3;
}
$idlewait = $maxwait
if ($maxwait < $idlewait);
$reportwait = $idlewait
if ($idlewait < $reportwait);
$checkwait = $reportwait
if ($reportwait < $checkwait);
#
# Verify user and get his DB uid and other info for later.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
tbdie("You ($UID) do not exist!");
}
my $user_uid = $this_user->uid();
my $user_name = $this_user->name();
my $user_email = $this_user->email();
#
# Before doing anything else, check for overquota ... lets not waste
# our time. Make sure user sees the error by exiting with 1.
#
if (system("$checkquota $user_uid") != 0) {
die("*** $0:\n".
" You are over your disk quota on $CONTROL; ".
"please login there and cleanup!\n");
}
if ($UID && ! $this_user->IsAdmin()) {
$mereuser = 1;
}
#
# Grab the imageid description from the DB. We do a permission check, but
# mostly to avoid hard to track errors that would result if the user picked
# the wrong one (which is likely to happen no matter what I do).
#
my $image = Image->Lookup($imagepid, $imagename);
if (!defined($image)) {
die("*** $0:\n".
" No such image descriptor $imagename in project $imagepid!\n");
}
my $imageid = $image->imageid();
if ($mereuser &&
! $image->AccessCheck($this_user, TB_IMAGEID_ACCESS)) {
die("*** $0:\n".
" You do not have permission to use imageid $imageid!\n");
}
#
# Is it a local node or a remote EC2 node (need to generalize).
#
if ($target =~ /^.*@.*$/) {
if ($target =~ /^([-\w\@\+\.]+)$/) {
$target = $1;
}
else {
die("*** $0:\n".
" Bad data in $target\n");
}
$isec2node = 1;
$usefup = $usessh = 0;
$pid = $image->pid();
}
else {
if ($target =~ /^([-\w]+)$/) {
$node_id = $1;
}
else {
die("*** $0:\n".
" Bad data in $target\n");
}
# Check node and permission
$node = Node->Lookup($node_id);
if (!defined($node)) {
die("*** $0:\n".
" Invalid node name $node_id!\n");
}
$isvirtnode = $node->isvirtnode();
$onsharednode = $node->sharing_mode()
if ($isvirtnode);
if (! $node->AccessCheck($this_user, TB_NODEACCESS_LOADIMAGE)) {
die("*** $0:\n".
" You do not have permission to create an image from $node\n");
}
#
# We need the project id for test below. The target directory for the
# output file has to be the node project directory, since that is the
# directory that is going to be NFS mounted by default.
#
$experiment = $node->Reservation();
if (!defined($experiment)) {
die("*** $0:\n".
" Could not map $node to its experiment object!\n");
}
$pid = $experiment->pid();
#
# To avoid blowing a cavernous hole ("allow all TCP ports to boss")
# in the per-experiment firewall, we don't use the frisbee uploader if
# the node is firewalled.
#
if ($usefup && $experiment->IsFirewalled()) {
print "*** WARNING: $node_id is firewalled, not using Frisbee uploader\n";
$usefup = 0;
if ($NONFS) {
$usenfs = 0;
$usessh = 1;
} else {
$usenfs = 1;
$usessh = 0;
}
}
}
#
# Make sure that the directory exists and is writeable for the user.
# We test this by creating the file. Its going to get wiped anyway.
#
my $filename = $image->path();
my $isglobal = $image->global();
my $usepath = 0;
#
# Redirect pathname for global images.
#
if ($isglobal && ($filename =~ /^\/usr\/testbed/)) {
$filename = PROJROOT() . "/$pid/images/" . basename($filename);
print "*** WARNING: Writing global descriptor to $filename instead!\n";
#
# XXX the Emulab config of the master server doesn't know this trick
# so when it tries to lookup imageid emulab-ops/ it would
# still map to /usr/testbed and fail because it cannot update images
# outside of /{users,grouop,proj}. So we skirt the issue by passing
# it the full path contructed here rather than the imageid.
#
$usepath = 1;
}
#
# Make sure real path is someplace that makes sense; remember that the
# image is created on the nodes, and it NFS mounts directories on ops.
# Writing the image to anyplace else is just going to break things.
#
# Use realpath to resolve any symlinks.
#
my $translated = `realpath $filename`;
if ($translated =~ /^([-\w\.\/\+]+)$/) {
$filename = $1;
}
else {
die("*** $0:\n".
" Bad data returned by realpath: $translated\n");
}
# Make sure not a directory.
if (-d $filename) {
die("*** $0:\n".
" $filename is a directory! Must be a plain file.\n");
}
#
# The file must reside in an allowed directory. Since this script
# runs as the caller, regular file permission checks ensure its a file
# the user is allowed to use.
#
if (! TBValidUserDir($filename, 0)) {
die("*** $0:\n".
" $filename does not resolve to an allowed directory!\n");
}
#
# Before we do anything destructive, we lock the descriptor.
#
if ($image->Lock()) {
die("*** $0:\n".
" Image is locked, try again later!\n");
}
$needunlock = 1;
#
# Be sure to kill off running frisbee. If a node is trying to load that
# image, well tough.
#
system("$friskiller -k $imageid");
if ($?) {
fatal("Could not kill running frisbee for $imageid!");
}
if (-e $filename) {
#
# Back it up in case of failure. Note that the frisbee upload server
# does this, so we do it only for the ssh/nfs case.
#
if (!$usefup) {
system("/bin/mv -f $filename ${filename}.bak");
if ($?) {
fatal("Could not back up $filename");
}
$didbackup = 1;
}
}
#
# We want to truncate the file (we backed it up above), which also
# confirms the user can really create a new file.
#
# XXX The problem is that frisbee upload server does this too, which
# is why we have a lot of zero length backup files. So, in uploader
# mode, make sure the user can create the tmp file that the uploader
# uses.
#
$tmp = $filename . ($usefup ? ".tmp" : "");
open(FILE, "> $tmp") or
fatal("Could not create $tmp: $!");
close(FILE) or
fatal("Could not truncate $tmp: $!");
if (! ($isvirtnode || $isec2node)) {
#
# Get the disktype for this node
#
$node->disktype(\$devtype);
$node->bootdisk_unit(\$devnum);
$devtype = $def_devtype
if (!defined($devtype));
$devnum = $def_devnum
if (!defined($devnum));
$device = "/dev/${devtype}${devnum}";
}
#
# Record when this image was updated, so that we can figure out which
# revision of the testbed image it was based off.
#
# Makes no sense to do this when writing a global image to a different path.
# We need a better way to make new images live.
#
$image->MarkUpdate($this_user) == 0 or
fatal("Could not mark the update time in $image");
#
# Okay, we want to build up a command line that will run the script on
# on the client node. We use the imageid description to determine what
# slice (or perhaps the entire disk) is going to be zipped up. We do not
# allow arbitrary combos of course.
#
my $startslice;
my $loadlength;
my $command = "$createimage ";
if ($usefup) {
my $id = $usepath ? $filename : ($image->pid() . "/$imagename");
$command .= " -S $BOSSIP -F $id";
}
if ($isec2node) {
$command = "$EC2SNAP ";
}
elsif ($isvirtnode) {
$command .= " $node_id";
}
else {
$startslice = $image->loadpart();
$loadlength = $image->loadlength();
if ($startslice || $loadlength == 1) {
$command .= " -s $startslice";
}
$command .= " $device";
}
if ($usefup || $usessh) {
$command .= " -";
} else {
$command .= " $filename";
}
#
# Go to the background since this is going to take a while.
#
if (! $foreground) {
$logfile = Logfile->Create((defined($experiment) ?
$experiment->gid_idx() : $image->gid_idx()));
fatal("Could not create a logfile")
if (!defined($logfile));
# Mark it open since we are going to start using it right away.
$logfile->Open();
# Logfile becomes the current spew.
$image->SetLogFile($logfile);
if (my $childpid = TBBackGround($logfile->filename())) {
#
# Parent exits normally, except if in waitmode.
#
if (!$waitmode) {
print("Your image from $target is being created\n".
"You will be notified via email when the image has been\n".
"completed, and you can load the image on another node.\n");
exit(0);
}
print("Waiting for image creation to complete\n");
print("You may type ^C at anytime; you will be notified via email;\n".
"later; you will not actually interrupt image creation.\n");
# Give child a chance to run.
select(undef, undef, undef, 0.25);
#
# Reset signal handlers. User can now kill this process, without
# stopping the child.
#
$SIG{TERM} = 'DEFAULT';
$SIG{INT} = 'DEFAULT';
$SIG{QUIT} = 'DEFAULT';
#
# Wait until child exits or until user gets bored and types ^C.
#
waitpid($childpid, 0);
print("Done. Exited with status: $?\n");
exit($? >> 8);
}
}
#
# New process group since we get called from the web interface,
# and so the child does not get zapped if the user types ^C
# in waitmode.
#
if (! $foreground) {
POSIX::setsid();
}
#
# From here on out, we should take care to clean up the DB, and
# reboot the source node.
#
$needcleanup = 1;
# Clear the bootlog; see below.
$node->ClearBootLog()
if (defined($node));
# check_progress state
my $runticks = 0;
my $maxticks = int($maxwait / $checkwait);
my $reportticks = int($reportwait / $checkwait);
my $idleticks = 0;
my $maxidleticks = int($idlewait / $checkwait);
my $lastsize = 0;
my $result;
#
# We can skip a lot of the stuff below for virtnodes and ec2 nodes.
#
if ($isec2node) {
my $safe_target = User::escapeshellarg($target);
my $cmd = "$TB/bin/sshtb -host $CONTROL $EC2SNAP -u $user_uid ".
"$safe_target $pid $user_uid $imageid $filename";
print STDERR "About to: '$cmd'\n" if (1 || $debug);
my $SAVEUID = $UID;
$EUID = $UID = 0;
system($cmd);
fatal("'$cmd' failed")
if ($?);
$EUID = $UID = $SAVEUID;
goto ec2done;
}
elsif ($isvirtnode) {
#
# XEN creates a problem; the physical host cannot actually
# execute a command inside the guest, but we need to run
# reboot_prepare and reboot it. FreeBSD creates an additional
# problem in that shutdown has to run to invoke prepare; reboot
# does not run it, and a shutdown from outside the VM has the
# sae effect; prepare does not run. What a pain.
#
my $SAVEUID = $UID;
$EUID = $UID = 0;
my $cmd = "$TB/bin/sshtb -n -o ConnectTimeout=10 ".
"-host $node_id $reboot_prep";
print STDERR "About to: '$cmd'\n" if ($debug);
system($cmd);
fatal("'$cmd' failed")
if ($?);
$EUID = $UID = $SAVEUID;
#
# Now execute command and wait.
#
if ($NONFS) {
$result = run_with_ssh($command, $filename);
} else {
$result = run_with_ssh($command, undef);
}
goto done;
}
#
# Reboot into admin mode and run the command.
# Note that without a shared FS, we just boot the node into the admin MFS
# and run the command via SSH, capturing the output.
#
my $me = $0;
my %args = ();
$args{'name'} = $me;
$args{'prepare'} = 1;
if ($usessh) {
#
# Put the node in admin mode...
#
$args{'on'} = 1;
$args{'clearall'} = 0;
if (TBAdminMfsSelect(\%args, undef, $node_id)) {
$result = "setupfailed";
goto done;
}
#
# ...boot it...
#
$args{'reboot'} = 1;
$args{'retry'} = 0;
$args{'wait'} = 1;
my @failed = ();
if (TBAdminMfsBoot(\%args, \@failed, $node_id)) {
$result = "setupfailed";
goto done;
}
#
# ...execute command and wait!
#
$result = run_with_ssh($command, $filename);
if ($result eq "setupfailed") {
goto done;
}
} else {
$args{'command'} = $command;
$args{'timeout'} = $maxwait + $checkwait;
$args{'pfunc'} = \&check_progress;
$args{'pinterval'} = $checkwait;
my $retry = 1;
while ($retry) {
$retry = 0;
if (TBAdminMfsRunCmd(\%args, undef, $node_id)) {
$result = "setupfailed"
if (!defined($result));
}
}
}
#
# XXX woeful backward compat hack.
# The old client-side script will not recognize the -S and -F options
# we pass in and will exit(-1). We detect that here and retry with
#
if ($usefup && $result eq "255") {
print STDERR "MFS does not support frisbee upload, falling back on ",
$NONFS ? "ssh" : "nfs", "...\n";
$command = "$createimage ";
if ($startslice || $loadlength == 1) {
$command .= " -s $startslice";
}
$command .= " $device";
if ($usessh) {
$command .= " -";
} else {
$command .= " $filename";
}
# reset state for check_progress
$usefup = 0;
$runticks = 0;
$idleticks = 0;
$lastsize = 0;
$result = undef;
if ($NONFS) {
$result = run_with_ssh($command, $filename);
} else {
$result = run_with_ssh($command, undef);
}
}
done:
# Grab boot log now. Node will reboot and possibly erase it. We should
# probably come up with a better way to handle this.
my $bootlog;
if ($node->GetBootLog(\$bootlog)) {
$bootlog = undef;
}
if (! cleanup()) {
fatal("Problem encountered while cleaning up!\n");
}
#
# If we timed out, if the result code was bad, or if the image size
# grew too large.
#
if ($result eq "setupfailed") {
fatal("FAILED: Node setup failed ... \n");
}
if ($result eq "timeout") {
fatal("FAILED: Timed out generating image ... \n");
}
if ($result eq "toobig") {
fatal("FAILED: Maximum image size ($maximagesize bytes) exceeded ... \n");
}
if ($result != 0) {
fatal("FAILED: Returned error code $result generating image ... \n");
}
ec2done:
#
# Everything worked, create the hash signature file.
#
my $sigdir;
($sigdir = $filename) =~ s/^(.*)\/[^\/]+$/$1\/sigs/;
mkdir($sigdir, 0770)
if (! -d "$sigdir");
my $sigfilename;
($sigfilename = $filename) =~ s/^(.*)(\/[^\/]+$)/$1\/sigs$2.sig/;
my $swmsg = "";
if (! -x $imagehash ||
system("$imagehash -c -o $sigfilename $filename") != 0) {
warn("Could not create swapout signature file\n");
$swmsg = "WARNING: could not create swapout signature file $sigfilename\n".
" You will not be able to save disk state for this image\n";
} else {
print("Swapout signature file created\n");
}
#
# Hash the file itself since we really want an integrity check
# on the image file.
#
my $hashfile = "${filename}.sha1";
my $filehash = `$SHA1 $filename`;
if ($?) {
fatal("Could not generate sha1 hash of $filename");
}
if ($filehash =~ /^SHA1.*= (\w*)$/) {
if ($isglobal && $usepath) {
print "*** WARNING: Not updating SHA1 in DB record since the ".
"image was written to /proj!\n";
print " See $hashfile instead\n";
}
else {
$image->SetHash($1) == 0
or fatal("Failed to set the hash for $image");
}
}
else {
fatal("Could not parse the sha1 hash: '$filehash'")
}
unlink($hashfile)
if (-e $hashfile);
open(HASH, ">$hashfile") or
fatal("Could not open $hashfile for writing: $!");
print HASH $filehash;
close($hashfile);
print "Image creation succeeded.\n";
print "Image written to $filename.\n";
# "Final size: " . (stat($filename))[7] . " bytes.\n";
# Append bootlog (which has prepare output)
if (defined($bootlog)) {
print "\n\n";
print "------------------ Prepare Output ----------------\n";
print "$bootlog\n";
}
SENDMAIL("$user_name <$user_email>",
"Image Creation on $target Completed: $pid/$imagename",
"Image creation on $target has completed. As you requested, the\n".
"image has been written to $filename.\n".
"You may now os_load this image on other nodes in your experiment.\n".
"$swmsg",
"$user_name <$user_email>",
"Bcc: $TBLOGS",
defined($logfile) ? ($logfile->filename()) : ()) if (!$noemail);
if (defined($logfile)) {
# Close up the log file so the webpage stops.
$logfile->Close();
$image->ClearLogFile();
}
$image->Unlock();
exit 0;
sub cleanup ()
{
$needcleanup = 0;
if ($isvirtnode || $isec2node) {
#
# Nothing to do; the clientside script rebooted the container.
#
return 1;
}
#
# Turn admin mode back off and reboot back to the old OS
#
my %args = ();
$args{'name'} = $me;
$args{'on'} = 0;
$args{'clearall'} = 0;
if (TBAdminMfsSelect(\%args, undef, $node_id)) {
print("*** $me:\n".
" Could not turn admin mode off for $node_id!\n");
return 0;
}
%args = ();
$args{'name'} = $me;
$args{'on'} = 0;
$args{'reboot'} = 1;
$args{'wait'} = 0;
if (TBAdminMfsBoot(\%args, undef, $node_id)) {
print("*** $me:\n".
" Failed to reboot $node_id on cleanup!\n");
return 0;
}
return 1;
}
sub fatal($)
{
my($mesg) = $_[0];
print "$mesg\n";
if ($needcleanup && !cleanup()) {
print "Encountered problems cleaning up!\n";
}
#
# Send a message to the testbed list.
#
SENDMAIL("$user_name <$user_email>",
"Image Creation Failure on $target: $pid/$imagename",
$mesg,
"$user_name <$user_email>",
"Cc: $TBOPS",
defined($logfile) ? ($logfile->filename()) : ());
if (defined($logfile)) {
# Close up the log file so the webpage stops.
$logfile->Close();
$image->ClearLogFile();
}
$image->Unlock()
if ($needunlock);
# Restore old image file.
if ($didbackup) {
system("/bin/mv -f ${filename}.bak $filename");
}
exit(-1);
}
#
# Check progress of image creation by periodically checking the image size.
#
# Called every $checkwait seconds.
# Reports progress every $reportwait seconds.
# Gives up after $idlewait seconds without a size change.
#
sub check_progress($$)
{
my (undef, $statusp) = @_;
if ($runticks == 0) {
print "$node_id: started image capture, ".
"waiting up to " . int($maxwait/60) . " minutes\n";
}
#
# XXX frisbee uploader uploads into a temporary file and then moves
# it into place. So track that tmp file here.
#
my $fname = $filename;
if ($usefup) {
$fname .= ".tmp";
}
#
# Command has finished for better or worse, record status and finish.
#
if (defined($statusp) && $statusp->{$node_id} ne "none") {
$result = $statusp->{$node_id};
print "$node_id: image capture has completed: status='$result'\n";
return 0;
}
#
# Has run too long
#
$runticks++;
if ($runticks >= $maxticks) {
$result = "timeout";
print "$node_id: image capture has completed: timeout\n";
return 0;
}
#
# See if imagezip on the node is making progress. If not, we need to
# check the idle timer and timeout if we have taken too long.
#
# Also, check to see if the (somewhat arbitrary) maximum filesize has
# been exceeded.
#
my $cursize = (stat($fname))[7];
if ($usefup && !defined($cursize)) {
#
# XXX avoid an ugly race.
# When done, frisuploadd moves foo.tmp -> foo
# If we didn't find foo.tmp, try foo now.
#
$fname =~ s/\.tmp$//;
$cursize = (stat($fname))[7];
}
if ($cursize > $maximagesize) {
$result = "toobig";
print "$node_id: image capture has completed: image too big\n";
return 0;
}
if ($cursize == $lastsize) {
$idleticks++;
if ($idleticks >= $maxidleticks) {
$result = "timeout";
print "$node_id: image capture has completed: idle timeout\n";
return 0;
}
} else {
$idleticks = 0;
}
$lastsize = $cursize;
if (($runticks % $reportticks) == 0) {
my $curtdiff = int($runticks * $checkwait / 60);
print "$node_id: still waiting ...".
" it has been ". $curtdiff ." minutes.".
" Current image size: $cursize bytes.\n";
}
return 1;
}
sub run_with_ssh($$)
{
my ($cmd,$output) = @_;
my $stat = undef;
$node_id = $node->phys_nodeid()
if ($isvirtnode);
$cmd = "$TB/bin/sshtb -n -host $node_id $cmd";
if (defined($output)) {
$cmd .= " > $output";
}
print STDERR "About to: '$cmd' as uid ".
($onsharednode ? 0 : $UID) . "\n" if ($debug);
my $mypid = fork();
if ($mypid < 0) {
return "setupfailed";
}
#
# Child. Just do it.
#
# If this is a virtnode on a shared node, we want to flip
# back to root so that we run the ssh as root.
#
if ($onsharednode) {
$EUID = $UID = 0;
}
if ($mypid == 0) {
my $stat = 0;
if (system($cmd)) {
$stat = $?;
}
if ($stat & 127) {
# died with a signal, return the signal
exit($stat & 127);
}
exit($stat >> 8);
}
#
# Parent. Wait for ssh to finish, reporting periodic progress
# as TBAdminMfsRunCmd would do.
#
my $endtime = time() + $maxwait + $checkwait;
while (1) {
my $kid = waitpid($mypid, &WNOHANG);
# ssh finished
if ($kid == $mypid) {
$stat = $?;
if ($stat & 127) {
# died with a signal, return the signal
$stat = $stat & 127;
} else {
# else return the exit code
$stat = $stat >> 8;
}
last;
}
# huh?
if ($kid == -1) {
$stat = -1;
last;
}
# check on progress
if (!check_progress(undef, undef)) {
$stat = $result;
last;
}
# wait for awhile
sleep($checkwait);
if (time() >= $endtime) {
$stat = "timeout";
last;
}
}
return $stat;
}