manage_instance.in 125 KB
Newer Older
1 2
#!/usr/bin/perl -w
#
Leigh Stoller's avatar
Leigh Stoller committed
3
# Copyright (c) 2000-2019 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
#
use English;
use strict;
use Getopt::Std;
use XML::Simple;
use Data::Dumper;
use CGI;
use POSIX ":sys_wait_h";
31
use POSIX qw(setsid strftime ceil floor);
32
use Date::Parse;
33
use JSON;
34 35 36 37 38 39

#
# Back-end script to manage APT profiles.
#
sub usage()
{
Leigh Stoller's avatar
Leigh Stoller committed
40 41
    print("Usage: manage_instance snapshot instance ".
	  "[-n node_id] [-i imagename] [-u node|all]\n");
42 43 44
    print("Usage: manage_instance consoleurl instance node\n");
    print("Usage: manage_instance terminate instance\n");
    print("Usage: manage_instance refresh instance\n");
45 46
    print("Usage: manage_instance reboot instance node_id ...\n");
    print("Usage: manage_instance reload instance node_id ...\n");
Leigh Stoller's avatar
Leigh Stoller committed
47
    print("Usage: manage_instance recovery instance [-c] node_id\n");
48
    print("Usage: manage_instance deletenodes instance node_id ...\n");
49
    print("Usage: manage_instance monitor instance\n");
50
    print("Usage: manage_instance lockdown instance set|clear user|admin\n");
51
    print("Usage: manage_instance panic instance set|clear\n");
52
    print("Usage: manage_instance linktest instance [-k | level]\n");
53
    print("Usage: manage_instance writecreds instance directory\n");
54
    print("Usage: manage_instance updatekeys instance [uid] \n");
55
    print("Usage: manage_instance extend instance ".
56
	  "[-M] [-m message | -f filename] hours\n");
57
    print("Usage: manage_instance denyextension instance [-m message] [filename]\n");
58
    print("Usage: manage_instance maxextension instance\n");
59
    print("Usage: manage_instance moreinfo instance [-m message] [filename]\n");
60
    print("Usage: manage_instance utilization instance\n");
61
    print("Usage: manage_instance schedterminate instance [-m message] days [filename]\n");
62
    print("Usage: manage_instance idledata instance\n");
63
    print("Usage: manage_instance openstackstats instance\n");
Leigh Stoller's avatar
Leigh Stoller committed
64
    print("Usage: manage_instance getmanifests instance\n");
65
    print("Usage: manage_instance warn instance\n");
66
    print("Usage: manage_instance applyextensionpolicy instance\n");
67 68
    exit(-1);
}
69
my $optlist     = "dt:s";
70
my $debug       = 0;
71
my $silent      = 0;
72
my $webtask_id;
73
my $webtask;
74 75
my $this_user;
my $geniuser;
76 77 78 79

#
# Configure variables
#
80 81 82 83 84 85 86
my $TB		   = "@prefix@";
my $TBOPS          = "@TBOPSEMAIL@";
my $PROTOUSER      = "elabman";
my $SUDO           = "/usr/local/bin/sudo";
my $MANAGEINSTANCE = "$TB/bin/manage_instance";
my $WAP            = "$TB/sbin/wap";
my $TBACCT         = "$TB/sbin/tbacct";
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105

#
# Untaint the path
#
$ENV{'PATH'} = "$TB/bin:$TB/sbin:/bin:/usr/bin:/usr/bin:/usr/sbin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

#
# Load the Testbed support stuff.
#
use lib "@prefix@/lib";
use EmulabConstants;
use emdb;
use emutil;
106
use libEmulab;
107
use libtestbed;
108 109 110 111
use User;
use Project;
use APT_Profile;
use APT_Instance;
112
use APT_Geni;
113
use APT_Utility;
114 115
use GeniXML;
use GeniHRN;
116 117 118
use Genixmlrpc;
use GeniResponse;
use GeniSlice;
119
use GeniImage;
120
use GeniUser;
121
use WebTask;
122
use EmulabFeatures;
123 124 125

# Protos
sub fatal($);
126
sub UserError($);
127
sub DoSnapshot();
128
sub DoConsole();
129
sub DoTerminate();
130
sub DoSchedTerminate();
131
sub DoExtend();
132
sub DoDenyOrMoreInfo($);
133
sub DoRefresh();
134
sub DoReboot();
135
sub DoReload();
Leigh Stoller's avatar
Leigh Stoller committed
136
sub DoRecovery();
137
sub DoLockdown();
138
sub DoPanic();
139
sub DoManifests();
140
sub DoLinktest();
141
sub DoUpdateKeys();
142
sub DoDeleteNodes();
143
sub DoUtilization();
144
sub DoIdleData();
145
sub DoOpenstack();
146 147
sub DoCheckAutoApprove();
sub CheckAutoApprove($$);
148
sub DoMaxExtension();
149
sub DoMaxExtensionInternal($$);
150
sub DoApplyExtensionPolicy();
151
sub WriteCredentials();
152
sub StartMonitor();
153
sub StartMonitorInternal(;$);
154
sub DoImageTrackerStuff($$$$$$$);
155
sub DoWarn();
156
sub DoDelete();
157
sub DenyExtensionInternal($);
158
sub ExtendInternal($$$$$);
159 160
sub CallMethodOnAggregates($$$@);
sub ResponseErrorMessage($$);
161 162 163 164 165 166 167 168 169 170 171 172

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"t"})) {
    $webtask_id = $options{"t"};
}
173 174 175
if (defined($options{"d"})) {
    $debug++;
}
176 177 178
if (defined($options{"s"})) {
    $silent = 1;
}
179
if (@ARGV < 2) {
180 181
    usage();
}
182
my $action   = shift(@ARGV);
183 184
my $token    = shift(@ARGV);
my $instance = APT_Instance->Lookup($token);
185
if (!defined($instance)) {
186
    $instance = APT_Instance->LookupBySlice($token);
187
}
188 189 190 191 192 193 194 195 196 197 198
#
# Grab the webtask object.
#
if (defined($webtask_id)) {
    $webtask = WebTask->Lookup($webtask_id);
    if (!defined($webtask)) {
	fatal("Could not lookup/create webtask for profile");
    }
    $webtask->AutoStore(1);
}

199 200 201 202 203 204 205 206 207 208
if (getpwuid($UID) eq "nobody") {
    $this_user = User->ImpliedUser();
}
else  {
    $this_user = User->ThisUser();
}
# If a guest user, we will not have an actual user, which is okay.
if (defined($this_user)) {
    $geniuser = GeniUser->CreateFromLocal($this_user);
}
209

210 211 212 213 214 215 216
if (!defined($instance)) {
    # Lets not make this a fatal error; when coming from the web interface,
    # we do not need to send email. 
    UserError("No such instance $token");
}
my $uuid = $instance->uuid();

217
if ($action eq "snapshot") {
218 219
    DoSnapshot();
}
220 221 222
if ($action eq "extend") {
    DoExtend();
}
223
elsif ($action eq "denyextension") {
224 225 226 227
    DoDenyOrMoreInfo("deny")
}
elsif ($action eq "moreinfo") {
    DoDenyOrMoreInfo("info")
228
}
229
elsif ($action eq "consoleurl") {
230 231
    DoConsole()
}
232 233 234
elsif ($action eq "terminate") {
    DoTerminate()
}
235 236 237
elsif ($action eq "warn") {
    DoWarn()
}
238 239 240
elsif ($action eq "schedterminate") {
    DoSchedTerminate()
}
241 242 243
elsif ($action eq "refresh") {
    DoRefresh()
}
244 245 246
elsif ($action eq "reboot") {
    DoReboot()
}
247 248 249
elsif ($action eq "reload") {
    DoReload()
}
Leigh Stoller's avatar
Leigh Stoller committed
250 251 252
elsif ($action eq "recovery") {
    DoRecovery()
}
253 254 255
elsif ($action eq "monitor") {
    StartMonitor()
}
256 257 258
elsif ($action eq "lockdown") {
    DoLockdown()
}
259 260 261
elsif ($action eq "panic") {
    DoPanic()
}
262 263 264
elsif ($action eq "linktest") {
    DoLinktest()
}
265 266 267
elsif ($action eq "updatekeys") {
    DoUpdateKeys()
}
268 269 270
elsif ($action eq "writecreds") {
    WriteCredentials()
}
271 272 273
elsif ($action eq "getmanifests") {
    DoManifests()
}
274 275 276
elsif ($action eq "deletenodes") {
    DoDeleteNodes()
}
277 278 279
elsif ($action eq "utilization") {
    DoUtilization()
}
280 281 282
elsif ($action eq "idledata") {
    DoIdleData()
}
283 284 285
elsif ($action eq "openstackstats") {
    DoOpenstack()
}
286 287 288
elsif ($action eq "maxextension") {
    DoMaxExtension()
}
289 290 291
elsif ($action eq "checkautoapprove") {
    DoCheckAutoApprove()
}
292 293 294
elsif ($action eq "applyextensionpolicy") {
    DoApplyExtensionPolicy()
}
295 296 297
elsif ($action eq "delete") {
    DoDelete()
}
298 299 300
else {
    usage();
}
301 302 303 304 305 306 307
exit(0);

#
# Take a snapshot. Implies a single node instance, for now.
#
sub DoSnapshot()
{
308 309
    my $errmsg;
    my $logfile;
310
    my $errcode        = -1;
311
    my $exitcode       = -1;
312 313
    my $needunlock     = 0;
    my $old_status     = $instance->status();
314 315
    my $node_id;
    my $imagename;
Leigh Stoller's avatar
Leigh Stoller committed
316
    my $description;
317
    my $cloneprofile;
318
    my $update_profile;
319 320
    my $copyback_uuid;
    my $copyback_urn;
321 322 323
    my $swebtask;
    my $nosnapshot     = 0;
    my $mustnotexist   = 0;
324
    my $wholedisk      = 0;
325
    my $update_prepare = 0;
326 327
    my $doversions = 0;
    my $usetracker = 0;
328
    my $operation = "image-only"; # Default to just snapshot.
329

Leigh Stoller's avatar
Leigh Stoller committed
330
    my $optlist = "n:i:u:Uc:O:SseD:";
331 332 333 334 335 336 337 338 339 340
    my %options = ();
    if (! getopts($optlist, \%options)) {
	usage();
    }
    if (defined($options{"n"})) {
	$node_id = $options{"n"};
    }
    if (defined($options{"i"})) {
	$imagename = $options{"i"};
    }
341 342 343
    if (defined($options{"c"})) {
	$cloneprofile = $options{"c"};
    }
344 345 346 347 348 349
    if (defined($options{"u"})) {
	$update_profile = $options{"u"};
	if ($update_profile !~ /^(node|all)$/) {
	    usage();
	}
    }
350 351 352
    if (defined($options{"U"})) {
	$update_prepare = 1;
    }
Leigh Stoller's avatar
Leigh Stoller committed
353 354 355
    if (defined($options{"D"})) {
	$description = ReadFile($options{"D"});
    }
356 357 358
    if (defined($options{"s"})) {
	$nosnapshot = 1;
    }
359 360 361 362 363
    if (defined($options{"e"})) {
	$wholedisk = 1;
	# Must be a brand new image for wholedisk. 
	$mustnotexist = 1;
    }
364 365 366 367 368 369 370 371 372 373 374 375 376 377
    if (defined($options{"S"})) {
	$nosnapshot = 1;
	$mustnotexist = 1;
    }
    if (defined($options{"O"})) {
	$operation = $options{"O"};
	if ($operation !~
	    /^(update-profile|copy-profile|new-profile|image-only)$/) {
	    usage();
	}
	if ($operation eq "update-profile") {
	    $update_profile = "node";
	}
    }
378 379 380 381 382 383
    if (defined($cloneprofile) && defined($update_profile)) {
	fatal("Not allowed to update profile when cloning a profile");
    }
    if (defined($cloneprofile) && !defined($imagename)) {
	fatal("Must supply image name when cloning a profile");
    }
384 385 386 387 388 389 390
    if ($old_status ne "ready") {
	fatal("Instance must be in the ready state to take a snapshot");
    }
    my $slice = $instance->GetGeniSlice();
    if (!defined($slice)) {
	fatal("No slice for quick VM: $uuid");
    }
391
    
392
    #
393
    # Might be a clone (manage_profile).
394
    #
395
    my $sliver_urn;
396 397
    my $aggregate;
    my $node;
398 399 400 401 402 403 404 405
    my $profile;

    if (defined($cloneprofile)) {
	$profile = APT_Profile->Lookup($cloneprofile);
    }
    else {
	$profile = APT_Profile->Lookup($instance->profile_id());
    }
406
    if (!defined($profile)) {
407 408
	fatal("Could not lookup profile for " .
	      (defined($cloneprofile) ? "cloning" : "snapshot"));
409
    }
410 411
    my $project = Project->Lookup($profile->pid_idx());
    if (!defined($project)) {
412
	fatal("Could not lookup project for $profile");
413 414
    }
    
415
    #
416
    # Sanity checks. 
417
    #
418
    my @aggs = $instance->AggregateList();
419 420
    if (! @aggs) {
	fatal("No slivers for instance!");
421
    }
422
    if (!defined($node_id)) {
423
	# We snapshot the one node in the instance.
424 425 426 427 428 429 430 431 432
	if (@aggs != 1) {
	    fatal("Too many aggregates (> 1) to snapshot");
	}
	my ($agg) = @aggs;
	my $manifest = GeniXML::Parse($agg->manifest());
	if (! defined($manifest)) {
	    fatal("Could not parse manifest for $agg");
	}
	my @nodes = GeniXML::FindNodes("n:node", $manifest)->get_nodelist();
433 434 435
	if (@nodes != 1) {
	    fatal("Too many nodes (> 1) to snapshot");
	}
436
	($node)     = @nodes;
437
	$sliver_urn = GeniXML::GetSliverId($node);
438 439
	$node_id    = GeniXML::GetVirtualId($node);
	$aggregate  = $agg;
440 441 442
	# Profile Snapshot, always use the profile name. Clone passes in name.
	if (!defined($imagename)) {
	    $imagename = $profile->name();
443
	}
444
    }
445
    else {
446 447
	my $nodecount = 0;
	
448 449 450 451 452 453 454
	# Find the node in its manifest.
	foreach my $agg (@aggs) {
	    my $manifest = GeniXML::Parse($agg->manifest());
	    if (! defined($manifest)) {
		fatal("Could not parse manifest for $agg");
	    }
	    foreach my $ref (GeniXML::FindNodes("n:node",
455 456 457
					$manifest)->get_nodelist(),
			     GeniXML::FindNodesNS("n:vhost", $manifest,
					$GeniXML::EMULAB_NS)->get_nodelist()) {
458 459
		$nodecount++;
		
460 461 462
		my $client_id   = GeniXML::GetVirtualId($ref);
		my $manager_urn = GetManagerId($ref);
		my $urn          = GeniXML::GetSliverId($ref);
463 464 465

		# No sliver urn or a different aggregate.
		next
466 467 468
		    if (! (defined($urn) &&
			   defined($manager_urn) &&
			   $manager_urn eq $agg->aggregate_urn()));
469 470 471 472 473 474 475

		if ($node_id eq $client_id) {
		    $node = $ref;
		    $sliver_urn = $urn;
		    $aggregate = $agg;
		    last;
		}
476 477 478 479
	    }
	}
	if (!defined($sliver_urn)) {
	    fatal("Could not find node '$node_id' in manifest");
480
	}
481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
	#
	# So, we want Profile snapshot above (of a single node profile) and
	# Node snapshot in a single node profile to behave the same wrt the
	# image name, so look at the nodecount to see if need to append the
	# nodeid to the imagename. 
	#
	if (!defined($imagename)) {
	    $imagename = $profile->name();
	    if ($nodecount > 1) {
		$imagename .= "." . $node_id;
	    }
	}
    }
    #
    # Make sure a valid imagename. This a local test of course, but this
    # only works on IG aggregates anyway.
    #
    if (! TBcheck_dbslot($imagename, "images",
			 "imagename", TBDB_CHECKDBSLOT_ERROR)) {
500 501 502
	$errmsg   = "Invalid imagename: " . TBFieldErrorString() . "\n";
	$errcode  = GENIRESPONSE_ERROR;
	$exitcode = 1;
503
	goto uerror;
504 505
    }
    
506
    #
507 508 509
    # Instruct the remote cluster to copy the image back to its origin,
    # but we need to ask the IMS for uuid of the image that is running,
    # so we can tell the cluster, which then tells the origin cluster.
510 511
    # We also need to know what the new URN of the image will be, for
    # updating the profile. 
512 513 514 515 516 517
    #
    if (GetSiteVar("protogeni/use_imagetracker") &&	
	EmulabFeatures->FeatureEnabled("APT_UseImageTracker",
					   $this_user, $project)) {
	$usetracker = 1;

518 519 520 521
	#
	# When cloning, we use the URN returned by the cluster; it is
	# the origin of the new image.
	#
522 523
	if (!defined($cloneprofile)) {
	    my $rval = DoImageTrackerStuff($aggregate, $node, $project,
524
					   $imagename,
525 526 527 528 529 530 531
					   \$copyback_uuid, \$copyback_urn,
					   \$errmsg);
	    if ($rval) {
		if ($rval < 0) {
		    fatal($errmsg);
		}
		else {
532 533
		    $errcode  = GENIRESPONSE_ERROR;
		    $exitcode = 1;
534 535 536
		    goto uerror;
		}
	    }
537 538 539 540
	}
    }
    if (0) {
	fatal("$copyback_uuid, $copyback_urn\n");
541 542
    }

543
    #
544 545 546
    # We are not going to allow this if the instance is on a different
    # cluster then where the image was originally created, since otherwise
    # the image provenancewill look like spaghetti. 
547
    #
548
    if (defined($update_profile)) {
549 550
	my $diskref = GeniXML::GetDiskImage($node);
	if (defined($diskref)) {
551
	    my $authority = $aggregate->GetGeniAuthority();
552
	    my $image_url = GeniXML::GetText("url", $diskref);
553
	    if (defined($image_url) && !$usetracker) {
554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
		require URI;

		# Get the hostname for the image URL.
		my $uri = URI->new($image_url);
		if (!defined($uri)) {
		    fatal("Could not parse $image_url");
		}
		my $image_host = $uri->host();

		# Get the hostname for the authority.
		$uri = URI->new($authority->url());
		if (!defined($uri)) {
		    fatal("Could not parse authority URL");
		}
		my $authority_host = $uri->host();

		# Compare domains.
		$image_host =~ s/^([^.]+\.)//;
		$authority_host =~ s/^([^.]+\.)//;
	
		if ($image_host ne $authority_host) {
		    $errmsg  = "Not allowed to take a snapshot on this cluster";
576 577
		    $errcode  = GENIRESPONSE_ERROR;
		    $exitcode = 1;
578
		    goto uerror;
579 580 581
		}
	    }
	}
582 583 584 585
	# Do this here to avoid output to logfile.
	$doversions =
	    EmulabFeatures->FeatureEnabled("APT_ProfileVersions",
					   $this_user, $project);
586 587
    }
    if ($slice->Lock()) {
588
	$errmsg  = "Experiment is busy, please try again later.";
589 590
	$errcode = GENIRESPONSE_BUSY;
	$exitcode = 1;
591
	goto uerror;
592 593
    }
    $needunlock = 1;
594

595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
    if (!$nosnapshot) {
	#
	# Grab the webtask, but only after we have it locked, since we are
	# going to modify it. This is a different webtask then the one we
	# got on the command line. The command line webtask is for reporting
	# he results of the command, while the instance webtask is used to
	# report ongoing status of the imaging operation (to the web UI).
	#
	$swebtask = $instance->webtask();
	# We reuse this so clear it.
	$swebtask->Reset();
	$swebtask->AutoStore(1);
	# These are for the web server (imaging status).
	$swebtask->aggregate_urn($aggregate->aggregate_urn());
	$swebtask->client_id($node_id);
	$swebtask->operation($operation) if (defined($operation));
	$swebtask->imagename($imagename);
	
	$instance->SetStatus("imaging");
	$aggregate->SetStatus("imaging");
    }
616

617 618 619
    # Shorten default timeout
    Genixmlrpc->SetTimeout(60);
    
620 621
    #
    # This returns pretty fast, and then the imaging takes place in
622
    # the background at the aggregate. 
623
    #
624
    my $response =
625
	$aggregate->CreateImage($sliver_urn, $imagename,
626
				$update_prepare, $copyback_uuid,
Leigh Stoller's avatar
Leigh Stoller committed
627 628
				undef, $nosnapshot, $mustnotexist, $wholedisk,
				$description);
629
    if ($response->code() != GENIRESPONSE_SUCCESS) {
630 631 632 633
	$errcode = $response->code();
	($exitcode,$errmsg) = ResponseErrorMessage($aggregate, $response);
	# Important to tell web user about these.
	if ($response->code() == GENIRESPONSE_NOSPACE ||
Leigh Stoller's avatar
Leigh Stoller committed
634
	    $response->code() == GENIRESPONSE_FORBIDDEN || 
635 636 637
	    $response->code() == GENIRESPONSE_ALREADYEXISTS) {
	    $exitcode = 1;
	}
638 639 640 641
	if (!$nosnapshot) {
	    $instance->SetStatus($old_status);
	    $aggregate->SetStatus($old_status);
	}
642
	goto uerror;
643 644 645
    }
    my ($image_urn, $image_url,
	$version_urn, $version_url) = @{ $response->value() };
646 647 648 649
    #
    # For version zero, kill the version number.
    #
    if (!defined($version_urn) || $version_urn =~ /:0$/) {
650 651 652
	$version_urn = $image_urn;
	$version_url = $image_url
    }
653
    my $image_name;
654

655 656 657 658 659 660
    # When using the image server we use URNs. 
    if ($usetracker) {
	# DoImageTrackerStuff determined that we use whatever the cluster
	# tells us, cause it is the home of the image.
	if (!defined($copyback_urn)) {
	    $image_name = $version_urn;
Leigh Stoller's avatar
Leigh Stoller committed
661 662
	}
	else {
663
	    $image_name = $copyback_urn;
Leigh Stoller's avatar
Leigh Stoller committed
664
	}
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685
    }
    elsif ($aggregate->OnLocalCluster()) {
	$image_name = $version_urn;
    }
    else {
	$image_name = $version_url;
    }
    
    if ($nosnapshot) {
	if (defined($webtask)) {
	    $webtask->image_urn($version_urn);
	    $webtask->image_url($version_url);
	    $webtask->image_name($image_name);
	}
	print "$version_urn,$version_url,$image_name\n";
	$slice->UnLock();
	exit(0);
    }
    if (defined($swebtask)) {
	$swebtask->image_urn($version_urn);
	$swebtask->image_url($version_url);
686
	$swebtask->image_name($image_name);
687

Leigh Stoller's avatar
Leigh Stoller committed
688 689 690
	# We tell the web interface that the image has to be copied
	# back,
	if (defined($copyback_uuid)) {
691
	    $swebtask->copyback_uuid($copyback_uuid);
Leigh Stoller's avatar
Leigh Stoller committed
692
	}
693
    }
694
    print "$version_urn,$version_url,$image_name\n";
695 696 697 698 699

    #
    # Exit and leave child to poll.
    #
    if (! $debug) {
700 701 702 703
        $logfile = TBMakeLogname("snapshot");
	
	if (my $childpid = TBBackGround($logfile)) {
	    # Parent exits normally, web interface watches.
704 705 706 707 708
	    exit(0);
	}
	# Let parent exit;
	sleep(2);
    }
709 710
    # Bind the process id. This is important when the caller is
    # manage_profile, doing a clone.
711 712
    $webtask->SetProcessID($PID)
	if (defined($webtask));
713 714 715 716

    #
    # Poll for a reasonable amount of time.
    #
717
    my $seconds  = 1500;
718
    my $interval = 15;
719
    my $ready    = 0;
720
    my $sliver_ready = 0;
721
    my $failed   = 0;
722

723
    while ($seconds > 0) {
724 725
	sleep($interval);
	$seconds -= $interval;
726
    
727
	my $response = $aggregate->SliceStatus();
728
	if ($response->code() != GENIRESPONSE_SUCCESS &&
729
	    $response->code() != GENIRESPONSE_NETWORK_ERROR &&
730
	    $response->code() != GENIRESPONSE_SERVER_UNAVAILABLE &&
731 732 733 734 735 736
	    $response->code() != GENIRESPONSE_BUSY) {
	    $errmsg = "Sliverstatus failed: ". $response->output() . "\n";
	    $failed = 1;
	    last;
	}
	next
737
	    if ($response->code() == GENIRESPONSE_BUSY ||
738
		$response->code() == GENIRESPONSE_SERVER_UNAVAILABLE ||
739
		$response->code() == GENIRESPONSE_NETWORK_ERROR);
740

741
	my $blob = $response->value();
742
	# This is the per-aggregate status, we always set this for web UI.
743
	$aggregate->UpdateSliverStatusAll($blob->{'details'});
744
	
745 746 747 748
	if ($blob->{'status'} eq "failed") {
	    $failed = 1;
	    last;
	}
749 750 751 752
	elsif ($blob->{'status'} eq "ready") {
	    $sliver_ready = 1;
	}
	
753 754 755
	#
	# We are watching for the image status to report ready or failed.
	#
756
	$response = $aggregate->ImageInfo($image_urn);
757
	if ($response->code() != GENIRESPONSE_SUCCESS &&
758
	    $response->code() != GENIRESPONSE_NETWORK_ERROR &&
759
	    $response->code() != GENIRESPONSE_SERVER_UNAVAILABLE &&
760 761 762 763 764 765
	    $response->code() != GENIRESPONSE_BUSY) {
	    $errmsg = "Imageinfo failed: ". $response->output() . "\n";
	    $failed = 1;
	    last;
	}
	next
766
	    if ($response->code() == GENIRESPONSE_BUSY ||
767
		$response->code() == GENIRESPONSE_SERVER_UNAVAILABLE ||
768
		$response->code() == GENIRESPONSE_NETWORK_ERROR);
769

770
	my $imageblob = $response->value();
771
	if (defined($swebtask)) {
772 773 774 775 776 777 778 779 780
	    my %blobcopy = %{ $imageblob };

	    #
	    # If the image is ready, but needs to be copied back to
	    # its origin, hold of ready till later. We will wait for
	    # the copyback to finish, see below.
	    #
	    if ($imageblob->{'status'} eq "ready" && defined($copyback_uuid)) {
		$blobcopy{'status'} = "copying";
Leigh Stoller's avatar
Leigh Stoller committed
781
	    }
782 783
	    # This is also being updated by the event system.
	    $instance->UpdateImageStatus(\%blobcopy);
784
	}
785
	if ($imageblob->{'status'} eq "ready") {
786 787 788
	    $ready = 1;
	    last;
	}
789
	elsif ($imageblob->{'status'} eq "failed") {
790 791 792
	    $failed = 1;
	    last;
	}
793
    }
794
    # Cause of image status events.
795 796
    $swebtask->Refresh()
	if (defined($swebtask));
797
    
798 799 800 801
    if ($failed) {
	$errmsg = "Imaging failed"
	    if (!defined($errmsg));
	goto bad;
802
    }
803 804
    elsif (!$ready) {
	$errmsg  = "Imaging timed out";
805
	$errcode = -2;
806 807
	goto bad;
    }
Leigh Stoller's avatar
Leigh Stoller committed
808 809 810 811 812 813
    #
    # We cannot change a geni-lib script profile, so no need to do this.
    # But we can change a portal converted profile.
    #
    elsif (defined($update_profile) &&
	   (!defined($profile->script()) || $profile->portal_converted())) {
814
	#
Leigh Stoller's avatar
Leigh Stoller committed
815 816
	# New image. Might not have changed if the cluster is not doing
	# image versions.
817
	#
818
	# DoImageTrackerStuff determined that we use whatever the cluster
819
	# tells us, cause it is the home of the image.
Leigh Stoller's avatar
Leigh Stoller committed
820 821
	#
	my $newimage = $version_urn
822
	    if ($usetracker && !defined($copyback_urn));
Leigh Stoller's avatar
Leigh Stoller committed
823 824 825
	# And fall back to url
	$newimage = $version_url
	    if (!defined($newimage));
826

Leigh Stoller's avatar
Leigh Stoller committed
827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
	#
	# Okay, is the rspec going to change. This is kinda annoying to
	# figure out, so lets just run the code in impotent mode and have
	# it tell us the number of nodes that will get updated.
	#
	my $changed = 
	    $profile->UpdateDiskImage($node_id, $newimage,
				      ($update_profile eq "all" ? 1 : 0), 1);

	if ($changed > 0) {
	    #
	    # Create a new version of the profile and update the rspec
	    # to reflect the new image version. 
	    #
	    if ($doversions) {
		$profile = $profile->NewVersion($this_user);
		if (!defined($profile)) {
		    print STDERR "Could not create new profile version\n";
		    $webtask->Exited(70)
			if (defined($webtask));
		    exit(1);
		}
	    }
	    $profile->UpdateDiskImage($node_id, $newimage,
				      ($update_profile eq "all" ? 1 : 0), 0);
	    $profile->InsertImageRecords();
	    #
	    # For a portal converted profile, we need to regen the script.
	    #
	    if ($profile->portal_converted()) {
		if ($profile->Convert2Genilib() != 0) {
		    fatal("Could not convert rspec to geni-lib");
		}
	    }
	}
862
    }
863
    $instance->SetStatus("ready");
864
    $aggregate->SetStatus("ready");
Leigh Stoller's avatar
Leigh Stoller committed
865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899
    
    #
    # If there is a copyback_uuid, we want to wait for that to finish.
    #
    if (defined($copyback_uuid)) {
	#
	# We know the copyback is done when the IMS has the info.
	#
	my $copied  = 0;
	$seconds  = 1000;

	while ($seconds > 0) {
	    sleep($interval);
	    $seconds -= $interval;

	    #
	    # It would clearly be more more efficient to just look in
	    # the IMS database. 
	    #
	    Genixmlrpc->SetContext(APT_Geni::GeniContext());
	    my $blob = GeniImage::GetImageData($copyback_urn, \$errmsg);
	    Genixmlrpc->SetContext(undef);
	    # We get back undefined if the image is not posted yet.
	    if (defined($blob)) {
		$copied = 1;
		last;
	    }
	    sleep($interval);
	}
	# Tell the web interface. 
	if (!$copied) {
	    $errmsg  = "Failed to copy image back to its origin cluster";
	    $errcode = 1;
	    goto bad;
	}
900 901
	elsif (defined($swebtask)) {
	    $swebtask->image_status("ready");
Leigh Stoller's avatar
Leigh Stoller committed
902 903
	}
    }
904
    $swebtask->Exited(0);
905 906 907
    $webtask->Exited(0)
	if (defined($webtask));
    $slice->UnLock();
908
    
909 910 911 912 913 914 915
    if (defined($logfile)) {
	if (-s $logfile) {
	    SENDMAIL($TBOPS,
		     "Instance Snapshot Complete",
		     "Finished taking snapshot of $instance.\n",
		     $TBOPS, undef, $logfile);
	}
916 917
	unlink($logfile);
    }
918 919 920 921 922
    if (!$sliver_ready) {
	#
	# Image is ready, but sliver is not. Start a monitor so that
	# web interface is updated.
	#
Leigh Stoller's avatar
Leigh Stoller committed
923
	StartMonitorInternal();
924
    }
925
    exit(0);
926
  bad:
927
    if (!$sliver_ready) {
928
	#
929 930
	# Image is ready, but sliver is not. Start a monitor so that
	# web interface is updated.
931
	#
Leigh Stoller's avatar
Leigh Stoller committed
932
	StartMonitorInternal();
933
    }
934
    $instance->SetStatus("ready");
935
    $aggregate->SetStatus("ready");
936
    if (defined($logfile)) {
937 938 939 940 941
	SENDMAIL($TBOPS,
		 "Snapshot failed",
		 "Error taking snapshot of $instance:\n\n".
		 "$errmsg\n",
		 $TBOPS, undef, $logfile);
942 943
	unlink($logfile);
    }
944 945 946 947 948 949
  uerror:
    print STDERR "$errmsg\n";
    if (defined($errmsg) && defined($webtask)) {
	$webtask->Exited($errcode);
	$webtask->output($errmsg);
    }
950
    # For display in the imaging modal.
Leigh Stoller's avatar
Leigh Stoller committed
951 952 953 954
    if (defined($errmsg) && defined($swebtask)) {
	$swebtask->Exited($errcode);
	$swebtask->output($errmsg);
    }
955 956 957
    $slice->UnLock()
	if ($needunlock);

Leigh Stoller's avatar
Leigh Stoller committed
958
    exit($exitcode);
959
}
960

961
sub DoImageTrackerStuff($$$$$$$)
962
{
963
    my ($aggregate, $node, $project, $newname, $puuid, $purn, $perrmsg) = @_;
964 965 966 967 968 969 970 971 972 973 974 975 976 977
    my $node_id = GeniXML::GetVirtualId($node);
    my $errmsg;

    #
    # If we do not have a diskinfo section, we will use the URN we get back
    # from the cluster (it is a snapshot of the default image).
    #
    my $diskinfo = GeniXML::GetDiskImage($node);
    return 0
	if (!defined($diskinfo));

    #
    # This one needs more thought, it might be a URL.
    #
978 979 980 981 982
    my $image_token = GeniXML::GetText("name", $diskinfo);
    if (!defined($image_token)) {
	$image_token = GeniXML::GetText("url", $diskinfo);
	return 0
	    if (!defined($image_token));
983
    }
984 985 986
    if (GeniHRN::IsValid($image_token)) {
	my ($auth,$ospid) = GeniHRN::ParseImage($image_token);
	if (!defined($ospid)) {
987
	    $$perrmsg = "Invalid image urn: $image_token";
988 989 990 991
	    return 1;
	}
    }
    
992
    Genixmlrpc->SetContext(APT_Geni::GeniContext());
993
    my $blob = GeniImage::GetImageData($image_token, \$errmsg);
994 995 996
    Genixmlrpc->SetContext(undef);
    
    if (!defined($blob)) {
997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012
	if (GeniHRN::IsValid($image_token)) {
	    #
	    # See if this is for a system image (emulab-ops). If it is,
	    # and the domain is not the MS, then retry with a MS URN.
	    #
	    # This is sorta temporary; at some point there will not be any
	    # profiles using the URNs that are not in the image tracker.
	    # Of course a user is free to set the URN to anything the want,
	    # which is why I expect this code to be here for a while. 
	    #
	    my $urn;
	    my $hrn = GeniHRN->new($image_token);
	    my ($auth,$ospid,$os,$vers) = $hrn->ParseImage();
	    if ($ospid eq TBOPSPID() && $auth ne "emulab.net") {
		$urn = GeniHRN::GenerateImage("emulab.net",
						 TBOPSPID(), $os, $vers);
1013 1014 1015
		Genixmlrpc->SetContext(APT_Geni::GeniContext());
		$blob = GeniImage::GetImageData($urn, \$errmsg);
		Genixmlrpc->SetContext(undef);
1016 1017 1018
	    }
	}
	if (!defined($blob)) {
1019 1020 1021
	    $$perrmsg = "Could not get info from the image server for ".
		"$image_token:\n" . $errmsg;
	    return 1;
1022
	}
1023 1024
    }
    #
1025
    # System Image? We use the URN we get back from CreateImage().
1026 1027 1028
    # The cluster will be the origin for the new image.
    #
    return 0
1029 1030 1031 1032
	if (exists($blob->{'issystem'}) && $blob->{'issystem'});
    # Image aliases are also considered system images.
    return 0
	if (exists($blob->{'isimagealias'}) && $blob->{'isimagealias'});
1033

1034
    my $image_urn     = $blob->{'urn'};
1035 1036 1037 1038
    my $copyback_uuid = $blob->{'version_uuid'};
    my $copyback_urn  = $image_urn;

    my $hrn = GeniHRN->Parse($image_urn);
1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060
    if (!defined($hrn)) {
	$$perrmsg = "Could not parse $image_urn\n";
	return -1;
    }
    if (!$hrn->IsImage()) {
	$$perrmsg = "$image_urn is not an image URN\n";
	return -1;
    }
    my $projhrn = GeniHRN->Parse($blob->{'project_urn'});
    if (!defined($projhrn)) {
	$$perrmsg = "Could not parse " . $blob->{'project_urn'} . "\n";
	return -1;
    }

    #
    # Whenever we cross projects, we expect the cluster to create
    # a new image. We use the URN that is returned.
    #
    if (lc($projhrn->subauth()) ne lc($project->pid())) {
	# We use the URN we get back from CreateSliver().
	return 0;
    }
1061 1062 1063 1064

    #
    # What happens if the user is doing a snapshot on the cluster where
    # the image lives? The copyback (import) makes no sense in that case,
1065
    # We can use the URN the cluster returns.
1066
    #
1067
    if (lc($hrn->domain()) eq lc($aggregate->domain())) {
1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078
	return 0;
    }

    #
    # Different cluster
    #
    # Is this node currently running a different image then what
    # the new name will be? Is so, then we expect the cluster to
    # start a new image and we use the URN it returns.
    #
    if ($newname ne $hrn->osname()) {
1079
	return 0;
1080 1081 1082 1083 1084
    }

    #
    # If we are going to update the profile, we need to know what to
    # change the image urn to, and that depends on what version the
1085 1086 1087 1088 1089
    # image is currently at, AT THE ORIGIN CLUSTER. The urn we get back
    # from the snapshotting cluster is not what we care about, we need
    # a urn for the origin cluster. But that depends on what version the
    # origin cluster is at (the highest numbered version). But if we are
    # doing a snapshot of an earlier version, we cannot generate the
1090
    # version here, we have to ask what it will be. 
1091 1092
    #
    if ($blob->{'isversioned'}) {
1093
	$copyback_urn = GeniHRN::GenerateImage($hrn->authority(),
1094 1095
					       $hrn->ospid(),
					       $hrn->osname(),
1096
					       $blob->{'maxversion'} + 1);
1097 1098 1099 1100 1101 1102
    }
    $$puuid = $copyback_uuid;
    $$purn  = $copyback_urn;
    return 0;
}

1103 1104 1105 1106 1107
#
# Ask the console URL for a node in an instance.
#
sub DoConsole()
{
1108
    my $errmsg;
1109
    usage()
1110 1111
	if (!@ARGV);
    my $node_id = shift(@ARGV);
1112

1113 1114 1115 1116 1117
    #
    # Sanity check to make sure the node is really in the rspec, since
    # we need its sliver urn to ask for the console url.
    #
    my $sliver_urn;
1118
    my $sliver;
1119
    foreach my $obj ($instance->AggregateList()) {
1120 1121 1122 1123
	my $manifest = GeniXML::Parse($obj->manifest());
	if (! defined($manifest)) {
	    fatal("Could not parse manifest for $obj");
	}
1124 1125 1126
	my @nodes = (GeniXML::FindNodes("n:node", $manifest)->get_nodelist(),
		     GeniXML::FindNodesNS("n:vhost", $manifest,
					  $GeniXML::EMULAB_NS)->get_nodelist());
1127
	foreach my $node (@nodes) {
1128 1129 1130
	    my $client_id   = GeniXML::GetVirtualId($node);
	    my $urn         = GeniXML::GetSliverId($node);
	    my $manager_urn = GetManagerId($node);
1131 1132 1133

	    # No sliver urn or a different aggregate.
	    next
1134 1135 1136
		if (! (defined($urn) &&
		       defined($manager_urn) &&
		       $manager_urn eq $obj->aggregate_urn()));
1137 1138

	    if ($node_id eq $client_id) {
1139
		$sliver_urn = $urn;
1140 1141
		$sliver = $obj;
	    }
1142 1143 1144 1145 1146
	}
    }
    if (!defined($sliver_urn)) {
	fatal("Could not find node '$node_id' in manifest");
    }
1147 1148 1149 1150 1151 1152 1153
    if ($sliver->GetAptAggregate()->CheckStatus(\$errmsg)) {
	print STDERR "$errmsg\n";
	if (defined($webtask)) {
	    $webtask->output($errmsg);
	    $webtask->Exited(GENIRESPONSE_SERVER_UNAVAILABLE);
	}
	exit(1);
1154
    }
1155 1156
    my $response = $sliver->ConsoleInfo($sliver_urn);

1157
    if ($response->code() == GENIRESPONSE_RPCERROR) {
1158
	print STDERR "RPC Error: " . $response->error() . "\n";
1159
	if (defined($webtask)) {
1160
	    $webtask->output($response->error());
1161 1162
	    $webtask->Exited($response->code());
	}
1163 1164 1165 1166 1167 1168 1169 1170 1171 1172
	exit(-1);
    }
    if ($response->code() == GENIRESPONSE_NETWORK_ERROR ||
	$response->code() == GENIRESPONSE_SERVER_UNAVAILABLE) {
	print STDERR "RPC Error: " . $response->error() . "\n";
	if (defined($webtask)) {
	    $webtask->output($response->error());
	    $webtask->Exited($response->code());
	}
	exit(1);
1173
    }
1174 1175 1176 1177
    if ($response->code() == GENIRESPONSE_UNAVAILABLE) {
	print STDERR "Server says there is no console for $node_id\n";
	if (defined($webtask)) {
	    $webtask->output("Sorry, $node_id does not have a console line");
1178 1179
	    $webtask->Exited($response->code());
	}
1180
	exit(1);
1181 1182 1183 1184 1185
    }
    if ($response->code() == GENIRESPONSE_SEARCHFAILED) {
	print STDERR "Server says $node_id has been deallocated\n";
	if (defined($webtask)) {
	    $webtask->output("Sorry, $node_id has been deallocated");
1186 1187
	    $webtask->Exited($response->code());
	}
1188
	exit(1);
1189
    }
1190 1191 1192 1193 1194 1195
    if ($response->code() == GENIRESPONSE_FORBIDDEN) {
	print STDERR "Server says access to $node_id console is forbidden\n";
	if (defined($webtask)) {
	    $webtask->output("Sorry, access to $node_id console is forbidden");
	    $webtask->Exited($response->code());
	}
1196
	exit(1);
1197
    }
1198 1199 1200 1201 1202
    if ($response->code() == XMLRPC_NO_SUCH_METHOD) {
	print STDERR "Server does not support console access\n";
	if (defined($webtask)) {
	    $webtask->output("Sorry, server does not support console access");
	    $webtask->Exited(GENIRESPONSE_NOT_IMPLEMENTED);
1203
	}
1204 1205 1206 1207
	exit(1);
    }
    if ($response->code() != GENIRESPONSE_SUCCESS) {
	fatal($response->error());
1208 1209 1210
    }
    my $url;
    my $pswd;
1211
    my $logurl;
1212 1213 1214 1215 1216
	
    if (ref($response->value())) {
	$url  = $response->value()->{'url'};
	$pswd = $response->value()->{'password'}
	    if (exists($response->value()->{'password'}));
1217 1218 1219 1220
	$logurl = $response->value()->{'logurl'}
	    if (exists($response->value()->{'logurl'}));

	print Dumper($response->value());
1221 1222 1223
    }
    else {
	$url = $response->value();
1224
    }
1225
    if (defined($webtask)) {
1226
	if ($response->code()) {
1227
	    $webtask->output($response->error());
1228 1229 1230 1231
	}
	else {
	    $webtask->url($url);
	    $webtask->password($pswd) if (defined($pswd));
1232
	    $webtask->logurl($logurl) if (defined($logurl));
1233
	}
1234
	$webtask->Exited($response->code());
1235
	exit($response->code());
1236 1237 1238
    }
    # For command line operation too.
    if ($response->code()) {
1239
	fatal($response->error());
1240
    }
1241 1242
    print $url . "\n";
    print $pswd . "\n" if (defined($pswd));
1243
    print $logurl . "\n" if (defined($logurl));
1244 1245
    exit(0);
}
1246

1247 1248 1249 1250 1251 1252
#
# Terminate
#
sub DoTerminate()
{
    my $errmsg;
1253 1254
    my $errcode;
    my $exitcode = 1;
1255
    my $logfile;
1256 1257
    my $takelock = 0;
    my $expired  = $RECORDHISTORY_TERMINATED;
1258

1259 1260 1261 1262 1263 1264 1265 1266 1267 1268
    my $optlist = "eL";
    my %options = ();
    if (! getopts($optlist, \%options)) {
	usage();
    }
    if (defined($options{"e"})) {
	$expired = $RECORDHISTORY_EXPIRED;
    }
    if (defined($options{"L"})) {
	$takelock = 1;
1269
    }
1270 1271 1272
    
    my $slice = $instance->GetGeniSlice();
    if (!defined($slice)) {
1273 1274 1275 1276
	#
	# No slice (typically) means we never got far enough to the
	# get the sliver created on the backend cluster.
	#
1277 1278 1279 1280 1281 1282
	goto killit;
    }
    #
    # Lock the slice in case it is doing something else, like taking
    # a disk image.
    #
1283
    # When told to take the lock, we take it and go.
1284 1285 1286 1287 1288
    #
    if ($takelock) {
	$slice->TakeLock();
    }
    elsif ($slice->Lock()) {
1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304
	#
	# A special case is if the slice is provisioning. This means the
	# user is giving up on it, and we want to tell the aggregate to
	# kill it. Not all aggregates are going to allow this, so need
	# to be able to deal with that.
	#
	if ($instance->status() ne "provisioned") {
	    fatal("Slice is busy, cannot lock it");
	}
	if (!$instance->canceled()) {
	    print "Marking instance canceled\n";
	    $instance->MarkCanceled();
	}
	sleep(1);
	# We have an obvious race here since we do not have the lock.
	exit(0);
1305 1306 1307 1308
    }
    my $old_status = $instance->status();
    $instance->SetStatus("terminating");

1309 1310 1311 1312 1313 1314 1315 1316
    #
    # If deferred, then no reason to save this in the history, it
    # was never started.
    #
    if ($old_status eq "deferred") {
	goto killit;
    }

1317 1318 1319
    #
    # Exit and let caller poll for status.
    #
1320
    if (!$debug) {
1321 1322 1323 1324 1325 1326 1327 1328 1329 1330
        $logfile = TBMakeLogname("terminate");
	
	if (my $childpid = TBBackGround($logfile)) {
	    my $status = 0;
	    #
	    # Wait a couple of seconds to see if there is going to be an
	    # immediate error. Then return and let it continue to run. This
	    # allows the web server to see quick errors. Later errors will
	    # have to be emailed. 
	    #
1331
	    sleep(5);
1332 1333 1334 1335 1336 1337 1338
	    my $foo = waitpid($childpid, &WNOHANG);
	    if ($foo) {
		$status = $? >> 8;
	    }
	    exit($status);
	}
    }
1339

1340
    # Skip terminated aggregates, since we retry later on failure.
1341
    # Also skip deferred aggregates, these were never setup.
1342 1343 1344
    my @agglist;
    foreach my $agg ($instance->AggregateList()) {
	push(@agglist, $agg)
1345 1346
	    if ($agg->status() ne "terminated" &&
		$agg->status() ne "deferred");
1347 1348
    }
    my $response;
1349

1350 1351 1352 1353 1354
    $errcode = CallMethodOnAggregates("Terminate", 10, \$response, @agglist);
				      
    if ($errcode) {
	$exitcode = -1;
	$errmsg   = $response;
1355 1356
	goto bad;
    }
1357

1358
    #
1359
    # Look at all the responses, update the status. 
1360
    #
1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392
    my @responses = @{$response};
    foreach my $agg (@agglist) {
	my $response = shift(@responses);
	my $code = $response->code();

	# SEARCHFAILED is success too
	if ($code == GENIRESPONSE_SUCCESS ||
	    $code == GENIRESPONSE_SEARCHFAILED) {
	    $agg->SetStatus("terminated");
	    next;
	}
	($exitcode,$errmsg) = ResponseErrorMessage($agg, $response);
	
	#
	# These will typically clear up at some point so abort
	# sending any email.
	#
	if ($code == GENIRESPONSE_BUSY ||
	    $code == GENIRESPONSE_SERVER_UNAVAILABLE ||
	    $code == GENIRESPONSE_NETWORK_ERROR) {
	    if (defined($logfile)) {
		unlink($logfile);
		$logfile = undef;
	    }
	    $errcode  = $code;
	    $exitcode = 1;
	}
	else {
	    # We are going to send mail below, so exit with positive status
	    # so web interface does not send email.
	    $errcode  = $code;
	    $exitcode = 1;
1393 1394
	}
    }
1395 1396 1397 1398 1399
    # Will have to try again later.
    goto bad
	if ($errcode);

    $instance->SetStatus("terminated");
1400
    $slice->Delete();
1401
    $instance->RecordHistory($expired);
1402 1403 1404 1405 1406
  killit:
    $instance->Delete();
    unlink($logfile)
	if (defined($logfile));
    exit(0);
1407
    
1408 1409 1410
  bad:
    print STDERR $errmsg . "\n";
    $instance->SetStatus($old_status);
1411
    if (defined($logfile)) {
1412 1413
	my $instance_name = $instance->name();
	my $slice_uuid    = $slice->uuid();
Leigh Stoller's avatar