All new accounts created on Gitlab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

manage_instance.in 122 KB
Newer Older
1 2
#!/usr/bin/perl -w
#
3
# Copyright (c) 2000-2018 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
#
use English;
use strict;
use Getopt::Std;
use XML::Simple;
use Data::Dumper;
use CGI;
use POSIX ":sys_wait_h";
31
use POSIX qw(setsid strftime ceil floor);
32
use Date::Parse;
33
use JSON;
34 35 36 37 38 39

#
# Back-end script to manage APT profiles.
#
sub usage()
{
Leigh B Stoller's avatar
Leigh B Stoller committed
40 41
    print("Usage: manage_instance snapshot instance ".
	  "[-n node_id] [-i imagename] [-u node|all]\n");
42 43 44
    print("Usage: manage_instance consoleurl instance node\n");
    print("Usage: manage_instance terminate instance\n");
    print("Usage: manage_instance refresh instance\n");
Leigh B Stoller's avatar
Leigh B Stoller committed
45 46 47
    print("Usage: manage_instance reboot instance node_id ...\n");
    print("Usage: manage_instance reload instance node_id ...\n");
    print("Usage: manage_instance deletenodes instance node_id ...\n");
48
    print("Usage: manage_instance monitor instance\n");
49
    print("Usage: manage_instance lockdown instance set|clear user|admin\n");
50
    print("Usage: manage_instance panic instance set|clear\n");
51
    print("Usage: manage_instance linktest instance [-k | level]\n");
52
    print("Usage: manage_instance writecreds instance directory\n");
53
    print("Usage: manage_instance updatekeys instance [uid] \n");
54
    print("Usage: manage_instance extend instance ".
55
	  "[-M] [-m message | -f filename] hours\n");
56
    print("Usage: manage_instance denyextension instance [-m message] [filename]\n");
57
    print("Usage: manage_instance maxextension instance\n");
58
    print("Usage: manage_instance moreinfo instance [-m message] [filename]\n");
59
    print("Usage: manage_instance utilization instance\n");
60
    print("Usage: manage_instance schedterminate instance [-m message] days [filename]\n");
61
    print("Usage: manage_instance idledata instance\n");
62
    print("Usage: manage_instance openstackstats instance\n");
Leigh B Stoller's avatar
Leigh B Stoller committed
63
    print("Usage: manage_instance getmanifests instance\n");
64
    print("Usage: manage_instance warn instance\n");
Leigh B Stoller's avatar
Leigh B Stoller committed
65
    print("Usage: manage_instance applyextensionpolicy instance\n");
66 67
    exit(-1);
}
68
my $optlist     = "dt:s";
69
my $debug       = 0;
70
my $silent      = 0;
71
my $webtask_id;
72
my $webtask;
73 74
my $this_user;
my $geniuser;
75 76 77 78

#
# Configure variables
#
79 80 81 82 83 84 85
my $TB		   = "@prefix@";
my $TBOPS          = "@TBOPSEMAIL@";
my $PROTOUSER      = "elabman";
my $SUDO           = "/usr/local/bin/sudo";
my $MANAGEINSTANCE = "$TB/bin/manage_instance";
my $WAP            = "$TB/sbin/wap";
my $TBACCT         = "$TB/sbin/tbacct";
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104

#
# Untaint the path
#
$ENV{'PATH'} = "$TB/bin:$TB/sbin:/bin:/usr/bin:/usr/bin:/usr/sbin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

#
# Load the Testbed support stuff.
#
use lib "@prefix@/lib";
use EmulabConstants;
use emdb;
use emutil;
105
use libEmulab;
106
use libtestbed;
107 108 109 110
use User;
use Project;
use APT_Profile;
use APT_Instance;
111
use APT_Geni;
112
use APT_Utility;
113 114
use GeniXML;
use GeniHRN;
115 116 117
use Genixmlrpc;
use GeniResponse;
use GeniSlice;
118
use GeniImage;
119
use GeniUser;
120
use WebTask;
121
use EmulabFeatures;
122 123 124

# Protos
sub fatal($);
125
sub UserError($);
126
sub DoSnapshot();
127
sub DoConsole();
128
sub DoTerminate();
129
sub DoSchedTerminate();
130
sub DoExtend();
131
sub DoDenyOrMoreInfo($);
132
sub DoRefresh();
133
sub DoReboot();
134
sub DoReload();
135
sub DoLockdown();
136
sub DoPanic();
137
sub DoManifests();
138
sub DoLinktest();
139
sub DoUpdateKeys();
Leigh B Stoller's avatar
Leigh B Stoller committed
140
sub DoDeleteNodes();
141
sub DoUtilization();
142
sub DoIdleData();
143
sub DoOpenstack();
144 145
sub DoCheckAutoApprove();
sub CheckAutoApprove($$);
146
sub DoMaxExtension();
147
sub DoMaxExtensionInternal($$);
Leigh B Stoller's avatar
Leigh B Stoller committed
148
sub DoApplyExtensionPolicy();
149
sub WriteCredentials();
150
sub StartMonitor();
151
sub StartMonitorInternal(;$);
152
sub DoImageTrackerStuff($$$$$$$);
153
sub DoWarn();
154
sub DenyExtensionInternal($);
155
sub ExtendInternal($$$$$);
156 157
sub CallMethodOnAggregates($$$@);
sub ResponseErrorMessage($$);
158 159 160 161 162 163 164 165 166 167 168 169

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"t"})) {
    $webtask_id = $options{"t"};
}
170 171 172
if (defined($options{"d"})) {
    $debug++;
}
173 174 175
if (defined($options{"s"})) {
    $silent = 1;
}
176
if (@ARGV < 2) {
177 178
    usage();
}
179
my $action   = shift(@ARGV);
180 181
my $token    = shift(@ARGV);
my $instance = APT_Instance->Lookup($token);
182
if (!defined($instance)) {
183
    $instance = APT_Instance->LookupBySlice($token);
184
}
185 186 187 188 189 190 191 192 193 194 195
#
# Grab the webtask object.
#
if (defined($webtask_id)) {
    $webtask = WebTask->Lookup($webtask_id);
    if (!defined($webtask)) {
	fatal("Could not lookup/create webtask for profile");
    }
    $webtask->AutoStore(1);
}

196 197 198 199 200 201 202 203 204 205
if (getpwuid($UID) eq "nobody") {
    $this_user = User->ImpliedUser();
}
else  {
    $this_user = User->ThisUser();
}
# If a guest user, we will not have an actual user, which is okay.
if (defined($this_user)) {
    $geniuser = GeniUser->CreateFromLocal($this_user);
}
206

207 208 209 210 211 212 213
if (!defined($instance)) {
    # Lets not make this a fatal error; when coming from the web interface,
    # we do not need to send email. 
    UserError("No such instance $token");
}
my $uuid = $instance->uuid();

214
if ($action eq "snapshot") {
215 216
    DoSnapshot();
}
217 218 219
if ($action eq "extend") {
    DoExtend();
}
220
elsif ($action eq "denyextension") {
221 222 223 224
    DoDenyOrMoreInfo("deny")
}
elsif ($action eq "moreinfo") {
    DoDenyOrMoreInfo("info")
225
}
226
elsif ($action eq "consoleurl") {
227 228
    DoConsole()
}
229 230 231
elsif ($action eq "terminate") {
    DoTerminate()
}
232 233 234
elsif ($action eq "warn") {
    DoWarn()
}
235 236 237
elsif ($action eq "schedterminate") {
    DoSchedTerminate()
}
238 239 240
elsif ($action eq "refresh") {
    DoRefresh()
}
241 242 243
elsif ($action eq "reboot") {
    DoReboot()
}
244 245 246
elsif ($action eq "reload") {
    DoReload()
}
247 248 249
elsif ($action eq "monitor") {
    StartMonitor()
}
250 251 252
elsif ($action eq "lockdown") {
    DoLockdown()
}
253 254 255
elsif ($action eq "panic") {
    DoPanic()
}
256 257 258
elsif ($action eq "linktest") {
    DoLinktest()
}
259 260 261
elsif ($action eq "updatekeys") {
    DoUpdateKeys()
}
262 263 264
elsif ($action eq "writecreds") {
    WriteCredentials()
}
265 266 267
elsif ($action eq "getmanifests") {
    DoManifests()
}
Leigh B Stoller's avatar
Leigh B Stoller committed
268 269 270
elsif ($action eq "deletenodes") {
    DoDeleteNodes()
}
271 272 273
elsif ($action eq "utilization") {
    DoUtilization()
}
274 275 276
elsif ($action eq "idledata") {
    DoIdleData()
}
277 278 279
elsif ($action eq "openstackstats") {
    DoOpenstack()
}
280 281 282
elsif ($action eq "maxextension") {
    DoMaxExtension()
}
283 284 285
elsif ($action eq "checkautoapprove") {
    DoCheckAutoApprove()
}
Leigh B Stoller's avatar
Leigh B Stoller committed
286 287 288
elsif ($action eq "applyextensionpolicy") {
    DoApplyExtensionPolicy()
}
289 290 291
else {
    usage();
}
292 293 294 295 296 297 298
exit(0);

#
# Take a snapshot. Implies a single node instance, for now.
#
sub DoSnapshot()
{
299 300
    my $errmsg;
    my $logfile;
301
    my $errcode        = -1;
302
    my $exitcode       = -1;
303 304
    my $needunlock     = 0;
    my $old_status     = $instance->status();
305 306
    my $node_id;
    my $imagename;
Leigh B Stoller's avatar
Leigh B Stoller committed
307
    my $description;
Leigh B Stoller's avatar
Leigh B Stoller committed
308
    my $cloneprofile;
309
    my $update_profile;
310 311
    my $copyback_uuid;
    my $copyback_urn;
312 313 314
    my $swebtask;
    my $nosnapshot     = 0;
    my $mustnotexist   = 0;
315
    my $wholedisk      = 0;
316
    my $update_prepare = 0;
317 318
    my $doversions = 0;
    my $usetracker = 0;
319
    my $operation = "image-only"; # Default to just snapshot.
320

Leigh B Stoller's avatar
Leigh B Stoller committed
321
    my $optlist = "n:i:u:Uc:O:SseD:";
322 323 324 325 326 327 328 329 330 331
    my %options = ();
    if (! getopts($optlist, \%options)) {
	usage();
    }
    if (defined($options{"n"})) {
	$node_id = $options{"n"};
    }
    if (defined($options{"i"})) {
	$imagename = $options{"i"};
    }
Leigh B Stoller's avatar
Leigh B Stoller committed
332 333 334
    if (defined($options{"c"})) {
	$cloneprofile = $options{"c"};
    }
335 336 337 338 339 340
    if (defined($options{"u"})) {
	$update_profile = $options{"u"};
	if ($update_profile !~ /^(node|all)$/) {
	    usage();
	}
    }
341 342 343
    if (defined($options{"U"})) {
	$update_prepare = 1;
    }
Leigh B Stoller's avatar
Leigh B Stoller committed
344 345 346
    if (defined($options{"D"})) {
	$description = ReadFile($options{"D"});
    }
347 348 349
    if (defined($options{"s"})) {
	$nosnapshot = 1;
    }
350 351 352 353 354
    if (defined($options{"e"})) {
	$wholedisk = 1;
	# Must be a brand new image for wholedisk. 
	$mustnotexist = 1;
    }
355 356 357 358 359 360 361 362 363 364 365 366 367 368
    if (defined($options{"S"})) {
	$nosnapshot = 1;
	$mustnotexist = 1;
    }
    if (defined($options{"O"})) {
	$operation = $options{"O"};
	if ($operation !~
	    /^(update-profile|copy-profile|new-profile|image-only)$/) {
	    usage();
	}
	if ($operation eq "update-profile") {
	    $update_profile = "node";
	}
    }
Leigh B Stoller's avatar
Leigh B Stoller committed
369 370 371 372 373 374
    if (defined($cloneprofile) && defined($update_profile)) {
	fatal("Not allowed to update profile when cloning a profile");
    }
    if (defined($cloneprofile) && !defined($imagename)) {
	fatal("Must supply image name when cloning a profile");
    }
375 376 377 378 379 380 381
    if ($old_status ne "ready") {
	fatal("Instance must be in the ready state to take a snapshot");
    }
    my $slice = $instance->GetGeniSlice();
    if (!defined($slice)) {
	fatal("No slice for quick VM: $uuid");
    }
382
    
383
    #
384
    # Might be a clone (manage_profile).
385
    #
386
    my $sliver_urn;
387 388
    my $aggregate;
    my $node;
Leigh B Stoller's avatar
Leigh B Stoller committed
389 390 391 392 393 394 395 396
    my $profile;

    if (defined($cloneprofile)) {
	$profile = APT_Profile->Lookup($cloneprofile);
    }
    else {
	$profile = APT_Profile->Lookup($instance->profile_id());
    }
397
    if (!defined($profile)) {
Leigh B Stoller's avatar
Leigh B Stoller committed
398 399
	fatal("Could not lookup profile for " .
	      (defined($cloneprofile) ? "cloning" : "snapshot"));
400
    }
401 402
    my $project = Project->Lookup($profile->pid_idx());
    if (!defined($project)) {
Leigh B Stoller's avatar
Leigh B Stoller committed
403
	fatal("Could not lookup project for $profile");
404 405
    }
    
406
    #
407
    # Sanity checks. 
408
    #
409
    my @aggs = $instance->AggregateList();
410 411
    if (! @aggs) {
	fatal("No slivers for instance!");
412
    }
413
    if (!defined($node_id)) {
414
	# We snapshot the one node in the instance.
415 416 417 418 419 420 421 422 423
	if (@aggs != 1) {
	    fatal("Too many aggregates (> 1) to snapshot");
	}
	my ($agg) = @aggs;
	my $manifest = GeniXML::Parse($agg->manifest());
	if (! defined($manifest)) {
	    fatal("Could not parse manifest for $agg");
	}
	my @nodes = GeniXML::FindNodes("n:node", $manifest)->get_nodelist();
424 425 426
	if (@nodes != 1) {
	    fatal("Too many nodes (> 1) to snapshot");
	}
427
	($node)     = @nodes;
428
	$sliver_urn = GeniXML::GetSliverId($node);
429 430
	$node_id    = GeniXML::GetVirtualId($node);
	$aggregate  = $agg;
431 432 433
	# Profile Snapshot, always use the profile name. Clone passes in name.
	if (!defined($imagename)) {
	    $imagename = $profile->name();
434
	}
435
    }
436
    else {
437 438
	my $nodecount = 0;
	
439 440 441 442 443 444 445 446
	# Find the node in its manifest.
	foreach my $agg (@aggs) {
	    my $manifest = GeniXML::Parse($agg->manifest());
	    if (! defined($manifest)) {
		fatal("Could not parse manifest for $agg");
	    }
	    foreach my $ref (GeniXML::FindNodes("n:node",
						$manifest)->get_nodelist()) {
447 448
		$nodecount++;
		
449 450 451
		my $client_id   = GeniXML::GetVirtualId($ref);
		my $manager_urn = GetManagerId($ref);
		my $urn          = GeniXML::GetSliverId($ref);
452 453 454

		# No sliver urn or a different aggregate.
		next
455 456 457
		    if (! (defined($urn) &&
			   defined($manager_urn) &&
			   $manager_urn eq $agg->aggregate_urn()));
458 459 460 461 462 463 464

		if ($node_id eq $client_id) {
		    $node = $ref;
		    $sliver_urn = $urn;
		    $aggregate = $agg;
		    last;
		}
465 466 467 468
	    }
	}
	if (!defined($sliver_urn)) {
	    fatal("Could not find node '$node_id' in manifest");
469
	}
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
	#
	# So, we want Profile snapshot above (of a single node profile) and
	# Node snapshot in a single node profile to behave the same wrt the
	# image name, so look at the nodecount to see if need to append the
	# nodeid to the imagename. 
	#
	if (!defined($imagename)) {
	    $imagename = $profile->name();
	    if ($nodecount > 1) {
		$imagename .= "." . $node_id;
	    }
	}
    }
    #
    # Make sure a valid imagename. This a local test of course, but this
    # only works on IG aggregates anyway.
    #
    if (! TBcheck_dbslot($imagename, "images",
			 "imagename", TBDB_CHECKDBSLOT_ERROR)) {
489 490 491
	$errmsg   = "Invalid imagename: " . TBFieldErrorString() . "\n";
	$errcode  = GENIRESPONSE_ERROR;
	$exitcode = 1;
492
	goto uerror;
493 494
    }
    
495
    #
496 497 498
    # Instruct the remote cluster to copy the image back to its origin,
    # but we need to ask the IMS for uuid of the image that is running,
    # so we can tell the cluster, which then tells the origin cluster.
Leigh B Stoller's avatar
Leigh B Stoller committed
499 500
    # We also need to know what the new URN of the image will be, for
    # updating the profile. 
501 502 503 504 505 506
    #
    if (GetSiteVar("protogeni/use_imagetracker") &&	
	EmulabFeatures->FeatureEnabled("APT_UseImageTracker",
					   $this_user, $project)) {
	$usetracker = 1;

Leigh B Stoller's avatar
Leigh B Stoller committed
507 508 509 510
	#
	# When cloning, we use the URN returned by the cluster; it is
	# the origin of the new image.
	#
511 512
	if (!defined($cloneprofile)) {
	    my $rval = DoImageTrackerStuff($aggregate, $node, $project,
513
					   $imagename,
514 515 516 517 518 519 520
					   \$copyback_uuid, \$copyback_urn,
					   \$errmsg);
	    if ($rval) {
		if ($rval < 0) {
		    fatal($errmsg);
		}
		else {
521 522
		    $errcode  = GENIRESPONSE_ERROR;
		    $exitcode = 1;
523 524 525
		    goto uerror;
		}
	    }
526 527 528 529
	}
    }
    if (0) {
	fatal("$copyback_uuid, $copyback_urn\n");
530 531
    }

532
    #
533 534 535
    # We are not going to allow this if the instance is on a different
    # cluster then where the image was originally created, since otherwise
    # the image provenancewill look like spaghetti. 
536
    #
537
    if (defined($update_profile)) {
538 539
	my $diskref = GeniXML::GetDiskImage($node);
	if (defined($diskref)) {
540
	    my $authority = $aggregate->GetGeniAuthority();
541
	    my $image_url = GeniXML::GetText("url", $diskref);
542
	    if (defined($image_url) && !$usetracker) {
543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564
		require URI;

		# Get the hostname for the image URL.
		my $uri = URI->new($image_url);
		if (!defined($uri)) {
		    fatal("Could not parse $image_url");
		}
		my $image_host = $uri->host();

		# Get the hostname for the authority.
		$uri = URI->new($authority->url());
		if (!defined($uri)) {
		    fatal("Could not parse authority URL");
		}
		my $authority_host = $uri->host();

		# Compare domains.
		$image_host =~ s/^([^.]+\.)//;
		$authority_host =~ s/^([^.]+\.)//;
	
		if ($image_host ne $authority_host) {
		    $errmsg  = "Not allowed to take a snapshot on this cluster";
565 566
		    $errcode  = GENIRESPONSE_ERROR;
		    $exitcode = 1;
567
		    goto uerror;
568 569 570
		}
	    }
	}
571 572 573 574
	# Do this here to avoid output to logfile.
	$doversions =
	    EmulabFeatures->FeatureEnabled("APT_ProfileVersions",
					   $this_user, $project);
575 576
    }
    if ($slice->Lock()) {
577
	$errmsg  = "Experiment is busy, please try again later.";
578 579
	$errcode = GENIRESPONSE_BUSY;
	$exitcode = 1;
580
	goto uerror;
581 582
    }
    $needunlock = 1;
583

584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604
    if (!$nosnapshot) {
	#
	# Grab the webtask, but only after we have it locked, since we are
	# going to modify it. This is a different webtask then the one we
	# got on the command line. The command line webtask is for reporting
	# he results of the command, while the instance webtask is used to
	# report ongoing status of the imaging operation (to the web UI).
	#
	$swebtask = $instance->webtask();
	# We reuse this so clear it.
	$swebtask->Reset();
	$swebtask->AutoStore(1);
	# These are for the web server (imaging status).
	$swebtask->aggregate_urn($aggregate->aggregate_urn());
	$swebtask->client_id($node_id);
	$swebtask->operation($operation) if (defined($operation));
	$swebtask->imagename($imagename);
	
	$instance->SetStatus("imaging");
	$aggregate->SetStatus("imaging");
    }
605

606 607 608
    # Shorten default timeout
    Genixmlrpc->SetTimeout(60);
    
609 610
    #
    # This returns pretty fast, and then the imaging takes place in
611
    # the background at the aggregate. 
612
    #
613
    my $response =
614
	$aggregate->CreateImage($sliver_urn, $imagename,
615
				$update_prepare, $copyback_uuid,
Leigh B Stoller's avatar
Leigh B Stoller committed
616 617
				undef, $nosnapshot, $mustnotexist, $wholedisk,
				$description);
618
    if ($response->code() != GENIRESPONSE_SUCCESS) {
619 620 621 622
	$errcode = $response->code();
	($exitcode,$errmsg) = ResponseErrorMessage($aggregate, $response);
	# Important to tell web user about these.
	if ($response->code() == GENIRESPONSE_NOSPACE ||
Leigh B Stoller's avatar
Leigh B Stoller committed
623
	    $response->code() == GENIRESPONSE_FORBIDDEN || 
624 625 626
	    $response->code() == GENIRESPONSE_ALREADYEXISTS) {
	    $exitcode = 1;
	}
627 628 629 630
	if (!$nosnapshot) {
	    $instance->SetStatus($old_status);
	    $aggregate->SetStatus($old_status);
	}
631
	goto uerror;
632 633 634
    }
    my ($image_urn, $image_url,
	$version_urn, $version_url) = @{ $response->value() };
635 636 637 638
    #
    # For version zero, kill the version number.
    #
    if (!defined($version_urn) || $version_urn =~ /:0$/) {
639 640 641
	$version_urn = $image_urn;
	$version_url = $image_url
    }
642
    my $image_name;
643

644 645 646 647 648 649
    # When using the image server we use URNs. 
    if ($usetracker) {
	# DoImageTrackerStuff determined that we use whatever the cluster
	# tells us, cause it is the home of the image.
	if (!defined($copyback_urn)) {
	    $image_name = $version_urn;
Leigh B Stoller's avatar
Leigh B Stoller committed
650 651
	}
	else {
652
	    $image_name = $copyback_urn;
Leigh B Stoller's avatar
Leigh B Stoller committed
653
	}
654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674
    }
    elsif ($aggregate->OnLocalCluster()) {
	$image_name = $version_urn;
    }
    else {
	$image_name = $version_url;
    }
    
    if ($nosnapshot) {
	if (defined($webtask)) {
	    $webtask->image_urn($version_urn);
	    $webtask->image_url($version_url);
	    $webtask->image_name($image_name);
	}
	print "$version_urn,$version_url,$image_name\n";
	$slice->UnLock();
	exit(0);
    }
    if (defined($swebtask)) {
	$swebtask->image_urn($version_urn);
	$swebtask->image_url($version_url);
675
	$swebtask->image_name($image_name);
676

Leigh B Stoller's avatar
Leigh B Stoller committed
677 678 679
	# We tell the web interface that the image has to be copied
	# back,
	if (defined($copyback_uuid)) {
680
	    $swebtask->copyback_uuid($copyback_uuid);
Leigh B Stoller's avatar
Leigh B Stoller committed
681
	}
682
    }
683
    print "$version_urn,$version_url,$image_name\n";
684 685 686 687 688

    #
    # Exit and leave child to poll.
    #
    if (! $debug) {
689 690 691 692
        $logfile = TBMakeLogname("snapshot");
	
	if (my $childpid = TBBackGround($logfile)) {
	    # Parent exits normally, web interface watches.
693 694 695 696 697
	    exit(0);
	}
	# Let parent exit;
	sleep(2);
    }
698 699
    # Bind the process id. This is important when the caller is
    # manage_profile, doing a clone.
700 701
    $webtask->SetProcessID($PID)
	if (defined($webtask));
702 703 704 705

    #
    # Poll for a reasonable amount of time.
    #
706
    my $seconds  = 1500;
707
    my $interval = 15;
708
    my $ready    = 0;
709
    my $sliver_ready = 0;
710
    my $failed   = 0;
711

712
    while ($seconds > 0) {
713 714
	sleep($interval);
	$seconds -= $interval;
715
    
716
	my $response = $aggregate->SliceStatus();
717
	if ($response->code() != GENIRESPONSE_SUCCESS &&
718
	    $response->code() != GENIRESPONSE_NETWORK_ERROR &&
719
	    $response->code() != GENIRESPONSE_SERVER_UNAVAILABLE &&
720 721 722 723 724 725
	    $response->code() != GENIRESPONSE_BUSY) {
	    $errmsg = "Sliverstatus failed: ". $response->output() . "\n";
	    $failed = 1;
	    last;
	}
	next
726
	    if ($response->code() == GENIRESPONSE_BUSY ||
727
		$response->code() == GENIRESPONSE_SERVER_UNAVAILABLE ||
728
		$response->code() == GENIRESPONSE_NETWORK_ERROR);
729

730
	my $blob = $response->value();
731
	# This is the per-aggregate status, we always set this for web UI.
732
	$aggregate->UpdateSliverStatusAll($blob->{'details'});
733
	
734 735 736 737
	if ($blob->{'status'} eq "failed") {
	    $failed = 1;
	    last;
	}
738 739 740 741
	elsif ($blob->{'status'} eq "ready") {
	    $sliver_ready = 1;
	}
	
742 743 744
	#
	# We are watching for the image status to report ready or failed.
	#
745
	$response = $aggregate->ImageInfo($image_urn);
746
	if ($response->code() != GENIRESPONSE_SUCCESS &&
747
	    $response->code() != GENIRESPONSE_NETWORK_ERROR &&
748
	    $response->code() != GENIRESPONSE_SERVER_UNAVAILABLE &&
749 750 751 752 753 754
	    $response->code() != GENIRESPONSE_BUSY) {
	    $errmsg = "Imageinfo failed: ". $response->output() . "\n";
	    $failed = 1;
	    last;
	}
	next
755
	    if ($response->code() == GENIRESPONSE_BUSY ||
756
		$response->code() == GENIRESPONSE_SERVER_UNAVAILABLE ||
757
		$response->code() == GENIRESPONSE_NETWORK_ERROR);
758

759
	my $imageblob = $response->value();
760
	if (defined($swebtask)) {
761 762 763 764 765 766 767 768 769
	    my %blobcopy = %{ $imageblob };

	    #
	    # If the image is ready, but needs to be copied back to
	    # its origin, hold of ready till later. We will wait for
	    # the copyback to finish, see below.
	    #
	    if ($imageblob->{'status'} eq "ready" && defined($copyback_uuid)) {
		$blobcopy{'status'} = "copying";
Leigh B Stoller's avatar
Leigh B Stoller committed
770
	    }
771 772
	    # This is also being updated by the event system.
	    $instance->UpdateImageStatus(\%blobcopy);
773
	}
774
	if ($imageblob->{'status'} eq "ready") {
775 776 777
	    $ready = 1;
	    last;
	}
778
	elsif ($imageblob->{'status'} eq "failed") {
779 780 781
	    $failed = 1;
	    last;
	}
782
    }
783
    # Cause of image status events.
784 785
    $swebtask->Refresh()
	if (defined($swebtask));
786
    
787 788 789 790
    if ($failed) {
	$errmsg = "Imaging failed"
	    if (!defined($errmsg));
	goto bad;
791
    }
792 793
    elsif (!$ready) {
	$errmsg  = "Imaging timed out";
794
	$errcode = -2;
795 796
	goto bad;
    }
Leigh B Stoller's avatar
Leigh B Stoller committed
797 798 799 800 801 802
    #
    # We cannot change a geni-lib script profile, so no need to do this.
    # But we can change a portal converted profile.
    #
    elsif (defined($update_profile) &&
	   (!defined($profile->script()) || $profile->portal_converted())) {
803
	#
Leigh B Stoller's avatar
Leigh B Stoller committed
804 805
	# New image. Might not have changed if the cluster is not doing
	# image versions.
806
	#
807
	# DoImageTrackerStuff determined that we use whatever the cluster
Leigh B Stoller's avatar
Leigh B Stoller committed
808
	# tells us, cause it is the home of the image.
Leigh B Stoller's avatar
Leigh B Stoller committed
809 810
	#
	my $newimage = $version_urn
811
	    if ($usetracker && !defined($copyback_urn));
Leigh B Stoller's avatar
Leigh B Stoller committed
812 813 814
	# And fall back to url
	$newimage = $version_url
	    if (!defined($newimage));
815

Leigh B Stoller's avatar
Leigh B Stoller committed
816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850
	#
	# Okay, is the rspec going to change. This is kinda annoying to
	# figure out, so lets just run the code in impotent mode and have
	# it tell us the number of nodes that will get updated.
	#
	my $changed = 
	    $profile->UpdateDiskImage($node_id, $newimage,
				      ($update_profile eq "all" ? 1 : 0), 1);

	if ($changed > 0) {
	    #
	    # Create a new version of the profile and update the rspec
	    # to reflect the new image version. 
	    #
	    if ($doversions) {
		$profile = $profile->NewVersion($this_user);
		if (!defined($profile)) {
		    print STDERR "Could not create new profile version\n";
		    $webtask->Exited(70)
			if (defined($webtask));
		    exit(1);
		}
	    }
	    $profile->UpdateDiskImage($node_id, $newimage,
				      ($update_profile eq "all" ? 1 : 0), 0);
	    $profile->InsertImageRecords();
	    #
	    # For a portal converted profile, we need to regen the script.
	    #
	    if ($profile->portal_converted()) {
		if ($profile->Convert2Genilib() != 0) {
		    fatal("Could not convert rspec to geni-lib");
		}
	    }
	}
851
    }
852
    $instance->SetStatus("ready");
853
    $aggregate->SetStatus("ready");
Leigh B Stoller's avatar
Leigh B Stoller committed
854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888
    
    #
    # If there is a copyback_uuid, we want to wait for that to finish.
    #
    if (defined($copyback_uuid)) {
	#
	# We know the copyback is done when the IMS has the info.
	#
	my $copied  = 0;
	$seconds  = 1000;

	while ($seconds > 0) {
	    sleep($interval);
	    $seconds -= $interval;

	    #
	    # It would clearly be more more efficient to just look in
	    # the IMS database. 
	    #
	    Genixmlrpc->SetContext(APT_Geni::GeniContext());
	    my $blob = GeniImage::GetImageData($copyback_urn, \$errmsg);
	    Genixmlrpc->SetContext(undef);
	    # We get back undefined if the image is not posted yet.
	    if (defined($blob)) {
		$copied = 1;
		last;
	    }
	    sleep($interval);
	}
	# Tell the web interface. 
	if (!$copied) {
	    $errmsg  = "Failed to copy image back to its origin cluster";
	    $errcode = 1;
	    goto bad;
	}
889 890
	elsif (defined($swebtask)) {
	    $swebtask->image_status("ready");
Leigh B Stoller's avatar
Leigh B Stoller committed
891 892
	}
    }
893
    $swebtask->Exited(0);
894 895 896
    $webtask->Exited(0)
	if (defined($webtask));
    $slice->UnLock();
897
    
898 899 900 901 902 903 904
    if (defined($logfile)) {
	if (-s $logfile) {
	    SENDMAIL($TBOPS,
		     "Instance Snapshot Complete",
		     "Finished taking snapshot of $instance.\n",
		     $TBOPS, undef, $logfile);
	}
905 906
	unlink($logfile);
    }
907 908 909 910 911
    if (!$sliver_ready) {
	#
	# Image is ready, but sliver is not. Start a monitor so that
	# web interface is updated.
	#
Leigh B Stoller's avatar
Leigh B Stoller committed
912
	StartMonitorInternal();
913
    }
914
    exit(0);
915
  bad:
916
    if (!$sliver_ready) {
917
	#
918 919
	# Image is ready, but sliver is not. Start a monitor so that
	# web interface is updated.
920
	#
Leigh B Stoller's avatar
Leigh B Stoller committed
921
	StartMonitorInternal();
922
    }
923
    $instance->SetStatus("ready");
924
    $aggregate->SetStatus("ready");
925
    if (defined($logfile)) {
926 927 928 929 930
	SENDMAIL($TBOPS,
		 "Snapshot failed",
		 "Error taking snapshot of $instance:\n\n".
		 "$errmsg\n",
		 $TBOPS, undef, $logfile);
931 932
	unlink($logfile);
    }
933 934 935 936 937 938
  uerror:
    print STDERR "$errmsg\n";
    if (defined($errmsg) && defined($webtask)) {
	$webtask->Exited($errcode);
	$webtask->output($errmsg);
    }
939
    # For display in the imaging modal.
Leigh B Stoller's avatar
Leigh B Stoller committed
940 941 942 943
    if (defined($errmsg) && defined($swebtask)) {
	$swebtask->Exited($errcode);
	$swebtask->output($errmsg);
    }
944 945 946
    $slice->UnLock()
	if ($needunlock);

Leigh B Stoller's avatar
Leigh B Stoller committed
947
    exit($exitcode);
948
}
949

950
sub DoImageTrackerStuff($$$$$$$)
951
{
952
    my ($aggregate, $node, $project, $newname, $puuid, $purn, $perrmsg) = @_;
953 954 955 956 957 958 959 960 961 962 963 964 965 966
    my $node_id = GeniXML::GetVirtualId($node);
    my $errmsg;

    #
    # If we do not have a diskinfo section, we will use the URN we get back
    # from the cluster (it is a snapshot of the default image).
    #
    my $diskinfo = GeniXML::GetDiskImage($node);
    return 0
	if (!defined($diskinfo));

    #
    # This one needs more thought, it might be a URL.
    #
967 968 969 970 971
    my $image_token = GeniXML::GetText("name", $diskinfo);
    if (!defined($image_token)) {
	$image_token = GeniXML::GetText("url", $diskinfo);
	return 0
	    if (!defined($image_token));
972
    }
Leigh B Stoller's avatar
Leigh B Stoller committed
973 974 975
    if (GeniHRN::IsValid($image_token)) {
	my ($auth,$ospid) = GeniHRN::ParseImage($image_token);
	if (!defined($ospid)) {
976
	    $$perrmsg = "Invalid image urn: $image_token";
Leigh B Stoller's avatar
Leigh B Stoller committed
977 978 979 980
	    return 1;
	}
    }
    
981
    Genixmlrpc->SetContext(APT_Geni::GeniContext());
982
    my $blob = GeniImage::GetImageData($image_token, \$errmsg);
983 984 985
    Genixmlrpc->SetContext(undef);
    
    if (!defined($blob)) {
Leigh B Stoller's avatar
Leigh B Stoller committed
986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001
	if (GeniHRN::IsValid($image_token)) {
	    #
	    # See if this is for a system image (emulab-ops). If it is,
	    # and the domain is not the MS, then retry with a MS URN.
	    #
	    # This is sorta temporary; at some point there will not be any
	    # profiles using the URNs that are not in the image tracker.
	    # Of course a user is free to set the URN to anything the want,
	    # which is why I expect this code to be here for a while. 
	    #
	    my $urn;
	    my $hrn = GeniHRN->new($image_token);
	    my ($auth,$ospid,$os,$vers) = $hrn->ParseImage();
	    if ($ospid eq TBOPSPID() && $auth ne "emulab.net") {
		$urn = GeniHRN::GenerateImage("emulab.net",
						 TBOPSPID(), $os, $vers);
1002 1003 1004
		Genixmlrpc->SetContext(APT_Geni::GeniContext());
		$blob = GeniImage::GetImageData($urn, \$errmsg);
		Genixmlrpc->SetContext(undef);
Leigh B Stoller's avatar
Leigh B Stoller committed
1005 1006 1007
	    }
	}
	if (!defined($blob)) {
1008 1009 1010
	    $$perrmsg = "Could not get info from the image server for ".
		"$image_token:\n" . $errmsg;
	    return 1;
Leigh B Stoller's avatar
Leigh B Stoller committed
1011
	}
1012 1013
    }
    #
1014
    # System Image? We use the URN we get back from CreateImage().
1015 1016 1017
    # The cluster will be the origin for the new image.
    #
    return 0
1018 1019 1020 1021
	if (exists($blob->{'issystem'}) && $blob->{'issystem'});
    # Image aliases are also considered system images.
    return 0
	if (exists($blob->{'isimagealias'}) && $blob->{'isimagealias'});
1022

1023
    my $image_urn     = $blob->{'urn'};
1024 1025 1026 1027
    my $copyback_uuid = $blob->{'version_uuid'};
    my $copyback_urn  = $image_urn;

    my $hrn = GeniHRN->Parse($image_urn);