Commit dd3d2a7e authored by Leigh Stoller's avatar Leigh Stoller

The long and winding road to image prestaging.

parent d72cf116
......@@ -334,9 +334,6 @@ sub Create($$$)
$sets .= ", start_at=FROM_UNIXTIME(" . $argref->{"start_at"} . ")";
delete($argref->{"start_at"});
}
else {
$sets .= ", started=created";
}
my $query = "insert into apt_instances set ${sets}, ".
join(",", map("$_=" .
DBQuoteSpecial($argref->{$_}), keys(%{$argref})));
......@@ -1611,6 +1608,55 @@ sub CreateDatasetCreds($$$)
return 0;
}
#
# Get a list of all images used by the instance, for each aggregate.
#
sub GetImageList($$;$)
{
my ($self, $pmsg, $pref) = @_;
my $rspecstr = $self->rspec();
my %imagelist = ();
my $rspec = GeniXML::Parse($rspecstr);
if (! defined($rspec)) {
print STDERR "CreateDatasetCreds: Could not parse rspec\n";
return -1;
}
foreach my $ref (GeniXML::FindNodes("n:node", $rspec)->get_nodelist()) {
my $manager_urn = GeniHRN->new(GetManagerId($ref));
my $diskref = GeniXML::GetDiskImage($ref);
if (! (defined($manager_urn) && $manager_urn->IsCM())) {
$$pmsg = "$manager_urn is not a valid CM URN";
return 1;
}
# Using the default (system) image.
next
if (!defined($diskref));
my $image_url = GeniXML::GetText("url", $diskref);
my $image_urn = GeniXML::GetText("name", $diskref);
next
if (!defined($image_urn));
# Yep, people put the URL in the URN spot. We should throw an error.
next
if ($image_urn =~ /^(http|https):/);
if (!GeniHRN::IsValid($image_urn)) {
$$pmsg = "$image_urn is not a valid URN";
return 1;
}
if (!exists($imagelist{$manager_urn})) {
$imagelist{$manager_urn} = [];
}
push(@{ $imagelist{$manager_urn} }, $image_urn);
}
$$pref = \%imagelist;
return 0;
}
#
# Create credentials required by this instance, to access restricted images.
# Access means import and/or use the image.
......@@ -1841,6 +1887,12 @@ sub Defer($$)
my $uuid = $self->uuid();
my $name = $self->name();
if (defined($start)) {
if ($start !~ /^\d+$/) {
$start = str2time($start);
}
}
my $query_result =
DBQueryWarn("select uuid from apt_deferred_instances ".
"where uuid='$uuid'");
......@@ -1859,12 +1911,6 @@ sub Defer($$)
(defined($start) ?
", start_at=FROM_UNIXTIME($start)" : ""))
or return -1;
if (defined($start)) {
DBQueryWarn("update apt_instance_aggregates set status='deferred' ".
"where uuid='$uuid'")
or return -1;
}
}
return 0;
}
......@@ -4108,5 +4154,44 @@ sub Recovery($$$)
return Genixmlrpc::CallMethod($cmurl, $context, "Recovery", $args);
}
#
# Handle prestaging images at aggregates.
#
sub PrestageImages($$$@)
{
my ($self, $operation, $imagelist, $credlist) = @_;
my $authority = $self->GetGeniAuthority();
my $geniuser = $self->instance()->GetGeniUser();
my $slice = $self->instance()->GetGeniSlice();
my $context = APT_Geni::GeniContext();
return ContextError()
if (! (defined($geniuser) && defined($authority) &&
defined($slice) && defined($context)));
my ($slice_credential, $speaksfor_credential) =
APT_Geni::GenCredentials($slice, $geniuser, undef, 1);
return CredentialError()
if (!defined($slice_credential));
my $credentials = [$slice_credential->asString()];
if (defined($speaksfor_credential)) {
$credentials = [@$credentials, $speaksfor_credential->asString()];
}
foreach my $cred (@$credlist) {
push(@$credentials, $cred->asString());
}
my $args = {
"slice_urn" => $slice->urn(),
"credentials" => $credentials,
"imagelist" => $imagelist,
"operation" => $operation,
};
my $cmurl = $authority->url();
$cmurl = devurl($cmurl) if ($usemydevtree);
return Genixmlrpc::CallMethod($cmurl, $context, "PrestageImages", $args);
}
# _Always_ make sure that this 1 is at the end of the file...
1;
......@@ -38,7 +38,7 @@ BIN_SCRIPTS = manage_profile manage_instance manage_dataset \
SBIN_SCRIPTS = apt_daemon aptevent_daemon portal_xmlrpc apt_checkup \
portal_monitor apt_scheduler portal_resources \
manage_licenses manage_aggregate powder_shutdown \
rfmonitor_daemon
rfmonitor_daemon aptimage_daemon
LIB_SCRIPTS = APT_Profile.pm APT_Instance.pm APT_Dataset.pm APT_Geni.pm \
APT_Aggregate.pm APT_Utility.pm APT_Rspec.pm
WEB_BIN_SCRIPTS = webmanage_profile webmanage_instance webmanage_dataset \
......
#!/usr/bin/perl -w
#
# Copyright (c) 2008-2018 University of Utah and the Flux Group.
# Copyright (c) 2008-2019 University of Utah and the Flux Group.
#
# {{{GENIPUBLIC-LICENSE
#
......@@ -58,7 +58,7 @@ my $CREATESLIVERS = "$TB/bin/create_slivers";
my $LOGFILE = "$TB/log/apt_scheduler.log";
my $SUDO = "/usr/local/bin/sudo";
my $PROTOUSER = "elabman";
my $SLEEP_INTERVAL = 120;
my $SLEEP_INTERVAL = 60;
my $DAILY_INTERVAL = 24 * 3600;
# un-taint path
......@@ -144,8 +144,10 @@ while (1) {
POSIX::strftime("20%y-%m-%d %H:%M:%S", localtime()) . "\n";
my $query_result =
DBQueryWarn("select uuid from apt_deferred_instances ".
"where start_at is null or now() >= start_at");
DBQueryWarn("select d.uuid from apt_deferred_instances as d ".
"left join apt_instances as i on i.uuid=d.uuid ".
"where (d.start_at is null or now() >= d.start_at or ".
" i.status='prestage' or i.status='staging'");
if ($query_result && $query_result->numrows) {
while (my ($uuid) = $query_result->fetchrow_array()) {
......@@ -173,8 +175,8 @@ while (1) {
$code = -1
if ($code == 255);
print STDERR $output;
if ($code < 0) {
print STDERR $output;
if (!exists($emailedErrors{$uuid}) ||
time() - $emailedErrors{$uuid} > (3 * 3600)) {
......
This diff is collapsed.
......@@ -67,6 +67,7 @@ my $sitemap;
my $usetracker = 0;
my $maxduration = 16; # Hours. Need to make this a site variable.
my @aggregate_urns = ();
my @prestage = ();
# Protos
sub fatal($);
......@@ -91,6 +92,7 @@ my $CREATESLIVERS = "$TB/bin/create_slivers";
my $UPDATEGENIUSER= "$TB/sbin/protogeni/updategeniuser";
my $OPENSSL = "/usr/bin/openssl";
my $MANAGEGITREPO = "$TB/bin/manage_gitrepo";
my $MANAGEINSTANCE= "$TB/bin/manage_instance";
my $DEFAULT_URN = "urn:publicid:IDN+$OURDOMAIN+authority+cm";
my $GUEST_URN = "urn:publicid:IDN+apt.emulab.net+authority+cm";
my $PROTOGENI_LOCALUSER= @PROTOGENI_LOCALUSER@;
......@@ -876,7 +878,7 @@ my $blob = {'uuid' => $quickvm_uuid,
'creator' => $geniuser->uid(),
'creator_idx' => $geniuser->idx(),
'creator_uuid' => $geniuser->uuid(),
'status' => ($start_at ? "deferred" : "created"),
'status' => "created",
'start_at' => $start_at,
'stop_at' => $stop_at,
'servername' => $SERVER_NAME,
......@@ -955,6 +957,13 @@ foreach my $aggregate_urn (@aggregate_urns) {
}
$aggobj->_authority($authority);
push(@aggregate_list, $aggobj);
#
# Mark for prestaging images if needed.
#
if ($aptaggregate->prestageimages()) {
push(@prestage, $aggobj);
}
}
# Must create a webtask if we did not get one on the command line.
......@@ -998,17 +1007,31 @@ print STDERR "\n";
print STDERR "$rspecstr\n";
#
# If start is deferred, then mark it and exit immediately.
# Check to see if any of the aggregates need a prestage check, if so
# set the status to "prestage" to flag create_slivers to do that first.
# We will catch the start_at deferment later after we are done with
# precheck/prestage.
#
if (defined($start_at)) {
if (@prestage) {
foreach my $agg (@prestage) {
$agg->SetStatus("prestage");
}
$instance->SetStatus("prestage");
$instance->Defer();
}
elsif (defined($start_at)) {
#
# If scheduled, then mark it and exit immediately.
#
$instance->SetAggregateStatus("deferred");
$instance->SetStatus("scheduled");
$instance->Defer($start_at);
$slice->UnLock();
exit(0);
}
#
# Hand off to create slivers script. We have to fork/system cause
# of libaudit logging.
# Hand off to create slivers script, parent exits. We pass the lock along.
#
if (! ($debug || $foreground)) {
libaudit::AuditPrefork();
......@@ -1020,6 +1043,7 @@ if (! ($debug || $foreground)) {
# All of the logging magic happens in here.
libaudit::AuditFork();
}
system("$CREATESLIVERS -L -f " .
($debug ? "-d " : "") . ($usestitcher ? "-S " : "") . $quickvm_uuid);
if ($?) {
......
......@@ -151,6 +151,7 @@ my $instance = APT_Instance->Lookup($ARGV[0]);
if (!defined($instance)) {
fatal("No such instance");
}
my $uuid = $instance->uuid();
my $webtask = $instance->webtask();
my $webtask_id = $webtask->task_id();
my $genislice = $instance->GetGeniSlice();
......@@ -168,6 +169,190 @@ else {
}
$webtask->AutoStore(1);
#
# In prestage mode, we want to initiate the prestage on any images
# that need to get pulled over. Since some of the clusters might not
# be available for the initial prestage, we use the agg status to
# determine which ones need to be initiated. We keep trying (via
# defer) until all aggregates have been contacted.
#
while ($instance->status() eq "prestage" && !$instance->IsCanceled()) {
my $temp_webtask = WebTask->CreateAnonymous();
my $temp_taskid = $temp_webtask->task_id();
system("$MANAGEINSTANCE -t $temp_taskid prestageimages $uuid prestage");
$temp_webtask->Refresh();
$instance->Refresh();
if ($? || !$temp_webtask->HasExited()) {
my $exitval = $? >> 8;
if ($exitval < 0 || !$temp_webtask->HasExited()) {
$errmsg = "Internal error staging images";
$webtask->output($errmsg);
$webtask->Exited(GENIRESPONSE_ERROR);
$instance->RecordError(GENIRESPONSE_ERROR, $errmsg);
$instance->SetStatus("failed");
$instance->ResolveDefer();
$temp_webtask->Delete();
$genislice->UnLock();
exit(-1);
}
#
# Ignore most errors, we are going to try again later. But some
# really are bad and result in instance failure.
#
if ($temp_webtask->exitcode() == GENIRESPONSE_ERROR() ||
$temp_webtask->exitcode() == GENIRESPONSE_SEARCHFAILED() ||
$temp_webtask->exitcode() == GENIRESPONSE_BADARGS() ||
$temp_webtask->exitcode() == GENIRESPONSE_RPCERROR() ||
$temp_webtask->exitcode() == GENIRESPONSE_FORBIDDEN()) {
$errmsg = $temp_webtask->output();
$webtask->output($errmsg);
$webtask->Exited($temp_webtask->exitcode());
$instance->RecordError($temp_webtask->exitcode(), $errmsg);
$instance->SetStatus("failed");
$instance->ResolveDefer();
$temp_webtask->Delete();
$genislice->UnLock();
exit(1);
}
}
my $results = $temp_webtask->value();
my $done = 1;
$temp_webtask->Delete();
print Dumper($results);
#
# Store prestaging info in the instance webtask so we can display
# information about prestage progress in the web UI.
#
if (!defined($webtask->prestageStatus())) {
$webtask->prestageStatus($results);
}
else {
#
# Merge in results. This is to avoid things going missing after
# an aggregate has finished, the web UI is doing a very simple
# dumnp of the info, and the merge would happen there if not here.
#
my $current = $webtask->prestageStatus();
foreach my $aggurn (keys(%{$results})) {
if (!exists($current->{$aggurn})) {
$current->{$aggurn} = $results->{$aggurn};
next;
}
my $images = $results->{$aggurn}->{"images"};
foreach my $imgurn (keys(%{$images})) {
if (!exists($current->{$aggurn}->{"images"}->{$imgurn})) {
$current->{$aggurn}->{"images"}->{$imgurn} =
$images->{$imgurn};
next;
}
my $curimg = $current->{$aggurn}->{"images"}->{$imgurn};
my $newstatus = $images->{$imgurn}->{"status"};
my $newprogress = $images->{$imgurn}->{"progress"};
$curimg->{"status"} = $newstatus;
# Do not overwrite a non-zero progress with a zero.
if ($newprogress) {
$curimg->{"progress"} = $newprogress;
}
# Hmm, if ready we might not get the final size. Force it
# for the web UI.
if ($newstatus eq "ready") {
$curimg->{"progress"} = $curimg->{"size"};
}
}
}
$webtask->prestageStatus($current);
print Dumper($current);
}
#
# Look at the results to see which aggregates are ready, staging,
# error. We keep doing the prestage until all the aggregates report
# ready for all images. The we can move on.
#
foreach my $agg ($instance->AggregateList()) {
if ($agg->status() eq "prestage") {
my $result = $results->{$agg->aggregate_urn()}->{"status"};
if ($result eq "ready" || $result eq "notready") {
# We did the prestage call at this aggregete successfully.
if ($result eq "ready") {
$agg->SetStatus("staged");
}
else {
$agg->SetStatus("staging");
$done = 0;
}
}
else {
$done = 0;
}
}
elsif ($agg->status() eq "staging") {
my $result = $results->{$agg->aggregate_urn()}->{"status"};
if ($result eq "ready") {
$agg->SetStatus("staged");
}
else {
$done = 0;
}
}
}
last
if ($instance->IsCanceled());
#
# Need to keep trying on some clusters.
#
if (!$done) {
print "Still need to prestage at some aggregates.\n";
sleep(10);
next;
}
#
# No longer staging images, we can proceed unless scheduled
#
if (defined($instance->start_at())) {
my $start_at = str2time($instance->start_at());
if ($start_at > time()) {
$instance->SetAggregateStatus("deferred");
$instance->SetStatus("scheduled");
# Clear the old defer since we are changing the type of deferment.
$instance->ResolveDefer();
$instance->Defer($instance->start_at());
$genislice->UnLock();
exit(0);
}
}
$instance->ResolveDefer();
$instance->SetStatus("staged");
$instance->SetAggregateStatus("staged");
}
#
# If we were canceled, stop now and terminate.
#
if ($instance->IsCanceled()) {
$instance->ResolveDefer();
$genislice->UnLock();
print "Cancelation flag set, aborting and terminating ...\n";
#
# If someone gets the lock, this will fail. But the apt daemon will
# see the canceled flag too and fire off a termination.
#
system("$MANAGEINSTANCE -t $webtask_id terminate $uuid");
exit(0);
}
#
# Anything to do? See create_instance; Note that we skip anything
# that is already being worked on.
......@@ -176,7 +361,9 @@ foreach my $agg ($instance->AggregateList()) {
my $aptagg = $agg->GetAptAggregate();
next
if (! ($agg->status() eq "created" || $agg->status() eq "deferred"));
if (! ($agg->status() eq "created" ||
$agg->status() eq "staged" ||
$agg->status() eq "deferred"));
#
# See if the aggregate is online, lets not go to a ton of trouble
......@@ -197,9 +384,8 @@ foreach my $agg ($instance->AggregateList()) {
$genislice->UnLock();
exit(1);
}
# Mark as deferred (it might still be "created" if not scheduled).
$agg->SetStatus("deferred")
if ($agg->status() eq "created");
# Mark as deferred since we have to wait on it.
$agg->SetStatus("deferred");
next;
}
push(@aggregate_list, $agg);
......@@ -210,19 +396,21 @@ foreach my $agg ($instance->AggregateList()) {
# email, no need to since the user is looking at the web interface.
#
$sendemail = 1
if ($instance->status() ne "created");
if (! ($instance->status() eq "created" ||
$instance->status() eq "staged"));
}
if (!@aggregate_list) {
print "No uncreated aggregates to process.\n";
#
# If not a scheduled experiment, we need to set the instance status
# to something that makes sense for the web interface since we were
# not able to start anything at all. Later, if we get some started,
# If no aggregates have been created, lets set the instance status
# to something that makes sense for the web interface.
#
if ($instance->status() eq "created") {
if (scalar($instance->AggregateList()) ==
scalar($instance->DeferredAggregateList())) {
$instance->SetStatus("pending");
$instance->Defer();
}
$instance->Defer();
$genislice->UnLock();
exit(0);
}
......@@ -233,7 +421,6 @@ if (!defined($geniuser)) {
}
my $emulab_user = $geniuser->emulab_user();
my $slice_urn = $genislice->urn();
my $uuid = $instance->uuid();
my $project = $instance->GetProject();
my $rspecstr = $instance->rspec();
......@@ -461,6 +648,7 @@ if (ParRun({"maxwaittime" => 99999,
#
if ($instance->IsCanceled()) {
$genislice->UnLock();
print "Cancelation flag set, aborting and terminating ...\n";
#
# If someone gets the lock, this will fail. But the apt daemon will
......@@ -838,10 +1026,10 @@ sub CreateSlivers()
my @return_codes = ();
#
# If this is a scheduled experiment, we want to update the start
# time for the web interface.
# If starting the experiment (it might have been deferred), then
# initialize the start time.
#
if ($instance->status() eq "deferred") {
if (!defined($instance->started())) {
$instance->Start();
}
$instance->SetStatus("provisioning");
......@@ -918,7 +1106,7 @@ sub CreateSlivers()
}
#
# No errors, but we have deferred aggregates. Need to schedule these
# for later retry.
# for later retry.
#
if (@deferred) {
foreach my $aggobj (@deferred) {
......
......@@ -37,6 +37,7 @@ sub usage()
print STDERR "Usage: manage_images [options] delete <urn> ...\n";
print STDERR "Usage: manage_images [options] getcredential <urn>\n";
print STDERR "Usage: manage_images [options] relocate ...\n";
print STDERR "Usage: manage_images [options] prestage ...\n";
exit(-1);
}
my $optlist = "dt:u:";
......@@ -101,6 +102,7 @@ sub DoListImages();
sub DoDeleteImage();
sub DoGetCredential();
sub DoRelocate();
sub DoPrestage();
sub ExitWithError($);
#
......@@ -155,6 +157,9 @@ elsif ($action eq "getcredential") {
elsif ($action eq "relocate") {
exit(DoRelocate());
}
elsif ($action eq "prestage") {
exit(DoPrestage());
}
else {
usage();
}
......@@ -774,7 +779,7 @@ sub DoRelocate()
my $usage = sub {
print STDERR "Usage: manage_images relocate [-s] [-u user] -p pid ".
"-i imagename <image_urn> <url>\n";
print STDERR " manage_images relocate -p pid -i imagename\n";
print STDERR " manage_images relocate <pending idx>\n";
print STDERR "Use the -s option to *also* schedule.\n";
print STDERR "Use the -S option to *only* schedule.\n";
print STDERR "Use the second form to start a scheduled relocation\n";
......@@ -789,6 +794,7 @@ sub DoRelocate()
my $image_urn;
my $image_url;
my $imagename;
my $pending;
my $errmsg;
my %options = ();
if (! getopts($optlist, \%options)) {
......@@ -821,45 +827,48 @@ sub DoRelocate()
fatal("Not a valid imagename");
}
}
&$usage()
if (!(defined($pid) && defined($imagename)));
if (@ARGV == 2) {
&$usage()
if (!(defined($pid) && defined($imagename)));
$image_urn = shift(@ARGV);
if (!GeniHRN::IsValid($image_urn)) {
fatal("Not a valid urn");
}
$image_url = shift(@ARGV);
if ($schedule) {
$pending = Image::PendingImport->Create({
"uid" => $user->uid(),
"uid_idx" => $user->uid_idx(),
"pid" => $group->pid(),
"pid_idx" => $group->pid_idx(),
"gid" => $group->gid(),
"gid_idx" => $group->gid_idx(),
"type" => "relocation",
"imagename" => $imagename,
"remote_urn" => $image_urn,
"metadata_url" => $image_url});
if (!defined($pending)) {
fatal("Could not schedule incoming relocation for $image_urn");
}
if (exists($options{"S"})) {
exit(0);
}
}
}
else {
my $blob;
my $val = GeniImage::ImageRelocationPending($imagename, $group, \$blob);
if (!$val || !defined($blob)) {
elsif (@ARGV == 1) {
$pending = Image::PendingImport->Lookup($ARGV[0]);
if (!defined($pending)) {
fatal("Could not lookup scheduled relocation");
}
print Dumper($blob);
$image_urn = $blob->{'remote_urn'};
$image_url = $blob->{'metadata_url'};
$user = User->Lookup($blob->{'uid_idx'});
if (!defined($user)) {
fatal("Not a valid user: " . $blob->{'uid'});
}
}
# Convert the image urn into the authority URN.
my $hrn = GeniHRN->new($image_urn);
if (!$hrn->IsImage()) {
fatal("Not an image urn");
$image_urn = $pending->image_urn();
}
if ($schedule) {
if (GeniImage::ImageRelocationSchedule($imagename, $user, $group,
$image_urn, $image_url)) {
fatal("Could not schedule incoming relocation for $image_urn");
}
if (exists($options{"S"})) {
exit(0);
}
else {
&usage();
}
#
# The context for image import is the CM.
#
......@@ -883,23 +892,90 @@ sub DoRelocate()
if (!defined($credfile)) {
fatal("Could not write credential to file");
}
if (GeniImage::ImageRelocationLock($imagename, $group)) {
print STDERR "Could not lock image relocation record\n";
exit(1);
}
my $cmd = "$IMPORTER ";
$cmd .= "-d " if ($debug);
$cmd .= "-R -C $credfile -p $pid -u " . $user->uid() . " " ;
$cmd .= "-i '$imagename' '$image_url'";
if ($debug) {
print "Running '$cmd'\n";
if ($pending) {
$cmd .= " -C $credfile -P " . " " . $pending->idx();
}
else {
$cmd .= "-R -C $credfile -p $pid -u " . $user->uid() . " " ;
$cmd .= "-i '$imagename' '$image_url'";
if ($debug) {
print "Running '$cmd'\n";
}
}
system($cmd);
if ($?) {
GeniImage::ImageRelocationUnlock($imagename, $group);
fatal("Could not relocate image");
}
GeniImage::ImageRelocationFinished($imagename, $group);
exit(0);
}
#
# Ask a cluster to prestage a list of images.
#
sub DoPrestage()
{
$debug = 1;
my $usage = sub {
print STDERR "Usage: manage_images prestage <op> <instance> <aggregate> ".
"[image urns ...]\n";
exit(-1);
};
my $optlist = "";
my @imagelist = ();
my $slice;
my $errmsg;
my %options = ();
if (! getopts($optlist, \%options)) {
&$usage();
}
&$usage()
if (@ARGV < 4);
my $op = shift(@ARGV);
if (! ($op eq "precheck" || $op eq "prestage" || $op eq "status")) {
fatal("Bad operation, must be precheck,prestage,status");
}
my $instance = APT_Instance->Lookup(shift(@ARGV));