Commit 32c3d934 authored by Leigh Stoller's avatar Leigh Stoller

Add support for cancelation; stopping an experiment setup early, instead of

waiting until it finishes setting up (or fails). This is really nice when a
1000 node experiment has gone awry and it is pointless to wait for it to
finish. When we do this, we mark the instance as canceled in the DB, and
then wait for create_instance() to notice it. When it does, it stops
waiting and invokes terminate with a new cancel option at the backend.
parent 08ce72b6
......@@ -406,6 +406,42 @@ sub SetManifest($$)
return 0;
}
#
# Flag this instance as canceled: set the canceled column (and its
# timestamp) in the apt_instances table, then mirror the flag into the
# in-memory copy of the row.
#
# Returns 0 on success, -1 if not called on an object or if the DB
# update fails (in which case the in-memory copy is left untouched).
#
sub MarkCanceled($)
{
    my ($self) = @_;

    # Only meaningful on an instantiated object, not on the class.
    if (!ref($self)) {
        return -1;
    }

    my $uuid    = $self->uuid();
    my $updated = DBQueryWarn("update apt_instances set ".
                              " canceled=1,canceled_timestamp=now() ".
                              "where uuid='$uuid'");
    if (!$updated) {
        return -1;
    }

    # Keep the cached row in step with what was just written to the DB.
    $self->{'INSTANCE'}->{'canceled'} = 1;
    return 0;
}
#
# Report whether this instance has been canceled. This deliberately
# bypasses the in-memory state and asks the DB every time, since the
# cached row may not reflect a cancel recorded after it was loaded.
#
# Returns -1 if the query fails, 0 if there is no row for this uuid,
# otherwise the value of the canceled column.
#
# NOTE(review): -1 is a true value in Perl, so a caller that tests the
# result in boolean context will treat a failed query as "canceled" --
# confirm that is intended.
#
sub IsCanceled($)
{
    my ($self) = @_;
    my $uuid   = $self->uuid();

    my $rows = DBQueryWarn("select canceled from apt_instances ".
                           "where uuid='$uuid'");
    if (!$rows) {
        return -1;
    }
    if (!$rows->numrows) {
        return 0;
    }

    my ($flag) = $rows->fetchrow_array();
    return $flag;
}
#
# Set to use the logfile.
#
......@@ -1205,7 +1241,9 @@ sub Terminate($)
$speaksfor_credential->asString()];
}
$method = "DeleteSliver";
@params = ($slice->urn(), $credentials, {"blocking" => 'true'});
@params = ($slice->urn(), $credentials,
{"blocking" => 'true',
"cancel" => 'true'});
# Convert URL to use AM interface.
$cmurl =~ s/\/cm$/\/am/;
......
......@@ -847,6 +847,7 @@ sub WaitForSliver($)
my $interval = 15;
my $ready = 0;
my $failed = 0;
my $rpcfail = 0;
my $public_url;
my $repblob;
my $laststatus;
......@@ -859,7 +860,8 @@ sub WaitForSliver($)
if (!defined($response) || !defined($response->value()) ||
($response->code() != GENIRESPONSE_SUCCESS &&
$response->code() != GENIRESPONSE_SERVER_UNAVAILABLE &&
$response->code() != GENIRESPONSE_BUSY)) {
$response->code() != GENIRESPONSE_BUSY &&
$response->code() != GENIRESPONSE_RPCERROR)) {
print STDERR "SliverStatus failed";
if (defined($response)) {
......@@ -878,6 +880,23 @@ sub WaitForSliver($)
$failed = 1;
last;
}
if ($response->code() == GENIRESPONSE_RPCERROR) {
if ($rpcfail > 10) {
if ($response->output() =~ /read timeout/) {
$webtask->output("Lost contact with the aggregate. " .
"Possibly a network failure, ".
"please try again later.");
}
else {
$webtask->output($response->output());
}
$failed = 1;
last;
}
$rpcfail++;
next;
}
$rpcfail = 0;
next
if ($response->code() == GENIRESPONSE_BUSY ||
$response->code() == GENIRESPONSE_SERVER_UNAVAILABLE);
......@@ -888,6 +907,7 @@ sub WaitForSliver($)
# cares about. We get this on each loop, update so the web
# interface can show changes.
#
my $changed = 0;
my $statusblob = {};
foreach my $urn (keys(%{$repblob->{'details'}})) {
my $details = $repblob->{'details'}->{$urn};
......@@ -940,6 +960,13 @@ sub WaitForSliver($)
$webtask->output("Experiment setup on $urn failed");
last;
}
elsif ($instance->IsCanceled()) {
last;
}
}
if ($instance->IsCanceled()) {
$webtask->Exited(0);
return 0;
}
if ($failed || !$ready) {
$aggobj->SetStatus("failed");
......@@ -981,6 +1008,18 @@ print "$slice_urn\n";
# Count up nodes running a startup service.
my $startuprunning = 0;
#
# If we were canceled, then none of the stuff below matters, we
# are going to do a terminate.
#
if ($instance->IsCanceled()) {
$instance->SetStatus("canceled");
$slice->UnLock();
system("$MANAGEINSTANCE -t $webtask_id terminate $quickvm_uuid");
exit(0);
}
#
# Check the exit codes; any failure is a total failure (for now).
#
......
......@@ -1015,7 +1015,22 @@ sub DoTerminate()
# a disk image.
#
if ($slice->Lock()) {
fatal("Slice is busy, cannot lock it");
#
# A special case is if the slice is provisioning. This means the
# user is giving up on it, and we want to tell the aggregate to
# kill it. Not all aggregates are going to allow this, so need
# to be able to deal with that.
#
if ($instance->status() ne "provisioned") {
fatal("Slice is busy, cannot lock it");
}
if (!$instance->canceled()) {
print "Marking instance canceled\n";
$instance->MarkCanceled();
}
sleep(1);
# We have an obvious race here since we do not have the lock.
exit(0);
}
my $old_status = $instance->status();
$instance->SetStatus("terminating");
......
......@@ -134,6 +134,7 @@ class Instance
function profile_id() { return $this->field('profile_id'); }
function profile_version() { return $this->field('profile_version'); }
function status() { return $this->field('status'); }
function canceled() { return $this->field('canceled'); }
function pid() { return $this->field('pid'); }
function pid_idx() { return $this->field('pid_idx'); }
function public_url() { return $this->field('public_url'); }
......
......@@ -379,6 +379,13 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
else if (status == 'provisioned') {
$("#status_progress_bar").width("66%");
status_html = "booting";
if (json.value.canceled) {
status_html += " (but canceled)";
}
else {
// So the user can cancel.
EnableButton("terminate");
}
}
else if (status == 'ready') {
bgtype = "panel-success";
......@@ -397,6 +404,7 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
$("#status_progress_div").addClass("progress-bar-success");
$("#status_progress_bar").width("100%");
}
$('#error_panel').addClass("hidden");
EnableButtons();
// We should be looking at the node status instead.
if (lastStatus != "imaging") {
......
......@@ -180,6 +180,7 @@ function SPITROWS($showall, $name, $result)
$uuid = $row["uuid"];
$name = $row["name"];
$status = $row["status"];
$canceled = $row["canceled"];
$created = DateStringGMT($row["created"]);
$expires = DateStringGMT($row["expires"]);
$creator_idx = $row["creator_idx"];
......@@ -206,6 +207,9 @@ function SPITROWS($showall, $name, $result)
if ($row["expired"]) {
$status = "expired";
}
elseif ($canceled) {
$status = "canceled";
}
$profile = Profile::Lookup($profile_id, $version);
if ($profile) {
$profile_name = $profile->name();
......
......@@ -120,6 +120,7 @@ function Do_GetInstanceStatus()
}
$blob = array();
$blob["status"] = $instance->status();
$blob["canceled"] = $instance->canceled() ? 1 : 0;
$blob["sliverstatus"] = array();
$blob["sliverurls"] = array();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment