Commit 32c3d934 authored by Leigh B Stoller's avatar Leigh B Stoller

Add support for cancelation; stopping an experiment setup early, instead of

waiting till it finishes setting up (or fails). This is really nice when a
1000 node experiment has gone awry and it is pointless to wait for it to
finish. When we do this, we mark the instance as canceled in the DB, and
then wait for create_instance() to notice it. When it does, it stops
waiting and invokes terminate with a new cancel option at the backend.
parent 08ce72b6
...@@ -406,6 +406,42 @@ sub SetManifest($$) ...@@ -406,6 +406,42 @@ sub SetManifest($$)
return 0; return 0;
} }
#
# Flag this instance as canceled. The apt_instances row for this uuid is
# updated (canceled=1 plus a timestamp), and then the copy of the row held
# in this object is updated to match.
#
# Returns 0 on success, -1 if not called on an object or if the DB update
# fails.
#
sub MarkCanceled($)
{
    my ($self) = @_;

    # Only makes sense on an actual object, not as a class method.
    if (!ref($self)) {
        return -1;
    }

    my $instance_uuid = $self->uuid();
    my $statement     = "update apt_instances set ".
                        " canceled=1,canceled_timestamp=now() ".
                        "where uuid='$instance_uuid'";

    if (!DBQueryWarn($statement)) {
        return -1;
    }

    # Keep the in-memory row in sync with what was just written to the DB.
    $self->{'INSTANCE'}->{'canceled'} = 1;
    return 0;
}
#
# Report whether this instance has been canceled. The in-memory state is
# deliberately bypassed; the canceled column is read from apt_instances
# on every call.
#
# Returns the value of the canceled column, 0 if there is no row for this
# uuid, or -1 if the query fails. Note that -1 is true in boolean context,
# so a caller that tests the result directly will treat a query failure
# the same as a cancel.
#
sub IsCanceled($)
{
    my ($self) = @_;

    my $instance_uuid = $self->uuid();
    my $statement     = "select canceled from apt_instances ".
                        "where uuid='$instance_uuid'";
    my $rows          = DBQueryWarn($statement);

    # Query failure.
    if (!$rows) {
        return -1;
    }
    # No such instance row; nothing to be canceled.
    if (!$rows->numrows) {
        return 0;
    }

    my ($flag) = $rows->fetchrow_array();
    return $flag;
}
# #
# Set to use the logfile. # Set to use the logfile.
# #
...@@ -1205,7 +1241,9 @@ sub Terminate($) ...@@ -1205,7 +1241,9 @@ sub Terminate($)
$speaksfor_credential->asString()]; $speaksfor_credential->asString()];
} }
$method = "DeleteSliver"; $method = "DeleteSliver";
@params = ($slice->urn(), $credentials, {"blocking" => 'true'}); @params = ($slice->urn(), $credentials,
{"blocking" => 'true',
"cancel" => 'true'});
# Convert URL to use AM interface. # Convert URL to use AM interface.
$cmurl =~ s/\/cm$/\/am/; $cmurl =~ s/\/cm$/\/am/;
......
...@@ -847,6 +847,7 @@ sub WaitForSliver($) ...@@ -847,6 +847,7 @@ sub WaitForSliver($)
my $interval = 15; my $interval = 15;
my $ready = 0; my $ready = 0;
my $failed = 0; my $failed = 0;
my $rpcfail = 0;
my $public_url; my $public_url;
my $repblob; my $repblob;
my $laststatus; my $laststatus;
...@@ -859,7 +860,8 @@ sub WaitForSliver($) ...@@ -859,7 +860,8 @@ sub WaitForSliver($)
if (!defined($response) || !defined($response->value()) || if (!defined($response) || !defined($response->value()) ||
($response->code() != GENIRESPONSE_SUCCESS && ($response->code() != GENIRESPONSE_SUCCESS &&
$response->code() != GENIRESPONSE_SERVER_UNAVAILABLE && $response->code() != GENIRESPONSE_SERVER_UNAVAILABLE &&
$response->code() != GENIRESPONSE_BUSY)) { $response->code() != GENIRESPONSE_BUSY &&
$response->code() != GENIRESPONSE_RPCERROR)) {
print STDERR "SliverStatus failed"; print STDERR "SliverStatus failed";
if (defined($response)) { if (defined($response)) {
...@@ -878,6 +880,23 @@ sub WaitForSliver($) ...@@ -878,6 +880,23 @@ sub WaitForSliver($)
$failed = 1; $failed = 1;
last; last;
} }
if ($response->code() == GENIRESPONSE_RPCERROR) {
if ($rpcfail > 10) {
if ($response->output() =~ /read timeout/) {
$webtask->output("Lost contact with the aggregate. " .
"Possibly a network failure, ".
"please try again later.");
}
else {
$webtask->output($response->output());
}
$failed = 1;
last;
}
$rpcfail++;
next;
}
$rpcfail = 0;
next next
if ($response->code() == GENIRESPONSE_BUSY || if ($response->code() == GENIRESPONSE_BUSY ||
$response->code() == GENIRESPONSE_SERVER_UNAVAILABLE); $response->code() == GENIRESPONSE_SERVER_UNAVAILABLE);
...@@ -888,6 +907,7 @@ sub WaitForSliver($) ...@@ -888,6 +907,7 @@ sub WaitForSliver($)
# cares about. We get this on each loop, update so the web # cares about. We get this on each loop, update so the web
# interface can show changes. # interface can show changes.
# #
my $changed = 0;
my $statusblob = {}; my $statusblob = {};
foreach my $urn (keys(%{$repblob->{'details'}})) { foreach my $urn (keys(%{$repblob->{'details'}})) {
my $details = $repblob->{'details'}->{$urn}; my $details = $repblob->{'details'}->{$urn};
...@@ -940,6 +960,13 @@ sub WaitForSliver($) ...@@ -940,6 +960,13 @@ sub WaitForSliver($)
$webtask->output("Experiment setup on $urn failed"); $webtask->output("Experiment setup on $urn failed");
last; last;
} }
elsif ($instance->IsCanceled()) {
last;
}
}
if ($instance->IsCanceled()) {
$webtask->Exited(0);
return 0;
} }
if ($failed || !$ready) { if ($failed || !$ready) {
$aggobj->SetStatus("failed"); $aggobj->SetStatus("failed");
...@@ -981,6 +1008,18 @@ print "$slice_urn\n"; ...@@ -981,6 +1008,18 @@ print "$slice_urn\n";
# Count up nodes running a startup service. # Count up nodes running a startup service.
my $startuprunning = 0; my $startuprunning = 0;
#
# If we were canceled, then none of the stuff below matters, we
# are going to do a terminate.
#
if ($instance->IsCanceled()) {
$instance->SetStatus("canceled");
$slice->UnLock();
system("$MANAGEINSTANCE -t $webtask_id terminate $quickvm_uuid");
exit(0);
}
# #
# Check the exit codes; any failure is a total failure (for now). # Check the exit codes; any failure is a total failure (for now).
# #
......
...@@ -1015,7 +1015,22 @@ sub DoTerminate() ...@@ -1015,7 +1015,22 @@ sub DoTerminate()
# a disk image. # a disk image.
# #
if ($slice->Lock()) { if ($slice->Lock()) {
fatal("Slice is busy, cannot lock it"); #
# A special case is if the slice is provisioning. This means the
# user is giving up on it, and we want to tell the aggregate to
# kill it. Not all aggregates are going to allow this, so need
# to be able to deal with that.
#
if ($instance->status() ne "provisioned") {
fatal("Slice is busy, cannot lock it");
}
if (!$instance->canceled()) {
print "Marking instance canceled\n";
$instance->MarkCanceled();
}
sleep(1);
# We have an obvious race here since we do not have the lock.
exit(0);
} }
my $old_status = $instance->status(); my $old_status = $instance->status();
$instance->SetStatus("terminating"); $instance->SetStatus("terminating");
......
...@@ -134,6 +134,7 @@ class Instance ...@@ -134,6 +134,7 @@ class Instance
function profile_id() { return $this->field('profile_id'); } function profile_id() { return $this->field('profile_id'); }
function profile_version() { return $this->field('profile_version'); } function profile_version() { return $this->field('profile_version'); }
function status() { return $this->field('status'); } function status() { return $this->field('status'); }
function canceled() { return $this->field('canceled'); }
function pid() { return $this->field('pid'); } function pid() { return $this->field('pid'); }
function pid_idx() { return $this->field('pid_idx'); } function pid_idx() { return $this->field('pid_idx'); }
function public_url() { return $this->field('public_url'); } function public_url() { return $this->field('public_url'); }
......
...@@ -379,6 +379,13 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal, ...@@ -379,6 +379,13 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
else if (status == 'provisioned') { else if (status == 'provisioned') {
$("#status_progress_bar").width("66%"); $("#status_progress_bar").width("66%");
status_html = "booting"; status_html = "booting";
if (json.value.canceled) {
status_html += " (but canceled)";
}
else {
// So the user can cancel.
EnableButton("terminate");
}
} }
else if (status == 'ready') { else if (status == 'ready') {
bgtype = "panel-success"; bgtype = "panel-success";
...@@ -397,6 +404,7 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal, ...@@ -397,6 +404,7 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
$("#status_progress_div").addClass("progress-bar-success"); $("#status_progress_div").addClass("progress-bar-success");
$("#status_progress_bar").width("100%"); $("#status_progress_bar").width("100%");
} }
$('#error_panel').addClass("hidden");
EnableButtons(); EnableButtons();
// We should be looking at the node status instead. // We should be looking at the node status instead.
if (lastStatus != "imaging") { if (lastStatus != "imaging") {
......
...@@ -180,6 +180,7 @@ function SPITROWS($showall, $name, $result) ...@@ -180,6 +180,7 @@ function SPITROWS($showall, $name, $result)
$uuid = $row["uuid"]; $uuid = $row["uuid"];
$name = $row["name"]; $name = $row["name"];
$status = $row["status"]; $status = $row["status"];
$canceled = $row["canceled"];
$created = DateStringGMT($row["created"]); $created = DateStringGMT($row["created"]);
$expires = DateStringGMT($row["expires"]); $expires = DateStringGMT($row["expires"]);
$creator_idx = $row["creator_idx"]; $creator_idx = $row["creator_idx"];
...@@ -206,6 +207,9 @@ function SPITROWS($showall, $name, $result) ...@@ -206,6 +207,9 @@ function SPITROWS($showall, $name, $result)
if ($row["expired"]) { if ($row["expired"]) {
$status = "expired"; $status = "expired";
} }
elseif ($canceled) {
$status = "canceled";
}
$profile = Profile::Lookup($profile_id, $version); $profile = Profile::Lookup($profile_id, $version);
if ($profile) { if ($profile) {
$profile_name = $profile->name(); $profile_name = $profile->name();
......
...@@ -120,6 +120,7 @@ function Do_GetInstanceStatus() ...@@ -120,6 +120,7 @@ function Do_GetInstanceStatus()
} }
$blob = array(); $blob = array();
$blob["status"] = $instance->status(); $blob["status"] = $instance->status();
$blob["canceled"] = $instance->canceled() ? 1 : 0;
$blob["sliverstatus"] = array(); $blob["sliverstatus"] = array();
$blob["sliverurls"] = array(); $blob["sliverurls"] = array();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment