Commit 97347528 authored by Leigh Stoller's avatar Leigh Stoller

Export a Geni API call for our panic mode (level 1, since not all clusters
can do control network port modification), and add front-end support to the
Portal status page (admin mode only, of course).
parent 7ab30be5
......@@ -487,6 +487,29 @@ sub SetLockdown($$$)
return 0;
}
#
# Set or clear the panic flag on this instance.
#
# $clear true  => paniced is set to 0.
# $clear false => paniced is set to 1 and paniced_timestamp is stamped
#                 with now().
#
# Updates the apt_instances row for this uuid and the cached INSTANCE
# record. Returns 0 on success, -1 when called on a non-reference or
# when the query fails.
#
sub SetPanic($$)
{
    my ($self, $clear) = @_;

    # Instance method only; refuse a class-name (or undef) invocant.
    if (!ref($self)) {
        return -1;
    }
    my $uuid = $self->uuid();

    my $flag;
    my $stamp;
    if ($clear) {
        $flag  = 0;
        $stamp = "";
    }
    else {
        $flag  = 1;
        $stamp = ",paniced_timestamp=now() ";
    }
    my $query = "update apt_instances set paniced=${flag} " .
        $stamp .
        "where uuid='$uuid'";

    if (!DBQueryWarn($query)) {
        return -1;
    }
    # Keep the in-memory copy in sync with the database.
    $self->{'INSTANCE'}->{"paniced"} = $flag;
    return 0;
}
# Return a URL for the status page.
sub webURL($)
{
......@@ -1845,6 +1868,52 @@ sub Lockdown($$)
return undef;
}
#
# Panic
#
# Invoke the "Panic" RPC at this sliver's authority. When $clear is true
# the request carries clear=1, otherwise no "clear" argument is sent.
#
# Returns whatever Genixmlrpc::CallMethod() returns, or undef when the
# authority, slice or context is missing or no slice credential could be
# generated.
#
sub Panic($$)
{
    my ($self, $clear) = @_;
    my $authority = $self->GetGeniAuthority();
    my $slice     = $self->instance()->GetGeniSlice();
    my $context   = APT_Geni::GeniContext();
    my $oldexpires;
    my $response;

    return undef
        unless (defined($authority) &&
                defined($slice) && defined($context));

    #
    # If the slice is expired, then the credential we generate will
    # not be valid. So extend the slice so we can clear the panic.
    #
    if ($clear && $slice->IsExpired()) {
        $oldexpires = $slice->expires();
        $slice->SetExpiration(time() + 3600);
    }

    my $slice_credential = APT_Geni::GenAuthCredential($slice);
    if (defined($slice_credential)) {
        my $args = {
            "slice_urn"   => $slice->urn(),
            "credentials" => [$slice_credential->asString()],
        };
        if ($clear) {
            $args->{"clear"} = 1;
        }
        my $cmurl = $authority->url();
        $cmurl =~ s/protogeni/protogeni\/stoller/ if ($usemydevtree);

        $response = Genixmlrpc::CallMethod($cmurl, $context, "Panic", $args);
    }

    # Success or failure, put back the expiration we temporarily bumped.
    if (defined($oldexpires)) {
        $slice->SetExpiration($oldexpires);
    }
    return $response;
}
#
# Ask for Image Info
#
......
......@@ -45,6 +45,7 @@ sub usage()
print("Usage: manage_instance reload instance node_id [node_id ...]\n");
print("Usage: manage_instance monitor instance\n");
print("Usage: manage_instance lockdown instance set|clear user|admin\n");
print("Usage: manage_instance panic instance set|clear\n");
print("Usage: manage_instance writecreds instance directory\n");
exit(-1);
}
......@@ -105,6 +106,7 @@ sub DoRefresh();
sub DoReboot();
sub DoReload();
sub DoLockdown();
sub DoPanic();
sub DoManifests();
sub WriteCredentials();
sub StartMonitor();
......@@ -168,6 +170,9 @@ elsif ($action eq "monitor") {
elsif ($action eq "lockdown") {
DoLockdown()
}
elsif ($action eq "panic") {
DoPanic()
}
elsif ($action eq "writecreds") {
WriteCredentials()
}
......@@ -1646,7 +1651,7 @@ sub StartMonitorInternal(;$)
if (kill(0, $pid)) {
print STDERR "Monitor already running ($pid). ".
"Kill it before starting a new one.\n";
exit(0);
return 1;
}
$instance->Update({"monitor_pid" => 0});
}
......@@ -1662,9 +1667,9 @@ sub StartMonitorInternal(;$)
# We just did the operation, no need to ask so soon, and we
# avoid locking the slice in case the user wants to reboot
# another node right away. For reboot/reload, nothing interesting
# is going to be reported for at least 30 seconds (XEN VM).
# is going to be reported for a while.
#
sleep(30);
sleep(15);
my $seconds = ($waitforstartup ? 7200 : 900);
my $interval = 15;
......@@ -1887,6 +1892,91 @@ sub DoLockdown()
exit(0);
}
#
# Handle "manage_instance panic instance set|clear".
#
# Locks the instance's slice, calls Panic() on every aggregate in
# parallel, updates the instance panic flag and then starts the monitor.
# Never returns: exits 0 on success, -1 on failure (after printing the
# reason to STDERR). The slice is unlocked on both paths.
#
sub DoPanic()
{
    my $emsg;

    usage()
        if (@ARGV != 1);
    my $setclr = shift(@ARGV);

    fatal("Must specify either 'set' or 'clear'")
        if ($setclr !~ /^(set|clear)$/);

    # Both Panic() and SetPanic() take a "clear" boolean.
    my $clear = ($setclr eq "clear" ? 1 : 0);

    my $slice = $instance->GetGeniSlice();
    if (!defined($slice)) {
        fatal("No slice for instance");
    }
    if ($slice->Lock()) {
        fatal("Experiment is busy, cannot lock it. Please try again later");
    }

    #
    # Create the webtask object, but AFTER locking the slice so we do
    # not destroy one in use.
    #
    if (defined($webtask_id)) {
        $webtask = WebTask->LookupOrCreate($instance->uuid(), $webtask_id);
        # Convenient.
        $webtask->AutoStore(1);
    }

    #
    # And tell the backend clusters to panic (or un-panic) the slice.
    #
    my $coderef = sub {
        my ($sliver) = @_;
        # Per-sliver webtask; intentionally shadows the file-level one.
        my $webtask = $sliver->webtask();

        my $response = $sliver->Panic($clear);
        if (!defined($response)) {
            print STDERR "RPC Error calling Panic\n";
            return -1;
        }
        if ($response->code() != GENIRESPONSE_SUCCESS) {
            print STDERR "Could not panic sliver: ".
                $response->output() . "\n";
            return -1;
        }
        # Tell the web interface something is different. Real status will
        # come later when the monitor starts up.
        if ($webtask->sliverstatus()) {
            my $blob = $webtask->sliverstatus();
            foreach my $node_id (keys(%{ $blob })) {
                $blob->{$node_id}->{'status'} = "changing";
            }
            $webtask->sliverstatus($blob);
        }
        return 0;
    };
    my @return_codes = ();
    my @agglist      = $instance->AggregateList();

    if (ParRun({"maxwaittime" => 99999,
                "maxchildren" => scalar(@agglist)},
               \@return_codes, $coderef, @agglist)) {
        $emsg = "Internal error calling Panic()";
        goto bad;
    }

    #
    # Check the exit codes.
    #
    foreach my $code (@return_codes) {
        if ($code) {
            $emsg = "Some slivers could not be paniced";
            goto bad;
        }
    }
    if ($instance->SetPanic($clear)) {
        $emsg = "Could not update instance panic flag";
        goto bad;
    }
    StartMonitorInternal();
    $slice->UnLock();
    exit(0);

  bad:
    # Report why we failed; previously the message was collected but
    # never printed.
    print STDERR "$emsg\n"
        if (defined($emsg));
    $slice->UnLock();
    exit(-1);
}
#
# Write instance credentials to files.
#
......
......@@ -64,6 +64,7 @@ use Data::Dumper;
use XML::Simple;
use Date::Parse;
use POSIX qw(strftime tmpnam);
use POSIX qw(setsid :sys_wait_h);
use Time::Local;
use Compress::Zlib;
use File::Temp qw(tempfile);
......@@ -104,6 +105,7 @@ my $CREATEIMAGE = "$TB/bin/create_image";
my $DELETEIMAGE = "$TB/sbin/delete_image";
my $WAP = "$TB/sbin/withadminprivs";
my $SHAREVLAN = "$TB/sbin/sharevlan";
my $PANIC = "$TB/sbin/panic";
my $XMLLINT = "/usr/local/bin/xmllint";
my $PRERENDER = "$TB/libexec/vis/prerender";
my $IMPORTER = "$TB/sbin/image_import";
......@@ -4688,5 +4690,107 @@ sub Lockdown($)
return GeniResponse->Create(GENIRESPONSE_SUCCESS);
}
#
# Turn on/off "panic" mode for an experiment. We use level 1 since not
# all clusters support control network vlan modifications.
#
# Arguments (hash reference):
#   slice_urn   - URN of the slice (required).
#   credentials - credentials to check (required).
#   clear       - when true, take the experiment out of panic mode.
#
# In the parent (non-zero WrapperFork() result) a GeniResponse is
# returned. On the other side of the fork the backend script is run and
# 0 or -1 is returned.
#
sub Panic($)
{
    my ($argref) = @_;
    my $slice_urn   = $argref->{'slice_urn'};
    my $clear       = $argref->{'clear'};
    my $credentials = $argref->{'credentials'};

    if (! (defined($credentials) && defined($slice_urn))) {
        return GeniResponse->MalformedArgsResponse("Missing arguments");
    }
    if (! GeniHRN::IsValid($slice_urn)) {
        return GeniResponse->MalformedArgsResponse("Invalid URN");
    }
    my ($credential,$speaksfor) = GeniStd::CheckCredentials($credentials);
    return $credential
        if (GeniResponse::IsResponse($credential));

    # The result is only inspected for an error response.
    my $authority = GeniCM::CreateAuthorityFromCertificate($credential);
    return $authority
        if (GeniResponse::IsResponse($authority));

    my ($slice, $aggregate) = Credential2SliceAggregate($credential);
    return $slice
        if (defined($slice) && GeniResponse::IsResponse($slice));

    if (! (defined($slice) && defined($aggregate))) {
        return GeniResponse->Create(GENIRESPONSE_SEARCHFAILED, undef,
                                    "Slice does not exist");
    }
    if ($slice_urn ne $slice->urn()) {
        return GeniResponse->Create(GENIRESPONSE_FORBIDDEN(), undef,
                                    "Credential does not match the URN");
    }

    #
    # Only the SA for the slice can do this.
    #
    my $caller = GeniHRN->new($credential->owner_urn());
    if (! ($caller->IsSA() &&
           $caller->domain() eq $slice->urnOBJ()->domain())) {
        return GeniResponse->Create(GENIRESPONSE_FORBIDDEN, undef,
                        "Not enough permission to turn on/off panic mode");
    }
    if ($slice->Lock() != 0) {
        return GeniResponse->BusyResponse();
    }
    main::AddLogfileMetaDataFromSlice($slice);

    my $experiment = $slice->GetExperiment();
    if (!defined($experiment)) {
        # We hold the slice lock at this point; do not leak it.
        $slice->UnLock();
        return GeniResponse->Create(GENIRESPONSE_ERROR, undef,
                                    "No local experiment for slice");
    }
    my $pid = $experiment->pid();
    my $eid = $experiment->eid();
    my $command = "$WAP $PANIC -l 1 " . ($clear ? "-r " : "") . "$pid $eid";

    #
    # The backend script sends a bunch of stuff to stdout, so capture it.
    # We want to do this in the background cause it is going to take a long
    # time, so fork and start the backend script, but wait a few seconds for
    # early errors.
    #
    my $mypid = main::WrapperFork();
    if ($mypid) {
        sleep(3);
        my $kid = waitpid($mypid, &WNOHANG);
        if ($kid == $mypid) {
            my $stat = $?;
            if ($stat & 127) {
                # died with a signal, return the signal
                $stat = $stat & 127;
            } else {
                # else return the exit code
                $stat = $stat >> 8;
            }
            # NOTE(review): any exit within the wait window is reported as
            # an error, even a zero status -- confirm that is intended.
            $slice->UnLock();
            return GeniResponse->Create(GENIRESPONSE_ERROR, undef,
                            "Panic backend exited early (status $stat)");
        }
        # We want to unlock it so we can get status, so we set the shutdown
        # flag since that will prevent any other changes from happening.
        if (!$clear) {
            $slice->SetShutdown(undef, 1);
        }
        $slice->UnLock();
        return GeniResponse->Create(GENIRESPONSE_SUCCESS);
    }

    # Background side of the fork: run the backend script.
    GeniUtil::FlipToElabMan();
    my $output = GeniUtil::ExecQuiet($command);
    # Not a typical op, so always print debugging info;
    print STDERR $output;
    if ($?) {
        return -1;
    }
    if ($clear) {
        $slice->ClearShutdown();
    }
    return 0;
}
# _Always_ make sure that this 1 is at the end of the file...
1;
......@@ -1123,9 +1123,9 @@ sub SetIdleIgnore($$)
#
# Set the shutdown field.
#
sub SetShutdown($$)
sub SetShutdown($$;$)
{
my ($self, $shutdown) = @_;
my ($self, $shutdown, $isshutdown) = @_;
my $uuid = $self->uuid();
my $when;
......@@ -1135,11 +1135,14 @@ sub SetShutdown($$)
else {
$when = "NULL";
}
$isshutdown = (defined($isshutdown) ? ($isshutdown ? 1 : 0) : 0);
#
# Always clear isshutdown so that expire_daemon knows of change.
#
my $query_result =
DBQueryWarn("update geni_slices set shutdown=$when, isshutdown=0 " .
DBQueryWarn("update geni_slices set ".
" shutdown=$when, isshutdown=$isshutdown " .
"where uuid='$uuid'");
return -1
......@@ -1147,6 +1150,24 @@ sub SetShutdown($$)
# XXX Wrong format, but harmless.
$self->{'SLICE'}->{'shutdown'} = ($shutdown ? time() : undef);
$self->{'SLICE'}->{'isshutdown'} = $isshutdown;
return 0;
}
#
# Clear the shutdown state of the slice: set shutdown to null and
# isshutdown to 0 in geni_slices, and mirror that in the cached SLICE
# record. Returns 0 on success, -1 if the query fails.
#
sub ClearShutdown($)
{
    my ($self) = @_;
    my $uuid   = $self->uuid();

    my $query = "update geni_slices set ".
        " shutdown=null, isshutdown=0 " .
        "where uuid='$uuid'";

    if (!DBQueryWarn($query)) {
        return -1;
    }
    # Keep the in-memory copy in step with the database row.
    my $cached = $self->{'SLICE'};
    $cached->{'shutdown'}   = undef;
    $cached->{'isshutdown'} = 0;
    return 0;
}
......
......@@ -117,6 +117,7 @@ elsif ($GENI_VERSION eq "2.0") {
"TriggerImageUpdate"=> \&GeniCMV2::TriggerImageUpdate,
"AddNodes" => \&GeniCMV2::AddNodes,
"DeleteNodes" => \&GeniCMV2::DeleteNodes,
"Panic" => \&GeniCMV2::Panic,
};
}
......
......@@ -138,6 +138,7 @@ class Instance
function profile_version() { return $this->field('profile_version'); }
function status() { return $this->field('status'); }
function canceled() { return $this->field('canceled'); }
function paniced() { return $this->field('paniced'); }
function pid() { return $this->field('pid'); }
function pid_idx() { return $this->field('pid_idx'); }
function public_url() { return $this->field('public_url'); }
......
......@@ -35,6 +35,7 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
var statusTemplate = _.template(statusString);
var terminateTemplate = _.template(terminateString);
var lastStatus = "";
var paniced = 0;
var lockout = 0;
var lockdown = 0;
var lockdown_code = "";
......@@ -54,6 +55,7 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
extend = window.APT_OPTIONS.extend || null;
ispprofile = window.APT_OPTIONS.ispprofile;
profile_uuid = window.APT_OPTIONS.profileUUID;
paniced = window.APT_OPTIONS.paniced;
lockout = window.APT_OPTIONS.lockout;
lockdown = window.APT_OPTIONS.lockdown;
lockdown_code= uuid.substr(2, 5);
......@@ -80,6 +82,7 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
isadmin: window.APT_OPTIONS.isadmin,
isfadmin: window.APT_OPTIONS.isfadmin,
errorURL: errorURL,
paniced: paniced,
lockout: lockout,
lockdown: lockdown,
lockdown_code: lockdown_code,
......@@ -280,6 +283,10 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
$('#lockout_checkbox').change(function() {
DoLockout($(this).is(":checked"));
});
// Quarantine change event handler.
$('#quarantine_checkbox').change(function() {
DoQuarantine($(this).is(":checked"));
});
/*
* Attach an event handler to the profile status collapse.
......@@ -701,6 +708,28 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
xmlthing.done(callback);
}
//
// Ask the server to set (truthy mode) or clear (falsy mode) Quarantine
// (panic) mode for this experiment. The wait modal is shown while the
// request is outstanding; a failure is reported via SpitOops.
//
function DoQuarantine(mode)
{
    // The server is sent 1 or 0 rather than a boolean.
    var quarantine = mode ? 1 : 0;

    sup.ShowModal('#waitwait-modal');

    sup.CallServerMethod(ajaxurl, "status", "Quarantine",
                         {"uuid"       : uuid,
                          "quarantine" : quarantine})
        .done(function (json) {
            sup.HideModal('#waitwait-modal');
            if (json.code) {
                sup.SpitOops("oops",
                             "Failed to change Quarantine mode: " +
                             json.value);
            }
        });
}
//
// Request a refresh from the backend cluster, to see if the sliverstatus
// has changed.
......@@ -889,7 +918,7 @@ function (_, sup, moment, marked, UriTemplate, ShowImagingModal,
sup.HideModal('#waitwait-modal');
if (json.code) {
sup.SpitOops("oops", "Failed to reboot: " + json.value);
Sup.spitoops("oops", "Failed to reboot: " + json.value);
return;
}
// Trigger status to change the nodes.
......
......@@ -516,7 +516,7 @@ class Profile
if (!DBQueryWarn("replace into apt_profile_favorites set ".
" uid='$user_uid',uid_idx='$user_idx', ".
" profileid='$profile_id',now()")) {
" profileid='$profile_id',marked=now()")) {
return -1;
}
return 0;
......
......@@ -121,7 +121,9 @@ $routing = array("myprofiles" =>
"DecryptBlocks" =>
"Do_DecryptBlocks",
"Lockout" =>
"Do_Lockout")),
"Do_Lockout",
"Quarantine" =>
"Do_Quarantine")),
"approveuser" =>
array("file" => "approveuser.ajax",
"guest" => false,
......
......@@ -1132,6 +1132,69 @@ function Do_Lockout()
SPITAJAX_RESPONSE("Success");
}
#
# Handle Quarantine request/clear.
#
# Admin only. Expects ajax args "uuid" (instance uuid) and "quarantine"
# (truthy to set, falsy to clear). Runs "webmanage_instance ... panic"
# and reports the outcome through SPITAJAX_RESPONSE/SPITAJAX_ERROR.
#
function Do_Quarantine()
{
    global $this_user;
    global $ajax_args;
    # Must be declared global to be visible in this function; without
    # the declaration the $suexec_output branch below could never fire.
    # Presumably filled in by SUEXEC() -- confirm.
    global $suexec_output;

    if (!isset($this_user) || !ISADMIN()) {
        SPITAJAX_ERROR(1, "Not enough permission.");
        return;
    }
    if (!isset($ajax_args["uuid"])) {
        SPITAJAX_ERROR(1, "Missing instance uuid");
        return;
    }
    if (!isset($ajax_args["quarantine"])) {
        SPITAJAX_ERROR(1, "Missing quarantine value");
        return;
    }
    $which = ($ajax_args["quarantine"] ? "set" : "clear");
    $uuid  = $ajax_args["uuid"];

    $instance = Instance::Lookup($uuid);
    if (!$instance) {
        SPITAJAX_ERROR(1, "Unknown instance uuid");
        return;
    }

    # NOTE(review): $uuid comes from the request and is interpolated into
    # the command line; this relies on Instance::Lookup() rejecting
    # anything that is not a real uuid -- confirm.
    $webtask_id = WebTask::GenerateID();
    $retval = SUEXEC($this_user->uid(), "nobody",
                     "webmanage_instance -t $webtask_id -- panic $uuid $which",
                     SUEXEC_ACTION_IGNORE);
    $webtask = WebTask::Lookup($webtask_id);

    if ($retval != 0) {
        if ($retval < 0) {
            SPITAJAX_ERROR(-11, "Internal error, cannot proceed.");
            # Notify tbops.
            SUEXECERROR(SUEXEC_ACTION_CONTINUE);
        }
        elseif ($webtask) {
            SPITAJAX_ERROR(1, $webtask->TaskValue("output"));
        }
        elseif ($suexec_output != "") {
            SPITAJAX_ERROR(1, $suexec_output);
        }
        else {
            SUEXECERROR(SUEXEC_ACTION_CONTINUE);
            SPITAJAX_ERROR(-1, "Internal Error. Please try again later");
        }
        if ($webtask) {
            $webtask->Delete();
        }
        return;
    }
    if ($webtask) {
        $webtask->Delete();
    }
    SPITAJAX_RESPONSE("Success");
}
# Local Variables:
# mode:php
# End:
......
......@@ -179,6 +179,7 @@ $extension_history= ($instance->extension_history() ?
CleanString($instance->extension_history()) : "");
$freenodes_url = $freenodes_mapping[$instance->aggregate_urn()];
$lockout = $instance->extension_lockout();
$paniced = $instance->paniced();
#
# We give ssh to the creator (real user or guest user).
......@@ -235,6 +236,7 @@ echo " window.APT_OPTIONS.ispprofile = $ispprofile;\n";
echo " window.APT_OPTIONS.publicURL = $public_url;\n";
echo " window.APT_OPTIONS.lockdown = $lockdown;\n";
echo " window.APT_OPTIONS.lockout = $lockout;\n";
echo " window.APT_OPTIONS.paniced = $paniced;\n";
echo " window.APT_OPTIONS.extension_requested = " .
$instance->extension_requested() . ";\n";
echo " window.APT_OPTIONS.AJAXURL = 'server-ajax.php';\n";
......
......@@ -109,15 +109,25 @@
<% if (isadmin) { %>
<div class='pull-left'
data-toggle='popover'
data-delay='{"hide":1500, "show":500}'
data-html='true'
data-delay='{"hide":1000, "show":500}'
data-content="When checked, only administrator can extend
this experiment. No free time is granted to
user at all">
user at all.">
<label class="checkbox-inline" style='margin-right: 10px;'>
<input type="checkbox" id="lockout_checkbox"
<% if (lockout) { %>checked<% } %> >Lockout</label>
</div>
<div class='pull-left'
data-toggle='popover'
data-delay='{"hide":1000, "show":500}'
data-content="When checked, the experiment is put into
Quarantine (emulab panic) mode.">
<label class="checkbox-inline" style='margin-right: 10px;'>
<input type="checkbox" id="quarantine_checkbox"
<% if (paniced) { %>checked<% } %> >
<span <% if (paniced) { %>class="text-danger"<% } %> >
Quarantined</span></label>
</div>
<% } %>
<div class='pull-right'>
<% if (registered && !isfadmin) { %>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment