Commit 8f4e3191 authored by Leigh B Stoller's avatar Leigh B Stoller
Browse files

Changes for automatic lockdown of experiments:

1. First off, we no longer do automatic lockdown of experiments when
   granting an extension longer than 10 days.

2. Instead, we will lock down experiments on a case-by-case basis.

3. The lockdown path now asks the reservation system at the target
   cluster whether locking down would throw the reservation system
   into chaos. If so, it returns a refused error and gives the admin the
   choice to override. When we do override, email is sent to the local
   tbops informing them that the reservation system is in a chaos state.
parent 40c2c01f
......@@ -556,29 +556,23 @@ sub SetLogFile($$)
#
sub SetLockdown($$$)
{
my ($self,$which,$clear) = @_;
my ($self,$admin,$user) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $uuid = $self->uuid();
if ($which eq "admin") {
$which = "admin_lockdown";
}
elsif ($which eq "user") {
$which = "user_lockdown";
}
else {
return -1;
}
my $value = ($clear ? 0 : 1);
$admin = ($admin ? 1 : 0);
$user = ($user ? 1 : 0);
DBQueryWarn("update apt_instances set ${which}=${value} ".
DBQueryWarn("update apt_instances set ".
" admin_lockdown='$admin',user_lockdown='$user' ".
"where uuid='$uuid'") or
return -1;
$self->{'INSTANCE'}->{$which} = $value;
$self->{'INSTANCE'}->{"admin_lockdown"} = $admin;
$self->{'INSTANCE'}->{"user_lockdown"} = $user;
return 0;
}
......@@ -2649,9 +2643,9 @@ sub SliverAction($$$;@)
#
# Lockdown
#
sub Lockdown($$)
sub Lockdown($$$)
{
my ($self, $clear) = @_;
my ($self, $clear, $force) = @_;
my $authority = $self->GetGeniAuthority();
my $slice = $self->instance()->GetGeniSlice();
my $context = APT_Geni::GeniContext();
......@@ -2670,6 +2664,8 @@ sub Lockdown($$)
};
$args->{"clear"} = 1
if ($clear);
$args->{"force"} = 1
if ($force);
my $cmurl = $authority->url();
$cmurl = devurl($cmurl) if ($usemydevtree);
......
......@@ -1780,10 +1780,13 @@ sub ExtendInternal($$$$$)
# Save in case of error.
my $oldexpires = $slice->expires();
# Lockdown on admin extensions longer then XX days.
if (defined($this_user) && $this_user->IsAdmin() && !$nolockdown &&
($seconds / (24 * 60 * 60)) > 10) {
$lockdown = 1
# Lockdown on admin extensions longer then XX days.
# No longer doing this.
if (0) {
if (defined($this_user) && $this_user->IsAdmin() && !$nolockdown &&
($seconds / (24 * 60 * 60)) > 10) {
$lockdown = 1
}
}
# Need to update slice before creating new credential.
if ($slice->IsExpired()) {
......@@ -1872,7 +1875,7 @@ sub ExtendInternal($$$$$)
}
# Lockdown.
if ($lockdown) {
if (DoLockdownInternal("set", "user")) {
if (DoLockdownInternal("set", "user", 0, undef)) {
SENDMAIL($TBOPS,
"Failed to lock down APT Instance",
"Failed to lock down $instance\n".
......@@ -2343,7 +2346,7 @@ sub DoExtendOld()
}
# Lockdown.
if ($lockdown) {
if (DoLockdownInternal("set", "user")) {
if (DoLockdownInternal("set", "user", 0, undef)) {
SENDMAIL($TBOPS,
"Failed to lock down APT Instance",
"Failed to lock down $instance\n".
......@@ -3092,40 +3095,35 @@ sub StartMonitorInternal(;$@)
#
# Experiment lockdown.
#
sub DoLockdownInternal($$)
sub DoLockdownInternal($$$$)
{
my ($setclr,$which) = @_;
my ($setclr,$which,$force, $perrmsg) = @_;
my $slice = $instance->GetGeniSlice();
if (!defined($slice)) {
fatal("No slice for instance");
}
my $admin_lockdown = $instance->admin_lockdown();
my $user_lockdown = $instance->user_lockdown();
if ($which eq "all") {
if ($instance->SetLockdown("user", ($setclr eq "clear" ? 1 : 0))) {
print STDERR "Could not update instance lockdown\n";
return -1
}
$which = "admin"
$admin_lockdown = $user_lockdown = ($setclr eq "clear" ? 0 : 1);
}
if ($instance->SetLockdown($which, ($setclr eq "clear" ? 1 : 0))) {
print STDERR "Could not update instance lockdown\n";
return -1
elsif ($which eq "admin") {
$admin_lockdown = ($setclr eq "clear" ? 0 : 1);
}
my $clear = ($instance->admin_lockdown() ||
$instance->user_lockdown() ? 0 : 1);
#
# Have to set/clear the lockdown on the local slice.
#
if ($slice->SetLockdown($clear)) {
print STDERR "Could not update slice lockdown\n";
return -1
elsif ($which eq "user") {
$user_lockdown = ($setclr eq "clear" ? 0 : 1);
}
my $clear = ($admin_lockdown || $user_lockdown ? 0 : 1);
#
# And tell the backend clusters to lockdown the slice.
#
my $coderef = sub {
my ($sliver) = @_;
my $webtask = $sliver->webtask();
my ($errcode, $errmsg);
#
# We cannot do lockdown at AL2S. More generally, it only works at
......@@ -3145,17 +3143,29 @@ sub DoLockdownInternal($$)
"CC: " . $project->OpsEmailAddress());
return 0;
}
my $response = $sliver->Lockdown($clear);
my $response = $sliver->Lockdown($clear, $force);
if (!defined($response)) {
print STDERR "RPC Error calling Lockdown\n";
return -1;
$errmsg = "RPC Error calling Lockdown";
$errcode = -1;
goto bad;
}
# Watch for a refusal from the reservation system on lockdown.
if ($response->code() == GENIRESPONSE_REFUSED && !$clear) {
$errmsg = $response->output();
$errcode = $response->code();
goto bad;
}
if ($response->code() != GENIRESPONSE_SUCCESS) {
print STDERR "Could not lockdown sliver: ".
$response->output() . "\n";
return -1;
$errmsg = "Sliver lockdown failed: " . $response->output();
$errcode = $response->code();
goto bad;
}
return 0;
bad:
print STDERR $errmsg . "\n" if ($debug);
$webtask->output($errmsg);
$webtask->Exited($errcode);
return $errcode;
};
my @return_codes = ();
my @agglist = $instance->AggregateList();
......@@ -3168,22 +3178,55 @@ sub DoLockdownInternal($$)
#
# Check the exit codes.
#
foreach my $code (@return_codes) {
foreach my $aggobj (@agglist) {
my $code = shift(@return_codes);
my $errmsg;
# Updated in a forked child, must refresh.
$aggobj->webtask()->Refresh();
if ($code) {
print STDERR "Some slivers could not be locked down.\n";
return -1;
if ($aggobj->webtask()->output()) {
$errmsg = $aggobj->webtask()->output();
}
else {
$errmsg = "Some slivers could not be locked down.";
}
$$perrmsg = $errmsg if (defined($perrmsg));
return $aggobj->webtask()->exitcode();
}
}
#
# Change local state only if operation succeeds.
#
if ($slice->SetLockdown($clear)) {
print STDERR "Could not update slice lockdown\n";
return -1
}
if ($instance->SetLockdown($admin_lockdown, $user_lockdown)) {
print STDERR "Could not update instance lockdown flags\n";
return -1
}
return 0;
}
sub DoLockdown()
{
my $errmsg;
my $errcode = -1;
my $force = 0;
my $optlist = "f";
my %options = ();
if (! getopts($optlist, \%options)) {
usage();
}
if (defined($options{"f"})) {
$force = 1;
}
usage()
if (@ARGV != 2);
my $setclr = shift(@ARGV);
my $which = shift(@ARGV);
......@@ -3198,11 +3241,14 @@ sub DoLockdown()
}
if ($slice->Lock()) {
$errmsg = "Experiment is busy, cannot lock it. Please try again later";
$errcode = 1;
goto bad;
}
if (DoLockdownInternal($setclr, $which)) {
$slice->UnLock();
fatal("Could not lockdown instance!");
$errcode = DoLockdownInternal($setclr, $which, $force, \$errmsg);
if ($errcode) {
$errmsg = "Failed to lockdown instance: $errcode"
if (!defined($errmsg));
goto bad;
}
$slice->UnLock();
exit(0);
......@@ -3212,9 +3258,9 @@ sub DoLockdown()
print STDERR $errmsg . "\n";
if (defined($webtask)) {
$webtask->output($errmsg);
$webtask->Exited(1);
$webtask->Exited($errcode);
}
exit(1);
exit($errcode);
}
sub DoPanic()
......@@ -4186,7 +4232,7 @@ sub DoSchedTerminate()
}
# Now we can clear this.
if ($instance->user_lockdown()) {
if (DoLockdownInternal("clear", "user")) {
if (DoLockdownInternal("clear", "user", 0, undef)) {
SENDMAIL($TBOPS,
"Failed to clear lock down on APT Instance",
"Failed to clear lock down $instance\n".
......
......@@ -5225,6 +5225,8 @@ sub Lockdown($)
my $slice_urn = $argref->{'slice_urn'};
my $clear = $argref->{'clear'};
my $credentials = $argref->{'credentials'};
my $force = $argref->{'force'};
require Reservation;
if (! (defined($credentials) && defined($slice_urn))) {
return GeniResponse->MalformedArgsResponse("Missing arguments");
......@@ -5252,6 +5254,12 @@ sub Lockdown($)
return GeniResponse->Create(GENIRESPONSE_FORBIDDEN(), undef,
"Credential does not match the URN");
}
my $experiment = $slice->GetExperiment();
if (!defined($experiment)) {
return GeniResponse->Create(GENIRESPONSE_ERROR, undef,
"No local experiment for slice");
}
#
# Only the SA for the slice can do this.
#
......@@ -5262,11 +5270,42 @@ sub Lockdown($)
"Not enough permission to set/clr lockdown");
}
main::AddLogfileMetaDataFromSlice($slice);
if ($slice->SetLockdown(defined($clear) && $clear ? 1 : 0)) {
return GeniResponse->Create(GENIRESPONSE_ERROR);
if (defined($clear) && $clear) {
if ($slice->SetLockdown(1)) {
return GeniResponse->Create(GENIRESPONSE_ERROR);
}
return GeniResponse->Create(GENIRESPONSE_SUCCESS);
}
return GeniResponse->Create(GENIRESPONSE_SUCCESS);
#
# Ask Reservation system if locking down this experiment will
# throw the Reservation system into chaos.
#
my $error = "unknown error";
my $rval = Reservation->Lockdown($slice, \$error, 1, 0);
if (!$rval) {
return GeniResponse->Create(GENIRESPONSE_SUCCESS);
}
if (defined($force) && $force) {
#
# Override the Reservation system, chaos ensues.
#
if (Reservation->Lockdown($slice, \$error, 0, 1)) {
print STDERR "Unable to force lockdown: $error\n";
return GeniResponse->Create(GENIRESPONSE_ERROR, undef,
"Unable to force lockdown: $error");
}
libtestbed::SENDMAIL($TBOPS, "Slice lockdown forced!",
"$slice_urn has been forcibly locked down,\n".
"throwing the reservation system into chaos.\n\n".
"Slice: $slice\n".
"Experiment: $experiment\n",
$TBOPS);
return GeniResponse->Create(GENIRESPONSE_SUCCESS);
}
return GeniResponse->Create(GENIRESPONSE_REFUSED, undef, $error);
}
#
......
......@@ -16,6 +16,7 @@ $(function ()
var firstrowTemplate = null;
var secondrowTemplate = null;
var extensionsTemplate = null;
var GENIRESPONSE_REFUSED = 7;
function initialize()
{
......@@ -280,16 +281,17 @@ $(function ()
//
// Request lockdown set/clear.
//
function DoLockdown(which, lockdown)
function DoLockdown(which, lockdown, force)
{
var action = (lockdown ? "set" : "clear");
// Optional arg.
if (force === undefined) {
force = 0;
}
var callback = function(json) {
if (json.code) {
sup.HideModal("#waitwait-modal", function () {
sup.SpitOops("oops",
"Lockdown failed: " + json.value);
if (lockdown) {
// Flip the checkbox back.
$('#' + which + '-lockdown-checkbox')
......@@ -300,6 +302,25 @@ $(function ()
$('#' + which + '-lockdown-checkbox')
.prop("checked", true);
}
if (json.code != GENIRESPONSE_REFUSED) {
sup.SpitOops("oops",
"Lockdown failed: " + json.value);
return;
}
// Refused.
$('#force-lockdown').click(function (event) {
sup.HideModal('#lockdown-refused', function() {
// Flip the checkbox again
$('#' + which + '-lockdown-checkbox')
.prop("checked", true);
// Again with force.
DoLockdown(which, lockdown, 1);
});
});
$('#lockdown-refused pre').text(json.value);
sup.ShowModal('#lockdown-refused', function () {
$('#force-lockdown').off("click");
});
});
return;
}
......@@ -317,7 +338,8 @@ $(function ()
var xmlthing = sup.CallServerMethod(null, "status", "Lockdown",
{"uuid" : window.UUID,
"which" : which,
"action" : action});
"action" : action,
"force" : force});
xmlthing.done(callback);
}
......
......@@ -48,6 +48,8 @@ $(function ()
var changingtopo = false;
var EMULAB_OPS = "emulab-ops";
var EMULAB_NS = "http://www.protogeni.net/resources/rspec/ext/emulab/1";
var GENIRESPONSE_REFUSED = 7;
var GENIRESPONSE_INSUFFICIENT_NODES = 26;
function initialize()
{
......@@ -789,16 +791,50 @@ $(function ()
//
// Request lockdown set/clear.
//
function DoLockdown(which, lockdown)
function DoLockdown(which, lockdown, force)
{
var action = (lockdown ? "set" : "clear");
// Optional arg.
if (force === undefined) {
force = 0;
}
var callback = function(json) {
sup.HideModal("#waitwait-modal");
if (json.code) {
alert("Failed to change lockdown: " + json.value);
sup.HideModal("#waitwait-modal", function () {
if (lockdown) {
// Flip the checkbox back.
$('#' + which + '_lockdown_checkbox')
.prop("checked", false);
}
else {
// Flip the checkbox back.
$('#' + which + '_lockdown_checkbox')
.prop("checked", true);
}
if (json.code != GENIRESPONSE_REFUSED) {
sup.SpitOops("oops",
"Lockdown failed: " + json.value);
return;
}
// Refused.
$('#force-lockdown').click(function (event) {
sup.HideModal('#lockdown-refused', function() {
// Flip the checkbox again
$('#' + which + '_lockdown_checkbox')
.prop("checked", true);
// Again with force.
DoLockdown(which, lockdown, 1);
});
});
$('#lockdown-refused pre').text(json.value);
sup.ShowModal('#lockdown-refused', function () {
$('#force-lockdown').off("click");
});
});
return;
}
sup.HideModal("#waitwait-modal");
if (which == "user") {
user_lockdown = lockdown;
}
......@@ -816,7 +852,8 @@ $(function ()
var xmlthing = sup.CallServerMethod(ajaxurl, "status", "Lockdown",
{"uuid" : uuid,
"which" : which,
"action" : action});
"action" : action,
"force" : force});
xmlthing.done(callback);
}
......
......@@ -1433,6 +1433,7 @@ function Do_Lockdown()
{
global $this_user;
global $ajax_args;
$force = "";
if (!isset($this_user) || !ISADMIN()) {
SPITAJAX_ERROR(1, "Not enough permission.");
......@@ -1466,6 +1467,9 @@ function Do_Lockdown()
SPITAJAX_ERROR(1, "Not enough permission.");
return;
}
if (isset($ajax_args["force"]) && $ajax_args["force"] == 1) {
$force = "-f";
}
$uuid = $ajax_args["uuid"];
$instance = Instance::Lookup($uuid);
if (!$instance) {
......@@ -1484,7 +1488,7 @@ function Do_Lockdown()
$webtask = WebTask::CreateAnonymous();
$retval = SUEXEC("nobody", "nobody",
"webmanage_instance -t " . $webtask->task_id() .
" lockdown $uuid $action $which ",
" lockdown $uuid $force $action $which ",
SUEXEC_ACTION_IGNORE);
$webtask->Refresh();
......@@ -1495,7 +1499,7 @@ function Do_Lockdown()
SUEXECERROR(SUEXEC_ACTION_CONTINUE);
}
else {
SPITAJAX_ERROR(1, $webtask->TaskValue("output"));
SPITAJAX_ERROR($webtask->exitcode(), $webtask->TaskValue("output"));
}
$webtask->Delete();
return;
......
......@@ -371,3 +371,31 @@ pre {
</div>
</div>
</div>
<!--
  Modal shown when a lockdown request is refused by a backend cluster.
  The page script fills the <pre> with the refusal message and attaches a
  click handler to #force-lockdown that retries the lockdown with force.
-->
<div id='lockdown-refused' class='modal fade'>
  <div class='modal-dialog'>
    <div class='modal-content'>
      <div class="modal-header text-center">
        <h4>Lockdown Refused</h4>
      </div>
      <div class='modal-body'>
        <p>
          The lockdown request has been refused by one or more of the backend
          clusters because it would throw the reservation system into chaos:
        </p>
        <!-- Refusal message from the cluster is inserted here. -->
        <div>
          <pre></pre>
        </div>
        If you want to force this lockdown, click below.
        <center>
          <button type='button' style='margin-right: 20px;'
                  data-dismiss='modal'
                  class='btn btn-primary btn-sm'
                  id='cancel-lockdown'>Cancel</button>
          <!-- Explicit type so the button never acts as a form submit. -->
          <button type='button'
                  class='btn btn-danger btn-sm'
                  id='force-lockdown'>Lockdown</button>
        </center>
      </div>
    </div>
  </div>
</div>
......@@ -1177,6 +1177,33 @@ pre {
</div>
</div>
</div>
<!--
  Modal shown when a lockdown request is refused by a backend cluster.
  The page script fills the <pre> with the refusal message and attaches a
  click handler to #force-lockdown that retries the lockdown with force.
-->
<div id='lockdown-refused' class='modal fade'>
  <div class='modal-dialog'>
    <div class='modal-content'>
      <div class='modal-header text-center'>
        <h4>Lockdown Refused</h4>
      </div>
      <div class='modal-body'>
        <p>
          The lockdown request has been refused by one of the backend
          clusters because it would throw the reservation system into chaos:
        </p>
        <!-- Refusal message from the cluster is inserted here. -->
        <div>
          <pre></pre>
        </div>
        If you want to force this lockdown, click below.
        <center>
          <button id='cancel-lockdown'
                  type='button'
                  class='btn btn-primary btn-sm'
                  style='margin-right: 20px;'
                  data-dismiss='modal'>Cancel</button>
          <button id='force-lockdown'
                  class='btn btn-danger btn-sm'>Lockdown</button>
        </center>
      </div>
    </div>
  </div>
</div>
<div id='waitwait_div'></div>
<div id='terminate_div'></div>
<div id='oops_div'></div>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment