Commit a3e565f4 authored by Leigh Stoller

Big cleanup of the "quickvm" script, in that everything except the create
function has been moved into manage_instance and its associated library
(APT_Instance). Lots of cleanup of the code and more use of webtasks to
communicate with the web server.
parent 7bcc61cb
......@@ -25,6 +25,9 @@ package APT_Instance;
use strict;
use Carp;
use English;
use Data::Dumper;
use POSIX qw(tmpnam);
use Exporter;
use vars qw(@ISA @EXPORT $AUTOLOAD);
......@@ -37,13 +40,17 @@ use emdb;
use libtestbed;
use Brand;
use APT_Profile;
use English;
use Data::Dumper;
use APT_Geni;
use Genixmlrpc;
use GeniResponse;
use GeniCertificate;
use GeniHRN;
use overload ('""' => 'Stringify');
# Configure variables
my $TB = "@prefix@";
my $TBOPS = "@TBOPSEMAIL@";
my $GENEXTENDCRED = "$TB/sbin/protogeni/genextendcred";
# Cache of instances to avoid regenerating them.
my %instances = ();
......@@ -84,7 +91,7 @@ sub Lookup($$;$)
AUTOLOAD {
my $self = $_[0];
my $type = ref($self) or croak "$self is not an object";
my $type = ref($self) or confess "$self is not an object";
my $name = $AUTOLOAD;
$name =~ s/.*://; # strip fully-qualified portion
......@@ -187,15 +194,30 @@ sub Stringify($)
sub LookupBySlice($$)
{
my ($class, $slice_uuid) = @_;
my ($class, $token) = @_;
my $slice_uuid;
if ($slice_uuid !~ /^\w+\-\w+\-\w+\-\w+\-\w+$/) {
if ($token =~ /^\w+\-\w+\-\w+\-\w+\-\w+$/) {
$slice_uuid = $token;
}
elsif (GeniHRN::IsValid($token)) {
#
# We should put the slice_urn into the apt_instances table.
#
require GeniSlice;
my $slice = GeniSlice->Lookup($token);
return undef
if (!defined($slice));
$slice_uuid = $slice->uuid();
}
else {
return undef;
}
my $query_result =
DBQueryWarn("select uuid from apt_instances ".
"where slice_uuid='$slice_uuid'");
return undef
if (!$query_result || !$query_result->numrows);
......@@ -311,6 +333,33 @@ sub RecordHistory($)
return 0;
}
#
# Locate Geni objects for creator and slice.
#
#
# Look up the Geni slice object for this instance, keyed by the
# instance's slice_uuid. Hands back whatever GeniSlice->Lookup() returns.
#
sub GetGeniSlice($)
{
    my $self = shift;

    # Pulled in on demand, at call time, rather than when this file loads.
    require GeniSlice;

    return GeniSlice->Lookup($self->slice_uuid());
}
#
# Look up the Geni user object for the creator of this instance, keyed
# by the instance's creator_uuid. Hands back whatever GeniUser->Lookup()
# returns.
#
sub GetGeniUser($)
{
    my $self = shift;

    # Pulled in on demand, at call time, rather than when this file loads.
    require GeniUser;

    # NOTE(review): the trailing 1 is an option flag interpreted by
    # GeniUser->Lookup(); presumably it permits local users -- confirm.
    return GeniUser->Lookup($self->creator_uuid(), 1);
}
#
# Find the authority object for the aggregate this instance lives on,
# keyed by the instance's aggregate_urn. The lookup itself is delegated
# to APT_Geni::GetAuthority(), whose result is returned unchanged.
#
sub GetGeniAuthority($)
{
    my $self = shift;

    # Pulled in on demand, at call time, rather than when this file loads.
    require GeniAuthority;

    return APT_Geni::GetAuthority($self->aggregate_urn());
}
#
# Warn creator that the experiment is going to expire. This is hooked
# in from the sa_daemon, so we can send a message that is less geni like
......@@ -353,5 +402,234 @@ sub WarnExpiring($$)
return 0;
}
#
# Ask the aggregate for the console URL of a single node.
#
# $sliver_urn is sent as the "sliver_urn" argument of the "ConsoleURL"
# RPC. Returns the result of Genixmlrpc::CallMethod(), or undef when the
# authority, user, slice, RPC context or either credential is missing.
#
sub ConsoleURL($$)
{
    my ($self, $sliver_urn) = @_;

    # Collect every object we depend on first, then validate in one place.
    my $authority = $self->GetGeniAuthority();
    my $geniuser  = $self->GetGeniUser();
    my $slice     = $self->GetGeniSlice();
    my $context   = APT_Geni::GeniContext();

    if (!defined($geniuser) || !defined($authority) ||
        !defined($slice)    || !defined($context)) {
        return undef;
    }

    # Both the slice credential and the speaks-for credential are required.
    my ($slice_cred, $speaksfor_cred) =
        APT_Geni::GenCredentials($slice, $geniuser);

    if (!defined($speaksfor_cred) || !defined($slice_cred)) {
        return undef;
    }

    my %rpc_args = (
        "slice_urn"   => $slice->urn(),
        "sliver_urn"  => $sliver_urn,
        "credentials" => [ map { $_->asString() }
                           ($slice_cred, $speaksfor_cred) ],
    );

    return Genixmlrpc::CallMethod($authority->url(),
                                  $context, "ConsoleURL", \%rpc_args);
}
#
# Ask the aggregate to terminate (delete) the slice.
#
# Issues the "DeleteSlice" RPC, retrying while the aggregate reports the
# slice as busy. Returns the response of the last call made, or undef
# when the authority, user, slice, RPC context or either credential is
# missing.
#
sub Terminate($)
{
    my $self = shift;

    # Collect every object we depend on first, then validate in one place.
    my $authority = $self->GetGeniAuthority();
    my $geniuser  = $self->GetGeniUser();
    my $slice     = $self->GetGeniSlice();
    my $context   = APT_Geni::GeniContext();

    if (!defined($geniuser) || !defined($authority) ||
        !defined($slice)    || !defined($context)) {
        return undef;
    }

    my ($slice_cred, $speaksfor_cred) =
        APT_Geni::GenCredentials($slice, $geniuser);

    if (!defined($speaksfor_cred) || !defined($slice_cred)) {
        return undef;
    }

    my %rpc_args = (
        "slice_urn"   => $slice->urn(),
        "credentials" => [ map { $_->asString() }
                           ($slice_cred, $speaksfor_cred) ],
    );

    #
    # A busy slice is worth retrying, but only for so long: it may be
    # permanently locked because of some earlier error, in which case we
    # give up and hand the last (busy) response back to the caller.
    #
    my $max_attempts = 10;
    my $response;

    foreach my $attempt (1 .. $max_attempts) {
        $response =
            Genixmlrpc::CallMethod($authority->url(),
                                   $context, "DeleteSlice", \%rpc_args);

        my $code = $response->code();

        # SEARCHFAILED is treated as success.
        my $succeeded = ($code == GENIRESPONSE_SUCCESS ||
                         $code == GENIRESPONSE_SEARCHFAILED);

        # Anything other than "busy" is final, whether good or bad.
        return $response
            if ($succeeded || $code != GENIRESPONSE_BUSY);

        # No point in waiting after the final attempt.
        if ($attempt < $max_attempts) {
            print STDERR "Slice is busy, will retry again in a bit ...\n";
            sleep(30);
        }
    }
    return $response;
}
#
# Ask the aggregate to extend (renew) the slice.
#
# $new_expires is sent as the "expiration" argument of the "RenewSlice"
# RPC. Besides the usual slice and speaks-for credentials, an extra
# credential produced by the genextendcred helper is included.
#
# Returns the result of Genixmlrpc::CallMethod(), or undef when a
# required object, certificate or credential could not be obtained.
#
sub Extend($$)
{
    my ($self, $new_expires) = @_;
    my $authority = $self->GetGeniAuthority();
    my $geniuser  = $self->GetGeniUser();
    my $slice     = $self->GetGeniSlice();
    my $context   = APT_Geni::GeniContext();

    return undef
        if (! (defined($geniuser) && defined($authority) &&
               defined($slice) && defined($context)));

    #
    # We need a special credential in case the aggregate is enforcing
    # limits (as do Utah aggregates).
    #
    my $slice_urn = $slice->urn();
    my $user_urn  = $geniuser->urn();
    my @userargs  = ("-u", $user_urn);

    # NOTE(review): tmpnam() only chooses a name; the file itself is
    # created later by the helper. Consider File::Temp if the helper can
    # write to a pre-created file.
    my $credname  = tmpnam();

    # Declared at function scope so that whatever WriteToFile() returns
    # stays alive until the helper has finished reading it.
    my $certfile;

    #
    # But if a nonlocal user from Geni, then the user we have in the database
    # is not in the same domain as the speaksfor, so we use the geni certificate
    # that the trusted signer gave us and is stored in the DB.
    #
    if ($geniuser->IsLocal() && $geniuser->emulab_user()->IsNonLocal()) {
        my (undef, $certificate_string) =
            $geniuser->emulab_user()->GetStoredCredential();

        if (! defined($certificate_string)) {
            print STDERR "Could not get stored certificate for $geniuser\n";
            return undef;
        }
        my $certificate = GeniCertificate->LoadFromString($certificate_string);
        if (!defined($certificate)) {
            print STDERR "Could not load stored certificate for $geniuser\n";
            return undef;
        }
        $certfile = $certificate->WriteToFile();
        if (!defined($certfile)) {
            print STDERR "Could not write stored certificate for $geniuser\n";
            return undef;
        }
        @userargs = ("-c", $certfile);
    }

    #
    # List form of system() so that no shell ever sees the URN or the
    # file names; they are passed to the helper verbatim.
    #
    system($GENEXTENDCRED, "-a", "-o", $credname, "-s", $slice_urn,
           "-t", "90", @userargs);
    if ($?) {
        print STDERR "Could not create extended credential\n";
        # The helper may have left a partial file behind.
        unlink($credname);
        return undef;
    }

    my $credfh;
    if (!open($credfh, "<", $credname)) {
        print STDERR "Could not open ext credfile $credname\n";
        unlink($credname);
        return undef;
    }
    # Read the whole credential; an empty file yields the empty string.
    my $extcred = join("", <$credfh>);
    close($credfh);
    unlink($credname);
    chomp($extcred);

    my ($slice_credential, $speaksfor_credential) =
        APT_Geni::GenCredentials($slice, $geniuser);

    return undef
        if (! (defined($speaksfor_credential) &&
               defined($slice_credential)));

    my $args = {
        "slice_urn"   => $slice_urn,
        "expiration"  => $new_expires,
        "credentials" => [$slice_credential->asString(),
                          $speaksfor_credential->asString(),
                          $extcred],
    };

    return Genixmlrpc::CallMethod($authority->url(),
                                  $context, "RenewSlice", $args);
}
#
# Ask the aggregate to create an image.
#
# $sliver_urn and $imagename are sent as the "sliver_urn" and "imagename"
# arguments of the "CreateImage" RPC. Returns the result of
# Genixmlrpc::CallMethod(), or undef when the authority, user, slice,
# RPC context or either credential is missing.
#
sub CreateImage($$$)
{
    my ($self, $sliver_urn, $imagename) = @_;

    # Collect every object we depend on first, then validate in one place.
    my $authority = $self->GetGeniAuthority();
    my $geniuser  = $self->GetGeniUser();
    my $slice     = $self->GetGeniSlice();
    my $context   = APT_Geni::GeniContext();

    if (!defined($geniuser) || !defined($authority) ||
        !defined($slice)    || !defined($context)) {
        return undef;
    }

    my ($slice_cred, $speaksfor_cred) =
        APT_Geni::GenCredentials($slice, $geniuser);

    if (!defined($speaksfor_cred) || !defined($slice_cred)) {
        return undef;
    }

    my %rpc_args = (
        "slice_urn"   => $slice->urn(),
        "imagename"   => $imagename,
        "sliver_urn"  => $sliver_urn,
        "credentials" => [ map { $_->asString() }
                           ($slice_cred, $speaksfor_cred) ],
    );

    return Genixmlrpc::CallMethod($authority->url(),
                                  $context, "CreateImage", \%rpc_args);
}
#
# Ask the aggregate for the status of the slice.
#
# Note that the RPC invoked is named "SliverStatus". Returns the result
# of Genixmlrpc::CallMethod(), or undef when the authority, user, slice,
# RPC context or either credential is missing.
#
sub SliceStatus($)
{
    my $self = shift;

    # Collect every object we depend on first, then validate in one place.
    my $authority = $self->GetGeniAuthority();
    my $geniuser  = $self->GetGeniUser();
    my $slice     = $self->GetGeniSlice();
    my $context   = APT_Geni::GeniContext();

    if (!defined($geniuser) || !defined($authority) ||
        !defined($slice)    || !defined($context)) {
        return undef;
    }

    my ($slice_cred, $speaksfor_cred) =
        APT_Geni::GenCredentials($slice, $geniuser);

    if (!defined($speaksfor_cred) || !defined($slice_cred)) {
        return undef;
    }

    my %rpc_args = (
        "slice_urn"   => $slice->urn(),
        "credentials" => [ map { $_->asString() }
                           ($slice_cred, $speaksfor_cred) ],
    );

    return Genixmlrpc::CallMethod($authority->url(),
                                  $context, "SliverStatus", \%rpc_args);
}
# _Always_ make sure that this 1 is at the end of the file...
1;
This diff is collapsed.
......@@ -56,9 +56,9 @@ my $webtask_id;
#
# Configure variables
#
my $TB = "@prefix@";
my $TBOPS = "@TBOPSEMAIL@";
my $QUICKVM = "$TB/sbin/protogeni/quickvm";
my $TB = "@prefix@";
my $TBOPS = "@TBOPSEMAIL@";
my $MANAGEINSTANCE = "$TB/bin/manage_instance";
#
# Untaint the path
......@@ -85,7 +85,6 @@ use APT_Instance;
use GeniXML;
use GeniHRN;
use WebTask;
use libaudit;
# Protos
sub fatal($);
......@@ -346,12 +345,16 @@ if (defined($instance)) {
}
my ($node) = GeniXML::FindNodes("n:node", $manifest)->get_nodelist();
my $sliver_urn = GeniXML::GetSliverId($node);
my $node_id = GeniXML::GetVirtualId($node);
my $apt_uuid = $instance->uuid();
my $imagename = $profile->name();
#
# Grab the webtask object so we can watch it. We are looking
# for it to finish, so we can unlock the profile for use.
# for it to finish, so we can unlock the profile for use. Note
# this always creates a webtask, even if not directed to on the
# command line, so that we can communicate with the script we
# call that does the work.
#
$webtask = WebTask->Create($profile->uuid(), $webtask_id);
if (!defined($webtask)) {
......@@ -364,34 +367,32 @@ if (defined($instance)) {
fatal("Could not lock new profile");
}
my $command = "$QUICKVM " . ($webtask_id ? "-t $webtask_id " : " ") .
"-s $apt_uuid $sliver_urn $imagename";
my $command = "$MANAGEINSTANCE -t " . $webtask->task_id() . " ".
"snapshot $apt_uuid $imagename $node_id";
#
# This returns pretty fast, and then the imaging takes place in
# the background at the aggregate. quickvm keeps a process running
# in the background waiting for the sliver to unlock and the
# sliverstatus to indicate the node is running again.
# the background at the aggregate. The script keeps a process
# running in the background waiting for the sliver to unlock and
# the sliverstatus to indicate the node is running again.
#
my $output = emutil::ExecQuiet($command);
if ($?) {
$profile->Delete(1);
$webtask->Delete();
$webtask->Delete()
if (!defined($webtask_id));
print STDERR $output . "\n";
fatal("Failed to create disk image!");
}
#
# Parse the output to get the new image urn, then stick that
# into the rspec, and update the database.
# The script helpfully put the new image urn in the webtask.
#
my $image_urn;
if ($output =~ /^(urn:.*),/) {
$image_urn = $1;
}
else {
$profile->Delete(1);
fatal("Could not find image urn in:\n$output");
}
# Again, this only makes sense for single node profiles.
if ($profile->UpdateDiskImage($image_urn)) {
$webtask->Refresh();
my $image_urn = $webtask->image_urn();
if (!defined($image_urn) ||
$profile->UpdateDiskImage($image_urn)) {
$webtask->Delete()
if (!defined($webtask_id));
$profile->Delete(1);
fatal("Could not update image URN in rspec");
}
......@@ -404,30 +405,51 @@ if (defined($instance)) {
if ($child) {
exit(0);
}
# Close our descriptors so web server thinks we are disconnected.
if ($webtask_id) {
for (my $i = 0; $i < 1024; $i++) {
POSIX::close($i);
}
}
# Let parent exit;
sleep(2);
if (0) {
AuditStart(0, undef, LIBAUDIT_LOGTBLOGS()|LIBAUDIT_LOGONLY());
AddAuditInfo("cc", "aptnet-logs\@flux.utah.edu");
}
POSIX::setsid();
POSIX::setsid();
}
my $seconds = 1200;
my $interval = 5;
while ($seconds >= 0) {
sleep($interval);
$seconds -= $interval;
#
# We are waiting for the backend process to exit. The web interface is
# reading the webtask structure, but if it fails we want to know that
# so we can delete the profile.
#
while (1) {
sleep(10);
$webtask->Refresh();
last
if (defined($webtask->exited()));
#
# See if the process is still running. If not then it died badly.
# Mark the webtask as exited.
#
my $pid = $webtask->process_id();
if (! kill(0, $pid)) {
# Check again in case it just exited.
$webtask->Refresh();
if (! defined($webtask->exited())) {
$webtask->Exited(-1);
}
last;
}
}
if ($webtask->exitcode()) {
$profile->Delete(1);
$webtask->Delete()
if (!defined($webtask_id));
exit(1);
}
$profile->Unlock();
$webtask->Delete()
if (!defined($webtask_id));
exit(0);
}
exit(0);
......
This diff is collapsed.
......@@ -112,9 +112,24 @@ function Do_TerminateInstance()
$uuid = $instance->uuid();
# This needs work.
SUEXEC("nobody", "nobody", "webquickvm -k $uuid",
SUEXEC_ACTION_IGNORE);
SPITAJAX_RESPONSE("");
$webtask_id = WebTask::GenerateID();
SUEXEC("nobody", "nobody",
"webmanage_instance -t $webtask_id -- terminate $uuid",
SUEXEC_ACTION_CONTINUE);
$webtask = WebTask::Lookup($webtask_id);
if ($retval == 0) {
SPITAJAX_RESPONSE("");
$webtask->Delete();
return;
}
if ($webtask && $webtask->exited()) {
SPITAJAX_ERROR(1, $webtask->TaskData("output"));
$webtask->Delete();
}
else {
SPITAJAX_ERROR(-1, "Internal Error. Please try again later");
}
}
#
......@@ -136,7 +151,7 @@ function Do_GetInstanceManifest()
#
function Do_GetSSHAuthObject()
{
global $instance, $creator;
global $instance, $creator, $this_user;
global $ajax_args;
if (!isset($ajax_args["hostport"])) {
......@@ -148,6 +163,14 @@ function Do_GetSSHAuthObject()
if (StatusSetupAjax()) {
return;
}
#
# XXX Need to deal with multiple members of an experiment.
#
if (!$this_user->SameUser($creator)) {
SPITAJAX_ERROR(1, "Not allowed to ssh; only the creator");
return;
}
$nodeid = $ajax_args["nodeid"];
$auth = SSHAuthObject($creator->uid(), $hostport);
if (!$auth) {
......@@ -162,7 +185,7 @@ function Do_GetSSHAuthObject()
#
function Do_RequestExtension()
{
global $instance, $creator, $this_user;
global $instance, $creator, $this_user, $suexec_output;
global $ajax_args;
global $TBMAIL_OPS, $APTMAIL, $APTBASE;
$reason = "";
......@@ -260,7 +283,7 @@ function Do_RequestExtension()
}
}
$retval = SUEXEC("nobody", "nobody",
"webquickvm -e $howlong $uuid",
"webmanage_instance extend $uuid $howlong",
SUEXEC_ACTION_CONTINUE);
if ($retval == 0) {
......@@ -298,11 +321,10 @@ bad:
function Do_ConsoleURL()
{
global $instance, $creator;
global $suexec_output_array;
global $ajax_args;
if (!isset($ajax_args["node"])) {
SPITAJAX_ERROR(1, "Missing node");
SPITAJAX_ERROR(1, "Missing node argument");
return 1;
}
$node = $ajax_args["node"];
......@@ -316,19 +338,21 @@ function Do_ConsoleURL()
SPITAJAX_ERROR(1, "no slice for instance");
return 1;
}
$webtask_id = WebTask::GenerateID();
$retval = SUEXEC("nobody", "nobody",
"webmanage_instance -c $uuid " . escapeshellarg($node),
"webmanage_instance -t $webtask_id -- consoleurl $uuid " .
escapeshellarg($node),
SUEXEC_ACTION_CONTINUE);
$webtask = WebTask::Lookup($webtask_id);
if ($retval == 0) {
$url = $suexec_output_array[0];
if (preg_match("/^(https:.*)$/", $url, $matches)) {
$url = $matches[1];
}
SPITAJAX_RESPONSE($url);
SPITAJAX_RESPONSE($webtask->TaskValue("value"));
$webtask->Delete();
return;
}
elseif ($retval > 0) {
SPITAJAX_ERROR(1, $suexec_output);
if ($webtask) {
SPITAJAX_ERROR(1, $webtask->TaskData("output"));
$webtask->Delete();
}
else {
SPITAJAX_ERROR(-1, "Internal Error. Please try again later");
......@@ -342,7 +366,6 @@ function Do_Snapshot()
{
global $this_user;
global $ajax_args;
global $suexec_output_array;
$this_idx = $this_user->uid_idx();
......@@ -382,26 +405,27 @@ function Do_Snapshot()
#
# Call out to the backend.
#
$webtask_id = md5(uniqid(rand(),1));
$webtask_id = WebTask::GenerateID();
$retval = SUEXEC($this_user->uid(), "nobody",
"webmanage_instance -t $webtask_id -s $uuid",
"webmanage_instance -t $webtask_id -- snapshot $uuid",
SUEXEC_ACTION_IGNORE);
$webtask = WebTask::Lookup($webtask_id);
if ($retval != 0) {
if ($retval < 0) {
SPITAJAX_ERROR(1, "Internal error, cannot proceed.");
SPITAJAX_ERROR(-11, "Internal error, cannot proceed.");
# Notify tbops.
SUEXECERROR(SUEXEC_ACTION_CONTINUE);
return;
}
if ($webtask) {
SPITAJAX_ERROR(1, $webtask->TaskData("output"));
$webtask->Delete();
}
else {
$line = "please try again later.";
if (count($suexec_output_array)) {
$line = $suexec_output_array[0];
}
SPITAJAX_ERROR(1, "Transient error; $line");
return;
SPITAJAX_ERROR(-1, "Internal Error. Please try again later");
}
return;
}
SPITAJAX_RESPONSE("Success");
}
......@@ -437,11 +461,6 @@ function Do_SnapshotStatus()
$taskdata = $webtask->TaskData();
$blob = array();
if ($webtask->exited()) {
# Success, but not sure what to report. Come back to this later.
$blob["exited"] = $webtask->exited();
$blob["exitcode"] = $webtask->exitcode();
}
#
# Size is in KB to avoid bigint problems. But kill the KB.
#
......@@ -456,7 +475,12 @@ function Do_SnapshotStatus()
}
$blob["node_status"] = $taskdata["rawstate"];
$blob["image_status"] = $taskdata["image_status"];
if ($webtask->exited()) {
# Success, but not sure what to report. Come back to this later.
$blob["exited"] = $webtask->exited();
$blob["exitcode"] = $webtask->exitcode();
$webtask->Delete();
}
SPITAJAX_RESPONSE($blob);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment