Commit 2faf5fd1 authored by Leigh Stoller's avatar Leigh Stoller

Checkpoint the portal side of frisbee events.

The igevent_daemon now also forwards frisbee events for slices to the
Portal pubsubd over the SSL channel.

The aptevent_daemon gets those and adds them to sliverstatus stored in
the webtask for the instance.

The timeout code in create_instance watches for frisbee events and uses
them as another indicator of progress (or lack thereof). The hope is that we
fail sooner, or avoid failing too soon (say, because of a giant image-backed
dataset).

As an added bonus, the status page will display frisbee progress (image
name and MB written) in the node status hover popover. I mention this
because otherwise I would go to my grave without anyone ever noticing and
giving me a pat on the back or a smiley face in Slack.
parent 31453993
......@@ -1570,11 +1570,13 @@ sub UpdateWebStatus($$)
!exists($details->{"utc"}) ||
$details->{"utc"} >= $current->{$node_id}->{"utc"}) {
#
# Keep frisbeestatus, see below.
# Keep frisbeestatus for reloading, but we want to clear
# it otherwise.
#
if (exists($current->{$node_id}->{"frisbeestatus"}) &&
exists($details->{"rawstate"}) &&
$details->{"rawstate"} eq TBDB_NODESTATE_RELOADING()) {
exists($details->{"rawstate"}) &&
($details->{"rawstate"} eq TBDB_NODESTATE_RELOADING() ||
$details->{"rawstate"} eq TBDB_NODESTATE_TBSETUP())) {
$details->{"frisbeestatus"} =
$current->{$node_id}->{"frisbeestatus"};
}
......@@ -1586,6 +1588,51 @@ sub UpdateWebStatus($$)
return $current;
}
#
# Update the frisbee status in the webtask.
#
# Merges frisbee progress reports into the per-node sliverstatus that is
# stored in this object's webtask.
#
# Arguments:
#   $self - object providing webtask().
#   $hash - hash ref whose values are details hashes. Each details hash
#           is expected to carry 'client_id' (used as the node key in
#           sliverstatus) and may carry 'imagename'.
#
# Returns the updated sliverstatus hash ref, or an empty hash ref when
# the web_tasks table could not be locked.
#
sub UpdateFrisbeeStatus($$)
{
    my ($self, $hash) = @_;

    #
    # Frisbee events come in via the pubsub channel only, not by polling.
    # But we still have to lock the table (see UpdateWebStatus() above).
    #
    DBQueryWarn("lock tables web_tasks write")
        or return {};

    $self->webtask()->Refresh();
    my $current = $self->webtask()->sliverstatus();
    if (!defined($current)) {
        $current = {};
    }

    foreach my $urn (keys(%{ $hash })) {
        my $details = $hash->{$urn};

        #
        # Skip malformed entries instead of dereferencing them; a die
        # here would leave web_tasks locked.
        #
        next
            if (ref($details) ne "HASH");

        my $node_id = $details->{'client_id'};

        # A missing client_id or an unknown node would be unusual.
        next
            if (!defined($node_id) || !exists($current->{$node_id}));

        #
        # Strip the version info for now, will cause confusion.
        #
        if (exists($details->{"imagename"}) &&
            defined($details->{"imagename"}) &&
            $details->{"imagename"} =~ /^([^:]+):\d*$/) {
            $details->{"imagename"} = $1;
        }
        $current->{$node_id}->{"frisbeestatus"} = $details;
    }
    $self->webtask()->sliverstatus($current);
    DBQueryWarn("unlock tables");

    # Debugging only; dumping on every frisbee event floods the log.
    #print STDERR Dumper($current);
    return $current;
}
#
# Ask aggregate to terminate a sliver.
#
......@@ -2559,6 +2606,8 @@ sub WaitForSliver($)
# interface can show changes.
#
my $statusblob = $aggobj->UpdateWebStatus($repblob->{'details'});
#print STDERR Dumper($statusblob);
my $changed = 0;
foreach my $urn (keys(%{$repblob->{'details'}})) {
......@@ -2580,12 +2629,28 @@ sub WaitForSliver($)
if ($seconds < 600);
}
else {
if (exists($details->{"rawstate"}) &&
$laststatus->{$node_id}->{"rawstate"} ne
$details->{"rawstate"}) {
# This is IG specific.
$seconds = 300
if ($seconds < 300);
if (exists($details->{"rawstate"})) {
# rawstate is generated by Emulab-based aggregates.
if ($laststatus->{$node_id}->{"rawstate"} ne
$details->{"rawstate"}) {
$seconds = 300
if ($seconds < 300);
}
#
# See if we are continuing to get frisbee events.
# This implies forward progress, keep waiting.
#
elsif (exists($details->{"frisbeestatus"})) {
my $stamp = $details->{"frisbeestatus"}->{"utc"};
if (time() - $stamp < 120) {
$seconds = 300
if ($seconds < 300);
print STDERR
"Got a recent frisbee event for ".
$details->{"frisbeestatus"}->{"imagename"} .
"\n";
}
}
}
elsif ($laststatus->{$node_id}->{"status"} ne
$details->{"status"}) {
......
#!/usr/bin/perl -w
#
# Copyright (c) 2008-2016 University of Utah and the Flux Group.
# Copyright (c) 2008-2017 University of Utah and the Flux Group.
#
# {{{GENIPUBLIC-LICENSE
#
......@@ -63,6 +63,7 @@ delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
# Protos
sub HandleSliverStatus($$$);
sub HandleImageStatus($$$);
sub HandleFrisbeeStatus($$$);
sub fatal($);
#
......@@ -166,6 +167,10 @@ sub callback($$$)
if ($slice =~ /^\"(.*)\"$/) {
$slice = $1;
}
# Debugging.
return
if (0 && $slice !~ /stoller/);
if ($debug) {
print "Event: $time $site $type $urn $slice $details\n";
}
......@@ -181,6 +186,10 @@ sub callback($$$)
HandleImageStatus($site, $instance, $details);
goto done;
}
elsif ($type eq "FRISBEESTATUS") {
HandleFrisbeeStatus($site, $instance, $details);
goto done;
}
done:
$instance->Purge();
}
......@@ -236,6 +245,32 @@ sub HandleImageStatus($$$)
}
}
#
# Handle a FRISBEESTATUS event.
#
# Arguments:
#   $site     - key into the instance's AggregateHash().
#   $instance - the instance the event belongs to.
#   $details  - JSON-encoded string carrying the frisbee details.
#
# Events for a site that is not in AggregateHash() are silently ignored.
# Details that do not decode to a JSON object are reported on STDERR and
# dropped.
#
sub HandleFrisbeeStatus($$$)
{
    my ($site, $instance, $details) = @_;

    return
        if (!exists($instance->AggregateHash()->{$site}));

    my $sliver = $instance->AggregateHash()->{$site};

    # The leading 0 keeps this branch disabled, as in the original.
    if (0 && $impotent) {
        print "Would update frisbee status for $sliver from details\n";
        return;
    }
    if ($debug) {
        print "Updating frisbee status for sliver from $details\n";
    }

    #
    # Decode into a separate variable so that the raw string is still
    # available for the error message when decoding fails.
    #
    my $decoded = eval { decode_json($details) };
    if ($@ || ref($decoded) ne "HASH") {
        print STDERR "Could not decode json data: $details\n";
        return;
    }
    $sliver->UpdateFrisbeeStatus({$site => $decoded});
}
#
# Setup a signal handler for newsyslog.
#
......
#!/usr/bin/perl -w
#
# Copyright (c) 2008-2016 University of Utah and the Flux Group.
# Copyright (c) 2008-2017 University of Utah and the Flux Group.
#
# {{{GENIPUBLIC-LICENSE
#
......@@ -39,10 +39,10 @@ use POSIX qw(strftime);
#
sub usage()
{
    # Print the command line synopsis and exit with status 1.
    # Only -d and -n are advertised; they match the option list ("dn")
    # that is handed to getopts.
    print "Usage: igevent_daemon [-d] [-n]\n";
    exit(1);
}
my $optlist = "di";
my $optlist = "dn";
my $debug = 0;
my $impotent = 0;
......@@ -92,7 +92,7 @@ if (! getopts($optlist, \%options)) {
if (defined($options{"d"})) {
$debug++;
}
if (defined($options{"i"})) {
if (defined($options{"n"})) {
$impotent++;
}
......@@ -114,6 +114,7 @@ use GeniResponse;
use Experiment;
use EmulabConstants;
use Node;
use Interface;
use libtestbed;
use emutil;
use libEmulab;
......@@ -177,7 +178,7 @@ sub callback($$$)
my ($handle, $notification, $data) = @_;
$gotone++;
my $site = event_notification_get_site($handle, $notification);
my $site = event_notification_get_site($handle, $notification);
my $objtype = event_notification_get_objtype($handle, $notification);
#
......@@ -195,10 +196,11 @@ sub callback($$$)
($objtype ne TBDB_TBEVENT_NODESTATE() &&
$objtype ne TBDB_TBEVENT_NODESTARTSTATUS() &&
$objtype ne TBDB_TBEVENT_NODESTATUS() &&
$objtype ne TBDB_TBEVENT_NODEACCOUNTS()));
$objtype ne TBDB_TBEVENT_NODEACCOUNTS() &&
$objtype ne TBDB_TBEVENT_FRISBEESTATUS()));
my $event = event_notification_get_eventtype($handle,$notification);
my $node_id = event_notification_get_objname($handle, $notification);
my $state = event_notification_get_eventtype($handle, $notification);
#
# We do not care about nodes that are not allocated to Geni experiments.
......@@ -206,17 +208,22 @@ sub callback($$$)
# seems like a lot of overhead, but these events are not coming in all
# that fast.
#
# Frisbee status events come in with the IP of the node, not the node_id.
#
if ($objtype eq TBDB_TBEVENT_FRISBEESTATUS()) {
my $interface = Interface->LookupByIP($node_id);
if (!defined($interface)) {
print STDERR "Unknown node $node_id\n";
goto done;
}
$node_id = $interface->node_id();
$interface->Flush();
}
my $node = Node->Lookup($node_id);
if (!defined($node)) {
print STDERR "Unknown node $node_id\n";
goto done;
}
#
# If this is a state change and nothing changed, then do nothing.
#
goto done
if (0 && $objtype && TBDB_TBEVENT_NODESTATE() &&
$state eq $node->eventstate());
# We want this so we can flush it from the cache.
my $experiment = $node->Reservation();
......@@ -229,6 +236,17 @@ sub callback($$$)
goto done
if (!$sliver);
if ($debug) {
my $slice = $sliver->GetSlice();
my $slice_urn = $slice->urn();
$slice->Flush();
goto done
if (0 && $slice_urn !~ /stoller/);
}
if ($debug) {
print "$node_id:$objtype " . ($event ? $event : "") . "\n";
}
#
# This will generate a new event, which we will get here later,
# and forward directly (above).
......@@ -239,18 +257,30 @@ sub callback($$$)
my $oldstatus = $sliver->status();
my $newstatus;
#
# The point here, is that we do not want to send an event if
# ComputeStatus sent one, but since the rawstate has probably
# changed, we want to make sure an event goes out.
#
if ($sliver->ComputeStatus(\$newstatus) == 0 &&
$oldstatus eq $newstatus) {
$sliver->SendStatusEvent();
if ($debug) {
print "State/Status event for $node_id\n";
}
if (!$impotent) {
#
# The point here, is that we do not want to send an event if
# ComputeStatus sent one, but since the rawstate has probably
# changed, we want to make sure an event goes out.
#
if ($sliver->ComputeStatus(\$newstatus) == 0 &&
$oldstatus eq $newstatus) {
$sliver->SendStatusEvent();
}
}
}
elsif ($objtype eq TBDB_TBEVENT_FRISBEESTATUS()) {
my $image = $event;
my $mbytes = event_notification_get_string($handle,
$notification,
"MBYTES_WRITTEN");
if ($debug) {
print "State/Status sent for $node_id\n";
print "Frisbee status event for $node_id: $event $mbytes MB\n";
}
$sliver->SendFrisbeeEvent($image, $mbytes);
}
else {
#
......@@ -260,7 +290,9 @@ sub callback($$$)
if ($debug) {
print "Start Command event for $node_id\n";
}
$sliver->SendStatusEvent();
if (!$impotent) {
$sliver->SendStatusEvent();
}
}
$sliver->Flush();
done:
......
#!/usr/bin/perl -wT
#
# Copyright (c) 2008-2016 University of Utah and the Flux Group.
# Copyright (c) 2008-2017 University of Utah and the Flux Group.
#
# {{{GENIPUBLIC-LICENSE
#
......@@ -1601,6 +1601,33 @@ sub SendStatusEvent($)
return 0;
}
#
# Like SendStatusEvent() above, but publishes a FRISBEESTATUS event that
# carries image loading progress for this sliver.
#
# Arguments:
#   $self       - the sliver object.
#   $imagename  - image name to report.
#   $mb_written - amount written so far, reported as "MB_written".
#
# Returns 0 once the event has been handed to GeniEvent, or -1 when the
# sliver has no slice.
#
sub SendFrisbeeEvent($$$)
{
    my ($self, $imagename, $mb_written) = @_;

    my $slice = $self->GetSlice();
    if (!defined($slice)) {
        return -1;
    }

    my $component = $self->resource_id();
    my $client    = $self->nickname() || "";

    # Payload of the event; "utc" records when the event was generated.
    my %details = (
        "component_urn" => $component,
        "client_id"     => $client,
        "imagename"     => $imagename,
        "MB_written"    => $mb_written,
        "utc"           => time(),
    );
    my %event = (
        "type"    => "FRISBEESTATUS",
        "slice"   => $slice->urn(),
        "urn"     => $self->sliver_urn(),
        "details" => \%details,
    );
    GeniEvent->SendEvent(\%event);

    return 0;
}
#
# Generate the blob for status.
#
......
......@@ -928,6 +928,17 @@ $(function ()
"<td class='border-none'>" +
details.rawstate + "</td></tr>";
if (_.has(details, "frisbeestatus")) {
var mb_written = details.frisbeestatus.MB_written;
var imagename = details.frisbeestatus.imagename;
html = html +
"<tr><td class='border-none'>Image:</td>" +
" <td class='border-none'>" +
imagename + "</td></tr>" +
"<tr><td class='border-none'>Written:</td>" +
" <td class='border-none'>" +
mb_written + " MB</td></tr>";
}
if (_.has(details, "execute_state")) {
var tag;
var icon;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment