Commit 98ca9432 authored by Leigh B Stoller's avatar Leigh B Stoller

Lots of changes:

* Big change to how events are forwarded to the Portal. Originally we
  subscribed to events from the local pubsubd, transformed them to
  Geni events, then sent them back to the local pubsubd; pubsub_forward
  would pick them up and then forward them to the Portal SSL pubsubd. Now
  we send them directly to the Portal SSL pubsubd, which reduces the load
  on the main pubsubd, which was throwing errors because of too much
  load (to be specific, the subscribers were not keeping up, which
  causes pubsubd to throw errors back to the sender). We can do this
  because pubsub and the event system now include the SSL entrypoints.

  As an aside, pubsub_forward is multi-threaded while igevent_daemon is
  not, so we might have to play some tricks similar to stated.

* Clean up configure definitions as described in commit 621253f2.

* Various debugging changes to make it possible to run an alternate igevent
  daemon out of my devel tree for debugging. Basically, the main igevent
  daemon ignores all events for a specific slice, while my igevent
  daemon ignores all the other events and processes just the ones for my
  specific slice.
parent 11bc397e
......@@ -42,9 +42,11 @@ sub usage()
print "Usage: igevent_daemon [-d] [-n]\n";
exit(1);
}
my $optlist = "dn";
my $optlist = "dntv";
my $debug = 0;
my $verbose = 0;
my $impotent = 0;
my $testing = 0;
#
# Configure variables
......@@ -54,10 +56,16 @@ my $TBOPS = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
my $PGENIDOMAIN = "@PROTOGENI_DOMAIN@";
my $OURDOMAIN = "@OURDOMAIN@";
my $BOSSNODE = "@BOSSNODE@";
my $PGENISUPPORT = @PROTOGENI_SUPPORT@;
my $LOGFILE = "$TB/log/igevent_daemon.log";
my $MYURN = "urn:publicid:IDN+{OURDOMAIN}+authority+cm";
my $SLEEP_INTERVAL= 60;
# Portal SSL pubsubd running on this host:port
my $CLUSTER_PORTAL = "@CLUSTER_PORTAL@";
my $CLUSTER_PUBSUBD_SSLPORT = "@CLUSTER_PUBSUBD_SSLPORT@";
my $CLUSTER_PUBSUBD_ALTPORT = "@CLUSTER_PUBSUBD_ALTPORT@";
my $CERTFILE = "$TB/etc/emulab.pem";
my $KEYFILE = "$TB/etc/emulab.key";
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin';
......@@ -65,6 +73,9 @@ delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
# Protos
sub fatal($);
# Locals
my $portalhandle;
#
# Turn off line buffering on output
......@@ -92,9 +103,15 @@ if (! getopts($optlist, \%options)) {
if (defined($options{"d"})) {
$debug++;
}
if (defined($options{"t"})) {
$testing++;
}
if (defined($options{"n"})) {
$impotent++;
}
if (defined($options{"v"})) {
$verbose++;
}
# Do this early so that we talk to the right DB.
use vars qw($GENI_DBNAME);
......@@ -110,7 +127,9 @@ require GeniSlice;
require GeniCertificate;
require GeniCredential;
require GeniAggregate;
require GeniEvent;
use GeniResponse;
use GeniHRN;
use Experiment;
use EmulabConstants;
use Node;
......@@ -120,7 +139,7 @@ use emutil;
use libEmulab;
use event;
if (!$impotent && CheckDaemonRunning("igevent_daemon")) {
if (!($impotent || $testing) && CheckDaemonRunning("igevent_daemon")) {
fatal("Not starting another igevent daemon!");
}
# Go to ground.
......@@ -129,7 +148,7 @@ if (! $debug) {
exit(0);
}
}
if (!$impotent && MarkDaemonRunning("igevent_daemon")) {
if (!($impotent || $testing) && MarkDaemonRunning("igevent_daemon")) {
fatal("Could not mark daemon as running!");
}
#
......@@ -148,26 +167,67 @@ $SIG{HUP} = \&handler
GeniUtil::FlipToGeniUser();
# We process a lot of events!
event_set_sockbufsizes(1024 * 64, 1024 * 192);
#
# Capture all events from the local pubsubd and transform them into
# events to send to the portal event server via the local clusterd.
#
my $localhandle = event_register("elvin://localhost", 0);
if (!$localhandle) {
fatal("Unable to register with event system");
fatal("Unable to register with local event system");
}
#
# If we are part of a Portal then we need to forward the geni events
# to the Portal SSL pubsubd.
#
# Here is the complication: for the moment we want just one sender to
# the remote pubsubd. So form a connection to the remote pubsubd, and
# send transformed events (SITE set to a URN, see GeniEvent.pm) there.
# But we also get events that have not been transformed yet, and that is
# buried down in the protogeni code, so set up the GeniEvent object with
# the handle we create here.
#
if ($CLUSTER_PORTAL ne "") {
#
# If we are the portal then no reason to use the SSL port, use
# the alternate port.
#
# We need to loop until we form this connection. Once it is setup,
# pubsub will keep it connected.
#
while (!$portalhandle) {
if ($CLUSTER_PORTAL eq $BOSSNODE) {
my $url = "elvin://localhost:${CLUSTER_PUBSUBD_ALTPORT}";
$portalhandle = event_register($url, 0);
}
else {
my $url = "elvin://${CLUSTER_PORTAL}:${CLUSTER_PUBSUBD_SSLPORT}";
$portalhandle = event_register_withssl($url, 0,
$CERTFILE, $KEYFILE);
}
if (!$portalhandle) {
print STDERR "Could not connect to CLUSTER pubsubd, waiting.\n";
sleep(5);
}
}
#
# Tell the GeniEvent code to use this handle.
#
GeniEvent->Create($portalhandle);
}
#
# Subscribe to all events.
# Subscribe to all events from local pubsubd.
#
my $tuple = address_tuple_alloc();
if (!$tuple) {
fatal("Could not allocate an address tuple");
}
# We do not want to see events that have already been transformed
# into events we are sending to the portal.
%$tuple = ( site => "*" );
if (!event_subscribe($localhandle, \&callback, $tuple)) {
fatal("Could not subscribe to all events");
}
......@@ -182,19 +242,58 @@ sub callback($$$)
my ($handle, $notification, $data) = @_;
$gotone++;
my $site = event_notification_get_site($handle, $notification);
my $objtype = event_notification_get_objtype($handle, $notification);
my $site = event_notification_get_site($handle, $notification);
#
# If there is a URN, then the event was generated by Geni code.
# We ignore it, forwarded by pubsub_forwarder.
# If the site is set, need to see if it forwards to the local pubsub
# or the remote SSL pubsubd.
#
if ($site && $site =~ /^urn:/) {
if ($site && $site ne "*") {
# This should not happen.
return
if ($site !~ /^urn:/);
#
# If not part of a Portal, no need to do anything, the local
# aptevent_daemon is listening to the local pubsubd.
#
return
if ($CLUSTER_PORTAL eq "");
my $slice = event_notification_get_string($handle,
$notification, "slice");
# This should not happen.
return
if (!defined($slice));
# Local slice, nothing to do, aptevent_daemon will see it.
return
if ($slice =~ /urn:publicid:IDN\+${OURDOMAIN}(\+|:)/);
if ($testing && $slice !~ /purpnurp/) {
print "Ignoring $slice\n";
return;
}
if (0 && $debug && $slice !~ /stoller/) {
#print "Ignoring $slice\n";
return;
}
# Otherwise, need to forward to the remote Portal SSL pubsubd.
if (!event_notify($portalhandle, $notification)) {
print STDERR "Could not send event to Portal\n";
}
if ($debug) {
print "Forwarded notification to the Portal.\n";
}
return;
}
#
# We are looking for node state change events to pass along.
#
my $objtype = event_notification_get_objtype($handle, $notification);
return
if (!defined($objtype) ||
($objtype ne TBDB_TBEVENT_NODESTATE() &&
......@@ -240,14 +339,16 @@ sub callback($$$)
goto done
if (!$sliver);
if ($debug) {
if ($testing || $debug) {
my $slice = $sliver->GetSlice();
my $slice_urn = $slice->urn();
$slice->Flush();
goto done
if (0 && $slice_urn !~ /stoller/);
if ($testing && $slice_urn !~ /purpnurp/) {
#print "Ignoring $slice_urn\n";
goto done;
}
}
if ($debug) {
if ($verbose) {
print "$node_id:$objtype " . ($event ? $event : "") . "\n";
}
......@@ -261,7 +362,7 @@ sub callback($$$)
my $oldstatus = $sliver->status();
my $newstatus;
if ($debug) {
if ($verbose) {
print "State/Status event for $node_id\n";
}
if (!$impotent) {
......@@ -281,7 +382,7 @@ sub callback($$$)
my $mbytes = event_notification_get_string($handle,
$notification,
"MBYTES_WRITTEN");
if ($debug) {
if ($verbose) {
print "Frisbee status event for $node_id: $event $mbytes MB\n";
}
$sliver->SendFrisbeeEvent($image, $mbytes);
......@@ -291,7 +392,7 @@ sub callback($$$)
# A change in the start command status generates a new event.
# This is generated by tmcd when it comes in.
#
if ($debug) {
if ($verbose) {
print "Start Command event for $node_id\n";
}
if (!$impotent) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment