Commit 0f4a4dfb authored by Leigh Stoller

Several sets of changes scattered across all these files.

* More on issue #54: watch for openstack experiments and try to download
  the new openstack stats file via the fast XMLRPC path. Show this as a
  text blob in a new tab on the status page; we still need to graph the
  data. The apt_daemon handles the periodic request for the data (every
  10 minutes), which we store in the apt_instances table.

* Addition for Rob on the admin extend page: add a "more info" button that
  sends the contents of the text box as an email message requesting more
  info and stores that in the ongoing interaction log. Responses from the
  user are not stored, though; we might look at that someday.

* Another addition for Rob: on the extensions list page, also show expired,
  locked-down experiments. Note the sorting: at the top of the list are
  actual extension requests (status='ready'), while at the bottom of the
  list are the status='expired' experiments.

* Add a "graphs" tab to the status page, which shows the same idle stats
  graphs that were added to the admin extend page. Most of this change is
  refactoring the code and sharing it between the two pages.
parent 1c1f063f
......@@ -76,7 +76,7 @@ my %instances = ();
my $debug = 0;
# Debugging: when set, cluster URLs are rewritten with devurl() before
# the RPC is made. Declared once, and left off (0) by default so that a
# normal install never talks to a developer tree.
my $usemydevtree = 0;
sub devurl($)
{
my ($cmurl) = @_;
......@@ -2605,5 +2605,33 @@ sub IdleData($)
return undef;
}
#
# Openstack
#
# Call the "SliceOpenstackData" method at the cluster for the slice of
# this aggregate's instance.
#
# Arguments:
#   $self      - the aggregate object.
#   $client_id - handed to the cluster unchanged as "client_id".
#
# Returns whatever Genixmlrpc::CallMethod() returns, or undef when the
# authority, the root context or the slice is not defined.
#
sub OpenstackData($$)
{
    my ($self, $client_id) = @_;
    my $authority = $self->GetGeniAuthority();
    my $context   = APT_Geni::RootContext();
    my $slice     = $self->instance()->GetGeniSlice();

    return undef
        if (! (defined($authority) && defined($context) &&
               defined($slice)));

    my $args = {
        "slice_urn" => $slice->urn(),
        "client_id" => $client_id,
    };

    # The call goes to the /cluster URL instead of the /cm URL.
    my $cmurl = $authority->url();
    $cmurl =~ s/\/cm$/\/cluster/;
    $cmurl = devurl($cmurl) if ($usemydevtree);

    my $response = Genixmlrpc::CallMethod($cmurl, $context,
                                          "SliceOpenstackData", $args);
    return $response;
}
# _Always_ make sure that this 1 is at the end of the file...
1;
......@@ -61,13 +61,9 @@ my $PROTOUSER = "elabman";
my $SUDO = "/usr/local/bin/sudo";
my $WGET = "/usr/local/bin/wget";
my $SLEEP_INTERVAL = 300;
my $UPDATE_INTERVAL = (60 * 60 * 24);
my $REPORT_INTERVAL = 24 * 3600;
my $OPENSTACK_INTERVAL = 600;
#
# Update interval countdown. Set to zero so it runs right away.
#
my $update_countdown = 0;
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
......@@ -511,12 +507,69 @@ sub PushUpdates()
}
}
#
# Gather the Openstack utilization file.
#
# For every apt_instances row with status='ready' and isopenstack set,
# run "manage_instance openstackstats" on the instance. Nothing is run
# in impotent mode.
#
# %openstackfailures counts, per instance uuid, how many times in a row
# the request came back with GENIRESPONSE_SEARCHFAILED. An entry is
# dropped as soon as a request succeeds, and also once the isopenstack
# flag has been cleared, so the hash holds only instances that are
# currently failing.
#
my %openstackfailures = ();

sub GatherOpenstackUtilization()
{
    my $query_result =
        DBQueryWarn("select uuid from apt_instances ".
                    "where status='ready' and isopenstack!=0");
    return
        if (!$query_result);

    while (my ($uuid) = $query_result->fetchrow_array()) {
        my $instance = APT_Instance->Lookup($uuid);
        if (!defined($instance)) {
            print STDERR "No such instance $uuid\n";
            next;
        }
        print STDERR "$instance is running openstack\n"
            if ($debug);
        next
            if ($impotent);

        system("$SUDO -u $PROTOUSER $MANAGEINSTANCE openstackstats $uuid");
        if ($?) {
            my $stat = $? >> 8;

            print STDERR "Request for openstackstats returned $stat\n"
                if ($debug);
            #
            # We want to look for openstack instances that are not running
            # the stats gathering code, there will not be a file. But it is
            # not going to show up for a while, so we let it fail for a
            # while before clearing the openstack flag in the DB.
            #
            if ($stat == GENIRESPONSE_SEARCHFAILED) {
                $openstackfailures{"$uuid"} = 0
                    if (!exists($openstackfailures{"$uuid"}));

                if ($openstackfailures{"$uuid"} > 10) {
                    print STDERR
                        "Failed to get openstack stats too many times. " .
                        "Clearing the flag.\n";
                    $instance->Update({"isopenstack" => 0});
                    # The query above will not return this instance
                    # again, so the counter is no longer needed.
                    delete($openstackfailures{"$uuid"});
                }
                else {
                    $openstackfailures{"$uuid"}++;
                }
            }
        }
        else {
            # The stats file is there; forget any earlier failures so
            # that only an unbroken run of them can clear the flag.
            delete($openstackfailures{"$uuid"});
        }
        $instance->Flush();
    }
}
# In one-shot mode, push updates and gather the openstack stats a single
# time, then exit.
if ($oneshot) {
    PushUpdates();
    GatherOpenstackUtilization();
    exit(0);
}

# Counters that pace the daily report and the openstack stats gathering;
# the main loop advances both by $SLEEP_INTERVAL on each pass. Each is
# declared exactly once.
my $reportcounter    = 0;
my $openstackcounter = 0;
# Do this once at startup
if (! NoLogins()) {
......@@ -529,6 +582,7 @@ while (1) {
next;
}
$reportcounter += $SLEEP_INTERVAL;
$openstackcounter += $SLEEP_INTERVAL;
print "Running at ".
POSIX::strftime("20%y-%m-%d %H:%M:%S", localtime()) . "\n";
......@@ -539,11 +593,14 @@ while (1) {
UpdateAggregateGraphs();
PushUpdates();
# Do this once every 24 hours ($REPORT_INTERVAL seconds).
if ($reportcounter >= $REPORT_INTERVAL) {
    ReportLockdownExpired();
    $reportcounter = 0;
}
# Gather the openstack stats every $OPENSTACK_INTERVAL seconds. This has
# to use its own counter: testing and zeroing $reportcounter here would
# reset it long before it could reach $REPORT_INTERVAL, and the lockdown
# report above would never run.
if ($openstackcounter >= $OPENSTACK_INTERVAL) {
    GatherOpenstackUtilization();
    $openstackcounter = 0;
}
exit(0)
if ($oneshot);
......
......@@ -52,9 +52,11 @@ sub usage()
print("Usage: manage_instance updatekeys instance [uid] \n");
print("Usage: manage_instance extend instance [-m message] days [filename]\n");
print("Usage: manage_instance denyextension instance [-m message] [filename]\n");
print("Usage: manage_instance moreinfo instance [-m message] [filename]\n");
print("Usage: manage_instance extendold instance [-f] seconds\n");
print("Usage: manage_instance utilization instance\n");
print("Usage: manage_instance idledata instance\n");
print("Usage: manage_instance openstackstats instance\n");
exit(-1);
}
my $optlist = "dt:s";
......@@ -115,7 +117,7 @@ sub DoConsole();
sub DoTerminate();
sub DoExtend();
sub DoExtendOld();
sub DoDenyExtension();
sub DoDenyOrMoreInfo($);
sub DoRefresh();
sub DoReboot();
sub DoReload();
......@@ -127,6 +129,7 @@ sub DoUpdateKeys();
sub DoDeleteNodes();
sub DoUtilization();
sub DoIdleData();
sub DoOpenstack();
sub WriteCredentials();
sub StartMonitor();
sub StartMonitorInternal(;$@);
......@@ -184,7 +187,10 @@ if ($action eq "extendold") {
DoExtendOld();
}
elsif ($action eq "denyextension") {
DoDenyExtension()
DoDenyOrMoreInfo("deny")
}
elsif ($action eq "moreinfo") {
DoDenyOrMoreInfo("info")
}
elsif ($action eq "consoleurl") {
DoConsole()
......@@ -231,6 +237,9 @@ elsif ($action eq "utilization") {
elsif ($action eq "idledata") {
DoIdleData()
}
elsif ($action eq "openstackstats") {
DoOpenstack()
}
else {
usage();
}
......@@ -1208,6 +1217,7 @@ sub DoExtend()
my $url = $instance->webURL();
my $clusters = join(",", map { $_->domain() }
$instance->AggregateList());
my $pcount = $instance->physnode_count();
my $expires_time = str2time($slice->expires());
my $created_time = str2time($instance->created());
my $extensions = $instance->Brand()->ExtensionsEmailAddress();
......@@ -1508,7 +1518,8 @@ sub DoExtend()
"\n\n".
"Your experiment was started on $created\n".
"Your experiment will now expire at $expires\n".
"It is running on $clusters\n\n\n".
"You are using $pcount physical nodes.\n".
"It is running on $clusters\n\n".
"$url\n",
"From: $extensions\n" .
"BCC: $extensions");
......@@ -1651,13 +1662,14 @@ sub ExtendInternal($$$$)
# Deny extension, sending optional email to user (which is also saved in
# the extension history). We used to do this in PHP, which was silly.
#
sub DoDenyExtension()
sub DoDenyOrMoreInfo($)
{
my ($action) = @_;
my $errcode = -1;
my $reason;
if (! $this_user->IsAdmin()) {
fatal("Only administrators can deny extensions");
fatal("Only administrators can deny extensions or request info");
}
if (@ARGV == 2) {
my $arg = shift(@ARGV);
......@@ -1682,12 +1694,6 @@ sub DoDenyExtension()
}
close(MSG);
}
return DenyExtensionInternal($reason);
}
sub DenyExtensionInternal($)
{
my ($reason) = @_;
my $creator = $instance->GetGeniUser();
my $slice = $instance->GetGeniSlice();
my $name = $instance->name();
......@@ -1697,14 +1703,25 @@ sub DenyExtensionInternal($)
localtime(str2time($instance->created())));
my $now = POSIX::strftime("20%y-%m-%d %H:%M:%S %Z", localtime());
my $url = $instance->webURL();
my $pcount = $instance->physnode_count();
my $extensions= $instance->Brand()->ExtensionsEmailAddress();
my $message = "Your extension was denied by the site administrator!\n";
my $clusters = join(",", map { $_->domain() }
$instance->AggregateList());
my ($message,$subject);
if ($action eq "deny") {
$message = "Your extension was denied by the site administrator!";
$subject = "Experiment Extension Denied: $name";
}
else {
$message = "Hi, we need more information about your experiment: $name";
$subject = "Information request for Experiment: $name";
}
#
# New extension mechanism
#
my $extensionargs = {
"action" => "deny",
"action" => ($action eq "deny" ? "deny" : "info"),
"uid" => $this_user->uid(),
"uid_idx" => $this_user->uid_idx(),
"message" => $message,
......@@ -1726,25 +1743,27 @@ sub DenyExtensionInternal($)
"Date: $now\n".
"Expires: $expires\n".
"Reason:\n".
$message . "\n".
$message . "\n\n".
$reason . "\n\n".
"-----------------------------------------------\n";
$instance->Brand()->SendEmail($creator->email(),
"Experiment Extension Denied: $name",
$message . "\n" .
$reason .
"\n\n".
$instance->Brand()->SendEmail($creator->email(), $subject,
$message . "\n\n" .
$reason . "\n\n".
"Your experiment was started on $created\n".
"Your experiment expires at $expires\n".
"You are using $pcount physical nodes.\n".
"It is running on $clusters\n\n".
"$url\n",
"From: $extensions\n" .
"BCC: $extensions");
$instance->AddExtensionHistory($text);
# For the dashboard and status page.
$instance->Update({"extension_requested" => 0,
"extension_denied" => 1,
"extension_denied_reason" => $reason});
if ($action eq "deny") {
$instance->Update({"extension_requested" => 0,
"extension_denied" => 1,
"extension_denied_reason" => $reason});
}
return 0;
}
......@@ -3252,6 +3271,96 @@ sub DoUtilization()
exit($errcode);
}
#
# Grab the openstack utilization file and stick it into the DB.
#
# Looks through the profile parameters in the manifest for the name of
# the controller node, asks the aggregate for the openstack data using
# that name, and stores the returned value in the openstack_utilization
# field of the instance.
#
# Does not return. Exits with 0 on success. On failure the message is
# printed to STDERR (and recorded in the webtask when there is one) and
# the exit code is 1, or -1 when the RPC gave no response, or the
# response code when the aggregate reported an error.
#
sub DoOpenstack()
{
    my $errmsg;
    my $errcode = 1;

    #
    # Create the webtask object; the web interface gave us an anonymous
    # webtask, so we can use it before lock.
    #
    if (defined($webtask_id)) {
        $webtask = WebTask->Lookup($webtask_id);
        fatal("Could not lookup webtask object")
            if (!defined($webtask));
        # Convenient.
        $webtask->AutoStore(1);
    }

    #
    # Need to look inside the rspec to find the name of the controller node.
    # Fetch the aggregates in list context and count the array, so the
    # test does not depend on what AggregateList() yields in scalar
    # context.
    #
    my @aggregates = $instance->AggregateList();
    if (!@aggregates) {
        # Without this, the manifest() call below would die on undef.
        $errmsg = "No aggregates for experiment";
        goto bad;
    }
    if (@aggregates > 1) {
        $errmsg = "Too many aggregates, ".
            "is this really an Openstack experiment?";
        goto bad;
    }
    my ($aggregate) = @aggregates;
    if (!defined($aggregate->manifest())) {
        $errmsg = "No manifest for experiment";
        goto bad;
    }
    my $manifest = GeniXML::Parse($aggregate->manifest());
    if (! defined($manifest)) {
        $errmsg = "Could not parse manifest for $aggregate";
        goto bad;
    }

    #
    # We have to look inside the parameters to find the controller node.
    # Each parameter is text of the form name="value"; the name is
    # compared case-insensitively.
    #
    my $NS = "http://www.protogeni.net/resources/rspec/ext/johnsond/1";
    my $controller;
    foreach my $param (GeniXML::FindNodesNS("n:profile_parameters/n:parameter",
                                            $manifest, $NS)->get_nodelist()) {
        my $value = $param->textContent();
        if ($value =~ /^([^=]+)="(.+)"$/) {
            if (lc($1) eq "controller") {
                $controller = $2;
                print "Controller = $controller\n"
                    if ($debug);
                last;
            }
        }
    }
    if (!defined($controller)) {
        $errmsg = "Could not find the CONTROLLER parameter";
        goto bad;
    }

    #
    # So now we can ask the aggregate to grab the file from the proper
    # node in the topology; we do not want the cluster to have to figure
    # that part out. Hmm, maybe we should tell the cluster what file too?
    #
    my $response = $aggregate->OpenstackData($controller);
    if (!defined($response)) {
        # No trailing newline here; one is added when it is printed.
        $errmsg = "RPC Error calling SliceOpenstackData on $aggregate";
        $errcode = -1;
        goto bad;
    }
    if ($response->code() != GENIRESPONSE_SUCCESS) {
        $errmsg = "Could not get openstack json file for sliver: ".
            $response->output();
        $errcode = $response->code();
        goto bad;
    }
    if ($debug) {
        print $response->value() . "\n";
    }
    $instance->Update({"openstack_utilization" => $response->value()});
    # NOTE(review): the webtask is not marked as exited on this path;
    # confirm whether that is wanted when a webtask was supplied.
    exit(0);

  bad:
    print STDERR $errmsg . "\n";
    if (defined($webtask)) {
        $webtask->output($errmsg);
        $webtask->Exited($errcode);
    }
    exit($errcode);
}
#
# Get idledata info from the clusters.
#
......
......@@ -129,6 +129,10 @@ class Instance
// Field accessors; each one returns the named field via $this->field().
function servername() { return $this->field('servername'); }
function aggregate_urn(){ return $this->field('aggregate_urn'); }
// Note that this one reads the 'privkey' field, not 'private_key'.
function private_key() { return $this->field('privkey'); }
// The 'isopenstack' field of this instance.
function isopenstack() { return $this->field('isopenstack'); }
// The 'openstack_utilization' field of this instance.
function openstack_utilization() {
return $this->field('openstack_utilization');
}
// Matches /aptlab/ against servername() and returns the preg_match()
// result: 1 when the pattern matches, 0 when it does not.
function IsAPT() {
return preg_match('/aptlab/', $this->servername());
}
......
require(window.APT_OPTIONS.configObject,
['underscore', 'js/quickvm_sup', 'moment',
['underscore', 'js/quickvm_sup', 'moment', 'js/idlegraphs',
'js/lib/text!template/adminextend.html',
'js/lib/text!template/waitwait-modal.html',
'js/lib/text!template/oops-modal.html'],
function (_, sup, moment, mainString, waitwaitString, oopsString)
function (_, sup, moment, ShowIdleGraphs,
mainString, waitwaitString, oopsString)
{
'use strict';
var extensions = null;
......@@ -78,6 +79,11 @@ function (_, sup, moment, mainString, waitwaitString, oopsString)
Action("extend");
return false;
});
$('#do-moreinfo').click(function (event) {
event.preventDefault();
Action("moreinfo");
return false;
});
}
//
......@@ -87,17 +93,22 @@ function (_, sup, moment, mainString, waitwaitString, oopsString)
{
var howlong = $('#days').val();
var reason = $("#reason").val();
var method = (action == "extend" ? "RequestExtension" : "DenyExtension");
var method = (action == "extend" ?
"RequestExtension" :
(action == "moreinfo" ?
"MoreInfo" : "DenyExtension"));
var callback = function(json) {
sup.HideModal("#waitwait-modal");
if (json.code) {
var message;
if (json.code < 0) {
message = "Could not extend experiment!";
message = "Operation failed!";
}
else {
message = "Could not extend experiment: " + json.value;
message = "Operation failed: " + json.value;
}
sup.SpitOops("oops", message);
return;
......@@ -203,197 +214,15 @@ function (_, sup, moment, mainString, waitwaitString, oopsString)
xmlthing.done(callback);
}
//
// Slothd graphs.
//
// The graph building code is no longer kept here; it comes from the
// js/idlegraphs module (ShowIdleGraphs), so that this page and the
// status page share one implementation. All that is left to do is say
// which experiment to graph and which panels receive the load average,
// control traffic and experiment traffic charts.
//
function LoadIdleData()
{
    ShowIdleGraphs({"uuid"     : window.UUID,
                    "loadavID" : "#loadavg-panel-div",
                    "ctrlID"   : "#ctrl-traffic-panel-div",
                    "exptID"   : "#expt-traffic-panel-div"});
}
// Helper.
......
//
// Slothd graphs
//
define(['underscore', 'js/quickvm_sup', 'moment'],
function(_, sup, moment)
{
'use strict';
var uuid = null;
var loadavID = null;
var ctrlID = null;
var exptID = null;
var C_callback = null;
function LoadIdleData() {
var exptTraffic = [];
var ctrlTraffic = [];
var loadavs = [];
var ProcessSite = function(idledata) {
/*
* Array of objects, one per node. But some nodes might not
* have any data (main array is zero), so need to skip those.
*/
var index = 0;
for (var i in idledata) {
var obj = idledata[i];
var node_id = obj.node_id;
var loadvalues = [];
//
// If idlestats finds no data, the main array is
// zero length. Skip.
//
if (obj.main.length == 0) {
console.info("No idledata for " + node_id);
continue;
}
// The main array is the load average data.
for (var j = 1; j < obj.main.length; j++) {
var loads = obj.main[j];
loadvalues[j - 1] = {
// convert seconds to milliseconds.
"x" : loads[0] * 1000,
"y" : loads[3],
};