All new accounts created on Gitlab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

Commit 2ed46ed2 authored by Leigh B Stoller's avatar Leigh B Stoller

Add linktest support to the portal. Work in progress.

* A new Start Linktest button is in the bottom right of the topology tab.

* When started, we create a new tab to display the linktest output
  (currently the unmodified output from Classic emulab linktest). The state
  of the experiment is changed to linktest to prevent further action on the
  experiment, except of course for stop linktest.

* When linktest is running, there is a Stop Linktest button in the upper
  collapse panel. This will stop linktest in its tracks, but the tabs
  remain. When you kill a linktest tab while linktest is running, linktest
  continues running; you have to use the Stop button to stop it.

* I have added a small help document to the templates directory that the
  user is shown when they click on the ? mark in the linktest modal.
  Written in markdown format, it is a culling of text from the Emulab wiki
  linktest page.

Caveats:

* Because of polling, there is a short (5-15 seconds) delay before the
  experiment is returned to the ready state. Not much to do about this in a
  polling world. Wouldn't callbacks be nice?

* Linktest can be used on a multisite topology, although cross site links
  cannot be tested at this point. Links/Lans that are contained entirely within
  one site or the other are testable though. We build a new tab for each
  site to spew the linktest output.
parent f7f97d7f
#!/usr/bin/perl -wT
#
# Copyright (c) 2007-2015 University of Utah and the Flux Group.
# Copyright (c) 2007-2016 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -1941,11 +1941,16 @@ sub RunLinktest($$$)
"credentials" => [$slice_credential->asString(),
$speaksfor_credential->asString()],
};
if ($action eq "kill") {
$args->{"stop"} = 1;
if ($action eq "stop") {
$args->{"action"} = "stop";
}
elsif ($action eq "status") {
$args->{"action"} = "status";
}
else {
$args->{"level"} = $level;
$args->{"action"} = "start";
$args->{"async"} = 1;
$args->{"level"} = $level;
}
my $cmurl = $authority->url();
$cmurl =~ s/protogeni/protogeni\/stoller/ if ($usemydevtree);
......
#!/usr/bin/perl -w
#
# Copyright (c) 2000-2015 University of Utah and the Flux Group.
# Copyright (c) 2000-2016 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -1993,11 +1993,12 @@ sub DoLinktest()
my $action = "start";
my $level = 1;
my $errmsg;
my $errcode = 1;
if (@ARGV) {
my $arg = shift(@ARGV);
if ($arg eq "-k") {
$action = "kill";
$action = "stop";
}
elsif ($arg =~ /^\d$/ && $arg >= 1 && $arg <= 4) {
$level = $arg;
......@@ -2006,16 +2007,6 @@ sub DoLinktest()
usage();
}
}
if ($action eq "start") {
if ($instance->status() ne "ready") {
fatal("Must be ready to run linktest!");
}
}
else {
if ($instance->status() ne "linktest") {
fatal("Linktest is not running!");
}
}
my $slice = $instance->GetGeniSlice();
if (!defined($slice)) {
fatal("No slice for instance!");
......@@ -2033,6 +2024,19 @@ sub DoLinktest()
if ($slice->Lock()) {
fatal("Slice is busy, cannot lock it");
}
# Check after lock to prevent concurrent startup.
if ($action eq "start") {
if ($instance->status() ne "ready") {
$slice->UnLock();
fatal("Must be ready to run linktest!");
}
}
else {
if ($instance->status() ne "linktest") {
$slice->UnLock();
fatal("Linktest is not running!");
}
}
#
# Create the webtask object, but AFTER locking the slice so we do
# not destroy one in use.
......@@ -2056,17 +2060,28 @@ sub DoLinktest()
return -1;
}
if ($response->code() != GENIRESPONSE_SUCCESS) {
print STDERR "Could not run linktest on sliver: ".
print STDERR "Could not $action linktest on sliver: ".
$response->output() . "\n";
$webtask->output($response->output());
$webtask->Exited(1);
return 1;
$webtask->Exited($response->code());
return $response->code();
}
my $blob = $response->value();
if ($blob->{'status'} eq "running") {
$webtask->status("running");
$webtask->url($blob->{'url'});
}
elsif ($blob->{'status'} eq "stopped") {
$webtask->status("stopped");
$webtask->results($blob->{'results'});
$webtask->Exited(0);
}
print STDERR "foo\n" . $response->output() . "\n";
$webtask->output($response->output());
$webtask->Exited(0);
return 0;
};
# Change status now.
my $old_status = $instance->status();
$instance->SetStatus("linktest");
my @return_codes = ();
my @agglist = $instance->AggregateList();
if (ParRun({"maxwaittime" => 99999,
......@@ -2081,24 +2096,71 @@ sub DoLinktest()
foreach my $agg (@agglist) {
my $code = shift(@return_codes);
if ($code) {
$errmsg = "Could not run linktest on some slivers";
$errmsg = "Could not $action linktest on some slivers";
if ($agg->webtask()->output()) {
$errmsg .= ": " . $agg->webtask()->output();
$errcode = $code;
}
goto bad;
}
if (!defined($webtask) && $agg->webtask()->output()) {
print $agg->webtask()->output();
if (!defined($webtask) && $agg->webtask()->results()) {
print $agg->webtask()->results();
}
}
$slice->UnLock();
if ($action eq "stop") {
$instance->SetStatus("ready");
$slice->UnLock();
exit(0);
}
#
# Okay, now we want to wait for linktest to finish on all the clusters
# so that we can change the status back to ready.
#
my $logfile = TBMakeLogname("linktest");
if (my $childpid = TBBackGround($logfile)) {
sleep(1);
my $status = 0;
my $foo = waitpid($childpid, &WNOHANG);
if ($foo) {
$status = $? >> 8;
}
# Unlock so user can stop linktest.
$slice->UnLock();
exit($status);
}
#
# Loop, asking each cluster for the linktest status,
#
my %running = map { $_->aggregate_urn() => $_ } @agglist;
while (keys(%running)) {
foreach my $sliver (values(%running)) {
my $response = $sliver->RunLinktest("status");
if (!defined($response)) {
print STDERR "RPC Error calling linktest on $sliver\n";
next;
}
if ($response->code() != GENIRESPONSE_SUCCESS) {
print STDERR "Could not get linktest status for sliver: ".
$response->output() . "\n";
next;
}
my $blob = $response->value();
if ($blob->{'status'} eq "stopped") {
delete($running{$sliver->aggregate_urn()});
}
}
sleep(5);
}
$instance->SetStatus($old_status);
exit(0);
bad:
$instance->SetStatus($old_status);
$slice->UnLock();
print STDERR $errmsg . "\n";
if (defined($webtask)) {
$webtask->output($errmsg);
$webtask->Exited(1);
$webtask->Exited($errcode);
}
exit(1);
}
......
#
# Copyright (c) 2000-2015 University of Utah and the Flux Group.
# Copyright (c) 2000-2016 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -156,6 +156,7 @@ APTJSFILES = $(wildcard $(SRCDIR)/aptui/js/*.js)
APTLIBFILES = $(wildcard $(SRCDIR)/aptui/js/lib/*.js)
APTJQRFILES = $(wildcard $(SRCDIR)/aptui/js/lib/jQRangeSlider/*.js)
APTTEMPLATES = $(wildcard $(SRCDIR)/aptui/template/*.html)
APTTEMPLATES += $(wildcard $(SRCDIR)/aptui/template/*.md)
APTCSSFILES = $(wildcard $(SRCDIR)/aptui/css/*.css)
APTFONTS = $(wildcard $(SRCDIR)/aptui/fonts/*)
APTIMAGES = $(wildcard $(SRCDIR)/aptui/images/*)
......
......@@ -12,6 +12,20 @@ function HideModal(which)
// console.log('Hide modal ' + which);
$( which ).modal('hide');
}
// Show the "waitwait-modal-withmessage" modal. The optional message is
// inserted as HTML into the modal's message span; when no message is
// given the span is set to the empty string.
function ShowWaitWait(message)
{
    var content = (message === undefined) ? "" : message;

    $('#waitwait-modal-withmessage-message').html(content);
    ShowModal('#waitwait-modal-withmessage');
}
// Clear the message span of the "waitwait-modal-withmessage" modal and
// then hide the modal.
function HideWaitWait()
{
    var messageSpan = $('#waitwait-modal-withmessage-message');

    messageSpan.html("");
    HideModal('#waitwait-modal-withmessage');
}
function CallServerMethod(url, route, method, args)
{
......@@ -271,6 +285,8 @@ function VerifySpeaksfor(speaksfor, signature)
return {
ShowModal: ShowModal,
HideModal: HideModal,
ShowWaitWait: ShowWaitWait,
HideWaitWait: HideWaitWait,
CallServerMethod: CallServerMethod,
maketopmap: maketopmap,
SpitOops: SpitOops,
......
This diff is collapsed.
......@@ -123,7 +123,9 @@ $routing = array("myprofiles" =>
"Lockout" =>
"Do_Lockout",
"Quarantine" =>
"Do_Quarantine")),
"Do_Quarantine",
"LinktestControl" =>
"Do_Linktest")),
"approveuser" =>
array("file" => "approveuser.ajax",
"guest" => false,
......
<?php
#
# Copyright (c) 2000-2015 University of Utah and the Flux Group.
# Copyright (c) 2000-2016 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -1193,6 +1193,117 @@ function Do_Quarantine()
SPITAJAX_RESPONSE("Success");
}
#
# Ajax handler that starts or stops linktest on the current instance.
#
# Reads from $ajax_args:
#   action - required; "start" or "stop".
#   uuid   - passed on the webmanage_instance command line.
#   level  - optional linktest level, 1 through 4 (default 1).
#
# Response:
#   start - an array keyed by $urn_mapping[<aggregate urn>], with one entry
#           for each sliver that has a webtask. Each entry has "status" and
#           either "url" (when status is "running") or "results".
#   stop  - 0, including the case where linktest is not running.
# Errors are reported with SPITAJAX_ERROR.
#
function Do_Linktest()
{
    global $instance, $this_user, $ajax_args, $urn_mapping;

    if (StatusSetupAjax(0)) {
        return;
    }
    if (!isset($ajax_args["action"])) {
        SPITAJAX_ERROR(1, "Missing action");
        return;
    }
    $action = $ajax_args["action"];
    # NOTE(review): uuid is interpolated into the command line below;
    # presumably StatusSetupAjax() has already validated it -- confirm.
    $uuid   = $ajax_args["uuid"];
    $level  = 1;

    if (isset($ajax_args["level"])) {
        $level = $ajax_args["level"];
        if (!preg_match("/^\d+$/", $level) || $level < 1 || $level > 4) {
            SPITAJAX_ERROR(1, "Bad level request");
            return;
        }
    }

    #
    # Work out the backend command for the requested action. Everything
    # after this is shared between start and stop.
    #
    if ($action == "start") {
        if ($instance->status() == "linktest") {
            SPITAJAX_ERROR(1, "Linktest already running");
            return;
        }
        $command = "linktest $uuid $level";
    }
    elseif ($action == "stop") {
        # Nothing to stop; not an error.
        if ($instance->status() != "linktest") {
            SPITAJAX_RESPONSE(0);
            return;
        }
        $command = "linktest $uuid -k";
    }
    else {
        SPITAJAX_ERROR(-1, "Improper linktest action");
        return;
    }

    $webtask_id = WebTask::GenerateID();
    $retval = SUEXEC($this_user->uid(), "nobody",
                     "webmanage_instance -t $webtask_id -- " . $command,
                     SUEXEC_ACTION_IGNORE);
    $webtask = WebTask::Lookup($webtask_id);

    if ($retval != 0) {
        if ($retval < 0) {
            SPITAJAX_ERROR(-11, "Internal error, cannot proceed.");
            # Notify tbops.
            SUEXECERROR(SUEXEC_ACTION_CONTINUE);
        }
        elseif ($webtask) {
            SPITAJAX_ERROR(1, $webtask->TaskValue("output"));
        }
        else {
            SUEXECERROR(SUEXEC_ACTION_CONTINUE);
            SPITAJAX_ERROR(-1, "Internal Error. Please try again later");
        }
        if ($webtask) {
            $webtask->Delete();
        }
        return;
    }

    #
    # The top level webtask does not tell us anything on success, so get
    # rid of it. The lookup can come back empty, so check before using it.
    #
    if ($webtask) {
        $webtask->Delete();
    }

    if ($action == "stop") {
        SPITAJAX_RESPONSE(0);
        return;
    }

    # Start: report the per sliver linktest state.
    $blob = array();
    foreach ($instance->slivers() as $sliver) {
        if ($sliver->webtask_id() &&
            $sliver_webtask = WebTask::Lookup($sliver->webtask_id())) {
            $tmp = array();
            $tmp["status"] = $sliver_webtask->TaskValue("status");
            if ($tmp["status"] == "running") {
                $tmp["url"] = $sliver_webtask->TaskValue("url");
            }
            else {
                $tmp["results"] = $sliver_webtask->TaskValue("results");
            }
            $blob[$urn_mapping[$sliver->aggregate_urn()]] = $tmp;
        }
    }
    SPITAJAX_RESPONSE($blob);
}
# Local Variables:
# mode:php
# End:
......
<div id='linktest-modal' class='modal fade'>
<div class='modal-dialog'>
<div class='modal-content'>
<div class='modal-header'>
<center>
<h3>Run Linktest
<a class='btn btn-xs' id="linktest-help-button"
data-toggle="collapse" href="#linktest-help">
<span class='glyphicon glyphicon-question-sign'
style='margin-bottom: 4px;'></span></a></h3>
</center>
</div>
<div class='modal-body'>
Please select the test level and then click Start. Higher
levels will take longer to complete and require patience.
<br>
<br>
<center>
<select id='linktest-level'>
<option selected value='1'>Level 1 - Connectivity and Latency</option>
<option value="2">Level 2 - Plus Static Routing</option>
<option value="3">Level 3 - Plus Loss</option>
<option value="4">Level 4 - Plus Bandwidth</option>
</select>
<br>
<br>
<button type='button' style='margin-right: 20px;'
class='btn btn-primary btn-sm'
data-dismiss='modal' aria-hidden='true'>
Cancel</button>
<button type='button' class='btn btn-success btn-sm'
id='linktest-start-button'>
Start</button>
</center>
<div class="collapse" id="linktest-help"></div>
</div>
</div>
</div>
</div>
#### What is Linktest?
Linktest is an online validation test of the network characteristics
of your experiment. It is a check to make sure that we have set up the
network as you requested, within certain limitations listed below.
There are four levels of linktest you can choose:
1. Connectivity and Latency
1. Plus Static Routing
1. Plus Link Loss
1. Plus Bandwidth
It should be obvious what each one does, although it is important to
understand that bandwidth tests can take up to 20 seconds per link, so it
can take a long time to run linktest on a large experiment (one that has
many links).
#### Limitations
* Not all bandwidths can be accurately measured, and linktest will skip
links that it knows will give false results (e.g., slow or lossy
links). Please check the output, and *be sure to test those links
yourself* if your results depend on total accuracy.
* As with any automated testing procedure, we have to balance the desire
for accuracy with the possibility of false positives. To reduce the
number of false positives, we allow for a small amount of fudge on any
link. If your results are dependent on total accuracy, then you should
*test your links yourself!*
* Linktest can take a long time on large experiments. Even on very small
experiments (5-10 nodes), doing the full bandwidth test can add 2-3
minutes. If you decide you have waited long enough, you can use the
Stop Linktest button on the topology tab.
#### Finally
Linktest is a convenient tool intended to do coarse-grained testing of your
links to find obvious problems. As mentioned above, if your application or
the paper you are writing depends on absolute fidelity, then you should
*test your links yourself!*
......@@ -131,6 +131,12 @@
<% } %>
<div class='pull-right'>
<% if (registered && !isfadmin) { %>
<button class='btn btn-xs btn-success hidden' disabled
style='margin-right: 10px;'
id='linktest-stop-button' type=button
data-toggle='popover'
data-content='Stop a running or wedged linktest'>
Stop Linktest</button>
<button class='btn btn-xs btn-primary hidden' disabled
id='clone_button' type=button>
Clone</button>
......@@ -244,11 +250,17 @@
<small>Click on a node for more options.
Click and drag to move things around.</small>
<% if (registered) { %>
<button class='btn btn-xs btn-success pull-right' disabled
id='refresh_button' type=button
data-toggle='popover'
data-content='Ask cluster for updated node status'>
Refresh Status</button>
<button class='btn btn-xs btn-success pull-right' disabled
id='refresh_button' type=button
data-toggle='popover'
data-content='Ask cluster for updated node status'>
Refresh Status</button>
<button class='btn btn-xs btn-success pull-right' disabled
style='margin-right: 10px;'
id='linktest-modal-button' type=button
data-toggle='popover'
data-content='Run linktest on the topology'>
Run Linktest</button>
<% } %>
</div>
<div class='tab-pane' id='listview'>
......@@ -436,4 +448,5 @@
<div id='imaging_div'></div>
<div id='oneonly_div'></div>
<div id='approval_div'></div>
<div id='linktest_div'></div>
</div>
......@@ -11,3 +11,20 @@
</div>
</div>
</div>
<div id='waitwait-modal-withmessage' class='modal fade'>
<div class='modal-dialog'>
<div class='modal-content'>
<div class='modal-header'>
<center><h3>Please Wait</h3></center>
</div>
<div class='modal-body'>
<center>
<span id="waitwait-modal-withmessage-message"></span>
<br>
<br>
<img src='images/spinner.gif' />
</center>
</div>
</div>
</div>
</div>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment