Commit ecb66ab5 authored by Leigh Stoller's avatar Leigh Stoller

Add a "regression" mode to both the old assign_wrapper and the new

mapper wrapper. In regression mode, the wrapper/mapper proceeds
normally, creating a .vtop file, and then running assign with a fixed
seed. If the wrapper and the mapper agree on the .vtop file, then the
solution from assign should be identical.

The wrapper/mapper then proceeds normally, reserving resources and
making all the DB changes. Needless to say, this has to be on a
private copy of the database, with all nodes free. Creating that DB
was a tale in its own right.

At completion, call the existing BackupPhysicalState() function that
we use in swapmod, and write all the physical tables we have changed
(just the rows corresponding to the experiment of course). Then delete
all that state, and free the nodes.

If everything is working correctly, those physical tables should be
identical when created by the mapper or the wrapper.

Of course, it's not quite there yet. I have a few things to fix up
before diff -r produces no results.
parent 5f5452e1
......@@ -44,6 +44,7 @@ sub array_diff($$);
sub LoadCurrent();
sub SetUpTracing($$$$$);
sub fatal(@);
sub FinalizeRegression($);
#
# This function is the main assign loop. It converts the virtual
......@@ -84,19 +85,22 @@ sub usage ()
"implies -n\n";
exit($WRAPPER_FAILED);
}
my $optlist = "vutnfp";
my $optlist = "vutnfpr";
my $verbose = 0;
my $fixmode = 0;
my $updating = 0;
my $toponly = 0;
my $impotent = 0;
my $precheck = 0;
my $regression=0;
my $warnings = 0;
#
# Configure variables
#
my $TBROOT = "@prefix@";
my $NFREE = "$TBROOT/bin/nfree";
my $DBNAME = "@TBDBNAME@";
my $DELAYCAPACITY = @DELAYCAPACITY@; # Can be overridden by user!
$ENV{'PATH'} = "/usr/bin:$TBROOT/libexec:$TBROOT/sbin:$TBROOT/bin";
......@@ -178,14 +182,20 @@ if (defined($options{"p"})) {
if (defined($options{"f"})) {
$fixmode = 1;
}
if (defined($options{"r"})) {
if ($DBNAME eq "tbdb") {
fatal("Cannot use regression mode on main DB");
}
$regression = 1;
}
my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $ptopfile = "$pid-$eid-$$.ptop";
my $ptopfile = ($regression ? "$pid-$eid.ptop" : "$pid-$eid-$$.ptop");
# Since the topfile could change across
# swapins and modifies, it makes sense
# to store all of them. Helps in
# debugging.
my $topfile = "$pid-$eid-$$.top";
my $topfile = ($regression ? "$pid-$eid.top" : "$pid-$eid-$$.top");
TBDebugTimeStampsOn();
......@@ -816,7 +826,7 @@ TBDebugTimeStamp("TOP started");
#
# Do admission control test right away.
#
if (!$toponly) {
if (!($toponly || $regression)) {
fatal("Failed admission control checks!")
if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control));
}
......@@ -838,12 +848,19 @@ LoadPhysInfo();
#
LoadExperiment();
if ($regression) {
print STDERR "Freeing reserved nodes in regression mode\n";
system("export NORELOAD=1; $NFREE -x -a $pid $eid") == 0
or fatal("Could not release nodes in regression mode");
}
#
# If updating, load current experiment resources. We have to be careful
# of how this is merged in with the (new) desired topology. See below.
#
if ($updating) {
LoadCurrent();
if ($updating || $regression) {
LoadCurrent()
if ($updating);
print STDERR "Resetting DB before updating.\n";
$experiment->RemovePhysicalState();
}
......@@ -907,6 +924,11 @@ while (1) {
last
if ($retval == 0);
if ($regression) {
FinalizeRegression(1);
fatal("Failed to find solution in regression mode");
}
if (!$precheck && !$tried_precheck) {
my $ptopfile0 = $ptopfile;
my $impotent0 = $impotent;
......@@ -1017,6 +1039,8 @@ sub RunAssign ()
if ($virtcount || $simcount);
$cmdargs = "-n $cmdargs"
if ($precheck);
$cmdargs = "-s 123456 $cmdargs"
if ($regression);
my $cmd;
......@@ -1091,7 +1115,8 @@ sub RunAssign ()
# for debugging and archiving purposes
# We do not call it .log though, since we do not want it copied
# out to the user directory every swapin. See Experiment.pm
system("/bin/cp assign.log $pid-$eid-$$.assign");
my $assignlog = ($regression ? "$pid-$eid.assign" : "$pid-$eid-$$.assign");
system("/bin/cp assign.log $assignlog");
if (!open(ASSIGNFP, "assign.log")) {
print("Could not open assign logfile!\n");
return -1;
......@@ -2279,85 +2304,6 @@ PatchVirts();
exit(0)
if ($impotent);
#
# Seed the virt_agents table. This should probably be done elsewhere.
# Anyway, each lan/link needs an agent to handle changes to delays or
# other link parameters, and that agent (might be several) will be
# running on more than one node. Delay node agent, wireless agent,
# etc. They might be running on a node different then where the link
# is really (delay node). So, just send all link event to all nodes,
# and let them figure out what they should do (what to ignore, what to
# act on). So, specify a wildcard; a "*" for the vnode will be treated
# specially by the event scheduler, and no ipaddr will be inserted
# into the event. Second, add pseudo agents, one for each member of
# the link (or just one if a lan). The objname is lan-vnode, and
# allows us to send an event to just the agent controlling that link
# (or lan node delay). The agents will subscribe to these additional
# names when they start up.
#
# Must clean these to avoid duplicates.
DBQueryFatal("delete from event_groups ".
"where pid='$pid' and eid='$eid' and ".
" (group_name='__all_lans' or group_name='__all_tracemon')");
foreach my $lan (keys(%virt_lans)) {
DBQueryFatal("replace into virt_agents ".
" (exptidx, pid, eid, vname, vnode, objecttype) ".
" select '$experiment_idx', '$pid', '$eid', '$lan', '*', ".
" idx from event_objecttypes where ".
" event_objecttypes.type='LINK'");
#
# XXX there is no link (delay) agent running on plab nodes
# (i.e., protocol==ipv4) currently, so we cannot be sending them
# events that they will not acknowledge.
#
if (virtlanprotocol($lan) ne "ipv4") {
DBQueryFatal("insert into event_groups ".
" (exptidx, pid, eid, idx, group_name, agent_name) ".
" values ('$experiment_idx', ".
" '$pid', '$eid', NULL, '__all_lans', '$lan')");
}
# Must clean these to avoid duplicates created by the loop below.
DBQueryFatal("delete from event_groups ".
"where pid='$pid' and eid='$eid' and ".
" group_name='${lan}-tracemon'");
# Must clean these cause of how this code used to work; temporary.
DBQueryFatal("delete from virt_agents ".
"where pid='$pid' and eid='$eid' and ".
" vname='${lan}-tracemon'");
foreach my $member (virtlanmembers($lan)) {
my ($vnode) = split(":", $member);
DBQueryFatal("replace into virt_agents ".
" (exptidx, pid, eid, vname, vnode, objecttype) ".
" select '$experiment_idx', '$pid', '$eid', ".
" '${lan}-${vnode}', '*', ".
" idx from event_objecttypes where ".
" event_objecttypes.type='LINK'");
DBQueryFatal("replace into virt_agents ".
" (exptidx, pid, eid, vname, vnode, objecttype) ".
" select '$experiment_idx', '$pid', '$eid', ".
" '${lan}-${vnode}-tracemon', '*', ".
" idx from event_objecttypes where ".
" event_objecttypes.type='LINKTRACE'");
DBQueryFatal("insert into event_groups ".
" (exptidx, pid, eid, idx, group_name, agent_name) ".
" values ('$experiment_idx', '$pid', '$eid', NULL, ".
" '__all_tracemon', '${lan}-${vnode}-tracemon')");
DBQueryFatal("insert into event_groups ".
" (exptidx, pid, eid, idx, group_name, agent_name) ".
" values ('$experiment_idx', '$pid', '$eid', NULL, ".
" '${lan}-tracemon','${lan}-${vnode}-tracemon')");
}
}
#
# Enter delays.
#
......@@ -2968,6 +2914,8 @@ if( $simcount > 0 ) {
}
TBDebugTimeStamp("assign_wrapper finished");
FinalizeRegression(0)
if ($regression);
exit(0);
######################################################################
......@@ -5418,7 +5366,7 @@ sub CreateTopFile()
$maximum_nodes = $physnode_count + keys(%delaynodes);
$minimum_nodes = POSIX::ceil($minimum_nodes);
if (! $impotent) {
if (! ($impotent || $regression)) {
DBQueryFatal("UPDATE experiments set maximum_nodes=$maximum_nodes, " .
" minimum_nodes=$minimum_nodes ".
"where pid='$pid' and eid='$eid'");
......@@ -5715,6 +5663,39 @@ sub nodejailosid($)
return $nextosid;
}
#
# In regression mode we want to save the physical state and then clear
# the physical resources.
#
# Argument:
#   $error - when false, the experiment's physical state is first written
#            to "<cwd>/$pid-$eid.pstate" via BackupPhysicalState(); when
#            true that backup step is skipped.
#
# In both cases the physical state is then removed, and any nodes recorded
# in %newreservednodes are handed to nfree. Returns 0 on success. Every
# failure prints a message on STDERR and exits the process with status 1.
#
sub FinalizeRegression($)
{
    my ($error) = @_;

    if (!$error) {
        #
        # The backup file goes in the current directory. Refuse to go on
        # if we cannot determine it; otherwise the path would silently
        # collapse to "/$pid-$eid.pstate".
        #
        my $cwd = `/bin/pwd`;
        if ($? || !defined($cwd)) {
            print STDERR "Could not determine current directory!\n";
            exit(1);
        }
        chomp($cwd);
        if ($cwd eq "") {
            print STDERR "Could not determine current directory!\n";
            exit(1);
        }

        print STDERR "Saving physical state in regression mode\n";
        if ($experiment->BackupPhysicalState("$cwd/$pid-$eid.pstate") != 0) {
            print STDERR "Could not save physical state!\n";
            exit(1);
        }
    }

    print STDERR "Removing physical state in regression mode\n";
    if ($experiment->RemovePhysicalState() != 0) {
        print STDERR "Could not remove physical state!\n";
        exit(1);
    }

    my @nodeids = keys(%newreservednodes);
    if (@nodeids) {
        #
        # NORELOAD=1 is placed in the environment of nfree, as the
        # original "export NORELOAD=1; ..." shell command did. Using the
        # list form of system() keeps pid/eid/node ids away from the shell.
        #
        local $ENV{'NORELOAD'} = 1;
        system($NFREE, "-x", $pid, $eid, @nodeids);
        if ($?) {
            print STDERR "Could not free nodes in regression mode!\n";
            exit(1);
        }
    }
    return 0;
}
#
# All exits happen via this function!
#
......
This diff is collapsed.
......@@ -26,7 +26,7 @@ use POSIX ":sys_wait_h";
# Failures in assign always cause the caller to stop retrying.
#
# The CANRECOVER bit indicates 'recoverability' (no db or physical
# state was modified by the time the error occurred). This is relavent
# state was modified by the time the error occurred). This is relevant
# to only modify operations (update).
#
my $WRAPPER_SUCCESS = 0x00;
......@@ -43,30 +43,35 @@ sub usage ()
print STDERR " -u - Enables update mode\n";
print STDERR " -f - Fix current resources during update mode\n";
print STDERR " -n - Run assign, but do not reserve/modify resources.\n";
print STDERR " -r - Regression mode.\n";
print STDERR " -p - Do a precheck for mapability on an empty testbed - ".
"implies -n\n";
exit($WRAPPER_FAILED);
}
my $optlist = "vunfp";
my $verbose = 0;
my $debug = 1;
my $fixmode = 0;
my $updating = 0;
my $impotent = 1;
my $precheck = 0;
my $quiet = 0;
my $warnings = 0;
my $maxrun = 3; # Maximum number of times we run assign.
my $optlist = "dvunfprqc";
my $verbose = 0;
my $debug = 0;
my $fixmode = 0;
my $updating = 0;
my $impotent = 0;
my $precheck = 0;
my $regression = 0;
my $quiet = 0;
my $clear = 0;
my $warnings = 0;
my $maxrun = 3; # Maximum number of times we run assign.
#
# Configure variables
#
my $TB = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS = "@TBOPSEMAIL@";
my $ASSIGN = "$TB/libexec/assign";
my $WRAPPER2 = "$TB/libexec/assign_wrapper2";
my $PTOPGEN = "$TB/libexec/ptopgen";
my $VTOPGEN = "$TB/bin/vtopgen";
my $NFREE = "$TB/bin/nfree";
#
# Load the Testbed support stuff.
......@@ -77,12 +82,14 @@ use libtestbed;
use libtblog;
use libvtop;
use libadminctrl;
use User;
# Protos
sub fatal(@);
sub debug($);
sub chat($);
sub RunAssign($$);
sub FinalizeRegression($);
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
......@@ -114,6 +121,9 @@ if (@ARGV < 2) {
if (defined($options{"v"})) {
$verbose++;
}
if (defined($options{"d"})) {
$debug++;
}
if (defined($options{"u"})) {
$updating = 1;
}
......@@ -126,6 +136,19 @@ if (defined($options{"f"})) {
if (defined($options{"p"})) {
$precheck = 1;
}
if (defined($options{"r"})) {
if ($DBNAME eq "tbdb") {
fatal("Cannot use regression mode on main DB");
}
$regression = 1;
$clear = 1;
}
if (defined($options{"q"})) {
$quiet = 1;
}
if (defined($options{"c"})) {
$clear = 1;
}
my $pid = $ARGV[0];
my $eid = $ARGV[1];
......@@ -133,6 +156,17 @@ my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
fatal("Could not lookup experiment object $pid,$eid!")
}
#
# Verify that this person can muck with the experiment.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
tbdie("You ($UID) do not exist!");
}
if (!TBAdmin() &&
! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
fatal("You do not have permission to map this experiment!");
}
#
# These are the flags to the vtop creation code.
......@@ -146,6 +180,8 @@ $vtopflags |= $libvtop::VTOP_FLAGS_FIXNODES
if ($fixmode);
$vtopflags |= $libvtop::VTOP_FLAGS_IMPOTENT
if ($impotent);
$vtopflags |= $libvtop::VTOP_FLAGS_REGRESSION
if ($regression);
chat("Starting the new and improved mapper wrapper.\n");
......@@ -153,6 +189,24 @@ my $vtop = libvtop->Create($experiment, $vtopflags);
if (!defined($vtop)) {
fatal("Could not create vtop structure for $experiment");
}
#
# If updating, load current experiment resources. We have to be careful
# of how this is merged in with the (new) desired topology.
#
if ($updating) {
$vtop->LoadCurrentResources() == 0
or fatal("Could not load current resources into mapper");
}
if (!$impotent && ($updating || $clear)) {
if ($regression) {
chat("Freeing reserved nodes in regression mode\n");
system("export NORELOAD=1; $NFREE -x -a $pid $eid") == 0
or fatal("Could not release nodes in regression mode");
}
chat("Clearing physical state before updating.\n");
$experiment->RemovePhysicalState();
}
TBDebugTimeStamp("vtopgen started");
$vtop->CreateVtop() == 0
or fatal("Could not create vtop for $experiment");
......@@ -178,7 +232,7 @@ TBDebugTimeStamp("mapper loop started");
while (1) {
chat("Assign run $currentrun\n");
my $prefix = ($debug ? "$pid-$eid" : "$pid-$eid-$$");
my $prefix = ($debug || $regression ? "$pid-$eid" : "$pid-$eid-$$");
#
# When precheck is on, we only do one run in impotent mode and exit.
......@@ -201,10 +255,13 @@ while (1) {
last
if ($retval == 0);
if ($retval < 0) {
if ($retval < 0 || $regression) {
#
# Failure in assign.
#
FinalizeRegression(1)
if ($regression);
fatal({type => 'primary', severity => SEV_ERROR,
error => ['unretriable_assign_error']},
"Unretriable error. Giving up.");
......@@ -246,6 +303,8 @@ while (1) {
$currentrun++;
}
TBDebugTimeStamp("mapper loop finished");
FinalizeRegression(0)
if ($regression);
exit(0);
#
......@@ -263,8 +322,12 @@ sub RunAssign($$)
# Do admission control test, and gather the info.
#
my %admission_control;
fatal("Failed admission control checks!")
if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control));
if (!$regression) {
if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control)){
tberror("Failed admission control checks!\n");
return -1;
}
}
#
# Snapshot physical resources.
......@@ -294,7 +357,8 @@ sub RunAssign($$)
TBDebugTimeStamp("ptopgen started");
system("$PTOPGEN $ptopargs > $ptopfile");
if ($?) {
fatal("Failure in $ptopfile");
tberror("Failure in ptopgen\n");
return -1;
}
TBDebugTimeStamp("ptopgen finished");
......@@ -316,16 +380,22 @@ sub RunAssign($$)
#
# Now generate a vtop file and dump it to a file.
#
open(VTOPFILE, "> $vtopfile") or
fatal("Could not open $vtopfile: $!");
$vtop->PrintTop(*VTOPFILE) == 0 or
fatal("Could not print vtop file for $experiment");
if (! open(VTOPFILE, "> $vtopfile")) {
tberror("Could not open $vtopfile: $!\n");
return -1;
}
if ($vtop->PrintTop(*VTOPFILE) != 0) {
tberror("Could not print vtop file for $experiment\n");
return -1;
}
close(VTOPFILE);
if ($impotent) {
$experiment->Update({"maximum_nodes" => $vtop->maximum_nodes(),
"minimum_nodes" => $vtop->minimum_nodes() })
== 0 or fatal("Could not update min/max nodes for $experiment");
if (! ($impotent || $regression)) {
if ($experiment->Update({"maximum_nodes" => $vtop->maximum_nodes(),
"minimum_nodes" => $vtop->minimum_nodes() })){
tberror("Could not update min/max nodes for $experiment\n");
return -1;
}
}
# Run assign
......@@ -335,6 +405,8 @@ sub RunAssign($$)
if ($vtop->virtnodecount() || $vtop->simnodecount());
$args = "-n $args"
if ($precheck);
$args = "-s 123456 $args"
if ($regression);
chat("assign command: '$cmd $args'\n");
#
......@@ -357,9 +429,9 @@ sub RunAssign($$)
kill('TERM', -$pgrp);
waitpid($childpid, 0);
fatal({cause => 'canceled', severity => SEV_IMMEDIATE,
error => ['cancel_flag']},
"Cancel flag set; aborting assign run!");
tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
error => ['cancel_flag']},
"Cancel flag set; aborting assign run!\n");
return -1;
}
# Loop again to reap child above before exit.
......@@ -379,9 +451,9 @@ sub RunAssign($$)
# Check cancel flag before continuing.
if ($experiment->canceled()) {
fatal({cause => 'canceled', severity => SEV_IMMEDIATE,
error => ['cancel_flag']},
"Cancel flag set; aborting assign run!");
tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
error => ['cancel_flag']},
"Cancel flag set; aborting assign run!\n");
return -1;
}
......@@ -416,7 +488,7 @@ sub RunAssign($$)
return 0;
}
debug("Reading assign results.\n");
chat("Reading assign results.\n");
if (!open(ASSIGNFP, "assign.log")) {
print("Could not open assign logfile! $!\n");
return -1;
......@@ -432,9 +504,9 @@ sub RunAssign($$)
}
# Check cancel flag before continuing.
if ($experiment->canceled()) {
fatal({cause => 'canceled', severity => SEV_IMMEDIATE,
error => ['cancel_flag']},
"Cancel flag set; aborting assign run!");
tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
error => ['cancel_flag']},
"Cancel flag set; aborting assign run!\n");
return -1;
}
if ($vtop->AllocNodes() != 0) {
......@@ -449,7 +521,40 @@ sub RunAssign($$)
print("Could not InitializePhysNodes\n");
return -1;
}
exit(0);
return 0;
}
#
# In regression mode we want to save the physical state and then clear
# the physical resources.
#
# Argument:
#   $error - when false, the experiment's physical state is first written
#            to "<cwd>/$pid-$eid.pstate" via BackupPhysicalState(); when
#            true that backup step is skipped.
#
# In both cases the physical state is then removed, and any nodes reported
# by $vtop->newreservednodes() are handed to nfree. Returns 0 on success.
# Every failure prints a message on STDERR and exits the process with
# status 1.
#
sub FinalizeRegression($)
{
    my ($error) = @_;

    if (!$error) {
        #
        # The backup file goes in the current directory. Refuse to go on
        # if we cannot determine it; otherwise the path would silently
        # collapse to "/$pid-$eid.pstate".
        #
        my $cwd = `/bin/pwd`;
        if ($? || !defined($cwd)) {
            print STDERR "Could not determine current directory!\n";
            exit(1);
        }
        chomp($cwd);
        if ($cwd eq "") {
            print STDERR "Could not determine current directory!\n";
            exit(1);
        }

        chat("Saving physical state in regression mode\n");
        if ($experiment->BackupPhysicalState("$cwd/$pid-$eid.pstate") != 0) {
            print STDERR "Could not save physical state!\n";
            exit(1);
        }
    }

    chat("Removing physical state in regression mode\n");
    if ($experiment->RemovePhysicalState() != 0) {
        print STDERR "Could not remove physical state!\n";
        exit(1);
    }

    #
    # Ask for the list exactly once, in list context, and test the array.
    # The original called the accessor twice, once in boolean context and
    # once in list context.
    #
    my @newreservednodes = $vtop->newreservednodes();
    if (@newreservednodes) {
        #
        # NORELOAD=1 is placed in the environment of nfree, as the
        # original "export NORELOAD=1; ..." shell command did. Using the
        # list form of system() keeps pid/eid/node ids away from the shell.
        #
        local $ENV{'NORELOAD'} = 1;
        system($NFREE, "-x", $pid, $eid, @newreservednodes);
        if ($?) {
            print STDERR "Could not free nodes in regression mode!\n";
            exit(1);
        }
    }
    return 0;
}
# We will come through here no matter how we exit.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment