Commit 3593d9c6 authored by Leigh B. Stoller's avatar Leigh B. Stoller

My attempt to improve swapmod ...

Previously, any error in assign_wrapper would cause the experiment to
swap out because the "DB had been modified" ... well, I have isolated
all of the changes that are made, and errors in assign_wrapper proper
no longer do that. tbswap now restores the experiment back to the way
it was. Note that errors after assign_wrapper (like in os_setup) are
still a problem.

In addition, rather than kill off all of the vlans, leave them in
place and then do a comparison after assign_wrapper, removing obsolete
and modified vlans only. I have made use of the obsolete vlans table
for this by having snmpit track its changes in that table. There is a
bunch of new code in Lan.pm for doing the comparisons.
parent 0e225c56
This diff is collapsed.
......@@ -43,6 +43,7 @@ sub getrtabid($$);
sub array_diff($$);
sub LoadCurrent();
sub SetUpTracing($$$$$);
sub fatal(@);
#
# This function is the main assign loop. It converts the virtual
......@@ -187,17 +188,6 @@ my $topfile = "$pid-$eid-$$.top";
TBDebugTimeStampsOn();
#
# All exits happen via this function!
#
sub fatal (@)
{
    # Pass the caller's message arguments straight through to tberror().
    &tberror(@_);

    # Leave with the wrapper failure status; the END block below still
    # runs on the way out.
    exit $WRAPPER_FAILED;
}
#
# We want warnings to cause assign_wrapper to exit abnormally.
# We will come through here no matter how we exit though.
......@@ -518,6 +508,8 @@ my %admission_control = ();
my %reserved_v2pmap = ();
my %reserved_v2vmap = ();
my %oldreservednodes = ();
my %newreservednodes = ();
my $oldreservedclean = 0;
# reserved_p2vmap is indexed by physical and contains one or more virtual
# nodes
my %reserved_p2vmap = ();
......@@ -802,7 +794,7 @@ LoadExperiment();
if ($updating) {
LoadCurrent();
print STDERR "Resetting DB before updating.\n";
TBExptRemovePhysicalState( $pid, $eid );
$experiment->RemovePhysicalState();
}
#
......@@ -1309,10 +1301,7 @@ sub RunAssign ()
# work all the time i.e. in the example discussed above
my $oldreserved_pid = OLDRESERVED_PID;
my $oldreserved_eid = OLDRESERVED_EID;
if (scalar(keys %oldreservednodes)) {
# We can't recover after this coz we are making changes to
# the DB
$NoRecover = 1;
if (scalar(keys(%oldreservednodes)) && !$oldreservedclean) {
TBDebugTimeStamp("Moving Old Reserved nodes to ".
"$oldreserved_pid/$oldreserved_eid ".
"and back started");
......@@ -1330,11 +1319,12 @@ sub RunAssign ()
return -1;
}
# We need to move this back and forth the holding reservation only
# once i.e. in the first call to RunAssign(). If it gets repeatedly
# called coz only some pnode resources got nalloc'ed, we don't have
# to do the above again.
undef %oldreservednodes;
#
# We need to do the above only once, i.e. in the first call to
# RunAssign(). If it gets called repeatedly because only some pnode
# resources got nalloc'ed, we do not have to do the above again.
#
$oldreservedclean = 1;
}
TBDebugTimeStamp("reserving started");
......@@ -1374,6 +1364,7 @@ sub RunAssign ()
foreach my $node (@reserved) {
if (exists($toreserve{$node})) {
$newreservednodes{$node} = $node;
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_INIT_DIRTY());
}
}
......@@ -1421,6 +1412,8 @@ sub RunAssign ()
print "Successfully reserved all physical nodes we needed.\n";
foreach my $node (keys(%toreserve)) {
# Remember all newly allocated nodes for later free if failure.
$newreservednodes{$node} = $node;
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_INIT_DIRTY());
}
......@@ -1519,7 +1512,7 @@ if ($needwanassign) {
# Recoverability ends.
# All fatal() calls from this point do not have the recoverable '64' bit set.
#
$NoRecover = 1;
#$NoRecover = 1;
# VIRTNODES HACK: Local virtnodes have to be mapped now. This is a little
# hokey in that the virtnodes just need to be allocated from the pool that
......@@ -1738,36 +1731,6 @@ TBExptSetPortRange();
# queries to the DB.
LoadPhysResources();
#
# For update, wipe old interfaces in DB (normally done by nfree.)
# These will get rebuilt soon.
#
if ($updating && !$impotent) {
foreach my $pnode (keys(%phys_nodes)) {
#
# Do not need to do this for phys nodes that are to be
# released, or for virtnodes since they do not have interfaces
# associated with them directly. This is probably a bad assumption
# though, and perhaps this entire function should be moved to the
# library.
#
next
if (physnodeisvirtnode($pnode) ||
physnodereuse($pnode) eq "unused");
DBQueryFatal("update interfaces set IP='',IPaliases=NULL,mask=NULL,".
" rtabid='0',vnode_id=NULL " .
"where node_id='$pnode' and ".
" role='" . TBDB_IFACEROLE_EXPERIMENT() . "'");
# Clean the virtual interfaces table for this node too.
DBQueryFatal("delete from vinterfaces where node_id='$pnode'");
# And interface settings.
DBQueryFatal("delete from interface_settings where node_id='$pnode'");
}
}
######################################################################
# Step 3 - Convert to vlans, delays, and portmap
#
......@@ -2951,7 +2914,7 @@ if( $simcount > 0 ) {
}
TBDebugTimeStamp("assign_wrapper finished");
exit 0;
exit(0);
######################################################################
# Subroutines
......@@ -3421,7 +3384,6 @@ sub UploadVlans()
if (!defined($linkedlan));
my $virtlanidx = virtlanidx($lan->vname());
my $linkedlanid = $linkedlan->lanid();
printdb("Update vinterfaces: $lan: $virtlanidx -> $linkedlanid\n");
DBQueryFatal("update vinterfaces set vlanid='$linkedlanid' ".
......@@ -5758,4 +5720,31 @@ sub nodejailosid($)
return $nextosid;
}
#
# All exits happen via this function!
#
sub fatal (@)
{
    #
    # Free any newly reserved nodes (in update mode) so that tbswap knows
    # it is safe to recover the experiment. If we bypass this and leave
    # through the END block then NoRecover will still be set and tbswap
    # will know to swap the experiment out.
    #
    # Arguments are handed unchanged to tberror(). Never returns; exits
    # with $WRAPPER_FAILED.
    #
    if ($updating) {
        my @newnodes = keys(%newreservednodes);

        if (@newnodes) {
            #
            # Use the list form of system() so that the pid, eid and node
            # names are passed as separate arguments and never go through
            # a shell. NoRecover is cleared only if nfree exits with zero.
            #
            $NoRecover = 0
                if (system("nfree", "-x", $pid, $eid, @newnodes) == 0);
        }
        else {
            # Updating, but no new nodes were reserved; nothing to free.
            $NoRecover = 0;
        }
    }
    #
    # When not updating, NoRecover is left untouched; in that case it is
    # meaningless to tbswap.
    #
    &tberror(@_);

    # We next go to the END block above.
    exit($WRAPPER_FAILED);
}
......@@ -20,6 +20,7 @@ my $TB = '@prefix@';
use libdb;
use User;
use Experiment;
use snmpit_lib;
use snmpit_remote;
use libtblog;
......@@ -35,7 +36,7 @@ sub doListVlans($);
sub doListPorts($);
sub doPortStatus($@);
sub doGetStats($);
sub doVlansFromTables($@);
sub doVlansFromTables($$@);
sub doReset($@);
sub doMakeVlan($$@);
sub doDeleteVlan($@);
......@@ -157,6 +158,7 @@ if ($opt{q}) {
#
my $pid;
my $eid;
my $experiment;
my @ports;
my @optvlanids = ();
my $equaltrunking = 0;
......@@ -405,10 +407,12 @@ if ($pid && $eid) {
#
# First, make sure the experiment exists
#
if (!ExpState($pid,$eid)) {
$experiment = Experiment->Lookup($pid,$eid);
if (!defined($experiment)) {
die "There is no experiment $eid in project $pid\n";
}
if ($UID && !TBExptAccessCheck($UID,$pid,$eid,TB_EXPT_MODIFY)) {
if (defined($this_user) &&
!$experiment->AccessCheck($this_user, TB_EXPT_MODIFY)) {
die "You do not have permission to modify experiment $pid/$eid\n";
}
}
......@@ -698,7 +702,7 @@ COMMAND: foreach my $command (@commands) {
last;
}; # /ports/ && do
/tables/ && do {
$exitval += doVlansFromTables(\@stacks,@vlans);
$exitval += doVlansFromTables($experiment,\@stacks,@vlans);
last;
}; # /tables/ && do
/reset/ && do {
......@@ -881,7 +885,7 @@ $vlan_id,$ddep, $pideid, $vname, $members
next;
}
$vname = $vlan->vname();
my $experiment = $vlan->GetExperiment();
$experiment = $vlan->GetExperiment();
#
# Permissions check - people only get to see their own VLANs
......@@ -1219,7 +1223,8 @@ $port, $inoctets, $inunicast,$innunicast,$indiscards,$inerr, $inunk, $out
# Creates all VLANs given. Looks up identifiers in the database to determine
# the membership.
#
sub doVlansFromTables($@) {
sub doVlansFromTables($$@) {
my $experiment = shift;
my $stacks = shift;
my @vlans = @_;
......@@ -1288,7 +1293,10 @@ sub doVlansFromTables($@) {
# Don't try to put ports in a VLAN if it couldn't be created
#
$errors++;
} else { setVlanTag($vlan, $vlan_number); }
} else {
setVlanTag($vlan, $vlan_number);
VLan->RecordVlanInsertion($experiment, $vlan, $vlan_number);
}
}
#
......@@ -1354,6 +1362,8 @@ sub doReset($@) {
}
foreach my $vlan (@existant_vlans) {
setVlanTag($vlan, 0);
VLan->RecordVLanDeletion($vlan);
}
}
return $errors;
......@@ -1420,6 +1430,13 @@ sub doDeleteVlan($@) {
my $stacks = shift;
my @vlan_names = @_;
#
# Hand over to outer boss.
#
if ($ELABINELAB) {
return RemoteDoReset(@vlan_names);
}
my $errors = 0;
my %exists = ();
......@@ -1442,6 +1459,9 @@ sub doDeleteVlan($@) {
if (!$ok) {
$errors++;
}
foreach my $vlan (@existant_vlans) {
VLan->RecordVLanDeletion($vlan);
}
}
}
......@@ -1577,16 +1597,14 @@ sub doRecreateVlans($) {
#
# Get a list of all experiments, so that we can re-create their VLANs
#
my @expts = ();
my $result = DBQueryFatal("select pid,eid from experiments ".
"where state = '". EXPTSTATE_ACTIVE. "'");
while (my ($pid,$eid) = $result->fetchrow()) {
my @vlans = getExperimentVlans($pid,$eid);
doVlansFromTables($stacks,@vlans);
my @experiments = Experiment->AllActive();
foreach my $experiment (@experiments) {
my @vlans = getExperimentVlans($experiment->pid(), $experiment->eid());
doVlansFromTables($experiment, $stacks, @vlans)
if (@vlans);
}
return 1;
}
#
......
......@@ -74,7 +74,7 @@ sub commonTail($$)
defined($response->{"output"}) && $response->{"output"} ne "") {
print $response->{"output"};
}
return ($response->{"code"}) ? undef : $response->{"output"};
return $response;
}
......@@ -149,7 +149,10 @@ sub RemoteDoVlansFromTables(@)
if (! keys(%$vlantable));
my $errors = 0;
my $xmlback = commonTail("setup",$vlantable);
my $response = commonTail("setup", $vlantable);
return 1
if ($response->{"code"});
my $xmlback = $response->{"output"};
if (defined($xmlback)) {
foreach my $vlres (split ',', $xmlback) {
......@@ -165,6 +168,8 @@ sub RemoteDoVlansFromTables(@)
print STDERR "could not set vlan tag for $vlan\n";
$errors++;
}
VLan->RecordVlanInsertion($vlan->GetExperiment(),
$vlan->lanid(), $tag);
}
}
else {
......@@ -182,8 +187,14 @@ sub RemoteDoReset(@)
return 0
if (! @vlans);
my $res = commonTail("destroy", join(",", @vlans));
return !defined($res);
my $response = commonTail("destroy", join(",", @vlans));
return 1
if ($response->{"code"});
foreach my $vlan (@vlans) {
VLan->RecordVLanDeletion($vlan);
}
return 0;
}
#
......@@ -207,8 +218,11 @@ sub RemoteDoTrunking($$@)
}
$arg->{"port"} = $port;
my $res = commonTail("trunk", $arg);
return !defined($res);
my $response = commonTail("trunk", $arg);
return 1
if ($response->{"code"});
return 0;
}
#
......@@ -224,7 +238,10 @@ sub RemoteDoList(@)
my @list = ();
my $arg = @vlans ? join(",", @vlans) : "";
my $xmlback = commonTail("list",$arg);
my $response = commonTail("list",$arg);
return 1
if ($response->{"code"});
my $xmlback = $response->{"output"};
if (!defined($xmlback)) { return @list; }
my $prefix = "" ;
......
......@@ -95,6 +95,7 @@ my $signaled = 0;
my $tbdir = "$TB/bin";
my $tbdata = "tbdata";
my $checkquota = "$TB/sbin/checkquota";
my $wrapper = "$TB/libexec/assign_wrapper";
my $batch = 0;
my $idleswap = 0;
my $autoswap = 0;
......@@ -298,6 +299,11 @@ if (my $instance = Template::Instance->LookupByExptidx($experiment->idx())) {
}
}
# XXX Hack for geni mode.
if ($genimode) {
$experiment->SetState(EXPTSTATE_ACTIVE);
}
#
# Verify user and get his DB uid and other info for later.
#
......@@ -1026,6 +1032,17 @@ elsif ($inout eq "modify") {
goto FWHOSED;
}
}
elsif ($genimode) {
#
# Need the min/max numbers, usually done during prerun.
#
print "Doing a pre-assign ...\n";
if (system("$wrapper -t $pid $eid")) {
fatal({type => 'secondary', severity => SEV_SECONDARY,
error => ['update_aborted', undef]},
"assign prerun failed!");
}
}
#
# Our next state depends on whether the experiment was active or swapped.
......@@ -1055,6 +1072,7 @@ elsif ($inout eq "modify") {
else {
$optarg = ($reboot ? "-reboot" : "");
$optarg .= ($eventsys_restart ? " -eventsys_restart" : "");
$optarg .= ($genimode ? "-noswapout" : "");
}
if ($experiment->Swap($Experiment::EXPT_SWAPMOD, $optarg) == 0) {
......@@ -1092,8 +1110,14 @@ elsif ($inout eq "modify") {
$modifyError = "Update aborted; experiment swapped out.";
}
else {
tbreport(SEV_SECONDARY, 'update_aborted', 'state_restored');
$modifyError = "Update aborted; old state restored.";
if ($genimode) {
tbreport(SEV_SECONDARY, 'update_aborted','still active');
$modifyError = "Update aborted; still swapped in.";
}
else {
tbreport(SEV_SECONDARY, 'update_aborted','state_restored');
$modifyError = "Update aborted; old state restored.";
}
# Reset the swapper since the experiment is still running.
$experiment->SetSwapper($last_swapper);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment