From 3593d9c61c9714d2031252b9efa3250d4bfdc2cc Mon Sep 17 00:00:00 2001 From: "Leigh B. Stoller" Date: Wed, 9 Jul 2008 22:49:08 +0000 Subject: [PATCH] My attempt to improve swapmod ... Previously, any error in assign wrapper would cause the experiment to swap out because the "DB had been modified" ... well I have isolated all of the changes that are made, and errors in assign_wrapper proper no longer do that. tbswap now restores the experiment back the way it was. Not that errors after assign_wrapper (like in os_setup) are still a problem. In addition, rather then kill off all of the vlans, leave them in place and then do a comparison after assign wrapper, removing obsolete and modified vlans only. I have made use of the obsolete vlans table for this by having snmpit track its changes in that table. There is a bunch of new code in Lan.pm for doing the comparisons. --- db/Lan.pm.in | 398 +++++++++++++++++++++++++----------- tbsetup/assign_wrapper.in | 97 ++++----- tbsetup/snmpit.in | 48 +++-- tbsetup/snmpit_remote.pm.in | 31 ++- tbsetup/swapexp.in | 28 ++- tbsetup/tbswap.in | 221 +++++++++++++------- 6 files changed, 545 insertions(+), 278 deletions(-) diff --git a/db/Lan.pm.in b/db/Lan.pm.in index 9403440a3..823b91b41 100644 --- a/db/Lan.pm.in +++ b/db/Lan.pm.in @@ -174,7 +174,7 @@ sub Create($$$;$$$) " vname=$safe_vname, ". " type='$type', ". " vidx='$vidx', ". - " link='$linkid', ". + " link=$linkid, ". " ready=0"); return undef if (!defined($query_result)); @@ -183,19 +183,6 @@ sub Create($$$;$$$) my $lanid = $query_result->insertid(); my $lan = Lan->Lookup($lanid); - # Keep the vlans table in sync for now. Will remove later. - if ($type eq "vlan" && !$initialize) { - $query_result = - DBQueryWarn("insert into vlans (id,exptidx,pid,eid,virtual) ". - "values ". - "(0, '$exptidx', '$pid', '$eid', $safe_vname)"); - if (!$query_result) { - DBQueryWarn("delete from lans where lanid='$lanid'"); - return undef; - } - my $vlanid = $query_result->insertid(); - $lan->SetAttribute("vlanid", $vlanid); - } print "Created lan: $lan\n" if ($debug && $lan); return $lan; @@ -239,14 +226,6 @@ sub Destroy($) "where lanid='$lanid' and memberid='$memberid'")); } - # Keep the vlans table in sync for now. Will remove later. - if ($type eq "vlan" && !$initialize) { - my $vlanid; - - $self->GetAttribute("vlanid", \$vlanid); - DBQueryWarn("delete from vlans where id='$vlanid'"); - } - # Must delete attributes after above vlan removal but before lan removal. return -1 if (!DBQueryWarn("delete from lan_attributes ". @@ -318,10 +297,6 @@ sub BackupExperimentLans($$$) "into outfile '$pstatedir/$table'") or return -1; } - # Keep the vlans table in sync for now. Will remove later. - DBQueryWarn("select * from vlans where exptidx='$exptidx' ". - "into outfile '$pstatedir/vlans' ") - or return -1; return 0; } @@ -336,9 +311,8 @@ sub RestoreExperimentLans($$$) return -1 if (! ref($experiment)); - # Keep the vlans table in sync for now. Will remove later. foreach my $table ("lans", "lan_attributes", "lan_members", - "lan_member_attributes", "ifaces", "vlans") { + "lan_member_attributes", "ifaces") { if (-e "$pstatedir/$table") { DBQueryWarn("load data infile '$pstatedir/$table' ". "into table $table") @@ -348,6 +322,86 @@ sub RestoreExperimentLans($$$) return 0; } +# +# Compare current vlans with pre-modify vlans to see which ones changed. +# These are the ones we will delete from the switches. The ones that do not +# change can be left alone. In the common case, this should save on the +# amount of vlan churning we do for swapmod. +# +# We return two lists of vlan ids; ones that have changed and need to be +# deleted, and the rest. +# +sub CompareVlansWithSwitches($$$) +{ + my ($class, $experiment, $pdiff, $psame) = @_; + + my $exptidx = $experiment->idx(); + my @changed = (); + my @same = (); + + # + # Grab the existing vlans from the vlans table (managed by snmpit). + # + my $query_result = + DBQueryWarn("select id,virtual,members,tag from vlans ". + "where exptidx='$exptidx'"); + return -1 + if (!$query_result); + + while (my ($oldid,$vname,$oldmembers,$tag) = + $query_result->fetchrow_array()) { + + my $vlan = VLan->Lookup($experiment, $vname); + if (!defined($vlan) || $vlan->type ne "vlan") { + push(@changed, $oldid); + next; + } + my $newid = $vlan->lanid(); + + # + # Compare the members list. + # + my @oldportlist = split(/\s/, $oldmembers); + my @newportlist; + if ($vlan->PortList(\@newportlist) != 0) { + print STDERR "Could not get portlist for $vlan\n"; + return -1; + } + if (scalar(@oldportlist) != scalar(@newportlist)) { + push(@changed, $oldid); + next; + } + my $diff = 0; + foreach my $port (@oldportlist) { + if (! grep {$_ eq $port} @newportlist) { + $diff++; + } + } + if ($diff) { + push(@changed, $oldid); + next; + } + push(@same, $oldid); + + # + # Change the new lan (and its partner entries) to have the old id + # number, so that it matches what is on the switch, as told by + # the vlans table. + # + # This is bad; if one of these updates fails, we are screwed. + # + foreach my $table ("lans", "lan_attributes", "lan_members", + "lan_member_attributes", "ifaces") { + DBQueryWarn("update $table set lanid=$oldid ". + "where lanid='$newid'") + or return -1; + } + } + @$pdiff = @changed; + @$psame = @same; + return 0; +} + # # Stringify for output. # @@ -518,12 +572,9 @@ sub AddInterface($$$$;$$) return undef; } - # Keep vlans table in sync for now. if ($self->type() eq "vlan" && !$initialize) { my $nodeiface = $node->node_id() . ":" . $iface; - my $vlanid; - - $self->GetAttribute("vlanid", \$vlanid); + my $vlanid = $self->lanid(); DBQueryWarn("update vlans set ". " members=CONCAT_WS(' ', members, '$nodeiface') ". @@ -542,40 +593,34 @@ sub AddMember($$;$) return undef if (!ref($self)); - my $member = Lan::Member->Create($self); - - if (defined($node)) { - if (!ref($node)) { - $node = Node->Lookup($node); - if (!defined($node)) { - $member->Destroy(); - return undef; - } - } - - # And the node. - if ($member->SetAttribute("node_id", $node->node_id()) != 0) { - $member->Destroy(); - return undef; - } - # Set the attribute for the physical interface. - if (defined($iface) && - $member->SetAttribute("iface", $iface) != 0) { - $member->Destroy(); + if (!ref($node)) { + $node = Node->Lookup($node); + if (!defined($node)) { return undef; } + } - # Keep vlans table in sync for now. - if ($self->type() eq "vlan" && !$initialize) { - my $member = $node->node_id() . ":" . $iface; - my $vlanid; + my $member = Lan::Member->Create($self, $node); - $self->GetAttribute("vlanid", \$vlanid); + # And the node attribute. + if ($member->SetAttribute("node_id", $node->node_id()) != 0) { + $member->Destroy(); + return undef; + } + # Set the attribute for the physical interface. + if (defined($iface) && + $member->SetAttribute("iface", $iface) != 0) { + $member->Destroy(); + return undef; + } + # Keep vlans table in sync. + if ($self->type() eq "vlan" && !$initialize) { + my $member = $node->node_id() . ":" . $iface; + my $vlanid = $self->lanid(); - DBQueryWarn("update vlans set ". - " members=CONCAT_WS(' ', members, '$member') ". - "where id='$vlanid'"); - } + DBQueryWarn("update vlans set ". + " members=CONCAT_WS(' ', members, '$member') ". + "where id='$vlanid'"); } return $member; } @@ -979,14 +1024,21 @@ sub Stringify($) # # Create a new Lan::Member object (on a specific lan) and return it. # -sub Create($$) +sub Create($$$) { - my ($class, $lan) = @_; - my $lanid = $lan->lanid(); + my ($class, $lan, $node) = @_; + + if (!ref($node)) { + $node = Node->Lookup($node); + return 0 + if (!defined($node)); + } + my $nodeid = $node->node_id(); + my $lanid = $lan->lanid(); my $query_result = DBQueryWarn("insert into lan_members set ". - " lanid='$lanid', memberid=NULL"); + " lanid='$lanid', memberid=NULL, node_id='$nodeid'"); return undef if (!$query_result); @@ -1255,14 +1307,14 @@ sub Create($$$$$;$) my $query_result = DBQueryWarn("select idx from virt_lan_lans ". "where exptidx=$exptidx and vname='$vname'"); - return -1 + return undef if (!$query_result || !$query_result->numrows); my ($vidx) = $query_result->fetchrow_array(); # Use supplied member (which provides the ifaceid) or generate a # new one. if (!defined($member)) { - $member = $lan->AddMember(); + $member = Lan::Member->Create($lan, $node); return undef if (!defined($member)); } @@ -1773,6 +1825,8 @@ use English; use Lan; use overload ('""' => 'Stringify'); +my $SNMPIT = "$TB/bin/snmpit"; + # Cache of instances to avoid regenerating them. my %vlans = (); @@ -1942,6 +1996,29 @@ sub MemberList($$) return $self->GetLan()->MemberList($plist); } +sub PortList($$) +{ + my ($self, $pref) = @_; + my @members; + my @ports = (); + + return -1 + if ($self->MemberList(\@members) != 0); + + foreach my $member (@members) { + my $nodeid; + my $iface; + + return -1 + if ($member->GetAttribute("node_id", \$nodeid) != 0 || + $member->GetAttribute("iface", \$iface) != 0); + + push(@ports, "$nodeid:$iface"); + } + @$pref = @ports; + return 0; +} + # # Get value of an attribute. # @@ -1971,11 +2048,8 @@ sub SetTag($$) { my ($self, $tag) = @_; - # Keep vlans table in sync for now. if (!$initialize) { - my $vlanid; - - $self->GetAttribute("vlanid", \$vlanid); + my $vlanid = $self->lanid(); DBQueryWarn("update vlans set tag='$tag' ". "where id='$vlanid'"); @@ -2045,10 +2119,128 @@ sub ExperimentVLans($$$) return 0; } +# +# Utility function to add a vlan to the switch infrastructure. +# +sub Instantiate($) +{ + my ($self) = @_; + + return -1 + if (! ref($self)); + + my $experiment = $self->GetExperiment(); + return -1 + if (!defined($experiment)); + + my $pid = $experiment->pid(); + my $eid = $experiment->eid(); + my $vname = $self->vname(); + my $lanid = $self->lanid(); + + #print "Setting up VLAN $vname ($lanid) in $pid/$eid\n"; + system("$SNMPIT -t $pid $eid $lanid"); + return -1 + if ($?); + return 0; +} + +# +# Utility function to remove a vlan from the switch infrastructure. +# +sub UnInstantiate($) +{ + my ($self) = @_; + + return -1 + if (! ref($self)); + + my $experiment = $self->GetExperiment(); + return -1 + if (!defined($experiment)); + + my $pid = $experiment->pid(); + my $eid = $experiment->eid(); + my $vname = $self->vname(); + my $lanid = $self->lanid(); + + #print "Removing VLAN $vname ($lanid) from $pid/$eid\n"; + system("$SNMPIT -r $pid $eid $lanid"); + return -1 + if ($?); + return 0; +} + +# +# Class methods to maintain the backup vlans table, which records what +# is on the switches. This is used to optimize swapmod, nothing else. +# +sub RecordVlanInsertion($$$$) +{ + my ($class, $experiment, $lanid, $tag) = @_; + + my $pid = $experiment->pid(); + my $eid = $experiment->pid(); + my $exptidx = $experiment->idx(); + my $vlan = VLan->Lookup($lanid); + return -1 + if (!defined($vlan)); + + my $vname = $vlan->vname(); + my @portlist; + $vlan->PortList(\@portlist) == 0 + or return -1; + my $members = join(" ", @portlist); + + DBQueryWarn("replace into vlans set ". + " id='$lanid', pid='$pid', eid='$eid', exptidx='$exptidx', ". + " virtual='$vname', members='$members', tag='$tag'") + or return -1; + + return 0; +} + +sub RecordVLanDeletion($$) +{ + my ($class, $id) = @_; + + DBQueryWarn("delete from vlans where id='$id'") + or return -1; + + return 0; +} + +# +# Return a list of stale vlans for an experiment; vlans in the vlans +# table that need to be removed from the switches. This happens when a +# swapmod fails badly cause we try to optimize swapmod by not tearing +# down vlans until the swapin phase, so as not churn vlans that have not +# changed. +# +sub StaleVlanList($$$) +{ + my ($class, $experiment, $pref) = @_; + + my $pid = $experiment->pid(); + my $eid = $experiment->pid(); + my $exptidx = $experiment->idx(); + my @result = (); + + my $query_result = + DBQueryWarn("select id from vlans where exptidx=$exptidx"); + return -1 + if (!$query_result); + + while (my ($id) = $query_result->fetchrow_array()) { + push(@result, $id); + } + @$pref = @result; + return 0; +} + ############################################################################ # -# The most common kind of Lan is a Vlan, so lets create a package/object -# for it. +# Another convenience package, for tunnels. # package Tunnel; use libdb; @@ -2101,9 +2293,9 @@ sub GetExperiment($) { return $_[0]->{'EXPT'}; } # # Create a new Tunnel object and return it. No members yet ... # -sub Create($$$$;$$) +sub Create($$$$$;$$) { - my ($class, $experiment, $vname, $secretkey, $mask, $port) = @_; + my ($class, $experiment, $vname, $secretkey, $style, $mask, $port) = @_; # The new lan has the 'ready' bit set to zero. my $lan = Lan->Create($experiment, $vname, "tunnel"); @@ -2115,6 +2307,10 @@ sub Create($$$$;$$) $lan->Destroy(); return undef; } + if ($lan->SetAttribute("style", $style) != 0) { + $lan->Destroy(); + return undef; + } if (defined($port) && $lan->SetAttribute("serverport", $port) != 0) { $lan->Destroy(); @@ -2163,14 +2359,14 @@ sub Stringify($) # sub AddInterface($$$$;$$) { - my ($self, $node, $vnode, $vport, $ip, $peerip, $isserver) = @_; + my ($self, $node, $vnode, $vport) = @_; if (!ref($node)) { $node = Node->Lookup($node); return undef if (!defined($node)); } - my $member = $self->AddMember($node, $ip, $peerip, $isserver); + my $member = $self->AddMember($node); return undef if (!defined($member)); @@ -2183,11 +2379,11 @@ sub AddInterface($$$$;$$) } # -# Add a member to a tunnel. +# Add a member to a tunnel. The caller needs to a bunch of stuff. # -sub AddMember($$$;$$) +sub AddMember($$) { - my ($self, $node, $ip, $peerip, $isserver) = @_; + my ($self, $node) = @_; if (!ref($node)) { $node = Node->Lookup($node); @@ -2198,48 +2394,6 @@ sub AddMember($$$;$$) return undef if (!defined($member)); - if ($member->SetAttribute("ipaddr", $ip) != 0) { - $member->Destroy(); - return undef; - } - if (defined($peerip) && - $member->SetAttribute("peeripaddr", $peerip) != 0) { - $member->Destroy(); - return undef; - } - if (defined($isserver) && $isserver && - $member->SetAttribute("isserver", "1") != 0) { - $member->Destroy(); - return undef; - } - # I think this table is going to go away. - my $pid = $self->pid(); - my $eid = $self->eid(); - my $exptidx = $self->exptidx(); - my $node_id = $node->node_id(); - my $lan = $self->vname(); - my $secretkey; - my $ipmask; - my $serverport; - - if ($self->GetAttribute("secretkey", \$secretkey) != 0 || - $self->GetAttribute("ipmask", \$ipmask) != 0 || - $self->GetAttribute("serverport", \$serverport) != 0) { - $member->Destroy(); - return undef; - } - $isserver = (defined($isserver) && $isserver ? 1 : 0); - - if (!DBQueryWarn("insert into tunnels ". - " (pid, eid, node_id, vname, isserver, port, ". - " peer_ip, mask, assigned_ip, password) ". - "values ('$pid', '$eid', '$node_id', '$lan', ". - " $isserver, '$serverport', ". - " '$peerip', '$ipmask', '$ip','$secretkey')")) { - $member->Destroy(); - return undef; - } - return $member; } diff --git a/tbsetup/assign_wrapper.in b/tbsetup/assign_wrapper.in index 09e2b3d56..9e8aa877e 100644 --- a/tbsetup/assign_wrapper.in +++ b/tbsetup/assign_wrapper.in @@ -43,6 +43,7 @@ sub getrtabid($$); sub array_diff($$); sub LoadCurrent(); sub SetUpTracing($$$$$); +sub fatal(@); # # This function as the main assign loop. It converts the virtual @@ -187,17 +188,6 @@ my $topfile = "$pid-$eid-$$.top"; TBDebugTimeStampsOn(); -# -# All exits happen via this function! -# -sub fatal (@) -{ - &tberror(@_); - - # We next go to the END block below. - exit($WRAPPER_FAILED); -} - # # We want warnings to cause assign_wrapper to exit abnormally. # We will come through here no matter how we exit though. @@ -518,6 +508,8 @@ my %admission_control = (); my %reserved_v2pmap = (); my %reserved_v2vmap = (); my %oldreservednodes = (); +my %newreservednodes = (); +my $oldreservedclean = 0; # reserved_p2vmap is indexed by physical and contains one or more virtual # nodes my %reserved_p2vmap = (); @@ -802,7 +794,7 @@ LoadExperiment(); if ($updating) { LoadCurrent(); print STDERR "Resetting DB before updating.\n"; - TBExptRemovePhysicalState( $pid, $eid ); + $experiment->RemovePhysicalState(); } # @@ -1309,10 +1301,7 @@ sub RunAssign () # work all the time i.e. in the example discussed above my $oldreserved_pid = OLDRESERVED_PID; my $oldreserved_eid = OLDRESERVED_EID; - if (scalar(keys %oldreservednodes)) { - # We can't recover after this coz we are making changes to - # the DB - $NoRecover = 1; + if (scalar(keys(%oldreservednodes)) && !$oldreservedclean) { TBDebugTimeStamp("Moving Old Reserved nodes to ". "$oldreserved_pid/$oldreserved_eid ". "and back started"); @@ -1330,11 +1319,12 @@ sub RunAssign () return -1; } - # We need to move this back and forth the holding reservation only - # once i.e. in the first call to RunAssign(). If it gets repeatedly - # called coz only some pnode resources got nalloc'ed, we don't have - # to do the above again. - undef %oldreservednodes; + # + # We need to only once i.e. in the first call to RunAssign(). + # If it gets repeatedly called coz only some pnode resources + # got nalloced, we do not have to do the above again. + # + $oldreservedclean = 1; } TBDebugTimeStamp("reserving started"); @@ -1374,6 +1364,7 @@ sub RunAssign () foreach my $node (@reserved) { if (exists($toreserve{$node})) { + $newreservednodes{$node} = $node; TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_INIT_DIRTY()); } } @@ -1421,6 +1412,8 @@ sub RunAssign () print "Successfully reserved all physical nodes we needed.\n"; foreach my $node (keys(%toreserve)) { + # Remeber all newly allocated nodes for later free if failure. + $newreservednodes{$node} = $node; TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_INIT_DIRTY()); } @@ -1519,7 +1512,7 @@ if ($needwanassign) { # Recoverability ends. # All fatal() calls from this point do not have the recoverable '64' bit set. # -$NoRecover = 1; +#$NoRecover = 1; # VIRTNODES HACK: Local virtnodes have to be mapped now. This is a little # hokey in that the virtnodes just need to be allocated from the pool that @@ -1738,36 +1731,6 @@ TBExptSetPortRange(); # queries to the DB. LoadPhysResources(); -# -# For update, wipe old interfaces in DB (normally done by nfree.) -# These will get rebuilt soon. -# -if ($updating && !$impotent) { - foreach my $pnode (keys(%phys_nodes)) { - # - # Do not need to do this for phys nodes that are to be - # released, or for virtnodes since they do not have interfaces - # associated with them directly. This is probably a bad assumption - # though, and perhaps this entire function should be moved to the - # library. - # - next - if (physnodeisvirtnode($pnode) || - physnodereuse($pnode) eq "unused"); - - DBQueryFatal("update interfaces set IP='',IPaliases=NULL,mask=NULL,". - " rtabid='0',vnode_id=NULL " . - "where node_id='$pnode' and ". - " role='" . TBDB_IFACEROLE_EXPERIMENT() . "'"); - - # Clean the virtual interfaces table for this node too. - DBQueryFatal("delete from vinterfaces where node_id='$pnode'"); - - # And interface settings. - DBQueryFatal("delete from interface_settings where node_id='$pnode'"); - } -} - ###################################################################### # Step 3 - Convert to vlans, delays, and portmap # @@ -2951,7 +2914,7 @@ if( $simcount > 0 ) { } TBDebugTimeStamp("assign_wrapper finished"); -exit 0; +exit(0); ###################################################################### # Subroutines @@ -3421,7 +3384,6 @@ sub UploadVlans() if (!defined($linkedlan)); my $virtlanidx = virtlanidx($lan->vname()); my $linkedlanid = $linkedlan->lanid(); - printdb("Update vinterfaces: $lan: $virtlanidx -> $linkedlanid\n"); DBQueryFatal("update vinterfaces set vlanid='$linkedlanid' ". @@ -5758,4 +5720,31 @@ sub nodejailosid($) return $nextosid; } +# +# All exits happen via this function! +# +sub fatal (@) +{ + # + # Free any newly reserved nodes (in update mode) so that tbswap knows + # it is safe to recover the experiment. If we bypass this and leave + # through the END block then NoRecover will still be set and tbswap + # will know to swap the experiment out. + # + if ($updating) { + if (scalar(keys(%newreservednodes))) { + $NoRecover = 0 + if (system("nfree -x $pid $eid " . + join(" ", keys(%newreservednodes))) == 0); + } + else { + # When not updating this is meaningless to tbswap. + $NoRecover = 0; + } + } + + &tberror(@_); + # We next go to the END block above. + exit($WRAPPER_FAILED); +} diff --git a/tbsetup/snmpit.in b/tbsetup/snmpit.in index 532005985..1fc0d85e5 100755 --- a/tbsetup/snmpit.in +++ b/tbsetup/snmpit.in @@ -20,6 +20,7 @@ my $TB = '@prefix@'; use libdb; use User; +use Experiment; use snmpit_lib; use snmpit_remote; use libtblog; @@ -35,7 +36,7 @@ sub doListVlans($); sub doListPorts($); sub doPortStatus($@); sub doGetStats($); -sub doVlansFromTables($@); +sub doVlansFromTables($$@); sub doReset($@); sub doMakeVlan($$@); sub doDeleteVlan($@); @@ -157,6 +158,7 @@ if ($opt{q}) { # my $pid; my $eid; +my $experiment; my @ports; my @optvlanids = (); my $equaltrunking = 0; @@ -405,10 +407,12 @@ if ($pid && $eid) { # # First, make sure the experiment exists # - if (!ExpState($pid,$eid)) { + $experiment = Experiment->Lookup($pid,$eid); + if (!defined($experiment)) { die "There is no experiment $eid in project $pid\n"; } - if ($UID && !TBExptAccessCheck($UID,$pid,$eid,TB_EXPT_MODIFY)) { + if (defined($this_user) && + !$experiment->AccessCheck($this_user, TB_EXPT_MODIFY)) { die "You do not have permission to modify experiment $pid/$eid\n"; } } @@ -698,7 +702,7 @@ COMMAND: foreach my $command (@commands) { last; }; # /ports/ && do /tables/ && do { - $exitval += doVlansFromTables(\@stacks,@vlans); + $exitval += doVlansFromTables($experiment,\@stacks,@vlans); last; }; # /tables/ && do /reset/ && do { @@ -881,7 +885,7 @@ $vlan_id,$ddep, $pideid, $vname, $members next; } $vname = $vlan->vname(); - my $experiment = $vlan->GetExperiment(); + $experiment = $vlan->GetExperiment(); # # Permissions check - people only get to see their own VLANs @@ -1219,7 +1223,8 @@ $port, $inoctets, $inunicast,$innunicast,$indiscards,$inerr, $inunk, $out # Creates all VLANs given. Looks up identifiers in the database to determine # the membership. # -sub doVlansFromTables($@) { +sub doVlansFromTables($$@) { + my $experiment = shift; my $stacks = shift; my @vlans = @_; @@ -1288,7 +1293,10 @@ sub doVlansFromTables($@) { # Don't try to put ports in a VLAN if it couldn't be created # $errors++; - } else { setVlanTag($vlan, $vlan_number); } + } else { + setVlanTag($vlan, $vlan_number); + VLan->RecordVlanInsertion($experiment, $vlan, $vlan_number); + } } # @@ -1354,6 +1362,8 @@ sub doReset($@) { } foreach my $vlan (@existant_vlans) { setVlanTag($vlan, 0); + VLan->RecordVLanDeletion($vlan); + } } return $errors; @@ -1420,6 +1430,13 @@ sub doDeleteVlan($@) { my $stacks = shift; my @vlan_names = @_; + # + # Hand over to outer boss. + # + if ($ELABINELAB) { + return RemoteDoReset(@vlan_names); + } + my $errors = 0; my %exists = (); @@ -1442,6 +1459,9 @@ sub doDeleteVlan($@) { if (!$ok) { $errors++; } + foreach my $vlan (@existant_vlans) { + VLan->RecordVLanDeletion($vlan); + } } } @@ -1577,16 +1597,14 @@ sub doRecreateVlans($) { # # Get a list of all experiments, so that we can re-create their VLANs # - my @expts = (); - my $result = DBQueryFatal("select pid,eid from experiments ". - "where state = '". EXPTSTATE_ACTIVE. "'"); - while (my ($pid,$eid) = $result->fetchrow()) { - my @vlans = getExperimentVlans($pid,$eid); - doVlansFromTables($stacks,@vlans); + my @experiments = Experiment->AllActive(); + foreach my $experiment (@experiments) { + my @vlans = getExperimentVlans($experiment->pid(), $experiment->eid()); + + doVlansFromTables($experiment, $stacks, @vlans) + if (@vlans); } - return 1; - } # diff --git a/tbsetup/snmpit_remote.pm.in b/tbsetup/snmpit_remote.pm.in index df9becb02..327cbfbf5 100644 --- a/tbsetup/snmpit_remote.pm.in +++ b/tbsetup/snmpit_remote.pm.in @@ -74,7 +74,7 @@ sub commonTail($$) defined($response->{"output"}) && $response->{"output"} ne "") { print $response->{"output"}; } - return ($response->{"code"}) ? undef : $response->{"output"}; + return $response; } @@ -149,7 +149,10 @@ sub RemoteDoVlansFromTables(@) if (! keys(%$vlantable)); my $errors = 0; - my $xmlback = commonTail("setup",$vlantable); + my $response = commonTail("setup", $vlantable); + return 1 + if ($response->{"code"}); + my $xmlback = $response->{"output"}; if (defined($xmlback)) { foreach my $vlres (split ',', $xmlback) { @@ -165,6 +168,8 @@ sub RemoteDoVlansFromTables(@) print STDERR "could not set vlan tag for $vlan\n"; $errors++; } + VLan->RecordVlanInsertion($vlan->GetExperiment(), + $vlan->lanid(), $tag); } } else { @@ -182,8 +187,14 @@ sub RemoteDoReset(@) return 0 if (! @vlans); - my $res = commonTail("destroy", join(",", @vlans)); - return !defined($res); + my $response = commonTail("destroy", join(",", @vlans)); + return 1 + if ($response->{"code"}); + + foreach my $vlan (@vlans) { + VLan->RecordVLanDeletion($vlan); + } + return 0; } # @@ -207,8 +218,11 @@ sub RemoteDoTrunking($$@) } $arg->{"port"} = $port; - my $res = commonTail("trunk", $arg); - return !defined($res); + my $response = commonTail("trunk", $arg); + return 1 + if ($response->{"code"}); + + return 0; } # @@ -224,7 +238,10 @@ sub RemoteDoList(@) my @list = (); my $arg = @vlans ? join(",", @vlans) : ""; - my $xmlback = commonTail("list",$arg); + my $response = commonTail("list",$arg); + return 1 + if ($response->{"code"}); + my $xmlback = $response->{"output"}; if (!defined($xmlback)) { return @list; } my $prefix = "" ; diff --git a/tbsetup/swapexp.in b/tbsetup/swapexp.in index c1e09f117..de04b467b 100644 --- a/tbsetup/swapexp.in +++ b/tbsetup/swapexp.in @@ -95,6 +95,7 @@ my $signaled = 0; my $tbdir = "$TB/bin"; my $tbdata = "tbdata"; my $checkquota = "$TB/sbin/checkquota"; +my $wrapper = "$TB/libexec/assign_wrapper"; my $batch = 0; my $idleswap = 0; my $autoswap = 0; @@ -298,6 +299,11 @@ if (my $instance = Template::Instance->LookupByExptidx($experiment->idx())) { } } +# XXX Hack for geni mode. +if ($genimode) { + $experiment->SetState(EXPTSTATE_ACTIVE); +} + # # Verify user and get his DB uid and other info for later. # @@ -1026,6 +1032,17 @@ elsif ($inout eq "modify") { goto FWHOSED; } } + elsif ($genimode) { + # + # Need the min/max numbers, usually done during prerun. + # + print "Doing a pre-assign ...\n"; + if (system("$wrapper -t $pid $eid")) { + fatal({type => 'secondary', severity => SEV_SECONDARY, + error => ['update_aborted', undef]}, + "assign prerun failed!"); + } + } # # Our next state depends on whether the experiment was active or swapped. @@ -1055,6 +1072,7 @@ elsif ($inout eq "modify") { else { $optarg = ($reboot ? "-reboot" : ""); $optarg .= ($eventsys_restart ? " -eventsys_restart" : ""); + $optarg .= ($genimode ? "-noswapout" : ""); } if ($experiment->Swap($Experiment::EXPT_SWAPMOD, $optarg) == 0) { @@ -1092,8 +1110,14 @@ elsif ($inout eq "modify") { $modifyError = "Update aborted; experiment swapped out."; } else { - tbreport(SEV_SECONDARY, 'update_aborted', 'state_restored'); - $modifyError = "Update aborted; old state restored."; + if ($genimode) { + tbreport(SEV_SECONDARY, 'update_aborted','still active'); + $modifyError = "Update aborted; still swapped in."; + } + else { + tbreport(SEV_SECONDARY, 'update_aborted','state_restored'); + $modifyError = "Update aborted; old state restored."; + } # Reset the swapper since the experiment is still running. $experiment->SetSwapper($last_swapper); diff --git a/tbsetup/tbswap.in b/tbsetup/tbswap.in index 394395638..b03d31e62 100644 --- a/tbsetup/tbswap.in +++ b/tbsetup/tbswap.in @@ -24,7 +24,9 @@ use English; sub usage() { - print STDERR "Usage: $0 -force { in | out | update [-reboot] [-eventsys_restart] } pid eid\n"; + print STDERR + "Usage: $0 {in|out|modify [-reboot] [-eventsys_restart]} ". + "[-noswapout] [-genimode] pid eid\n"; exit(-1); } @@ -57,6 +59,7 @@ use libtblog; use libArchive; use Experiment; use User; +use Lan; #require exitonwarn; # exitonwarn isn't really a module, so just require it @@ -81,11 +84,12 @@ my $cnetstack = "-S Control"; my $cnetvlanname = "Control"; -sub REAL() { return 4; } -sub CLEANUP() { return 3; } -sub RETRY() { return 2; } +sub REAL() { return 5; } +sub CLEANUP() { return 4; } +sub RETRY() { return 3; } +sub MODIFY() { return 2; } sub UPDATE() { return 1; } -sub UPDATE_RECOVER() { return 0; } +sub MODIFY_RECOVER() { return 0; } # # Grab global enable of linkdelays. @@ -103,13 +107,14 @@ my $updateReconfig = 1; my $update_Eventsys_restart = 0; my $elabinelab = 0; my $plabinelab = 0; -my $force = 0; -my $errors = 0; -my $updatehosed = 0; +my $force = 0; +my $noswapout = 0; +my $genimode = 0; +my $errors = 0; +my $updatehosed = 0; my $state; my $canceled; my $os_setup_pid; -my $cleanvlans; my $nextState; # @@ -122,6 +127,7 @@ my $swapop = shift; if (!$swapop || (($swapop ne "in") && ($swapop ne "out") && + ($swapop ne "modify") && ($swapop ne "update"))) { usage(); } @@ -140,8 +146,10 @@ while ($#ARGV > 1) { } elsif ($arg eq "-noreconfig") { $updateReboot = 0; $updateReconfig = 0; - } elsif ($arg eq "-eventsys_restart" && $swapop eq "update") { + } elsif ($arg eq "-eventsys_restart" && $swapop eq "modify") { $update_Eventsys_restart = 1; + } elsif ($arg eq "-noswapout") { + $noswapout = 0; } else { usage(); } @@ -220,7 +228,7 @@ if (!$force) { tbdie("Experiment should be SWAPPING. Currently $state.") if ($state ne EXPTSTATE_SWAPPING); } - elsif ($swapop eq "update") { + elsif ($swapop eq "modify" || $swapop eq "update") { tbdie("Experiment should be MODIFY_RESWAP. Currently $state.") if ($state ne EXPTSTATE_MODIFY_RESWAP); } @@ -256,7 +264,7 @@ if ($swapop eq "out") { tblog_set_attempt(1); $errors = doSwapout(REAL); } -elsif ($swapop eq "update") { +elsif ($swapop eq "update" || $swapop eq "modify") { # # Before swapout, do cursory admission control to see if the # modified experiment will be swappable. assign_wrapper does a @@ -282,9 +290,13 @@ elsif ($swapop eq "update") { # Phase One -- swap experiment partially out. # print STDERR "Backing up physical state...\n"; - TBExptBackupPhysicalState($pid,$eid); + $experiment->BackupPhysicalState(); - $errors = doSwapout(UPDATE); + # + # Actually, in update mode this is not done cause we are anticipating + # adding nodes only. + # + $errors = ($swapop eq "modify" ? doSwapout(MODIFY) : 0); if (0) { print STDERR "Doing a swapmodswapout on the experiment archive ...\n"; @@ -306,7 +318,7 @@ elsif ($swapop eq "update") { # # Phase Two -- swap experiment back in. # - $errors = doSwapin(UPDATE); + $errors = doSwapin(MODIFY); if ($errors) { # @@ -325,17 +337,17 @@ elsif ($swapop eq "update") { # It is safe to remove the phystate since we know it was # backed up above, and cause we do not know if assign_wrapper # made it to that point before it failed. - if (TBExptRemoveVirtualState($pid, $eid) || - TBExptRestoreVirtualState($pid, $eid) || - TBExptRemovePhysicalState($pid, $eid) || - TBExptRestorePhysicalState($pid,$eid)) { + if ($experiment->RemoveVirtualState($pid, $eid) || + $experiment->RestoreVirtualState($pid, $eid) || + $experiment->RemovePhysicalState($pid, $eid) || + $experiment->RestorePhysicalState($pid,$eid)) { print STDERR "Could not restore backed-up state; "; $CanRecover = 0; } else { print STDERR "Doing a recovery swap-in of old state.\n"; - if (doSwapin(UPDATE_RECOVER)) { + if (doSwapin(MODIFY_RECOVER)) { print STDERR "Could not swap in old physical state; "; $CanRecover = 0; } @@ -347,10 +359,17 @@ elsif ($swapop eq "update") { # (caller) will then have to do more clean up, hence the special # exit status indicated by $updatehosed. # - if (! $CanRecover) { - print STDERR "Recovery aborted! Swapping experiment out.\n"; - doSwapout(CLEANUP); - $updatehosed = 1; + if (!$CanRecover) { + if ($noswapout) { + print STDERR + "No Recovery, but leaving experiment swapped in.\n"; + } + else { + print STDERR + "Recovery aborted! Swapping experiment out.\n"; + doSwapout(CLEANUP); + $updatehosed = 1; + } } else { print STDERR "Update recovery successful.\n"; @@ -448,7 +467,7 @@ exit(0); ## sub doSwapout($) { - my $type = shift; # REAL==4, CLEANUP==3, RETRY==2, UPDATE==1. + my $type = shift; my $swapout_errors = 0; tblog_set_cleanup(1) if $type == CLEANUP; @@ -473,7 +492,7 @@ sub doSwapout($) { if (! $TESTMODE) { if (! ($DISABLE_EVENTS || $elabinelab)) { if ($type >= RETRY || - ($update_Eventsys_restart && $type == UPDATE) ) { + ($update_Eventsys_restart && $type == MODIFY) ) { print "Stopping the event system\n"; if (system("eventsys_control stop $pid,$eid")) { tberror({type => 'secondary', severity => SEV_SECONDARY, @@ -518,7 +537,7 @@ sub doSwapout($) { # When modifying an elabinelab experiment, leave the vlans intact # so that the inner networks are not suddenly disconnected! # - if (! ($elabinelab && $type == UPDATE)) { + if ($type != MODIFY) { TBDebugTimeStamp("snmpit started"); print STDERR "Removing VLANs.\n"; if (system("snmpit -r $pid $eid")) { @@ -526,11 +545,32 @@ sub doSwapout($) { error => ['vlan_reset_failed']}, "Failed to reset VLANs"); $swapout_errors = 1; - } else { - $cleanvlans = 0; } TBDebugTimeStamp("snmpit finished"); } + # + # Must check for stale vlans that we kept around in the above clause + # since they will not be in the lans table anymore. + # + if ($type == CLEANUP) { + my @stale; + if (VLan->StaleVlanList($experiment, \@stale) != 0) { + tberror({type => 'secondary', severity => SEV_SECONDARY, + error => ['vlan_reset_failed']}, + "Failed to get stale VLANs"); + $swapout_errors = 1; + } + if (@stale) { + print "Removing stale vlans @stale\n"; + system("snmpit ". join(" ", map("-o $_", @stale))); + if ($?) { + tberror({type => 'summary', severity => SEV_SECONDARY, + error => ['vlan_reset_failed']}, + "Failed to remove stale vlans"); + $swapout_errors = 1; + } + } + } } if ($type >= CLEANUP) { @@ -618,11 +658,12 @@ sub doSwapout($) { # Since this is an actual swapout, # reset our count of swap out nag emails sent. # - DBQueryWarn("update experiments set swap_requests='',sim_reswap_count='0' ". + DBQueryWarn("update experiments set swap_requests='', ". + " sim_reswap_count='0' ". "where eid='$eid' and pid='$pid'"); } else { # - # $type == RETRY or $type == UPDATE. + # $type == RETRY or $type == MODIFY. # Therefore, don't deallocate nodes which have been successfully # incorporated into the experiment (i.e., are RES_READY). # (nfree will send deallocated nodes to RES_FREE_DIRTY) @@ -753,29 +794,28 @@ sub doSwapout($) { "Failed to reset mountpoints."); } TBDebugTimeStamp("exports finished"); - } - # - # Resetting named maps and email lists is fast and idempotent, - # so whatever. - # - print "Resetting named maps.\n"; - TBDebugTimeStamp("named started"); - if (system("named_setup")) { - tbwarn "Failed to reset named map."; - } - TBDebugTimeStamp("named finished"); + # + # Ditto these two. + # + print "Resetting named maps.\n"; + TBDebugTimeStamp("named started"); + if (system("named_setup")) { + tbwarn "Failed to reset named map."; + } + TBDebugTimeStamp("named finished"); - print "Resetting email lists.\n"; - TBDebugTimeStamp("genelists started"); - if (system("genelists -t")) { - tbwarn "Failed to reset email lists."; + print "Resetting email lists.\n"; + TBDebugTimeStamp("genelists started"); + if (system("genelists -t")) { + tbwarn "Failed to reset email lists."; + } + TBDebugTimeStamp("genelists finished"); } - TBDebugTimeStamp("genelists finished"); } # - # Wipe the DB clean except during UPDATE or RETRY. In those + # Wipe the DB clean except during MODIFY or RETRY. In those # cases, assign_wrapper will reset the DB after reading # the info. # @@ -802,7 +842,7 @@ sub doSwapout($) { ## sub doSwapin($) { - my $type = shift; # REAL==4, RETRY==2, UPDATE==1, UPDATE_RECOVER=0. + my $type = shift; # Just the physnodes ... my @deleted_pnodes = (); @@ -811,7 +851,7 @@ sub doSwapin($) { # and updating the DB state. # - if ($type > UPDATE_RECOVER) { + if ($type > MODIFY_RECOVER) { # # Hacky test to allow disabling of linkdelays if the node is going # to run Linux. See sitevar above. @@ -891,7 +931,7 @@ sub doSwapin($) { # # Errors are fatal; no recovery or retry. # - if ($type == UPDATE) { + if ($type == MODIFY || $type == UPDATE) { my $allocstate = TBDB_ALLOCSTATE_RES_TEARDOWN(); $db_result = @@ -993,7 +1033,7 @@ sub doSwapin($) { # If there are any Plab dslice nodes in the experiment, create the # dslice now # - if ($type > UPDATE_RECOVER) { + if ($type > MODIFY_RECOVER) { # Are there any Plab nodes? First get a list of node types in the exp; # if any are types hosted by any of the PLCs we know about, create # all slices necessary for the experiment in a single plabslice call. @@ -1080,8 +1120,8 @@ sub doSwapin($) { # Setup any control-net firewall. # This must be done before reloading and rebooting nodes. # - if ($firewalled && ($type == REAL || $type == UPDATE) && - doFW($pid, $eid, (($type == UPDATE) ? FWADDNODES : FWSETUP), undef)) { + if ($firewalled && ($type == REAL || $type == MODIFY) && + doFW($pid, $eid, (($type == MODIFY) ? FWADDNODES : FWSETUP), undef)) { return 1; } @@ -1090,8 +1130,8 @@ sub doSwapin($) { # file and that must be done before os_setup (i.e., before nodes are # rebooted). # - if ($plabinelab && !$TESTMODE && $type > UPDATE_RECOVER) { - # for UPDATE and RETRY we pass in the -u to pick up new nodes + if ($plabinelab && !$TESTMODE && $type > MODIFY_RECOVER) { + # for MODIFY and RETRY we pass in the -u to pick up new nodes my $optarg = ($type == REAL ? "" : "-u"); print "Setting up plabinelab.\n"; @@ -1113,7 +1153,7 @@ sub doSwapin($) { # and may have remapped interfaces on the nodes. # if ($type == RETRY || - ($type == UPDATE && ($updateReboot || $updateReconfig))) { + ($type == MODIFY && ($updateReboot || $updateReconfig))) { my $needreboot = ($type == RETRY || $updateReboot) ? 1 : 0; print STDERR "Marking nodes for ", @@ -1139,7 +1179,7 @@ sub doSwapin($) { } # Do this only when nodes are to be rebooted. $experiment->ClearPortRegistration() - if ($type == UPDATE); + if ($type == MODIFY); } # @@ -1184,6 +1224,33 @@ sub doSwapin($) { # parallel with os_setup (no DB dependencies, etc.) # + # + # When doing a modify, we have to compare vlans to determine which + # vlans actually changed and need to be deleted, before processing + # the new vlans for the experiment. Note that vlans that already + # exist on the switches will be left as is by snmpit. + # + if ($type == MODIFY) { + my @diff = (); + my @same = (); + if (Lan->CompareVlansWithSwitches($experiment, \@diff, \@same) != 0) { + tberror({type => 'summary', severity => SEV_SECONDARY, + error => ['vlan_setup_failed']}, + "Failed to compare old vlans"); + return 1; + } + if (@diff) { + print "Removing obsolete vlans @diff\n"; + system("snmpit ". join(" ", map("-o $_", @diff))); + if ($?) { + tberror({type => 'summary', severity => SEV_SECONDARY, + error => ['vlan_setup_failed']}, + "Failed to remove old vlans"); + return 1; + } + } + } + print "Setting up VLANs.\n"; TBDebugTimeStamp("snmpit started"); if (system("snmpit -t $pid $eid")) { @@ -1194,23 +1261,21 @@ sub doSwapin($) { } TBDebugTimeStamp("snmpit finished"); - # - # An error now means that the VLANS need to be cleaned up. - # - $cleanvlans = 1; - - print "Setting up email lists.\n"; - TBDebugTimeStamp("genelists started"); - if (system("genelists -t")) { - tbwarn "Failed to update email lists."; - # - # This is a non-fatal error. - # + # No need to do this except during a real swapin. + if ($type == REAL) { + print "Setting up email lists.\n"; + TBDebugTimeStamp("genelists started"); + if (system("genelists -t")) { + tbwarn "Failed to update email lists."; + # + # This is a non-fatal error. + # + } + TBDebugTimeStamp("genelists finished"); } - TBDebugTimeStamp("genelists finished"); # - # Don't clear port counters on UPDATE. + # Don't clear port counters on MODIFY. # (XXX should clear new nodes' port counters.) if ($type >= RETRY) { @@ -1282,7 +1347,7 @@ sub doSwapin($) { # For the robot testbed, start the location piper *before* the event # system. # - if (-x $piper && ($type != UPDATE && $type != UPDATE_RECOVER)) { + if (-x $piper && ($type != MODIFY && $type != MODIFY_RECOVER)) { print "Starting the location piper.\n"; if (system("$piper $pid $eid")) { tberror "Failed to start the location piper."; @@ -1291,7 +1356,7 @@ sub doSwapin($) { } if ( $update_Eventsys_restart || - ($type != UPDATE && $type != UPDATE_RECOVER) ) { + ($type != MODIFY && $type != MODIFY_RECOVER) ) { print "Starting the event system.\n"; TBDebugTimeStamp("eventsys_control started"); if (system("eventsys_control start $pid,$eid")) { @@ -1312,7 +1377,7 @@ sub doSwapin($) { "where pid='$pid' and eid='$eid'"); my ($linktest_level,$linktest_pid) = $query_result->fetchrow_array(); - if ($linktest_level && ($type == REAL || $type == UPDATE)) { + if ($linktest_level && ($type == REAL || $type == MODIFY)) { if ($linktest_pid) { tbwarn "Linktest is already running! $linktest_pid"; } @@ -1332,8 +1397,8 @@ sub doSwapin($) { # # ElabinElab setup. This might not be the right place for this! # - if ($elabinelab && !$TESTMODE && ($type == REAL || $type == UPDATE)) { - my $optarg = ($type == UPDATE ? "-u" : ""); + if ($elabinelab && !$TESTMODE && ($type == REAL || $type == MODIFY)) { + my $optarg = ($type == MODIFY ? "-u" : ""); print "Setting up elabinelab. This could take a while!\n"; TBDebugTimeStamp("elabinelab setup started"); @@ -1351,7 +1416,7 @@ sub doSwapin($) { # if (! ($DISABLE_EVENTS || $elabinelab)) { if ( $update_Eventsys_restart || - ($type != UPDATE && $type != UPDATE_RECOVER) ) { + ($type != MODIFY && $type != MODIFY_RECOVER) ) { TBDebugTimeStamp("Starting event time"); if (system("tevc -e $pid/$eid now __ns_sequence start")) { tberror({type => 'secondary', severity => SEV_SECONDARY, -- GitLab