Commit 3ae7da68 authored by Kirk Webb's avatar Kirk Webb

More updates:

* Added comments
* Added Emulab copyright
* made mod_PLC handle the "not assigned" error case in freeNode()
  - optimization and less log clutter.
* bug fix in plabmonitord (ISUP detection)
parent df2b09b3
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
import os
......
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
"""
Library for interfacing with Plab. This abstracts out the concepts of
Plab central, slices, and nodes. All data (except static things like
......@@ -116,6 +122,9 @@ signal.signal(signal.SIGPIPE, signal.SIG_IGN)
# Plab abstraction
#
#
# Class responsible for parsing planetlab sites file
#
class siteParser:
def __init__(self):
......@@ -261,15 +270,21 @@ class Plab:
print known
pass
# Add new nodes
# Create list of nodes to add or update
toadd = [] # List of node entries to add to DB
toupdate = [] # List of node entries to update in the DB
for nodeent in avail:
# Replace sequences of bad chars in the site entity with
# a single "-".
nodeent['SITE'] = BADSITECHARS.sub("-", nodeent['SITE'])
nid = nodeent['NODEID']
# If we don't know about this node, then add it and mark
# that we are _not_ doing an update.
if not known.has_key(nid):
toadd.append((nodeent, False))
pass
# If we do know about this node, check to see if any of its
# attributes have changed, and if so, add it to the list and
# mark it for update.
else:
kent = known[nid]
if kent['HNAME'] != nodeent['HNAME'] or \
......@@ -279,24 +294,30 @@ class Plab:
pass
pass
pass
# Process the add/update list - add to, or update in the DB.
if len(toadd):
# Are we ignoring new entries?
if ignorenew:
if libtestbed.gv.verbose:
print "%d new Plab nodes, but ignored for now" % len(toadd)
pass
pass
# If not ignoring, do the addition/update.
else:
addstr = ""
updstr = ""
print "There are %d new/changed Plab nodes." % len(toadd)
for nodeent, update in toadd:
# Get the linktype here so we can report it in email.
self.__findLinkType(nodeent)
if libtestbed.gv.debug:
print "Found linktype %s for node %s" % \
(nodeent['LINKTYPE'], nodeent['IP'])
pass
pass
# Add/update the node in the DB.
self.__addNode(nodeent, update)
# Rest of block adds a line for the add/update messages.
nodestr = "%s\t\t%s\t\t%s\t\t%s\t\t%s\n" % \
(nodeent['NODEID'],
nodeent['IP'],
......@@ -311,8 +332,10 @@ class Plab:
pass
pass
# We need to update DNS since we've added hosts..
print "Forcing a named map update ..."
os.spawnl(os.P_WAIT, NAMED_SETUP, NAMED_SETUP)
# Now announce that we've added/updated nodes.
SENDMAIL(TBOPS,
"Plab nodes have been added/updated in the DB.",
"The following plab nodes have been added to the DB:\n"
......
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
import os
......
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
sys.path.append("@prefix@/lib")
......@@ -227,16 +232,40 @@ class mod_PLC:
def freeNode(self, node):
agent = PLCagent(node.slice.slicename)
try:
res = tryXmlrpcCmd(agent.UnAssignNodes, node.IP)
if libtestbed.gv.debug:
print res
tries = 3
while 1:
TIMESTAMP("freenode %s try %d started." % (node.nodeid,
DEF_TRIES-tries+1))
try:
res = tryXmlrpcCmd(agent.UnAssignNodes, node.IP,
inittries=tries, raisefault=True)
if libtestbed.gv.debug:
print res
pass
pass
# Node may not actually be assigned to the slice, causing
# the xmlrpc call to fail stating this fact. If it ain't
# here, then we're done.
except xmlrpclib.Fault, e:
if e.faultString.find("not assigned") != -1:
print "%s isn't allocate to PLC slice %s." % \
(node.nodeid, node.slice.slicename)
break
elif e.triesleft > 0:
tries = e.triesleft
else:
print "Failed to release node %s from slice %s" % \
(node.nodeid, node.slice.slicename)
raise
pass
# success
else:
break
pass
except:
print "Failed to release node %s from slice %s" % \
(node.nodeid, node.slice.slicename)
raise
TIMESTAMP("freenode %s finished." % node.nodeid)
return res
def renewNode(self, node, length = 0):
......
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
sys.path.append("@prefix@/lib")
......
#!/usr/local/bin/python
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
sys.path.append("@prefix@/lib")
......
......@@ -86,8 +86,9 @@ if (defined($options{"d"})) {
#
my $logfile = "$TB/log/plabmonitord";
my @oldnodes = ();
my $LOOPINT = 1800; # 1/2 hour between successive loops.
my $BATCHNUM = 40;
my $LOOPSLEEP = 1800; # 1/2 hour between successive loops.
my $PAUSETIME = 120; # 2 minute pause after running vnode_setup
my $BATCHNUM = 40; # degree of parallelization
#
# daemonize
......@@ -113,6 +114,7 @@ while (1) {
"----------------\n";
print "Starting plabmonitord loop at ". TimeStamp() ."\n";
# Get entries for plab physnodes in hwdown.
my $query_result =
DBQueryWarn("select r1.node_id,n1.phys_nodeid from reserved as r1 ".
"left join nodes as n1 on n1.node_id=r1.node_id ".
......@@ -129,7 +131,8 @@ while (1) {
}
#
# Build up current node list
# Build up current node list. We prefer to try nodes that haven't
# failed in previous iterations (the newly dead).
#
my @nodes = ();
while (my ($vnode,$pnode) = $query_result->fetchrow_array()) {
......@@ -143,9 +146,14 @@ while (1) {
}
}
# We're done with last run's failed node list. Clear it since it
# will be built again this run.
@oldnodes = ();
# Run through the nodes in parallelized batches
while (my @batch = splice(@nodes,0,$BATCHNUM)) {
# Do some initial checks on the nodes and remove those that
# fail these from testing this time around.
my @batch2 = grep(nodeprecheck($_), @batch);
my @vnodes = map {$_->[0]} @batch2;
my @pnodes = map {$_->[1]} @batch2;
......@@ -157,13 +165,16 @@ while (1) {
TimeStamp() . "\n";
#
# Try to setup the vnodes.
# Try to setup this batch of vnodes.
#
system("vnode_setup -f -d -n $BATCHNUM $PLABMOND_PID $PLABMOND_EID @vnodes");
print "sleeping for a bit.\n"
if ($debug);
sleep(180); # wait a bit..
sleep($PAUSETIME); # wait a bit to let final stragglers check in.
# Check the nodes to find out which are up, and which failed
# in the vnode_setup we just ran.
print "Checking vnode_setup run status\n";
my @failed = grep(nodepostalloc($_), @batch2);
......@@ -172,8 +183,9 @@ while (1) {
my @fvnodes = map {$_->[0]} @failed;
my @fpnodes = map {$_->[1]} @failed;
print "vnode_setup failed for the following vnodes: @fvnodes\n";
system("vnode_setup -f -d -k $PLABMOND_PID $PLABMOND_EID @fvnodes");
print "### vnode_setup failed for the following vnodes: @fvnodes\n";
print "### Proceding to tear them down.\n";
system("vnode_setup -f -d -k -n $BATCHNUM $PLABMOND_PID $PLABMOND_EID @fvnodes");
}
# Keep track of failed nodes for next daemon loop iteration.
......@@ -181,28 +193,32 @@ while (1) {
}
loop:
my $leftover = $LOOPINT - (time() - $start);
$leftover = ($leftover > 0) ? $leftover : 0;
print "Sleeping for $leftover seconds.\n";
sleep($leftover);
# Wait a while between setup attempts to let PLAB deallocation percolate.
print "Sleeping for $LOOPSLEEP seconds.\n";
sleep($LOOPSLEEP);
}
#
# Check vnode status, moving nodes back into production if they booted up,
# and leaving them in hwdown if they didn't.
#
sub nodepostalloc($) {
my ($vnode, $pnode) = @{$_[0]};
my $revive = 0;
my $retval = 1;
my $state = TBDB_NODESTATE_UNKNOWN();
# Did the node send ISUP (boot up completely)?
if (TBGetNodeEventState($vnode, \$state) &&
$state eq TBDB_NODESTATE_ISUP())
{
$revive = 1;
}
print "State for $vnode is $state\n";
print "State for $vnode is $state\n" if $debug;
#
# That all worked. Move the pnode out of hwdown and back into
# It came up! Move the pnode out of hwdown and back into
# normal holding experiment.
#
if ($revive &&
......@@ -220,6 +236,8 @@ sub nodepostalloc($) {
"$pnode has been brought back from the afterworld!",
$TBOPS);
$retval = 0;
# It didn't come up..
} else {
print "Leaving $pnode in hwdown!\n";
$retval = 1
......@@ -229,6 +247,10 @@ sub nodepostalloc($) {
}
#
# Preliminary checks:
# * make sure pnode is still in hwdown
#
sub nodeprecheck($) {
my ($vnode, $pnode) = @{$_[0]};
my $retval = 1;
......@@ -246,19 +268,21 @@ sub nodeprecheck($) {
"pid = '$NODEDEAD_PID' and ".
"eid = '$NODEDEAD_EID'");
if (!$query_result) {
print "Node entry DB check failed! Waiting a bit ...\n";
return;
print "Node $pnode entry DB check failed! Skipping it ...\n";
$retval = 0;
}
if (!$query_result->num_rows()) {
elsif (!$query_result->num_rows()) {
print "Node was removed out from under us! Continuing on ...\n";
return;
$retval = 0;
}
return $retval;
}
#
# send mail with given message, and exit (also printing message).
#
sub fatal($)
{
local($msg) = $_[0];
......@@ -267,6 +291,10 @@ sub fatal($)
die($msg);
}
#
# Helper function - see if item is first element in any member of
# an array of tuples.
#
sub search($@)
{
$target = shift;
......
#!/usr/local/bin/python
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
sys.path.append("@prefix@/lib")
......
#!/usr/local/bin/python
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
sys.path.append("@prefix@/lib")
......
#!/usr/local/bin/python
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
sys.path.append("@prefix@/lib")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment