Commit 856c2509 authored by Kirk Webb's avatar Kirk Webb

Snapshot.

* Changed the way options are parsed in the python scripts so that modules
  can easily add and use their own options independent of top-level scripts.

* Added --noIS and --pollNodes module options.

* Added batch option to vnode_setup (degree of parallelization)
  - defaults to 10

* Major updates to plamonitord
  - batches testing, currently to 40
parent 556e884f
......@@ -11,6 +11,7 @@ import pwd
import MySQLdb
import libtestbed
from libtestbed import *
__dbName = "@TBDBNAME@"
......@@ -33,7 +34,7 @@ def TBDBConnect():
name = "uid%d" % uid
dbuser = "%s:%s:%d" % (sys.argv[0], name, os.getpid())
if debug:
if libtestbed.gv.debug:
print "Connecting to db %s as %s" % (__dbName, dbuser)
# Connect, with retries
......@@ -55,7 +56,7 @@ def DBQueryFatal(queryPat, querySub = (), asDict = False):
else:
cursor = __dbConnection.cursor()
if debug:
if libtestbed.gv.debug:
print "Executing DB query %s" % queryPat
tries = __dbQueryMaxtries
......@@ -63,7 +64,7 @@ def DBQueryFatal(queryPat, querySub = (), asDict = False):
try:
cursor.execute(queryPat, querySub)
ret = cursor.fetchall()
if debug:
if libtestbed.gv.debug:
rs = `ret`
if len(rs) > 60:
rs = rs[:60] + "..."
......
......@@ -8,6 +8,7 @@ import syslog
import traceback
import xmlrpclib
from optparse import OptionParser
from warnings import warn
#
......@@ -29,30 +30,24 @@ DEF_TIMEOUT = 1*60 # default timeout interval
MAXCONSECEXC = 3
#
# informational output control variables
# Global Variable "namespace" and command line argument parser setup
#
debug = 0
verbose = 0
gv = False
parser = OptionParser()
parser.add_option("-v", "--verbose", dest="verbose", default=False,
action="store_true", help="Say more about internal stuff")
parser.add_option("-d", "--debug", dest="debug", default=False,
action="store_true", help="Say A LOT about internal stuff")
#
# Handle generic arguments.
#
def handleArgs(args):
"""
Takes a list of command-line arguments, interprets those at the
beginning that are meant for libplab (-vd), and returns the remainder
of the arguments.
"""
global verbose, debug
import getopt
opts, args = getopt.getopt(args, "vd")
for o, a in opts:
if o == "-v":
verbose = 1
if o == "-d":
debug = 1
return args
global parser, gv
gv, unparsed_args = parser.parse_args(args)
return unparsed_args
def SENDMAIL(To, Subj, Msg, From = None, Headers = None, Files = ()):
"""
......@@ -181,7 +176,7 @@ def enable_sigs(osigs):
#
class TimeoutError: pass
def alrmhandler(signum, frame):
if debug:
if gv.debug:
print "Timeout! Raising TimeoutError."
raise TimeoutError
......@@ -282,7 +277,7 @@ def ForkCmd(cmd, args=(), timeout=DEF_TIMEOUT,
try: exval = os.wait()[1]
except: exval = 256
else:
if debug:
if gv.debug:
if os.WIFEXITED(exval):
print "Process complete, exit value: %d" % \
os.WEXITSTATUS(exval)
......@@ -298,7 +293,7 @@ def ForkCmd(cmd, args=(), timeout=DEF_TIMEOUT,
# child
else:
def sigusrexit(signum, frame):
if debug:
if gv.debug:
print "Received SIGUSR1, bailing out"
os._exit(1)
......@@ -352,7 +347,7 @@ def tryXmlrpcCmd(cmd, args = (),
tries = inittries
if debug:
if gv.debug:
print "About to perform command %s with args:\n\t%s" % \
(cmd, args)
while 1:
......@@ -375,14 +370,14 @@ def tryXmlrpcCmd(cmd, args = (),
e.triesleft = tries
raise xmlrpclib.Fault, e
except TimeoutError, e:
if debug:
if gv.debug:
print "Caught a timeout error, setting triesleft and raising."
e.triesleft = tries
raise TimeoutError, e
except (socket.error, xmlrpclib.ProtocolError), e:
print "Encountered problem communicating with agent " \
"while executing command: %s" % cmd.func_name
if debug:
if gv.debug:
print "Exception is of type: %s" % e
if tries > 0:
......
......@@ -11,6 +11,7 @@ import pwd
import MySQLdb
import libtestbed
from libtestbed import *
__dbName = "@TBDBNAME@"
......@@ -33,7 +34,7 @@ def TBDBConnect():
name = "uid%d" % uid
dbuser = "%s:%s:%d" % (sys.argv[0], name, os.getpid())
if debug:
if libtestbed.gv.debug:
print "Connecting to db %s as %s" % (__dbName, dbuser)
# Connect, with retries
......@@ -55,7 +56,7 @@ def DBQueryFatal(queryPat, querySub = (), asDict = False):
else:
cursor = __dbConnection.cursor()
if debug:
if libtestbed.gv.debug:
print "Executing DB query %s" % queryPat
tries = __dbQueryMaxtries
......@@ -63,7 +64,7 @@ def DBQueryFatal(queryPat, querySub = (), asDict = False):
try:
cursor.execute(queryPat, querySub)
ret = cursor.fetchall()
if debug:
if libtestbed.gv.debug:
rs = `ret`
if len(rs) > 60:
rs = rs[:60] + "..."
......
# -*- python -*-
"""
Library for interfacing with Plab and, specifically, dslice. This
abstracts out the concepts of Plab central, slices, and nodes. All
data (except static things like certificates) is kept in the Emulab
DB. Unlike the regular dslice svm, this one supports dynamically
changing which nodes are in a slice.
This requires an already obtained dslice certficate and key. By default
it expects to find these in the @prefix@/etc/plab/ subdirectory.
Library for interfacing with Plab. This abstracts out the concepts of
Plab central, slices, and nodes. All data (except static things like
certificates) is kept in the Emulab DB. Unlike the regular dslice
svm, this one supports dynamically changing which nodes are in a
slice.
This requires an already obtained dslice certficate and key. By
default it expects to find these in the @prefix@/etc/plab/
subdirectory.
"""
import sys
......@@ -26,6 +27,7 @@ from warnings import warn
#
# Testbed and DB access libs
#
import libtestbed
from libtestbed import *
from libdb import *
......@@ -47,6 +49,7 @@ RENEW_TIME = 2*24*60*60 # Renew two days before lease expires
RENEW_TIMEOUT = 1*60 # give the node manager a minute to respond to renew
FREE_TIMEOUT = 1*60 # give the node manager a minute to respond to free
NODEPROBEINT = 10
TBOPS = "@TBOPSEMAIL@".replace("\\","")
MAILTAG = "@THISHOMEBASE@"
......@@ -86,6 +89,13 @@ NODEPREFIX = "plab"
BADSITECHARS = re.compile(r"\W+")
PLABBASEPRIO = 20000
#
# Additions to parser
#
parser.add_option("--pollNode", action="store_true", dest="pollNode",
default=False,
help="Poll node before attempting to set it up.")
#
# var to track failed renewals
#
......@@ -165,11 +175,11 @@ class siteParser:
class Plab:
def __init__(self, agent = None):
def __init__(self, agent=None):
if not agent:
self.agent = agents[DEF_AGENT]()
pass
if debug:
if libtestbed.gv.debug:
print "Using module: %s" % self.agent.modname
pass
pass
......@@ -222,12 +232,12 @@ class Plab:
except:
extype, exval, extrace = sys.exc_info()
print "Error talking to agent: %s: %s" % (extype, exval)
if debug:
if libtestbed.gv.debug:
print extrace
print "Going back to sleep until next scheduled poll"
return
if debug:
if libtestbed.gv.debug:
print "Got advertisement list:"
print avail
pass
......@@ -246,7 +256,7 @@ class Plab:
# Get node info we already have.
known = self.__getKnownPnodes()
if debug:
if libtestbed.gv.debug:
print "Got known pnodes:"
print known
pass
......@@ -255,6 +265,7 @@ class Plab:
toadd = [] # List of node entries to add to DB
toupdate = [] # List of node entries to update in the DB
for nodeent in avail:
nodeent['SITE'] = BADSITECHARS.sub("-", nodeent['SITE'])
nid = nodeent['NODEID']
if not known.has_key(nid):
toadd.append((nodeent, False))
......@@ -271,7 +282,7 @@ class Plab:
if len(toadd):
if ignorenew:
if verbose:
if libtestbed.gv.verbose:
print "%d new Plab nodes, but ignored for now" % len(toadd)
pass
pass
......@@ -281,11 +292,11 @@ class Plab:
print "There are %d new/changed Plab nodes." % len(toadd)
for nodeent, update in toadd:
self.__findLinkType(nodeent)
if debug:
if libtestbed.gv.debug:
print "Found linktype %s for node %s" % \
(nodeent['LINKTYPE'], nodeent['IP'])
pass
self.__addNode(nodeent, update)
pass
self.__addNode(nodeent, update)
nodestr = "%s\t\t%s\t\t%s\t\t%s\t\t%s\n" % \
(nodeent['NODEID'],
nodeent['IP'],
......@@ -410,7 +421,6 @@ class Plab:
nodeid = NODEPREFIX + nodeent['NODEID']
priority = PLABBASEPRIO + int(nodeent['NODEID'])
hostonly = nodeent['HNAME'].replace(".", "-")
site = BADSITECHARS.sub("-", nodeent['SITE'])
print "Creating pnode %s as %s, priority %d." % \
(nodeent['IP'], nodeid, priority)
......@@ -432,7 +442,7 @@ class Plab:
DBQueryFatal("replace into widearea_nodeinfo"
" (node_id, contact_uid, hostname, site)"
" values (%s, %s, %s, %s)",
(nodeid, 'bnc', nodeent['HNAME'], site))
(nodeid, 'bnc', nodeent['HNAME'], nodeent['SITE']))
DBQueryFatal("replace into interfaces"
" (node_id, card, port, IP, interface_type,"
......@@ -472,7 +482,7 @@ class Plab:
vprio = (priority * 100) + (n+1)
sshdport = 38000+(n+1)
vnodeid = "%s-%d" % (nodeid, n+1)
if verbose:
if libtestbed.gv.verbose:
print "Creating vnode %s, priority %d" % (vnodeid, vprio)
pass
......@@ -523,7 +533,7 @@ class Plab:
it doesn't change.
"""
if not hasattr(self, "__getNodetypeInfoCache"):
if debug:
if libtestbed.gv.debug:
print "Getting node type info"
pass
res = DBQueryFatal("select osid, control_iface"
......@@ -541,7 +551,7 @@ class Plab:
addNode helper function. Returns a (nodeid, priority) tuple of
the next free nodeid and priority for Plab nodes.
"""
if debug:
if libtestbed.gv.debug:
print "Getting next free nodeid"
DBQueryFatal("lock tables nextfreenode write")
try:
......@@ -731,7 +741,7 @@ class Slice:
XXX This should probably be made lazy, since not all operations
really need it
"""
if verbose:
if libtestbed.gv.verbose:
print "Loading slice for pid/eid %s/%s" % (self.pid, self.eid)
res = DBQueryFatal("select slicename, slicemeta from plab_slices"
" where pid = %s and eid = %s",
......@@ -843,6 +853,12 @@ class Node:
self.nodeid = nodeid
self.IP = self.__findIP()
self.nodemeta = None
self.pollNode = False
if hasattr(gv,"pollNode"):
self.pollNode = gv.pollNode
pass
return
# XXX: may want to rethink signal handling here.
def _create(self, force=False):
......@@ -878,6 +894,20 @@ class Node:
self.slice.slicename, self.nodeid,
self.nodemeta, self.leaselen))
if self.pollNode:
TIMESTAMP("Waiting for %s to respond" % self.nodeid)
while True:
try:
self.__perform("/bin/true")
pass
except:
time.sleep(NODEPROBEINT)
pass
else: break
pass
TIMESTAMP("Node %s ready." % self.nodeid)
pass
TIMESTAMP("createnode %s finished." % self.nodeid)
return
......@@ -886,7 +916,7 @@ class Node:
Loads an already allocated node from the DB. Don't call this
directly, use Slice.loadNode instead.
"""
if verbose:
if libtestbed.gv.verbose:
print "Loading node %s" % self.nodeid
res = DBQueryFatal("select slicename, nodemeta"
" from plab_slice_nodes where node_id = %s",
......@@ -975,7 +1005,7 @@ class Node:
TIMESTAMP("emulabify %s finished." % self.nodeid)
def addToGroup(self, user, group):
if verbose:
if libtestbed.gv.verbose:
print "Adding %s to group %s on node %s" % \
(user, group, self.nodeid)
self.__perform("sudo /usr/sbin/usermod -G %s %s" % (group, user))
......@@ -985,11 +1015,11 @@ class Node:
Unpacks a locally stored gzip'd tarball to the specified path
(default /) on the remote node. Always done as remote root.
"""
if verbose:
if libtestbed.gv.verbose:
print "Unpacking tgz %s to %s on %s" % \
(tgzpath, destpath, self.nodeid)
try:
if debug:
if libtestbed.gv.debug:
print "Trying to grab rootball through loopback service"
self.__perform("sudo wget -q -nH -P /tmp " +
ROOTBALL_URL + tgzname)
......@@ -1004,7 +1034,7 @@ class Node:
"""
Executes the given command on the remote node via sshtb
"""
if debug:
if libtestbed.gv.debug:
print "Performing '%s' on %s" % (command, self.nodeid)
if os.spawnl(os.P_WAIT, SSH, SSH, "-host", self.nodeid, command):
raise RuntimeError, "ssh '%s' failed" % command
......@@ -1015,7 +1045,7 @@ class Node:
as root.
"""
import popen2
if debug:
if libtestbed.gv.debug:
print "Copying %s to %s on %s" % \
(localfile, remotefile, self.nodeid)
# dd is a bit overbearing for this job, but I can't do something
......@@ -1043,6 +1073,6 @@ class Node:
assert (len(res) > 0), \
"No IP found for nodeid %s" % self.nodeid
((IP, ), ) = res
if debug:
if libtestbed.gv.debug:
print "Found IP %s for node %s" % (IP, self.nodeid)
return IP
......@@ -8,6 +8,7 @@ import syslog
import traceback
import xmlrpclib
from optparse import OptionParser
from warnings import warn
#
......@@ -29,30 +30,24 @@ DEF_TIMEOUT = 1*60 # default timeout interval
MAXCONSECEXC = 3
#
# informational output control variables
# Global Variable "namespace" and command line argument parser setup
#
debug = 0
verbose = 0
gv = False
parser = OptionParser()
parser.add_option("-v", "--verbose", dest="verbose", default=False,
action="store_true", help="Say more about internal stuff")
parser.add_option("-d", "--debug", dest="debug", default=False,
action="store_true", help="Say A LOT about internal stuff")
#
# Handle generic arguments.
#
def handleArgs(args):
"""
Takes a list of command-line arguments, interprets those at the
beginning that are meant for libplab (-vd), and returns the remainder
of the arguments.
"""
global verbose, debug
import getopt
opts, args = getopt.getopt(args, "vd")
for o, a in opts:
if o == "-v":
verbose = 1
if o == "-d":
debug = 1
return args
global parser, gv
gv, unparsed_args = parser.parse_args(args)
return unparsed_args
def SENDMAIL(To, Subj, Msg, From = None, Headers = None, Files = ()):
"""
......@@ -181,7 +176,7 @@ def enable_sigs(osigs):
#
class TimeoutError: pass
def alrmhandler(signum, frame):
if debug:
if gv.debug:
print "Timeout! Raising TimeoutError."
raise TimeoutError
......@@ -282,7 +277,7 @@ def ForkCmd(cmd, args=(), timeout=DEF_TIMEOUT,
try: exval = os.wait()[1]
except: exval = 256
else:
if debug:
if gv.debug:
if os.WIFEXITED(exval):
print "Process complete, exit value: %d" % \
os.WEXITSTATUS(exval)
......@@ -298,7 +293,7 @@ def ForkCmd(cmd, args=(), timeout=DEF_TIMEOUT,
# child
else:
def sigusrexit(signum, frame):
if debug:
if gv.debug:
print "Received SIGUSR1, bailing out"
os._exit(1)
......@@ -352,7 +347,7 @@ def tryXmlrpcCmd(cmd, args = (),
tries = inittries
if debug:
if gv.debug:
print "About to perform command %s with args:\n\t%s" % \
(cmd, args)
while 1:
......@@ -375,14 +370,14 @@ def tryXmlrpcCmd(cmd, args = (),
e.triesleft = tries
raise xmlrpclib.Fault, e
except TimeoutError, e:
if debug:
if gv.debug:
print "Caught a timeout error, setting triesleft and raising."
e.triesleft = tries
raise TimeoutError, e
except (socket.error, xmlrpclib.ProtocolError), e:
print "Encountered problem communicating with agent " \
"while executing command: %s" % cmd.func_name
if debug:
if gv.debug:
print "Exception is of type: %s" % e
if tries > 0:
......
......@@ -4,9 +4,18 @@ import sys
sys.path.append("@prefix@/lib")
import xmlrpclib
import getopt
import libtestbed
from libtestbed import *
#
# Setup mod_PLC's parse args
#
parser.add_option("--noIS", dest="noIS", action="store_true",
default=False,
help="Don't run InstantiateSliver() in mod_PLC")
#
# PLC constants
#
......@@ -92,7 +101,11 @@ class PLCagent:
class mod_PLC:
def __init__(self):
self.modname = "mod_PLC"
pass
self.noIS = False
if hasattr(libtestbed.gv,"noIS"):
self.noIS = libtestbed.gv.noIS
pass
return
def createSlice(self, slice):
......@@ -101,7 +114,7 @@ class mod_PLC:
try:
res = tryXmlrpcCmd(agent.createSlice)
if debug:
if libtestbed.gv.debug:
print res
pass
pass
......@@ -112,7 +125,7 @@ class mod_PLC:
try:
res = tryXmlrpcCmd(agent.AssignUsers,
EMULABMAN_EMAIL)
if debug:
if libtestbed.gv.debug:
print res
pass
pass
......@@ -124,7 +137,7 @@ class mod_PLC:
res = tryXmlrpcCmd(agent.AssignShares,
(DEF_PLC_LEASELEN,
DEF_PLC_SHARES))
if debug:
if libtestbed.gv.debug:
print res
pass
pass
......@@ -148,7 +161,7 @@ class mod_PLC:
res = tryXmlrpcCmd(agent.AssignShares,
(DEF_PLC_LEASELEN,
DEF_PLC_SHARES))
if debug:
if libtestbed.gv.debug:
print res
pass
pass
......@@ -169,7 +182,7 @@ class mod_PLC:
try:
res = tryXmlrpcCmd(agent.AssignNodes, node.IP,
inittries=tries, raisefault=True)
if debug:
if libtestbed.gv.debug:
print res
pass
pass
......@@ -193,29 +206,30 @@ class mod_PLC:
pass
# push changes out immediately.
try:
TIMESTAMP("Starting InstantiateSliver() on %s." % node.nodeid)
res = tryXmlrpcCmd(agent.InstantiateSliver, node.IP)
TIMESTAMP("InstantiateSliver() complete on %s." % node.nodeid)
if debug:
print res
if not self.noIS:
# push changes out immediately.
try:
TIMESTAMP("Starting InstantiateSliver() on %s." % node.nodeid)
res = tryXmlrpcCmd(agent.InstantiateSliver, node.IP)
TIMESTAMP("InstantiateSliver() complete on %s." % node.nodeid)
if libtestbed.gv.debug:
print res
pass
pass
except:
print "Failed to instantiate sliver %s on slice %s" % \
(node.nodeid, node.slice.slicename)
self.freeNode(node)
raise
pass
except:
print "Failed to instantiate sliver %s on slice %s" % \
(node.nodeid, node.slice.slicename)
self.freeNode(node)
raise
return (res, None, None)
def freeNode(self, node):
agent = PLCagent(node.slice.slicename)
try:
res = tryXmlrpcCmd(agent.UnAssignNodes, node.IP)
if debug:
if libtestbed.gv.debug:
print res
pass
pass
......
......@@ -11,6 +11,7 @@ import nodemgr, nodemgrproxy
import lease
import cPickle
import libtestbed
from libtestbed import *
#
......@@ -55,7 +56,7 @@ class mod_dslice:
1, LEASELEN, (node.IP,)))
assert (len(tickets) == 1), "%d tickets returned" % len(tickets)
ticketdata = tickets[0]
if debug:
if libtestbed.gv.debug:
print "Obtained ticket:"
print ticketdata
pass
......@@ -102,7 +103,7 @@ class mod_dslice:
break
pass
if debug:
if libtestbed.gv.debug:
print "Obtained lease/vm:"
print leasedata
pass
......@@ -157,7 +158,7 @@ class mod_dslice:
should be one of the keys that ssh naturally knows about, or
those commands will fail.
"""
if verbose:
if libtestbed.gv.verbose:
print "Adding pubkey to node %s" % node.nodeid
pass
if not identityfile.endswith(".pub"):
......@@ -166,7 +167,7 @@ class mod_dslice:
pubkey = file(identityfile, "rb").read().strip()
nodemgr = self.__createNodemgrProxy(node.IP)
ret = tryXmlrpcCmd(nodemgr.addkey, (node.slice.slicename, pubkey))
if debug:
if libtestbed.gv.debug:
print "Added key: %s" % `ret`
pass
return ret
......@@ -201,7 +202,7 @@ class mod_dslice:
else:
break
if debug:
if libtestbed.gv.debug:
print "Obtained new lease:"
print self.leasedata
self.lease = lease.lease(self.leasedata)
......@@ -216,11 +217,11 @@ class mod_dslice:
# XXX This is a workaround for a bug in M2Crypto
import tempfile
if verbose:
if libtestbed.gv.verbose:
print "Generating slice keypair"
# pdssi = Plab Dynamic Slice SSH Identity
fname = tempfile.mktemp("pdssi%d" % os.getpid())
if debug:
if libtestbed.gv.debug:
print "Writing keypair to %s(.pub)" % fname
if os.spawnlp(os.P_WAIT, "ssh-keygen",
"ssh-keygen", "-t", "rsa", "-b", "1024", "-P", "",
......@@ -235,7 +236,7 @@ class mod_dslice:
# Below here is the way it _should_ be done
if verbose:
if libtestbed.gv.verbose:
print "Generating slice keypair"
key = RSA.gen_key(1024, 35) # OpenSSH ssh-keygen uses 35 for e
......@@ -257,7 +258,7 @@ class mod_dslice:
Also caches the agent for later reuse.
"""
if not self.__agentProxy:
if verbose:
if libtestbed.gv.verbose:
print "Connecting to agent %s" % AGENTIP
if insecure:
args = (AGENTIP, agent.PORT)
......@@ -273,7 +274,7 @@ class mod_dslice:
manager. Also caches the nodemgr for later reuse.
"""
if not self.__nodemgrProxies[IP]:
if verbose:
if libtestbed.gv.verbose:
print "Connecting to nodemgr on %s" % IP
self.__nodemgrProxies[IP] = \
nodemgrproxy.nodemgrproxy(IP,
......
......@@ -85,8 +85,9 @@ if (defined($options{"d"})) {
# Local vars
#
my $logfile = "$TB/log/plabmonitord";
my @nodes = ();
my $SLEEPINT = 300; # five minutes between alloc retries.
my @oldnodes = ();
my $LOOPINT = 1800; # 1/2 hour between successive loops.
my $BATCHNUM = 40;
#
# daemonize
......@@ -106,8 +107,11 @@ print "Plab Monitor Daemon starting... pid $$, at ".`date`;
#
while (1) {
my $start = time