Commit ae2eec76 authored by Kirk Webb's avatar Kirk Webb

Kirk takes the weed whacker to the plab code. This is the first pass result.

I'll come along for a closer cut in the future.

* Modularized the plab communications 'adaptor' interface and moved the
  dslice- and PLC-specific code into their own modules.

* Wrote an API definition README

* Separated out generic routines from libplab into their own library modules
  (libtestbed.py and libdb.py)

Functionally, not much has changed - this was just a massive re-org with some
other cleanup.  Should be much easier to code up new PLAB interfaces as the
plab folks flail around in their attempt to standardize on something.

XXX: may want to re-think where the generic library modules should go.  If
more python code enters Elab, we'll probably want to move 'em to more standard
locations.

This isn't the end of the cleanup - I would eventually like to go back and
rethink the class structures, beef up the comments, and extend the API.
parent 515b0d7a
...@@ -1431,6 +1431,8 @@ outfiles="$outfiles Makeconf GNUmakefile \ ...@@ -1431,6 +1431,8 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/tarfiles_setup tbsetup/webtarfiles_setup \ tbsetup/tarfiles_setup tbsetup/webtarfiles_setup \
tbsetup/fetchtar.proxy tbsetup/webfrisbeekiller \ tbsetup/fetchtar.proxy tbsetup/webfrisbeekiller \
tbsetup/plab/GNUmakefile tbsetup/plab/libplab.py \ tbsetup/plab/GNUmakefile tbsetup/plab/libplab.py \
tbsetup/plab/libtestbed.py tbsetup/plab/libdb.py \
tbsetup/plab/mod_dslice.py tbsetup/plab/mod_PLC.py \
tbsetup/plab/plabslice tbsetup/plab/plabnode tbsetup/plab/plabdaemon \ tbsetup/plab/plabslice tbsetup/plab/plabnode tbsetup/plab/plabdaemon \
tbsetup/plab/plabmetrics tbsetup/plab/plabstats \ tbsetup/plab/plabmetrics tbsetup/plab/plabstats \
tbsetup/plab/plabmonitord tbsetup/plab/plablinkdata \ tbsetup/plab/plabmonitord tbsetup/plab/plablinkdata \
......
...@@ -476,6 +476,8 @@ outfiles="$outfiles Makeconf GNUmakefile \ ...@@ -476,6 +476,8 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/tarfiles_setup tbsetup/webtarfiles_setup \ tbsetup/tarfiles_setup tbsetup/webtarfiles_setup \
tbsetup/fetchtar.proxy tbsetup/webfrisbeekiller \ tbsetup/fetchtar.proxy tbsetup/webfrisbeekiller \
tbsetup/plab/GNUmakefile tbsetup/plab/libplab.py \ tbsetup/plab/GNUmakefile tbsetup/plab/libplab.py \
tbsetup/plab/libtestbed.py tbsetup/plab/libdb.py \
tbsetup/plab/mod_dslice.py tbsetup/plab/mod_PLC.py \
tbsetup/plab/plabslice tbsetup/plab/plabnode tbsetup/plab/plabdaemon \ tbsetup/plab/plabslice tbsetup/plab/plabnode tbsetup/plab/plabdaemon \
tbsetup/plab/plabmetrics tbsetup/plab/plabstats \ tbsetup/plab/plabmetrics tbsetup/plab/plabstats \
tbsetup/plab/plabmonitord tbsetup/plab/plablinkdata \ tbsetup/plab/plabmonitord tbsetup/plab/plablinkdata \
......
# -*- python -*-
#
# Database functions
#
import sys
import os
import pwd
import MySQLdb
# XXX: inherit!
# When true, TBDBConnect and DBQueryFatal print what they are doing.
debug = 1
# Name of the database to connect to.
# NOTE(review): "@TBDBNAME@" looks like a build-time substitution token
# (this file is listed among the configure outputs) -- confirm.
__dbName = "@TBDBNAME@"
# Number of attempts DBQueryFatal makes for a query before re-raising.
__dbQueryMaxtries = 1
# Number of connection attempts TBDBConnect makes before giving up.
__dbConnMaxtries = 5
# Connection shared by every query; stays None until TBDBConnect succeeds.
__dbConnection = None
def TBDBConnect():
    """Open the module-wide database connection if it is not open yet.

    The connection is stored in the module-level ``__dbConnection`` and
    reused by later calls.  Up to ``__dbConnMaxtries`` attempts are made,
    sleeping one second after each failed attempt.

    Raises:
        RuntimeError: if every connection attempt fails.
    """
    global __dbConnection

    # "time" is not among this module's top-level imports; import it here
    # so the retry sleep cannot die with a NameError.
    import time

    if __dbConnection:
        return

    # Create a DB username for accounting purposes
    uid = os.getuid()
    try:
        name = pwd.getpwuid(uid)[0]
    except KeyError:
        name = "uid%d" % uid
    dbuser = "%s:%s:%d" % (sys.argv[0], name, os.getpid())

    if debug:
        print("Connecting to db %s as %s" % (__dbName, dbuser))

    # Connect, with retries
    for tries in range(__dbConnMaxtries):
        try:
            __dbConnection = MySQLdb.connect(db = __dbName, user = dbuser)
        except Exception:
            # Retry on any ordinary error.  Unlike a bare "except:", this
            # lets KeyboardInterrupt/SystemExit through on Python >= 2.5.
            time.sleep(1)
        else:
            break
    else:
        raise RuntimeError("Cannot connect to DB after several attempts!")
def DBQueryFatal(queryPat, querySub = (), asDict = False):
    """Run a query and return all of its rows, raising on failure.

    Args:
        queryPat: statement passed to ``cursor.execute``.
        querySub: parameters passed to ``cursor.execute`` with queryPat.
        asDict: when true, a ``MySQLdb.cursors.DictCursor`` is used
            instead of the connection's default cursor.

    Returns:
        The result of ``cursor.fetchall()``.

    Raises:
        MySQLdb.MySQLError: re-raised once the last allowed attempt fails.
        RuntimeError: if the retry loop ends without returning or raising
            (only possible when ``__dbQueryMaxtries`` is 0).
    """
    # Neither module is among this file's top-level imports; bring them
    # into scope here so the retry and reporting paths cannot hit NameError.
    import time
    import traceback

    TBDBConnect()

    if asDict:
        cursor = __dbConnection.cursor(MySQLdb.cursors.DictCursor)
    else:
        cursor = __dbConnection.cursor()

    if debug:
        print("Executing DB query %s" % queryPat)

    tries = __dbQueryMaxtries
    while tries:
        try:
            cursor.execute(queryPat, querySub)
            ret = cursor.fetchall()
            if debug:
                # Keep the debug output to a single short line.
                rs = repr(ret)
                if len(rs) > 60:
                    rs = rs[:60] + "..."
                print("Result: %s" % rs)
            return ret
        except MySQLdb.MySQLError:
            tries -= 1
            if tries == 0:
                # Out of attempts: hand the original DB error to the caller.
                raise
            time.sleep(1)
            try:
                # Best effort only; if the ping fails too, the next
                # execute() will report the real problem.
                __dbConnection.ping()
            except MySQLdb.MySQLError:
                pass

    # Reached only if the loop body never ran.
    tbmsg = "".join(traceback.format_exception(*sys.exc_info()))
    # NOTE(review): SENDMAIL and TBOPS are neither defined nor imported in
    # the visible part of this module -- confirm where they should come from.
    SENDMAIL(TBOPS, "DB query failed", "DB query failed:\n\n%s" % tbmsg, TBOPS)
    raise RuntimeError("Aah! Escaped DBQueryFatal loop")
def DBQuery(*args, **kwargs):
    """Run a query through DBQueryFatal, returning None on a DB error.

    Positional and keyword arguments (e.g. ``asDict``) are forwarded to
    DBQueryFatal unchanged.

    Returns:
        The rows returned by DBQueryFatal, or None if it raised
        ``MySQLdb.MySQLError``.
    """
    try:
        # The result must be returned; previously it was assigned and then
        # dropped, so every call yielded None.
        return DBQueryFatal(*args, **kwargs)
    except MySQLdb.MySQLError:
        return None
This diff is collapsed.
...@@ -17,7 +17,7 @@ SUBDIRS = libdslice etc ...@@ -17,7 +17,7 @@ SUBDIRS = libdslice etc
SBIN_STUFF = plabslice plabnode plabdaemon plabmetrics plabstats \ SBIN_STUFF = plabslice plabnode plabdaemon plabmetrics plabstats \
plabmonitord plablinkdata plabdist plabhttpd plabmonitord plablinkdata plabdist plabhttpd
LIB_STUFF = libplab.py LIB_STUFF = libplab.py libtestbed.py libdb.py mod_dslice.py mod_PLC.py
LIBEXEC_STUFF = webplabstats LIBEXEC_STUFF = webplabstats
......
This document describes the Planetlab<->Elab adapter module API.
Modules that wish to function as adapters between Emulab and Planetlab
must implement all the functions in this document that are not marked
"optional".
** Design principles:
* Separation of concerns
To the greatest extent possible, Elab should not be aware of Plab
specifics, and vice versa. libplab is the Emulab-centric side of this
separation, while the Plab-specific access/adapter modules live in
separate files, each representing a different way of interacting with
Planetlab (e.g., mod_PLC uses the PLC interface, while mod_dslice
uses the dslice interface).
* Modular architecture
Nodes and slices are encapsulated in classes. Some function
prototypes below specify these objects as parameters.
Node objects have the following accessible elements:
nodeid - string naming the node this object represents (Elab specific)
nodemeta - string of data specific to adapter module, related to node.
slice - slice object this node belongs to (or will belong to).
IP - IP address of this node
Slice objects have the following accessible elements:
slicename - string naming the slice this object represents
slicemeta - string of data specific to adapter module, related to slice.
Adapter modules should not rely on the presence of any other object
members.
** API:
CONSTANT: modname - text string naming this module - just informational.
FUNCTION: getFree
ARGS: None
RETURNS: list of IP addresses representing the available set of plab nodes.
SIDE EFFECTS: None
FUNCTION: createSlice
ARGS: slicename - REQUIRED string naming the slice to create
RETURNS: 2 element tuple (<int1>, <string1>)
<int1> - integer representing success (1) or failure (0)
<string1> - string containing module-specific data. 'None'
may be returned. This data will be fed back to
other module functions and will be saved off.
SIDE EFFECTS: Causes the named plab slice to be created.
FUNCTION: destroySlice
ARGS: slicename - REQUIRED string naming the slice to destroy
RETURNS: integer representing success (1) or failure (0)
SIDE EFFECTS: Causes the named plab slice to be destroyed/removed.
FUNCTION: createNode
ARGS: node - REQUIRED node object representing the node to create
RETURNS: 3 element tuple (<int1>, <string1>, <string2>)
<int1> - integer representing success (1) or failure (0)
<string1> - string containing node expiration date/time in
SQL time format: 'YYYY-MM-DD HH:MM:SS'
<string2> - string containing module-specific data for this node.
This string will be fed back to other module functions
and will be saved off.
SIDE EFFECTS: Allocates the given node to the given slice, and ensures
"emulabman" has ssh access to it. Should not
successfully return until the plab vnode has been
allocated and instantiated (i.e., is ready to interact
with). May be interrupted externally if it takes too long.
FUNCTION: freeNode
ARGS: node - REQUIRED node object representing the node to free
RETURNS: integer representing success (1) or failure (0)
SIDE EFFECTS: Causes the given node to be removed from the slice, and its
resources freed.
FUNCTION: renewNode
ARGS: node - REQUIRED node object representing the node to renew
length - OPTIONAL length of the renewal, in seconds.
RETURNS: 3 element tuple (<int1>, <string1>, <string2>)
<int1> - integer representing success (1) or failure (0)
<string1> - string containing node expiration date/time in
SQL time format: 'YYYY-MM-DD HH:MM:SS'
<string2> - string containing module-specific data for this node.
This string will be fed back to other module functions
and will be saved off.
SIDE EFFECTS: Causes this node's 'lease' or 'lifetime' to be renewed. This
function should strive to add as much time to the lease as
possible if 'length' is not specified.
# -*- python -*-
#
# Database functions
#
import sys
import os
import pwd
import MySQLdb
# XXX: inherit!
# When true, TBDBConnect and DBQueryFatal print what they are doing.
debug = 1
# Name of the database to connect to.
# NOTE(review): "@TBDBNAME@" looks like a build-time substitution token
# (this file is listed among the configure outputs) -- confirm.
__dbName = "@TBDBNAME@"
# Number of attempts DBQueryFatal makes for a query before re-raising.
__dbQueryMaxtries = 1
# Number of connection attempts TBDBConnect makes before giving up.
__dbConnMaxtries = 5
# Connection shared by every query; stays None until TBDBConnect succeeds.
__dbConnection = None
def TBDBConnect():
    """Open the module-wide database connection if it is not open yet.

    The connection is stored in the module-level ``__dbConnection`` and
    reused by later calls.  Up to ``__dbConnMaxtries`` attempts are made,
    sleeping one second after each failed attempt.

    Raises:
        RuntimeError: if every connection attempt fails.
    """
    global __dbConnection

    # "time" is not among this module's top-level imports; import it here
    # so the retry sleep cannot die with a NameError.
    import time

    if __dbConnection:
        return

    # Create a DB username for accounting purposes
    uid = os.getuid()
    try:
        name = pwd.getpwuid(uid)[0]
    except KeyError:
        name = "uid%d" % uid
    dbuser = "%s:%s:%d" % (sys.argv[0], name, os.getpid())

    if debug:
        print("Connecting to db %s as %s" % (__dbName, dbuser))

    # Connect, with retries
    for tries in range(__dbConnMaxtries):
        try:
            __dbConnection = MySQLdb.connect(db = __dbName, user = dbuser)
        except Exception:
            # Retry on any ordinary error.  Unlike a bare "except:", this
            # lets KeyboardInterrupt/SystemExit through on Python >= 2.5.
            time.sleep(1)
        else:
            break
    else:
        raise RuntimeError("Cannot connect to DB after several attempts!")
def DBQueryFatal(queryPat, querySub = (), asDict = False):
    """Run a query and return all of its rows, raising on failure.

    Args:
        queryPat: statement passed to ``cursor.execute``.
        querySub: parameters passed to ``cursor.execute`` with queryPat.
        asDict: when true, a ``MySQLdb.cursors.DictCursor`` is used
            instead of the connection's default cursor.

    Returns:
        The result of ``cursor.fetchall()``.

    Raises:
        MySQLdb.MySQLError: re-raised once the last allowed attempt fails.
        RuntimeError: if the retry loop ends without returning or raising
            (only possible when ``__dbQueryMaxtries`` is 0).
    """
    # Neither module is among this file's top-level imports; bring them
    # into scope here so the retry and reporting paths cannot hit NameError.
    import time
    import traceback

    TBDBConnect()

    if asDict:
        cursor = __dbConnection.cursor(MySQLdb.cursors.DictCursor)
    else:
        cursor = __dbConnection.cursor()

    if debug:
        print("Executing DB query %s" % queryPat)

    tries = __dbQueryMaxtries
    while tries:
        try:
            cursor.execute(queryPat, querySub)
            ret = cursor.fetchall()
            if debug:
                # Keep the debug output to a single short line.
                rs = repr(ret)
                if len(rs) > 60:
                    rs = rs[:60] + "..."
                print("Result: %s" % rs)
            return ret
        except MySQLdb.MySQLError:
            tries -= 1
            if tries == 0:
                # Out of attempts: hand the original DB error to the caller.
                raise
            time.sleep(1)
            try:
                # Best effort only; if the ping fails too, the next
                # execute() will report the real problem.
                __dbConnection.ping()
            except MySQLdb.MySQLError:
                pass

    # Reached only if the loop body never ran.
    tbmsg = "".join(traceback.format_exception(*sys.exc_info()))
    # NOTE(review): SENDMAIL and TBOPS are neither defined nor imported in
    # the visible part of this module -- confirm where they should come from.
    SENDMAIL(TBOPS, "DB query failed", "DB query failed:\n\n%s" % tbmsg, TBOPS)
    raise RuntimeError("Aah! Escaped DBQueryFatal loop")
def DBQuery(*args, **kwargs):
    """Run a query through DBQueryFatal, returning None on a DB error.

    Positional and keyword arguments (e.g. ``asDict``) are forwarded to
    DBQueryFatal unchanged.

    Returns:
        The rows returned by DBQueryFatal, or None if it raised
        ``MySQLdb.MySQLError``.
    """
    try:
        # The result must be returned; previously it was assigned and then
        # dropped, so every call yielded None.
        return DBQueryFatal(*args, **kwargs)
    except MySQLdb.MySQLError:
        return None
This diff is collapsed.
This diff is collapsed.
# -*- python -*-
import sys
sys.path.append("@prefix@/lib")
import httplib
import xmlrpclib
from libtestbed import *
#
# PLC constants
#
# Default XML-RPC endpoint that PLCagent hands to xmlrpclib.ServerProxy.
DEF_PLC_URI = "https://www.planet-lab.org/db/slices/dynamicprog.php"
# Default credentials PLCagent places in its auth dictionary.
# NOTE(review): a real account name and password are hard-coded here and
# are shipped in every copy of this file -- consider loading them from a
# protected configuration source instead.
DEF_PLC_USER = "lepreau@cs.utah.edu"
DEF_PLC_PASS = "phurds"
# 30 days expressed in seconds; passed to AssignShares as the renew time
# and added to the current time for the value createNode returns.
DEF_PLC_LEASELEN = 1*30*24*60*60 # add one month (XXX: for now)
# Share count passed to AssignShares when a slice is created.
DEF_PLC_SHARES = 30
# User assigned to every slice by mod_PLC.createSlice.
EMULABMAN_EMAIL = "emulabman@emulab.net"
# Host contacted over HTTPS by mod_PLC.getFree ...
DEF_PLAB_URL = "www.planet-lab.org"
# ... and the paths on that host that are fetched; each response body is
# split on whitespace and the pieces are accumulated into the result list.
PLAB_LIST_URIS = ("/db/nodes/all_ips.php",)
class PLCagent:
    """XML-RPC client bound to one slice and one set of PLC credentials.

    Every method forwards to the server method of the same name, passing
    the slice dictionary and the auth dictionary built in ``__init__``
    (``listSlice`` passes only the auth dictionary), and returns whatever
    the server call returns.
    """

    def __init__(self, slicename,
                 uri = DEF_PLC_URI,
                 username = DEF_PLC_USER,
                 password = DEF_PLC_PASS):
        """Build the slice/auth dictionaries and the server proxy.

        Args:
            slicename: name of the slice all calls will operate on.
            uri: XML-RPC endpoint for the proxy.
            username: account name placed in the auth dictionary.
            password: password placed in the auth dictionary.

        Raises:
            RuntimeError: if slicename is empty.
        """
        if not slicename:
            raise RuntimeError("Must provide a slicename!")

        self.__slice = {'sliceName': slicename}
        self.__auth = {'AuthMethod': "password",
                       'username': username,
                       'AuthString': password}

        try:
            self.__server = xmlrpclib.ServerProxy(uri)
        except:
            # Report, then let the original error propagate.
            print("Failed to create XML-RPC proxy")
            raise

    def _asTuple(self, item):
        """Return item unchanged if it is a tuple, else as a 1-tuple."""
        if isinstance(item, tuple):
            return item
        return (item,)

    def createSlice(self):
        return self.__server.createSlice(self.__slice, self.__auth)

    def deleteSlice(self):
        # The auth dictionary used to be printed here; that wrote the
        # password to stdout, so the debugging print has been removed.
        return self.__server.deleteSlice(self.__slice, self.__auth)

    def AssignNodes(self, nodelist):
        """Assign one node, or a tuple of nodes, to the slice."""
        nodes = {'nodeList': self._asTuple(nodelist)}
        return self.__server.AssignNodes(self.__slice, self.__auth, nodes)

    def UnAssignNodes(self, nodelist):
        """Remove one node, or a tuple of nodes, from the slice."""
        nodes = {'nodeList': self._asTuple(nodelist)}
        return self.__server.UnAssignNodes(self.__slice, self.__auth, nodes)

    def AssignUsers(self, userlist):
        """Assign one user, or a tuple of users, to the slice."""
        # As in deleteSlice, the print of the auth dictionary (password
        # included) has been removed.
        users = {'userList': self._asTuple(userlist)}
        return self.__server.AssignUsers(self.__slice, self.__auth, users)

    def UnAssignUsers(self, userlist):
        """Remove one user, or a tuple of users, from the slice."""
        users = {'userList': self._asTuple(userlist)}
        return self.__server.UnAssignUsers(self.__slice, self.__auth, users)

    def AssignShares(self, renewtime, numshares):
        """Send the 'renewTime' and 'share' values for the slice."""
        shareinfo = {'renewTime': renewtime, 'share': numshares}
        return self.__server.AssignShares(self.__slice, self.__auth, shareinfo)

    def InstantiateSliver(self, nodelist):
        """Request instantiation on one node, or a tuple of nodes."""
        nodes = {'nodeList': self._asTuple(nodelist)}
        return self.__server.InstantiateSliver(self.__slice, self.__auth, nodes)

    def listSlice(self):
        return self.__server.listSlice(self.__auth)
class mod_PLC:
    """Adapter module that drives Planetlab through the PLC interface.

    Each operation builds a PLCagent for the slice involved and issues the
    agent calls through tryXmlrpcCmd.
    """

    def __init__(self):
        self.modname = "mod_PLC"

    # XXX: fixup to use online hosts file
    def getFree(self):
        """Fetch the node lists named in PLAB_LIST_URIS.

        Returns:
            List of the whitespace-separated tokens found in the response
            bodies, in the order they were fetched.

        Raises:
            RuntimeError: if any request returns a status other than 200.
        """
        avail = []
        conn = httplib.HTTPSConnection(DEF_PLAB_URL)
        try:
            for ipuri in PLAB_LIST_URIS:
                conn.request("GET", ipuri)
                res = conn.getresponse()
                if res.status != 200:
                    raise RuntimeError("HTTP Error getting IPLIST: %s\n"
                                       "Code: %d Reason: %s" %
                                       (ipuri, res.status, res.reason))
                avail += res.read().split()
        finally:
            # Release the socket on both the success and the error path.
            conn.close()
        return avail

    def createSlice(self, slicename):
        """Create the slice, assign EMULABMAN_EMAIL to it, and set its shares.

        Returns:
            2-tuple (result of the AssignShares call, None).

        Any exception from the three steps is reported on stdout and then
        re-raised.
        """
        agent = PLCagent(slicename)

        try:
            res = tryXmlrpcCmd(agent.createSlice)
            if debug:
                print(res)
        except:
            print("Failed to create slice %s" % slicename)
            raise

        try:
            res = tryXmlrpcCmd(agent.AssignUsers,
                               EMULABMAN_EMAIL)
            if debug:
                print(res)
        except:
            print("Failed to assign emulabman to slice %s" % slicename)
            raise

        try:
            res = tryXmlrpcCmd(agent.AssignShares,
                               (DEF_PLC_LEASELEN,
                                DEF_PLC_SHARES))
            if debug:
                print(res)
        except:
            print("Failed to assign shares to slice %s" % slicename)
            raise

        return (res, None)

    def deleteSlice(self, slicename):
        """Delete the named slice; returns None.

        NOTE(review): the adapter API README names this operation
        destroySlice and documents an integer result -- confirm which
        name and return value the caller expects.
        """
        agent = PLCagent(slicename)
        tryXmlrpcCmd(agent.deleteSlice)

    def createNode(self, node):
        """Assign node.IP to the node's slice and instantiate the sliver.

        Returns:
            3-tuple (result of InstantiateSliver, None,
            current time + DEF_PLC_LEASELEN as seconds since the epoch).

        NOTE(review): the adapter API README documents the tuple as
        (status, expiration as an SQL time string, module data); the
        existing order and type are kept here because the caller is not
        visible -- confirm.
        """
        # "time" is not imported at the top of this file; import it here so
        # the lease computation does not depend on "from libtestbed import *".
        import time

        # add the node to the PLC slice.
        agent = PLCagent(node.slice.slicename)
        # The try number in the log line is derived from the same budget the
        # loop starts with, rather than from a DEF_TRIES defined elsewhere.
        maxtries = 3
        tries = maxtries
        while 1:
            TIMESTAMP("createnode %s try %d started." % (node.nodeid,
                                                         maxtries-tries+1))
            try:
                res = tryXmlrpcCmd(agent.AssignNodes, node.IP,
                                   inittries=tries, raisefault=True)
                if debug:
                    print(res)
            # We may have actually gotten the lease/vm even though
            # the xmlrpc call appeared to fail.  We check for this
            # condition here, which will show up on subsequent
            # allocation attempts.
            except xmlrpclib.Fault:
                # Fetch the active exception this way so the clause parses
                # on every Python 2 release (and on Python 3).
                e = sys.exc_info()[1]
                if e.faultString.find("already assigned") != -1:
                    print("Lease for %s already exists." % node.nodeid)
                    break
                elif getattr(e, "triesleft", 0) > 0:
                    tries = e.triesleft
                else:
                    # No retries left (or none reported): give up.
                    raise
            # success
            else:
                break

        # push changes out immediately.
        try:
            TIMESTAMP("Starting InstantiateSliver() on %s." % node.nodeid)
            res = tryXmlrpcCmd(agent.InstantiateSliver, node.IP)
            TIMESTAMP("InstantiateSliver() complete on %s." % node.nodeid)
            if debug:
                print(res)
        except:
            print("Failed to instantiate sliver %s on slice %s" %
                  (node.nodeid, node.slice.slicename))
            raise

        leaselen = time.time() + DEF_PLC_LEASELEN
        return (res, None, leaselen)

    def freeNode(self, node):
        """Unassign node.IP from the node's slice.

        Returns:
            The result of the UnAssignNodes call.  Failures are reported
            on stdout and re-raised.
        """
        agent = PLCagent(node.slice.slicename)
        try:
            res = tryXmlrpcCmd(agent.UnAssignNodes, node.IP)
            if debug:
                print(res)
        except:
            print("Failed to release node %s from slice %s" %
                  (node.nodeid, node.slice.slicename))
            raise
        return res

    # XXX: implement
    def renewNode(self, node, length = 0):
        """Not implemented yet: always returns (0, None, None)."""
        return (0, None, None)
This diff is collapsed.
...@@ -5,6 +5,7 @@ import sys, os ...@@ -5,6 +5,7 @@ import sys, os
sys.path.append("@prefix@/lib") sys.path.append("@prefix@/lib")
import syslog import syslog
import getopt import getopt
from libtestbed import *
import libplab import libplab
GETFREE_PERIOD = 2*60 GETFREE_PERIOD = 2*60
...@@ -80,13 +81,13 @@ def doDaemon(func, period, logname): ...@@ -80,13 +81,13 @@ def doDaemon(func, period, logname):
consecexc = MAXCONSECEXC consecexc = MAXCONSECEXC
if not libplab.debug: if not debug:
daemonize(logname) daemonize(logname)
while True: while True:
start = time.clock() start = time.clock()
try: try:
func() func()
except libplab.SignalInterrupt, e: except SignalInterrupt, e:
print "Received signal %s in daemon loop, exiting." % e.signum print "Received signal %s in daemon loop, exiting." % e.signum
sys.exit(0) sys.exit(0)
except KeyboardInterrupt: except KeyboardInterrupt:
...@@ -100,11 +101,11 @@ def doDaemon(func, period, logname): ...@@ -100,11 +101,11 @@ def doDaemon(func, period, logname):
print "Going back to sleep until next scheduled run" print "Going back to sleep until next scheduled run"
else: else:
print "Too many consecutive exceptions seen, bailing out!" print "Too many consecutive exceptions seen, bailing out!"
libplab.SENDMAIL(libplab.TBOPS, "Plabdaemon Exiting", SENDMAIL(libplab.TBOPS, "Plabdaemon Exiting",
"The plab %s daemon has seen too many " "The plab %s daemon has seen too many "
"consecutive exceptions and is bailing out." "consecutive exceptions and is bailing out."
"Someone needs to check the log!" % "Someone needs to check the log!" %
func.func_name) func.func_name)
raise raise
else: else:
...@@ -119,7 +120,7 @@ def doDaemon(func, period, logname): ...@@ -119,7 +120,7 @@ def doDaemon(func, period, logname):
def main(args): def main(args):
me = args[0] me = args[0]
try: try:
args = libplab.handleArgs(args[1:]) args = handleArgs(args[1:])
if not len(args): if not len(args):
usage(me) usage(me)
command = args[0] command = args[0]
......
...@@ -168,12 +168,17 @@ while (1) { ...@@ -168,12 +168,17 @@ while (1) {
next; next;
} }
print "\n\#\#\# Checking $vnode on $pnode at " . TimeStamp() . "\n"; print "##############################################################################\n";
print "### Checking $vnode on $pnode at " . TimeStamp() . "\n";
# #
# Try to tear it down, set it up, and wait for ISUP. # Try to tear it down, set it up, and wait for ISUP.
# #
system("vnode_setup -f -k -d $PLABMOND_PID $PLABMOND_EID $vnode");
# Bah, can't do unassign->assign in plab right now since unassign
# is async, and may clobber the immediate assign afterward.
#
#system("vnode_setup -f -k -d $PLABMOND_PID $PLABMOND_EID $vnode");
system("vnode_setup -f -d $PLABMOND_PID $PLABMOND_EID $vnode"); system("vnode_setup -f -d $PLABMOND_PID $PLABMOND_EID $vnode");
if ($?) { if ($?) {
print "Failed to allocate $vnode on $pnode\n"; print "Failed to allocate $vnode on $pnode\n";
......
...@@ -4,36 +4,42 @@ ...@@ -4,36 +4,42 @@
import sys import sys
sys.path.append("@prefix@/lib") sys.path.append("@prefix@/lib")
import getopt import getopt
import libtestbed
import libplab import libplab
import xmlrpclib
import time import time
import socket
TRIES = 3 TRIES = 3
SLEEPINT = 5 SLEEPINT = 5
def usage(me): def usage(me):
print "Usage: %s [ -vd ] { alloc | free } pid eid nodeid" % me print "Usage: %s [ -vd ] { alloc | free } [-f] pid eid nodeid" % me
sys.exit(1) sys.exit(1)
def main(args): def main(args):
me = args[0] me = args[0]
alloctries = TRIES alloctries = TRIES
setuptries = TRIES setuptries = TRIES
Force = False
try: try:
command, pid, eid, nodeid = libplab.handleArgs(args[1:]) args = libtestbed.handleArgs(args[1:])
except getopt.GetoptError: except getopt.GetoptError:
usage(me) usage(me)
except ValueError: except ValueError:
usage(me) usage(me)
command = args[0]
opts, args = getopt.getopt(args[1:], "f")
for o, a in opts:
if o == "-f":
Force = True
pid, eid, nodeid = args
plab = libplab.Plab() plab = libplab.Plab()
slice = plab.loadSlice(pid, eid) slice = plab.loadSlice(pid, eid)
if command == "alloc": if command == "alloc":
node = slice.createNode(nodeid) node = slice.createNode(nodeid, force=Force)
node.addKey("/root/.ssh/identity.pub")
while 1: while 1:
try: try:
...@@ -52,7 +58,7 @@ def main(args): ...@@ -52,7 +58,7 @@ def main(args):
else: else:
print "Giving up after %s setup attempts on %s" % \ print "Giving up after %s setup attempts on %s" % \
(TRIES, nodeid) (TRIES, nodeid)
node.free() #XXX node.free()
raise raise
elif command == "renew": elif command == "renew":
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@