Commit 78b210d1 authored by Kirk Webb

Updated Emulab plab interface to support PLC. Bumped rootball version

parent e710fa48
...@@ -909,7 +909,7 @@ JAILIPMASK=255.240.0.0 ...@@ -909,7 +909,7 @@ JAILIPMASK=255.240.0.0
IPBASE=10 IPBASE=10
SFSSUPPORT=1 SFSSUPPORT=1
PLABSUPPORT=0 PLABSUPPORT=0
PLAB_ROOTBALL="plabroot-8.tgz" PLAB_ROOTBALL="plabroot-9.tar.bz2"
PLAB_SLICEPREFIX="emulab" PLAB_SLICEPREFIX="emulab"
TBLOGFACIL="local5" TBLOGFACIL="local5"
LINKTEST_NSPATH="/share/linktest-ns" LINKTEST_NSPATH="/share/linktest-ns"
......
...@@ -101,7 +101,7 @@ JAILIPMASK=255.240.0.0 ...@@ -101,7 +101,7 @@ JAILIPMASK=255.240.0.0
IPBASE=10 IPBASE=10
SFSSUPPORT=1 SFSSUPPORT=1
PLABSUPPORT=0 PLABSUPPORT=0
PLAB_ROOTBALL="plabroot-8.tgz" PLAB_ROOTBALL="plabroot-9.tar.bz2"
PLAB_SLICEPREFIX="emulab" PLAB_SLICEPREFIX="emulab"
TBLOGFACIL="local5" TBLOGFACIL="local5"
LINKTEST_NSPATH="/share/linktest-ns" LINKTEST_NSPATH="/share/linktest-ns"
......
...@@ -70,6 +70,16 @@ ROOTBALL_HTTP_URLPATH = HTTPD_SITE + HTTPD_PORT + HTTP_PATH ...@@ -70,6 +70,16 @@ ROOTBALL_HTTP_URLPATH = HTTPD_SITE + HTTPD_PORT + HTTP_PATH
DEF_ROOTBALL_NAME = "@PLAB_ROOTBALL@" DEF_ROOTBALL_NAME = "@PLAB_ROOTBALL@"
SLICEPREFIX = "@PLAB_SLICEPREFIX@" SLICEPREFIX = "@PLAB_SLICEPREFIX@"
#
# PLC constants
#
# DEF_PLC_URI, DEF_PLC_USER and DEF_PLC_PASS are the default uri, username
# and password arguments of PLCagent.__init__.
# NOTE(review): the account name and password are stored here in plain text
# in the source; consider loading them from a protected site config instead.
DEF_PLC_URI = "https://www.planet-lab.org/db/slices/dynamicprog.php"
DEF_PLC_USER = "lepreau@cs.utah.edu"
DEF_PLC_PASS = "phurds"
# 30 days expressed in seconds. Passed as the renewtime argument of
# AssignShares at slice creation, and added to time.time() to compute the
# leaseend value stored for PLC nodes.
DEF_PLC_LEASELEN = 1*30*24*60*60 # add one month (XXX: for now)
# Passed as the numshares argument of AssignShares at slice creation.
DEF_PLC_SHARES = 30
# User passed to AssignUsers right after a PLC slice is created.
EMULABMAN_EMAIL = "emulabman@emulab.net"
# #
# How many seconds to sleep between failures and how many times to try # How many seconds to sleep between failures and how many times to try
# commands to both the dslice agent, and individual node managers. # commands to both the dslice agent, and individual node managers.
...@@ -83,6 +93,11 @@ DEF_TRIES = 3 ...@@ -83,6 +93,11 @@ DEF_TRIES = 3
verbose = 0 verbose = 0
debug = 0 debug = 0
#
# Method of operation
#
# Backend selector tested by the Slice and Node code; the values compared
# against in this file are "dslice" and "PLC".
method = "PLC"
# #
# var to track failed renewals # var to track failed renewals
# #
...@@ -539,6 +554,76 @@ def tryXmlrpcCmd(cmd, args = (), ...@@ -539,6 +554,76 @@ def tryXmlrpcCmd(cmd, args = (),
raise raise
class PLCagent:
    """
    Small XML-RPC client for the PLC slice interface, bound to one slice.

    Each call sends the password auth struct built in __init__ and (except
    for listSlice) the slice struct {'sliceName': slicename}.  Methods
    return whatever the server returns; nothing is caught here, so any
    xmlrpclib error propagates to the caller.
    """

    def __init__(self, slicename,
                 uri = DEF_PLC_URI,
                 username = DEF_PLC_USER,
                 password = DEF_PLC_PASS):
        """
        slicename -- name of the slice all calls operate on (required)
        uri       -- URI handed to xmlrpclib.ServerProxy
        username  -- sent as the 'username' field of the auth struct
        password  -- sent as the 'AuthString' field of the auth struct

        Raises RuntimeError if slicename is empty; re-raises whatever
        xmlrpclib.ServerProxy raises if the proxy cannot be created.
        """
        if not slicename:
            raise RuntimeError("Must provide a slicename!")
        self.__slice = {'sliceName': slicename}
        self.__auth = {'AuthMethod': "password",
                       'username': username,
                       'AuthString': password}
        try:
            self.__server = xmlrpclib.ServerProxy(uri)
        except:
            # Report, then let the caller see the original exception.
            print("Failed to create XML-RPC proxy")
            raise

    def __toTuple(self, items):
        """
        Normalize a node/user argument to a tuple.

        A tuple or list is taken as the complete collection; anything else
        (e.g. a single hostname string) becomes a one-element tuple.  Lists
        are accepted explicitly so that ['a', 'b'] is not sent as a single
        nested element.
        """
        if isinstance(items, (tuple, list)):
            return tuple(items)
        return (items,)

    def createSlice(self):
        """Invoke the server's createSlice for this slice."""
        return self.__server.createSlice(self.__slice, self.__auth)

    def deleteSlice(self):
        """Invoke the server's deleteSlice for this slice."""
        return self.__server.deleteSlice(self.__slice, self.__auth)

    def AssignNodes(self, nodelist):
        """Invoke AssignNodes with {'nodeList': nodelist}; nodelist may be
        a single item or a tuple/list of items."""
        nodes = {'nodeList': self.__toTuple(nodelist)}
        return self.__server.AssignNodes(self.__slice, self.__auth, nodes)

    def UnAssignNodes(self, nodelist):
        """Invoke UnAssignNodes with {'nodeList': nodelist}; nodelist may
        be a single item or a tuple/list of items."""
        nodes = {'nodeList': self.__toTuple(nodelist)}
        return self.__server.UnAssignNodes(self.__slice, self.__auth, nodes)

    def AssignUsers(self, userlist):
        """Invoke AssignUsers with {'userList': userlist}; userlist may be
        a single item or a tuple/list of items."""
        users = {'userList': self.__toTuple(userlist)}
        return self.__server.AssignUsers(self.__slice, self.__auth, users)

    def UnAssignUsers(self, userlist):
        """Invoke UnAssignUsers with {'userList': userlist}; userlist may
        be a single item or a tuple/list of items."""
        users = {'userList': self.__toTuple(userlist)}
        return self.__server.UnAssignUsers(self.__slice, self.__auth, users)

    def AssignShares(self, renewtime, numshares):
        """Invoke AssignShares with {'renewTime': renewtime,
        'share': numshares}."""
        shareinfo = {'renewTime': renewtime,
                     'share': numshares}
        return self.__server.AssignShares(self.__slice, self.__auth,
                                          shareinfo)

    def InstantiateSliver(self, nodelist):
        """Invoke InstantiateSliver with {'nodeList': nodelist}; nodelist
        may be a single item or a tuple/list of items."""
        nodes = {'nodeList': self.__toTuple(nodelist)}
        return self.__server.InstantiateSliver(self.__slice, self.__auth,
                                               nodes)

    def listSlice(self):
        """Invoke the server's listSlice; only the auth struct is sent."""
        return self.__server.listSlice(self.__auth)
# #
# Plab abstraction # Plab abstraction
# #
...@@ -1009,7 +1094,7 @@ class Plab: ...@@ -1009,7 +1094,7 @@ class Plab:
else: else:
args = (AGENTIP, agent.PORT, agent.SSLPORT, args = (AGENTIP, agent.PORT, agent.SSLPORT,
self.keyfile, self.certfile, self.cacertfile) self.keyfile, self.certfile, self.cacertfile)
self.__agentProxy = agentproxy.agentproxy(*args) self.__agentProxy = agentproxy.agentproxy(*args)
return self.__agentProxy return self.__agentProxy
...@@ -1027,6 +1112,7 @@ class Slice: ...@@ -1027,6 +1112,7 @@ class Slice:
Creates a new slice that initially contains no nodes. Don't call Creates a new slice that initially contains no nodes. Don't call
this directly, use Plab.createSlice instead. this directly, use Plab.createSlice instead.
""" """
cleanup = 0
res = DBQueryFatal("select idx from experiments " res = DBQueryFatal("select idx from experiments "
"where pid=%s " "where pid=%s "
"and eid=%s", "and eid=%s",
...@@ -1035,19 +1121,66 @@ class Slice: ...@@ -1035,19 +1121,66 @@ class Slice:
raise RuntimeError, "Didn't get any results while looking for idx" raise RuntimeError, "Didn't get any results while looking for idx"
eindex = res[0][0] eindex = res[0][0]
self.slicename = "%s_%s" % (SLICEPREFIX, eindex) self.slicename = "%s_%s" % (SLICEPREFIX, eindex)
print "Creating Plab slice %s." % self.slicename print "Creating Plab slice %s." % self.slicename
self.privkey, self.pubkey = self.__genKeypair()
try: # Method dependant slice creation steps
DBQueryFatal("insert into plab_slices" if method == "dslice":
" (pid, eid, slicename, privkey, pubkey) " self.privkey, self.pubkey = self.__genKeypair()
" values (%s, %s, %s, %s, %s)",
(self.pid, self.eid, self.slicename, elif method == "PLC":
self.privkey, self.pubkey)) self.agent = PLCagent(self.slicename)
except: self.privkey = self.pubkey = None
# No cleanup necessary try:
try:
res = tryXmlrpcCmd(self.agent.createSlice)
if debug:
print res
except:
print "Failed to create slice %s" % self.slicename
raise
try:
res = tryXmlrpcCmd(self.agent.AssignUsers,
EMULABMAN_EMAIL)
if debug:
print res
except:
print "Failed to assign emulabman to slice %s" % \
self.slicename
raise
try:
res = tryXmlrpcCmd(self.agent.AssignShares,
(DEF_PLC_LEASELEN,
DEF_PLC_SHARES))
if debug:
print res
except:
print "Failed to assign shares to slice %s" % \
self.slicename
raise
except:
cleanup = 1
# Method independent slice creation steps
if not cleanup:
try:
DBQueryFatal("insert into plab_slices"
" (pid, eid, slicename, privkey, pubkey) "
" values (%s, %s, %s, %s, %s)",
(self.pid, self.eid, self.slicename,
self.privkey, self.pubkey))
except:
cleanup = 1
if cleanup:
# Method dependent failure cleanup
if method == "PLC":
tryXmlrpcCmd(self.agent.deleteSlice)
# Method independent failure cleanup
DBQueryFatal("delete from plab_slices where slicename=%s",
(self.slicename,))
raise raise
# It turns out that there's no concrete "slice" in dslice, so
# nothing real needs to be done
def _load(self): def _load(self):
""" """
...@@ -1067,6 +1200,8 @@ class Slice: ...@@ -1067,6 +1200,8 @@ class Slice:
assert (len(res) == 1), \ assert (len(res) == 1), \
"Multiple slices found for %s-%s" % (self.pid, self.eid) "Multiple slices found for %s-%s" % (self.pid, self.eid)
((self.slicename, self.privkey, self.pubkey), ) = res ((self.slicename, self.privkey, self.pubkey), ) = res
if method == "PLC":
self.agent = PLCagent(self.slicename)
def destroy(self): def destroy(self):
""" """
...@@ -1076,18 +1211,26 @@ class Slice: ...@@ -1076,18 +1211,26 @@ class Slice:
slice is destroyed. slice is destroyed.
""" """
print "Destroying Plab slice %s." % self.slicename print "Destroying Plab slice %s." % self.slicename
res = DBQueryFatal("select node_id from plab_slice_nodes" if method == "dslice":
" where slicename = %s", res = DBQueryFatal("select node_id from plab_slice_nodes"
(self.slicename)) " where slicename = %s",
print "\tRemoving any remaining nodes in slice.." (self.slicename))
for (nodeid,) in res: print "\tRemoving any remaining nodes in slice.."
node = self.loadNode(nodeid) for (nodeid,) in res:
node.free() node = self.loadNode(nodeid)
del node # Encourage the GC'er node.free()
del node # Encourage the GC'er
print "\tRemoving slice DB entry."
osigs = disable_sigs(TERMSIGS) osigs = disable_sigs(TERMSIGS)
if method == "PLC":
try:
tryXmlrpcCmd(self.agent.deleteSlice)
except:
print "Failed to delete PLC slice!"
try: try:
print "\tRemoving slice DB entry."
DBQueryFatal("delete from plab_slices where slicename = %s", DBQueryFatal("delete from plab_slices where slicename = %s",
(self.slicename,)) (self.slicename,))
except: except:
...@@ -1166,11 +1309,22 @@ class Slice: ...@@ -1166,11 +1309,22 @@ class Slice:
class Node: class Node:
def __init__(self, slice, nodeid): def __init__(self, slice, nodeid):
self.slice, self.plab = slice, slice.plab self.slice, self.agent = slice, slice.agent
self.nodeid = nodeid self.nodeid = nodeid
self.ip = self.__findIP() self.ip = self.__findIP()
self.__nodemgrProxy = None self.__nodemgrProxy = None
if method == "PLC":
res = DBQueryFatal("select w.hostname from"
" nodes as n left join widearea_nodeinfo as w"
" on n.phys_nodeid = w.node_id"
" where n.node_id = %s limit 1",
(nodeid,))
assert (len(res) > 0), "%s not found in widearea_info table!" % \
nodeid
self.hostname = res[0][0]
# XXX: may want to rethink signal handling here. # XXX: may want to rethink signal handling here.
def _create(self): def _create(self):
""" """
...@@ -1189,39 +1343,41 @@ class Node: ...@@ -1189,39 +1343,41 @@ class Node:
raise RuntimeError, "Entry for plab node %s already exists " \ raise RuntimeError, "Entry for plab node %s already exists " \
"in the DB" % self.nodeid "in the DB" % self.nodeid
# Now get a ticket, and redeem it for a vm lease if method == "dslice":
print "Creating Plab node %s on %s." % (self.nodeid, self.ip) # Now get a ticket, and redeem it for a vm lease
agent = self.plab._createAgentProxy() print "Creating Plab node %s on %s." % (self.nodeid, self.ip)
tickets = tryXmlrpcCmd(agent.newtickets, agent = self.plab._createAgentProxy()
(self.slice.slicename, 1, LEASELEN, (self.ip,))) tickets = tryXmlrpcCmd(agent.newtickets,
assert (len(tickets) == 1), "%d tickets returned" % len(tickets) (self.slice.slicename, 1, LEASELEN,
self.ticketdata = tickets[0] (self.ip,)))
if debug: assert (len(tickets) == 1), "%d tickets returned" % len(tickets)
print "Obtained ticket:" self.ticketdata = tickets[0]
print self.ticketdata if debug:
print "Obtained ticket:"
print self.ticketdata
nodemgr = self._createNodemgrProxy() nodemgr = self._createNodemgrProxy()
self.leasedata = None self.leasedata = None
tries = DEF_TRIES tries = DEF_TRIES
while 1: while 1:
TIMESTAMP("createnode %s try %d started." % (self.nodeid, TIMESTAMP("createnode %s try %d started." % (self.nodeid,
DEF_TRIES-tries+1)) DEF_TRIES-tries+1))
try: try:
self.leasedata = tryXmlrpcCmd(nodemgr.newleasevm, self.leasedata = tryXmlrpcCmd(nodemgr.newleasevm,
(self.ticketdata, (self.ticketdata,
self.slice.privkey, self.slice.privkey,
self.slice.pubkey), self.slice.pubkey),
inittries = tries, inittries = tries,
raisefault = True) raisefault = True)
# We may have actually gotten the lease/vm even though # We may have actually gotten the lease/vm even though
# the xmlrpc call appeared to fail. We check for this # the xmlrpc call appeared to fail. We check for this
# condition here, which will show up on subsequent allocation # condition here, which will show up on subsequent allocation
# attempts. # attempts.
except xmlrpclib.Fault, e: except xmlrpclib.Fault, e:
if e.faultString.find("already exists") != -1: if e.faultString.find("already exists") != -1:
print "Lease for %s already exists; deleting." % self.nodeid print "Lease for %s already exists; deleting." % self.nodeid
nodeleases = tryXmlrpcCmd(nodemgr.getleases) nodeleases = tryXmlrpcCmd(nodemgr.getleases)
for mylease in nodeleases: for mylease in nodeleases:
if mylease.find(self.slice.slicename) != -1: if mylease.find(self.slice.slicename) != -1:
...@@ -1235,29 +1391,59 @@ class Node: ...@@ -1235,29 +1391,59 @@ class Node:
"lease for slice %s on %s" % \ "lease for slice %s on %s" % \
(self.slice.slicename, self.nodeid) (self.slice.slicename, self.nodeid)
if e.triesleft > 0: if e.triesleft > 0:
tries = e.triesleft tries = e.triesleft
else:
raise
# success
else: else:
raise break
# success
else: # Good, we have a lease; now put an entry into the DB
break if debug:
print "Obtained lease/vm:"
print self.leasedata
self.lease = lease.lease(self.leasedata)
# Note that the lease's end_time happens to be formatted the
# same as a SQL DATETIME (how conspicuously convenient...)
DBQueryFatal("insert into plab_slice_nodes"
" (pid, eid, slicename, node_id,"
" ticketdata, leasedata, leaseend)"
" values (%s, %s, %s, %s, %s, %s, %s)",
(self.slice.pid, self.slice.eid,
self.slice.slicename, self.nodeid,
self.ticketdata, self.leasedata,
self.lease.end_time))
elif method == "PLC":
TIMESTAMP("createnode %s started." % self.nodeid)
try:
res = tryXmlrpcCmd(self.agent.AssignNodes, self.hostname)
if debug:
print res
except:
print "Failed to assign %s to PLC slice %s" % \
(self.nodeid, self.slice.slicename)
raise
try:
res = tryXmlrpcCmd(self.agent.InstantiateSliver,
self.hostname)
if debug:
print res
except:
print "Failed to instantiate sliver %s on slice %s" % \
(self.nodeid, self.slice.slicename)
raise
leaselen = time.time() + DEF_PLC_LEASELEN
DBQueryFatal("insert into plab_slice_nodes"
" (pid, eid, slicename, node_id,"
" ticketdata, leasedata, leaseend)"
" values (%s, %s, %s, %s, %s, %s, %s)",
(self.slice.pid, self.slice.eid,
self.slice.slicename, self.nodeid,
None, None, leaselen))
# Good, we have a lease; now put an entry into the DB
if debug:
print "Obtained lease/vm:"
print self.leasedata
self.lease = lease.lease(self.leasedata)
# Note that the lease's end_time happens to be formatted the
# same as a SQL DATETIME (how conspicuously convenient...)
DBQueryFatal("insert into plab_slice_nodes"
" (pid, eid, slicename, node_id,"
" ticketdata, leasedata, leaseend)"
" values (%s, %s, %s, %s, %s, %s, %s)",
(self.slice.pid, self.slice.eid,
self.slice.slicename, self.nodeid,
self.ticketdata, self.leasedata,
self.lease.end_time))
TIMESTAMP("createnode %s finished." % self.nodeid) TIMESTAMP("createnode %s finished." % self.nodeid)
...@@ -1280,7 +1466,8 @@ class Node: ...@@ -1280,7 +1466,8 @@ class Node:
assert (slicename == self.slice.slicename), \ assert (slicename == self.slice.slicename), \
"Node %s loaded by slice %s, but claims to be in slice %s" % \ "Node %s loaded by slice %s, but claims to be in slice %s" % \
(self.nodeid, self.slice.slicename, slicename) (self.nodeid, self.slice.slicename, slicename)
self.lease = lease.lease(self.leasedata) if method == "dslice":
self.lease = lease.lease(self.leasedata)
def free(self): def free(self):
""" """
...@@ -1304,7 +1491,8 @@ class Node: ...@@ -1304,7 +1491,8 @@ class Node:
print "Freeing Plab node %s." % self.nodeid print "Freeing Plab node %s." % self.nodeid
# Get node manager handle # Get node manager handle
nodemgr = self._createNodemgrProxy() if method == "dslice":
nodemgr = self._createNodemgrProxy()
# Remove the DB entry first. # Remove the DB entry first.
try: try:
...@@ -1317,33 +1505,45 @@ class Node: ...@@ -1317,33 +1505,45 @@ class Node:
"Unable to delete entry for sliver %s from the DB:" "Unable to delete entry for sliver %s from the DB:"
"\n\n%s" % (self.nodeid, tbstr), TBOPS) "\n\n%s" % (self.nodeid, tbstr), TBOPS)
tries = DEF_TRIES if method == "dslice":
while 1: tries = DEF_TRIES
TIMESTAMP("freenode %s try %d started." % (self.nodeid, while 1:
DEF_TRIES-tries+1)) TIMESTAMP("freenode %s try %d started." % (self.nodeid,
try: DEF_TRIES-tries+1))
tryXmlrpcCmd(nodemgr.deletelease, self.slice.slicename, try:
inittries = tries, raisefault = 1) tryXmlrpcCmd(nodemgr.deletelease, self.slice.slicename,
except xmlrpclib.Fault, e: inittries = tries, raisefault = 1)
if e.faultString.find("does not exist") != -1: except xmlrpclib.Fault, e:
print "Lease for %s did not exist on node" % self.nodeid if e.faultString.find("does not exist") != -1:
deleted = 1 print "Lease for %s did not exist on node" % self.nodeid
deleted = 1
break
elif e.triesleft > 0:
tries = e.triesleft
else:
break
except:
print "Warning: couldn't delete the lease for %s on %s" % \
(self.slice.slicename, self.nodeid)
tbstr = "".join(traceback.format_exception(*sys.exc_info()))
SENDMAIL(TBOPS, "Sliver lease deletion failed on %s, "
"dslice %s" % (self.nodeid, self.slice.slicename),
"Sliver lease deletion failed:\n\n%s" % tbstr, TBOPS)
break break
elif e.triesleft > 0:
tries = e.triesleft
else: else:
deleted = 1
break break
elif method == "PLC":
TIMESTAMP("freenode %s started." % self.nodeid)
try:
res = tryXmlrpcCmd(self.agent.UnAssignNodes, self.hostname)
if debug:
print res
except: except:
print "Warning: couldn't delete the lease for %s on %s" % \ print "Failed to release node %s from slice %s" % \
(self.slice.slicename, self.nodeid) (self.nodeid, self.slice.slicename)
tbstr = "".join(traceback.format_exception(*sys.exc_info())) raise
SENDMAIL(TBOPS, "Sliver lease deletion failed on %s, "
"dslice %s" % (self.nodeid, self.slice.slicename),
"Sliver lease deletion failed:\n\n%s" % tbstr, TBOPS)
break
else:
deleted = 1
break
TIMESTAMP("freenode %s finished." % self.nodeid) TIMESTAMP("freenode %s finished." % self.nodeid)
return not deleted return not deleted
...@@ -1379,6 +1579,7 @@ class Node: ...@@ -1379,6 +1579,7 @@ class Node:
disable_sigs_parent = TERMSIGS) disable_sigs_parent = TERMSIGS)
return res[0] | res[1] return res[0] | res[1]
# XXX: fix for PLC
def _renew(self): def _renew(self):
"""