Commit 41c54939 authored by Kirk Webb's avatar Kirk Webb
Browse files

The revived Plab interface is here!

Lots of updates to the plab backend, including improved plab <-> elab node
id translation and update handling.  Includes support for the current PLC
API, and the new pl_conf node manager interface API.  Several more db library
routines were ported from the perl library to the python one to support the
new code (mostly the node_id tracking stuff).  Fixes to the client side and
also a rootball creation cleanup (binaries removed from the CVS repo).

There are also enhancements to the experiment view page for experiments
including plab nodes: site and widearea hostname are now displayed along
with the other node information.

Note that the way the setup timeout for vnodes is calculated has been changed
a bit.  Instead of using a hardwired base timeout, the base timeout is now
based on the reboot_waittime database field, which comes from the 'OS'
(e.g., FBSD-JAIL, RHL-PLAB) the vnode runs.

The default max duration for a plab slice created through the plab_ez interface
is set to 1 year, and linktest is currently disabled and hidden through
the ez interface.

There is still work to do, but this checkin brings with it a functional
plab portal!
parent 2d003fcc
......@@ -2285,6 +2285,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/fetchtar.proxy tbsetup/webfrisbeekiller \
tbsetup/plab/GNUmakefile tbsetup/plab/libplab.py \
tbsetup/plab/mod_dslice.py tbsetup/plab/mod_PLC.py \
tbsetup/plab/mod_PLCNM.py \
tbsetup/plab/plabslice tbsetup/plab/plabnode tbsetup/plab/plabrenewd \
tbsetup/plab/plabmetrics tbsetup/plab/plabstats \
tbsetup/plab/plabmonitord tbsetup/plab/plablinkdata \
......
......@@ -723,6 +723,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/fetchtar.proxy tbsetup/webfrisbeekiller \
tbsetup/plab/GNUmakefile tbsetup/plab/libplab.py \
tbsetup/plab/mod_dslice.py tbsetup/plab/mod_PLC.py \
tbsetup/plab/mod_PLCNM.py \
tbsetup/plab/plabslice tbsetup/plab/plabnode tbsetup/plab/plabrenewd \
tbsetup/plab/plabmetrics tbsetup/plab/plabstats \
tbsetup/plab/plabmonitord tbsetup/plab/plablinkdata \
......
......@@ -25,8 +25,24 @@ from libtestbed import *
#
# Debug vars.
#
verbose = 0;
debug = 0;
verbose = 0
debug = 0
# Constants
TBOPSPID = "emulab-ops"
# Experiment (pid/eid) that MarkPhysNodeDown() moves dead nodes into.
NODEDEAD_PID = TBOPSPID
NODEDEAD_EID = "hwdown"

# Node Log Types
TB_NODELOGTYPE_MISC = "misc"
# Every log type accepted by TBValidNodeLogType().
TB_NODELOGTYPES = (TB_NODELOGTYPE_MISC, )
TB_DEFAULT_NODELOGTYPE = TB_NODELOGTYPE_MISC

# Node History Stuff.  Values for the "op" column written by
# TBSetNodeHistory().  (TB_NODEHISTORY_OP_MOVE used to be assigned twice in
# this section; it is now defined once, alongside its siblings.)
TB_NODEHISTORY_OP_FREE = "free"
TB_NODEHISTORY_OP_ALLOC = "alloc"
TB_NODEHISTORY_OP_MOVE = "move"
#
# DB variables.
......@@ -90,7 +106,7 @@ def DBQuery(queryPat, querySub = (), asDict = False):
if ret == None:
return ()
return ret
except MySQLdb.MySQLError:
except MySQLdb.MySQLError, e:
tries -= 1
if tries == 0:
break
......@@ -100,6 +116,7 @@ def DBQuery(queryPat, querySub = (), asDict = False):
__dbConnection.ping()
except MySQLdb.MySQLError:
pass
tbmsg = queryPat % cursor.connection.literal(querySub)
tbmsg += "\n\n"
tbmsg += "".join(traceback.format_exception(*sys.exc_info()))
......@@ -112,6 +129,157 @@ def DBQueryFatal(*args):
raise RuntimeError, "DBQueryFatal failed"
return ret
def DBQueryWarn(*args):
    """
    Run a query via DBQuery() and hand its result back unchanged.

    All positional arguments are forwarded as-is to DBQuery().
    """
    result = DBQuery(*args)
    return result
def DBQuoteSpecial(str):
    """
    Escape a string for use inside an SQL statement.

    Calls TBDBConnect() first, then returns the result of the module's DB
    connection escape_string() on the given string.
    """
    TBDBConnect()
    escaped = __dbConnection.escape_string(str)
    return escaped
#
# Map UID to DB UID (login). Does a DB check to make sure user is known to
# the DB (user obviously has a regular account), and that account will
# always match what the DB says. Redundant, I know. But consider it a
# sanity (or consistency) check.
#
# usage: UNIX2DBUID(int uid)
# returns username if the UID is okay.
# raises a UserError exception if the UID is bogus.
#
class UserError(StandardError): pass # XXX: need better suite of exceptions
def UNIX2DBUID (unix_uid):
    """
    Map a numeric UNIX uid to the matching DB uid (login name).

    unix_uid -- numeric UNIX user id to look up.

    Returns the DB login name when the users table has a row for unix_uid
    and that name equals the passwd-file name for the same uid.

    Raises UserError if the uid is unknown to the DB, or if the DB and
    passwd-file names disagree.
    """
    qres = DBQueryFatal("select uid from users where unix_uid=%s",
                        (unix_uid,))

    if not len(qres):
        # The message used to format an undefined name ("uid"), so an unknown
        # user surfaced as a NameError instead of the intended UserError.
        raise UserError("*** %s not a valid Emulab user!" % unix_uid)

    pwname = pwd.getpwuid(unix_uid)[0]
    dbuser = qres[0][0]

    # Sanity (consistency) check: both sources must agree on the login.
    if dbuser != pwname:
        raise UserError("*** %s (passwd file) does not match %s (db)" %
                        (pwname, dbuser))

    return dbuser
#
# Helper. Test if numeric. Convert to dbuid if numeric.
#
def MapNumericUID(uid):
    """
    Convert a numeric uid to its DB uid; pass anything else through.

    If int(uid) succeeds the value is translated with UNIX2DBUID().  If a
    ValueError is raised, the current value of uid is returned instead.
    """
    try:
        uid = int(uid)
        return UNIX2DBUID(uid)
    except ValueError:
        return uid
#
# Return the IDX for a current experiment.
#
# usage: TBExptIDX(char *pid, char *eid)
#        returns the experiment idx (as an int) if okay.
#        raises an UnknownExptID exception if there is no such experiment.
#
class UnknownExptID(StandardError): pass # XXX: need better suite of exceptions
def TBExptIDX(pid,eid):
    """
    Look up the idx of experiment pid/eid in the experiments table.

    Returns the idx converted to an int.
    Raises UnknownExptID when the query yields no rows.
    """
    rows = DBQueryWarn("select idx from experiments "
                       "where pid=%s and eid=%s",
                       (pid, eid))

    if len(rows) == 0:
        raise UnknownExptID("Experiment %s/%s unknown!" % (pid,eid))

    return int(rows[0][0])
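#
# Insert a Log entry for a node.
#
# usage: TBSetNodeLogEntry(char *node, char *uid, char *type, char *message)
#        Returns the result of the insert query (from DBQueryWarn) when the
#        node name and log type are both valid.
#        Returns 0 if either of them is invalid.
#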
def TBSetNodeLogEntry(node, dbuid, type, message):
    """
    Add a row to the nodelog table for the given node.

    Returns 0 without touching the DB when the node name or the log type
    fails validation; otherwise returns whatever DBQueryWarn() returns for
    the insert.
    """
    if not (TBValidNodeName(node) and TBValidNodeLogType(type)):
        return 0

    insert_sql = ("insert into nodelog "
                  "values "
                  "(%s, NULL, %s, %s, %s, now())")
    return DBQueryWarn(insert_sql, (node, type, dbuid, message))
#
# Validate a node name.
#
# usage: TBValidNodeName(char *name)
# Returns 1 if the node is valid.
# Returns 0 if not.
#
def TBValidNodeName(node):
    """
    Check whether a node_id exists in the nodes table.

    Returns 1 if the lookup query produced at least one row, 0 otherwise.
    """
    rows = DBQueryWarn("select node_id from nodes where node_id=%s",
                       (node))
    if len(rows) > 0:
        return 1
    return 0
#
# Validate a node log type.
#
# usage: TBValidNodeLogType(char *type)
# Returns 1 if the type string is valid.
# Returns 0 if not.
#
def TBValidNodeLogType(type):
    """
    Check a node log type string against TB_NODELOGTYPES.

    Returns 1 if the type is a member of TB_NODELOGTYPES, 0 otherwise.
    """
    return int(type in TB_NODELOGTYPES)
#
# Mark a Phys node as down. Cannot use next reserve since the pnode is not
# going to go through the free path.
#
# usage: MarkPhysNodeDown(char *nodeid)
#
def MarkPhysNodeDown(pnode):
    """
    Move a physical node into the NODEDEAD_PID/NODEDEAD_EID experiment.

    pnode -- node_id of the physical node to mark as down.

    Rewrites the node's row in the reserved table while holding a write
    lock on that table, then records a "move" entry in the node history
    on behalf of the calling UNIX user.  Returns None.  Errors from
    DBQueryFatal() propagate to the caller.
    """
    pid = NODEDEAD_PID
    eid = NODEDEAD_EID

    DBQueryFatal("lock tables reserved write")
    try:
        DBQueryFatal("update reserved set "
                     " pid=%s,eid=%s,rsrv_time=now() "
                     "where node_id=%s",
                     (pid, eid, pnode))
    finally:
        # Always drop the table lock, even when the update fails; otherwise
        # the lock taken above is never explicitly released by this code.
        DBQueryFatal("unlock tables")

    TBSetNodeHistory(pnode, TB_NODEHISTORY_OP_MOVE, os.getuid(), pid, eid)
    return
def TBSetNodeHistory(nodeid, op, uid, pid, eid):
    """
    Record an operation on a node in the node_history table.

    nodeid   -- node the entry is about.
    op       -- operation string (e.g. one of the TB_NODEHISTORY_OP_* values).
    uid      -- numeric UNIX uid or a DB login name.  A numeric 0 is stored
                as "root"; any other numeric uid is translated with
                UNIX2DBUID(); non-numeric values are stored unchanged.
    pid, eid -- experiment the operation refers to.

    Returns 0 (after printing a warning) if the experiment idx cannot be
    looked up; otherwise returns the result of DBQueryWarn() for the insert.
    """
    try:
        exptidx = TBExptIDX(pid, eid)
    except Exception:
        # Best effort: any lookup failure is reported and the history entry
        # is skipped.  Catching Exception rather than using a bare "except"
        # keeps this handler to ordinary errors.
        print("*** WARNING: No such experiment %s/%s!" % (pid,eid))
        return 0

    try:
        uid = int(uid)
        if uid == 0:
            uid = "root"
        else:
            uid = UNIX2DBUID(uid)
    except ValueError:
        # Not numeric: store the value exactly as it was given.
        pass

    return DBQueryWarn("insert into node_history set "
                       " history_id=0, node_id=%s, op=%s, "
                       " uid=%s, stamp=UNIX_TIMESTAMP(now()), "
                       " exptidx=%s",
                       (nodeid,op,uid,exptidx))
def TBSiteVarExists(name):
name = DBQuoteSpecial(name)
......@@ -142,9 +310,3 @@ def TBGetSiteVar(name):
raise RuntimeException, \
"*** attempted to fetch unknown site variable name!"
def DBQuoteSpecial(str):
TBDBConnect()
return __dbConnection.escape_string(str)
......@@ -929,10 +929,18 @@ elsif (@vnodelist) {
my $pnode = $vnode2pnode{$node};
my $islocal= exists($nodes{$pnode});
my $wstart = $waitstart{$node};
my $maxwait = 90 + (40 * $pnodevcount{$pnode});
my $curallocstate;
my $actual_state;
#
# Base the maxwait for vnodes on the reboot_waittime field for
# their respective OSIDs, with some slop time that scales up
# as a function of the number of vnodes on the parent pnode.
#
my $osid = $osids{$node};
my $reboot_time = $reboot_waittime{$osid};
my $maxwait = $reboot_time + (40 * $pnodevcount{$pnode});
TBGetNodeAllocState($node, \$curallocstate);
#
......
......@@ -17,7 +17,7 @@ SUBDIRS = libdslice etc
SBIN_STUFF = plabslice plabnode plabrenewd plabmetrics plabstats \
plabmonitord plablinkdata plabdist plabhttpd plabdiscover
LIB_STUFF = libplab.py mod_dslice.py mod_PLC.py
LIB_STUFF = libplab.py mod_dslice.py mod_PLC.py mod_PLCNM.py
LIBEXEC_STUFF = webplabstats
......
This diff is collapsed.
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
sys.path.append("@prefix@/lib")
import xmlrpclib
import getopt
import fcntl
import time
import calendar
import cPickle
from libtestbed import *
#
# output control vars
#
# "debug" gates the diagnostic prints / TIMESTAMP calls in this module.
verbose = 0
debug = 0
#
# PLC constants
#
# Defaults for the PLCagent constructor (XML-RPC endpoint and credentials).
DEF_PLC_URI = "https://www.planet-lab.org/PLCAPI/"
DEF_PLC_USER = "lepreau@cs.utah.edu"
DEF_PLC_PASS = "phurds" # XXX: hardcoded, cleartext passwds bad.
# Default node manager port for NMagent; kept as a string because it is
# concatenated into the server URL.
DEF_NM_PORT = "814"
# Lease-length values below are in seconds.
MAX_PLC_LEASELEN = 2*30*24*60*60 # defined by PLC as two months
MIN_LEASE_ADDTIME = 23*60*60 # less than a day used? leave it be then..
MAX_LEASE_SLOP = 600 # (ten minutes)
MAX_CACHE_TIME = 3600 # (one hour)
DEF_PLC_SHARES = 30 # XXX: totally arbitrary
# User added to every new slice by createSlice().
EMULABMAN_EMAIL = "emulabman@emulab.net"
# Lock file and per-call delay used by the (deactivated) __PLCagent wrapper.
PLC_LOCKFILE = "/tmp/.PLC-lock"
DEF_PLC_SPACING = 3 # seconds
# Defaults passed to PLCagent.SliceUpdate().
DEF_SLICE_DESC = "Slice created by Emulab"
DEF_EMULAB_URL = "http://www.emulab.net"
# NOTE(review): presumably the node manager version expected by this module
# (compare with NMagent.version(), which yields [major, minor, revision]);
# not referenced in the code visible here -- TODO confirm.
MAJOR_VERS = 1
MINOR_VERS = 0
MIN_REV = 10
#
# Reflective wrapper class for real PLCagent.
# This class forces global, mutually exclusive access to PLC
# function calls.
#
# XXX: Created per Jay's request, then deactivated per Jay's
# subsequent request.
#
class __PLCagent:
    """
    Wrapper around the real PLCagent that spaces out PLC calls.

    Attribute lookups that are not satisfied by the wrapper itself are
    turned into __PLCMutexMethod objects bound to the same-named attribute
    of the wrapped PLCagent.

    NOTE(review): the lock is released *before* the wrapped method is
    invoked, so what the lock file actually serializes is the
    DEF_PLC_SPACING delay preceding each call, not the call itself.  That
    ordering is kept from the original code -- confirm it is intended
    before reactivating this class.
    """

    class __PLCMutexMethod:
        """Callable that takes the PLC lock and sleeps before calling."""

        def __init__(self, funcname, obj):
            self.__lockfile = open(PLC_LOCKFILE, "w")
            # getattr() performs the same lookup the old eval("obj.NAME")
            # did, without evaluating a constructed string as code.
            self.__meth = getattr(obj, funcname)
            self.func_name = funcname
            return

        def __call__(self, *args):
            if debug:
                TIMESTAMP("Acquiring PLC lock")
                pass
            fcntl.lockf(self.__lockfile, fcntl.LOCK_EX)
            try:
                if debug:
                    TIMESTAMP("Lock acquired.")
                    pass
                time.sleep(DEF_PLC_SPACING)
            finally:
                # Release the lock even if the sleep is interrupted.
                fcntl.lockf(self.__lockfile, fcntl.LOCK_UN)
            if debug:
                TIMESTAMP("PLC lock released")
                pass
            return self.__meth(*args)
        pass

    def __init__(self, *args):
        # The real agent class in this file is PLCagent; the previously
        # referenced name "_PLCagent" is not defined in the visible part of
        # this module.
        self.__myPLC = PLCagent(*args)
        return

    def __getattr__(self, name):
        return self.__PLCMutexMethod(name, self.__myPLC)
class NMagent:
    """
    Thin XML-RPC client for the node manager running on one node.

    IP     -- address (or hostname) used to build the server URL.
    nmport -- node manager port; a string or an integer.
    """

    def __init__(self, IP, nmport = DEF_NM_PORT):
        # str() lets callers hand in a numeric port as well as a string.
        self.__server = xmlrpclib.ServerProxy("http://" + IP + ":" +
                                              str(nmport) + "/")
        # Cached [major, minor, revision]; empty until version() is called.
        self.__vers = []
        pass

    def create_sliver(self, ticket):
        """Call create_sliver on the node, sending ticket as XML-RPC binary."""
        return self.__server.create_sliver(xmlrpclib.Binary(ticket))

    def delete_sliver(self, rcap):
        """Call delete_sliver on the node with the given rcap."""
        return self.__server.delete_sliver(rcap)

    def version(self):
        """
        Return the node manager version as [major, minor, revision] ints.

        The server reply is expected to be a two-element list whose first
        element is 0 and whose second is a "MAJOR.MINOR-REVISION" string.
        Any other reply, or any error while querying or parsing, yields
        [0, 0, 0].  The result (including the fallback) is cached, so the
        server is contacted at most once per NMagent instance.
        """
        if not self.__vers:
            try:
                res = self.__server.version()
                if isinstance(res, list) and len(res) == 2 and res[0] == 0:
                    verslist = res[1].split(".")
                    major = verslist[0]
                    minor, revision = verslist[1].split("-")
                    self.__vers = [int(major), int(minor), int(revision)]
                    pass
                else:
                    self.__vers = [0,0,0]
                    pass
                pass
            except Exception:
                # Best effort: treat an unreachable node or a malformed
                # version string as "unknown version".
                self.__vers = [0,0,0]
                pass
        return self.__vers
    pass
#
# The real PLC agent. Wraps up standard arguments to the
# PLC XMLRPC interface.
#
# XXX: a number of functions here need to be updated to cope with lists
# and tuples.
class PLCagent:
    """
    The real PLC agent: wraps the standard arguments (authentication
    structure and slice name) around calls to the PLC XML-RPC interface
    for a single slice.

    Every Slice* method forwards to the same-named XML-RPC method on the
    server and returns its result.  The methods that take a collection
    (nodes or users) accept a single item, a list, or a tuple.
    """

    def __init__(self, slicename,
                 uri = DEF_PLC_URI,
                 username = DEF_PLC_USER,
                 password = DEF_PLC_PASS):
        """
        slicename -- name of the slice all calls operate on (required).
        uri       -- PLC XML-RPC endpoint.
        username, password -- credentials placed in the auth structure.

        Raises RuntimeError if slicename is empty.  Re-raises whatever
        xmlrpclib raises if the server proxy cannot be created.
        """
        if not slicename:
            raise RuntimeError("Must provide a slicename!")
        self.__slice = {}
        self.__slice['sliceName'] = slicename
        self.__slicename = slicename
        # Authentication structure sent as the first argument of every call.
        self.__auth = {}
        self.__auth['AuthMethod'] = "password"
        self.__auth['Username'] = username
        self.__auth['AuthString'] = password
        self.__auth['Role'] = "pi"
        # Instantiation method set by SliceSetInstantiationMethod().
        self.__insmeth = "delegated"
        try:
            self.__server = xmlrpclib.ServerProxy(uri)
        except:
            print("Failed to create XML-RPC proxy")
            raise
        return

    def getSliceName(self):
        """Return the slice name this agent was created for."""
        return self.__slice['sliceName']

    def SliceCreate(self):
        return self.__server.SliceCreate(self.__auth, self.__slicename)

    def SliceDelete(self):
        return self.__server.SliceDelete(self.__auth, self.__slicename)

    def SliceUpdate(self, slicedesc = DEF_SLICE_DESC,
                    sliceURL = DEF_EMULAB_URL):
        # Note the argument order on the wire: URL first, then description.
        return self.__server.SliceUpdate(self.__auth, self.__slicename,
                                         sliceURL, slicedesc)

    def SliceRenew(self, expdate):
        return self.__server.SliceRenew(self.__auth, self.__slicename,
                                        expdate)

    def SliceNodesAdd(self, nodelist):
        """Add one node, or a list/tuple of nodes, to the slice."""
        if not isinstance(nodelist, (list, tuple)):
            nodelist = [nodelist,]
            pass
        # Nodes are always sent as a list; a tuple argument used to be
        # wrapped whole, producing a nested sequence.
        return self.__server.SliceNodesAdd(self.__auth, self.__slicename,
                                           list(nodelist))

    def SliceNodesDel(self, nodelist):
        """Remove one node, or a list/tuple of nodes, from the slice."""
        if not isinstance(nodelist, (list, tuple)):
            nodelist = [nodelist,]
        return self.__server.SliceNodesDel(self.__auth, self.__slicename,
                                           list(nodelist))

    def SliceNodesList(self):
        return self.__server.SliceNodesList(self.__auth, self.__slicename)

    def SliceUsersAdd(self, userlist):
        """Add one user, or a list/tuple of users, to the slice."""
        if not isinstance(userlist, (list, tuple)):
            userlist = (userlist,)
        # Users are always sent as a tuple; a list argument used to be
        # wrapped whole, producing a nested sequence.
        return self.__server.SliceUsersAdd(self.__auth, self.__slicename,
                                           tuple(userlist))

    def SliceUsersDel(self, userlist):
        """Remove one user, or a list/tuple of users, from the slice."""
        if not isinstance(userlist, (list, tuple)):
            userlist = (userlist,)
        return self.__server.SliceUsersDel(self.__auth, self.__slicename,
                                           tuple(userlist))

    def SliceUsersList(self):
        return self.__server.SliceUsersList(self.__auth, self.__slicename)

    def SliceGetTicket(self):
        return self.__server.SliceGetTicket(self.__auth, self.__slicename)

    def SliceSetInstantiationMethod(self):
        return self.__server.SliceSetInstantiationMethod(self.__auth,
                                                         self.__slicename,
                                                         self.__insmeth)

    def SliceInfo(self):
        # SliceInfo takes a list of slice names; ask about ours only.
        return self.__server.SliceInfo(self.__auth, [self.__slicename,])

    pass # end of PLCagent class
class mod_PLCNM:
def __init__(self):
self.modname = "mod_PLCNM"
self.__PLCagent = None
self.__sliceexpdict = {}
self.__sliceexptime = 0
return
def createSlice(self, slice):
    """
    Create and configure a slice at PLC, then fetch its ticket.

    Runs SliceCreate, SliceSetInstantiationMethod, SliceUsersAdd (for
    EMULABMAN_EMAIL) and SliceUpdate in that order through tryXmlrpcCmd(),
    followed by SliceGetTicket.  If any step raises, a message is printed
    and the exception is re-raised.

    Returns a tuple (res, ticket, leaseend): the result of the SliceUpdate
    call, the ticket serialized with cPickle, and the UTC time at which
    this method started plus MAX_PLC_LEASELEN.
    """
    agent = self.__getAgent(slice.slicename)
    res = None
    now = calendar.timegm(time.gmtime())

    # (rpc, extra args, debug format, failure message) for each setup step.
    #
    # No nodes are added to the slice via PLC.  PLC has a limit on the
    # size of XMLRPC responses, so trying to get back a ticket with all
    # Plab nodes included was getting truncated.  Steve Muir sez:
    #   "tickets don't actually need any nodes in them, the PLC
    #    agent currently ignores the node list. i suppose it's possible
    #    that in the future we might start checking the node list but my
    #    feeling is that we probably won't. so in the short-term you can
    #    just leave the node list empty."
    setup_calls = (
        (agent.SliceCreate, (),
         "SliceCreate result: %s",
         "Failed to create slice %s" % slice.slicename),
        (agent.SliceSetInstantiationMethod, (),
         "SliceSetInstantiationMethod result: %s",
         "Failed to set slice instantiation type to delegated"),
        (agent.SliceUsersAdd, (EMULABMAN_EMAIL,),
         "SliceUsersAdd result: %s",
         "Failed to assign emulabman to slice %s" % slice.slicename),
        (agent.SliceUpdate, (),
         "SliceUpdate result: %s",
         "Failed to update info for slice: %s" % slice.slicename),
    )

    for rpc, rpcargs, okfmt, failmsg in setup_calls:
        try:
            res = tryXmlrpcCmd(rpc, *rpcargs)
            if debug:
                print(okfmt % res)
        except:
            print(failmsg)
            raise

    try:
        ticket = tryXmlrpcCmd(agent.SliceGetTicket)
        if debug:
            print(ticket)
    except:
        print("Failed to get PLC ticket for slice %s" % slice.slicename)
        raise

    leaseend = now + MAX_PLC_LEASELEN
    return (res, cPickle.dumps(ticket), leaseend)
def deleteSlice(self, slice):
    """
    Delete the slice at PLC.

    The SliceDelete call goes through tryXmlrpcCmd() with
    OKstrs = ["does not exist"].  Returns None.
    """
    plc = self.__getAgent(slice.slicename)
    tryXmlrpcCmd(plc.SliceDelete, OKstrs = ["does not exist"])
    return None
# XXX: copied code - not correct for PLC/NM hybrid
def renewSlice(self, slice):
agent = self.__getAgent(slice.slicename)
ret = 0
now = calendar.timegm(time.gmtime()) # make explicit that we want UTC
# Get current PLC timeout for this slice
leaseend = self.getSliceExpTime(slice.slicename)
# Warn that we weren't able to get the exp. time from PLC,
# but don't fail - try to renew anyway.
if not leaseend:
print "Couldn't get slice expiration time from PLC!"
leaseend = slice.leaseend
pass
# Allow some slop in our recorded time versus PLC's. This is necessary
# since we calculate the expiration locally. If we are off by too much
# then adjust to PLC's recorded expiration.
if abs(leaseend - slice.leaseend) > MAX_LEASE_SLOP:
print "Warning: recorded lease for %s doesn't agree with PLC" % \
slice.slicename
print "\tRecorded: %s Actual: %s" % (slice.leaseend, leaseend)
slice.leaseend = leaseend
pass
# Expired!  Just complain about it; try renewal anyway.  The renewal
# code in libplab will send email.
if leaseend < now:
print "Slice %s (%s/%s) has expired!" % \
(slice.slicename, slice.pid, slice.eid)
pass
# Max out leaseend as far as (politically) possible
addtime = now + MAX_PLC_LEASELEN - leaseend
# If the lease is within delta of the max, don't bother.
if addtime < MIN_LEASE_ADDTIME:
print "Slice %s (%s/%s) doesn't need to be renewed" % \
(slice.slicename, slice.pid, slice.eid)
return 1
try:
res = tryXmlrpcCmd(agent.AssignShares,
(addtime,
DEF_PLC_SHARES),
NOKstrs = ["does not Exist",