Commit 03921d0e authored by Kirk Webb's avatar Kirk Webb

A bunch of plab interface updates that I've worked on over the last while.
Most significant is the revamped renewal code that tries to push the leases
out to the policy-defined maximum of two months during each iteration through
the plabrenewd daemon loop.

- added python lib code to get SiteVars
- Fixed up comments to reflect current code operation
- revamped renewal code (again)
  - changed all times to UTC for consistency
  - removed node-level renew invocation in favor of slice-level
    - if backend module requires node-level renewals, it must handle them
      itself in the slice-level function
    - better reporting
- set admin bit if creating svc slice
  - other updates to ensure admin bit is preserved
- update rootball handling function naming
- updated tryXmlRpcCmd() to accept two new sets of strings, and a callback
  function.  The strings represent Faults that either 1) indicate success,
  or 2) indicate failure.  The callback is another optional error handling
  method, allowing the caller to decide how to treat individual faults as
  they see fit.
- updated the backend module code to take advantage of the new string
  match status identifiers in tryXmlRpcCmd()
- completely revamped slice renewal code in mod_PLC backend
  - compare against real lease expiration data gathered directly from PLC
    (we used to just infer it from our originally requested lease length)
  - warn when our notion of expiration doesn't match PLC's
- added agent caching and lease expiration info caching to mod_PLC
  backend.
parent f7016238
......@@ -104,3 +104,40 @@ def DBQueryFatal(*args):
if ret == None:
raise RuntimeError, "DBQueryFatal failed"
return ret
def TBSiteVarExists(name):
    """
    Report whether a row for the given name is present in the
    'sitevariables' table.

    ARGUMENTS:
      name: <string> site variable name to look up.  It is run through
            DBQuoteSpecial() before being handed to the query.

    RETURNS:
      1 if the query returned at least one row, 0 otherwise.
    """
    quoted = DBQuoteSpecial(name)
    rows = DBQueryFatal("select name from sitevariables "
                        "where name=%s", (quoted,))
    # Guard-clause form: any returned row means the variable is known.
    if len(rows) > 0:
        return 1
    return 0
def TBGetSiteVar(name):
    """
    Fetch the value of a site variable from the 'sitevariables' table.

    ARGUMENTS:
      name: <string> site variable name to look up.  It is run through
            DBQuoteSpecial() before being handed to the query.
            NOTE(review): the quoted name is also passed as a query
            parameter; if DBQueryFatal quotes parameters itself this may
            double-escape -- confirm against DBQueryFatal.

    RETURNS:
      The 'value' column of the first matching row when it is non-empty,
      otherwise the 'defaultvalue' column when that is non-empty.

    RAISES:
      RuntimeError if no row matched, or if the matching row has neither a
      usable value nor a usable default.
    """
    name = DBQuoteSpecial(name)
    qres = DBQueryFatal("select value, defaultvalue from sitevariables "
                        "where name=%s", (name,))
    if len(qres) > 0:
        value, defaultvalue = qres[0]
        # An explicitly set value wins over the default.
        if value:
            return value
        elif defaultvalue:
            return defaultvalue
        pass
    # 'RuntimeException' is not a Python builtin; referencing it would have
    # produced a NameError here.  Use RuntimeError, as DBQueryFatal does.
    raise RuntimeError("*** attempted to fetch unknown site variable name!")
def DBQuoteSpecial(str):
    """
    Escape a string for use in a database query.

    Calls TBDBConnect() first (presumably to make sure the module-level
    connection object exists -- confirm against TBDBConnect), then returns
    the result of the connection's escape_string() for the given string.
    """
    TBDBConnect()
    escaped = __dbConnection.escape_string(str)
    return escaped
......@@ -120,6 +120,7 @@ def SENDMAIL(To, Subj, Msg, From = None, Headers = None, Files = ()):
sm.close()
return 1
#
# General library functions
#
......@@ -325,6 +326,7 @@ def ForkCmd(cmd, args=(), timeout=DEF_TIMEOUT,
def tryXmlrpcCmd(cmd, args = (),
inittries = DEF_TRIES,
sleepint = DEF_SLEEPINT,
OKstrs=[], NOKstrs=[], callback=None,
raisefault = False):
"""
This helper/wrapper function's job is to invoke the commands to the
......@@ -337,20 +339,27 @@ def tryXmlrpcCmd(cmd, args = (),
inittries: <int> number of retries before the function gives up
and reraises the last caught exception.
sleepint: <int> how long to sleep (in seconds) between retries.
OKstrs: <list> success strings to check against any XMLRPC faults.
If one is seen, then return 'success' (0)
NOKstrs: <list> failure strings to check against any XMLRPC faults.
If one is seen, then return 'failure' (1)
callback: <func_ptr> pointer to a function to call when an XMLRPC
fault is encountered. it should return and integer, where:
0 means success; 1 means fail; and -1 means keep trying.
raisefault: <boolean> indicates whether or not to reraise an
xmlrpclib Fault exception when caught. When true it
also adds a new 'triesleft' member to the Fault class
instance containing the number of attempts this
function had remaining when the Fault exception was
encountered.
encountered. XXX: DEPRECATED!
RETURNS:
This function returns the result returned by the passed in command.
This function returns the result returned by the passed in RPC.
SIDE EFFECTS:
Invokes the passed in command with the passed in arguments.
Catches protocol/socket exceptions for command retry.
(Optionally) catches xmlrpclib.Fault exceptions for command retry.
Catches xmlrpclib.Fault exceptions for potential command retry.
Adds a 'triesleft' member to all exceptions reraised prior to tries=0.
Understands TimeoutError exceptions, and will reraise them.
......@@ -361,6 +370,7 @@ def tryXmlrpcCmd(cmd, args = (),
if debug:
print "About to perform command %s with args:\n\t%s" % \
(cmd, args)
while 1:
tries = tries - 1
try:
......@@ -373,28 +383,68 @@ def tryXmlrpcCmd(cmd, args = (),
return cmd(args)
else:
return cmd()
pass
# RPC returned a fault - process it here.
except xmlrpclib.Fault, e:
# If a callback fault handler was passed, then call it to assess
# the damage first.
if callback:
cres = callback(e.faultCode, e.faultString)
if cres == 0:
return "Fault received, but operation succeeded."
elif cres == 1:
raise
pass
# If any of these string appears in the fault, then the desired
# outcome of the function has been met, so return success.
for fstr in OKstrs:
if e.faultString.find(fstr) != -1:
return fstr
pass
# If any of these strings are found, then the RPC failed, no
# sense retrying.
for fstr in NOKstrs:
if e.faultString.find(fstr) != -1:
raise
pass
print "XML-RPC Fault happened while executing agent " \
"command: %s" % cmd.func_name
print "\tCode: %s, Error: %s" % (e.faultCode, e.faultString)
# Raise xmlrpc exception, if requested. Report tries left in
# fault object.
# XXX: this really should be removed once we've determined that
# no existing code uses this facility.
if raisefault:
e.triesleft = tries
raise xmlrpclib.Fault, e
pass
# Jump out if we receive a timeout exception.
except TimeoutError, e:
if debug:
print "Caught a timeout error, setting triesleft and raising."
e.triesleft = tries
raise TimeoutError, e
# Communications errors are non-fatal unless they occur
# several times in a row.
except (socket.error, xmlrpclib.ProtocolError), e:
print "Encountered problem communicating with agent " \
"while executing command: %s" % cmd.func_name
if debug:
print "Exception is of type: %s" % e
pass
pass
if tries > 0:
print "Sleeping for %s seconds, then retrying %s command" % \
(sleepint, cmd.func_name)
time.sleep(sleepint)
pass
else:
# XXX: perhaps this should raise its own, new type of
# exception.
......
......@@ -104,3 +104,40 @@ def DBQueryFatal(*args):
if ret == None:
raise RuntimeError, "DBQueryFatal failed"
return ret
def TBSiteVarExists(name):
    """
    Report whether a row for the given name is present in the
    'sitevariables' table.

    ARGUMENTS:
      name: <string> site variable name to look up.  It is run through
            DBQuoteSpecial() before being handed to the query.

    RETURNS:
      1 if the query returned at least one row, 0 otherwise.
    """
    quoted = DBQuoteSpecial(name)
    rows = DBQueryFatal("select name from sitevariables "
                        "where name=%s", (quoted,))
    # Guard-clause form: any returned row means the variable is known.
    if len(rows) > 0:
        return 1
    return 0
def TBGetSiteVar(name):
    """
    Fetch the value of a site variable from the 'sitevariables' table.

    ARGUMENTS:
      name: <string> site variable name to look up.  It is run through
            DBQuoteSpecial() before being handed to the query.
            NOTE(review): the quoted name is also passed as a query
            parameter; if DBQueryFatal quotes parameters itself this may
            double-escape -- confirm against DBQueryFatal.

    RETURNS:
      The 'value' column of the first matching row when it is non-empty,
      otherwise the 'defaultvalue' column when that is non-empty.

    RAISES:
      RuntimeError if no row matched, or if the matching row has neither a
      usable value nor a usable default.
    """
    name = DBQuoteSpecial(name)
    qres = DBQueryFatal("select value, defaultvalue from sitevariables "
                        "where name=%s", (name,))
    if len(qres) > 0:
        value, defaultvalue = qres[0]
        # An explicitly set value wins over the default.
        if value:
            return value
        elif defaultvalue:
            return defaultvalue
        pass
    # 'RuntimeException' is not a Python builtin; referencing it would have
    # produced a NameError here.  Use RuntimeError, as DBQueryFatal does.
    raise RuntimeError("*** attempted to fetch unknown site variable name!")
def DBQuoteSpecial(str):
    """
    Escape a string for use in a database query.

    Calls TBDBConnect() first (presumably to make sure the module-level
    connection object exists -- confirm against TBDBConnect), then returns
    the result of the connection's escape_string() for the given string.
    """
    TBDBConnect()
    escaped = __dbConnection.escape_string(str)
    return escaped
This diff is collapsed.
......@@ -120,6 +120,7 @@ def SENDMAIL(To, Subj, Msg, From = None, Headers = None, Files = ()):
sm.close()
return 1
#
# General library functions
#
......@@ -325,6 +326,7 @@ def ForkCmd(cmd, args=(), timeout=DEF_TIMEOUT,
def tryXmlrpcCmd(cmd, args = (),
inittries = DEF_TRIES,
sleepint = DEF_SLEEPINT,
OKstrs=[], NOKstrs=[], callback=None,
raisefault = False):
"""
This helper/wrapper function's job is to invoke the commands to the
......@@ -337,20 +339,27 @@ def tryXmlrpcCmd(cmd, args = (),
inittries: <int> number of retries before the function gives up
and reraises the last caught exception.
sleepint: <int> how long to sleep (in seconds) between retries.
OKstrs: <list> success strings to check against any XMLRPC faults.
If one is seen, then return 'success' (0)
NOKstrs: <list> failure strings to check against any XMLRPC faults.
If one is seen, then return 'failure' (1)
callback: <func_ptr> pointer to a function to call when an XMLRPC
fault is encountered. it should return and integer, where:
0 means success; 1 means fail; and -1 means keep trying.
raisefault: <boolean> indicates whether or not to reraise an
xmlrpclib Fault exception when caught. When true it
also adds a new 'triesleft' member to the Fault class
instance containing the number of attempts this
function had remaining when the Fault exception was
encountered.
encountered. XXX: DEPRECATED!
RETURNS:
This function returns the result returned by the passed in command.
This function returns the result returned by the passed in RPC.
SIDE EFFECTS:
Invokes the passed in command with the passed in arguments.
Catches protocol/socket exceptions for command retry.
(Optionally) catches xmlrpclib.Fault exceptions for command retry.
Catches xmlrpclib.Fault exceptions for potential command retry.
Adds a 'triesleft' member to all exceptions reraised prior to tries=0.
Understands TimeoutError exceptions, and will reraise them.
......@@ -361,6 +370,7 @@ def tryXmlrpcCmd(cmd, args = (),
if debug:
print "About to perform command %s with args:\n\t%s" % \
(cmd, args)
while 1:
tries = tries - 1
try:
......@@ -373,28 +383,68 @@ def tryXmlrpcCmd(cmd, args = (),
return cmd(args)
else:
return cmd()
pass
# RPC returned a fault - process it here.
except xmlrpclib.Fault, e:
# If a callback fault handler was passed, then call it to assess
# the damage first.
if callback:
cres = callback(e.faultCode, e.faultString)
if cres == 0:
return "Fault received, but operation succeeded."
elif cres == 1:
raise
pass
# If any of these string appears in the fault, then the desired
# outcome of the function has been met, so return success.
for fstr in OKstrs:
if e.faultString.find(fstr) != -1:
return fstr
pass
# If any of these strings are found, then the RPC failed, no
# sense retrying.
for fstr in NOKstrs:
if e.faultString.find(fstr) != -1:
raise
pass
print "XML-RPC Fault happened while executing agent " \
"command: %s" % cmd.func_name
print "\tCode: %s, Error: %s" % (e.faultCode, e.faultString)
# Raise xmlrpc exception, if requested. Report tries left in
# fault object.
# XXX: this really should be removed once we've determined that
# no existing code uses this facility.
if raisefault:
e.triesleft = tries
raise xmlrpclib.Fault, e
pass
# Jump out if we receive a timeout exception.
except TimeoutError, e:
if debug:
print "Caught a timeout error, setting triesleft and raising."
e.triesleft = tries
raise TimeoutError, e
# Communications errors are non-fatal unless they occur
# several times in a row.
except (socket.error, xmlrpclib.ProtocolError), e:
print "Encountered problem communicating with agent " \
"while executing command: %s" % cmd.func_name
if debug:
print "Exception is of type: %s" % e
pass
pass
if tries > 0:
print "Sleeping for %s seconds, then retrying %s command" % \
(sleepint, cmd.func_name)
time.sleep(sleepint)
pass
else:
# XXX: perhaps this should raise its own, new type of
# exception.
......
This diff is collapsed.
......@@ -12,7 +12,7 @@ from libtestbed import *
import libplab
import getopt
RENEW_PERIOD = 60*60
RENEW_PERIOD = 6*60*60 # every six hours
def usage(me):
print "Usage: %s [ -vd ]" % me
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment