Commit c863bb2e authored by Leigh B. Stoller's avatar Leigh B. Stoller
Browse files

Moved this library up one level since it is now used by more then just

the plab code.
parent 0527441a
......@@ -17,7 +17,7 @@ SUBDIRS = libdslice etc
SBIN_STUFF = plabslice plabnode plabrenewd plabmetrics plabstats \
plabmonitord plablinkdata plabdist plabhttpd plabdiscover
LIB_STUFF = libplab.py libtestbed.py mod_dslice.py mod_PLC.py
LIB_STUFF = libplab.py mod_dslice.py mod_PLC.py
LIBEXEC_STUFF = webplabstats
......
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import sys
import os
import time
import signal
import syslog
import socket
import traceback
import xmlrpclib
from optparse import OptionParser
from warnings import warn
#
# Constants
#
TBOPS = "@TBOPSEMAIL@".replace("\\","")
MAILTAG = "@THISHOMEBASE@"
SCRIPTNAME = sys.argv[0][sys.argv[0].rfind("/")+1:]
DEFAULT_DATA_PATH = "@prefix@/etc/plab/" # ensure this ends in a slash
#
# How many seconds to sleep between failures and how many times to try
# commands to both the dslice agent, and individual node managers.
#
DEF_SLEEPINT = 5
DEF_TRIES = 3
DEF_TIMEOUT = 1*60 # default timeout interval
MAXCONSECEXC = 3
#
# Output control variables.
#
verbose = 0
debug = 0
class TBParser (OptionParser):
"""
Slightly modified OptionParser that simply adds some universally
useful options. May want to extend to have different error behavior.
"""
def __init__(self, *args):
OptionParser.__init__(self, *args)
self.add_option("-v", "--verbose", dest="verbose", default=False,
action="store_true", help="Say more about internal stuff")
self.add_option("-d", "--debug", dest="debug", default=False,
action="store_true", help="Say A LOT about internal stuff")
return
pass
def SENDMAIL(To, Subj, Msg, From = None, Headers = None, Files = ()):
"""
Sends email to someone about something :)
This function is similar to its perl library counterpart.
ARGS:
To: <string> Email address of recipient.
Subj: <string> Subject of email.
Msg: <string> Message text.
From: <string> Email address of sender (optional).
Headers: <string> Extra header strings (must newline terminate all but
the last one) (optional).
Files: <tuple> List of files to append to message body (optional).
RETURNS:
Always returns 1
SIDE EFFECTS:
Can raise exceptions via called methods/functions.
"""
Tag = MAILTAG.upper()
# damn, no good way to tell if this fails
sm = os.popen("/usr/sbin/sendmail -t", "w")
#
# Sendmail will figure this out if not given.
#
if From:
sm.write("From: %s\n" % From)
if Headers:
sm.write("%s\n" % Headers)
sm.write("X-NetBed: %s\n" % SCRIPTNAME)
sm.write("To: %s\n" % To)
sm.write("Subject: %s: %s\n" % (Tag, Subj))
sm.write("\n")
sm.write("%s\n" % Msg)
sm.write("\n")
if len(Files):
for fname in Files:
try:
infile = open(fname)
sm.write("\n--------- %s --------\n" % fname)
for line in infile.readlines():
sm.write(line)
pass
infile.close()
pass
except:
pass
pass
pass
sm.write("\n")
sm.close()
return 1
#
# General library functions
#
#
# Print out a timestamp with optional message
#
def TIMESTAMP(msgstr = ""):
mytime = time.strftime("%H:%M:%S")
print "TIMESTAMP: %s %s" % (mytime, msgstr)
#
# Termination signals, and global var to track if we got one when
# they are disabled with disable_sigs
#
TERMSIGS = (signal.SIGTERM, signal.SIGHUP, signal.SIGINT)
gotsig = 0
class SignalInterrupt(Exception):
def __init__(self, signum):
self.signum = signum
#
# Keep track of last terminal signal received
#
def localSigHandler(signum, frame):
"""
Keep track of received signals.
"""
global gotsig
gotsig = signum
if verbose:
print "Caught signal %s" % signum
def disable_sigs(sigs):
"""
Put signal watcher into place. I wish you could just temporarily
block (but not ignore) signals in python - alas.
"""
osigs = {}
for sig in sigs:
osigs[sig] = signal.signal(sig, localSigHandler)
return osigs
def enable_sigs(osigs):
"""
Reinstate old signal handlers and then raise an exception if
one was caught while we had them disabled.
"""
global gotsig
for sig,handler in osigs.items():
signal.signal(sig, handler)
if gotsig:
tmp = gotsig
gotsig = 0
raise SignalInterrupt(tmp)
#
# Local timeout error class and generic alarm handler
# Also listed are a couple of state saving vars for the alarm handler
# when the local one is installed. The *_alarm calls are nestable
#
class TimeoutError: pass
def alrmhandler(signum, frame):
if debug:
print "Timeout! Raising TimeoutError."
raise TimeoutError
oalrmhandlerstk = [] # alarm handler stack
oalrmtmostk = [] # alarm timeout stack
def enable_alarm():
"""
Install a little local alarm handler, stash away old one, and
it's pending alarm timeout (if set).
"""
global oalrmhandlerstk, oalrmtmostk
oalrmhandlerstk.append(signal.signal(signal.SIGALRM, alrmhandler))
oalrmtmo = signal.alarm(0)
if oalrmtmo:
oalrmtmo += time.time()
oalrmtmostk.append(oalrmtmo)
def disable_alarm():
"""
Restore old handler and timeout. If the old timeout has passed, warn,
and send the alarm signal immediately.
"""
signal.signal(signal.SIGALRM, oalrmhandlerstk.pop())
oalrmtmo = oalrmtmostk.pop()
if oalrmtmo:
diff = oalrmtmo - time.time()
if diff > 0:
signal.alarm(diff)
else:
warn("missed a timeout deadline, sending SIGALRM immediately!")
os.kill(os.getpid(), signal.SIGALRM)
def ForkCmd(cmd, args=(), timeout=DEF_TIMEOUT,
disable_sigs_parent=(), disable_sigs_child=()):
"""
Fork and run the given command, and optionally timeout in the parent.
ARGS:
cmd: <bound method | function> command to run.
args: <tuple> arguments to the above command.
timeout: <int> seconds to wait for child/command to complete
before killing it off and returning
disable_sigs_parent: <tuple of ints> signals to disable in parent
disable_sigs_child: <tuple of ints> signals to disable in child
RETURNS:
two element tuple. The first element is a boolean, indicating whether
or not an exception was caught while executing the command. The second
element is the return code from the command (which could be meaningless
if an exception was caught).
SIDE EFFECTS:
Forks child process to run provided command. Blocks signals
if instructed to with disable_sigs() (does an enable_sigs() before
returning).
"""
osigs = None
childpid = os.fork()
# parent
if childpid:
gotexc = 0
exval = 256
if disable_sigs_parent:
osigs = disable_sigs(disable_sigs_parent)
enable_alarm()
signal.alarm(timeout)
while 1:
try:
exval = os.waitpid(childpid, 0)[1]
except TimeoutError:
print "Timeout waiting for command completion: %s" % \
cmd.func_name
gotexc = 1
break
except OSError, e:
# Interrupted syscall: just jump back on it.
if e.errno == 4:
continue
else:
gotexc = 1
break
except:
gotexc = 1
break
else:
break
signal.alarm(0)
if gotexc:
tb = "".join(traceback.format_exception_only(*sys.exc_info()[:2]))
print "Exception caught while trying to " \
"run command %s\n%s" % (cmd.func_name, tb)
try: os.kill(childpid, signal.SIGUSR1)
except: pass
try: exval = os.wait()[1]
except: exval = 256
else:
if debug:
if os.WIFEXITED(exval):
print "Process complete, exit value: %d" % \
os.WEXITSTATUS(exval)
if os.WIFSIGNALED(exval):
print "Process signalled: %d" % \
os.WTERMSIG(exval)
disable_alarm()
if osigs:
enable_sigs(osigs)
return (gotexc, os.WEXITSTATUS(exval))
# child
else:
def sigusrexit(signum, frame):
if debug:
print "Received SIGUSR1, bailing out"
os._exit(1)
retval = 1
if disable_sigs_child:
osigs = disable_sigs(disable_sigs_child)
signal.signal(signal.SIGUSR1, sigusrexit)
try:
if type(args) == tuple:
retval = cmd(*args)
else:
retval = cmd(args)
except:
traceback.print_exception(*sys.exc_info())
os._exit(retval)
def tryXmlrpcCmd(cmd, args = (),
inittries = DEF_TRIES,
sleepint = DEF_SLEEPINT,
OKstrs=[], NOKstrs=[], callback=None,
raisefault = False):
"""
This helper/wrapper function's job is to invoke the commands to the
central agent, or local node manager, taking steps to retry and
recover from failure.
ARGS:
cmd: <bound method | function> command to try.
args: <tuple> arguments to pass to the above command.
inittries: <int> number of retries before the function gives up
and reraises the last caught exception.
sleepint: <int> how long to sleep (in seconds) between retries.
OKstrs: <list> success strings to check against any XMLRPC faults.
If one is seen, then return 'success' (0)
NOKstrs: <list> failure strings to check against any XMLRPC faults.
If one is seen, then return 'failure' (1)
callback: <func_ptr> pointer to a function to call when an XMLRPC
fault is encountered. it should return and integer, where:
0 means success; 1 means fail; and -1 means keep trying.
raisefault: <boolean> indicates whether or not to reraise an
xmlrpclib Fault exception when caught. When true it
also adds a new 'triesleft' member to the Fault class
instance containing the number of attempts this
function had remaining when the Fault exception was
encountered. XXX: DEPRECATED!
RETURNS:
This function returns the result returned by the passed in RPC.
SIDE EFFECTS:
Invokes the passed in command with the passed in arguments.
Catches protocol/socket exceptions for command retry.
Catches xmlrpclib.Fault exceptions for potential command retry.
Adds a 'triesleft' member to all exceptions reraised prior to tries=0.
Understands TimeoutError exceptions, and will reraise them.
"""
tries = inittries
if debug:
print "About to perform command %s with args:\n\t%s" % \
(cmd, args)
while 1:
tries = tries - 1
try:
if args:
# have to differentiate since the '*' operator wants
# a tuple - throws an exception if its operand isn't
if type(args) == tuple:
return cmd(*args)
else:
return cmd(args)
else:
return cmd()
pass
# RPC returned a fault - process it here.
except xmlrpclib.Fault, e:
# If a callback fault handler was passed, then call it to assess
# the damage first.
if callback:
cres = callback(e.faultCode, e.faultString)
if cres == 0:
return "Fault received, but operation succeeded."
elif cres == 1:
raise
pass
# If any of these string appears in the fault, then the desired
# outcome of the function has been met, so return success.
for fstr in OKstrs:
if e.faultString.find(fstr) != -1:
return fstr
pass
# If any of these strings are found, then the RPC failed, no
# sense retrying.
for fstr in NOKstrs:
if e.faultString.find(fstr) != -1:
raise
pass
print "XML-RPC Fault happened while executing agent " \
"command: %s" % cmd.func_name
print "\tCode: %s, Error: %s" % (e.faultCode, e.faultString)
# Raise xmlrpc exception, if requested. Report tries left in
# fault object.
# XXX: this really should be removed once we've determined that
# no existing code uses this facility.
if raisefault:
e.triesleft = tries
raise xmlrpclib.Fault, e
pass
# Jump out if we receive a timeout exception.
except TimeoutError, e:
if debug:
print "Caught a timeout error, setting triesleft and raising."
e.triesleft = tries
raise TimeoutError, e
# Communications errors are non-fatal unless they occur
# several times in a row.
except (socket.error, xmlrpclib.ProtocolError), e:
print "Encountered problem communicating with agent " \
"while executing command: %s" % cmd.func_name
if debug:
print "Exception is of type: %s" % e
pass
pass
if tries > 0:
print "Sleeping for %s seconds, then retrying %s command" % \
(sleepint, cmd.func_name)
time.sleep(sleepint)
pass
else:
# XXX: perhaps this should raise its own, new type of
# exception.
print "Giving up after %s tries" % inittries
raise
class logger:
def __init__(self, logname):
syslog.openlog(logname, syslog.LOG_PID, syslog.LOG_USER)
self.buf = ""
return
def close(self):
syslog.closelog()
return
def flush(self): pass
def write(self, str):
# Ugh
self.buf += str
while self.buf.find("\n") >= 0:
pos = self.buf.find("\n")
line = self.buf[:pos]
self.buf = self.buf[pos+1:]
syslog.syslog(line)
pass
return # XXX: need to return # bytes written?
pass
class pydaemon:
SYSLOG = "__SysLog__"
def __init__(self, logname = ""):
self.logname = logname
return
def daemonize(self):
"""
Fork off into a daemon process, redirecting stdout and stderr to
logfile.
Based on code from the ASPN Python Cookbook.
"""
# First fork
if os.fork():
sys.exit(0)
pass
# Decouple from parent environment.
os.chdir("/")
os.umask(0)
os.setsid()
# Second fork
if os.fork():
sys.exit(0)
pass
# Redirect standard fd's
si = open("/dev/null", 'r')
so = open("/dev/null", 'a+', 0)
os.dup2(si.fileno(), sys.stdin.fileno())
os.dup2(so.fileno(), sys.stdout.fileno())
os.dup2(so.fileno(), sys.stderr.fileno())
# Redirect output
outfile = None
if self.logname == self.SYSLOG:
outfile = logger(SCRIPTNAME)
pass
elif self.logname:
outfile = open(logname, "a+")
pass
else:
return
if outfile:
sys.stdout = sys.stderr = outfile
pass
else:
print "Couldn't open output log"
pass
return
def daemonLoop(self, func, period, maxconsecexc = MAXCONSECEXC):
"""
Forks off into a daemon process with output directed to logfile, and
calls the given func every period seconds.
"""
import time
import traceback
consecexc = maxconsecexc
while True:
start = time.clock()
try:
func()
pass
except SignalInterrupt, e:
print "Received signal %s in daemon loop, exiting." % e.signum
sys.exit(0)
pass
except KeyboardInterrupt:
print "Received keyboard interrupt in daemon loop, exiting."
sys.exit(1)
pass
except:
print "Exception caught in plab daemon loop:"
print "".join(traceback.format_exception(*sys.exc_info()))
consecexc -= 1
if consecexc > 0:
print "Going back to sleep until next scheduled run"
else:
print "Too many consecutive exceptions seen, bailing out!"
SENDMAIL(TBOPS, "%s Exiting",
"The plab %s daemon has seen too many "
"consecutive exceptions and is bailing out."
"Someone needs to check the log!" %
(SCRIPTNAME, func.func_name), TBOPS)
raise
pass
else:
consecexc = maxconsecexc
pass
end = time.clock()
if end - start < period:
wait = period - (end - start)
print "Sleeping %g seconds" % wait
time.sleep(wait)
pass
pass
return # NOTREACHED
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment