Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
emulab
emulab-devel
Commits
c863bb2e
Commit
c863bb2e
authored
Oct 08, 2004
by
Leigh B. Stoller
Browse files
Moved this library up one level since it is now used by more then just
the plab code.
parent
0527441a
Changes
2
Hide whitespace changes
Inline
Side-by-side
tbsetup/plab/GNUmakefile.in
View file @
c863bb2e
...
...
@@ -17,7 +17,7 @@ SUBDIRS = libdslice etc
SBIN_STUFF = plabslice plabnode plabrenewd plabmetrics plabstats \
plabmonitord plablinkdata plabdist plabhttpd plabdiscover
LIB_STUFF = libplab.py
libtestbed.py
mod_dslice.py mod_PLC.py
LIB_STUFF = libplab.py mod_dslice.py mod_PLC.py
LIBEXEC_STUFF = webplabstats
...
...
tbsetup/plab/libtestbed.py.in
deleted
100644 → 0
View file @
0527441a
# -*- python -*-
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
import
sys
import
os
import
time
import
signal
import
syslog
import
socket
import
traceback
import
xmlrpclib
from
optparse
import
OptionParser
from
warnings
import
warn
#
# Constants
#
TBOPS
=
"@TBOPSEMAIL@"
.
replace
(
"
\\
"
,
""
)
MAILTAG
=
"@THISHOMEBASE@"
SCRIPTNAME
=
sys
.
argv
[
0
][
sys
.
argv
[
0
].
rfind
(
"/"
)
+
1
:]
DEFAULT_DATA_PATH
=
"@prefix@/etc/plab/"
# ensure this ends in a slash
#
# How many seconds to sleep between failures and how many times to try
# commands to both the dslice agent, and individual node managers.
#
DEF_SLEEPINT
=
5
DEF_TRIES
=
3
DEF_TIMEOUT
=
1
*
60
# default timeout interval
MAXCONSECEXC
=
3
#
# Output control variables.
#
verbose
=
0
debug
=
0
class
TBParser
(
OptionParser
):
"""
Slightly modified OptionParser that simply adds some universally
useful options. May want to extend to have different error behavior.
"""
def
__init__
(
self
,
*
args
):
OptionParser
.
__init__
(
self
,
*
args
)
self
.
add_option
(
"-v"
,
"--verbose"
,
dest
=
"verbose"
,
default
=
False
,
action
=
"store_true"
,
help
=
"Say more about internal stuff"
)
self
.
add_option
(
"-d"
,
"--debug"
,
dest
=
"debug"
,
default
=
False
,
action
=
"store_true"
,
help
=
"Say A LOT about internal stuff"
)
return
pass
def
SENDMAIL
(
To
,
Subj
,
Msg
,
From
=
None
,
Headers
=
None
,
Files
=
()):
"""
Sends email to someone about something :)
This function is similar to its perl library counterpart.
ARGS:
To: <string> Email address of recipient.
Subj: <string> Subject of email.
Msg: <string> Message text.
From: <string> Email address of sender (optional).
Headers: <string> Extra header strings (must newline terminate all but
the last one) (optional).
Files: <tuple> List of files to append to message body (optional).
RETURNS:
Always returns 1
SIDE EFFECTS:
Can raise exceptions via called methods/functions.
"""
Tag
=
MAILTAG
.
upper
()
# damn, no good way to tell if this fails
sm
=
os
.
popen
(
"/usr/sbin/sendmail -t"
,
"w"
)
#
# Sendmail will figure this out if not given.
#
if
From
:
sm
.
write
(
"From: %s
\n
"
%
From
)
if
Headers
:
sm
.
write
(
"%s
\n
"
%
Headers
)
sm
.
write
(
"X-NetBed: %s
\n
"
%
SCRIPTNAME
)
sm
.
write
(
"To: %s
\n
"
%
To
)
sm
.
write
(
"Subject: %s: %s
\n
"
%
(
Tag
,
Subj
))
sm
.
write
(
"
\n
"
)
sm
.
write
(
"%s
\n
"
%
Msg
)
sm
.
write
(
"
\n
"
)
if
len
(
Files
):
for
fname
in
Files
:
try
:
infile
=
open
(
fname
)
sm
.
write
(
"
\n
--------- %s --------
\n
"
%
fname
)
for
line
in
infile
.
readlines
():
sm
.
write
(
line
)
pass
infile
.
close
()
pass
except
:
pass
pass
pass
sm
.
write
(
"
\n
"
)
sm
.
close
()
return
1
#
# General library functions
#
#
# Print out a timestamp with optional message
#
def
TIMESTAMP
(
msgstr
=
""
):
mytime
=
time
.
strftime
(
"%H:%M:%S"
)
print
"TIMESTAMP: %s %s"
%
(
mytime
,
msgstr
)
#
# Termination signals, and global var to track if we got one when
# they are disabled with disable_sigs
#
TERMSIGS
=
(
signal
.
SIGTERM
,
signal
.
SIGHUP
,
signal
.
SIGINT
)
gotsig
=
0
class
SignalInterrupt
(
Exception
):
def
__init__
(
self
,
signum
):
self
.
signum
=
signum
#
# Keep track of last terminal signal received
#
def
localSigHandler
(
signum
,
frame
):
"""
Keep track of received signals.
"""
global
gotsig
gotsig
=
signum
if
verbose
:
print
"Caught signal %s"
%
signum
def
disable_sigs
(
sigs
):
"""
Put signal watcher into place. I wish you could just temporarily
block (but not ignore) signals in python - alas.
"""
osigs
=
{}
for
sig
in
sigs
:
osigs
[
sig
]
=
signal
.
signal
(
sig
,
localSigHandler
)
return
osigs
def
enable_sigs
(
osigs
):
"""
Reinstate old signal handlers and then raise an exception if
one was caught while we had them disabled.
"""
global
gotsig
for
sig
,
handler
in
osigs
.
items
():
signal
.
signal
(
sig
,
handler
)
if
gotsig
:
tmp
=
gotsig
gotsig
=
0
raise
SignalInterrupt
(
tmp
)
#
# Local timeout error class and generic alarm handler
# Also listed are a couple of state saving vars for the alarm handler
# when the local one is installed. The *_alarm calls are nestable
#
class
TimeoutError
:
pass
def
alrmhandler
(
signum
,
frame
):
if
debug
:
print
"Timeout! Raising TimeoutError."
raise
TimeoutError
oalrmhandlerstk
=
[]
# alarm handler stack
oalrmtmostk
=
[]
# alarm timeout stack
def
enable_alarm
():
"""
Install a little local alarm handler, stash away old one, and
it's pending alarm timeout (if set).
"""
global
oalrmhandlerstk
,
oalrmtmostk
oalrmhandlerstk
.
append
(
signal
.
signal
(
signal
.
SIGALRM
,
alrmhandler
))
oalrmtmo
=
signal
.
alarm
(
0
)
if
oalrmtmo
:
oalrmtmo
+=
time
.
time
()
oalrmtmostk
.
append
(
oalrmtmo
)
def
disable_alarm
():
"""
Restore old handler and timeout. If the old timeout has passed, warn,
and send the alarm signal immediately.
"""
signal
.
signal
(
signal
.
SIGALRM
,
oalrmhandlerstk
.
pop
())
oalrmtmo
=
oalrmtmostk
.
pop
()
if
oalrmtmo
:
diff
=
oalrmtmo
-
time
.
time
()
if
diff
>
0
:
signal
.
alarm
(
diff
)
else
:
warn
(
"missed a timeout deadline, sending SIGALRM immediately!"
)
os
.
kill
(
os
.
getpid
(),
signal
.
SIGALRM
)
def
ForkCmd
(
cmd
,
args
=
(),
timeout
=
DEF_TIMEOUT
,
disable_sigs_parent
=
(),
disable_sigs_child
=
()):
"""
Fork and run the given command, and optionally timeout in the parent.
ARGS:
cmd: <bound method | function> command to run.
args: <tuple> arguments to the above command.
timeout: <int> seconds to wait for child/command to complete
before killing it off and returning
disable_sigs_parent: <tuple of ints> signals to disable in parent
disable_sigs_child: <tuple of ints> signals to disable in child
RETURNS:
two element tuple. The first element is a boolean, indicating whether
or not an exception was caught while executing the command. The second
element is the return code from the command (which could be meaningless
if an exception was caught).
SIDE EFFECTS:
Forks child process to run provided command. Blocks signals
if instructed to with disable_sigs() (does an enable_sigs() before
returning).
"""
osigs
=
None
childpid
=
os
.
fork
()
# parent
if
childpid
:
gotexc
=
0
exval
=
256
if
disable_sigs_parent
:
osigs
=
disable_sigs
(
disable_sigs_parent
)
enable_alarm
()
signal
.
alarm
(
timeout
)
while
1
:
try
:
exval
=
os
.
waitpid
(
childpid
,
0
)[
1
]
except
TimeoutError
:
print
"Timeout waiting for command completion: %s"
%
\
cmd
.
func_name
gotexc
=
1
break
except
OSError
,
e
:
# Interrupted syscall: just jump back on it.
if
e
.
errno
==
4
:
continue
else
:
gotexc
=
1
break
except
:
gotexc
=
1
break
else
:
break
signal
.
alarm
(
0
)
if
gotexc
:
tb
=
""
.
join
(
traceback
.
format_exception_only
(
*
sys
.
exc_info
()[:
2
]))
print
"Exception caught while trying to "
\
"run command %s
\n
%s"
%
(
cmd
.
func_name
,
tb
)
try
:
os
.
kill
(
childpid
,
signal
.
SIGUSR1
)
except
:
pass
try
:
exval
=
os
.
wait
()[
1
]
except
:
exval
=
256
else
:
if
debug
:
if
os
.
WIFEXITED
(
exval
):
print
"Process complete, exit value: %d"
%
\
os
.
WEXITSTATUS
(
exval
)
if
os
.
WIFSIGNALED
(
exval
):
print
"Process signalled: %d"
%
\
os
.
WTERMSIG
(
exval
)
disable_alarm
()
if
osigs
:
enable_sigs
(
osigs
)
return
(
gotexc
,
os
.
WEXITSTATUS
(
exval
))
# child
else
:
def
sigusrexit
(
signum
,
frame
):
if
debug
:
print
"Received SIGUSR1, bailing out"
os
.
_exit
(
1
)
retval
=
1
if
disable_sigs_child
:
osigs
=
disable_sigs
(
disable_sigs_child
)
signal
.
signal
(
signal
.
SIGUSR1
,
sigusrexit
)
try
:
if
type
(
args
)
==
tuple
:
retval
=
cmd
(
*
args
)
else
:
retval
=
cmd
(
args
)
except
:
traceback
.
print_exception
(
*
sys
.
exc_info
())
os
.
_exit
(
retval
)
def
tryXmlrpcCmd
(
cmd
,
args
=
(),
inittries
=
DEF_TRIES
,
sleepint
=
DEF_SLEEPINT
,
OKstrs
=
[],
NOKstrs
=
[],
callback
=
None
,
raisefault
=
False
):
"""
This helper/wrapper function's job is to invoke the commands to the
central agent, or local node manager, taking steps to retry and
recover from failure.
ARGS:
cmd: <bound method | function> command to try.
args: <tuple> arguments to pass to the above command.
inittries: <int> number of retries before the function gives up
and reraises the last caught exception.
sleepint: <int> how long to sleep (in seconds) between retries.
OKstrs: <list> success strings to check against any XMLRPC faults.
If one is seen, then return 'success' (0)
NOKstrs: <list> failure strings to check against any XMLRPC faults.
If one is seen, then return 'failure' (1)
callback: <func_ptr> pointer to a function to call when an XMLRPC
fault is encountered. it should return and integer, where:
0 means success; 1 means fail; and -1 means keep trying.
raisefault: <boolean> indicates whether or not to reraise an
xmlrpclib Fault exception when caught. When true it
also adds a new 'triesleft' member to the Fault class
instance containing the number of attempts this
function had remaining when the Fault exception was
encountered. XXX: DEPRECATED!
RETURNS:
This function returns the result returned by the passed in RPC.
SIDE EFFECTS:
Invokes the passed in command with the passed in arguments.
Catches protocol/socket exceptions for command retry.
Catches xmlrpclib.Fault exceptions for potential command retry.
Adds a 'triesleft' member to all exceptions reraised prior to tries=0.
Understands TimeoutError exceptions, and will reraise them.
"""
tries
=
inittries
if
debug
:
print
"About to perform command %s with args:
\n\t
%s"
%
\
(
cmd
,
args
)
while
1
:
tries
=
tries
-
1
try
:
if
args
:
# have to differentiate since the '*' operator wants
# a tuple - throws an exception if its operand isn't
if
type
(
args
)
==
tuple
:
return
cmd
(
*
args
)
else
:
return
cmd
(
args
)
else
:
return
cmd
()
pass
# RPC returned a fault - process it here.
except
xmlrpclib
.
Fault
,
e
:
# If a callback fault handler was passed, then call it to assess
# the damage first.
if
callback
:
cres
=
callback
(
e
.
faultCode
,
e
.
faultString
)
if
cres
==
0
:
return
"Fault received, but operation succeeded."
elif
cres
==
1
:
raise
pass
# If any of these string appears in the fault, then the desired
# outcome of the function has been met, so return success.
for
fstr
in
OKstrs
:
if
e
.
faultString
.
find
(
fstr
)
!=
-
1
:
return
fstr
pass
# If any of these strings are found, then the RPC failed, no
# sense retrying.
for
fstr
in
NOKstrs
:
if
e
.
faultString
.
find
(
fstr
)
!=
-
1
:
raise
pass
print
"XML-RPC Fault happened while executing agent "
\
"command: %s"
%
cmd
.
func_name
print
"
\t
Code: %s, Error: %s"
%
(
e
.
faultCode
,
e
.
faultString
)
# Raise xmlrpc exception, if requested. Report tries left in
# fault object.
# XXX: this really should be removed once we've determined that
# no existing code uses this facility.
if
raisefault
:
e
.
triesleft
=
tries
raise
xmlrpclib
.
Fault
,
e
pass
# Jump out if we receive a timeout exception.
except
TimeoutError
,
e
:
if
debug
:
print
"Caught a timeout error, setting triesleft and raising."
e
.
triesleft
=
tries
raise
TimeoutError
,
e
# Communications errors are non-fatal unless they occur
# several times in a row.
except
(
socket
.
error
,
xmlrpclib
.
ProtocolError
),
e
:
print
"Encountered problem communicating with agent "
\
"while executing command: %s"
%
cmd
.
func_name
if
debug
:
print
"Exception is of type: %s"
%
e
pass
pass
if
tries
>
0
:
print
"Sleeping for %s seconds, then retrying %s command"
%
\
(
sleepint
,
cmd
.
func_name
)
time
.
sleep
(
sleepint
)
pass
else
:
# XXX: perhaps this should raise its own, new type of
# exception.
print
"Giving up after %s tries"
%
inittries
raise
class
logger
:
def
__init__
(
self
,
logname
):
syslog
.
openlog
(
logname
,
syslog
.
LOG_PID
,
syslog
.
LOG_USER
)
self
.
buf
=
""
return
def
close
(
self
):
syslog
.
closelog
()
return
def
flush
(
self
):
pass
def
write
(
self
,
str
):
# Ugh
self
.
buf
+=
str
while
self
.
buf
.
find
(
"
\n
"
)
>=
0
:
pos
=
self
.
buf
.
find
(
"
\n
"
)
line
=
self
.
buf
[:
pos
]
self
.
buf
=
self
.
buf
[
pos
+
1
:]
syslog
.
syslog
(
line
)
pass
return
# XXX: need to return # bytes written?
pass
class
pydaemon
:
SYSLOG
=
"__SysLog__"
def
__init__
(
self
,
logname
=
""
):
self
.
logname
=
logname
return
def
daemonize
(
self
):
"""
Fork off into a daemon process, redirecting stdout and stderr to
logfile.
Based on code from the ASPN Python Cookbook.
"""
# First fork
if
os
.
fork
():
sys
.
exit
(
0
)
pass
# Decouple from parent environment.
os
.
chdir
(
"/"
)
os
.
umask
(
0
)
os
.
setsid
()
# Second fork
if
os
.
fork
():
sys
.
exit
(
0
)
pass
# Redirect standard fd's
si
=
open
(
"/dev/null"
,
'r'
)
so
=
open
(
"/dev/null"
,
'a+'
,
0
)
os
.
dup2
(
si
.
fileno
(),
sys
.
stdin
.
fileno
())
os
.
dup2
(
so
.
fileno
(),
sys
.
stdout
.
fileno
())
os
.
dup2
(
so
.
fileno
(),
sys
.
stderr
.
fileno
())
# Redirect output
outfile
=
None
if
self
.
logname
==
self
.
SYSLOG
:
outfile
=
logger
(
SCRIPTNAME
)
pass
elif
self
.
logname
:
outfile
=
open
(
logname
,
"a+"
)
pass
else
:
return
if
outfile
:
sys
.
stdout
=
sys
.
stderr
=
outfile
pass
else
:
print
"Couldn't open output log"
pass
return
def
daemonLoop
(
self
,
func
,
period
,
maxconsecexc
=
MAXCONSECEXC
):
"""
Forks off into a daemon process with output directed to logfile, and
calls the given func every period seconds.
"""
import
time
import
traceback
consecexc
=
maxconsecexc
while
True
:
start
=
time
.
clock
()
try
:
func
()
pass
except
SignalInterrupt
,
e
:
print
"Received signal %s in daemon loop, exiting."
%
e
.
signum
sys
.
exit
(
0
)
pass
except
KeyboardInterrupt
:
print
"Received keyboard interrupt in daemon loop, exiting."
sys
.
exit
(
1
)
pass
except
:
print
"Exception caught in plab daemon loop:"
print
""
.
join
(
traceback
.
format_exception
(
*
sys
.
exc_info
()))
consecexc
-=
1
if
consecexc
>
0
:
print
"Going back to sleep until next scheduled run"
else
:
print
"Too many consecutive exceptions seen, bailing out!"
SENDMAIL
(
TBOPS
,
"%s Exiting"
,
"The plab %s daemon has seen too many "
"consecutive exceptions and is bailing out."
"Someone needs to check the log!"
%
(
SCRIPTNAME
,
func
.
func_name
),
TBOPS
)
raise
pass
else
:
consecexc
=
maxconsecexc
pass
end
=
time
.
clock
()
if
end
-
start
<
period
:
wait
=
period
-
(
end
-
start
)
print
"Sleeping %g seconds"
%
wait
time
.
sleep
(
wait
)
pass
pass
return
# NOTREACHED
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment