Commit b6da3a51 authored by Kirk Webb

* Node.__copy() now uses rsync instead of weird 'dd' pipe
  - can do since sudo now works from square one after sliver instantiation

* Made fixsudo and addgroup operations in emulabify() non-fatal
  - setup sometimes works even if they don't (esp. on dirty sliver)

* option parser fixes

* Silence spurious warning messages from remote commands (tcgetattr, sudo lecture)
parent f08a73f8
......@@ -46,8 +46,8 @@ class TBParser (OptionParser):
Slightly modified OptionParser that simply adds some universally
useful options. May want to extend to have different error behavior.
"""
def __init__(self):
OptionParser.__init__(self)
def __init__(self, *args):
OptionParser.__init__(self, *args)
self.add_option("-v", "--verbose", dest="verbose", default=False,
action="store_true", help="Say more about internal stuff")
self.add_option("-d", "--debug", dest="debug", default=False,
......
......@@ -23,7 +23,7 @@ include $(TESTBED_SRCDIR)/GNUmakerules
mkdirs: $(LOG_DIR) $(WWW_DIR)
rootball-link:
-ln -s $(INSTALL_ETCDIR)/plab/$(ROOTBALL) $(WWW_DIR)/$(ROOTBALL)
-ln -fs $(INSTALL_ETCDIR)/plab/$(ROOTBALL) $(WWW_DIR)/$(ROOTBALL)
install: sbin-install mkdirs rootball-link
......
......@@ -28,6 +28,7 @@ import httplib
import xml.parsers.expat
import re
from popen2 import Popen4
from warnings import warn
#
......@@ -1021,9 +1022,21 @@ class Node:
"""
TIMESTAMP("emulabify started on %s." % self.nodeid)
print "Overlaying Emulab files on %s ..." % self.nodeid
self.__copy(DEFAULT_DATA_PATH + "fixsudo.sh", "/tmp/fixsudo.sh")
self.__perform("-tt sh /tmp/fixsudo.sh")
self.addToGroup(self.slice.slicename, "root")
try:
self.__copy(DEFAULT_DATA_PATH + "fixsudo.sh", "/tmp/fixsudo.sh")
self.__perform("-tt sh /tmp/fixsudo.sh", quiet = True)
pass
except RuntimeError:
print "fixsudo failed on %s; attempting to carry on anyway.." % \
self.nodeid
pass
try:
self.addToGroup(self.slice.slicename, "root")
pass
except RuntimeError:
print "Adding slice user to 'root' group on %s failed; " \
"attempting to carry on anyway." % self.nodeid
pass
self.unpackTgz(rootballpath, rootballname)
TIMESTAMP("emulabify finished on %s." % self.nodeid)
......@@ -1050,38 +1063,50 @@ class Node:
print "Warning: couldn't get tarball via local service on %s: " \
"Falling back to remote transfer." % self.nodeid
self.__copy(tgzpath + tgzname, "/tmp/" + tgzname)
pass
self.__perform("sudo tar -jxf /tmp/" + tgzname + " -C %s" % destpath)
self.__perform("sudo tar -jxf /tmp/" + tgzname + " -C %s" % destpath,
quiet = True)
return
def __perform(self, command):
def __perform(self, command, quiet = False):
"""
Executes the given command on the remote node via sshtb
Executes the given command on the remote node via sshtb, run as
the slice user.
"""
if debug:
print "Performing '%s' on %s" % (command, self.nodeid)
if os.spawnl(os.P_WAIT, SSH, SSH, "-host", self.nodeid, command):
raise RuntimeError, "ssh '%s' failed" % command
command = "%s -host %s %s" % (SSH, self.nodeid, command)
return self.__execute(command, quiet)
def __copy(self, localfile, remotefile):
"""
Copies a file from the local system to the remote node, doing so
as root.
as the slice user.
"""
import popen2
if debug:
print "Copying %s to %s on %s" % \
(localfile, remotefile, self.nodeid)
# dd is a bit overbearing for this job, but I can't do something
# simple like an scp (because I can't get remote root), or a
# cat with a redirect (because sshtb munges the redirect and
# winds up evaluating it in a local shell)
cpyobj = popen2.Popen4("%s -host %s 'dd of=%s' < '%s'" %
(SSH, self.nodeid, remotefile, localfile))
cpyout = cpyobj.fromchild.read()
cpystatus = cpyobj.wait()
if os.WEXITSTATUS(cpystatus):
raise RuntimeError, "Copying %s to %s failed (excode: %s):\n%s" % \
(localfile, remotefile, os.WEXITSTATUS(cpystatus), cpyout)
pass
# We're using rsync now.
command = "rsync -e '%s -host' %s %s:%s" % \
(SSH, localfile, self.nodeid, remotefile)
return self.__execute(command)
def __execute(self, command, quiet = False):
"""
Executes the given command, optionally squelching the output.
"""
cmdobj = Popen4(command)
cmdout = cmdobj.fromchild.read()
cmdstatus = cmdobj.wait()
if (not quiet) and cmdout:
print cmdout
pass
if cmdstatus:
raise RuntimeError, "'%s' failed (excode: %s). output:\n%s" % \
(command, cmdstatus, cmdout)
return
def __findIP(self):
"""
......
......@@ -46,8 +46,8 @@ class TBParser (OptionParser):
Slightly modified OptionParser that simply adds some universally
useful options. May want to extend to have different error behavior.
"""
def __init__(self):
OptionParser.__init__(self)
def __init__(self, *args):
OptionParser.__init__(self, *args)
self.add_option("-v", "--verbose", dest="verbose", default=False,
action="store_true", help="Say more about internal stuff")
self.add_option("-d", "--debug", dest="debug", default=False,
......
......@@ -278,6 +278,7 @@ class mod_PLC:
def freeNode(self, node):
agent = PLCagent(node.slice.slicename)
res = None
tries = 3
while 1:
TIMESTAMP("freenode %s try %d started." % (node.nodeid,
......
......@@ -88,8 +88,8 @@ my $logfile = "$TB/log/plabmonitord";
my @oldnodes = ();
my $LOOPSLEEP = 1800; # 1/2 hour between successive loops.
my $PAUSETIME = 120; # 2 minute pause after running vnode_setup
my $SETUPWAIT = 960; # 16 minutes to wait for vnode to setup.
my $BATCHNUM = 20; # degree of parallelization
my $SETUPWAIT = 960; # how long to wait for vnode to setup.
my $BATCHNUM = 40; # degree of parallelization
#
# daemonize
......@@ -178,18 +178,35 @@ while (1) {
# Check the nodes to find out which are up, and which failed
# in the vnode_setup we just ran.
print "Checking vnode_setup run status\n";
my @failed = grep(nodepostalloc($_), @batch2);
my @failed = ();
my @succeeded = ();
foreach $vnode (@batch2) {
if (nodepostalloc($vnode)) {
push(@succeeded, $vnode);
} else {
push(@failed, $vnode);
}
}
# If any fail, try to tear them down.
if (@failed) {
my @fvnodes = map {$_->[0]} @failed;
my @fpnodes = map {$_->[1]} @failed;
print "### vnode_setup failed for the following vnodes: @fvnodes\n";
print "### Proceding to tear them down.\n";
system("vnode_setup -f -d -k -n $BATCHNUM $PLABMOND_PID $PLABMOND_EID @fvnodes");
}
# If any succeeded, let tbops know about it.
if (@succeeded) {
my @spnodes = map {$_->[1]} @succeeded;
SENDMAIL($TBOPS, @succeeded ." plab nodes revived.",
"The following nodes have been brought back from the afterworld:\n\n".
join("\n", @spnodes) ."\n",
$TBOPS);
}
# Keep track of failed nodes for next daemon loop iteration.
push(@oldnodes, @failed);
}
......@@ -207,7 +224,7 @@ while (1) {
sub nodepostalloc($) {
my ($vnode, $pnode) = @{$_[0]};
my $revive = 0;
my $retval = 1;
my $retval = 0;
my $state = TBDB_NODESTATE_UNKNOWN();
# Did the node send ISUP (boot up completely)?
......@@ -233,16 +250,12 @@ sub nodepostalloc($) {
TBSetNodeLogEntry($pnode, "root", TB_DEFAULT_NODELOGTYPE(),
"'Moved to $PLABHOLDING_EID; ".
"plab node $vnode setup okay by monitor.'");
SENDMAIL($TBOPS, "$pnode is alive",
"$pnode has been brought back from the afterworld!",
$TBOPS);
$retval = 0;
$retval = 1;
# It didn't come up..
# We couldn't get it up..
} else {
print "Leaving $pnode in hwdown!\n";
$retval = 1
$retval = 0;
}
return $retval;
......
......@@ -9,17 +9,20 @@
import sys
sys.path.append("@prefix@/lib")
import getopt
import libtestbed
from libtestbed import *
import libplab
def usage(me):
print "Usage: %s [ -vd ] { create | destroy } pid eid" % me
sys.exit(-1)
usage = "%prog [ -vd ] { create | destroy } pid eid"
def main(args):
me = args[0]
parser = TBParser()
parser.parse_args()
parser = TBParser(usage)
(opts, args) = parser.parse_args()
if len(args) < 3:
parser.error("Incorrect number of arguments")
command,pid,eid = args
plab = libplab.Plab()
if command == "create":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment