Commit b6da3a51 authored by Kirk Webb

* Node.__copy() now uses rsync instead of weird 'dd' pipe
  - possible now that sudo works right from the start after sliver instantiation

* Made fixsudo and addgroup operations in emulabify() non-fatal
  - setup sometimes works even if they don't (esp. on dirty sliver)

* option parser fixes

* Shut up stupid warning messages from remote commands (tcgetattr, sudo lecture)
parent f08a73f8
...@@ -46,8 +46,8 @@ class TBParser (OptionParser): ...@@ -46,8 +46,8 @@ class TBParser (OptionParser):
Slightly modified OptionParser that simply adds some universally Slightly modified OptionParser that simply adds some universally
useful options. May want to extend to have different error behavior. useful options. May want to extend to have different error behavior.
""" """
def __init__(self): def __init__(self, *args):
OptionParser.__init__(self) OptionParser.__init__(self, *args)
self.add_option("-v", "--verbose", dest="verbose", default=False, self.add_option("-v", "--verbose", dest="verbose", default=False,
action="store_true", help="Say more about internal stuff") action="store_true", help="Say more about internal stuff")
self.add_option("-d", "--debug", dest="debug", default=False, self.add_option("-d", "--debug", dest="debug", default=False,
......
...@@ -23,7 +23,7 @@ include $(TESTBED_SRCDIR)/GNUmakerules ...@@ -23,7 +23,7 @@ include $(TESTBED_SRCDIR)/GNUmakerules
mkdirs: $(LOG_DIR) $(WWW_DIR) mkdirs: $(LOG_DIR) $(WWW_DIR)
rootball-link: rootball-link:
-ln -s $(INSTALL_ETCDIR)/plab/$(ROOTBALL) $(WWW_DIR)/$(ROOTBALL) -ln -fs $(INSTALL_ETCDIR)/plab/$(ROOTBALL) $(WWW_DIR)/$(ROOTBALL)
install: sbin-install mkdirs rootball-link install: sbin-install mkdirs rootball-link
......
...@@ -28,6 +28,7 @@ import httplib ...@@ -28,6 +28,7 @@ import httplib
import xml.parsers.expat import xml.parsers.expat
import re import re
from popen2 import Popen4
from warnings import warn from warnings import warn
# #
...@@ -1021,9 +1022,21 @@ class Node: ...@@ -1021,9 +1022,21 @@ class Node:
""" """
TIMESTAMP("emulabify started on %s." % self.nodeid) TIMESTAMP("emulabify started on %s." % self.nodeid)
print "Overlaying Emulab files on %s ..." % self.nodeid print "Overlaying Emulab files on %s ..." % self.nodeid
self.__copy(DEFAULT_DATA_PATH + "fixsudo.sh", "/tmp/fixsudo.sh") try:
self.__perform("-tt sh /tmp/fixsudo.sh") self.__copy(DEFAULT_DATA_PATH + "fixsudo.sh", "/tmp/fixsudo.sh")
self.addToGroup(self.slice.slicename, "root") self.__perform("-tt sh /tmp/fixsudo.sh", quiet = True)
pass
except RuntimeError:
print "fixsudo failed on %s; attempting to carry on anyway.." % \
self.nodeid
pass
try:
self.addToGroup(self.slice.slicename, "root")
pass
except RuntimeError:
print "Adding slice user to 'root' group on %s failed; " \
"attempting to carry on anyway." % self.nodeid
pass
self.unpackTgz(rootballpath, rootballname) self.unpackTgz(rootballpath, rootballname)
TIMESTAMP("emulabify finished on %s." % self.nodeid) TIMESTAMP("emulabify finished on %s." % self.nodeid)
...@@ -1050,38 +1063,50 @@ class Node: ...@@ -1050,38 +1063,50 @@ class Node:
print "Warning: couldn't get tarball via local service on %s: " \ print "Warning: couldn't get tarball via local service on %s: " \
"Falling back to remote transfer." % self.nodeid "Falling back to remote transfer." % self.nodeid
self.__copy(tgzpath + tgzname, "/tmp/" + tgzname) self.__copy(tgzpath + tgzname, "/tmp/" + tgzname)
pass
self.__perform("sudo tar -jxf /tmp/" + tgzname + " -C %s" % destpath) self.__perform("sudo tar -jxf /tmp/" + tgzname + " -C %s" % destpath,
quiet = True)
return
def __perform(self, command): def __perform(self, command, quiet = False):
""" """
Executes the given command on the remote node via sshtb Executes the given command on the remote node via sshtb, run as
the slice user.
""" """
if debug: if debug:
print "Performing '%s' on %s" % (command, self.nodeid) print "Performing '%s' on %s" % (command, self.nodeid)
if os.spawnl(os.P_WAIT, SSH, SSH, "-host", self.nodeid, command): command = "%s -host %s %s" % (SSH, self.nodeid, command)
raise RuntimeError, "ssh '%s' failed" % command return self.__execute(command, quiet)
def __copy(self, localfile, remotefile): def __copy(self, localfile, remotefile):
""" """
Copies a file from the local system to the remote node, doing so Copies a file from the local system to the remote node, doing so
as root. as the slice user.
""" """
import popen2
if debug: if debug:
print "Copying %s to %s on %s" % \ print "Copying %s to %s on %s" % \
(localfile, remotefile, self.nodeid) (localfile, remotefile, self.nodeid)
# dd is a bit overbearing for this job, but I can't do something pass
# simply like an scp (because the I can't get remote root), or a # We're using rsync now.
# cat with a redirect (because sshtb munges the redirect and command = "rsync -e '%s -host' %s %s:%s" % \
# winds up evaluating it in a local shell) (SSH, localfile, self.nodeid, remotefile)
cpyobj = popen2.Popen4("%s -host %s 'dd of=%s' < '%s'" % return self.__execute(command)
(SSH, self.nodeid, remotefile, localfile))
cpyout = cpyobj.fromchild.read() def __execute(self, command, quiet = False):
cpystatus = cpyobj.wait() """
if os.WEXITSTATUS(cpystatus): Executes the given command, optionally squelching the output.
raise RuntimeError, "Copying %s to %s failed (excode: %s):\n%s" % \ """
(localfile, remotefile, os.WEXITSTATUS(cpystatus), cpyout) cmdobj = Popen4(command)
cmdout = cmdobj.fromchild.read()
cmdstatus = cmdobj.wait()
if (not quiet) and cmdout:
print cmdout
pass
if cmdstatus:
raise RuntimeError, "'%s' failed (excode: %s). output:\n%s" % \
(command, cmdstatus, cmdout)
return
def __findIP(self): def __findIP(self):
""" """
......
...@@ -46,8 +46,8 @@ class TBParser (OptionParser): ...@@ -46,8 +46,8 @@ class TBParser (OptionParser):
Slightly modified OptionParser that simply adds some universally Slightly modified OptionParser that simply adds some universally
useful options. May want to extend to have different error behavior. useful options. May want to extend to have different error behavior.
""" """
def __init__(self): def __init__(self, *args):
OptionParser.__init__(self) OptionParser.__init__(self, *args)
self.add_option("-v", "--verbose", dest="verbose", default=False, self.add_option("-v", "--verbose", dest="verbose", default=False,
action="store_true", help="Say more about internal stuff") action="store_true", help="Say more about internal stuff")
self.add_option("-d", "--debug", dest="debug", default=False, self.add_option("-d", "--debug", dest="debug", default=False,
......
...@@ -278,6 +278,7 @@ class mod_PLC: ...@@ -278,6 +278,7 @@ class mod_PLC:
def freeNode(self, node): def freeNode(self, node):
agent = PLCagent(node.slice.slicename) agent = PLCagent(node.slice.slicename)
res = None
tries = 3 tries = 3
while 1: while 1:
TIMESTAMP("freenode %s try %d started." % (node.nodeid, TIMESTAMP("freenode %s try %d started." % (node.nodeid,
......
...@@ -88,8 +88,8 @@ my $logfile = "$TB/log/plabmonitord"; ...@@ -88,8 +88,8 @@ my $logfile = "$TB/log/plabmonitord";
my @oldnodes = (); my @oldnodes = ();
my $LOOPSLEEP = 1800; # 1/2 hour between successive loops. my $LOOPSLEEP = 1800; # 1/2 hour between successive loops.
my $PAUSETIME = 120; # 2 minute pause after running vnode_setup my $PAUSETIME = 120; # 2 minute pause after running vnode_setup
my $SETUPWAIT = 960; # 16 minutes to wait for vnode to setup. my $SETUPWAIT = 960; # how long to wait for vnode to setup.
my $BATCHNUM = 20; # degree of parallelization my $BATCHNUM = 40; # degree of parallelization
# #
# daemonize # daemonize
...@@ -178,18 +178,35 @@ while (1) { ...@@ -178,18 +178,35 @@ while (1) {
# Check the nodes to find out which are up, and which failed # Check the nodes to find out which are up, and which failed
# in the vnode_setup we just ran. # in the vnode_setup we just ran.
print "Checking vnode_setup run status\n"; print "Checking vnode_setup run status\n";
my @failed = grep(nodepostalloc($_), @batch2); my @failed = ();
my @succeeded = ();
foreach $vnode (@batch2) {
if (nodepostalloc($vnode)) {
push(@succeeded, $vnode);
} else {
push(@failed, $vnode);
}
}
# If any fail, try to tear them down. # If any fail, try to tear them down.
if (@failed) { if (@failed) {
my @fvnodes = map {$_->[0]} @failed; my @fvnodes = map {$_->[0]} @failed;
my @fpnodes = map {$_->[1]} @failed;
print "### vnode_setup failed for the following vnodes: @fvnodes\n"; print "### vnode_setup failed for the following vnodes: @fvnodes\n";
print "### Proceding to tear them down.\n"; print "### Proceding to tear them down.\n";
system("vnode_setup -f -d -k -n $BATCHNUM $PLABMOND_PID $PLABMOND_EID @fvnodes"); system("vnode_setup -f -d -k -n $BATCHNUM $PLABMOND_PID $PLABMOND_EID @fvnodes");
} }
# If any succeeded, let tbops know about it.
if (@succeeded) {
my @spnodes = map {$_->[1]} @succeeded;
SENDMAIL($TBOPS, @succeeded ." plab nodes revived.",
"The following nodes have been brought back from the afterworld:\n\n".
join("\n", @spnodes) ."\n",
$TBOPS);
}
# Keep track of failed nodes for next daemon loop iteration. # Keep track of failed nodes for next daemon loop iteration.
push(@oldnodes, @failed); push(@oldnodes, @failed);
} }
...@@ -207,7 +224,7 @@ while (1) { ...@@ -207,7 +224,7 @@ while (1) {
sub nodepostalloc($) { sub nodepostalloc($) {
my ($vnode, $pnode) = @{$_[0]}; my ($vnode, $pnode) = @{$_[0]};
my $revive = 0; my $revive = 0;
my $retval = 1; my $retval = 0;
my $state = TBDB_NODESTATE_UNKNOWN(); my $state = TBDB_NODESTATE_UNKNOWN();
# Did the node send ISUP (boot up completely)? # Did the node send ISUP (boot up completely)?
...@@ -233,16 +250,12 @@ sub nodepostalloc($) { ...@@ -233,16 +250,12 @@ sub nodepostalloc($) {
TBSetNodeLogEntry($pnode, "root", TB_DEFAULT_NODELOGTYPE(), TBSetNodeLogEntry($pnode, "root", TB_DEFAULT_NODELOGTYPE(),
"'Moved to $PLABHOLDING_EID; ". "'Moved to $PLABHOLDING_EID; ".
"plab node $vnode setup okay by monitor.'"); "plab node $vnode setup okay by monitor.'");
$retval = 1;
SENDMAIL($TBOPS, "$pnode is alive",
"$pnode has been brought back from the afterworld!",
$TBOPS);
$retval = 0;
# It didn't come up.. # We couldn't get it up..
} else { } else {
print "Leaving $pnode in hwdown!\n"; print "Leaving $pnode in hwdown!\n";
$retval = 1 $retval = 0;
} }
return $retval; return $retval;
......
...@@ -9,17 +9,20 @@ ...@@ -9,17 +9,20 @@
import sys import sys
sys.path.append("@prefix@/lib") sys.path.append("@prefix@/lib")
import getopt import getopt
import libtestbed from libtestbed import *
import libplab import libplab
def usage(me): usage = "%prog [ -vd ] { create | destroy } pid eid"
print "Usage: %s [ -vd ] { create | destroy } pid eid" % me
sys.exit(-1)
def main(args): def main(args):
me = args[0] me = args[0]
parser = TBParser() parser = TBParser(usage)
parser.parse_args() (opts, args) = parser.parse_args()
if len(args) < 3:
parser.error("Incorrect number of arguments")
command,pid,eid = args
plab = libplab.Plab() plab = libplab.Plab()
if command == "create": if command == "create":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment