Commit c3f0111e authored by David Johnson

Add a script, epmodeset, that moves a node between elab and plab widearea

"modes".  Also some machinations in libplab and the federation to support
not only object synch, but also "operations".
parent 3c53a3f1
......@@ -2544,7 +2544,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
utils/backupswitches utils/setbuildinfo utils/checkquota \
utils/spewconlog utils/xlogin \
utils/opsdb_control utils/opsdb_control.proxy \
utils/remove_old_www \
utils/remove_old_www utils/epmodeset \
www/GNUmakefile www/defs.php3 www/dbdefs.php3 www/xmlrpc.php3 \
www/xmlrpcpipe.php3 \
www/swish.conf www/websearch \
......
......@@ -913,7 +913,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
utils/backupswitches utils/setbuildinfo utils/checkquota \
utils/spewconlog utils/xlogin \
utils/opsdb_control utils/opsdb_control.proxy \
utils/remove_old_www \
utils/remove_old_www utils/epmodeset \
www/GNUmakefile www/defs.php3 www/dbdefs.php3 www/xmlrpc.php3 \
www/xmlrpcpipe.php3 \
www/swish.conf www/websearch \
......
......@@ -2033,6 +2033,7 @@ class EmulabPlcObjTranslator:
def __init__(self,plc):
self.plc = plc
self.__allowed_objects = [ 'project','node','user' ]
self.__mgmtOperations = { 'node': [ 'setstate' ] }
pass
def __checkMapByElabId(self,objtype,objid):
......@@ -2155,6 +2156,26 @@ class EmulabPlcObjTranslator:
return
def manageObject(self,objtype,objid,op,opargs=[]):
if not objtype in self.__allowed_objects:
raise RuntimeError("unknown object type '%s'" % objtype)
if not objtype in self.__mgmtOperations \
and not op in self.__mgmtOperations[objtype]:
raise RuntimeError("unknown management operation %s/%s" \
% (objtype,op))
ss = self.plc.agent.syncSupport()
if not ss or not objtype in ss:
raise RuntimeError("plc agent '%s' does not support object '%s'" \
" delete!" % (self.plc.agent.__class__.__name__,
objtype))
plab_id,plab_name = self.__checkMapByElabId(objtype,objid)
self.plc.agent.manageObject(objtype,plab_id,op,opargs)
return
# XXX This translation stuff ought to be split, with libplab getting the
# data from the Emulab db, then passing it to the PLCagent to translate...
# but unnecessary for now!
......@@ -3521,7 +3542,7 @@ class Node:
# have to set us up with root before anything else
try:
self.__copy(DEFAULT_DATA_PATH + "/fixsudo.sh", "/tmp/fixsudo.sh",
tries=12,interval=15)
tries=1000,interval=15)
self.__perform("-tt sh /tmp/fixsudo.sh", quiet = True,
tries=3,interval=5)
pass
......
......@@ -494,6 +494,10 @@ class PLCagent:
def NodeDelete(self,id):
return self.__server.DeleteNode(self.auth,id)
def NodeSetBootState(self,id,boot_state):
self.__server.UpdateNode(self.auth,id,{ 'boot_state':boot_state })
pass
def PersonAdd(self,name,email,url,phone,passwd,roles):
sr = name.rsplit(" ",1)
if len(sr) == 1:
......@@ -616,6 +620,9 @@ class mod_PLC4:
'longitude','latitude'],
'del':'SiteDelete' }
self.mgmt_tr = dict()
self.mgmt_tr['node'] = { 'setstate':'NodeSetBootState' }
return
def setdebug(self,sdebug,sverbose):
......@@ -664,6 +671,20 @@ class mod_PLC4:
return retval
def manageObject(self,objtype,objid,op,opargs=[]):
    """Invoke the agent method registered for (objtype, op) on objid.

    The method name is looked up in self.mgmt_tr[objtype][op]; it is
    called through tryXmlrpcCmd with objid as the first argument followed
    by opargs.  Returns objid.  An unregistered objtype or op surfaces as
    the KeyError raised by the table lookup.
    """
    agent = self.__getAgent(None)
    handler = getattr(agent, self.mgmt_tr[objtype][op])

    # objid always leads; the caller's opargs list is copied, not modified.
    call_args = [objid] + list(opargs)

    if debug:
        print("managing object of type '%s' with args '%s'" \
              % (objtype,call_args))

    tryXmlrpcCmd(handler, tuple(call_args))

    return objid
def deleteObject(self,objtype,id):
agent = self.__getAgent(None)
......
......@@ -13,7 +13,9 @@ from libtestbed import *
import libplab
import traceback
usage = "\t%prog [-d] <plcid_or_name> <sync|delete> <objtype> <objid>"
usage = "\t%prog [-d] <plcid_or_name> <sync|delete> <objtype> <objid>\n" \
"\t%prog [-d] <plcid_or_name> <manage> <objtype> <objid> <op> <opargs>\n"
def main(args):
me = args[0]
......@@ -38,9 +40,25 @@ def main(args):
elif op == 'delete':
translator.deleteObject(objtype,objid)
pass
else:
raise RuntimeError("unknown command %s" % op)
elif len(args) > 5:
plcid,op,objtype,objid,mgmt_op = args[:5]
opargs = args[5:]
# load up the plc:
plc = libplab.PLC(plcid)
translator = libplab.EmulabPlcObjTranslator(plc)
if op == 'manage':
translator.manageObject(objtype,objid,mgmt_op,opargs)
else:
raise RuntimeError("unknown command %s" % op)
else:
parser.error("Incorrect number of arguments")
pass
sys.exit(0)
if __name__ == "__main__":
main(sys.argv)
......@@ -24,7 +24,7 @@ SBIN_SCRIPTS = vlandiff vlansync withadminprivs export_tables cvsupd.pl \
grabswitchconfig backupswitches cvsinit checkquota \
spewconlog opsdb_control newnode suchown archive_list \
wanodecheckin wanodecreate spewimage \
anonsendmail
anonsendmail epmodeset
WEB_SBIN_SCRIPTS= webnewnode webdeletenode webspewconlog webarchive_list \
webwanodecheckin webspewimage
......
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
# Copyright (c) 2008 University of Utah and the Flux Group.
# All rights reserved.
#
# Simple script to move nodes between "elab mode" and "plab mode"...
#
#
# Configure variables
#
# Installation prefix of the testbed tree.  "@prefix@" (like @TBMAINSITE@
# below) is a placeholder that is filled in when the tree is configured.
my $TB = "@prefix@";
# XXX stinky hack detection
# The script refuses to run further down when this value is false.
my $ISUTAH = @TBMAINSITE@;
# External testbed commands this script runs via system().
my $NFREE = "$TB/bin/nfree";
my $RESERVE = "$TB/sbin/sched_reserve";
my $OSSELECT = "$TB/bin/os_select";
my $REBOOT = "$TB/bin/node_reboot";
my $PLABFED = "$TB/sbin/plabfed";
# name of the PLC we're talking to (config metadata in emulab db);
# passed as the first argument to plabfed.
my $PLC = "ProtoGENI";
# osid for the plab mfs (on emulab widearea usb dongles, second partition);
# handed to os_select when a node is put into plab mode.
my $PLABMFS_OSID = 1550;
#
# Yes, nasty hack... but we have no way of specifying modes and "groups" of
# node_ids that are really the same node in the db. Can't do that until we
# free ourselves from ID'ing nodes via IP address.
# So, instead, we restrict to a couple types (to prevent operator mistakes),
# and we figure out which nodes from each type represent the same phys node
# based off the hostname field in the widearea_nodeinfo table. Yes, this sucks,
# but anything else is meta-physical. Ha ha!
#
# $etype is the node type of the "elab mode" node_id and $ptype the node
# type of the "plab mode" node_id for the same physical machine.
my $etype = "pcpg";
my $ptype = "pcpgeniphys";
use strict;
use lib '@prefix@/lib';
use libdb;
use Node;
use NodeType;
# Turn off line buffering on output
$| = 1;
# Refuse to go any further unless the caller is a testbed administrator.
die "Sorry, only testbed administrators can run this script!\n"
    unless (TBAdmin());
# Likewise refuse to run anywhere but the main (Utah) site.
die "Sorry, this only makes sense at Utah Emulab at the moment!\n"
    unless ($ISUTAH);
#
# Handle command-line arguments
#
# Abort the script with the command-line synopsis; never returns.
sub usage {
    my $synopsis = "usage: epmodeset <elab|plab> <node> ... <node>\n";
    die $synopsis;
}
# A mode plus at least one node must be given.
usage()
    if (scalar(@ARGV) < 2);
# First argument is the target mode, everything after it is a node.
my ($mode,@nodes) = @ARGV;
# Only the two known modes are accepted.
usage()
    unless ($mode eq "elab" || $mode eq "plab");
#
# First, only operate on ! virtnodes.
# If putting the node in elab mode:
# * (if node was in elab mode (free or alloc), noop)
# * (if node was in plab mode, need to move plab phys node into hwdown)
# * move elab phys from hwdown to reloading (nfree is easiest)
# * force reboot
#
# If putting the node in plab mode:
# * (if in plab mode, noop)
# * (if in elab mode, make sure in hwdown, reloading, or free, else fail)
# * move elab phys node to hwdown first
# * os_select on elab phys node to boot plab mfs
# * mv plab phys node to hwdown, mv plab vnode -1 to plab-monitor
#
#
# Resolve all node names/ids and create elab->plab, plab->elab maps.
#
# nid->Node maps (elab->elab and plab->plab)
my %einfo = ();
my %pinfo = ();
# nid->nid maps (elab->plab and plab->elab)
my %epmap = ();
my %pemap = ();

#
# For every name given on the command line, find the single widearea
# hostname it refers to, then the elab-type and plab-type node_ids that
# share that hostname, and record the pair in the maps above.  Any name
# that cannot be resolved to exactly one such pair is fatal.
#
foreach my $n (@nodes) {
    # Anchor with \z rather than $ so a trailing newline is rejected too;
    # the name is interpolated into the queries below.
    if ($n !~ /^[a-zA-Z][a-zA-Z0-9\-\.]+\z/) {
        die "Bad node '$n', exiting!\n";
    }

    # yes, we are assuming that hostnames will be the same for the elab and
    # plab node_ids.
    my $qres = DBQueryFatal("select wa.hostname" .
                            " from widearea_nodeinfo as wa " .
                            " left join nodes as n on wa.node_id=n.node_id" .
                            " where (wa.node_id='$n' or wa.hostname='$n')" .
                            " and (n.type='$etype' or n.type='$ptype')" .
                            " group by wa.hostname");
    if ($qres->num_rows() != 1) {
        die "Could not find exactly one matching widearea hostname for '$n'!\n";
    }
    my ($hostname) = $qres->fetch_row();

    # Now fetch both node_ids (one per type) registered under that hostname.
    $qres = DBQueryFatal("select wa.node_id,n.type,wa.hostname" .
                         " from widearea_nodeinfo as wa " .
                         " left join nodes as n on wa.node_id=n.node_id" .
                         " where wa.hostname='$hostname'" .
                         " and (n.type='$etype' or n.type='$ptype')");
    if ($qres->num_rows() != 2) {
        die "Should be exactly two widearea_nodeinfo records for node '$n'!\n";
    }

    my ($enid,$pnid) = (undef,undef);
    # Exactly two rows were verified above, so read exactly two.
    foreach my $rownum (1 .. 2) {
        # The third column (hostname) is already known and is ignored.
        my ($nid,$ntype) = $qres->fetch_row();
        next
            unless (defined($ntype) &&
                    ($ntype eq $etype || $ntype eq $ptype));

        my $node = Node->Lookup($nid);
        if (!defined($node)) {
            die "Could not find node '$n', exiting!\n";
        }

        if ($ntype eq $etype) {
            $einfo{$nid} = $node;
            $enid = $nid;
        }
        else {
            $pinfo{$nid} = $node;
            $pnid = $nid;
        }
    }

    # Both rows having the same type leaves one side unset; that is an error.
    if (!(defined($enid) && defined($pnid))) {
        die "Could not find elab and plab widearea_nodeinfo for node '$n'!\n";
    }

    $epmap{$enid} = $pnid;
    $pemap{$pnid} = $enid;
}
#
# Switch every resolved node pair into the requested mode.  A failure on one
# node is reported on STDERR and that node is skipped; the remaining nodes
# are still processed.  External commands are run with the list form of
# system() so that no shell is involved.  Nodes are visited in sorted order
# so that the output is reproducible from run to run.
#
foreach my $n (sort(keys(%einfo))) {
    my $pn = $epmap{$n};

    print "Working on elab $n (plab $pn):\n";

    if ($mode eq 'elab') {
        my ($pid,$eid);
        my $allocated = NodeidToExp($n,\$pid,\$eid);
        if (!$allocated) {
            print STDERR "Node '$n' already free, skipping.\n";
            next;
        }
        # Allocated to anything other than the hwdown experiment means the
        # elab node is already in use; leave it alone.
        if ($pid ne NODEDEAD_PID() || $eid ne NODEDEAD_EID()) {
            print STDERR "Node '$n' already allocated to $pid/$eid, skipping!\n";
            next;
        }
        # The elab node is in hwdown at this point; release it.
        if (system($NFREE, NODEDEAD_PID(), NODEDEAD_EID(), $n)) {
            print STDERR "nfree failed for '$n', skipping!\n";
            next;
        }

        # so, now the node will be reloading... just need to fix up the plab
        # node_id stuff.
        my ($ppid,$peid);
        my $pallocated = NodeidToExp($pn,\$ppid,\$peid);
        if (system($RESERVE, NODEDEAD_PID(), NODEDEAD_EID(), $pn)) {
            print STDERR "sched_reserve failed for '$pn', skipping!\n";
            next;
        }
        # Release the plab node from wherever it was allocated before the
        # reservation above was made.
        if ($pallocated) {
            if (system($NFREE, $ppid, $peid, $pn)) {
                print STDERR "nfree failed for '$pn', skipping!\n";
                next;
            }
        }
    }
    elsif ($mode eq 'plab') {
        my ($pid,$eid);
        my $allocated = NodeidToExp($n,\$pid,\$eid);
        if ($allocated && ($pid ne NODEDEAD_PID() || $eid ne NODEDEAD_EID())) {
            print STDERR "Node '$n' allocated to $pid/$eid, skipping.\n";
            next;
        }
        # A free elab node is parked in hwdown first.
        if (!$allocated) {
            if (system($RESERVE, NODEDEAD_PID(), NODEDEAD_EID(), $n)) {
                print STDERR "sched_reserve failed for '$n', skipping!\n";
                next;
            }
        }
        if (system($OSSELECT, $PLABMFS_OSID, $n)) {
            print STDERR "os_select failed for '$n', skipping!\n";
            next;
        }
        # make sure node gets reinstalled from its PLC
        # NOTE(review): this passes the elab-type node_id, not $pn -- confirm
        # that is the id plabfed expects to map to the PLC node.
        if (system($PLABFED, $PLC, "manage", "node", $n, "setstate", "rins")) {
            print STDERR "plabfed failed to set node '$n' boot state to rins!\n";
            next;
        }
        # Report, rather than silently ignore, a failed reboot request.
        if (system($REBOOT, $n)) {
            print STDERR "node_reboot failed for '$n'!\n";
        }
    }
}

exit(0);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment