Commit bd587829 authored by Leigh Stoller's avatar Leigh Stoller

First cut at supporting RON (or more generally, remote nodes).

* tmcd/ron: A new directory of client code, based on the freebsd
  client code, but scaled back to the bare minimum. Does only account
  and group file maintenance. I redid the account stuff so that only
  emulab accounts are operated on. Does not require a stub file, but
  instead keeps a couple of local dbm files recording what groups and
  accounts were added by Emulab. There is a ton of paranoia checking
  to make sure that local accounts are not touched.

  The update script that runs on the client node detaches so that the
  ssh from boss returns immediately. update can also be run from the
  node periodically and at boottime. The script is installed setuid
  root, but checks to make sure that *only* root or "emulabman" has
  invoked it.

* utils/sshremote: New file. For remote nodes, instead of using sshtb,
  use sshremote, which ssh's in as "emulabman", which needs to be a
  local non-root user, but with an authorized_keys file containing
  boss' public key.

* web interface changes: Allow user to specify his own public key in
  addition to the emulab key.

  Add option in showexp page to update accounts on nodes in the
  experiment. I was originally intending to do this from approveuser,
  but this was easier and faster. I will add an option to do it on the
  approveuser page later.

* libdb.pm: Add a TBIsNodeRemote() query to see if a node is in the
  local testbed or a pcRemote node. Currently, this test is hardwired
  to a check for class=pcRemote, but this will need to change to a
  node_types property at some point.

* node_update: Reorg so that there is a maximum number of children
  created. Previously, a child was forked for each node, but that
  could chew up too many processes, especially for remote nodes which
  might hang up. For the same reason, we need to "lock" the experiment
  so that it cannot be terminated while a node_update is in progress.
  Might be able to relax that, but this was easy for now. Also add
  distinction between local and remote, since for remote we use
  sshremote instead of sshtb. Various cleanup stuff.

* mkacct: When generating a new account, include user supplied pub key
  in the authorized keys file, in addition to the emulab generated
  key. Both keys are stored in the DB in the users table. Anytime we
  update an account, get a fresh copy of the emulab pub key, in case
  user changes it.
parent bbd6db94
......@@ -1221,10 +1221,12 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/console_setup.proxy tbsetup/exports_setup.proxy \
tip/GNUmakefile \
tmcd/GNUmakefile tmcd/freebsd/GNUmakefile tmcd/linux/GNUmakefile \
tmcd/ron/GNUmakefile \
tmcd/netbsd/GNUmakefile \
tmcd/tmcd.restart \
utils/GNUmakefile utils/vlandiff utils/vlansync utils/delay_config \
utils/sshtb utils/create_image utils/node_admin utils/webcreateimage \
utils/sshremote \
www/GNUmakefile www/defs.php3 www/dbdefs.php3 \
vis/GNUmakefile vis/vistopology vis/webvistopology vis/top2gif \
vis/top2png vis/render \
......
......@@ -293,10 +293,12 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/console_setup.proxy tbsetup/exports_setup.proxy \
tip/GNUmakefile \
tmcd/GNUmakefile tmcd/freebsd/GNUmakefile tmcd/linux/GNUmakefile \
tmcd/ron/GNUmakefile \
tmcd/netbsd/GNUmakefile \
tmcd/tmcd.restart \
utils/GNUmakefile utils/vlandiff utils/vlansync utils/delay_config \
utils/sshtb utils/create_image utils/node_admin utils/webcreateimage \
utils/sshremote \
www/GNUmakefile www/defs.php3 www/dbdefs.php3 \
vis/GNUmakefile vis/vistopology vis/webvistopology vis/top2gif \
vis/top2png vis/render \
......
......@@ -52,7 +52,7 @@ use Exporter;
EXPTSTATE_NEW EXPTSTATE_PRERUN EXPTSTATE_SWAPPED EXPTSTATE_SWAPPING
EXPTSTATE_ACTIVATING EXPTSTATE_ACTIVE EXPTSTATE_TESTING
EXPTSTATE_TERMINATING EXPTSTATE_TERMINATED
EXPTSTATE_TERMINATING EXPTSTATE_TERMINATED EXPTSTATE_UPDATING
BATCHSTATE_POSTED BATCHSTATE_RUNNING BATCHSTATE_TERMINATING
BATCHSTATE_ACTIVATING
......@@ -78,7 +78,8 @@ use Exporter;
ExpNodes DBDateTime DefaultImageID GroupLeader TBGroupUnixInfo
TBValidNodeLogType TBValidNodeName TBSetNodeLogEntry
TBSetSchedReload MapNodeOSID TBLockExp TBUnLockExp TBSetExpSwapTime
TBUnixGroupList TBOSID TBImageID TBdbfork VnameToNodeid
TBUnixGroupList TBOSID TBImageID TBdbfork VnameToNodeid TBExpLocked
TBIsNodeRemote
);
# Must come after package declaration!
......@@ -134,6 +135,7 @@ sub EXPTSTATE_ACTIVE() { "active"; }
sub EXPTSTATE_TESTING() { "testing"; }
sub EXPTSTATE_TERMINATING() { "terminating"; }
sub EXPTSTATE_TERMINATED() { "ended"; }
sub EXPTSTATE_UPDATING() { "updating"; }
sub BATCHSTATE_POSTED() { "posted"; }
sub BATCHSTATE_ACTIVATING() { "activating"; }
......@@ -840,6 +842,32 @@ sub TBLockExp($$)
return 1;
}
#
# Test if Experiment is locked
#
# usage: TBExpLocked(char *pid, char *eid)
# returns 1 if locked.
# returns 0 if an invalid pid/eid or if an error.
#
sub TBExpLocked($$)
{
    my ($pid, $eid) = @_;

    my $locked_query =
        DBQueryWarn("select expt_locked from experiments ".
                    "where eid='$eid' and pid='$pid'");

    # A failed query or an unknown pid/eid both report "not locked".
    return 0
        unless ($locked_query && $locked_query->numrows != 0);

    # The experiment counts as locked whenever expt_locked is non-NULL.
    my ($expt_locked) = $locked_query->fetchrow_array();

    return (defined($expt_locked) ? 1 : 0);
}
#
# UnLock Experiment.
#
......@@ -1391,6 +1419,33 @@ sub TBGetNodeEventState($$)
return 1;
}
#
# Is a node remote?
#
# usage TBIsNodeRemote(char *node)
# Returns 1 if yes.
# Returns 0 if no.
#
sub TBIsNodeRemote($)
{
    my ($nodeid) = @_;

    # Look up the class of the node's type; DBQueryFatal does not return
    # a result we need to test for query failure.
    my $query_result =
        DBQueryFatal("select class from node_types as t ".
                     "left join nodes as n on t.type=n.type ".
                     "where n.node_id='$nodeid'");

    # No such node (or no matching type): report it as not remote.
    # Uses numrows, the same accessor TBExpLocked() uses in this file.
    if (! $query_result->numrows) {
        return 0;
    }
    my ($class) = $query_result->fetchrow_array();

    # A NULL class cannot be pcRemote; checking defined() first avoids
    # an "uninitialized value" warning from the pattern match.
    if (defined($class) && $class =~ /pcRemote/i) {
        return 1;
    }
    return 0;
}
#
# Issue a DB query. Argument is a string. Returns the actual query object, so
# it is up to the caller to test it. I would not for one moment view this
......
......@@ -131,7 +131,8 @@ if (!TBAdmin($UID)) {
# user's earliest project membership to use for the default group.
#
$query_result =
DBQueryFatal("select u.usr_pswd,u.unix_uid,u.usr_name,u.usr_email,m.pid ".
DBQueryFatal("select u.usr_pswd,u.unix_uid,u.usr_name, ".
" u.usr_email,u.home_pubkey,m.pid ".
" from users as u ".
"left join group_membership as m ".
" on u.uid=m.uid and m.pid=m.gid ".
......@@ -145,7 +146,8 @@ my $pswd = $db_row[0];
my $user_number = $db_row[1];
my $fullname = $db_row[2];
my $user_email = $db_row[3];
my $defpid = $db_row[4];
my $user_pubkey = $db_row[4];
my $defpid = $db_row[5];
#
# Unix info for users default group.
......@@ -242,6 +244,23 @@ if (system("$SSH $control_node $CHPASS -p $pswd $user")) {
fatal("Could not change password for user $user on $control_node.\n");
}
#
# Update the DB with the users public key.
#
if (-e "$HOMEDIR/$user/.ssh/identity.pub" ) {
    my $pubkeyfile = "$HOMEDIR/$user/.ssh/identity.pub";

    # Read the key file directly instead of running "cat" through a
    # shell, so an unreadable file is reported as such rather than as
    # an empty "Bad public key".
    open(my $keyfh, '<', $pubkeyfile) or
        fatal("Could not open $pubkeyfile: $!");
    my $key = do { local $/; <$keyfh> };
    close($keyfh);
    $key = ""
        if (!defined($key));

    # Only these characters are accepted, which also keeps quote
    # characters out of the SQL statement below.
    if ($key =~ /^([-\@\w\s\.]+)$/) {
        $key = $1;
    }
    else {
        fatal("Bad public key: $key");
    }
    chomp $key;

    # Record the emulab-generated public key in the users table.
    DBQueryFatal("update users set emulab_pubkey='$key' where uid='$user'");
}
exit(0);
sub fatal {
......@@ -296,6 +315,10 @@ sub FirstTime()
fatal("Copying over $HOMEDIR/$user/.ssh/identity.pub ".
"to authorized_keys");
}
if (defined($user_pubkey) && $user_pubkey ne "") {
    # Append the user supplied key from Perl rather than via "echo" in a
    # shell command: quotes, backticks or $(...) inside the key must
    # never be interpreted as shell syntax.
    my $authkeys = "$HOMEDIR/$user/.ssh/authorized_keys";

    open(my $authfh, '>>', $authkeys) or
        fatal("Could not open $authkeys for append: $!");
    print $authfh "$user_pubkey\n";
    close($authfh) or
        fatal("Could not write $authkeys: $!");
}
chmod(0600, "$HOMEDIR/$user/.ssh/authorized_keys") or
fatal("Could not chmod $HOMEDIR/$user/.ssh/authorized_keys: $!");
}
......
......@@ -13,16 +13,14 @@ use Getopt::Std;
# The output is all jumbled together since the updates are issued in parallel.
# Might be a pain when debugging.
#
# usage: node_update [-b] <pid> <eid>
#
sub usage()
{
print STDOUT "Usage: node_update [-b] <pid> <eid>\n".
"Update user accounts and NFS mounts on nodes in your experiment.\n".
"Use the -b option to use batch operation (place in background).\n";
"Update user accounts and NFS mounts on nodes in your project.\n".
"Use -b to use batch operation (place in background, send email).\n";
exit(-1);
}
my $optlist = "b";
my $optlist = "be:";
#
# Configure variables
......@@ -33,6 +31,7 @@ my $TBOPS = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
my $ssh = "$TB/bin/sshtb -n";
my $sshremote = "$TB/bin/sshremote -n";
my $expsetup = "$TB/sbin/exports_setup";
my $batchmode = 0;
......@@ -73,9 +72,15 @@ if (defined($options{"b"})) {
if ($pid =~ /^([-\@\w]+)$/) {
$pid = $1;
}
else {
die("*** Bad data in pid: $pid\n");
}
if ($eid =~ /^([-\@\w]+)$/) {
$eid = $1;
}
else {
die("*** Bad data in eid: $eid\n");
}
my $user_name;
my $user_email;
......@@ -89,42 +94,66 @@ my $dbuid;
# That is, it must be setuid root.
#
if ($EUID != 0) {
die("Must be root! Maybe its a development version?");
die("*** $0:\n".
" Must be root! Maybe its a development version?\n");
}
#
# Verify actual user and get his DB uid.
#
if (! UNIX2DBUID($UID, \$dbuid)) {
print STDOUT "Go Away! You do not exist in the Emulab Database.\n";
exit(1);
die("*** $0:\n".
" You do not exist in the Emulab Database.\n");
}
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
print STDOUT "Cannot determine your name and email address.\n";
exit(1);
die("*** $0:\n".
" Cannot determine your name and email address.\n");
}
#
# Verify that this person is allowed to do this. Must be an admin type,
# the experiment creator or the project leader.
#
if ($UID && ! TBAdmin()) {
if ($UID && !TBAdmin()) {
my $expt_leader = ExpLeader($pid, $eid);
my $proj_leader = ProjLeader($pid);
if (!$expt_leader || !$proj_leader) {
print STDERR "No such Experiment $eid or no such Project $pid\n";
exit(1);
die("*** $0:\n".
" No such Experiment $eid or no such Project $pid\n");
}
if ($expt_leader ne $dbuid && $proj_leader ne $dbuid) {
print STDERR "You must be either the experiment creator or ".
"the project leader\n";
exit(1);
die("*** $0:\n".
" You must be the experiment creator or the project leader\n");
}
}
#
# We need to lock down the experiment during this.
#
DBQueryFatal("lock tables experiments write");
if (TBExpLocked($pid, $eid)) {
DBQueryWarn("unlock tables");
die("*** $0:\n".
" Experiment $pid/$eid is in transition. Please try later!\n");
}
#
# A sanity check. Lets make sure the experiment is in the swapped in
# state so that we are not trying to update nodes that are still booting
# or swapping out, etc.
#
if (ExpState($pid, $eid) ne EXPTSTATE_ACTIVE) {
DBQueryWarn("unlock tables");
die("*** $0:\n".
" The experiment $pid/$eid must be fully activated first!\n");
}
TBLockExp($pid, $eid);
DBQueryFatal("unlock tables");
#
# Batchmode (as from the web interface) goes to background and reports
# later via email.
......@@ -153,26 +182,11 @@ if ($batchmode) {
#
$UID = $EUID;
if (system("$expsetup")) {
print STDERR "*** Failed to setup mountpoints.\n";
fatal("Exports Setup Failed");
}
# Give ops a chance to react.
sleep(2);
#
# A sanity check. Lets make sure the experiment is in the swapped in
# state so that we are not trying to update nodes that are still booting
# or swapping out, etc.
#
if (ExpState($pid, $eid) ne EXPTSTATE_ACTIVE) {
fatal("The experiment $pid/$eid must be fully activated first!\n".
"Please try again later, after activation. The experiment leader\n".
"will receive email notification when the experiment has been\n".
"fully activated, or you can check the status via the web ".
"interface.\n");
exit(1);
}
#
# Get the list of nodes that need to be "updated."
#
......@@ -182,30 +196,51 @@ if (! @nodes) {
}
#
# Fire off an update process so that we can overlap them all.
# We need the pid so we can wait for them all before preceeding.
# We want some overlap, but not too much since we could burn up
# a lot processes on wedged nodes. Issue a small number in parallel,
# and wait once we reach the limit for one to finish, before issuing
# the next one.
#
my $maxpids = 0;
foreach my $node ( @nodes ) {
$mypid = UpdateNode($node);
$pids{$node} = $mypid;
while ($maxpids >= 5) {
my $thispid = waitpid(-1, 0);
my $thisnode = $pids{$thispid};
if ($?) {
$failed++;
print STDERR "Update of node $thisnode failed!\n";
}
else {
print STDOUT "$thisnode updated ...\n";
}
delete($pids{$thispid});
$maxpids--;
}
my $thispid = UpdateNode($node);
$pids{$thispid} = $node;
$maxpids++;
sleep(1);
}
#
# Wait for all the children to exit before continuing.
# Wait for any remaining children to exit before continuing.
#
foreach my $node ( @nodes ) {
my $mypid = $pids{$node};
foreach my $thispid ( keys(%pids) ) {
my $node = $pids{$thispid};
waitpid($mypid, 0);
waitpid($thispid, 0);
if ($?) {
$failed++;
print STDERR "Update of node $node failed!\n";
}
else {
print STDOUT "$node Updated ...\n";
print STDOUT "$node updated ...\n";
}
}
TBUnLockExp($pid, $eid);
NotifyUser("Node Update Complete", $failed);
if (defined($logname)) {
unlink($logname);
......@@ -222,6 +257,12 @@ sub UpdateNode {
print STDOUT "Updating $node ...\n";
#
# We need to know if its a remote or local node, so we know how
# to update it. This info needs to be in the DB at some point.
#
my($isremote) = TBIsNodeRemote($node);
$mypid = fork();
if ($mypid) {
return $mypid;
......@@ -260,7 +301,12 @@ sub UpdateNode {
exit(0);
}
else {
exec("$ssh $node /etc/testbed/update");
if ($isremote) {
exec("$sshremote $node /usr/local/etc/testbed/update");
}
else {
exec("$ssh $node /etc/testbed/update");
}
exit(0);
}
exit(0);
......@@ -270,7 +316,6 @@ sub NotifyUser($$)
{
my($mesg, $iserr) = @_;
my($subject, $from, $to, $hdrs);
local $MAIL;
print STDOUT "$mesg\n";
......@@ -302,26 +347,16 @@ sub NotifyUser($$)
"$hdrs";
}
if (! ($MAIL = OPENMAIL($to, $subject, $from, $hdrs))) {
die("Cannot start mail program!");
}
print $MAIL $mesg;
if (open(IN, "$logname")) {
print $MAIL "\n\n--------- $logname ---------\n\n";
while (<IN>) {
print $MAIL "$_";
}
close(IN);
}
close($MAIL);
#
# Send a message to the testbed list. Append the logfile.
#
SENDMAIL($to, $subject, $mesg, $from, $hdrs, ($logname));
}
sub fatal($) {
my($mesg) = @_;
TBUnLockExp($pid, $eid);
NotifyUser($mesg, 1);
if (defined($logname)) {
unlink($logname);
......
#
# XXX ONLY RUN THIS INSTALL ON A FREEBSD RON NODE!
#
# Trivial. These things just need to be installed into the right place
# on a testbed node before cutting an image.
#
#
SRCDIR		= @srcdir@
TESTBED_SRCDIR	= @top_srcdir@
OBJDIR		= ../..
SUBDIR		= tmcd/ron

include $(OBJDIR)/Makeconf

all:	bossnode

include $(TESTBED_SRCDIR)/GNUmakerules

#
# Must be the directory the client side expects: liblocsetup.pm, the
# update script, the boottime rc script and node_update (via sshremote)
# all refer to /usr/local/etc/testbed.
#
INSTALL_DIR	= /usr/local/etc/testbed
INSTALL		= /usr/bin/install -c
INSTALL_PROG	= /usr/bin/install -c -m 755

install:	misc-install script-install bin-install

# Errors are ignored (leading "-").
dir-install:
	-mkdir -p $(INSTALL_DIR)

misc-install:	dir-install bossnode
	$(INSTALL) bossnode $(INSTALL_DIR)/bossnode

bin-install:	dir-install
	$(INSTALL_PROG) ../tmcc $(INSTALL_DIR)/tmcc

script-install:	dir-install
	$(INSTALL_PROG) $(SRCDIR)/update $(INSTALL_DIR)/update
	$(INSTALL_PROG) $(SRCDIR)/libsetup.pm $(INSTALL_DIR)/libsetup.pm
	$(INSTALL_PROG) $(SRCDIR)/liblocsetup.pm $(INSTALL_DIR)/liblocsetup.pm
#	$(INSTALL_PROG) $(SRCDIR)/rc.testbed /usr/local/etc/rc.d/testbed.sh

#
# Make the update script setuid root. This target is not a prerequisite
# of "install" in this file, so it has to be requested explicitly.
#
post-install:
	chown root $(INSTALL_DIR)/update
	chmod u+s $(INSTALL_DIR)/update

#
# We build the bossnode file from config info
# (BOSSNODE is presumably supplied by Makeconf -- confirm).
#
bossnode:	GNUmakefile
	echo >$@ "$(BOSSNODE)"

clean:
	rm -f bossnode
#!/usr/bin/perl -wT
#
# FreeBSD specific routines and constants for the client boottime setup stuff.
#
package liblocsetup;
use Exporter;
@ISA = "Exporter";
@EXPORT =
qw ( $CP $EGREP
os_groupadd os_useradd os_userdel os_usermod os_mkdir
os_groupdel os_cleanup_node
);
# Must come after package declaration!
use English;
#
# This is the FreeBSD dependent part of the setup library.
#
my $SETUPDIR = "/usr/local/etc/testbed";
libsetup::libsetup_init($SETUPDIR);
#
# Various programs and things specific to FreeBSD and that we want to export.
#
$CP = "/bin/cp";
$EGREP = "/usr/bin/egrep -s -q";
#
# These are not exported
#
my $USERADD = "/usr/sbin/pw useradd";
my $USERDEL = "/usr/sbin/pw userdel";
my $USERMOD = "/usr/sbin/pw usermod";
my $GROUPADD = "/usr/sbin/pw groupadd";
my $GROUPDEL = "/usr/sbin/pw groupdel";
my $CHPASS = "/usr/bin/chpass -p";
my $MKDB = "/usr/sbin/pwd_mkdb -p";
my $MKDIR = "/bin/mkdir";
#
# OS dependent part of cleanup node state.
#
sub os_cleanup_node () {
    # There is no OS dependent state to clean up here; always report
    # success.
    my $status = 0;

    return $status;
}
#
# Add a new group
#
sub os_groupadd($$)
{
    my ($group, $gid) = @_;

    # Run "pw groupadd" for the named group with the given numeric gid
    # and hand the raw system() status back to the caller.
    my $command = "$GROUPADD $group -g $gid";
    my $status  = system($command);

    return $status;
}
#
# Remove a group
#
sub os_groupdel($)
{
    my ($group) = @_;

    # Run "pw groupdel" for the named group; the caller gets the raw
    # system() status.
    my $command = "$GROUPDEL $group";
    my $status  = system($command);

    return $status;
}
#
# Remove a user account.
#
sub os_userdel($)
{
    my ($login) = @_;

    # Run "pw userdel" for the login; the caller gets the raw system()
    # status.
    my $command = "$USERDEL $login";
    my $status  = system($command);

    return $status;
}
#
# Modify user group membership.
#
sub os_usermod($$$$)
{
    my ($login, $gid, $glist, $root) = @_;

    # $root is currently ignored; adding root users to "wheel" is
    # disabled:
    #    if ($root) {
    #        $glist = join(',', split(/,/, $glist), "wheel");
    #    }

    # Only pass -G when there is a supplementary group list.
    my $groupopt = ($glist ne "" ? "-G $glist" : $glist);

    # Set the primary group (and the group list, if any) via "pw usermod"
    # and return the raw system() status.
    return system("$USERMOD $login -g $gid $groupopt");
}
#
# Add a user.
#
#
# Create the account with "pw useradd", then set its (already encrypted)
# password with "chpass -p".
#
# Arguments: login, uid, gid, encrypted password, comma separated
# supplementary group list (may be ""), home directory, GCOS string,
# root flag. $homedir and $root are accepted but not used here; the
# "wheel" handling for root users is disabled.
#
# Returns 0 on success, -1 (after a warning) if either command fails.
#
sub os_useradd($$$$$$$$)
{
    my($login, $uid, $gid, $pswd, $glist, $homedir, $gcos, $root) = @_;

    #    if ($root) {
    #        $glist = join(',', split(/,/, $glist), "wheel");
    #    }

    # Only pass -G when there is a supplementary group list.
    my @groupopt = ($glist ne "" ? ("-G", $glist) : ());

    # Use the list form of system() so no shell is involved: the GCOS
    # string may contain quotes or other shell metacharacters, and an
    # encrypted password may contain '$', which a shell would expand.
    my @useradd = (split(' ', $USERADD), $login, "-u", $uid, "-g", $gid,
                   @groupopt, "-m", "-s", "/bin/tcsh", "-c", $gcos);

    if (system(@useradd) != 0) {
        warn "*** WARNING: $USERADD $login error.\n";
        return -1;
    }

    my @chpass = (split(' ', $CHPASS), $pswd, $login);

    if (system(@chpass) != 0) {
        warn "*** WARNING: $CHPASS $login error.\n";
        return -1;
    }
    return 0;
}
1;
This diff is collapsed.
#!/bin/sh
#
# Boottime initialization: if the Emulab update script is present,
# run it to bring the accounts up to date.
#
UPDATE=/usr/local/etc/testbed/update

if test -f "$UPDATE"; then
	echo "Checking for Emulab account updates ..."
	"$UPDATE"
fi
#!/usr/bin/perl -wT
use English;
use Fcntl ':flock';
use POSIX qw(strftime);
#
# Update the system with new accounts/mounts/etc.
#
#
# Untaint path
#
$ENV{'PATH'} = '/bin:/sbin:/usr/bin:/usr/local/bin:/usr/local/etc/testbed';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
#
# Load the OS independent support library. It will load the OS dependent
# library and initialize itself.
#
use lib "/usr/local/etc/testbed";
use libsetup;
#
# Turn off line buffering on output
#
$| = 1;
#
# Must be setuid root.
#
if ($EUID != 0) {
    die("*** $0:\n".
        " Must be root! Maybe not installed properly?\n");
}

#
# If not invoked as real root, then must be invoked as emulabman.
#
if ($UID) {
    my ($pwname) = getpwuid($UID) or
        die("*** $0:\n".
            " $UID is not in the password file!\n");

    if ($pwname ne "emulabman") {
        die("*** $0:\n".
            " You do not have permission to run this script!\n");
    }

    #
    # But must run as real root!
    #
    $UID = 0;
}

#
# Local config.
#
my $lockfile  = "/var/tmp/node_update_lockfile";
my $batchmode = 1;

#
# Reuse the same log file so we can track errors.
# NOTE(review): this is a fixed name under /tmp handed to TBBackGround()
# while running as root -- confirm how that helper opens it.
#
my $logname = "/tmp/account_update.debug";

#
# Put this into the background so that the ssh can return immediately.
#
if ($batchmode && TBBackGround($logname)) {
    #
    # Parent exits normally
    #
    exit(0);
}
if ($batchmode) {
    # Stamp each run; %Y gives the full year instead of a hard-coded
    # "20" century prefix.
    my $date = POSIX::strftime("%Y/%m/%d %H:%M:%S", localtime());

    print "------------------------------------------------------------\n";
    print "$date\n";
    print "------------------------------------------------------------\n";
}

#
# We do not want multiple invocations of this script running concurrently!
# Use a lock file to serialize.
#
open(LOCK, ">>$lockfile") ||
    die("Could not open $lockfile: $!\n");

# Poll for the lock every 5 seconds and give up after about 100 failed
# attempts.
my $count = 0;
while (!flock(LOCK, LOCK_EX|LOCK_NB)) {
    print "Another update in progress. Waiting a moment ...\n";

    if ($count++ > 100) {
        die("Could not get the lock after a long time!\n");
    }
    sleep(5);
}

nodeupdate();

close(LOCK);
exit(0);
......@@ -8,7 +8,7 @@ SUBDIR = utils
include $(OBJDIR)/Makeconf
BIN_SCRIPTS = delay_config sshtb create_image node_admin
BIN_SCRIPTS = delay_config sshtb create_image node_admin sshremote
SBIN_SCRIPTS = vlandiff vlansync
LIBEXEC_SCRIPTS = webcreateimage
......