Commit 8ec05f0d authored by Leigh Stoller

A set of capture/capserver/DB changes.

Capserver and capture now handshake the owner/group of the tipline.
The owner defaults to root, and the group defaults to root when the
node is not allocated. Capture will do the chown after the handshake,
so if boss is down when capture starts, the acl/run file will get 0,0,
but will get the proper owner/group later after it is able to handshake.
As a result, console_setup.proxy was trimmed down and cleaned up a
bit, since it no longer has to muck with some of this stuff.

A second change was to support multiple tiplines per node. I have
modified the tiplines table as such:

	| Field   | Type        | Null | Key | Default | Extra |
	+---------+-------------+------+-----+---------+-------+
	| tipname | varchar(32) |      | PRI |         |       |
	| node_id | varchar(10) |      |     |         |       |
	| server  | varchar(64) |      |     |         |       |

That is, the name of the tip device (given to capture) is the unique
key, and there can be multiple tiplines associated with each node.
console_setup now uses the tiplines table to determine which tiplines
need to be reset; previously it just used the node_id passed into
console_setup. Conversely, capserver uses the tipname to map back to
the node_id, so that it can get the owner/group from the reserved
table.

I also removed the shark hack from nalloc, nfree, and console_reset,
since there is no longer any need for that; this can be described
completely now with tiplines table entries. If we ever bring the
sharks back, we will need to generate new entries. Hah!
parent 91b15ec9
......@@ -21,7 +21,7 @@
#define ACLPATH LOGPATH
/*
* The key is transferred in ascii text.
* The key is transferred from capture to capserver in ascii text.
*/
typedef struct {
int keylen; /* of the key string */
......@@ -29,11 +29,19 @@ typedef struct {
} secretkey_t;
#define DEFAULTKEYLEN 32
/*
 * The capserver then returns this structure as part of the handshake.
 *
 * It carries the owner and group that capture applies to the run and
 * acl files of the tipline. The capserver sets uid to 0, and sets gid
 * to the unix gid of the group that has the node reserved, or to 0 when
 * the node is not reserved. The structure is written to and read from
 * the socket as raw bytes, so both ends must agree on its layout.
 */
typedef struct {
uid_t uid; /* owner for the tipline files */
gid_t gid; /* group for the tipline files */
} tipowner_t;
/*
* The remote capture sends this back when it starts up
*/
typedef struct {
char nodeid[64];
char name[64]; /* "tipname" in tiplines table */
int portnum;
secretkey_t key;
} whoami_t;
......
......@@ -54,6 +54,7 @@ int
main(int argc, char **argv)
{
MYSQL_RES *res;
MYSQL_ROW row;
int tcpsock, ch;
int length, i, err = 0;
struct sockaddr_in name;
......@@ -125,8 +126,9 @@ main(int argc, char **argv)
int clientsock, length = sizeof(client);
int cc;
whoami_t whoami;
unsigned char buf[BUFSIZ];
unsigned char buf[BUFSIZ], node_id[64];
secretkey_t secretkey;
tipowner_t tipown;
if ((clientsock = accept(tcpsock,
(struct sockaddr *)&client,
......@@ -159,11 +161,11 @@ main(int argc, char **argv)
if ((cc = read(clientsock, &whoami, sizeof(whoami))) <= 0) {
if (cc < 0)
syslog(LOG_ERR, "Reading request: %m");
syslog(LOG_ERR, "Connection aborted");
syslog(LOG_ERR, "Connection aborted (read)");
goto done;
}
if (cc != sizeof(whoami)) {
syslog(LOG_ERR, "Wrong byte count!");
syslog(LOG_ERR, "Wrong byte count (read)!");
goto done;
}
......@@ -173,20 +175,53 @@ main(int argc, char **argv)
* message in the log file. Local tip will still work but
* remote tip will not.
*/
res = mydb_query("select server from tiplines "
"where node_id='%s'",
1, whoami.nodeid);
res = mydb_query("select server,node_id from tiplines "
"where tipname='%s'",
2, whoami.name);
if (!res) {
syslog(LOG_ERR, "DB Error getting tiplines for %s!",
whoami.nodeid);
whoami.name);
goto done;
}
if ((int)mysql_num_rows(res) == 0) {
syslog(LOG_ERR, "No tipline info for %s!",
whoami.nodeid);
whoami.name);
mysql_free_result(res);
goto done;
}
row = mysql_fetch_row(res);
strcpy(node_id, row[1]);
mysql_free_result(res);
/*
* Figure out current owner. Might not be a reserved node,
* in which case set it to root/wheel by default.
*/
res = mydb_query("select g.unix_gid from groups as g "
"left join experiments as e "
" on g.pid=e.pid and g.gid=e.gid "
"left join reserved as r "
" on e.pid=r.pid and e.eid=r.eid "
"where r.node_id='%s'",
1, node_id);
if (!res) {
syslog(LOG_ERR, "DB Error getting info for %s/%s!",
node_id, whoami.name);
goto done;
}
if ((int)mysql_num_rows(res)) {
row = mysql_fetch_row(res);
tipown.uid = 0;
tipown.gid = atoi(row[0]);
}
else {
/*
* Default to root/root.
*/
tipown.uid = 0;
tipown.gid = 0;
}
mysql_free_result(res);
/*
......@@ -194,17 +229,33 @@ main(int argc, char **argv)
*/
if (mydb_update("update tiplines set portnum=%d, "
"keylen=%d, keydata='%s' "
"where node_id='%s'",
"where tipname='%s'",
whoami.portnum,
whoami.key.keylen, whoami.key.key,
whoami.nodeid)) {
whoami.name)) {
syslog(LOG_ERR, "DB Error updating tiplines for %s!",
whoami.nodeid);
whoami.name);
goto done;
}
syslog(LOG_INFO, "Nodeid %s, Portnum %d, Keylen %d, Key %s\n",
whoami.nodeid, whoami.portnum,
whoami.key.keylen, whoami.key.key);
/*
* And now send the reply.
*/
if ((cc = write(clientsock, &tipown, sizeof(tipown))) <= 0) {
if (cc < 0)
syslog(LOG_ERR, "Writing reply: %m");
syslog(LOG_ERR, "Connection aborted (write)");
goto done;
}
if (cc != sizeof(tipown)) {
syslog(LOG_ERR, "Wrong byte count (write)!");
goto done;
}
syslog(LOG_INFO,
"Tipline %s/%s, Port %d, Keylen %d, Key %s, Group %d\n",
node_id, whoami.name, whoami.portnum,
whoami.key.keylen, whoami.key.key, tipown.gid);
done:
close(clientsock);
}
......
......@@ -99,12 +99,16 @@ char *Machine;
int logfd, runfd, devfd, ptyfd;
int hwflow = 0, speed = B9600, debug = 0, runfile = 0;
#ifdef USESOCKETS
char *Bossnode = BOSSNODE;
char *Bossnode = BOSSNODE;
char *Aclname;
int serverport = SERVERPORT;
int sockfd, tipactive, portnum;
struct sockaddr_in tipclient;
secretkey_t secretkey;
char ourhostname[MAXHOSTNAMELEN];
int needshake;
gid_t tipgid;
uid_t tipuid;
#endif
int
......@@ -122,13 +126,16 @@ main(argc, argv)
Progname = (Progname = rindex(argv[0], '/')) ? ++Progname : *argv;
while ((op = getopt(argc, argv, "rds:Hb:it")) != EOF)
while ((op = getopt(argc, argv, "rds:Hb:itp:")) != EOF)
switch (op) {
#ifdef USESOCKETS
case 'b':
Bossnode = optarg;
break;
case 'p':
serverport = atoi(optarg);
break;
#endif
case 'H':
++hwflow;
......@@ -194,12 +201,17 @@ main(argc, argv)
die("%s: chmod: %s", Logname, geterr(errno));
if (runfile) {
if ((runfd = open(Runname,O_WRONLY|O_CREAT|O_APPEND,0640)) < 0)
unlink(Runname);
if ((runfd = open(Runname,O_WRONLY|O_CREAT|O_APPEND,0600)) < 0)
die("%s: open: %s", Runname, geterr(errno));
if (chmod(Runname, 0640) < 0)
die("%s: chmod: %s", Runname, geterr(errno));
if (fchmod(runfd, 0640) < 0)
die("%s: fchmod: %s", Runname, geterr(errno));
}
#ifdef USESOCKETS
(void) sprintf(strbuf, ACLNAME, ACLPATH, Machine);
Aclname = newstr(strbuf);
/*
* Create and bind our socket.
*/
......@@ -624,10 +636,20 @@ newrun(int sig)
close(runfd);
unlink(Runname);
if ((runfd = open(Runname, O_WRONLY|O_CREAT|O_APPEND, 0640)) < 0)
if ((runfd = open(Runname, O_WRONLY|O_CREAT|O_APPEND, 0600)) < 0)
die("%s: open: %s", Runname, geterr(errno));
if (chmod(Runname, 0640) < 0)
die("%s: chmod: %s", Runname, geterr(errno));
#ifdef USESOCKETS
/*
* Set owner/group of the new run file. Avoid race in which a
* user can get the new file before the chmod, by creating 0600
* and doing the chmod below.
*/
if (fchown(runfd, tipuid, tipgid) < 0)
die("%s: fchown: %s", Runname, geterr(errno));
#endif
if (fchmod(runfd, 0640) < 0)
die("%s: fchmod: %s", Runname, geterr(errno));
dolog(LOG_NOTICE, "new run started");
}
......@@ -653,6 +675,10 @@ terminate(int sig)
else
dolog(LOG_INFO, "revoked");
tipuid = tipgid = 0;
if (runfile)
newrun(sig);
/* Must be done *after* all the above stuff is done! */
createkey();
#else
......@@ -1042,7 +1068,7 @@ int
createkey()
{
int cc, i, fd;
unsigned char buf[BUFSIZ], aclname[BUFSIZ];
unsigned char buf[BUFSIZ];
FILE *fp;
/*
......@@ -1087,17 +1113,27 @@ createkey()
* This is still secure in that we rely on unix permission, which
* is how most of our security is based anyway.
*/
(void) sprintf(aclname, ACLNAME, ACLPATH, Machine);
/*
* We want to control the mode bits when this file is created.
* Sure, could change the umask, but I hate that function.
*/
(void) unlink(aclname);
if ((fd = open(aclname, O_WRONLY|O_CREAT|O_TRUNC, 0640)) < 0)
die("%s: open: %s", aclname, geterr(errno));
(void) unlink(Aclname);
if ((fd = open(Aclname, O_WRONLY|O_CREAT|O_TRUNC, 0600)) < 0)
die("%s: open: %s", Aclname, geterr(errno));
/*
* Set owner/group of the new run file. Avoid race in which a
* user can get the new file before the chmod, by creating 0600
* and doing the chmod after.
*/
if (fchown(fd, tipuid, tipgid) < 0)
die("%s: fchown: %s", Runname, geterr(errno));
if (fchmod(fd, 0640) < 0)
die("%s: fchmod: %s", Runname, geterr(errno));
if ((fp = fdopen(fd, "w")) == NULL)
die("fdopen(%s)", aclname, geterr(errno));
die("fdopen(%s)", Aclname, geterr(errno));
fprintf(fp, "host: %s\n", ourhostname);
fprintf(fp, "port: %d\n", portnum);
......@@ -1126,6 +1162,12 @@ deadboss()
longjmp(deadline, 1);
}
/*
* Tell the capserver our new secret key, and receive the setup info
* back (owner/group of the tty/acl/run file). The handshake might be
* delayed, so we continue to operate, and when we do handshake, set
* the files properly.
*/
int
handshake()
{
......@@ -1133,6 +1175,7 @@ handshake()
struct sockaddr_in name;
struct hostent *he;
whoami_t whoami;
tipowner_t tipown;
/*
* Global. If we fail, we keep trying from the main loop. This
......@@ -1152,7 +1195,7 @@ handshake()
return 0;
/* Our whoami info. */
strcpy(whoami.nodeid, Machine);
strcpy(whoami.name, Machine);
whoami.portnum = portnum;
memcpy(&whoami.key, &secretkey, sizeof(secretkey));
......@@ -1180,7 +1223,7 @@ handshake()
}
memcpy ((char *)&name.sin_addr, he->h_addr, he->h_length);
name.sin_family = AF_INET;
name.sin_port = htons(SERVERPORT);
name.sin_port = htons(serverport);
if (connect(sock, (struct sockaddr *) &name, sizeof(name)) < 0) {
warn("connect(bossnode): %s", geterr(errno));
......@@ -1197,8 +1240,34 @@ handshake()
close(sock);
goto done;
}
if ((cc = read(sock, &tipown, sizeof(tipown))) != sizeof(tipown)) {
if (cc < 0)
warn("read(bossnode): %s", geterr(errno));
else
warn("read(bossnode): Failed");
err = -1;
close(sock);
goto done;
}
tipuid = tipown.uid;
tipgid = tipown.gid;
close(sock);
needshake = 0;
dolog(LOG_INFO,
"Handshake complete. Owner %d, Group %d", tipuid, tipgid);
/*
* Now that we have owner/group info, set the runfile and aclfile.
*/
if (runfile &&
chown(Runname, tipuid, tipgid) < 0)
die("%s: chown: %s", Runname, geterr(errno));
if (chown(Aclname, tipuid, tipgid) < 0)
die("%s: chown: %s", Aclname, geterr(errno));
done:
alarm(0);
signal(SIGALRM, SIG_DFL);
......
......@@ -133,26 +133,8 @@ DBQueryFatal("unlock tables");
######################################################################
if (!$error && @nodes) {
my @conlist=();
my @sharks=();
foreach $n ( @nodes ) {
if ($n =~ /(sh\d+)/) {
# Its a shark - do the shelf if it hasn't been done yet
my $shelf = $1;
if (!(join(",",@sharks) =~ /\b$shelf\b/)) {
push(@sharks,$shelf);
push(@conlist,$shelf);
}
}
else {
push(@conlist,$n);
}
}
if ($debug) { print "Console setup list: @conlist\n"; }
system("$consetup @conlist") == 0 or
print STDERR "WARNING: $consetup @conlist failed!\n";
system("$consetup @nodes") == 0 or
print STDERR "WARNING: $consetup @nodes failed!\n";
}
if ($debug) { print "Returning with value $error\n"; }
......
......@@ -326,30 +326,8 @@ foreach my $n (@freed_nodes) {
######################################################################
if (@freed_nodes) {
my @conlist=();
my @sharks=();
foreach my $n ( @freed_nodes ) {
# Shark hack
if ($n =~ /(sh\d+)/) {
# Its a shark - do the shelf if it hasn't been done yet
my $shelf = $1;
if (!(join(",",@sharks) =~ /\b$shelf\b/)) {
push(@sharks,$shelf);
push(@conlist,$shelf);
}
}
# End shark hack
else {
push(@conlist,$n);
}
}
#
# Finally, run the actual command
#
system("$consetup @conlist") == 0 ||
print STDERR "WARNING: $consetup @conlist failed!\n";
system("$consetup @freed_nodes") == 0 ||
print STDERR "WARNING: $consetup @freed_nodes failed!\n";
}
exit($error);
......
......@@ -68,17 +68,6 @@ while (@nodes) {
while ($i < 10 && @nodes > 0) {
my $node = shift(@nodes);
#
# Another classic shark hack. Only add shelves to the list.
#
if ($node =~ /(sh\d+)-\d+/) {
if (defined($shelves{$1})) {
next;
}
$shelves{$1} = $1;
$node = $1;
}
push(@batch, $node);
$i++;
}
......
......@@ -72,25 +72,30 @@ foreach my $node (@nodes) {
}
#
# Need the project for the node since that is the group.
# We need to know all of the tiplines associated with this node,
# and where they live. There might not be any at all, in which
# case we are done. This query could probably be rolled into the
# next query, but that would be confusing.
#
# HACK! If its a shark shelf, then need a wildcard query so we can
# find the nodes.
#
if ($node =~ /sh\d+/) {
$db_result =
DBQueryFatal("select pid from reserved ".
"where node_id like '$node%'");
}
else {
$db_result =
DBQueryFatal("select g.unix_name from groups as g ".
"left join experiments as e ".
" on g.pid=e.pid and g.gid=e.gid ".
"left join reserved as r ".
" on e.pid=r.pid and e.eid=r.eid ".
"where r.node_id='$node'");
$tiplines_result =
DBQueryFatal("select tipname,server from tiplines ".
"where node_id='$node'");
if (! $tiplines_result->numrows) {
next;
}
#
# Determine the unix group for the node.
#
$db_result =
DBQueryFatal("select g.unix_name from groups as g ".
"left join experiments as e ".
" on g.pid=e.pid and g.gid=e.gid ".
"left join reserved as r ".
" on e.pid=r.pid and e.eid=r.eid ".
"where r.node_id='$node'");
if ($db_result->numrows > 0) {
@row = $db_result->fetchrow_array();
$pid = $row[0];
......@@ -100,25 +105,21 @@ foreach my $node (@nodes) {
}
#
# Now we need to know *where* the tip line lives, since there are
# multiple tip servers. We want to group these still, so use an
# array of command arguments, indexed by the tip server.
# For each tipline associated with the node (might be more than one),
# we want to issue the proxy command. However, we want to group all
# commands for each server together to avoid a zillion ssh calls. So,
# use an array of command arguments, indexed by the tip server.
#
$db_result =
DBQueryFatal("select server from tiplines where node_id='$node'");
if ($db_result->numrows == 0) {
print STDERR "*** No tip server defined for $node\n";
exit 1;
}
@row = $db_result->fetchrow_array();
$server = $row[0];
if (defined($cmdargs{$server})) {
$cmdargs{$server} = $cmdargs{$server} . " $node $pid";
}
else {
$cmdargs{$server} = "$node $pid ";
while (@row = $tiplines_result->fetchrow_array()) {
my $tipname = $row[0];
my $server = $row[1];
if (defined($cmdargs{$server})) {
$cmdargs{$server} = $cmdargs{$server} . " $tipname $pid";
}
else {
$cmdargs{$server} = "$tipname $pid ";
}
}
}
......
......@@ -2,18 +2,20 @@
use English;
#
# usage: console_setup node1 pid1 node2 pid2 ...
# usage: console_setup.proxy tipname group tipname group ...
#
# This script runs on the tip servers, which is where the capture processes
# are running. Since the tip servers do not have direct access to the DB,
# this is invoked from the boss node. For each tip device, specify the group
# the associated files should be in. This script will send the proper signal
# to the capture process, and then reset the files to the proper group.
#
# This script runs on plastic where the tip lines are and the capture
# processes are running. Since plastic does not have access to the DB
# we invoke this from paper in nalloc/nfree, giving it a list of node/pid
# pairs to set. Only use can run this script.
#
my $TIPLOGDIR = "/var/log/tiplogs";
my $TIPDEVDIR = "/dev/tip";
my $TBPID = "flux";
my $dbg = 1;
my %nodepid = ();
my %tipgroup = ();
my $failures = 0;
# un-taint path
......@@ -21,9 +23,7 @@ $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
sub usage {
die("Usage: console_setup node project_id [node pid node pid ...]\n".
"Enables project members to access consoles logs of nodes.\n".
"This script must be run as root, typically from paper.\n");
die("Usage: console_setup.proxy tipname group [tipname group ...]\n");
}
if ( $#ARGV < 1) {
......@@ -35,18 +35,18 @@ while ($#ARGV >= 0) {
usage();
}
$node = shift;
$pid = shift;
$name = shift;
$grp = shift;
# untaint the args.
if ($node =~ /^([-\@\w.]+)$/) {
$node = $1;
if ($name =~ /^([-\@\w.]+)$/) {
$name = $1;
}
if ($pid =~ /^([-\@\w.]+)$/) {
$pid = $1;
if ($grp =~ /^([-\@\w.]+)$/) {
$grp = $1;
}
$nodepid{$node} = $pid;
$tipgroup{$name} = $grp;
}
#
......@@ -63,18 +63,18 @@ if (! chdir($TIPLOGDIR)) {
#
# Well, do it.
#
foreach my $node ( keys %nodepid ) {
my $pid = $nodepid{$node};
my $filename = "${node}.run";
my $tipdevname = "$TIPDEVDIR/$node";
my $aclname = "${node}.acl";
foreach my $tipname ( keys %tipgroup ) {
my $grp = $tipgroup{$tipname};
my $filename = "${tipname}.run";
my $tipdevname = "$TIPDEVDIR/$tipname";
my $aclname = "${tipname}.acl";
#
# Find out the current group setting for the file.
#
if (! -e $filename) {
print STDERR
"*** Console log for $node does not exist! Skipping ...\n";
"*** Console log for $tipname does not exist! Skipping ...\n";
$failures++;
next;
}
......@@ -86,7 +86,7 @@ foreach my $node ( keys %nodepid ) {
# in rolling the file. Inconvenient for the user to have the log keep
# rolling.
#
if (getgrgid($gid) eq $pid) {
if (getgrgid($gid) eq $grp) {
goto setperms;
}
......@@ -94,19 +94,37 @@ foreach my $node ( keys %nodepid ) {
die("Could not unlink run file $filename");
#
# Now send a USR1 signal to the capture process so that it opens
# a new run file.
# Unlink this so current user gets old version not new one.
#
if (-e $aclname) {
unlink($aclname) or
die("Could not unlink run file $aclname");
}
#
# Remove group access from the tty device. This renders the device
# inaccessible to both old and new users while we revoke access from
# any current tip user.
#
if (-e $tipdevname) {
chmod(0600, $tipdevname) or
die("Could not chmod(0600) $tipdevname: $!");
}
#
# Now send a USR2 signal to the capture process. This will shutdown
# any attached tip, and reopen the run file.
#
$procid = `cat ${node}.pid`;
$procid = `cat ${tipname}.pid`;
$procid =~ s/\n//;
# untaint
if ($procid =~ /^([-\@\w.]+)$/) {
$procid = $1;
}
kill('USR1', $procid) or
die("Could not signal(USR1) process $procid for log $filename");
kill('USR2', $procid) or
die("Could not signal(USR2) process $procid for log $filename");
# Give capture the chance to react.
# Don't use sleep cause 1 second too long wait when doing 25 nodes!
# Don't use sleep cause 1 second too long wait when doing 25 devices!
select(undef, undef, undef, 0.2);
#
......@@ -121,41 +139,12 @@ foreach my $node ( keys %nodepid ) {
# The new log should exist now. Set its group, and just to be safe
# set its mode too.
#
$gid = getgrnam($pid);
$gid = getgrnam($grp);
chown(0, $gid, $filename) or
die("Could not chown(0, $gid) $filename: $!");
chmod(0640, $filename) or
die("Could not chmod(0640) $filename: $!");
#
# Remove group access from the tty device. This renders the device
# inaccessible to both old and new users while we revoke access from
# any current tip user.
#
if (-e $tipdevname) {
chmod(0600, $tipdevname) or
die("Could not chmod(0600) $tipdevname: $!");
}
#
# Ditto for the "acl" file, which is how socket based tip/capture enforce
# protection of the console line.
#
if (-e $aclname) {
chmod(0600, $aclname) or
die("Could not chmod(0600) $aclname: $!");
}
#
# Now send a USR2 signal to the capture process so that it closes down
# any tip thats attached to it.
#
kill('USR2', $procid) or
die("Could not signal(USR2) process $procid for log $filename");
# Give capture the chance to react.
# Don't use sleep cause 1 second too long wait when doing 25 nodes!
select(undef, undef, undef, 0.2);
#
# Set the mode and group on the tty that tip is going to use. This
# allows the new group to access the tip line using a non-setuid/gid
......
......@@ -2,18 +2,20 @@
use English;
#
# usage: console_setup node1 pid1 node2 pid2 ...
# usage: console_setup.proxy tipname group tipname group ...
#
# This script runs on the tip servers, which is where the capture processes
# are running. Since the tip servers do not have direct access to the DB,
# this is invoked from the boss node. For each tip device, specify the group
# the associated files should be in. This script will send the proper signal
# to the capture process, and then reset the files to the proper group.
#
# This script runs on plastic where the tip lines are and the capture
# processes are running. Since plastic does not have access to the DB
# we invoke this from paper in nalloc/nfree, giving it a list of node/pid
# pairs to set. Only use can run this script.
#
my $TIPLOGDIR = "/var/log/tiplogs";
my $TIPDEVDIR = "/dev/tip";
my $TBPID = "flux";
my $dbg = 1;
my %nodepid = ();
my %tipgroup = ();
my $failures = 0;
# un-taint path
......@@ -21,9 +23,7 @@ $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};