Commit 0749ef9c authored by Leigh Stoller's avatar Leigh Stoller

Such a brutal ElabinElab hack ... When trying to swapin an actual

experiment from the web interface, I ran into another control network
problem, this time in bootinfo. When a node is sitting free, it waits
in pxeboot for a bootinfo packet from boss to tell it what to do (this
is different than when the node is allocated, and bootinfo tells it
what to do in a reply to the initial request). In the PXEWAIT case, we
*send* it a packet, addressed to its *control network* address, which
in the inner DB, is on the inner control network, but of course PXE is
really using the outer control network, so packets addressed to inner
control network are never seen by pxeboot.

This is the only (known) case of this happening, and rather than try
for some general, over-engineered solution, I did something unusual,
and put in a hack, ifdefed for ELABINELAB (meaning, it's an inner
elab). I know, you're thinking, how could he have done such a thing,
it's so unlike him!

Well, it was damn easy! Anyway, this little hack checks the DB for an
interface tagged as role='outer_ctrl' and uses that IP instead of the
inner control network. When I create the inner DB from the outer DB, I
was already leaving the outer control network in place so that
bootinfo could find the proper node (again, because the bootinfo request
packets are coming from the outer control network, and so its IP would
not match any nodes in the DB).

I'd like to say that this is the last problem with swapin, but I see
in my other window that the event scheduler failed to start on inner
ops with some silly ssh permission denied error. What's that all
about?
parent cafbb325
/*
* EMULAB-COPYRIGHT
* Copyright (c) 2000-2003 University of Utah and the Flux Group.
* Copyright (c) 2000-2004 University of Utah and the Flux Group.
* All rights reserved.
*/
struct boot_what;
......@@ -9,6 +9,7 @@ int open_bootinfo_db(void);
int close_bootinfo_db(void);
int query_bootinfo_db(struct in_addr ipaddr, int version,
struct boot_what *info);
/*
 * ElabinElab hack (the definition is only compiled when ELABINELAB is
 * defined). Looks up the interface with role='outer_ctrl' on the node that
 * owns target->sin_addr and, if it has an IP, overwrites target->sin_addr
 * with it. Returns 0 when the target was rewritten or left alone, 1 when
 * the lookup result was unusable (multiple rows, or no IP in the row).
 */
int elabinelab_hackcheck(struct sockaddr_in *target);
extern int debug;
......
......@@ -300,6 +300,7 @@ boot_newnode_mfs(struct in_addr ipaddr, int version, boot_what_t *info)
break;
default:
error("Too many DB entries for OSID %s\n", NEWNODEOSID);
mysql_free_result(res);
return 1;
}
row = mysql_fetch_row(res);
......@@ -310,6 +311,7 @@ boot_newnode_mfs(struct in_addr ipaddr, int version, boot_what_t *info)
mysql_free_result(res);
return 0;
}
mysql_free_result(res);
error("No path info for OSID %s\n", NEWNODEOSID);
return 1;
#undef MFS_PATH
......@@ -355,6 +357,56 @@ parse_mfs_path(char *str, boot_what_t *info)
sizeof(info->what.mfs)-strlen(info->what.mfs));
}
/*
 * ElabinElab hack. It's really terrible!
 *
 * Given the address we are about to send a bootinfo packet to, look for an
 * interface on the same node tagged role='outer_ctrl' and, if one exists,
 * replace target->sin_addr with that interface's IP.
 *
 * target: in/out; only sin_addr is read and (on success) overwritten.
 *
 * Returns 0 if the target was rewritten, or was left untouched because the
 * query failed or matched no rows. Returns 1 if the lookup gave an unusable
 * answer: more than one row, a row with no IP, or an IP string that does
 * not parse. The target is never modified when 1 is returned.
 */
#ifdef ELABINELAB
int
elabinelab_hackcheck(struct sockaddr_in *target)
{
	int		nrows;
	MYSQL_RES	*res;
	MYSQL_ROW	row;
	struct in_addr	outer;

	res = mydb_query("select i2.IP from interfaces as i1 "
			 "left join interfaces as i2 on i2.node_id=i1.node_id "
			 " and i2.role='outer_ctrl' "
			 "where i1.IP='%s'", 1, inet_ntoa(target->sin_addr));
	if (!res) {
		error("elabinelab_hackcheck failed for host %s\n",
		      inet_ntoa(target->sin_addr));
		/* XXX Wrong. Should fail so client can request again later */
		return 0;
	}

	nrows = mysql_num_rows(res);
	switch (nrows) {
	case 0:
		/* No hack interface */
		mysql_free_result(res);
		return 0;
	case 1:
		break;
	default:
		error("elabinelab_hackcheck: Too many DB entries %s\n",
		      inet_ntoa(target->sin_addr));
		mysql_free_result(res);
		return 1;
	}

	/*
	 * Because of the left join, the single row holds a NULL column when
	 * the node has no outer_ctrl interface. Also guard against the fetch
	 * itself returning no row.
	 */
	row = mysql_fetch_row(res);
	if (row != NULL && row[0] != NULL && row[0][0] != '\0') {
		/*
		 * Parse into a scratch address so a malformed DB value can
		 * never leave the caller's target half-updated.
		 */
		if (inet_aton(row[0], &outer) == 0) {
			error("elabinelab_hackcheck: Bad outer IP '%s' for %s\n",
			      row[0], inet_ntoa(target->sin_addr));
			mysql_free_result(res);
			return 1;
		}
		target->sin_addr = outer;
		/* row points into res, so release it only after the parse. */
		mysql_free_result(res);
		return 0;
	}
	mysql_free_result(res);
	error("elabinelab_hackcheck: No IP address %s\n",
	      inet_ntoa(target->sin_addr));
	return 1;
}
#endif
#ifdef TEST
#include <stdarg.h>
......
......@@ -85,7 +85,7 @@ main(int argc, char **argv)
if (debug)
info("%s\n", build_info);
/* Make sure we can map target */
/* Make sure we can map target. */
if ((he = gethostbyname(argv[0])) == NULL) {
errorc("gethostbyname(%s)", argv[0]);
exit(1);
......@@ -173,7 +173,11 @@ main(int argc, char **argv)
log_bootwhat(target.sin_addr, boot_whatp);
boot_info.status = BISTAT_SUCCESS;
boot_info.opcode = BIOPCODE_BOOTWHAT_ORDER;
#ifdef ELABINELAB
/* This is too brutal to even describe */
elabinelab_hackcheck(&target);
#endif
if (sendto(sock, (char *)&boot_info, sizeof(boot_info), 0,
(struct sockaddr *)&target, sizeof(target)) < 0)
errorc("sendto");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment