Commit 249d37af authored by Mike Hibler

If bievent_send() fails, clear the entry from the cache so we will try again.

Also, if the first (PXEBOOTING) event send fails, pause for a second and then
try again before sending the follow-on event.
parent f8fc27f6
/* /*
* Copyright (c) 2000-2012 University of Utah and the Flux Group. * Copyright (c) 2000-2013 University of Utah and the Flux Group.
* *
* {{{EMULAB-LICENSE * {{{EMULAB-LICENSE
* *
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
static int bicache_init(void); static int bicache_init(void);
#ifdef EVENTSYS #ifdef EVENTSYS
static int bicache_needevent(struct in_addr ipaddr); static int bicache_needevent(struct in_addr ipaddr);
static void bicache_clearevent(struct in_addr ipaddr);
#endif #endif
int int
...@@ -82,7 +83,7 @@ bootinfo(struct in_addr ipaddr, char *node_id, struct boot_info *boot_info, ...@@ -82,7 +83,7 @@ bootinfo(struct in_addr ipaddr, char *node_id, struct boot_info *boot_info,
void *opaque, int no_event_send, int *event_sent) void *opaque, int no_event_send, int *event_sent)
{ {
#ifdef EVENTSYS #ifdef EVENTSYS
int needevent = 0; int needevent = 0, eventfailed = 0;
#endif #endif
int err; int err;
boot_what_t *boot_whatp = (boot_what_t *) &boot_info->data; boot_what_t *boot_whatp = (boot_what_t *) &boot_info->data;
...@@ -93,9 +94,12 @@ bootinfo(struct in_addr ipaddr, char *node_id, struct boot_info *boot_info, ...@@ -93,9 +94,12 @@ bootinfo(struct in_addr ipaddr, char *node_id, struct boot_info *boot_info,
inet_ntoa(ipaddr), boot_info->data, boot_info->version); inet_ntoa(ipaddr), boot_info->data, boot_info->version);
#ifdef EVENTSYS #ifdef EVENTSYS
needevent = bicache_needevent(ipaddr); needevent = bicache_needevent(ipaddr);
if (!no_event_send && needevent) if (!no_event_send && needevent &&
bievent_send(ipaddr, opaque, bievent_send(ipaddr, opaque, TBDB_NODESTATE_PXEBOOTING)) {
TBDB_NODESTATE_PXEBOOTING); /* send failed, clear the cache entry */
bicache_clearevent(ipaddr);
eventfailed = 1;
}
#endif #endif
err = query_bootinfo_db(ipaddr, node_id, boot_info->version, err = query_bootinfo_db(ipaddr, node_id, boot_info->version,
boot_whatp, boot_info->data); boot_whatp, boot_info->data);
...@@ -106,9 +110,12 @@ bootinfo(struct in_addr ipaddr, char *node_id, struct boot_info *boot_info, ...@@ -106,9 +110,12 @@ bootinfo(struct in_addr ipaddr, char *node_id, struct boot_info *boot_info,
inet_ntoa(ipaddr), boot_info->version); inet_ntoa(ipaddr), boot_info->version);
#ifdef EVENTSYS #ifdef EVENTSYS
needevent = bicache_needevent(ipaddr); needevent = bicache_needevent(ipaddr);
if (!no_event_send && needevent) if (!no_event_send && needevent &&
bievent_send(ipaddr, opaque, bievent_send(ipaddr, opaque, TBDB_NODESTATE_PXEBOOTING)) {
TBDB_NODESTATE_PXEBOOTING); /* send failed, clear the cache entry */
bicache_clearevent(ipaddr);
eventfailed = 1;
}
#endif #endif
err = query_bootinfo_db(ipaddr, node_id, err = query_bootinfo_db(ipaddr, node_id,
boot_info->version, boot_whatp, NULL); boot_info->version, boot_whatp, NULL);
...@@ -129,6 +136,22 @@ bootinfo(struct in_addr ipaddr, char *node_id, struct boot_info *boot_info, ...@@ -129,6 +136,22 @@ bootinfo(struct in_addr ipaddr, char *node_id, struct boot_info *boot_info,
boot_info->status = BISTAT_SUCCESS; boot_info->status = BISTAT_SUCCESS;
#ifdef EVENTSYS #ifdef EVENTSYS
if (!no_event_send && needevent) { if (!no_event_send && needevent) {
/*
* Retry a failed PXEBOOTING event.
*
* Chances are, a failure here will amplify down
* the road as stated gets out of sync. So pause
* here and try to stay on track.
*/
if (eventfailed) {
sleep(1);
info("%s: retry failed PXEBOOTING event\n",
inet_ntoa(ipaddr));
bicache_needevent(ipaddr);
if (bievent_send(ipaddr, opaque,
TBDB_NODESTATE_PXEBOOTING))
bicache_clearevent(ipaddr);
}
switch (boot_whatp->type) { switch (boot_whatp->type) {
case BIBOOTWHAT_TYPE_PART: case BIBOOTWHAT_TYPE_PART:
case BIBOOTWHAT_TYPE_DISKPART: case BIBOOTWHAT_TYPE_DISKPART:
...@@ -240,4 +263,29 @@ bicache_needevent(struct in_addr ipaddr) ...@@ -240,4 +263,29 @@ bicache_needevent(struct in_addr ipaddr)
} }
return rval; return rval;
} }
/*
 * Reset the cached event timestamp for a node.
 *
 * Called after an event send has failed. Overwrites the cache record
 * keyed by the node's IP address with a zero time_t value.
 * NOTE(review): presumably a zero timestamp makes the next
 * bicache_needevent() call report that an event is needed again --
 * confirm against that function.
 *
 * Does nothing if the cache database (dbp) is not set. A failed
 * write is logged via errorc() and is otherwise ignored.
 */
static void
bicache_clearevent(struct in_addr ipaddr)
{
	time_t	zerostamp = 0;
	DBT	key, item;

	/* No cache database, so there is nothing to clear. */
	if (!dbp)
		return;

	/* Record is keyed by the raw IP address, value is the timestamp. */
	key.data = (void *) &ipaddr;
	key.size = sizeof(ipaddr);
	item.data = (void *) &zerostamp;
	item.size = sizeof(zerostamp);

	if ((dbp->put)(dbp, &key, &item, 0) == 0)
		return;

	errorc("Could not insert DBM entry for %s\n", inet_ntoa(ipaddr));
}
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment