/*
 * Copyright (c) 2000-2016 University of Utah and the Flux Group.
 * 
 * {{{EMULAB-LICENSE
 * 
 * This file is part of the Emulab network testbed software.
 * 
 * This file is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or (at
 * your option) any later version.
 * 
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
 * License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this file.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * }}}
 */

Leigh Stoller's avatar
Leigh Stoller committed
24 25 26
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
27
#include <arpa/inet.h>
Leigh Stoller's avatar
Leigh Stoller committed
28
#include <stdio.h>
29
#include <paths.h>
30 31 32
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
33
#include <signal.h>
34 35
#include <db.h>
#include <fcntl.h>
36
#include <time.h>
37 38
#include "log.h"
#include "tbdefs.h"
39
#include "bootwhat.h"
40
#include "bootinfo.h"
Leigh Stoller's avatar
Leigh Stoller committed
41

42 43 44 45 46 47 48 49
/*
 * Minimum number of seconds that must pass before we send another
 * event for a node. This is to decrease the number of spurious events
 * we get from nodes when bootinfo packets are lost.
 *
 * bicache_needevent() suppresses the event when the elapsed time since
 * the previous request is less than or equal to this value, so a new
 * event goes out only once strictly more than MINEVENTTIME seconds
 * have passed.
 */
#define MINEVENTTIME	10

/*
 * Forward declarations for the duplicate-suppression cache helpers
 * defined at the bottom of this file.
 */
static int	bicache_init(void);
#ifdef	EVENTSYS
static int	bicache_needevent(struct in_addr ipaddr);
/*
 * Every call to bicache_clearevent() in this file is inside a
 * BOOTINFO_PXEEVENTS conditional, so without that option the function is
 * defined but never referenced. GCC warns about unused static functions
 * just as clang does, hence the guard covers both compilers.
 */
#if defined(__GNUC__) || defined(__clang__)
__attribute__((unused)) /* Suppress warning */
#endif
static void	bicache_clearevent(struct in_addr ipaddr);
#endif
57 58

/*
 * One-time setup for the bootinfo module.
 *
 * Brings up, in order: the bootinfo database, the in-memory event cache
 * and (only when built with EVENTSYS) the event system. The first step
 * that fails is logged and aborts the sequence.
 *
 * Returns 0 when every step succeeded, -1 otherwise.
 */
int
bootinfo_init(void)
{
	/* Initialize data base */
	if (open_bootinfo_db() != 0) {
		error("could not open database\n");
		return -1;
	}

	/* Timestamp cache used to suppress duplicate events */
	if (bicache_init() != 0) {
		error("could not initialize cache\n");
		return -1;
	}

#ifdef EVENTSYS
	if (bievent_init() != 0) {
		error("could not initialize event system\n");
		return -1;
	}
#endif

	return 0;
}
Leigh Stoller's avatar
Leigh Stoller committed
83

84
int
85
bootinfo(struct in_addr ipaddr, char *node_id, struct boot_info *boot_info, 
86
	 void *opaque, int no_event_send, int *event_sent)
87
{
88 89
	boot_what_t	*boot_whatp = (boot_what_t *) &boot_info->data;
	int		err;
90
#ifdef	EVENTSYS
91
	int		needevent = 0, eventfailed = 0;
92
	int		doevents = 0, no_boot_event_send = no_event_send;
93 94

	/*
95 96
	 * We are not going to send any events for nodes we don't know about,
	 * or PXEBOOTING/BOOTING events for "pxelinux" nodes.
97
	 */
98 99 100 101
	if (!findnode_bootinfo_db(ipaddr, &doevents))
		no_boot_event_send = no_event_send = 1;
	else if (!no_event_send)
		no_boot_event_send = doevents ? 0 : 1;
102
#endif
103 104

	switch (boot_info->opcode) {
105 106 107
	case BIOPCODE_BOOTWHAT_KEYED_REQUEST:
		info("%s: KEYED REQUEST (key=[%s], vers %d)\n",
			inet_ntoa(ipaddr), boot_info->data, boot_info->version);
108
#ifdef	EVENTSYS
109 110
		if (!no_event_send) {
			needevent = bicache_needevent(ipaddr);
111
#if defined(BOOTINFO_PXEEVENTS)
112 113 114 115 116 117 118
			if (!no_boot_event_send && needevent &&
			    bievent_send(ipaddr, opaque,
					 TBDB_NODESTATE_PXEBOOTING)) {
				/* send failed, clear the cache entry */
				bicache_clearevent(ipaddr);
				eventfailed = 1;
			}
119
#endif
120
		}
121
#endif
122 123
		err = query_bootinfo_db(ipaddr, node_id, boot_info->version, 
					boot_whatp, boot_info->data);
124
		break;
125 126 127 128
	case BIOPCODE_BOOTWHAT_REQUEST:
	case BIOPCODE_BOOTWHAT_INFO:
		info("%s: REQUEST (vers %d)\n",
		     inet_ntoa(ipaddr), boot_info->version);
129
#ifdef	EVENTSYS
130 131
		if (!no_event_send) {
			needevent = bicache_needevent(ipaddr);
132
#if defined(BOOTINFO_PXEEVENTS)
133 134 135 136 137 138 139
			if (!no_boot_event_send && needevent &&
			    bievent_send(ipaddr, opaque,
					 TBDB_NODESTATE_PXEBOOTING)) {
				/* send failed, clear the cache entry */
				bicache_clearevent(ipaddr);
				eventfailed = 1;
			}
140
#endif
141
		}
142
#endif
143
		err = query_bootinfo_db(ipaddr, node_id,
144
					boot_info->version, boot_whatp, NULL);
145
		break;
146

147 148 149
	default:
		info("%s: invalid packet %d\n",
		     inet_ntoa(ipaddr), boot_info->opcode);
150 151 152 153
#ifdef	EVENTSYS
		if (event_sent)
			*event_sent = 0;
#endif
154 155 156 157 158 159
		return -1;
	}
	if (err)
		boot_info->status = BISTAT_FAIL;
	else {
		boot_info->status = BISTAT_SUCCESS;
160
#ifdef	EVENTSYS
161
		if (needevent) {
162 163 164 165 166 167 168
			/*
			 * Retry a failed PXEBOOTING event.
			 *
			 * Chances are, a failure here will amplify down
			 * the road as stated gets out of sync. So pause
			 * here and try to stay on track.
			 */
169
			if (!no_boot_event_send && eventfailed) {
170 171 172 173
				sleep(1);
				info("%s: retry failed PXEBOOTING event\n",
				     inet_ntoa(ipaddr));
				bicache_needevent(ipaddr);
174
#if defined(BOOTINFO_PXEEVENTS)
175 176 177
				if (bievent_send(ipaddr, opaque,
						 TBDB_NODESTATE_PXEBOOTING))
					bicache_clearevent(ipaddr);
178
#endif
179
			}
180 181
			switch (boot_whatp->type) {
			case BIBOOTWHAT_TYPE_PART:
182
			case BIBOOTWHAT_TYPE_DISKPART:
183 184 185
			case BIBOOTWHAT_TYPE_SYSID:
			case BIBOOTWHAT_TYPE_MB:
			case BIBOOTWHAT_TYPE_MFS:
186
#if defined(BOOTINFO_PXEEVENTS)
187 188 189 190 191
				if (!no_boot_event_send) {
					bievent_send(ipaddr, opaque,
						     TBDB_NODESTATE_BOOTING);
					break;
				}
192
#endif
193
				needevent = 0;
194
				break;
195
					
196 197 198 199 200 201 202 203 204 205 206 207 208
			case BIBOOTWHAT_TYPE_WAIT:
				bievent_send(ipaddr, opaque,
					     TBDB_NODESTATE_PXEWAIT);
				break;

			case BIBOOTWHAT_TYPE_REBOOT:
				bievent_send(ipaddr, opaque, 
					     TBDB_NODESTATE_REBOOTING);
				break;

			default:
				error("%s: invalid boot directive: %d\n",
				      inet_ntoa(ipaddr), boot_whatp->type);
209
				needevent = 0;
210
				break;
211
			}
212
		}
213
#endif
214
	}
215 216 217 218 219

#ifdef	EVENTSYS
	if (event_sent)
		*event_sent = needevent;
#endif
220
	return 0;
221 222
}

223 224 225 226 227 228 229 230 231 232 233
/*
 * Simple cache to prevent dups when bootinfo packets get lost.
 *
 * Handle for the cache DB; assigned by bicache_init(). The cache
 * accessors check it for NULL first: bicache_needevent() then always
 * reports that an event is needed, and bicache_clearevent() does nothing.
 */
static DB      *dbp;

/*
 * Initialize an in-memory DB
 */
static int
bicache_init(void)
{
234 235 236 237
	if ((dbp = dbopen(NULL, O_CREAT|O_TRUNC|O_RDWR, 664, DB_HASH, NULL))
	    == NULL) {
		pfatal("failed to initialize the bootinfo DBM");
		return -1;
238 239 240 241
	}
	return 0;
}

242
#ifdef	EVENTSYS
243 244 245 246 247 248 249 250 251 252 253
/*
 * This does both a check and an insert. The idea is that we store the
 * current time of the request, returning yes/no to the caller if the
 * current request is is within a small delta of the previous request.
 * This should keep the number of repeats to a minimum, since a requests
 * coming within a few seconds of each other indicate lost bootinfo packets.
 */
static int
bicache_needevent(struct in_addr ipaddr)
{
	DBT	key, item;
254
	time_t  tt;
255 256
	int	rval = 1, r;

257 258 259 260
	/* So we can include bootinfo into tmcd; always send the event. */
	if (!dbp)
		return 1;

261
	tt = time(NULL);
262 263 264 265 266 267
	key.data = (void *) &ipaddr;
	key.size = sizeof(ipaddr);

	/*
	 * First find current value.
	 */
268
	if ((r = (dbp->get)(dbp, &key, &item, 0)) != 0) {
269 270 271 272 273
		if (r == -1) {
			errorc("Could not retrieve entry from DBM for %s\n",
			       inet_ntoa(ipaddr));
		}
	}
274
	if (r == 0) {
275 276 277 278 279 280
		time_t	oldtt = *((time_t *)item.data);

		if (debug) {
			info("Timestamps: old:%ld new:%ld\n", oldtt, tt);
		}

281 282 283 284 285 286 287 288 289
		/*
		 * XXX sanity check, in case time goes backward while we
		 * are running.
		 */
		if (tt < oldtt) {
			info("%s: Whoa! time went backwards (%ld -> %ld),"
			     "fixing...\n",
			     inet_ntoa(ipaddr), oldtt, tt);
		} else if (tt - oldtt <= MINEVENTTIME) {
290 291 292 293 294 295 296 297 298
			rval = 0;
			info("%s: no event will be sent: last:%ld cur:%ld\n",
			     inet_ntoa(ipaddr), oldtt, tt);
		}
	}
	if (rval) {
		item.data = (void *) &tt;
		item.size = sizeof(tt);

299
		if ((dbp->put)(dbp, &key, &item, 0) != 0) {
300 301 302 303 304 305
			errorc("Could not insert DBM entry for %s\n",
			       inet_ntoa(ipaddr));
		}
	}
	return rval;
}
306 307 308 309 310

/*
 * Clear a timestamp in the cache.
 * We call this if an event send fails.
 */
311 312 313
#ifdef __clang__
__attribute__((unused)) /* Suppress warning */
#endif
314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
static void
bicache_clearevent(struct in_addr ipaddr)
{
	DBT	key, item;
	time_t  tt;

	if (dbp) {
		key.data = (void *) &ipaddr;
		key.size = sizeof(ipaddr);

		tt = 0;
		item.data = (void *) &tt;
		item.size = sizeof(tt);

		if ((dbp->put)(dbp, &key, &item, 0) != 0) {
			errorc("Could not insert DBM entry for %s\n",
			       inet_ntoa(ipaddr));
		}
	}
}
334
#endif