decls.h 8.38 KB
Newer Older
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1
2
/*
 * EMULAB-COPYRIGHT
3
 * Copyright (c) 2000-2005 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
4
5
6
 * All rights reserved.
 */

7
8
9
10
/*
 * Shared definitions for frisbee client/server code.
 */

Mike Hibler's avatar
Mike Hibler committed
11
#include <limits.h>	/* CHAR_BIT */
12
13
14
#include "log.h"

/*
Mike Hibler's avatar
Mike Hibler committed
15
16
 * Ethernet MTU (1514) - eth header (14) - min UDP/IP (28) - BLOCK msg
 * header (24).
17
 */
Mike Hibler's avatar
Mike Hibler committed
18
#define MAXBLOCKSIZE	1448	/* bytes of block data per packet: 1514 - 14 - 28 - 24 */
19
20

/*
Mike Hibler's avatar
Mike Hibler committed
21
22
 * Images are broken into chunks which are the standalone unit of decompression
 * Chunks are broken into blocks which are the unit of transmission
23
24
 */
#define CHUNKSIZE	1024	/* blocks per chunk */
#define BLOCKSIZE	1024	/* bytes per block */
26
27

/*
Mike Hibler's avatar
Mike Hibler committed
28
 * Make sure we can fit a block in a single ethernet MTU.
Mike Hibler's avatar
Mike Hibler committed
29
30
 * This limits the maximum block size to 1448 with the current protocol
 * headers on Ethernet.
31
 */
Mike Hibler's avatar
Mike Hibler committed
32
33
34
/* Refuse to build when a block would not fit in one packet's payload. */
#if MAXBLOCKSIZE < BLOCKSIZE
#error "Invalid block size"
#endif
Mike Hibler's avatar
Mike Hibler committed
35

Mike Hibler's avatar
Mike Hibler committed
36
37
38
39
40
41
42
43
44
45
/*
 * Make sure we can represent a bitmap of blocks in a single packet.
 * This limits the maximum number of blocks in a chunk to 1448*8 == 11584.
 * With the maximum block size of 1448, this limits a chunk to no more
 * than 16,773,632 bytes (just under 16MB).
 */
/*
 * Refuse to build unless the per-chunk block bitmap is a whole number of
 * bytes and no larger than a single packet's payload.
 */
#if (CHUNKSIZE / CHAR_BIT) > MAXBLOCKSIZE || (CHUNKSIZE % CHAR_BIT) != 0
#error "Invalid chunk size"
#endif

Mike Hibler's avatar
Mike Hibler committed
46
47
48
/*
 * Chunk buffers and output write buffers constitute most of the memory
 * used in the system.  These should be sized to fit in the physical memory
Mike Hibler's avatar
Mike Hibler committed
49
50
 * of the client (forcing pieces of frisbee to be paged out to disk, even
 * if there is a swap disk to use, is not a very efficient way to load disks!)
Mike Hibler's avatar
Mike Hibler committed
51
52
53
54
55
56
57
58
59
60
61
62
 *
 * MAXCHUNKBUFS is the number of BLOCKSIZE*CHUNKSIZE chunk buffers used to
 * receive data from the network.  With the default values, these are 1MB
 * each.
 *
 * MAXWRITEBUFMEM is the amount, in MB, of write buffer memory in the client.
 * This is the amount of queued write data that can be pending.  A value of
 * zero means unlimited.
 *
 * The ratio of the number of these two buffer types depends on the ratio
 * of network to disk speed and the degree of compression in the image.
 */
Mike Hibler's avatar
Mike Hibler committed
63
64
#define MAXCHUNKBUFS	64	/* number of chunk buffers; 64MB with default chunk size */
#define MAXWRITEBUFMEM	64	/* write buffer memory in MB; zero means unlimited */
65

66
67
68
69
/*
 * Socket buffer size, used for both send and receive in client and
 * server right now.
 */
Mike Hibler's avatar
Mike Hibler committed
70
#define SOCKBUFSIZE	(200 * 1024)	/* presumably bytes (200KB) -- confirm against users */
71

72
73
/*
 * The number of read-ahead chunks that the client will request
74
 * at a time. No point in requesting too far ahead either, since they
75
 * are uncompressed/written at a fraction of the network transfer speed.
Mike Hibler's avatar
Mike Hibler committed
76
 * Also, with multiple clients at different stages, each requesting blocks,
77
78
79
 * it is likely that there will be plenty more chunks ready or in progress.
 */
#define MAXREADAHEAD	2	/* read-ahead chunks requested at a time */
#define MAXINPROGRESS	8	/* presumably max chunks in progress at once -- confirm */
81
82
83
84
85

/*
 * Timeout (in usecs) for packet receive. The idletimer number is how
 * many PKT timeouts we allow before requesting more data from the server.
 * That is, if we go TIMEOUT usecs without getting a packet, then ask for
86
 * more.
87
88
 */
#define PKTRCV_TIMEOUT		30000	/* usec */
#define CLIENT_IDLETIMER_COUNT	3	/* receive timeouts before asking for more */
#define TIMEOUT_HZ		(1000000 / PKTRCV_TIMEOUT)	/* receive timeouts per second */
#define TIMEOUT_HALFHZ		(TIMEOUT_HZ / 2)
92
93
94

/*
 * Timeout (in seconds!) server will hang around with no active clients.
95
 * Make it zero to never exit. 
96
 */
97
#define SERVER_INACTIVE_SECONDS	(60 * 30)	/* 30 minutes; zero means never exit */
98
99
100

/*
 * The number of disk read blocks in a single read on the server.
Mike Hibler's avatar
Mike Hibler committed
101
 * Must be an integer divisor of CHUNKSIZE.
102
 */
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#define SERVER_READ_SIZE	32	/* blocks per disk read */

/*
 * Enforce the requirement that SERVER_READ_SIZE evenly divides CHUNKSIZE,
 * in the same way the block and chunk sizes are checked at compile time.
 */
#if (CHUNKSIZE % SERVER_READ_SIZE) != 0
#error "Invalid server read size"
#endif

/*
 * Parameters for server network usage:
 *
 *	SERVER_BURST_SIZE	Max BLOCKSIZE packets sent in a burst.
 *				Should be a multiple of SERVER_READ_SIZE
 *				Should be less than SOCKBUFSIZE/BLOCKSIZE,
 *				bursts of greater than the send socket
 *				buffer size are almost certain to cause
 *				lost packets.
 *	SERVER_BURST_GAP	Delay in usec between output bursts.
 *				Given the typical scheduling granularity
 *				of 10ms for most unix systems, this
 *				will likely be set to either 0 or 10000.
Mike Hibler's avatar
Mike Hibler committed
118
119
 *				On FreeBSD we set the clock to 1ms
 *				granularity.
120
121
122
123
124
125
 *
 * Together with the BLOCKSIZE, these two params form a theoretical upper
 * bound on bandwidth consumption for the server.  That upper bound (for
 * ethernet) is:
 *
 *	(1000000 / SERVER_BURST_GAP)		# bursts per second
Mike Hibler's avatar
Mike Hibler committed
126
 *	* (BLOCKSIZE+24+42) * SERVER_BURST_SIZE	# * wire size of a burst
127
 *
Mike Hibler's avatar
Mike Hibler committed
128
129
130
131
132
133
 * which for the default 1k packets, gap of 1ms and burst of 16 packets
 * is about 17.4MB/sec.  That is beyond the capacity of a 100Mb ethernet
 * but with a 1ms granularity clock, the average gap size is going to be
 * 1.5ms yielding 11.6MB/sec.  (Note that with a SERVER_BURST_GAP of
 * 2000 usec the same formula gives about 8.7MB/sec.)  In practice, the
 * server is ultimately throttled by clients' ability to generate requests
 * which is limited by their ability to decompress and write to disk.
134
 */
Mike Hibler's avatar
Mike Hibler committed
135
#define SERVER_BURST_SIZE	16	/* max BLOCKSIZE packets per burst */
#define SERVER_BURST_GAP	2000	/* usec between output bursts */
137
138
139
140
141

/*
 * Max burst size when doing dynamic bandwidth adjustment.
 * Needs to be large enough to induce loss.
 */ 
Mike Hibler's avatar
Mike Hibler committed
142
#define SERVER_DYNBURST_SIZE	128	/* presumably packets, as for SERVER_BURST_SIZE -- confirm */
143
144
145
146
147
148
149

/*
 * How long (in usecs) to wait before re-requesting a chunk.
 * It will take the server more than:
 *
 *	(CHUNKSIZE/SERVER_BURST_SIZE) * SERVER_BURST_GAP
 *
Mike Hibler's avatar
Mike Hibler committed
150
 * usec (0.13 sec with defaults) for each chunk it pumps out,
151
152
 * and we conservatively assume that there are a fair number of other
 * chunks that must be processed before it gets to our chunk.
Mike Hibler's avatar
Mike Hibler committed
153
154
155
 *
 * XXX don't like making the client rely on compiled in server constants,
 * let's just set it to 1 second right now.
156
 */
Mike Hibler's avatar
Mike Hibler committed
157
#define CLIENT_REQUEST_REDO_DELAY	1000000	/* usec (1 second) */
158
159
160
161
162
163

/*
 * How long for the writer to sleep if there are no blocks currently
 * ready to write.  Allow a full server burst period, assuming that
 * something in the next burst will complete a block.
 */
Mike Hibler's avatar
Mike Hibler committed
164
#define CLIENT_WRITER_IDLE_DELAY	1000	/* presumably usec; NOTE(review): SERVER_BURST_GAP is 2000 -- confirm this still spans a burst period */
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191

/*
 * Client parameters and statistics.
 */
#define CLIENT_STATS_VERSION	1

/*
 * Versioned record of client parameters and statistics.  It is embedded
 * in the leave2 message of Packet_t, so field order and types must not
 * be changed.
 *
 * `version' selects which member of the union is meaningful; only v1 is
 * defined here.  The `limit' array fixes the size of the union at 256
 * unsigned longs regardless of how many fields v1 contains (presumably
 * so the record size stays stable as fields are added -- confirm).
 */
typedef struct {
	int	version;		/* CLIENT_STATS_VERSION */
	union {
		struct {
			/* Run time and data written */
			int	runsec;
			int	runmsec;
			int	delayms;
			unsigned long long rbyteswritten;
			unsigned long long ebyteswritten;

			/* Client parameters */
			int	chunkbufs;
			int	maxreadahead;
			int	maxinprogress;
			int	pkttimeout;
			int	startdelay;
			int	idletimer;
			int	idledelay;
			int	redodelay;
			int	randomize;

			/* Event counters */
			unsigned long	nochunksready;
			unsigned long	nofreechunks;
			unsigned long	dupchunk;
			unsigned long	dupblock;
			unsigned long	prequests;
			unsigned long	recvidles;
			unsigned long	joinattempts;
			unsigned long	requests;
			unsigned long	decompblocks;
			unsigned long	writeridles;
			int	writebufmem;
			unsigned long	lostblocks;
			unsigned long	rerequests;
		} v1;
		unsigned long limit[256];	/* fixes the union size */
	} u;
} ClientStats_t;
205

Mike Hibler's avatar
Mike Hibler committed
206
207
208
209
/*
 * Bitmap with one bit per block of a chunk, CHUNKSIZE bits in total.
 */
typedef struct {
	char	map[CHUNKSIZE / CHAR_BIT];
} BlockMap_t;

210
211
212
213
214
215
216
217
218
219
220
221
222
/*
 * Packet defs.
 */
typedef struct {
	struct {
		int		type;
		int		subtype;
		int		datalen; /* Useful amount of data in packet */
		unsigned int	srcip;   /* Filled in by network level. */
	} hdr;
	union {
		/*
		 * Join/leave the Team. Send a randomized ID, and receive
223
224
225
		 * the number of blocks in the file. This is strictly
		 * informational; the info is reported in the log file.
		 * We must return the number of chunks in the file though.
226
227
228
229
230
		 */
		union {
			unsigned int	clientid;
			int		blockcount;
		} join;
231
232
233
234
235
		
		struct {
			unsigned int	clientid;
			int		elapsed;	/* Stats only */
		} leave;
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253

		/*
		 * A data block, indexed by chunk,block.
		 */
		struct {
			int		chunk;
			int		block;
			char		buf[BLOCKSIZE];
		} block;

		/*
		 * A request for a data block, indexed by chunk,block.
		 */
		struct {
			int		chunk;
			int		block;
			int		count;	/* Number of blocks */
		} request;
254

Mike Hibler's avatar
Mike Hibler committed
255
256
257
258
259
260
261
262
263
264
265
266
267
		/*
		 * Partial chunk request, a bit map of the desired blocks
		 * for a chunk.  An alternative to issuing multiple standard
		 * requests.  Retries is a hint to the server for congestion
		 * control, non-zero if this is a retry of an earlier request
		 * we made.
		 */
		struct {
			int		chunk;
			int		retries;
			BlockMap_t	blockmap;
		} prequest;

268
269
270
271
272
273
274
275
		/*
		 * Leave reporting client params/stats
		 */
		struct {
			unsigned int	clientid;
			int		elapsed;
			ClientStats_t	stats;
		} leave2;
276
277
278
279
280
281
282
283
284
	} msg;
} Packet_t;
/* Packet type codes (presumably stored in Packet_t hdr.type -- confirm). */
#define PKTTYPE_REQUEST		1
#define PKTTYPE_REPLY		2

/* Packet subtype codes (presumably stored in Packet_t hdr.subtype -- confirm). */
#define PKTSUBTYPE_JOIN		1
#define PKTSUBTYPE_LEAVE	2
#define PKTSUBTYPE_BLOCK	3
#define PKTSUBTYPE_REQUEST	4
#define PKTSUBTYPE_LEAVE2	5
#define PKTSUBTYPE_PREQUEST	6
287
288
289
290
291
292

/*
 * Protos.
 */
/*
 * Network layer entry points; the implementations are not in this header,
 * so return-value conventions must be checked against the definitions.
 */
int	ClientNetInit(void);
int	ServerNetInit(void);
int	ServerNetMCKeepAlive(void);

/*
 * NOTE(review): returns unsigned long, while the clientid fields in
 * Packet_t are unsigned int -- confirm the value fits.
 */
unsigned long ClientNetID(void);

/* Packet I/O; `resends' semantics are not visible here -- see definition. */
int	PacketReceive(Packet_t *p);
void	PacketSend(Packet_t *p, int *resends);
void	PacketReply(Packet_t *p);
int	PacketValid(Packet_t *p, int nchunks);
void	dump_network(void);
300
301
302
303
304
305

/*
 * Globals
 */
/*
 * Declarations only; each variable is defined in some other file.
 * NOTE(review): this header does not visibly include a definition of
 * struct in_addr, so includers presumably pull in the socket headers
 * first -- confirm.
 */
extern int		debug;
extern int		portnum;
extern int		broadcast;
extern struct in_addr	mcastaddr;
extern struct in_addr	mcastif;
extern char	       *filename;
extern int		clockres;