digest-slothd 13.1 KB
Newer Older
1
2
#! /usr/bin/awk -f

Timothy Stack's avatar
Timothy Stack committed
3
4
#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2004, 2005 University of Utah and the Flux Group.
Timothy Stack's avatar
Timothy Stack committed
6
7
8
# All rights reserved.
#

9
10
11
12
13
14
15
16
17
18
19
20
21
#
# Digest slothd logs and generate a TCL file that contains resource usage data
# that can be fed back into the mapper.
#
# Usage: digest-slothd <slothd-log1> [<slothd-log2> ...] <mapping>
#
# <slothd-logN> - The output of slothd when it is run in high resolution
#   tracing mode.
# <mapping> - A mapping of MAC addresses to node members and LAN/link names.
#   XXX disgusting.
#

BEGIN {
22
23
24
25
26
27
28
29
30
31
32
33
#   The overload "threshold", so, this many overload signals in a row means
#   the node is overloaded.  The 2.25 here requires three CPU/swapout/etc
#   related overloads or a few timing related overloads.
    OVERLOAD_MAX = 2.25; # XXX Make this a command line argument.

#   The maximum allowed time between canaryd reports, otherwise we consider
#   it an overload condition.
    TIME_OVERLOAD = 1.1;

#   The maximum number of pageouts allowed.  XXX I don't know the semantics...
    SWAPOUT_OVERLOAD = 2;

34
    DISK_OVERLOAD = 95;
35
    
36
    error = 0;
Timothy Stack's avatar
   
Timothy Stack committed
37
    if( ARGC < 3 )
38
39
    {
	printf("Convert slothd output to a TCL feedback file.\n");
Timothy Stack's avatar
   
Timothy Stack committed
40
	printf("Usage: digest-slothd <alert> <mapping> <file1> [<file2> ...]\n");
41
42
43
44
45
46
47
48
49
50
51
52
	printf("\n");
	printf("Required arguments:\n");
	printf("  file1   - A file containing the output of slothd.\n");
	printf("  mapping - A mapping of MAC addresses to node members\n");
	printf("            and LAN/link names.");
	printf("\n");
	printf("Example:\n");
	printf("  $ digest-slothd vhost-0.slothd - < mapping\n");
	printf("\n");
	printf("Example mapping:\n");
	printf("  00:00:0a:01:01:02 node0 link");
	printf("\n");
Timothy Stack's avatar
   
Timothy Stack committed
53
	error = 1;
54
55
	exit error;
    }
Timothy Stack's avatar
   
Timothy Stack committed
56
57
58
59

    alert_file = ARGV[1];
    ARGV[1] = "";
    printf("") > alert_file;
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
}

#
# Process slothd output.  This will digest the "vnode" and "iface" attributes
# and store the values in the following variables:
#
#   vnodes_cpu[<vnode_name>,<instance>] - The vnode percent of CPU used per
#     second.
#   vnodes_ram[<vnode_name>,<instance>] - The vnode percent of RAM used per
#     second.
#   links_mac_bw[<MAC addr>] - The peak kilobits per second used on the NIC.
#   links_mac_pkts[<MAC addr>] - The peak number of packets per second used on
#     the NIC.
#
# Example input:
#
# vers=3 mis=1084907941 lave=0.0000000000,0.0000000000,0.0000000000 abits=0x5 page=0,0 ints=1134,693,82 cpu=0,1,98 iface=00:02:b3:3f:7a:20,18804,3352,14047137,457522 iface=00:03:47:73:a2:42,9079,125118,3428409,60045802 iface=00:00:00:00:00:00,0,0,0,0 iface=00:00:0a:01:08:03,2846,125780,385828,58014417 iface=00:00:0a:01:12:03,125782,2843,58015462,385822 iface=00:00:0a:01:0d:02,6285,152,2896485,19985 iface=00:00:0a:01:12:02,2845,125781,385780,58015462 vnode=dslclient-11.testdssvm.tbres.emulab.net,0.0,1.8 vnode=server.testdssvm.tbres.emulab.net,2.1,2.5 vnode=corerouter.testdssvm.tbres.emulab.net,0.0,1.2
#
78
/vers=2/ {
79
80
81
82
83
    line_count[ARGIND] += 1;
    if( (vnode_count > 1) &&
	(line_count[ARGIND - 1] > 0) &&
	(line_count[ARGIND - 1] < 3) )
    {
84
85
#       There are not enough canaryd lines to deduce anything other than
#       overload.
86
87
88
89
90
91
92
93
94
	for( vnode_name in line_vnode_names )
	{
	    alerts[vnode_name] = 1;
	}
	for( mac in line_macs )
	{
	    alerts[mac] = 1;
	}
    }
95
96
97
98
99

#   Initialize some "locals"
    vnode_count = 0; # The number of vnodes on this pnode.
    total_vnode_cpu = 0; # The total CPU used by vnodes
    total_vnode_pps = 0; # The total number of packets per second (in and out)
100
101
102
103
    time_diff_s = 0;
    ovld = 0;
    delete line_vnode_names;
    delete line_macs;
104
105

#   Loop through the key/value pairs on the canaryd line.
106
107
108
109
110
111
112
    for( lpc = 2; lpc <= NF; lpc++ )
    {
#       Determine the field type and
	split($lpc, field, /=/);
#       ... handle it.
	if( field[1] == "stamp" )
	{
113
114
#           Timestamp, mostly just care about how far off we are from the last
#           period.
115
116
117
118
119
120
121
122
123
124
125
126
127
128
	    split(field[2], data, /,/);
	    if( last_stamp_s[ARGIND] )
	    {
		time_diff_us = (data[1] - last_stamp_s[ARGIND]) * 1000000;
		time_diff_us += (data[2] - last_stamp_us[ARGIND]);
		time_diff_s = time_diff_us / 1000000.0
	    }
	    else
	    {
		time_diff_s = 1.0;
	    }
	    last_stamp_s[ARGIND] = data[1];
	    last_stamp_us[ARGIND] = data[2];

129
130
131
#           Assume some overload if the diff from the last period is relatively
#           large.
	    if( time_diff_s > TIME_OVERLOAD )
132
133
134
	    {
#               Signal multiple overload events for each missed time interval.
		overload[ARGIND] += (time_diff_s - 1.0);
135
		printf("# tstmp ovld (%f): %s\n", (time_diff_s - 1.0), $0);
136
137
138
139
	    }
	}
	else if( field[1] == "ovld" )
	{
140
141
	    if( field[2] == 1 )
		overload[ARGIND] += 1;
142
143
144
	}
	else if( field[1] == "cpu" )
	{
145
#           Total CPU time and
146
	    total_cpu = field[2];
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#           ... signal overload if its too high.
	    if( total_cpu >= 99 )
		overload[ARGIND] += 1;
	}
	else if( field[1] == "intr" )
	{
#           Record interrupt load so we can try to assign it to node members
#           later on.
	    intr = field[2];
	}
	else if( field[1] == "sys" )
	{
#           Record system CPU load so we can try to assign it to node members
#           later on.
	    sys_cpu = field[2];
	}
	else if( field[1] == "mem" )
	{
	    split(field[2], data, /,/);
	    if( data[2] > SWAPOUT_OVERLOAD )
	    {
#               Swapping out too much...
		overload[ARGIND] += 1;
	    }
	}
	else if( field[1] == "disk" )
	{
Timothy Stack's avatar
   
Timothy Stack committed
174
	    if( field[2] > DISK_OVERLOAD )
175
176
177
	    {
		overload[ARGIND] += 1;
	    }
178
	}
Timothy Stack's avatar
   
Timothy Stack committed
179
180
181
182
183
184
185
186
187
	else if( field[1] == "pipe" )
	{
	    split(field[2], data, /,/);
	    
	    pipe = data[1];
	    qlen = data[2];
	    qlen_bytes = data[3];
	    qdrops = data[4];
	}
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
	else if( field[1] == "iface" )
	{
#           Split the data values and
	    split(field[2], data, /,/);
#           ... the ethernet MAC address.
	    link_mac = data[1];
	    links_mac[link_mac] = 1;
#           ... the input bandwidth/packets for this second, and
	    if( link_mac in links_last_in )
	    {
		last_pkt_value = links_last_pkt_in[link_mac];
		links_last_pkt_in[link_mac] = data[2];
		in_pkts = data[2] - last_pkt_value;

		last_value = links_last_in[link_mac];
		links_last_in[link_mac] = data[4];
		in_bw = data[4] - last_value;
	    }
	    else
	    {
		links_last_pkt_in[link_mac] = data[2];
		links_last_in[link_mac] = data[4];

		in_pkts = 0;
		in_bw = 0;
	    }
#           ... the output bandwidth/packets for this second.
	    if( link_mac in links_last_out )
	    {
		last_pkt_value = links_last_pkt_out[link_mac];
		links_last_pkt_out[link_mac] = data[3];
		out_pkts = data[3] - last_pkt_value;

		last_value = links_last_out[link_mac];
		links_last_out[link_mac] = data[5];
		out_bw = data[5] - last_value;
	    }
	    else
	    {
		links_last_pkt_out[link_mac] = data[3];
		links_last_out[link_mac] = data[5];

		out_pkts = 0;
		out_bw = 0;
	    }
#           Make sure the measures are 'per second' since slothd will sometimes
#           miss an interval.
	    in_bw = in_bw / time_diff_s;
	    in_pkts = in_pkts / time_diff_s;
	    out_bw = out_bw / time_diff_s;
	    out_pkts = out_pkts / time_diff_s;
239
240
241
242
243
244

#           Record these values so we can do something with them and the
#           interrupt load numbers later.
	    total_vnode_pps += in_pkts + out_pkts;
	    line_pps[link_mac] = in_pkts + out_pkts;
	    
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#           Find the maximum of bandwidth/packets of the data seen so far.
	    if( in_pkts > links_mac_pkts[link_mac] )
	    {
		links_mac_pkts[link_mac] = in_pkts;
	    }
	    if( in_bw > links_mac_bw[link_mac] )
	    {
		links_mac_bw[link_mac] = in_bw;
	    }
	    if( out_pkts > links_mac_pkts[link_mac] )
	    {
		links_mac_pkts[link_mac] = out_pkts;
	    }
	    if( out_bw > links_mac_bw[link_mac] )
	    {
		links_mac_bw[link_mac] = out_bw;
	    }
262
263

#           We've seen this MAC address...
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
	    line_macs[link_mac] = 1;
	}
	else if( field[1] == "vnode" )
	{
	    split(field[2], data, /,/);
	    split(data[1], name, /\./);
	    vnode_name = name[1];
	    vnodes_name[vnode_name] = FNR;
	    vnodes_cpu[vnode_name,FNR] = data[2];
	    vnodes_ram[vnode_name,FNR] = data[3];
	    line_vnode_names[vnode_name] = 1;
	    total_vnode_cpu += data[2];
	    vnode_count += 1;
	}
    }

#   Need to deal with CPU time lost in interrupts.
    if( vnode_count == 0 )
    {
Timothy Stack's avatar
   
Timothy Stack committed
283
	printf("error: no vnodes in line '%s'\n", $0) > "/dev/stderr";
284
285
286
287
288
	error = 1;
	exit error;
    }
    else if( vnode_count == 1 )
    {
289
290
291
292
293
294
295
296
	if( total_cpu >= 99 )
	{
	    vnodes_cpu[vnode_name,FNR] = total_cpu;
	}
	else
	{
	    vnodes_cpu[vnode_name,FNR] += intr;
	}
297
298
299
    }
    else
    {
300
301
302
#       We're in overload for this period, but do not signal an alert until
#       we see OVERLOAD_MAX consecutive indicators.
	if( overload[ARGIND] >= OVERLOAD_MAX )
303
	{
304
305
	    printf("# overload! %f %f\n", last_overload[ARGIND], overload[ARGIND]);
	    for( vnode_name in line_vnode_names )
306
	    {
307
		alerts[vnode_name] = 1;
308
	    }
309
	    for( mac in line_macs )
310
	    {
311
		alerts[mac] = 1;
312
313
	    }
	}
314
	else if( last_overload[ARGIND] == overload[ARGIND] )
315
316
	{
	    overload[ARGIND] = 0;
317
318
319
320
321
322
323
324
325
326
327
328
	}
	else
	{
	    printf("# more overload %f %f\n", last_overload[ARGIND], overload[ARGIND]);
	}
	if( total_vnode_pps > 0 )
	{
#           Try to guess how much interrupt load can be attributed to a vnode
#           based on the number of packets sent/received.
	    ipp = intr / total_vnode_pps;
	    printf("# ipp %f = %f / %f\n", ipp, intr, total_vnode_pps);
	    for( mac in line_macs )
329
	    {
330
331
332
333
334
335
336
337
338
339
		if( mac in mac2node )
		{
		    nipp = ipp * line_pps[mac];
		    vnodes_cpu[mac2node[mac],FNR] += nipp;
		    printf("# adding %s %s %f -> %f\n", 
			   mac2node[mac],
			   mac2link[mac],
			   nipp,
			   vnodes_cpu[mac2node[mac],FNR]);
		}
340
341
	    }
	}
342
343
	
	last_overload[ARGIND] = overload[ARGIND];
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
    }
}

#
# Process the mapping lines so we can find the mapping of IP addresses to
# symbolic link names.  Also, we generate the reservations for the node <-> LAN
# connections, reservations for links will be done at the END when we know who
# the maximums are for the NICs on the links.
#
# Example input:
#
#   00:00:0a:01:01:02 node0 link
#
# Example output:
#
#   set Reservations(link,node0,kbps) 123235.00
#
/^[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]] [[:alnum:]\-]* [[:alnum:]\-]*$/ {
362
363
    mac2node[$1] = $2;
    mac2link[$1] = $3;
Timothy Stack's avatar
   
Timothy Stack committed
364
365
366
367
368
369
    printf("# mac %s -> node %s\n", $1, $2);
    printf("# mac %s -> link %s\n", $1, $3);
}

/pipe/ {
    pipe2link[$2] = $3;
370
371
372
373
374
375
376
377
378
379
380
381
}

#
# All done, time to print out the peak values we've discovered while
# processing.
#
END {
    if( error )
    {
	exit error;
    }

382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
    if( (vnode_count > 1) &&
	(line_count[ARGIND - 1] > 0) &&
	(line_count[ARGIND - 1] < 3) )
    {
#       There are not enough lines to deduce anything other than overload.
	for( vnode_name in line_vnode_names )
	{
	    alerts[vnode_name] = 1;
	}
	for( mac in line_macs )
	{
	    alerts[mac] = 1;
	}
    }

    printf("# -*- TCL -*-\n");
    printf("# Automatically generated feedback file.\n");
    printf("#\n");
    for( lpc = 1; lpc < ARGC; lpc++ )
    {
	printf("# ARGV[%d]: %s\n", lpc, ARGV[lpc]);
    }
    printf("#\n");
    printf("# Generated at: %s\n", strftime());
    printf("#\n\n");
    printf("# BEGIN Node/LAN\n");

    for( mac in mac2node )
    {
	printf("set Reservations(%s,%s,kbps) %f # %s\n",
	       mac2link[mac],
	       mac2node[mac],
	       (links_mac_bw[mac] * 8) / 1000.0,
	       mac);
	printf("set Reservations(%s,%s,pps) %f\n",
	       mac2link[mac],
	       mac2node[mac],
	       links_mac_pkts[mac]);
	links_name[mac2link[mac]] = 1;
	if( links_mac_bw[mac] > links_bw[mac2link[mac]] )
	    links_bw[mac2link[mac]] = links_mac_bw[mac];
	if( links_mac_pkts[mac] > links_pkts[mac2link[mac]] )
	    links_pkts[mac2link[mac]] = links_mac_pkts[mac];
	if( mac in alerts )
	{
	    printf("set Alerts(%s,%s) 1\n", mac2link[mac], mac2node[mac]);
	    printf("set Alerts(%s) 1\n", mac2link[mac]);
Timothy Stack's avatar
   
Timothy Stack committed
429
430
431
432
433
434

	    printf("link,%s,%s\n",
		   mac2link[mac],
		   mac2node[mac]) > alert_file;

	    error = 3;
435
436
437
	}
    }

438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
    printf("# END Node/LAN\n\n");
    
    printf("# BEGIN Nodes\n");
    for( vnode_name in vnodes_name )
    {
	vnode_cpu = 0.001;
	vnode_ram = 0.001;
#       Note that we ignore the first and last values in the log, since the
#       first value is always 100% and I just felt like making it symmetrical
#       so the last was dropped as well.
	for( lpc = 2; lpc < vnodes_name[vnode_name] - 1; lpc++ )
	{
	    if( vnodes_cpu[vnode_name,lpc] > vnode_cpu )
	    {
		vnode_cpu = vnodes_cpu[vnode_name,lpc];
	    }
	    if( vnodes_ram[vnode_name,lpc] > vnode_ram )
	    {
		vnode_ram = vnodes_ram[vnode_name,lpc];
	    }
	}
	printf("set Reservations(%s,cpupercent) %f\n", vnode_name, vnode_cpu);
	printf("set Reservations(%s,rampercent) %f\n", vnode_name, vnode_ram);
#       Check for an alert.
	if( vnode_name in alerts )
	{
	    printf("set Alerts(%s) 1\n", vnode_name);
Timothy Stack's avatar
   
Timothy Stack committed
465
466
467

	    printf("node,%s\n", vnode_name) > alert_file;
	    error = 3;
468
469
470
471
472
473
474
475
476
477
478
479
480
481
	}
    }
    printf("# END Nodes\n\n");
    printf("# BEGIN Links\n");
    for( link_name in links_name )
    {
	printf("set Reservations(%s,kbps) %f\n",
	       link_name,
	       (links_bw[link_name] * 8) / 1000.0);
	printf("set Reservations(%s,pps) %f\n",
	       link_name,
	       links_pkts[link_name]);
    }
    printf("# END Links\n\n");
Timothy Stack's avatar
   
Timothy Stack committed
482
483
484

    printf("# %d\n", error);
    exit error;
485
}