bgmon.pl 23.1 KB
Newer Older
1
2
3
#!/usr/bin/perl

#TODO!! Result index numbers... window size 2x > buffer size???
4
# (1/30/06) No Ack's, just trust that sent data gets there.
5
6
7
8
# Q: how to determine if OPS receives it correctly?
#    (1/31) Just checking if "send notification" is successful
#
# (2/3/06): CLEANUP ALLOCATED MEMORY using .._free() functions
9
10
# (3/20/06): Look into usefullness of "nohang()"... did not get tested,
#            and may not be needed
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

=pod
HOW TO USE:
Send a notification to this node to perform a desired action by:
objtype   => BGMON
objname   => plabxxx   where plabxxx is the this node address
eventtype => <COMMAND> where COMMAND is any of:
  EDIT: Modify a specific link test to specific destination node.
        Notification must contain the following attributues:
          "linkdest" = destination node of the test. Example, "node10"
          "testtype" = type of test to run. Examples, "latency" or "bw"
          "testper"  = period of time between successive tests.
  INIT: Initialize the link tests with default destination nodes and frequency.
        **todo** ^^needs updating 
  SINGLE: Perform a single link test to a specifc destination node.
          Notification must contain the following attributues:
           "linkdest" = destination node of the test. Example, "node10"
           "testtype" = type of test to run. Examples, "latency" or "bw"
29
30
31
32
33
34
35
36
37
38
39
40
41
  STOPALL: stop all tests, running or pending.

OPERATION NUANCES:
- If a ping test is scheduled to have a period shorter than the normal
  latency to the destination, pings will be run sequentially one after
  the other. There will be no simultaneous tests of the same type to the same
  destination.
- If a node's latency is longer than 60 seconds, an error value is reported
  instead of a latency (milliseconds) value. See %ERRID
- If a testing process abnormally exits with a value not 0 (say, iperf dies),
  then the test is rescheduled to be run at a future time. This time is
  the normal period times the %TEST_FAIL_RETRY ratio. (TODO: add error
  reporting here.)
42
43
44
45
46
47
48
49
=cut


use lib '/usr/testbed/lib';
use event;
use Getopt::Std;
use strict;
use DB_File;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
50
use Socket;
51
52

sub usage {
Kirk Webb's avatar
   
Kirk Webb committed
53
	warn "Usage: $0 [-s server] [-p port] [-e pid/eid] -d <working_dir> [hostname]\n";
54
55
56
57
58
59
60
	return 1;
}


#*****************************************
my %MAX_SIMU_TESTS = (latency => "10",
		      bw      => "1");
61

62
# ratio of testing period to wait after a test process abnormally exits
63
my %TEST_FAIL_RETRY= (latency => 0.3,
64
		      bw      => 0.1);
65
66
#MARK_RELIABLE
# each result waiting to be acked has an id number and corresponding file
Leigh B. Stoller's avatar
Leigh B. Stoller committed
67
68
69
my $resultDBlimit = 1000;
my %reslist       = ();
my $magic         = "0xDeAdBeAf";
70

71
my %testevents = ();
72

73
74
75
76
77
78
79
80
#queue for each test type containing tests that need to
# be run, but couldn't, due to MAX_SIMU_TESTS.
# keys are test types, values are lists
# This separate Q, which has higher priority than the normal %testevents
# should solve "starvation" issues if an "early" test blocks for too long
# such as during a network timeout with iperf
my %waitq = ( latency => [],
	      bw      => [] );
81
82
83
84
85
86

my %ERRID;
$ERRID{timeout} = -1;
$ERRID{ttlexceed} = -2;
$ERRID{unknown} = -3;

87
88
89
90
#*****************************************

my %opt = ();
#getopt(\%opt,"s:p:h");
Kirk Webb's avatar
   
Kirk Webb committed
91
getopts("s:p:e:d:i:h",\%opt);
92
93
94
95

#if ($opt{h}) { exit &usage; }
#if (@ARGV > 1) { exit &usage; }

Kirk Webb's avatar
   
Kirk Webb committed
96
my ($server,$port,$evexpt,$workingdir,$iperfport);
Kirk Webb's avatar
   
Kirk Webb committed
97
98
99
if ($opt{s}) { $server = $opt{s}; } else { $server = "localhost"; }
if ($opt{p}) { $port = $opt{p}; }
if ($opt{e}) { $evexpt = $opt{e}; } else { $evexpt = "__none"; }
Kirk Webb's avatar
   
Kirk Webb committed
100
101
if ($opt{d}) { $workingdir = $opt{d}; `cd $workingdir`; }
if ($opt{i}) { $iperfport = $opt{i}; } else { $iperfport = 5002; }
Kirk Webb's avatar
   
Kirk Webb committed
102

103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
my $thismonaddr;
if( defined  $ARGV[0] ){
    $thismonaddr = $ARGV[0];
}else{
    $_ = `cat /var/emulab/boot/nodeid`;
    /plabvm(\d+)-/;
    $thismonaddr = "plab$1";
}
print "thismonaddr = $thismonaddr\n";

print "server=$server\n";

my $URL = "elvin://$server";
if ($port) { $URL .= ":$port"; }
my $handle = event_register($URL,0);
if (!$handle) { die "Unable to register with event system\n"; }
print "registered with $server\n";
my $tuple = address_tuple_alloc();
if (!$tuple) { die "Could not allocate an address tuple\n"; }

%$tuple = ( host      => $event::ADDRESSTUPLE_ALL,
	    objtype   => "BGMON",
Kirk Webb's avatar
   
Kirk Webb committed
125
126
	    objname   => $thismonaddr,
            expt      => $evexpt);
127
128
129
130
131

if (!event_subscribe($handle,\&callbackFunc,$tuple)) {
	die "Could not subscribe to event\n";
}

Leigh B. Stoller's avatar
Leigh B. Stoller committed
132
133
134
135
136
137
138
139
140
141
142
143
144
# This is for our ack from ops.
$tuple = address_tuple_alloc();
if (!$tuple) { die "Could not allocate an address tuple\n"; }

%$tuple = ( objname   => "ops",
	    eventtype => "ACK",
	    expt      => "__none",
	    objtype   => "BGMON");

if (!event_subscribe($handle,\&callbackFunc,$tuple)) {
	die "Could not subscribe to ack event\n";
}

145
146
147
148
149
150
151
152
153
154
#this call will reconnect event system if it has failed
sendBlankNotification();

#############################################################################
# Note a difference from tbrecv.c - we don't yet have event_main() functional
# in perl, so we have to poll. (Nothing special about the select, it's just
# a wacky way to get usleep() )
#############################################################################
#main()

Leigh B. Stoller's avatar
Leigh B. Stoller committed
155
156
157
158
159
160
161
162
163
164
#
# At startup, look for any old results that did not get acked. Add them to
# the reslist so they get resent below.
#
for (my $i = 0; $i < $resultDBlimit; $i++) {
    if (-e createDBfilename($i)) {
	$reslist{$i} = createDBfilename($i);
    }
}

165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
while (1) {

    #check for pending received events
    event_poll_blocking($handle,100);

    #TODO: this loop shouldn't be run every poll loop.
    #try to run tests on queue
    foreach my $testtype (keys %waitq){
	my $arrlen = scalar(@{$waitq{$testtype}});
	#DONOT use "foreach" here, since the call to spawnTest may add to waitq
	for( my $i = 0; $i < $arrlen; $i++ ){
	    my $destnode = pop @{$waitq{$testtype}};
#	    print "WAIT Q: REMOVED $destnode \n";
	    spawnTest( $destnode, $testtype );
	}
    }

    #iterate through all event structures
    for my $destaddr ( keys %testevents ) {
	for my $testtype ( keys %{ $testevents{$destaddr} } ){
	    #mark flags of finished tests
	    #check if process is running
	    my $pid = $testevents{$destaddr}{$testtype}{"pid"};
	    if( $pid != 0 ){
		use POSIX ":sys_wait_h";
		my $cnt = waitpid( $pid, &WNOHANG );
		if( $cnt != 0 )
		{
		    if( $? == 0 ){
			#process finished, so mark it's "finished" flag
			$testevents{$destaddr}{$testtype}{"flag_finished"} = 1;
			$testevents{$destaddr}{$testtype}{"pid"} = 0;
		    }else{
			#process exited abnormally
			#reset pid
			$testevents{$destaddr}{$testtype}{"pid"} = 0;
			#schedule next test at a % of a normal period from now
			my $nextrun = time_all() + 
			    $testevents{$destaddr}{$testtype}{"testper"} *
				$TEST_FAIL_RETRY{$testtype};
			$testevents{$destaddr}{$testtype}{"timeOfNextRun"} = 
			    $nextrun;
			#delete tmp filename
			my $filename = createtmpfilename($destaddr, $testtype);
209
			unlink($filename) or warn "can't delete temp file";
210
211
212
213
		    }
		}		
	    }

214
	    #check for finished events
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
	    if( $testevents{$destaddr}{$testtype}{"flag_finished"} == 1 ){
		#read raw results from temp file
		my $filename = createtmpfilename($destaddr, $testtype);
		open FILE, "< $filename" 
		    or die "can't open file $filename";
	        my @raw_lines = <FILE>;
		my $raw;
		foreach my $line (@raw_lines){
		    $raw = $raw.$line;
		}
		close FILE;
		unlink($filename) or die "can't delete temp file";
		#parse raw data
		my $parsedData = parsedata($testtype,$raw);
		$testevents{$destaddr}{$testtype}{"results_parsed"} =
		    $parsedData;

		my %results = 
		    ("sourceaddr" => $thismonaddr,
		     "destaddr" => $destaddr,
		     "testtype" => $testtype,
		     "result" => $parsedData,
Leigh B. Stoller's avatar
Leigh B. Stoller committed
237
238
239
		     "tstamp" => $testevents{$destaddr}{$testtype}{"tstamp"},
		     "magic"  => "$magic",
		     );
240
241
		#MARK_RELIABLE
		#save result to local DB
Leigh B. Stoller's avatar
Leigh B. Stoller committed
242
		my $index = saveTestToLocalDB(\%results);
243
		#send result to remote DB
Leigh B. Stoller's avatar
Leigh B. Stoller committed
244
		sendResults(\%results, $index);
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294

		#reset flags
		$testevents{$destaddr}{$testtype}{"flag_finished"} = 0;
		$testevents{$destaddr}{$testtype}{"flag_scheduled"} = 0;
	    }

	    #schedule new tests
	    if( $testevents{$destaddr}{$testtype}{"flag_scheduled"} == 0 &&
		$testevents{$destaddr}{$testtype}{"testper"} > 0 )
	    {
		#schedule next test
#		$testevents{$destaddr}{$testtype}{"timeOfNextRun"} =
#		    $testevents{$destaddr}{$testtype}{"testper"} + time_all();

		if( time_all() < 
		    $testevents{$destaddr}{$testtype}{"timeOfNextRun"}
		    + $testevents{$destaddr}{$testtype}{"testper"} )
		{
		    #if time of next run is in the future, set it to that
		    $testevents{$destaddr}{$testtype}{"timeOfNextRun"} 
		      += $testevents{$destaddr}{$testtype}{"testper"};
		}else{
		    #if time of next run is in the past, set to current time
		    $testevents{$destaddr}{$testtype}{"timeOfNextRun"} 
		      = time_all();
		}

		$testevents{$destaddr}{$testtype}{"flag_scheduled"} = 1;
	    }

#	    print "nextrun="
#		.$testevents{$destaddr}{$testtype}{"timeOfNextRun"}."\n";
#	    print "scheduled?=".$testevents{$destaddr}{$testtype}{"flag_scheduled"}.
		"\n";
#	    print "pid=".$testevents{$destaddr}{$testtype}{"pid"}."\n";

	    #check for new tests ready to run
	    if( scalar(@{$waitq{bw}}) == 0 &&
		$testevents{$destaddr}{$testtype}{"timeOfNextRun"} 
		                                           <= time_all() &&
		$testevents{$destaddr}{$testtype}{"flag_scheduled"} == 1 &&
		$testevents{$destaddr}{$testtype}{"pid"} == 0 )
	    {
		#run test
		spawnTest( $destaddr, $testtype );
	    }
	}

	# may not be needed, but may help detect errors
	my $hangres = detectHang($destaddr);
295
	if( $hangres eq "bw" ){
296
	    print "HANG: $hangres,  $destaddr\n";
297
298
	    # reset time of next run
	    $testevents{$destaddr}{bw}{"timeOfNextRun"} = time_all();
299
300
301
	}
    }

Leigh B. Stoller's avatar
Leigh B. Stoller committed
302
303
304
    #
    # Check for results that could not be sent due to error. We want to wait
    # a little while though to avoid resending data that has yet to be
305
    # acked because the network is slow or down.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
306
307
308
309
310
311
312
313
314
315
316
317
318
319
    #
    my $count    = 0;
    my $maxcount = 5;	# Wake up and send only this number at once.
    
    for (my $index = 0; $index < $resultDBlimit; $index++) {
	next
	    if (!exists($reslist{$index}));
	
	my $filename = $reslist{$index};

	if (! -e $filename) {
	    # Hmm, something went wrong!
	    delete($reslist{$index});
	    next;
320
	}
321

Leigh B. Stoller's avatar
Leigh B. Stoller committed
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
	# Stat file to get create time.
	my (undef,undef,undef,undef,undef,undef,undef,undef,
	    undef,undef,$ctime) = stat($filename);

	next
	    if ((time() - $ctime) < 10);

	#resend
	my %results;
	my %db;
	tie(%db, "DB_File", $filename) 
	    or die "cannot open db file";
	for my $key (keys %db ){
	    $results{$key} = $db{$key};
	}
	untie(%db);

	# Verify results in case the file was scrogged.
	if (!exists($results{"magic"}) || $results{"magic"} ne $magic) {
	    # Hmm, something went wrong!
	    print "Old results for index $index are scrogged; deleting!\n";
	    delete($reslist{$index});
	    unlink($filename);
	    next;
	}
	sendResults(\%results, $index);
	sleep(1);
	$count++;
	if ($count > $maxcount) {
	    print "Delaying a bit before sending more old results!\n";
	    sleep(2);
	    last;
	}
    }
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
}

#############################################################################

if (event_unregister($handle) == 0) {
    die "Unable to unregister with event system\n";
}

exit(0);


sub callbackFunc($$$) {
	my ($handle,$notification,$data) = @_;

	my $time      = time_all();
	my $site      = event_notification_get_site($handle, $notification);
	my $expt      = event_notification_get_expt($handle, $notification);
	my $group     = event_notification_get_group($handle, $notification);
	my $host      = event_notification_get_host($handle, $notification);
	my $objtype   = event_notification_get_objtype($handle, $notification);
	my $objname   = event_notification_get_objname($handle, $notification);
	my $eventtype = event_notification_get_eventtype($handle,
							 $notification);
#	print "Event: $time $site $expt $group $host $objtype $objname " .
#		"$eventtype\n";

#	print "EVENT: $time $objtype $eventtype\n";

Leigh B. Stoller's avatar
Leigh B. Stoller committed
384
385
386
387
388
389
390
391
392
393
394
395
396
	# Ack from ops.
	if ($eventtype eq "ACK") {
	    my $index = event_notification_get_string($handle,
						      $notification,
						      "index");
	    
	    print "Ack for index $index. Deleting backup file\n";
	    if (exists($reslist{$index})) {
		unlink($reslist{$index});
		delete($reslist{$index});
	    }
	    return;
	}
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435

	#change values and/or initialize
	if( $eventtype eq "EDIT" ){
	    print "EDIT\n";
	    my $linkdest = event_notification_get_string($handle,
							 $notification,
							 "linkdest");
	    my $testtype = event_notification_get_string($handle,
							 $notification,
							 "testtype");
	    my $testper = event_notification_get_string($handle,
							$notification,
							"testper");
	    $testevents{$linkdest}{$testtype}{"testper"} = $testper;
	    $testevents{$linkdest}{$testtype}{"flag_scheduled"} = 0;
	    $testevents{$linkdest}{$testtype}{"timeOfNextRun"} = time_all();

	    print( "linkdest=$linkdest\n".
		   "testype =$testtype\n".
		   "testper=$testper\n" );
	}
	elsif( $eventtype eq "INIT" ){
	    print "INIT\n";
	    my $testtype = event_notification_get_string($handle,
							 $notification,
							 "testtype");
	    my @destnodes 
		= split(" ", event_notification_get_string($handle,
							   $notification,
							   "destnodes"));
            my $testper = event_notification_get_string($handle,
							$notification,
							"testper");
#	    print "$testtype*$@destnodes*$testper\n";

	    #TOOD: Add a start time offset, so as to schedule the initial test
            #      from the manager/controller
	    
            foreach my $linkdest (@destnodes){
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
		#be smart about adding tests
		# don't want to change already running tests
		# only change those tests which have been updated
		if( defined($testevents{$linkdest}{$testtype}{"testper"}) &&
		    $testper == $testevents{$linkdest}{$testtype}{"testper"} )
		{
		    # do nothing... keep test as it is
		}else{
		    # update test
		    $testevents{$linkdest}{$testtype}{"testper"} =$testper;
		    $testevents{$linkdest}{$testtype}{"flag_scheduled"} =0;
		    # TODO? be smart about when the first test should run?
		    $testevents{$linkdest}{$testtype}{"timeOfNextRun"} =
			time_all();
		}
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
	    }
	}
	elsif( $eventtype eq "SINGLE" ){
	    print "SINGLE\n";
	    #schedule a single test run NOW (this time, minus 1 second)
	    my $linkdest = event_notification_get_string($handle,
							 $notification,
							 "linkdest");
	    my $testtype = event_notification_get_string($handle,
							 $notification,
							 "testtype");
	    my $testper = event_notification_get_string($handle,
							$notification,
							"testper");
	    $testevents{$linkdest}{$testtype}{"testper"} = 0;
	    $testevents{$linkdest}{$testtype}{"timeOfNextRun"} = time_all()-1;
	    $testevents{$linkdest}{$testtype}{"flag_scheduled"} = 1;
	    $testevents{$linkdest}{$testtype}{"pid"} = 0;

	    print( "linkdest=$linkdest\n".
		   "testype =$testtype\n".
		   "testper=$testper\n" );
	    
	}
	elsif( $eventtype eq "STOPALL" ){
	    print "STOPALL\n";
	    %testevents = ();
	}

#	if (event_unregister($handle) == 0) {
#	    die "Unable to unregister with event system\n";
#	}
#	exit(0);

	#TODO: Why does this segfault?
#	if( event_notification_free( $handle, $notification ) == 0 ){
#	    die "Unable to free notification";
#	}
}

#############################################################################

sub spawnTest($$)
{
    my ($linkdest, $testtype) = @_;
    
    use Errno qw(EAGAIN);


    #exit and don't fork if the max number of tests is already being run
    if( getRunningTestsCnt($testtype) >= $MAX_SIMU_TESTS{$testtype} ){
#	print "Testcnt = ".getRunningTestsCnt($testtype);
#	print "Too many running tests of type $testtype\n";
	
	#add this to queue if it doesn't exist already
	#this seach is inefficient... use a hash? sparse array? sorted list?
	my $flag_duplicate = 0;	
	foreach my $element ( @{$waitq{$testtype}} ){
	    if( $element eq $linkdest ){
		$flag_duplicate = 1;
	    }
	}
	if( $flag_duplicate == 0 ){
	    unshift @{$waitq{$testtype}}, $linkdest;
#	    print "WAIT Q: ADDED $linkdest \n";
	}

	return 0;
    }

  FORK:{
      if( my $pid = fork ){
	  #parent
	  #save child pid in test event
	  $testevents{$linkdest}{$testtype}{"pid"} = $pid;
	  $testevents{$linkdest}{$testtype}{"tstamp"} = time_all();

      }elsif( defined $pid ){
	  #child
	  #exec 'ls','-l' or die "can't exec: $!\n";
	  
	  my $filename = createtmpfilename($linkdest,$testtype);

	  #############################
	  ###ADD MORE TEST TYPES HERE###
	  #############################
	  if( $testtype eq "latency" ){
	      #command line for "LATENCY TEST"
#	      print "##########latTest\n";
540
541
	      # one ping, with timeout of 60 sec
	      exec "ping -c 1 -t 60 $linkdest >$filename"
542
543
544
545
546
		  or die "can't exec: $!\n";
	  }elsif( $testtype eq "bw" ){
	      #command line for "BANDWIDTH TEST"
#	      print "###########bwtest\n";
	      exec "$workingdir".
Kirk Webb's avatar
   
Kirk Webb committed
547
		  "iperf -c $linkdest -t 10 -p $iperfport >$filename"
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
		      or die "can't exec: $!";
	  }else{
	      warn "bad testtype: $testtype";
	  }

      }elsif( $! == EAGAIN ){
	  #recoverable fork error, redo;
	  sleep 1;
	  redo FORK;
      }else{ die "can't fork: $!\n";}
  }
}

sub getRunningTestsCnt($)
{
    my ($type) = @_;
    my $testcount = 0;

    #count currently running tests
    for my $destaddr ( keys %testevents ) {
	if( $testevents{$destaddr}{$type}{"pid"} > 0 ){
	    #we have a running process, so inc it's counter
	    $testcount++;
	}
    }
#    print "testcount = $testcount\n";
    return $testcount;
}
#############################################################################
sub parsedata($$)
{
    my $type = $_[0];
    my $raw = $_[1];
    my $parsed;
    $_ = $raw;

#    print "Raw=$_";

    #############################
    ###ADD MORE TEST TYPES HERE###
    #############################
    #latency test
    if( $type eq "latency" ){
591
592
593
594
595
596
597
598
599
600
	if( /time=(.*) ms/ ){
	    $parsed = "$1";
	}elsif( /0 packets received/ ){
	    $parsed = $ERRID{timeout};
	}elsif( /Time to live exceeded/ ){
	    $parsed = $ERRID{ttlexceed};
	}else{
	    $parsed = $ERRID{unknown};
	}
	
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
    }elsif( $type eq "bw" ){
	/\s+(\S*)\s+([MK])bits\/sec/;
	$parsed = $1;
	if( $2 eq "M" ){
	    $parsed *= 1000;
	}
#	print "parsed=$parsed\n";
    }
	   
    return $parsed;
}


#############################################################################
sub printTimeEvents {
    for my $destaddr ( keys %testevents ) {
	for my $testtype ( keys %{ $testevents{$destaddr} } ){
	    print 
		"finished? ".
		$testevents{$destaddr}{$testtype}{"flag_finished"}."\n".
		"scheduled?= ".
		$testevents{$destaddr}{$testtype}{"flag_scheduled"}."\n".
		"testper?= ".
                $testevents{$destaddr}{$testtype}{"testper"}."\n".
		"timeOfNextRun= ".
	        $testevents{$destaddr}{$testtype}{"timeOfNextRun"}."\n" .
		"results_parsed= ".
      	        $testevents{$destaddr}{$testtype}{"results_parsed"}."\n";
	}
    }
}



#############################################################################
636
#MARK_RELIABLE
637
638
sub saveTestToLocalDB($)
{
Leigh B. Stoller's avatar
Leigh B. Stoller committed
639
640
641
642
643
644
645
646
647
648
649
650
    #
    # Find an unused index. Leave zero unused to indicate we ran out.
    #
    my $index;
    
    for ($index = 1; $index < $resultDBlimit; $index++) {
	last
	    if (!exists($reslist{$index}));
    }
    return 0
	if ($index == $resultDBlimit);

651
652
653
    #save result to DB's in files.
    my $results = $_[0];
    my %db;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
654
    my $filename = createDBfilename($index);
655
656
657
658
659
    tie( %db, "DB_File", $filename ) or die "cannot create db file";
    for my $key (keys %$results ){
	$db{$key} = $$results{$key};
    }
    untie %db;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
660
661
662

    $reslist{$index} = createDBfilename($index);
    return $index;
663
664
665
666
667
668
669
670
671
672
673
674
675
}

#############################################################################
sub sendResults($$){
    my $results = $_[0];
    my $index = $_[1];

    my $tuple_res = address_tuple_alloc();
    if (!$tuple_res) { warn "Could not allocate an address tuple\n"; }
    
    %$tuple_res = ( objtype   => "BGMON",
		    objname   => "ops",
		    eventtype => "RESULT"
Kirk Webb's avatar
   
Kirk Webb committed
676
		    , expt      => $evexpt
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
		    , scheduler => 1
		    );

    my $notification_res = event_notification_alloc($handle,$tuple_res);
    if (!$notification_res) { warn "Could not allocate notification\n"; }


    if( 0 == event_notification_put_string( $handle,
					    $notification_res,
					    "linksrc",
					    $results->{sourceaddr} ) )
    { warn "Could not add attribute to notification\n"; }


    if( 0 == event_notification_put_string( $handle,
					    $notification_res,
					    "linkdest",
					    $results->{destaddr} ) )
    { warn "Could not add attribute to notification\n"; }
    
    if( 0 == event_notification_put_string( $handle,
					    $notification_res,
					    "testtype",
					    $results->{testtype} ) )
    { warn "Could not add attribute to notification\n"; }

    if( 0 == event_notification_put_string( $handle,
					    $notification_res,
					    "result",
					    $results->{result} ) )
    { warn "Could not add attribute to notification\n"; }

    if( 0 == event_notification_put_string( $handle,
					    $notification_res,
					    "tstamp",
					    $results->{tstamp} ) )
    { warn "Could not add attribute to notification\n"; }

Leigh B. Stoller's avatar
Leigh B. Stoller committed
715
716
717
718
719
    if( 0 == event_notification_put_string( $handle,
					    $notification_res,
					    "index",
					    "$index" ) )
    { warn "Could not add attribute to notification\n"; }
720

Leigh B. Stoller's avatar
Leigh B. Stoller committed
721
    print "Sending results to ops. Index: $index\n";
722

Leigh B. Stoller's avatar
Leigh B. Stoller committed
723
724
    if (!event_notify($handle, $notification_res)) {
	warn("could not send test event notification");
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
    }

    if( event_notification_free( $handle, $notification_res ) == 0 ){
	die "unable to free notification_res";
    }
}

#############################################################################
sub time_all()
{
    package main;
    require 'sys/syscall.ph';
    my $tv = pack("LL",());
    syscall( &SYS_gettimeofday, $tv, undef ) >=0
	or warn "gettimeofday: $!";
    my ($sec, $usec) = unpack ("LL",$tv);
    return $sec + ($usec / 1_000_000);
#    return time();
}

#############################################################################

sub createtmpfilename($$)
{
    return "$workingdir$_[0]-$_[1].tmp";
}

sub createDBfilename($)
{
    return "$workingdir$_[0].bgmonbdb";
}


#############################################################################

#send a dummy event to reconnect the event system if the node has lost it
sub sendBlankNotification
{
    my $tuple_res = address_tuple_alloc();
    if (!$tuple_res) { warn "Could not allocate an address tuple\n"; }
    
    %$tuple_res = ( objtype   => "NOTHING",
		    objname   => "no_name",
		    eventtype => "no_event"
Kirk Webb's avatar
   
Kirk Webb committed
769
		    , expt      => $evexpt
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
		    , scheduler => 1
		    );

    my $notification_res = event_notification_alloc($handle,$tuple_res);

    #send notification
    if (!event_notify($handle, $notification_res)) {
	warn("could not send test event notification");
    }

    if( event_notification_free( $handle, $notification_res ) == 0 ){
	die "unable to free notification_res";
    }
}

#############################################################################

sub detectHang($)
{
    my ($nodeid) = @_;
    my $TIMEOUT_NUM_PER = 5;

    if( 
	$testevents{$nodeid}{bw}{flag_scheduled} == 1 &&
	time_all() > $testevents{$nodeid}{bw}{timeOfNextRun} +
	$testevents{$nodeid}{bw}{testper} * $TIMEOUT_NUM_PER )
    {
	return "bw";
    }
    
    return "nohang";
}