Commit 9f2143ce authored by Mike Hibler's avatar Mike Hibler

Archive my log parsing stuff so I don't lose it again.

parent 1a0b096d
This diff is collapsed.
#!/usr/bin/perl -w
#
# Extract testbed mail of interest. Takes one or more mboxs of interest
# and creates a new file with only the messages of interest. Duplicates
# are ignored. Note that the resulting file is NOT necessarily in date order.
#
# Works for 2000 and 2001 mail.
#
# cat testbed*.mail | extracttbmail.pl > tb.mail
#
#
# All messages must match this
# Note the ?: to avoid clustering since we use this string in a clustering
# context below.
#
my $TB_RE = '^(?:TESTBED|EMULAB\.NET): ';
#
# Things we explicitly don't care about.
# All will start with TB_RE prefix which has been stripped.
#
my @ignore_pat = (
'^New Project',
'^New Group',
'^New Node',
'^Delete User',
'^Project',
'^Group',
'^User',
'^Account',
'^Node',
'^Virtual Node',
'^Password',
'^Genlastlog',
'^Exports Setup',
'^Frisbee',
'^Image Creation',
'^Membership',
'^Mysqld Watchdog',
'^Named Setup',
'^Reload Daemon',
'^Stated ',
'^SFS ',
'^SSH ',
'^W[Ee][Bb] ',
'^Widearea',
'^Your CD ',
'^changeuid ',
'^genelists',
'^idleswap ',
'^jabbersetup',
'^mkproj ',
'^mkgroup ',
'^modgroups',
'^mkacct ',
'^mkusercert ',
'^os_select ',
'^tbacct ',
'^rmproj ',
'^rmgroup ',
'^rmuser ',
'^snmpit',
'^setgroups ',
'^addpubkey ',
'^DB',
'^TBExpt',
'^Batch Mode Experiment Removed',
'^Swap or Terminate Request',
'^Survey ',
'^Failed',
'^plab',
'^Lease',
'^Sliver',
'^Testsuite',
'^Linktest',
'^Testbed Audit',
'^WARNING: Experiment Configuration',
'^Experiment Configure Failure',
'^Experiment Swapin Failure',
'^Files accessed by',
'^Failure importing',
'^Image Creation Failure',
'^Event System Failure',
'^Termination Failure',
'^Idle-Swap Warning',
'^Auto-Swap Warning',
'^Max Duration Warning',
'^Parser Exceeded CPU Limit',
'^Login ID requested by ',
'^No power outlet ',
'^Power cycle nodes:',
'^Robot Lab',
'^Grab WebCam',
'^OPS has rebooted',
'^DHCPD died',
'^USRP ',
'^ElabInElab',
'^Panic Button',
'^Mailman list',
'^WARNING: power controller',
'^Duplicates in plab',
'^Plabdaemon',
'^WARNING: PLAB',
'plab nodes revived\.',
'[Nn]ew plab node',
'Swap or Terminate Experiment',
'^Firewalled experiment Swapout failed',
'^Batch Mode.* Failure',
'^Swap ?(?:in|out|restart|modify) Failure',
'.* swap settings changed',
'.* User Key',
'.* Project Join Request',
'.* Email Lists',
'.* down\?',
'.*acct-ctrl Failed',
'.* WA node ',
'^\d+ (?:virtual )?nodes? (?:is|are) down',
'^\d+ PCs? idle \d+ hour',
'\(fwd\)$',
);
my $msgs = 0;
my $goodmsgs = 0;
my $dupmsgs = 0;
my @curmsg = ();
my %seen;
my $whine = 0;
sub processmsg(@);
# compile REs for speed
my @ignore_re = map { qr/$_/ } @ignore_pat;
while ($line = <STDIN>) {
#
# Found the start of a message.
# Process the message that just ended before starting afresh
#
if ($line =~ /^From /) {
processmsg(@curmsg)
if (@curmsg);
@curmsg = ();
}
push(@curmsg, $line);
}
processmsg(@curmsg)
if (@curmsg);
print STDERR "Saved $goodmsgs of $msgs messages";
print STDERR ", ($dupmsgs duplicates)"
if ($dupmsgs);
print STDERR "\n";
exit(0);
sub processmsg(@) {
my (@lines) = @_;
my $msgid = "";
my $wantthis = 0;
my $seensubject = 0;
for my $line (@lines) {
if ($line =~ /^Message-[Ii][Dd]: (<.*>)$/) {
$msgid = $1;
last if ($seensubject);
next;
}
if ($line =~ /^Subject: (.*)/) {
if (isgoodsubject($1)) {
#
# To filter out old messages from MINI, Leigh's home test
# and other forwarded messages, we restrict the message
# ID to cs.utah.edu or emulab.net
#
if ($msgid) {
if ($msgid !~ /\.emulab\.net/ &&
$msgid !~ /\.cs\.utah\.edu/) {
last;
}
if ($msgid =~ /mini\.emulab\.net/) {
last;
}
}
$wantthis = 1;
if ($seensubject) {
print STDERR "*** $msgid: multiple subject lines\n".
" Original subject: $seensubject".
" Check forwarded message filter.\n";
}
}
$seensubject = $line;
last if ($msgid ne "");
next;
}
}
if ($msgid ne "") {
# filter out duplicates
if ($seen{$msgid}) {
print STDERR "Duplicate message $msgid ignored\n"
if ($whine);
$dupmsgs++;
$msgs++;
return;
}
$seen{$msgid} = 1;
}
if ($wantthis) {
# Output lines
for my $line (@lines) {
print $line;
}
$goodmsgs++;
}
$msgs++;
}
sub isgoodsubject($)
{
my ($str) = @_;
my $pat;
# must start with the magic string
if ($str !~ /$TB_RE(.*)/o) {
return 0;
}
$str = $1;
for $pat (@ignore_re) {
if ($str =~ /$pat/) {
return 0;
}
}
return 1;
}
This diff is collapsed.
#!/bin/sh
timeit=/usr/bin/time
if [ ! -x ../extractmail.pl -o ! -x ../parsemail.pl -o ! -x ../checkrecords.pl -o ! -r tbmail.pm ]; then
echo 'No scipts! Wrong working directory?'
exit 1
fi
mv filtered ofiltered
mv extract.log oextract.log
cat *.mail | $timeit ../extractmail.pl >filtered 2>extract.log || {
echo 'extracttbmail failed!'
mv oextract.log extract.log
mv ofiltered filtered
exit 1
}
mv records orecords
mv parse.log oparse.log
$timeit ../parsemail.pl <filtered >records 2>parse.log || {
echo 'parsemail raw record generation failed!'
mv oparse.log parse.log
mv orecords records
mv oextract.log extract.log
mv ofiltered filtered
exit 1
}
mv fixed ofixed
mv fixup.log ofixup.log
$timeit ../checkrecords.pl -f <records >fixed 2>fixup.log || {
echo 'parsemail fixup failed!'
mv ofixup.log fixup.log
mv ofixed fixed
mv oparse.log parse.log
mv orecords records
mv oextract.log extract.log
mv ofiltered filtered
exit 1
}
diff oextract.log extract.log | grep Saved >extract.diffs && {
echo '*** WARNING: diffs in message extraction, see extract.diffs'
}
diff oparse.log parse.log | grep Processed >parse.diffs && {
echo '*** WARNING: diffs in message parsing, see parse.diffs'
}
diff ofixed fixed >fixed.diffs || {
echo '*** WARNING: diffs in fixed records, see fixed.diffs'
}
exit 0;
#!/usr/bin/perl -w
#
# Check a list of Emulab records, optionally adding fixup records
# to take care of inconsistencies.
#
use Getopt::Std;
use tbmail;
sub usage()
{
exit(1);
}
my $optlist = "hl:";
my $tolist = 10;
my $expts = 0;
my $swapins = 0;
my $first;
my $last;
my %peruser = ();
my %perproj = ();
my %pernode = ();
my %byeid = ();
my %iscreate = (
PRELOAD() => 1,
CREATE1() => 1,
CREATE2() => 1,
BATCHCREATE() => 1,
);
my %isswapin = (
CREATE1() => 1,
CREATE2() => 1,
SWAPIN() => 1,
BATCHCREATE() => 1,
BATCHSWAPIN() => 1,
);
sub dorecord($);
#
# Parse command arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
usage();
}
if (defined($options{"h"})) {
usage();
}
if (defined($options{"l"})) {
$tolist = $options{"l"};
}
my $lineno = 1;
while (my $line = <STDIN>) {
my $rec = parserecord($line);
if ($rec) {
dorecord($rec);
} else {
print STDERR "*** Bad record on line $lineno:\n";
print STDERR " '$line'\n";
}
$lineno++;
}
@projlist = sort { $perproj{$b} <=> $perproj{$a} } keys(%perproj);
@userlist = sort { $peruser{$b} <=> $peruser{$a} } keys(%peruser);
@nodelist = sort { $pernode{$b} <=> $pernode{$a} } keys(%pernode);
@eidlist = sort { $byeid{$b} <=> $byeid{$a} } keys(%byeid);
#
# And the stats
#
print "$expts experiments, $swapins swapins in ", $lineno - 1, " records\n";
print " First: ", scalar(localtime($first)), "\n";
print " Last: ", scalar(localtime($last)), "\n";
print "Top $tolist projects (swapins):\n";
my $did = 0;
for $proj (@projlist) {
printf "%20s: %d\n", $proj, $perproj{$proj};
last if (++$did == $tolist);
}
print "Top $tolist users (swapins):\n";
$did = 0;
for $user (@userlist) {
printf "%20s: %d\n", $user, $peruser{$user};
last if (++$did == $tolist);
}
# goofy shit
print "Top $tolist nodes (swapins):\n";
$did = 0;
for $node (@nodelist) {
printf "%20s: %d\n", $node, $pernode{$node};
last if (++$did == $tolist);
}
print "Top $tolist experiment names:\n";
$did = 0;
for $eid (@eidlist) {
printf "%20s: %d\n", $eid, $byeid{$eid};
last if (++$did == $tolist);
}
sub dorecord($) {
my $rec = shift;
my ($stamp, $pid, $eid, $uid, $action, $msgid, @nodes) = @{$rec};
if ($iscreate{$action}) {
$expts++;
$first = $stamp if (!$first);
$last = $stamp;
$byeid{$eid}++;
}
if ($isswapin{$action}) {
$swapins++;
$peruser{$uid}++;
$perproj{$pid}++;
map { $pernode{$_}++ } @nodes;
}
}
#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
# All rights reserved.
#
package tbmail;
use Exporter;
@ISA = "Exporter";
@EXPORT = qw(makerecord parserecord printrecord sortrecords
REC_STAMP REC_PID REC_EID REC_UID REC_ACTION REC_MSGID REC_NODES
BAD IGNORE PRELOAD MODIFY MODIFYADD MODIFYSUB CREATE1 CREATE2
ISCREATE SWAPIN SWAPOUT SWAPOUTMAYBE TERMINATE BATCH BATCHIGNORE
BATCHCREATE BATCHCREATEMAYBE BATCHTERM BATCHTERMMAYBE BATCHSWAPIN
BATCHSWAPOUT BATCHPRELOAD ACTIONSTR
);
# Must come after package declaration!
use English;
# Load up the paths. Done like this in case init code is needed.
BEGIN
{
}
#
# Experiment records
# Record format is:
# timestamp pid eid uid action msgid nodelist
#
sub REC_STAMP { return 0 };
sub REC_PID { return 1 };
sub REC_EID { return 2 };
sub REC_UID { return 3 };
sub REC_ACTION { return 4 };
sub REC_MSGID { return 5 };
sub REC_NODES { return 6 };
#
# Actions
#
sub BAD() { return 0 };
sub IGNORE() { return 1 };
sub PRELOAD() { return 2 };
sub MODIFY() { return 3 };
sub MODIFYADD() { return 4 };
sub MODIFYSUB() { return 5 };
sub CREATE1() { return 11 }; # 2000-ish creation message
sub CREATE2() { return 12 }; # 2001-ish creation message
sub ISCREATE($)
{ my $id = shift; return ($id && $id >= CREATE1 && $id <= CREATE2); }
sub SWAPIN() { return 21 };
sub SWAPOUT() { return 31 };
sub SWAPOUTMAYBE() { return 32 };
sub TERMINATE() { return 41 };
sub BATCH() { return 50 };
sub BATCHIGNORE() { return 51 };
sub BATCHCREATE() { return 52 };
sub BATCHCREATEMAYBE() { return 53 };
sub BATCHTERM() { return 54 };
sub BATCHTERMMAYBE() { return 55 };
sub BATCHSWAPIN() { return 56 };
sub BATCHSWAPOUT() { return 57 };
sub BATCHPRELOAD() { return 58 };
my %actiontostr = (
BAD() => 'BAD',
IGNORE() => 'IGNORE',
PRELOAD() => 'PRELOAD',
MODIFY() => 'MODIFY',
MODIFYADD() => 'MODIFYADD',
MODIFYSUB() => 'MODIFYSUB',
CREATE1() => 'OCREATE',
CREATE2() => 'CREATE',
SWAPIN() => 'SWAPIN',
SWAPOUT() => 'SWAPOUT',
TERMINATE() => 'TERMINATE',
BATCH() => 'BATCH',
BATCHCREATE() => 'BATCHCREATE',
BATCHTERM() => 'BATCHTERM',
BATCHSWAPIN() => 'BATCHSWAPIN',
BATCHSWAPOUT() => 'BATCHSWAPOUT',
);
my %strtoaction = (
'BAD' => BAD(),
'IGNORE' => IGNORE(),
'PRELOAD' => PRELOAD(),
'MODIFY' => MODIFY(),
'MODIFYADD' => MODIFYADD(),
'MODIFYSUB' => MODIFYSUB(),
'OCREATE' => CREATE1(),
'CREATE' => CREATE2(),
'SWAPIN' => SWAPIN(),
'SWAPOUT' => SWAPOUT(),
'TERMINATE' => TERMINATE(),
'BATCH' => BATCH(),
'BATCHCREATE' => BATCHCREATE(),
'BATCHTERM' => BATCHTERM(),
'BATCHSWAPIN' => BATCHSWAPIN(),
'BATCHSWAPOUT'=> BATCHSWAPOUT(),
);
sub ACTIONSTR($) { my $a = shift; return $actiontostr{$a} };
#
# Make sure we create/consume records in a consistent way
#
sub makerecord($$$$$$@) {
my ($stamp, $pid, $eid, $uid, $action, $msgid, @nodes) = @_;
return [ $stamp, $pid, $eid, $uid, $action, $msgid, @nodes ];
}
sub parserecord($) {
my $l = shift;
my ($stamp, $pid, $eid, $uid, $action, $msgid, @nodes) = split(/\s+/, $l);
$action = $strtoaction{$action};
return undef if (!$action);
return [ $stamp, $pid, $eid, $uid, $action, $msgid, @nodes ];
}
#
# Sort records: first by timestamp, then by pid/eid, uid, real/fake
#
sub recsort {
return $a->[REC_STAMP] <=> $b->[REC_STAMP] ||
$a->[REC_PID] cmp $b->[REC_PID] ||
$a->[REC_EID] cmp $b->[REC_EID] ||
$a->[REC_UID] cmp $b->[REC_UID] ||
$a->[REC_MSGID] cmp $b->[REC_MSGID];
}
sub sortrecords(@) {
return sort recsort @_;
}
#
# Sort nodes: pcs before sharks, then in numeric order
#
sub nodesort {
# sort by name prefix
my $as = $1 if ($a =~ /^(\D+)/);
my $bs = $1 if ($b =~ /^(\D+)/);
if (my $sc = $as cmp $bs) {
return $sc;
}
# shark hack, take out '-'
(my $an = $a) =~ s/-//;
(my $bn = $b) =~ s/-//;
# sort by number
$an = $1 if ($an =~ /(\d+)$/);
$bn = $1 if ($bn =~ /(\d+)$/);
return $an <=> $bn if ($an && $bn);
return $a cmp $b;
}
sub printrecord($$) {
my ($rec, $sortem) = @_;
my ($stamp, $pid, $eid, $uid, $action, $msgid, @nodes) = @{$rec};
my $actstr = $actiontostr{$action};
if ($sortem && @nodes > 1) {
@nodes = sort nodesort @nodes;
}
print "$stamp $pid $eid $uid $actstr $msgid";
if (@nodes > 0) {
print " ", join(" ", @nodes);
}
print "\n";
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment