From 45f997fd1ade5c186fae5d7b9c3f36d6fdeec825 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Mon, 19 Dec 2005 20:07:56 +0000 Subject: [PATCH] Updates to to Error Logging API Code. You should start seeing much better error messages coming from my system. Errors coming from parse.proxy and assign (the two most frequent sources of errors) should now be concise and to the point. Errors coming from libosload/libreboot (the next most frequent source of errors) should now also be much better, but not perfect. Getting perfect errors will likely a rework of how errors are handled in libosload/libreboot, just adding tberror/tbwarn/tbnotice calls is not enough. I can do this at a latter date if necessary. A few minor database changes. Some changes to the API. A few bug fixes. Lots of tberror/tbwarn/tbnotice added to scripts. Since assign is a C program, and at this time my API is perl only, I wrote a second wrapper around assign, assign_wrapper2. When assign fails errors are now parsed in assign_wrapper2, sent to stderr and logged. This means that RunAssign() just returns when assign fails rather than echoing some of assign.log output and then quiting. The output to the activity log remains unchanged. Since "parse.proxy" is run from ops I couldn't use my API in it, even though it is a perl program. Instead I parse the errors coming form it in parse-ns. --- configure | 1 + configure.in | 1 + sql/database-create.sql | 13 +- sql/database-migrate.txt | 15 +- sql/libtblog-orig.sql | 73 ++++++ sql/libtblog.sql | 13 +- tbsetup/GNUmakefile.in | 3 +- tbsetup/assign_wrapper.in | 52 ++--- tbsetup/assign_wrapper2.in | 100 +++++++++ tbsetup/batchexp.in | 76 +++---- tbsetup/eventsys_control.in | 25 +-- tbsetup/frisbeeimage.in | 22 +- tbsetup/frisbeelauncher.in | 27 +-- tbsetup/libosload.pm.in | 95 ++++---- tbsetup/libreboot.pm.in | 59 +++-- tbsetup/libtblog.pm.in | 433 +++++++++++++++++++++++++++--------- tbsetup/ns2ir/parse-ns.in | 79 +++++-- tbsetup/os_setup.in | 91 ++++---- tbsetup/portstats.in | 6 +- tbsetup/snmpit.in | 34 +-- tbsetup/staticroutes.in | 9 +- tbsetup/swapexp.in | 89 +++----- tbsetup/tarfiles_setup.in | 18 +- tbsetup/tbprerun.in | 16 +- tbsetup/tbswap.in | 27 +-- 25 files changed, 867 insertions(+), 510 deletions(-) create mode 100644 sql/libtblog-orig.sql create mode 100644 tbsetup/assign_wrapper2.in diff --git a/configure b/configure index c7146f5e3..45a6b102e 100755 --- a/configure +++ b/configure @@ -2265,6 +2265,7 @@ outfiles="$outfiles Makeconf GNUmakefile \ tbsetup/tbprerun tbsetup/tbswap tbsetup/tbend tbsetup/tbrestart \ tbsetup/tbreport tbsetup/named_setup tbsetup/exports_setup \ tbsetup/checkpass/GNUmakefile tbsetup/assign_wrapper tbsetup/ptopgen \ + tbsetup/assign_wrapper2 \ tbsetup/frisbeelauncher tbsetup/node_update tbsetup/webnodeupdate \ tbsetup/savelogs tbsetup/setgroups tbsetup/websetgroups \ tbsetup/savelogs.proxy \ diff --git a/configure.in b/configure.in index 3bc9b66d3..eafb41189 100755 --- a/configure.in +++ b/configure.in @@ -703,6 +703,7 @@ outfiles="$outfiles Makeconf GNUmakefile \ tbsetup/tbprerun tbsetup/tbswap tbsetup/tbend tbsetup/tbrestart \ tbsetup/tbreport tbsetup/named_setup tbsetup/exports_setup \ tbsetup/checkpass/GNUmakefile tbsetup/assign_wrapper tbsetup/ptopgen \ + tbsetup/assign_wrapper2 \ tbsetup/frisbeelauncher tbsetup/node_update tbsetup/webnodeupdate \ tbsetup/savelogs tbsetup/setgroups tbsetup/websetgroups \ tbsetup/savelogs.proxy \ diff --git a/sql/database-create.sql b/sql/database-create.sql index 3c5242e0e..c70745040 100644 --- a/sql/database-create.sql +++ b/sql/database-create.sql @@ -922,11 +922,12 @@ CREATE TABLE log ( invocation int(10) unsigned NOT NULL default '0', parent int(10) unsigned NOT NULL default '0', script smallint(3) NOT NULL default '0', - level smallint(2) NOT NULL default '0', + level tinyint(2) NOT NULL default '0', + sublevel tinyint(2) NOT NULL default '0', priority smallint(3) NOT NULL default '0', inferred tinyint(1) NOT NULL default '0', cause varchar(32) NOT NULL default '', - type enum('normal','entering','exiting','thecause') NOT NULL default 'normal', + type enum('normal','entering','exiting','thecause','extra','summary') NOT NULL default 'normal', relevant tinyint(1) NOT NULL default '0', mesg text NOT NULL, PRIMARY KEY (seq), @@ -1625,9 +1626,9 @@ CREATE TABLE portmap ( CREATE TABLE priorities ( priority smallint(3) NOT NULL default '0', - name varchar(8) NOT NULL default '', + priority_name varchar(8) NOT NULL default '', PRIMARY KEY (priority), - UNIQUE KEY name (name) + UNIQUE KEY name (priority_name) ) TYPE=MyISAM; -- @@ -1758,9 +1759,9 @@ CREATE TABLE scheduled_reloads ( CREATE TABLE scripts ( script smallint(3) NOT NULL auto_increment, - name varchar(24) NOT NULL default '', + script_name varchar(24) NOT NULL default '', PRIMARY KEY (script), - UNIQUE KEY id (name) + UNIQUE KEY id (script_name) ) TYPE=MyISAM; -- diff --git a/sql/database-migrate.txt b/sql/database-migrate.txt index 14c8129f6..bbbd004fd 100644 --- a/sql/database-migrate.txt +++ b/sql/database-migrate.txt @@ -2867,7 +2867,7 @@ last_net_act,last_cpu_act,last_ext_act); 4.16: Add new tables for the logging subsystem (libdblog) - mysql tbdb < libtblog.sql + mysql tbdb < libtblog-orig.sql 4.17: Add linktest feature. @@ -3021,3 +3021,16 @@ last_net_act,last_cpu_act,last_ext_act); notes text, PRIMARY KEY (uid_idx) ) TYPE=MyISAM; + +4.25: Modify tables for the logging subsystem + + alter table log + modify level tinyint(2) not null, + add sublevel tinyint(2) NOT NULL default 0 after level, + modify type + enum('normal','entering','exiting','thecause','extra','summary') + NOT NULL default 'normal'; + + alter table scripts change name script_name varchar(24) NOT NULL default ''; + + alter table priorities change name priority_name varchar(8) NOT NULL; diff --git a/sql/libtblog-orig.sql b/sql/libtblog-orig.sql new file mode 100644 index 000000000..09562d36a --- /dev/null +++ b/sql/libtblog-orig.sql @@ -0,0 +1,73 @@ +-- MYSQL TABLES used for logging subsystem + +CREATE TABLE log ( + seq int unsigned NOT NULL auto_increment, + stamp int unsigned NOT NULL, + pidx int NOT NULL, -- unique project id + uid int default NULL, -- numeric user id + session int unsigned NOT NULL, -- session id (1) + invocation int unsigned NOT NULL, -- invocation id - unique id for a + -- particular execution of a script (1) + parent int unsigned NOT NULL, -- invocation id of the parent script + script smallint(3) NOT NULL, -- numeric id for the script name + level smallint(2) NOT NULL , -- depth of the script in the call chain + priority smallint(3) NOT NULL, -- syslog priority + inferred tinyint(1) NOT NULL, -- 0 - log created with tblog + -- 1 - log created by catching a die or warn + -- 2 - log created with a normal print and + -- given the non-default priority + -- 3 - log created with a normal print and + -- given the default priority + cause varchar(32) NOT NULL, -- string identifying the "cause" of the error + type enum('normal','entering','exiting','thecause') + NOT NULL default 'normal',-- type of error: + -- normal: normal error generated by tblog + -- entering: auto gen when a script starts + -- exiting: auto gen when a script exists (2) + -- thecause: created by tblog_find_error + relevant tinyint(1) NOT NULL, -- if the error was considered relevent + -- by tblog_find_error + mesg text NOT NULL, -- the text of the message + PRIMARY KEY (seq), + KEY (session) +); + +-- (1) These ids are equal to the sequence number of the "entering" log +-- message +-- (2) Due to a number of factors every "exiting" message in not +-- garanteed to have a 1-1 relation with the "entering" message. +-- The two major ones are +-- (1) A Fork not followed by a "exec" will most likely to cause +-- multiple exiting messages. (2) If "exec" is used the script +-- will not have an exiting message. + +-- NOTE: It may be beneficial to split this table into three: +-- session pidx uid +-- invocation session parent script level +-- seq stamp [session] invocation priority inferred cause type relevant mesg +-- ... + +CREATE TABLE scripts ( + script smallint(3) NOT NULL auto_increment, + name varchar(24) NOT NULL default '', + PRIMARY KEY (script), + UNIQUE KEY id (name) +); + +CREATE TABLE priorities ( + priority smallint(3) NOT NULL, + name varchar(8) NOT NULL, + primary key (priority), + unique key (name) +); + +insert into priorities values (000, 'EMERG'); +insert into priorities values (100, 'ALERT'); +insert into priorities values (200, 'CRIT'); +insert into priorities values (300, 'ERR'); +insert into priorities values (400, 'WARNING'); +insert into priorities values (500, 'NOTICE'); +insert into priorities values (600, 'INFO'); +insert into priorities values (700, 'DEBUG'); + +insert into exported_tables values ('state_timeouts'); diff --git a/sql/libtblog.sql b/sql/libtblog.sql index 09562d36a..179f74b4c 100644 --- a/sql/libtblog.sql +++ b/sql/libtblog.sql @@ -10,7 +10,8 @@ CREATE TABLE log ( -- particular execution of a script (1) parent int unsigned NOT NULL, -- invocation id of the parent script script smallint(3) NOT NULL, -- numeric id for the script name - level smallint(2) NOT NULL , -- depth of the script in the call chain + level tinyint(2) NOT NULL , -- depth of the script in the call chain + sublevel tinyint(2) NOT NULL, -- sub level priority smallint(3) NOT NULL, -- syslog priority inferred tinyint(1) NOT NULL, -- 0 - log created with tblog -- 1 - log created by catching a die or warn @@ -19,12 +20,16 @@ CREATE TABLE log ( -- 3 - log created with a normal print and -- given the default priority cause varchar(32) NOT NULL, -- string identifying the "cause" of the error - type enum('normal','entering','exiting','thecause') + type enum('normal','entering','exiting','thecause','extra','summary') NOT NULL default 'normal',-- type of error: -- normal: normal error generated by tblog -- entering: auto gen when a script starts -- exiting: auto gen when a script exists (2) -- thecause: created by tblog_find_error + -- extra: extra information to normal info + -- at a lower level + -- summary: useful summary of errors, + -- ignore all info at lower levels relevant tinyint(1) NOT NULL, -- if the error was considered relevent -- by tblog_find_error mesg text NOT NULL, -- the text of the message @@ -49,14 +54,14 @@ CREATE TABLE log ( CREATE TABLE scripts ( script smallint(3) NOT NULL auto_increment, - name varchar(24) NOT NULL default '', + script_name varchar(24) NOT NULL default '', PRIMARY KEY (script), UNIQUE KEY id (name) ); CREATE TABLE priorities ( priority smallint(3) NOT NULL, - name varchar(8) NOT NULL, + priority_name varchar(8) NOT NULL, primary key (priority), unique key (name) ); diff --git a/tbsetup/GNUmakefile.in b/tbsetup/GNUmakefile.in index ca8eeabd5..d878d912d 100644 --- a/tbsetup/GNUmakefile.in +++ b/tbsetup/GNUmakefile.in @@ -40,7 +40,8 @@ FSBIN_STUFF = exports_setup.proxy LIBEXEC_STUFF = rmproj wanlinksolve wanlinkinfo \ os_setup mkexpdir console_setup webnscheck webreport \ webendexp webbatchexp webpanic \ - assign_wrapper assign_prepass ptopgen webnodeupdate \ + assign_wrapper assign_wrapper2 \ + assign_prepass ptopgen webnodeupdate \ webdelay_config webnodehistory \ webrmgroup webswapexp webnodecontrol webeventsys_control \ webmkgroup websetgroups webmkproj webmodgroups \ diff --git a/tbsetup/assign_wrapper.in b/tbsetup/assign_wrapper.in index d8ff683e6..6fe8fc82a 100644 --- a/tbsetup/assign_wrapper.in +++ b/tbsetup/assign_wrapper.in @@ -113,6 +113,7 @@ use libtestbed; use Node; use libadminctrl; use libtblog; +use libtblog qw(*SOUT *SERR); # # assign_wrapper Settings @@ -186,11 +187,9 @@ TBDebugTimeStampsOn(); # # All exits happen via this function! # -sub fatal ($) +sub fatal (@) { - my($message) = @_; - - tberror $message; + &tberror(@_); # We next go to the END block below. exit($WRAPPER_FAILED); @@ -200,7 +199,7 @@ sub fatal ($) # We want warnings to cause assign_wrapper to exit abnormally. # We will come through here no matter how we exit though. # -$SIG{__WARN__} = sub { print STDERR $_[0];$warnings++; }; +$SIG{__WARN__} = sub { tbwarn $_[0];$warnings++; }; END { # Watch for getting here cause of a die()/exit() statement someplace. @@ -211,7 +210,7 @@ END { } if ($warnings > 0) { - tbwarn "$warnings.\n"; + tberror "$warnings warnings.\n"; $exitcode |= $WRAPPER_FAILED; } @@ -750,7 +749,7 @@ while (1) { $precheck = 1; my $retval = RunAssign(); if ($retval != 0) { - fatal("Experiment can not be run on an empty testbed. Please fix the experiment."); + fatal({type=>'extra'}, "Experiment can not be run on an empty testbed. Please fix the experiment."); } print("Assign succeeded on an empty testbed.\n"); $precheck = 0; @@ -887,20 +886,20 @@ sub RunAssign () # the parent. # POSIX::setsid(); - exec("nice $cmd $cmdargs > assign.log"); + exec("nice assign_wrapper2 $cmd $cmdargs > assign.log"); die "Could not start assign!\n"; } # Check cancel flag before continuing. TBGetCancelFlag($pid, $eid, \$canceled); if ($canceled) { - print("Cancel flag set; aborting assign run!\n"); + tbnotice("Cancel flag set; aborting assign run!\n"); return -1; } # Check for possible full filesystem ... if (-z "assign.log") { - print("assign.log is zero length! Stopping ...\n"); + tbnotice("assign.log is zero length! Stopping ...\n"); return -1; } @@ -916,37 +915,12 @@ sub RunAssign () printdb "Reading assign results.\n"; # - # We no longer care what assign has to say when it fails! Just - # tell the caller whether we want to keep trying or not. We still - # send some of the goo to the output stream so that Rob can quickly - # deduce what what wrong. + # We no longer care what assign has to say when it fails! + # Any relevent info was already sent to stderr so just + # tell the caller whether we want to keep trying or not. # if ($assignexitcode) { - my $violations = 0; - - print "ASSIGN FAILED:\n"; - while ( !~ /^[\w\s]*precheck:$/) {} - while () { - chop; - /^\w*\s*precheck:$/ && do { - next; - }; - /^With ([0-9]+) violations$/ && do { - $violations = $1; - last; - }; - print $_ . "\n"; - } - if ($violations) { - while ( !~ /^Violations:/) {} - while () { - if (/^Nodes:/) { - last; - } - print "$_"; - } - } - close(ASSIGNFP); + close ASSIGNFP; return (($assignexitcode == 1) ? 1 : -1); } diff --git a/tbsetup/assign_wrapper2.in b/tbsetup/assign_wrapper2.in new file mode 100644 index 000000000..de2b432ad --- /dev/null +++ b/tbsetup/assign_wrapper2.in @@ -0,0 +1,100 @@ +#!/usr/bin/perl -w +# +# EMULAB-COPYRIGHT +# Copyright (c) 2000-2005 University of Utah and the Flux Group. +# All rights reserved. +# + +# +# Hack to get assign error messages into the database correctly. Will +# be removed once the API is ported to other languages besides perl +# + +BEGIN {$FAKE_SCRIPTNAME = $ARGV[0];} + +use lib "@prefix@/lib"; +use libtblog; +use libtblog qw(dblog *SOUT *SERR); + +use strict; + +open P, join(' ', @ARGV, ' |'); + +my @out; +my @err; +my $obvious_error = 0; + +while (

) { + print SOUT $_; + push @out, $_; +} +close P; + +my $exitcode = $? >> 8; + +# +# Now parse out relevent info and echo it to stderr. Also, pull out +# any obvious errors (prefixed with ***) and log them sepertly. The +# rest will go in te database as one big error. +# +if ($exitcode) { + my $violations = 0; + + # Pull out relevent info + print SERR "ASSIGN FAILED:\n"; + while ((shift @out) !~ /^[\w\s]*precheck:$/) {} + while (($_ = shift @out)) { + /^[\w\s]*precheck:$/ && do { + next; + }; + /^With ([0-9]+) violations$/ && do { + $violations = $1; + last; + }; + print SERR $_; + push @err, $_; + } + if ($violations) { + while ((shift @out) !~ /^Violations:/) {} + while (($_ = shift @out)) { + if (/^Nodes:/) { + last; + } + print SERR $_; + push @err, $_; + } + } + + # See if there are any obvious errors + my $err = ''; + while (($_ = shift @err)) { + $err .= $_; + if (/^(\s*)\*\*\*+\s*(.+)/) { + $obvious_error = 1; + my $space = $1; + my $mesg = $2; + while (@err && $err[0] =~ /^$space \s*(.+)/) { + $mesg .= "\n$1"; + shift @err; + } + my $sublevel = length($space) > 0 ? 1 : 0; + if ($mesg =~ s/^warning:\s+//i) { + dblog(TBLOG_WARNING, {sublevel=>$sublevel}, $mesg); + } else { + dblog(TBLOG_ERR, {sublevel=>$sublevel}, $mesg); + } + } + } + + # log all relevent output as one entry unless an obvious_error was + # already found + dblog(TBLOG_ERR, {}, $err) unless ($obvious_error); + + # create a log entry that assign failed + my %parms; + $parms{sublevel} = -1; + $parms{type} = 'extra' unless $obvious_error; + dblog(TBLOG_ERR, \%parms, "Assign Failed."); +} + +exit $exitcode; diff --git a/tbsetup/batchexp.in b/tbsetup/batchexp.in index 258614e9c..a00bd6b81 100755 --- a/tbsetup/batchexp.in +++ b/tbsetup/batchexp.in @@ -146,16 +146,14 @@ my $committed = 0; # Verify user and get his DB uid. # if (! UNIX2DBUID($UID, \$dbuid)) { - die("*** $0:\n". - " You do not exist in the Emulab Database!\n"); + tbdie("You do not exist in the Emulab Database!"); } # # Get email info for user. # if (! UserDBInfo($dbuid, \$user_name, \$user_email)) { - die("*** $0:\n". - " Cannot determine your name and email address.\n"); + tbdie("Cannot determine your name and email address."); } # @@ -163,9 +161,7 @@ if (! UserDBInfo($dbuid, \$user_name, \$user_email)) { # our time. Make sure user sees the error by exiting with 1. # if (system("$checkquota $dbuid") != 0) { - print STDERR - "*** $0:\n". - " You are over your disk quota on $CONTROL; please cleanup!\n"; + tberror("You are over your disk quota on $CONTROL; please cleanup!"); exit(1); } @@ -187,14 +183,13 @@ if (!defined($description)) { $description = "'Created by $dbuid'"; } if (! $swappable && (!defined($noswap_reason) || $noswap_reason eq "")) { - die("Must provide a reason with -S option (not swappable reason)!\n"); + tbdie("Must provide a reason with -S option (not swappable reason)!"); } if (! $idleswap && (!defined($noidleswap_reason) || $noidleswap_reason eq "")) { - die("Must provide a reason with -L option (no idleswap reason)!\n"); + tbdie("Must provide a reason with -L option (no idleswap reason)!"); } if (!defined($tempnsfile) && !TBAdmin($dbuid)) { - die("*** $0:\n". - " Only admins can create experiments with no NS file\n"); + tbdie("Only admins can create experiments with no NS file"); } my $nsfile = "$eid.ns"; my $repfile = "$eid.report"; @@ -209,8 +204,7 @@ $noidleswap_reason = "'None Given'" # Make sure UID is allowed to create experiments in this project. # if (! TBProjAccessCheck($dbuid, $pid, $gid, TB_PROJECT_CREATEEXPT)) { - die("*** $0:\n". - " You do not have permission to create experiments in $pid/$gid\n"); + die("You do not have permission to create experiments in $pid/$gid"); } # @@ -223,8 +217,7 @@ if (!defined($tempnsfile)) { } elsif (! -f $tempnsfile || ! -r $tempnsfile || -z $tempnsfile) { # Exit so that user sees the error, not us. - print STDERR "*** $0:\n". - " $tempnsfile does not exist or is not a readable file!\n"; + tberror("$tempnsfile does not exist or is not a readable file!"); exit(1); } @@ -270,8 +263,7 @@ $query_result = if ($query_result->numrows) { DBQueryWarn("unlock tables"); - die("*** $0:\n". - " Experiment $eid in project $pid already exists!\n"); + tbdie("Experiment $eid in project $pid already exists!"); } # @@ -298,8 +290,7 @@ foreach my $table ("experiments", "experiment_stats", "experiment_resources", if ($query_result->numrows) { DBQueryWarn("unlock tables"); - die("*** $0:\n". - " Experiment index $exptidx exists in $table; this is bad!\n"); + tbdie("Experiment index $exptidx exists in $table; this is bad!"); } } @@ -324,8 +315,7 @@ if (! DBQueryWarn("INSERT INTO experiments ". "$noidleswap_reason, $batchmode, '$batchstate', ". "$linktest, $savestate)")) { DBQueryWarn("unlock tables"); - die("*** $0:\n". - " DB error inserting experiment record for $pid/$eid!\n"); + tbdie("DB error inserting experiment record for $pid/$eid!"); } # @@ -338,8 +328,7 @@ $query_result = if (!$query_result) { DBQueryWarn("delete from experiments where pid='$pid' and eid='$eid'"); DBQueryWarn("unlock tables"); - die("*** $0:\n". - " DB error inserting experiment resources record for $pid/$eid!"); + tbdie("DB error inserting experiment resources record for $pid/$eid!"); } my $rsrcidx = $query_result->insertid; @@ -353,16 +342,14 @@ if (! DBQueryWarn("insert into experiment_stats ". DBQueryWarn("delete from experiments where pid='$pid' and eid='$eid'"); DBQueryWarn("delete from experiment_resources where idx=$rsrcidx"); DBQueryWarn("unlock tables"); - die("*** $0:\n". - " DB error inserting experiment stats record for $pid/$eid!"); + tbdie("DB error inserting experiment stats record for $pid/$eid!"); } if (! DBQueryWarn("unlock tables")) { DBQueryWarn("delete from experiments where pid='$pid' and eid='$eid'"); DBQueryWarn("delete from experiment_resources where idx=$rsrcidx"); DBQueryWarn("delete from experiment_stats where exptidx=$exptidx"); - die("*** $0:\n". - " DB error unlocking tables!"); + tbdie("DB error unlocking tables!"); } # @@ -883,8 +870,7 @@ sub ParseArgs() $tempnsfile = $1; } else { - die("*** $0:\n". - " Bad data in nsfile: $tempnsfile\n"); + tbdie("Bad data in nsfile: $tempnsfile"); } # @@ -897,8 +883,7 @@ sub ParseArgs() $tempnsfile = $1; } else { - die("*** $0:\n". - " Bad data returned by realpath: $translated\n"); + tbdie("Bad data returned by realpath: $translated"); } # @@ -913,9 +898,7 @@ sub ParseArgs() ! ($tempnsfile =~ /^\/proj/) && ! ($tempnsfile =~ /^\/groups/) && ! ($tempnsfile =~ /^\/users/)) { - print STDERR - "*** $0:\n". - " $tempnsfile does not resolve to an allowed directory!\n"; + tberror("$tempnsfile does not resolve to an allowed directory!"); # Note positive status; so error goes to user not tbops. exit(1); } @@ -953,7 +936,7 @@ sub ParseArgs() $pid = $1; } else { - die("Bad data in argument: $pid."); + tbdie("Bad data in argument: $pid."); } } if (defined($options{"e"})) { @@ -963,11 +946,11 @@ sub ParseArgs() $eid = $1; } else { - die("Bad data in argument: $eid."); + tbdie("Bad data in argument: $eid."); } if (! TBcheck_dbslot($eid, "experiments", "eid", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { - die("Improper experiment name (id)!\n"); + tbdie("Improper experiment name (id)!"); } } if (defined($options{"g"})) { @@ -977,20 +960,20 @@ sub ParseArgs() $gid = $1; } else { - die("Bad data in argument: $gid."); + tbdie("Bad data in argument: $gid."); } } if (defined($options{"E"})) { if (! TBcheck_dbslot($options{"E"}, "experiments", "expt_name", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { - die("Improper experiment description!\n"); + tbdie("Improper experiment description!"); } $description = DBQuoteSpecial($options{"E"}); } if (defined($options{"S"})) { if (! TBcheck_dbslot($options{"S"}, "experiments", "noswap_reason", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { - die("Improper noswap reason!\n"); + tbdie("Improper noswap reason!"); } $swappable = 0; $noswap_reason = DBQuoteSpecial($options{"S"}); @@ -998,7 +981,7 @@ sub ParseArgs() if (defined($options{"L"})) { if (! TBcheck_dbslot($options{"L"}, "experiments", "noidleswap_reason", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { - die("Improper noidleswap reason!\n"); + tbdie("Improper noidleswap reason!"); } $idleswap = 0; $noidleswap_reason = DBQuoteSpecial($options{"L"}); @@ -1006,7 +989,7 @@ sub ParseArgs() if (defined($options{"l"})) { if (! TBcheck_dbslot($options{"l"}, "experiments", "idleswap_timeout", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { - die("Improper idleswap timeout!\n"); + tbdie("Improper idleswap timeout!"); } $idleswap = 1; $idleswaptime = $options{"l"}; @@ -1014,7 +997,7 @@ sub ParseArgs() if (defined($options{"a"})) { if (! TBcheck_dbslot($options{"a"}, "experiments", "autoswap_timeout", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { - die("Improper autoswap timeout!\n"); + tbdie("Improper autoswap timeout!"); } $autoswap = 1; $autoswaptime = $options{"a"}; @@ -1022,7 +1005,7 @@ sub ParseArgs() if (defined($options{"t"})) { if (! TBcheck_dbslot($options{"t"}, "experiments", "linktest_level", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { - die("Improper linktest level!\n"); + tbdie("Improper linktest level!"); } $linktest = $options{"t"}; } @@ -1040,9 +1023,8 @@ sub fatal($) { my($mesg) = $_[0]; - print "*** $0:\n"; - print " $mesg\n"; - print "Cleaning up and exiting with status $errorstat ...\n"; + tberror $mesg; + tbinfo "Cleaning up and exiting with status $errorstat ..."; # # This exit will drop into the END block below. diff --git a/tbsetup/eventsys_control.in b/tbsetup/eventsys_control.in index aa8cedb7f..38f116993 100755 --- a/tbsetup/eventsys_control.in +++ b/tbsetup/eventsys_control.in @@ -52,8 +52,7 @@ if ($TESTMODE) { } if ($EUID != 0) { - die("*** $0:\n". - " Must be root! Maybe its a development version?\n"); + tbdie("Must be root! Maybe its a development version?"); } # @@ -123,13 +122,11 @@ else { } if (! ($expstate = ExpState($pid, $eid))) { - die("*** $0:\n". - " No such experiment $pid/$eid!\n"); + tbdie("No such experiment $pid/$eid!"); } if (! UNIX2DBUID($UID, \$dbuid)) { - die("*** $0:\n". - " You do not exist in the Emulab Database!\n"); + tbdie("You do not exist in the Emulab Database!"); } # @@ -137,8 +134,7 @@ if (! UNIX2DBUID($UID, \$dbuid)) { # can do this. # if (! TBExptAccessCheck($UID, $pid, $eid, TB_EXPT_DESTROY)) { - die("*** $0:\n". - " You do not have permission to control the event system!\n"); + tbdie("You do not have permission to control the event system!"); } # @@ -149,8 +145,7 @@ if ($expstate ne EXPTSTATE_ACTIVE && $expstate ne EXPTSTATE_ACTIVATING && $expstate ne EXPTSTATE_MODIFY_RESWAP && $expstate ne EXPTSTATE_SWAPPING) { - die("*** $0:\n". - " Experiment $pid/$eid must active (or swapping)!\n"); + tbdie("Experiment $pid/$eid must active (or swapping)!"); } # @@ -158,8 +153,8 @@ if ($expstate ne EXPTSTATE_ACTIVE && # the experiment, balk unless force mode is on. # if ($action eq "start" && !$force && !ExpNodes($pid, $eid)) { - print("*** There are no nodes in $pid/$eid. ". - "Not starting a scheduler.\n"); + tbnotice("There are no nodes in $pid/$eid. ". + "Not starting a scheduler."); exit(0); } @@ -176,8 +171,7 @@ my $gid = ExpGroup($pid, $eid); my ($unix_gid, $unix_gidname); if (! TBGroupUnixInfo($pid, $gid, \$unix_gid, \$unix_gidname)) { - die("*** $0:\n". - " Could not get unix group info for $pid/$gid!\n"); + tbdie("Could not get unix group info for $pid/$gid!"); } # @@ -202,5 +196,4 @@ if ($UNIFIED || $DBIFACE) { else { exec("sshtb -host $CONTROL $cmd"); } -die("*** $0:\n". - " Could not exec '$cmd'\n"); +tbdie("Could not exec '$cmd'"); diff --git a/tbsetup/frisbeeimage.in b/tbsetup/frisbeeimage.in index 62d01a276..8d10c5169 100644 --- a/tbsetup/frisbeeimage.in +++ b/tbsetup/frisbeeimage.in @@ -42,12 +42,10 @@ $| = 1; # We don't want to run this script unless its the real version. # if ($EUID != 0) { - die("*** $0:\n". - " Must be root! Maybe its a development version?\n"); + tbdie("Must be root! Maybe its a development version?"); } if (!$ELABINELAB) { - die("*** $0:\n". - " This script is for elabinelab systems only!\n"); + tbdie("This script is for elabinelab systems only!"); } # Load the Testbed support stuff. @@ -55,6 +53,7 @@ use lib "@prefix@/lib"; use libdb; use libtestbed; use libxmlrpc; +use libtblog; # Locals my $FRISBEE = "$TB/sbin/frisbee"; @@ -93,8 +92,7 @@ else { # Grab mcastif. This is has been set up for us, and its a serious kludge. # if (! -e "/etc/emulab/outer_ipaddr") { - die("*** $0:\n". - " /etc/emulab/outer_ipaddr does not exist!\n"); + tbdie("/etc/emulab/outer_ipaddr does not exist!"); } $mcastif = `cat /etc/emulab/outer_ipaddr`; chomp($mcastif); @@ -102,8 +100,7 @@ if ($mcastif =~ /^([\d\.]+)$/) { $mcastif = $1; } else { - die("*** $0:\n". - " Could not parse outer IP: $mcastif\n!"); + tbdie("Could not parse outer IP: $mcastif!"); } # @@ -125,8 +122,7 @@ my ($filename,$creator,$gid) = $query_result->fetchrow(); my $rval = libxmlrpc::CallMethod("elabinelab", "frisbeelauncher", {"imageid" => "$imageid"}); if (!defined($rval)) { - die("*** $0:\n". - " Could not fire up frisbee on outer Emulab!\n"); + die("Could not fire up frisbee on outer Emulab!"); } # @@ -138,8 +134,7 @@ if ($rval =~ /^(.*):(\d*)$/) { $loadport = $2; } else { - die("*** $0:\n". - " Could not parse loadinfo from server: $rval\n!"); + tbdie("Could not parse loadinfo from server: $rval!"); } if ($debug) { @@ -148,7 +143,6 @@ if ($debug) { system("$FRISBEE -N -i $mcastif -m $loadaddr -p $loadport $filename"); if ($?) { - die("*** $0:\n". - " Error downloading image data from outer Emulab!\n"); + tbdie("Error downloading image data from outer Emulab!"); } exit(0); diff --git a/tbsetup/frisbeelauncher.in b/tbsetup/frisbeelauncher.in index 3ab9ee464..f8b748fd0 100755 --- a/tbsetup/frisbeelauncher.in +++ b/tbsetup/frisbeelauncher.in @@ -48,6 +48,7 @@ $| = 1; use lib "@prefix@/lib"; use libdb; use libtestbed; +use libtblog; # Defines my $FRISBEED = "$TB/sbin/frisbeed"; @@ -94,12 +95,11 @@ my $filename = &get_filename($imageid); if (!TBImageIDAccessCheck($UID, $imageid, ($killmode ? TB_IMAGEID_DESTROY : TB_IMAGEID_READINFO))) { - die("*** $0:\n". - " Not enough permission!\n"); + tbdie("Not enough permission!"); } if (!$killmode && !$ELABINELAB && ! -R $filename) { - die("*** You do not have permission to read the image file for\n". + tbdie("You do not have permission to read the image file for". "imageid $imageid: $filename\n"); } @@ -136,8 +136,7 @@ if ($killmode) { "Imageid: $imageid\n". $mesg); - die("*** $0:\n". - " $mesg\n"); + tbdie($mesg); } if ($address && !$pid) { # @@ -163,8 +162,7 @@ if ($killmode) { "Imageid: $imageid\n". $mesg); - die("*** $0:\n". - " $mesg\n"); + tbdie($mesg); } } @@ -181,8 +179,7 @@ if ($killmode) { "Failed to stop frisbee daemon for $imageid\n". "Could not kill(TERM) process $pid: $? $!"); - die("*** $0:\n". - " Failed to stop frisbee daemon for $imageid!\n"); + tbdie("Failed to stop frisbee daemon for $imageid!"); } exit(0); } @@ -202,8 +199,7 @@ if ($pid) { "Imageid: $imageid\n". $mesg); - die("*** $0:\n". - " $mesg\n"); + tbdie($mesg); } # Pick an address: Die if unsucessful, set address and unlock if sucessful @@ -227,8 +223,7 @@ if ($ELABINELAB && ! -e $filename) { $EUID = $UID; system("$FRISBEEIMAGE $imageid"); if ($?) { - die("*** $0:\n". - " No such image file: $filename\n!"); + tbdie("No such image file: $filename!"); } $EUID = 0; } @@ -245,8 +240,7 @@ if (my $childpid = TBBackGround($LOGFILE)) { my $foo = waitpid($childpid, &WNOHANG); if ($foo) { &clear_address; - die("*** $0:\n". - " Error $? backgrounding frisbeelauncher\n!"); + tbdie("Error $? backgrounding frisbeelauncher!"); } exit(0); } @@ -456,8 +450,7 @@ sub pick_address { "Imageid: $imageid\n". $mesg); - die("*** $0:\n". - " $mesg\n"); + tbdie($mesg); } } } diff --git a/tbsetup/libosload.pm.in b/tbsetup/libosload.pm.in index 33b7f99a6..778479da7 100755 --- a/tbsetup/libosload.pm.in +++ b/tbsetup/libosload.pm.in @@ -20,6 +20,7 @@ use vars qw(@ISA @EXPORT); use lib '@prefix@/lib'; use libdb; use libreboot; +use libtblog; use English; use File::stat; use IO::Handle; @@ -63,7 +64,7 @@ sub osload ($$) { my $rowref; if (!defined($args->{'nodelist'})) { - print STDERR "*** osload: Must supply a node list!\n"; + tberror "Must supply a node list!"; # INTERNAL return -1; } @nodes = sort(@{ $args->{'nodelist'} }); @@ -87,10 +88,19 @@ sub osload ($$) { if (defined($args->{'zerofree'})) { $zerofree = $args->{'zerofree'}; } + if (defined($args->{'swapinfo'})) { $swapinfo = $args->{'swapinfo'}; } - + + # + # Start a new logging sub-process + # + my $old_env = \%ENV; + local %ENV; + copy_hash %ENV, $old_env; + tblog_sub_process("osload", @nodes); + # # Figure out who called us. Root and admin types can do whatever they # want. Normal users can only change nodes in experiments in their @@ -100,9 +110,9 @@ sub osload ($$) { $mereuser = 1; if (! TBNodeAccessCheck($UID, TB_NODEACCESS_LOADIMAGE, @nodes)) { - print STDERR - "*** osload: Not enough permission to load images on one or ". - "more nodes!\n"; + tberror + "Not enough permission to load images on one or ". + "more nodes!"; return -1; } } @@ -112,8 +122,7 @@ sub osload ($$) { # if (defined($imageid) && $mereuser && ! TBImageIDAccessCheck($UID, $imageid, TB_IMAGEID_READINFO)) { - print STDERR - "*** osload: You do not have permission to load '$imageid'!\n"; + tberror "You do not have permission to load '$imageid'!"; return -1; } @@ -126,9 +135,9 @@ sub osload ($$) { # if (defined($imageid) && !TBImageLoadMaxOkay($imageid, scalar(@nodes), @nodes)) { - print STDERR - "*** osload: Would exceed maxiumum concurrent instances ". - "limitation for $imageid\n"; + tberror + "Would exceed maxiumum concurrent instances ". + "limitation for $imageid"; return -1; } @@ -148,7 +157,7 @@ sub osload ($$) { # Create a pipe to read back results from the child we will create. # if (! pipe(PARENT_READER, CHILD_WRITER)) { - print STDERR "*** osload: creating pipe: $!\n"; + tberror "creating pipe: $!"; return -1; } CHILD_WRITER->autoflush(1); @@ -178,7 +187,7 @@ sub osload ($$) { my $default_imageid; if (! DefaultImageID($node, \$default_imageid) || ! defined($default_imageid)) { - print STDERR "*** osload ($node): No default imageid defined!\n"; + tberror "$node: No default imageid defined!"; goto failednode; } @@ -210,7 +219,7 @@ sub osload ($$) { if (!$TESTMODE) { system("$osselect $defosid $node"); if ($?) { - print STDERR "*** osload ($node): os_select $defosid failed!\n"; + tberror "$node: os_select $defosid failed!"; goto failednode; } } @@ -223,8 +232,8 @@ sub osload ($$) { # if ($imageid ne $default_imageid && !TBSetSchedReload($node, $default_imageid)) { - print STDERR - "*** osload ($node): Could not schedule default reload\n"; + tberror + "$node: Could not schedule default reload"; goto failednode; } @@ -256,8 +265,7 @@ sub osload ($$) { "where node_id='$node' and partition='$i'"); } if (!$dbresult) { - print STDERR - "*** osload ($node): Could not update partition table\n"; + tberror "$node: Could not update partition table"; goto failednode; } } @@ -309,8 +317,7 @@ sub osload ($$) { if (!$TESTMODE) { if (&$reload_func($node, $imageid, $defosid, $zerofree) < 0) { - print STDERR - "*** osload ($node): Could not set up reload. Skipping.\n"; + tberror("$node: Could not set up reload. Skipping."); goto failednode; } } @@ -338,7 +345,7 @@ sub osload ($$) { } if (! @nodes) { - print STDERR "*** osload: Stopping because of previous failures!\n"; + tbnotice "Stopping because of previous failures!"; goto done; } @@ -422,17 +429,14 @@ sub osload ($$) { my $node = shift(@failednodes); if ($retries{$node}) { - print "*** osload ($node): Trying again ...\n"; + tbnotice "$node: Trying again ..."; my $reload_info = $reload_info{$node}; # Possible race with reboot? if (&{$reload_info->{'func'}}($node, $reload_info->{'imageid'}, $reload_info->{'osid'}, $reload_info->{'zerofree'}) < 0) { - print(STDERR - "*** osload ($node): ". - "Could not set up reload. Skipping.\n"); - + tberror("$node: Could not set up reload. Skipping."); $result->{$node} = -1; $failures++; next; @@ -443,7 +447,8 @@ sub osload ($$) { $retries{$node} -= 1; } else { - print "*** osload ($node): failed too many times. Skipping!\n"; + tberror ({sublevel => -1}, + "$node failed to boot too many times. Skipping!"); $result->{$node} = -1; $failures++; } @@ -481,8 +486,7 @@ sub GetImageInfo($$) my $query_result = DBQueryWarn("select * from images where imageid='$imageid'"); if (! $query_result || $query_result->numrows < 1) { - print STDERR - "*** osload ($node): Imageid $imageid is not defined!\n"; + tberror "Imageid $imageid is not defined!"; return 0; } $imageinfo{$imageid} = $query_result->fetchrow_hashref(); @@ -495,8 +499,7 @@ sub GetImageInfo($$) # and that file should exist. # if (!defined($imagepath)) { - print STDERR - "*** osload ($node): No filename associated with $imageid!\n"; + tberror "No filename associated with $imageid!"; return 0; } @@ -507,21 +510,16 @@ sub GetImageInfo($$) # system("$FRISBEELAUNCHER " . ($debug ? "-d ": "") . "$imageid"); if ($?) { - print STDERR - "*** osload ($node): Frisbeelauncher ($imageid) failed!\n"; + tberror "Frisbeelauncher ($imageid) failed!"; return 0; } if (! -R $imagepath) { - print STDERR - "*** osload ($node): ". - "Frisbeelauncher could not fetch $imagepath ($imageid)!\n"; + tberror "Frisbeelauncher could not fetch $imagepath ($imageid)!"; return 0; } } else { - print STDERR - "*** osload ($node): ". - "$imagepath does not exist or cannot be read!\n"; + tberror "$imagepath does not exist or cannot be read!"; return 0; } } @@ -609,8 +607,7 @@ sub WaitTillReloadDone($$$@) # try again in a little bit. # if (!$query_result) { - print STDERR - "*** osload ($node): Query failed; waiting a bit.\n"; + tbwarn "$node: Query failed; waiting a bit."; next; } @@ -632,8 +629,8 @@ sub WaitTillReloadDone($$$@) $waittime = time - $startwait; if ($waittime > $maxwait) { my $t = (int ($waittime / 60)); - print STDERR "*** osload ($node): appears wedged; ". - "it has been $t minutes since it was rebooted.\n"; + tbnotice "$node appears wedged; ". + "it has been $t minutes since it was rebooted."; TBNodeConsoleTail($node, *STDERR); $count--; @@ -696,12 +693,12 @@ sub SetupReloadFrisbee($$$$) system("$osselect -1 $osid $node"); if ($?) { - print STDERR "*** osload ($node): os_select $osid failed!\n"; + tberror "os_select $osid failed!"; return -1; } system("$FRISBEELAUNCHER " . ($debug ? "-d ": "") . "$imageid"); if ($?) { - print STDERR "*** osload ($node): Frisbee Launcher ($imageid) failed!\n"; + tberror "Frisbee Launcher ($imageid) failed!"; return -1; } return 0; @@ -722,7 +719,7 @@ sub SetupReloadUISP($$$$) my $query_result = DBQueryFatal("select path from images " . "where imageid='$imageid'"); if ($query_result->num_rows() != 1) { - print STDERR "*** osload ($node): Failed to get path for $imageid!\n"; + tberror "Failed to get path for $imageid!"; return -1; } my ($path) = $query_result->fetchrow(); @@ -743,7 +740,7 @@ sub SetupReloadUISP($$$$) # my $rv = system("$TBUISP upload $path $node"); if ($rv) { - print STDERR "*** osload ($node): tbuisp failed\n"; + tberror "$node: tbuisp failed"; return -1; } @@ -754,7 +751,7 @@ sub SetupReloadUISP($$$$) system("$osselect $osid $node"); if ($?) { - print STDERR "*** osload ($node): os_select $osid failed!\n"; + tberror "os_select $osid failed!"; goto failednode; } @@ -795,7 +792,7 @@ sub osload_wait($) my ($childpid) = @_; if (!exists($children{$childpid})) { - print STDERR "*** osload: No such child pid $childpid!\n"; + tberror "No such child pid $childpid!"; # INTERNAL return -1; } my ($PARENT_READER, $result) = @{ $children{$childpid}}; @@ -811,7 +808,7 @@ sub osload_wait($) print STDERR "reload ($1): child returned $2 status.\n"; } else { - print STDERR "*** osload: Improper response from child: $_\n"; + tberror "Improper response from child: $_"; # INTERNAL } } diff --git a/tbsetup/libreboot.pm.in b/tbsetup/libreboot.pm.in index 2f0133c8d..d36b3c37d 100644 --- a/tbsetup/libreboot.pm.in +++ b/tbsetup/libreboot.pm.in @@ -23,6 +23,7 @@ use vars qw(@ISA @EXPORT); # Must come after package declaration! use lib '@prefix@/lib'; use libdb; +use libtblog; use English; # @@ -68,6 +69,16 @@ my %children = (); sub nodereboot($$) { my ($args, $result) = @_; + my @nodes = @{ $args->{'nodelist'}}; + + # + # Start a new logging sub-process + # + my $old_env = \%ENV; + local %ENV; + copy_hash %ENV, $old_env; + tblog_sub_process("reboot", @nodes); + $debug = $args->{'debug'} if (exists($args->{'debug'})); @@ -79,7 +90,7 @@ sub nodereboot($$) # caused by installing improperly. # if (exists($ENV{'REBOOTPIPENO'})) { - print STDERR "*** reboot: Must be root when using library!\n"; + tberror "Must be root when using library!"; return -1; } print STDERR "reboot: no privs; invoking real nodereboot script!\n" @@ -88,10 +99,9 @@ sub nodereboot($$) } if (!defined($args->{'nodelist'})) { - print STDERR "*** reboot: Must supply a node list!\n"; + tberror "Must supply a node list!"; return -1; } - my @nodes = @{ $args->{'nodelist'}}; my $powercycle = 0; my $rebootmode = 0; my $waitmode = 0; @@ -122,7 +132,7 @@ sub nodereboot($$) $pipemode = $1; } else { - print STDERR "*** reboot: Bad pipeno in environment: $pipemode!\n"; + tberror "Bad pipeno in environment: $pipemode!"; return -1; } } @@ -132,8 +142,7 @@ sub nodereboot($$) # if ($UID && !TBAdmin($UID) && ! TBNodeAccessCheck($UID, TB_NODEACCESS_REBOOT, @nodes)) { - print STDERR "*** reboot: ". - "You do not have permission to reboot some of the nodes!\n"; + tberror "You do not have permission to reboot some of the nodes!"; return -1; } @@ -178,7 +187,7 @@ sub nodereboot($$) my $pnode; if (! TBPhysNodeID($node, \$pnode)) { - print STDERR "*** reboot ($node): No physical node!\n"; + tberror "$node: No physical node!"; return -1; } $virtnodes{$node} = $pnode; @@ -235,7 +244,7 @@ sub nodereboot($$) # Create a pipe to read back results from the child we will create. # if (! pipe(PARENT_READER, CHILD_WRITER)) { - print STDERR "*** reboot: creating pipe: $!\n"; + tberror "creating pipe: $!"; return -1; } CHILD_WRITER->autoflush(1); @@ -262,7 +271,7 @@ sub nodereboot($$) # but rather just write the results to the pipe we got. # if (! open(CHILD_WRITER, ">>&=${pipemode}")) { - print STDERR "*** reboot: reopening pipe: $!\n"; + tberror "reopening pipe: $!"; return -1; } CHILD_WRITER->autoflush(1); @@ -309,8 +318,7 @@ sub nodereboot($$) # info("*** reboot: Powercyle mode: power cycle ".join(" ",@batch)); if (PowerCycle(@batch)) { - print STDERR - "*** reboot: Powercyle failed for " . join(" ",@batch) ."\n"; + tberror "Powercyle failed for " . join(" ",@batch); foreach my $node (@batch) { $result->{$node} = -1; $failed++; @@ -374,7 +382,7 @@ sub nodereboot($$) elsif ($mypid != 0 && $?) { $failed++; $result->{$node} = -1; - print STDERR "*** reboot ($node): Failed ($?)!\n"; + tberror "Failed ($?)!"; } else { print STDOUT "reboot ($node): Successful!\n"; @@ -387,9 +395,7 @@ sub nodereboot($$) # if (@needPowercycle) { if (PowerCycle(@needPowercycle)) { - print STDERR - "*** reboot: Powercyle failed for " . - join(" ",@needPowercycle) ."\n"; + tberror "Powercyle failed for " . join(" ",@needPowercycle); foreach my $node (@needPowercycle) { $result->{$node} = -1; $failed++; @@ -406,7 +412,7 @@ sub nodereboot($$) if (RebootVNode($node, $pnode)) { $failed++; $result->{$node} = -1; - print STDERR "*** reboot ($node): failed (on $pnode).\n"; + tberror "$node: Reboot failed (on $pnode)"; } else { print STDOUT "reboot ($node): rebooting (on $pnode).\n"; @@ -442,7 +448,7 @@ sub nodereboot($$) SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY); next; } - print STDOUT "*** reboot ($node): reported a TBFAILED event.\n"; + tberror "$node reported a TBFAILED event."; } SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED); $result->{$node} = -1; @@ -476,6 +482,14 @@ sub RebootNode { my ($pc, $reconfig, $killmode, $rebootmode) = @_; my ($status, $syspid, $mypid, $didipod, $nodestate); + # + # Start a new logging sub-process + # + my $old_env = \%ENV; + local %ENV; + copy_hash %ENV, $old_env; + tblog_sub_process("reboot_node", $pc); + if ($reconfig) { print "reboot ($pc): Attempting to reconfigure ...\n"; } @@ -530,8 +544,7 @@ sub RebootNode { system("$bisend $optarg $reqarg $pc"); if ($?) { info("$pc: PXEWAKEUP failed ... power cycle"); - print STDERR - "*** reboot ($pc): PXEWAKEUP failed; will power cycle.\n"; + tbnotice "$pc: PXEWAKEUP failed; will power cycle.\n"; # Signal to the caller that the node needs to be power cycled return -2; @@ -555,7 +568,7 @@ sub RebootNode { # if (! DoesPing($pc)) { info("$pc appears dead: power cycle"); - print STDERR "*** reboot ($pc): appears dead; will power cycle.\n"; + tbnotice "$pc appears dead; will power cycle."; # Signal to the parent that the node needs to be power cycled exit(2); @@ -863,7 +876,7 @@ sub nodereboot_wait($) my ($childpid) = @_; if (!exists($children{$childpid})) { - print STDERR "*** reboot: No such child pid $childpid!\n"; + tberror "No such child pid $childpid!"; return -1; } my ($PARENT_READER, $result) = @{ $children{$childpid}}; @@ -879,7 +892,7 @@ sub nodereboot_wait($) print STDERR "reboot ($1): child returned $2 status.\n"; } else { - print STDERR "*** reboot: Improper response from child: $_\n"; + tberror "Improper response from child: $_"; } } @@ -930,7 +943,7 @@ sub nodereboot_exec($$) # Create a pipe to read back results from the child we will create. # if (! pipe(PARENT_READER, CHILD_WRITER)) { - print STDERR "*** reboot: creating pipe: $!\n"; + tberror "creating pipe: $!"; return -1; } CHILD_WRITER->autoflush(1); diff --git a/tbsetup/libtblog.pm.in b/tbsetup/libtblog.pm.in index e75f504a9..1791b3e15 100644 --- a/tbsetup/libtblog.pm.in +++ b/tbsetup/libtblog.pm.in @@ -34,17 +34,20 @@ use Exporter; @ISA = "Exporter"; @EXPORT = qw (tblog tberror tberr tbwarn tbwarning tbnotice tbinfo tbdebug - tbdie tblog_set_info tblog_find_error + tbdie tblog_set_info tblog_sub_process tblog_find_error tblog_capture tblog_stop_capture + tblog_new_process tblog_init_process tblog_exit + copy_hash TBLOG_EMERG TBLOG_ALERT TBLOG_CRIT TBLOG_ERR TBLOG_WARNING TBLOG_NOTICE TBLOG_INFO TBLOG_DEBUG); -@EXPORT_OK = qw (*SOUT *SERR); +@EXPORT_OK = qw (dblog *SOUT *SERR); # After package decl. use English; use POSIX qw(isatty setsid); use File::Basename; use IO::Handle; +use Text::Wrap; use strict; @@ -55,13 +58,13 @@ use lib "@prefix@/lib"; use libtestbed; use libdb; -my $SCRIPTNAME = basename($PROGRAM_NAME); -my $SCRIPTNUM = 0; -my $PARENT_INVOCATION = 0; +sub if_defined($$); my %PRIORITY_MAP_TO_NUM; my %PRIORITY_MAP_TO_STR; +my $REAL_SCRIPTNAME = basename($PROGRAM_NAME); + # # Duplicate STDOUT and STDERR to SOUT and SERR respectfully, since # tblog_capture() will redirect the real STDOUT and STDERR @@ -110,7 +113,11 @@ sub check_env () check_env_num 'TBLOG_SESSION'; check_env_num 'TBLOG_PIDX'; check_env_num 'TBLOG_INVOCATION'; + check_env_num 'TBLOG_PARENT_INVOCATION'; check_env_num 'TBLOG_UID'; + check_env_num 'TBLOG_SCRIPTNUM'; + check_env_def 'TBLOG_SCRIPTNAME'; + check_env_def 'TBLOG_BASE_SCRIPTNAME'; } sub if_defined ($$) { @@ -142,36 +149,22 @@ sub DBQuerySingleFatal ( $ ) # Forward Decals # -sub dblog ( $@ ); +sub dblog ( $$@ ); sub tblog ( $@ ); +sub tblog_new_process(@); +sub tblog_init_process(@); +sub informative_scriptname(); # -# tblog_init(): Called automatically when a script starts. +# tblog_init(): Called automatically when a script starts. # -# Will: (1) Get the unique ID for the script name, (2) get the -# priority mapping (string -> int) from the database, (3) Creating an -# "entring" log message in the database, (4) get the session id and -# set up the environmental variables if they are not already set, -# 5) Get the invocation id, and 6) increment the level +# Will: Get the priority mapping (string -> int) from the database and +# than call tblog_new_process # sub tblog_init() { - # Get script number - - my $query_result = DBQueryFatal - sprintf("select script from scripts where name=%s", - DBQuoteSpecial $SCRIPTNAME); - if ($query_result->num_rows > 0) { - $SCRIPTNUM = ($query_result->fetchrow_array())[0]; - } else { - DBQueryFatal - sprintf("insert into scripts (name) values (%s)", - DBQuoteSpecial $SCRIPTNAME); - $SCRIPTNUM = DBQuerySingleFatal 'select LAST_INSERT_ID()'; - } - # Get priority mapping - $query_result = DBQueryFatal "SELECT * FROM priorities"; + my $query_result = DBQueryFatal "SELECT * FROM priorities"; for (my $i = 0; $i < $query_result->num_rows; $i++) { my ($n,$v) = $query_result->fetchrow_array(); $PRIORITY_MAP_TO_STR{$n} = $v; @@ -179,39 +172,121 @@ sub tblog_init() { $PRIORITY_MAP_TO_NUM{lc $v} = $n; } + tblog_new_process(if_defined($main::FAKE_SCRIPTNAME, + $REAL_SCRIPTNAME), + @ARGV); +}; + + +# +# tblog_new_process($cmd, @argv): enter a new (possible fake) process +# calls tblog_init_process +# +# If used to start a new fake process it is advised to make a local +# copy of %ENV using perl "local". See tblog_sub_process for an +# explanation. +# +sub tblog_new_process(@) { + delete $ENV{TBLOG_BASE_SCRIPTNAME}; + tblog_init_process(@_); +} + +# +# tblog_init_process($cmd, @argv): init a new process +# +# Will: (1) Get the unique ID for the script name, (2) Creating an +# "entring" log message in the database, (3) get the session id and +# set up the environmental variables if they are not already set, +# (4) Get the invocation id, and (5) increment the level +# +# NOTE: Everything is currently stored in the %ENV hash. +# +sub tblog_init_process(@) { + my ($script, @argv) = @_; + + # Get script name + + $ENV{TBLOG_SCRIPTNAME} = $script; + $ENV{TBLOG_BASE_SCRIPTNAME} = $script unless defined $ENV{TBLOG_BASE_SCRIPTNAME}; + + # Get script number + + my $query_result = DBQueryFatal + sprintf("select script from scripts where script_name=%s", + DBQuoteSpecial $ENV{TBLOG_SCRIPTNAME}); + if ($query_result->num_rows > 0) { + $ENV{TBLOG_SCRIPTNUM} = ($query_result->fetchrow_array())[0]; + } else { + DBQueryFatal + sprintf("insert into scripts (script_name) values (%s)", + DBQuoteSpecial $ENV{TBLOG_SCRIPTNAME}); + $ENV{TBLOG_SCRIPTNUM} = DBQuerySingleFatal 'select LAST_INSERT_ID()'; + } + # ... if (defined $ENV{'TBLOG_SESSION'}) { check_env(); - $ENV{'TBLOG_LEVEL'}++; - $PARENT_INVOCATION = $ENV{'TBLOG_INVOCATION'}; + $ENV{TBLOG_LEVEL}++; + $ENV{TBLOG_PARENT_INVOCATION} = $ENV{TBLOG_INVOCATION}; dblog($NOTICE, {type => 'entering'}, - 'Entering "', join(' ', $SCRIPTNAME, @ARGV), '"') or die; + 'Entering "', join(' ', informative_scriptname(), @argv), '"') + or die; my $id = DBQuerySingleFatal 'select LAST_INSERT_ID()'; - $ENV{'TBLOG_INVOCATION'} = $id; + $ENV{TBLOG_INVOCATION} = $id; DBQueryFatal("update log set invocation=$id where seq=$id"); } else { $ENV{TBLOG_SESSION} = 0; $ENV{TBLOG_INVOCATION} = 0; + $ENV{TBLOG_PARENT_INVOCATION} = 0; $ENV{TBLOG_LEVEL} = 0; $ENV{TBLOG_PIDX} = 0; $ENV{TBLOG_UID} = 0; dblog($NOTICE, {type => 'entering'}, - 'Entering "', join(' ', $SCRIPTNAME, @ARGV), '"') or die; + 'Entering "', join(' ', informative_scriptname(), @ARGV), '"') + or die; my $id = DBQuerySingleFatal 'select LAST_INSERT_ID()'; # set SESSION in database $ENV{TBLOG_SESSION} = $id; $ENV{TBLOG_INVOCATION} = $id; DBQueryFatal("update log set session=$id,invocation=$id where seq=$id"); } + +} + +# +# Began a sub process. It is advised to make a local copy of %ENV using +# perl "local". This can be done with: +# local %ENV = %ENV +# however due to a bug in perl 5.005_03 if "-T" is used than the above will +# taint the path, instead use: +# my $old_env = \%ENV +# local %ENV; +# copy_hash %ENV, $old_env +# See perlsub(1) 1 for more info on "local" +# +sub tblog_sub_process($@) { + my $name = shift; + tblog_init_process("$ENV{TBLOG_BASE_SCRIPTNAME}-$name", + @_); +} + +# +# copy_hash(%dest, \%src) +# +sub copy_hash(\%$) { + my ($new, $old) = @_; + foreach (keys %$old) { + $new->{$_} = $old->{$_}; + } } # # tblog_set_info(pid, eid, uid): Sets info in the database which can't # be derived automatically with init. Needs to be called at least -# once during a session +# once during a session. # -sub tblog_set_info ( $$$ ) +sub tblog_set_info ( $$$ ) { check_env(); my ($pid, $eid, $uid) = @_; @@ -221,48 +296,61 @@ sub tblog_set_info ( $$$ ) DBQueryFatal sprintf('update log set pidx=%d,uid=%s where session=%s', $ENV{TBLOG_PIDX}, $ENV{TBLOG_UID}, $ENV{TBLOG_SESSION}); - print SERR "Session ID = $ENV{TBLOG_SESSION}\n"; } # -# tblog_exit(): Called automatically when a script exits +# tblog_exit(): Called automatically when a script exits, or explistly +# when a fake process exits. # sub tblog_exit() { return unless defined $ENV{'TBLOG_SESSION'}; check_env(); - dblog($INFO, {type=>'exiting'}, "Leaving \"$SCRIPTNAME ...\""); + dblog($INFO, {type=>'exiting'}, "Leaving \"", informative_scriptname(), + " ...\""); } # -# dblog(priority, [{parm=>value,...},] mesg, ...) +# informative_scriptname() +# +sub informative_scriptname() { + if ($ENV{TBLOG_BASE_SCRIPTNAME} eq $REAL_SCRIPTNAME) { + return $ENV{TBLOG_SCRIPTNAME}; + } else { + return "$ENV{TBLOG_SCRIPTNAME} (but really $REAL_SCRIPTNAME)"; + } +} + + +# +# dblog(priority, {parm=>value,...}, mesg, ...) # Internal function. Logs a message to the database. Doesn't print # anything. Will not die, instead return 0 on error, with the error # message in $@. -# Valid parms: cause, type, fatal +# Valid parms: sublevel, cause, type, fatal # my $in_dblog = 0; # Used to avoid an infinite recursion when # DBQueryFatal fails as a log entry is made to # record the failure, which will than likely cause # another failure and so on -sub dblog( $@ ) { - my ($priority, @mesg) = @_; - my $parms = {}; - $parms = shift @mesg if ref $mesg[0]; +sub dblog( $$@ ) { + my ($priority, $parms, @mesg) = @_; my $mesg = join('',@mesg); + #print SERR "===$priority $parms @mesg\n"; return if $ENV{TBLOG_OFF} || $in_dblog; $in_dblog = 1; eval { check_env(); my $query = - sprintf('insert into log (stamp,pidx,uid,session,parent,invocation,script,level,priority,inferred,cause,type,mesg) '. - 'VALUES (UNIX_TIMESTAMP(now()),%d,%d,%d,%d,%d,%d,%d,%d,%d,%s,%s,%s)', + sprintf('insert into log (stamp,pidx,uid,session,parent,invocation,script,level,sublevel,priority,inferred,cause,type,mesg) '. + 'VALUES (UNIX_TIMESTAMP(now()),%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%s,%s,%s)', $ENV{TBLOG_PIDX}, $ENV{TBLOG_UID}, $ENV{TBLOG_SESSION}, - $PARENT_INVOCATION, + $ENV{TBLOG_PARENT_INVOCATION}, $ENV{TBLOG_INVOCATION}, - $SCRIPTNUM, + $ENV{TBLOG_SCRIPTNUM}, $ENV{TBLOG_LEVEL}, + if_defined($parms->{sublevel}, 0), $priority, if_defined($parms->{inferred}, 0), DBQuoteSpecial if_defined($parms->{cause}, ''), @@ -276,6 +364,9 @@ sub dblog( $@ ) { DBFatal("DB Query failed") unless $result; }; $in_dblog = 0; + # Print a warning on failure but don't log the results to stdout + # as that is likely to fail also + print SERR format_message($WARNING, $@) if $@; return 0 if $@; return 1; } @@ -290,10 +381,13 @@ sub dblog( $@ ) { # message than they will concatenated. If the env. var. TBLOG_OFF # is set to a true value than nothing will be written to the # database, but the message will still be written to STDOUT. -# Useful parms: cause +# Useful parms: sublevel, cause, type # sub tblog( $@ ) { - my ($priority, @rest) = @_; + my ($priority) = shift; + my $parms = {}; + $parms = shift if ref $_[0] eq 'HASH'; + my $mesg = join('',@_); if (exists $PRIORITY_MAP_TO_STR{$priority}) { # $priority already a valid priority number @@ -304,20 +398,14 @@ sub tblog( $@ ) { die "Unknown Priority \"$priority\"" unless defined $priority; } - my $mesg = join('', @rest); - my $res = dblog($priority, @rest) unless $mesg =~ /^\s+$/; - - if ($priority <= $ERR ) { - print SERR "*** $SCRIPTNAME:\n"." $mesg\n"; - } elsif ($priority == $WARNING) { - print SERR "*** WARNING: $mesg\n"; - } else { - print SERR "$mesg\n"; - } + my $res = dblog($priority, $parms, $mesg) unless $mesg =~ /^\s+$/; + + print SERR format_message($priority, $mesg); return $res; } + # Useful alias functions sub tberror( @ ) {&tblog($ERR, @_)} @@ -329,12 +417,71 @@ sub tbinfo( @ ) {&tblog($INFO, @_)} sub tbdebug( @ ) {&tblog($DEBUG, @_)} # - +# Log the message to the database as an error and than die. An +# optional set of paramaters may be specified as the first paramater. +# Not exactly like die as the message bust be specified. +# sub tbdie( @ ) { - dblog($ERR, @_); + my $parms = {}; + $parms = shift if ref $_[0] eq 'HASH'; + my $mesg = join('',@_); + + dblog($ERR, $parms, $mesg); tblog_stop_capture(); - die ("*** $SCRIPTNAME:\n". - " $_[0]\n"); + die format_message($ERR, $mesg); +} + +# +# Format the message based on $priority +# +sub format_message ( $$ ) { + my ($priority, $mesg) = @_; + + $mesg =~ s/\s+$//; + + my $header; + + if ($mesg =~ /\s*\*\*\*/) { + # do nothing + } elsif ($priority <= $ERR ) { + $header = "$ENV{TBLOG_SCRIPTNAME}"; + } elsif ($priority == $WARNING) { + $header = "$ENV{TBLOG_SCRIPTNAME}: WARNING"; + } elsif ($priority == $NOTICE) { + $header = "$ENV{TBLOG_SCRIPTNAME}"; + } + + my $text; + + my @mesg = split /\n/, $mesg; + if (@mesg == 1) { + $mesg[0] =~ s/^\s+//; + $mesg = $mesg[0]; + } + if ($header) { + my $line = "*** $header: $mesg[0]"; + if (@mesg > 1 || length($line) > $Text::Wrap::columns) { + $line = "*** $header:\n"; + if (@mesg == 1) { # NOTE: $mesg[0] eq $mesg + $mesg =~ s/^\s+//; + $line .= wrap(' ',' ', $mesg, "\n"); + } else { + foreach (@mesg) { + s/\s+$//; + $line .= " $_\n"; + } + } + return $line; + } else { + return "$line\n"; + } + } else { + if (@mesg == 1) { + return wrap ('', ' ', $mesg, "\n"); + } else { + return "$mesg\n"; + } + } } # @@ -351,10 +498,6 @@ sub tbdie( @ ) { # STDERR. Defaults to true unless the "session" parameter is also # given, in which case default to false. # -# Format of the result: -# :