Commit 9f3205c9 authored by Leigh Stoller's avatar Leigh Stoller

More work on the aggregate monitoring.

1. Split the resource stuff (where we ask for an advertisement and
   process it) into a separate script, since that takes a long time to
   cycle through because of the size of the ads from the big clusters.

2. On the monitor, distinguish offline (nologins) from actually being
   down.

3. Add a table to store changes in status so we can see over time how
   much time the aggregates are usable.
parent 9320782d
......@@ -207,6 +207,21 @@ sub STATUS($$;$)
return $self->{'STATUS'}->{$name};
}
#
# Insert a status (change) event.
#
#
# Record a status (change) event for this aggregate.
#
# Inserts one row into apt_aggregate_events holding this aggregate's urn,
# the given event, and the database's current time (now()).
#
# $event must be one of the values of the table's `event` enum column:
# up, down, offline, unknown. Any other value (or undef) is rejected
# before a query is built.
#
# Returns 0 on success, -1 if the event is invalid or the insert fails.
#
sub StatusEvent($$)
{
    my ($self, $event) = @_;

    # The event is interpolated directly into the SQL text below, so only
    # the exact enum values are allowed through.
    my %valid_events = map { $_ => 1 } ("up", "down", "offline", "unknown");
    if (!defined($event) || !exists($valid_events{$event})) {
        my $shown = (defined($event) ? $event : "(undef)");
        print STDERR "StatusEvent: invalid event '$shown'\n";
        return -1;
    }

    # NOTE(review): the urn is interpolated without escaping as well;
    # presumably it always originates from our own database -- confirm.
    my $urn = $self->urn();

    DBQueryWarn("insert into apt_aggregate_events set ".
                " urn='$urn', event='$event', stamp=now()")
        or return -1;

    return 0;
}
#
# Lookup all aggregates for a portal.
#
......
......@@ -36,7 +36,7 @@ BIN_SCRIPTS = manage_profile manage_instance manage_dataset \
manage_images rtecheck checkprofile manage_extensions \
create_slivers searchip
SBIN_SCRIPTS = apt_daemon aptevent_daemon portal_xmlrpc apt_checkup \
portal_monitor apt_scheduler
portal_monitor apt_scheduler portal_resources
# Perl modules (.pm files) that make up the library script list.
LIB_SCRIPTS = APT_Profile.pm APT_Instance.pm APT_Dataset.pm APT_Geni.pm \
APT_Aggregate.pm APT_Utility.pm APT_Rspec.pm
WEB_BIN_SCRIPTS = webmanage_profile webmanage_instance webmanage_dataset \
......
This diff is collapsed.
This diff is collapsed.
......@@ -161,6 +161,11 @@ case "$1" in
echo -n " portal_monitor"
@prefix@/sbin/portal_monitor
fi
# Run portal_resources, announcing its name first, but only when the
# installed script exists and is executable.
if [ -x @prefix@/sbin/portal_resources ]; then
echo -n " portal_resources"
@prefix@/sbin/portal_resources
fi
#
# Could trigger experiment creation, so make sure everything
# else is setup first; i.e., run this last!
......@@ -299,6 +304,9 @@ case "$1" in
# For each pid file below that is readable, send kill's default signal
# to the process id stored in it.
if [ -r /var/run/portal_monitor.pid ]; then
kill `cat /var/run/portal_monitor.pid`
fi
# Same treatment for portal_resources.
if [ -r /var/run/portal_resources.pid ]; then
kill `cat /var/run/portal_resources.pid`
fi
# And for attend.
if [ -r /var/run/attend.pid ]; then
kill `cat /var/run/attend.pid`
fi
......
......@@ -90,6 +90,18 @@ CREATE TABLE `apt_aggregate_nodetypes` (
PRIMARY KEY (`urn`,`type`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
--
-- Table structure for table `apt_aggregate_events`
--
-- One row per (urn, stamp) pair, recording the event value
-- (up/down/offline/unknown) noted for that urn at that time.
DROP TABLE IF EXISTS `apt_aggregate_events`;
CREATE TABLE `apt_aggregate_events` (
`urn` varchar(128) NOT NULL default '',
`event` enum('up','down','offline','unknown') NOT NULL default 'unknown',
`stamp` datetime NOT NULL default '0000-00-00 00:00:00',
PRIMARY KEY (`urn`,`stamp`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
--
-- Table structure for table `apt_aggregate_status`
--
......@@ -97,7 +109,7 @@ CREATE TABLE `apt_aggregate_nodetypes` (
DROP TABLE IF EXISTS `apt_aggregate_status`;
CREATE TABLE `apt_aggregate_status` (
`urn` varchar(128) NOT NULL default '',
`status` enum('up','down','unknown') NOT NULL default 'unknown',
`status` enum('up','down','offline','unknown') NOT NULL default 'unknown',
`last_success` datetime default NULL,
`last_attempt` datetime default NULL,
`pcount` int(11) default '0',
......
use strict;
use libdb;
#
# Database schema update.
#
# 1. Redefine apt_aggregate_status.status so its enum includes 'offline'.
# 2. Create the apt_aggregate_events table when it does not exist yet.
#
# Takes ($dbhandle, $dbname, $version); none of them are consulted here.
# Returns 0.
# NOTE(review): DBQueryFatal presumably does not return on a failed
# query -- confirm against libdb.
#
sub DoUpdate($$$)
{
    my ($dbhandle, $dbname, $version) = @_;

    my $alter_status = join("",
        "ALTER TABLE apt_aggregate_status change `status` ",
        " `status` enum('up','down','offline','unknown') ",
        " NOT NULL default 'unknown'");
    DBQueryFatal($alter_status);

    # The events table is already in place; nothing further to create.
    return 0
        if (DBTableExists("apt_aggregate_events"));

    my $create_events = join("",
        "CREATE TABLE `apt_aggregate_events` ( ",
        " `urn` varchar(128) NOT NULL default '', ",
        " `event` enum('up','down','offline','unknown') ",
        " NOT NULL default 'unknown', ",
        " `stamp` datetime NOT NULL ",
        " default '0000-00-00 00:00:00', ",
        " PRIMARY KEY (`urn`,`stamp`) ",
        ") ENGINE=MyISAM DEFAULT CHARSET=latin1");
    DBQueryFatal($create_events);

    return 0;
}
1;
# Local Variables:
# mode:perl
# End:
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment