All new accounts created on Gitlab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

Commit 9f3205c9 authored by Leigh B Stoller

More work on the aggregate monitoring.

1. Split the resource stuff (where we ask for an advertisement and
   process it) into a separate script, since that takes a long time to
   cycle through because of the size of the ads from the big clusters.

2. On the monitor, distinguish offline (nologins) from actually being
   down.

3. Add a table to store changes in status so we can see over time how
   much time the aggregates are usable.
parent 9320782d
......@@ -207,6 +207,21 @@ sub STATUS($$;$)
return $self->{'STATUS'}->{$name};
}
#
# Insert a status (change) event.
#
sub StatusEvent($$)
{
    #
    # Record a status change for this aggregate as a new row in the
    # apt_aggregate_events table, stamped with the database's now().
    #
    # Arguments: $self  - the aggregate object; its urn() keys the row.
    #            $event - one of 'up', 'down', 'offline' or 'unknown'
    #                     (the values of the table's `event` enum).
    # Returns:   0 on success; -1 if $event is not one of the values
    #            above, or if DBQueryWarn() returns false.
    #
    my ($self, $event) = @_;

    #
    # $event is interpolated directly into the SQL below, so only let
    # through the exact values the enum column accepts. Anything else
    # (undef, a typo, a string with a quote in it) is rejected here
    # instead of being handed to the database.
    #
    if (!defined($event) ||
        $event !~ /\A(?:up|down|offline|unknown)\z/) {
        print STDERR "StatusEvent: invalid event '" .
            (defined($event) ? $event : "undef") . "'\n";
        return -1;
    }

    # NOTE(review): the urn is also interpolated unescaped; this assumes
    # urn() never contains a single quote -- confirm.
    my $urn = $self->urn();

    DBQueryWarn("insert into apt_aggregate_events set ".
                " urn='$urn', event='$event', stamp=now()")
        or return -1;

    return 0;
}
#
# Lookup all aggregates for a portal.
#
......
......@@ -36,7 +36,7 @@ BIN_SCRIPTS = manage_profile manage_instance manage_dataset \
manage_images rtecheck checkprofile manage_extensions \
create_slivers searchip
SBIN_SCRIPTS = apt_daemon aptevent_daemon portal_xmlrpc apt_checkup \
portal_monitor apt_scheduler
portal_monitor apt_scheduler portal_resources
LIB_SCRIPTS = APT_Profile.pm APT_Instance.pm APT_Dataset.pm APT_Geni.pm \
APT_Aggregate.pm APT_Utility.pm APT_Rspec.pm
WEB_BIN_SCRIPTS = webmanage_profile webmanage_instance webmanage_dataset \
......
This diff is collapsed.
This diff is collapsed.
......@@ -161,6 +161,11 @@ case "$1" in
echo -n " portal_monitor"
@prefix@/sbin/portal_monitor
fi
if [ -x @prefix@/sbin/portal_resources ]; then
echo -n " portal_resources"
@prefix@/sbin/portal_resources
fi
#
# Could trigger experiment creation, so make sure everything
# else is setup first; i.e., run this last!
......@@ -299,6 +304,9 @@ case "$1" in
if [ -r /var/run/portal_monitor.pid ]; then
kill `cat /var/run/portal_monitor.pid`
fi
if [ -r /var/run/portal_resources.pid ]; then
kill `cat /var/run/portal_resources.pid`
fi
if [ -r /var/run/attend.pid ]; then
kill `cat /var/run/attend.pid`
fi
......
......@@ -90,6 +90,18 @@ CREATE TABLE `apt_aggregate_nodetypes` (
PRIMARY KEY (`urn`,`type`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
--
-- Table structure for table `apt_aggregate_events`
--
-- One row per recorded status event for an aggregate: `urn` identifies
-- the aggregate, `event` is the status ('up', 'down', 'offline' or
-- 'unknown') and `stamp` is the time attached to the event.
-- NOTE(review): the primary key is (urn, stamp) and `stamp` is a plain
-- datetime, presumably one-second resolution, so two events for the same
-- urn in the same second would collide -- confirm this is acceptable.
DROP TABLE IF EXISTS `apt_aggregate_events`;
CREATE TABLE `apt_aggregate_events` (
`urn` varchar(128) NOT NULL default '',
`event` enum('up','down','offline','unknown') NOT NULL default 'unknown',
`stamp` datetime NOT NULL default '0000-00-00 00:00:00',
PRIMARY KEY (`urn`,`stamp`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
--
-- Table structure for table `apt_aggregate_status`
--
......@@ -97,7 +109,7 @@ CREATE TABLE `apt_aggregate_nodetypes` (
DROP TABLE IF EXISTS `apt_aggregate_status`;
CREATE TABLE `apt_aggregate_status` (
`urn` varchar(128) NOT NULL default '',
`status` enum('up','down','unknown') NOT NULL default 'unknown',
`status` enum('up','down','offline','unknown') NOT NULL default 'unknown',
`last_success` datetime default NULL,
`last_attempt` datetime default NULL,
`pcount` int(11) default '0',
......
use strict;
use libdb;
sub DoUpdate($$$)
{
    #
    # Database update step:
    #  1. Redefine apt_aggregate_status.status so its enum includes
    #     'offline' alongside 'up', 'down' and 'unknown'.
    #  2. Create the apt_aggregate_events table if it is not there yet.
    #
    # The three arguments are accepted but not used by this update.
    # Always returns 0; DBQueryFatal() handles query failure.
    #
    my ($dbhandle, $dbname, $version) = @_;

    my $alter_status_sql = join("",
        "ALTER TABLE apt_aggregate_status change `status` ",
        " `status` enum('up','down','offline','unknown') ",
        " NOT NULL default 'unknown'");
    DBQueryFatal($alter_status_sql);

    # Nothing more to do when the events table already exists.
    return 0
        if (DBTableExists("apt_aggregate_events"));

    my $create_events_sql = join("",
        "CREATE TABLE `apt_aggregate_events` ( ",
        " `urn` varchar(128) NOT NULL default '', ",
        " `event` enum('up','down','offline','unknown') ",
        " NOT NULL default 'unknown', ",
        " `stamp` datetime NOT NULL ",
        " default '0000-00-00 00:00:00', ",
        " PRIMARY KEY (`urn`,`stamp`) ",
        ") ENGINE=MyISAM DEFAULT CHARSET=latin1");
    DBQueryFatal($create_events_sql);

    return 0;
}
1;
# Local Variables:
# mode:perl
# End:
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment