Commit e1f4263c authored by Mac Newbold's avatar Mac Newbold

Added sched_reserve. Works kind of like sched_reload, but for a reservation.

Rob and I noticed that when we need to do maintenance on a node, it's hard
to grab it when it gets freed before it gets taken again. So this will allow
admins to set up nodes so that when the current owner frees them, they
immediately are moved into another experiment. For instance, if I need to do
an upgrade on all the machines in the testbed, I can do:

sched_reserve testbed down pc1 pc2 pc3 [...] pc40

and it will immediately reserve any free nodes, skip any that are already
in testbed/down, and add an entry to the next_reserve table for any nodes
that are already reserved. Then nfree checks for entries in the next_reserve
table before it lets any nodes go (much like it does for the reloads table),
and moves them into the expt before it can get reserved again. So we can
guarantee that the next time a node is freed, it will be saved for us.

Note that this can also be useful to accumulate nodes for a large experiment,
since you can make sure you get them as soon as the current owner is done.
parent 87dbf64a
......@@ -1031,7 +1031,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/os_load tbsetup/os_setup tbsetup/mkprojdir tbsetup/power \
tbsetup/node_reboot tbsetup/webnscheck tbsetup/nscheck \
tbsetup/resetvlans tbsetup/rmacct-ctrl tbsetup/rmproj \
tbsetup/sched_reload tbsetup/reload_daemon \
tbsetup/sched_reload tbsetup/sched_reserve tbsetup/reload_daemon \
tbsetup/batchexp tbsetup/killbatchexp tbsetup/batch_daemon \
tbsetup/webbatchexp tbsetup/webkillbatchexp \
tbsetup/startexp tbsetup/endexp tbsetup/webstartexp tbsetup/webendexp \
......
......@@ -153,7 +153,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/os_load tbsetup/os_setup tbsetup/mkprojdir tbsetup/power \
tbsetup/node_reboot tbsetup/webnscheck tbsetup/nscheck \
tbsetup/resetvlans tbsetup/rmacct-ctrl tbsetup/rmproj \
tbsetup/sched_reload tbsetup/reload_daemon \
tbsetup/sched_reload tbsetup/sched_reserve tbsetup/reload_daemon \
tbsetup/batchexp tbsetup/killbatchexp tbsetup/batch_daemon \
tbsetup/webbatchexp tbsetup/webkillbatchexp \
tbsetup/startexp tbsetup/endexp tbsetup/webstartexp tbsetup/webendexp \
......
......@@ -30,6 +30,8 @@ my $sth = "";
my $self = (getpwuid($UID))[0]
|| die "Cannot figure out who you are!\n";
if ($d) { print "You are '$self', expt '$eid', proj '$pid'\n"; }
$cmd = "select uid from proj_memb as pm left join experiments as e on ".
"e.pid=pm.pid where e.eid='$eid' and uid='$self' and e.pid='$pid'";
$sth = $dbh->query($cmd);
......
......@@ -53,7 +53,8 @@ if ($sth->numrows < 1) {
}
$cmd = "lock tables nodes write, node_types read, reloads read, ".
"interfaces write, delays write, reserved write, last_reservation write";
"interfaces write, delays write, reserved write, last_reservation write,".
"next_reserve read";
$sth = $dbh->query($cmd)
|| die("Locking error:\n$cmd\nError string is:".$dbh->errstr."\n");
......@@ -68,6 +69,7 @@ if ($#node_names == -1) {
}
my %reloads = ();
my %reserves= ();
foreach my $n (@node_names) {
$sth = $dbh->query("select * from reserved where node_id='$n' ".
"and eid='$eid' and pid='$pid'");
......@@ -88,10 +90,10 @@ foreach my $n (@node_names) {
$sth = $dbh->query($cmd)
|| (print "Failed Command:\n$cmd\nError string is:".$dbh->errstr."\n"
&& $error++);
if ( ($sth->num_rows()) > 0) {
my @reload;
print "Adding reloads for $n to the list.\n";
print "Adding reload for $n to the list.\n";
my $i = 0;
my $max = $sth->num_rows();
while ( $i < $max ) {
......@@ -109,16 +111,34 @@ foreach my $n (@node_names) {
}
}
} else {
# No reloads to be done, so really free the node
# This little sillyness is for disk reloading. Remember the last
# project a node was reserved into.
$cmd = "insert into last_reservation values ('$n', '$pid')";
$dbh->query($cmd);
print "Releasing node '$n'...";
$cmd = "delete from reserved where node_id='$n' and eid='$eid'";
$sth = $dbh->query($cmd) && print "Succeeded.\n"
# If the node has a next_reserve entry, change the reservation and start it
$cmd = "select node_id,pid,eid from next_reserve where node_id='$n'";
$sth = $dbh->query($cmd)
|| (print "Failed Command:\n$cmd\nError string is:".$dbh->errstr."\n"
&& $error++);
if ( ($sth->num_rows()) > 0) {
my @reserve;
print "Adding next reservation for $n to the list.\n";
my $i = 0;
my $max = $sth->num_rows();
while ( $i < $max ) {
$i++;
@reserve = $sth->fetchrow_array();
my ($node, $next_pid, $next_eid) = @reserve;
$reserves{$node} = "$next_pid:$next_eid";
}
} else {
# No reloads or reservation changes, so really free the node
# This little sillyness is for disk reloading. Remember the last
# project a node was reserved into.
$cmd = "insert into last_reservation values ('$n', '$pid')";
$dbh->query($cmd);
print "Releasing node '$n'...";
$cmd = "delete from reserved where node_id='$n' and eid='$eid'";
$sth = $dbh->query($cmd) && print "Succeeded.\n"
|| (print "Failed Command:\n$cmd\nError string is:".$dbh->errstr."\n"
&& $error++);
}
}
if (! ($n =~ /sh\d+/)) { #If its not a shark
......@@ -177,10 +197,10 @@ foreach $reload ( keys %reloads ) {
print STDERR "Reload for @list complete.\n";
}
if ( @reloaded > 0 ) {
if ( (@reloaded > 0) || (keys %reserves > 0) ) {
print "Locking tables.\n";
$cmd = "lock tables nodes read, node_types read, reloads read, ".
"interfaces write, reserved write";
"interfaces write, reserved write, next_reserve write";
$sth = $dbh->query($cmd)
|| die("Locking error:\n$cmd\nError string is:".$dbh->errstr."\n");
......@@ -194,6 +214,21 @@ if ( @reloaded > 0 ) {
&& $error++ && next);
}
# Move every node that has a pending next_reserve entry straight into its
# next experiment. %reserves maps node_id => "pid:eid" (built earlier in
# this script from the next_reserve table).
foreach $n ( keys %reserves ) {
    my ($next_pid,$next_eid) = split (":",$reserves{$n});

    # Change reservation (don't delete or we'll get races)
    print "Changing reservation for $n to $next_pid/$next_eid...\n";
    $cmd = "update reserved set pid='$next_pid',eid='$next_eid' where ".
        "node_id='$n'";
    $sth = $dbh->query($cmd);
    if (!$sth) {
        # Written as explicit statements: in the chained form
        # `print "..." && $error++ && next`, print takes the whole &&
        # expression as its argument, so the message was never printed and
        # `next` was skipped on the first error (post-increment yields 0).
        print "Failed Command:\n$cmd\nError string is:".$dbh->errstr."\n";
        $error++;
        # Leave the next_reserve entry in place; the reservation was not
        # changed, so the request must not be dropped.
        next;
    }

    # The node now belongs to the new experiment; the request is satisfied.
    $cmd = "delete from next_reserve where node_id='$n'";
    $sth = $dbh->query($cmd);
    if (!$sth) {
        print "Failed Command:\n$cmd\nError string is:".$dbh->errstr."\n";
        $error++;
        next;
    }
}
print "Unlocking tables.\n";
$cmd = "unlock tables";
$sth = $dbh->query($cmd)
......
......@@ -18,7 +18,7 @@ BIN_STUFF = power snmpit tbend tbswapin tbswapout tbprerun tbreport \
USERBINS = os_load node_reboot nscheck
SBIN_STUFF = resetvlans console_setup.proxy sched_reload named_setup \
batch_daemon exports_setup reload_daemon
batch_daemon exports_setup reload_daemon sched_reserve
LIBEXEC_STUFF = mkprojdir rmproj mkacct-ctrl rmacct-ctrl \
os_setup mkexpdir console_setup webnscheck \
......
#!/usr/bin/perl -wT
use English;
# Schedule the reservation of a node. If the node is not currently in use,
# nalloc will be called to reserve the node immediately. If the node is
# currently reserved, an entry will be added to the next_reserve table, and
# when the node is freed it will be reserved to the given experiment.
#
# usage: sched_reserve <pid> <eid> <node> [<node> ...]
# Print the usage message on STDERR and terminate the script via die()
# (so the exit status is non-zero). Takes no arguments; never returns.
sub usage() {
    die("Usage: sched_reserve <pid> <eid> <node> [<node> ...]\n".
        "Reserves nodes to eid when they are free.\n");
}
# Configure variables
my $TB = "@prefix@";

# Load the Testbed support stuff.
push(@INC, "$TB/lib");
require libdb;

my $nalloc = "$TB/bin/nalloc";
my @nodes  = ();

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

$| = 1; # Autoflush STDOUT so progress output is not held back in a buffer

# The usage is <pid> <eid> <node> [<node> ...]: at least one node is
# required, otherwise there is nothing to do.
if (@ARGV < 3) {
    usage();
}

# Untaint args. Each value is re-assigned from the regex capture, which is
# what clears the taint flag under -T.
my $pid = shift;
if ($pid =~ /^([-\@\w.\+]+)$/) {
    $pid = $1;
} else {
    die("Bad data in pid '$pid'.");
}

my $eid = shift;
if ($eid =~ /^([-\@\w.\+]+)$/) {
    $eid = $1;
} else {
    die("Bad data in eid '$eid'.");
}

foreach my $node ( @ARGV ) {
    if ($node =~ /^([-\@\w]+)$/) {
        $node = $1;
    } else {
        die("Bad node name '$node'.");
    }
    push(@nodes, $node);
}

# Root and admin types can do whatever they want.
# Mere users cannot schedule reservations (yet?)
if ($UID && !TBAdmin($UID)) {
    die("Only root or TB administrators can schedule reservations.\n");
}

# For each node: reserve it right away if it is free, leave it alone if it
# is already in the target experiment, otherwise queue a next_reserve entry
# so it is handed over when its current owner frees it.
foreach my $node (@nodes) {
    my $pc = $node;
    my $sth;

    $sth = DBQueryFatal("select * from nodes where node_id='$pc'");
    if ($sth->num_rows() != 1) {
        print STDERR "Node $pc doesn't exist. Skipping $pc.\n";
        next;
    }

    print "Checking if $pc is reserved...";
    $sth = DBQueryFatal("select * from reserved where node_id='$pc'");
    if ( ($sth->num_rows()) < 1) {
        # No row in 'reserved': the node is free, so grab it now.
        print "Available - Reserving...\n";
        print STDERR "Using proj $pid, expt $eid, I am ",`whoami`;
        # List form of system() runs nalloc directly, without a shell.
        if ( system($nalloc, $pid, $eid, $pc) != 0 ) {
            print STDERR "WARNING: Could not reserve $pc!\n";
        }
    } else {
        $sth = DBQueryFatal("select * from reserved where node_id='$pc' and ".
                            "pid='$pid' and eid='$eid'");
        if ( ($sth->num_rows()) < 1) {
            # Held by some other experiment. 'replace' overwrites any
            # earlier next_reserve request for this node.
            print "Reserved - Scheduling next reservation...\n";
            $sth = DBQueryFatal("replace into next_reserve (node_id,pid,eid) ".
                                "values ('$pc','$pid','$eid')");
        } else {
            print "Reserved - Already reserved to $pid/$eid\n";
        }
    }
}

# If I haven't died yet, then this was successful. Note that skipped nodes
# and nalloc failures above are reported as warnings only and do not change
# the exit status.
exit 0;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment