Commit d72fb5af authored by Tom Mitchell

Merge branch 'master' into non-pg-users

parents 774c4e1f dfb4e671
......@@ -442,17 +442,17 @@ void print_help() {
<< endl;
cout << " -u - Print a summary of the solution." << endl;
cout << " -c <float> - Use the 'connected' pnode finding algorithm ";
cout << "<float>*100%" << endl;
cout << " of the time." << endl;
cout << "<float>*100%" << endl;
cout << " of the time." << endl;
cout << " -n - Don't anneal - just do the prechecks." << endl;
cout << " -x <file> - Specify a text ptop file" << endl;
cout << " -y <file> - Specify a text top file" << endl;
#ifdef WITH_XML
cout << " -W <file> - Specify the output rspec file" << endl;
cout << " -f <T>[/<T>] - Specify the ptop/vtop file formats " << endl;
cout << " T should be one of (text|xml|rspec-<rspec ver. number>)" << endl;;
cout << " Specifying only one T is equivalent to -f T/T"<<endl;
cout << " -f <T>[/<T>] - Specify the ptop/vtop file formats " << endl;
cout << " T should be one of (text|xml|rspec)" << endl;
cout << " Specifying only one T is equivalent to -f T/T"<<endl;
#endif
cout << " -F - Apply additional checking to fixed nodes" << endl;
cout << " -D - Dump configuration options" << endl;
......@@ -946,6 +946,14 @@ int main(int argc,char **argv) {
break;
#ifdef WITH_XML
case 'W':
if (strcmp(optarg, "") == 0) {
print_help();
}
vtopOutputFilename = optarg;
break;
case 'f':
if (strcmp(optarg, "") == 0) {
print_help();
......@@ -1253,4 +1261,3 @@ int main(int argc,char **argv) {
exit(EXIT_SUCCESS);
}
}
#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2007 University of Utah and the Flux Group.
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
......@@ -409,6 +409,11 @@ sub CheckPassword($$$$$)
my ($prefix, $uid, $password, $name, $email) = @_;
my $checkpass_args = escapeshellarg($password)
. " " . $uid . " " . escapeshellarg($name . ":" . $email);
# Make taint check happy.
if ($checkpass_args =~ /^(.*)$/) {
$checkpass_args = $1;
}
my $pwokay = `$checkpass $checkpass_args`;
if ($?) {
chomp($pwokay);
......
......@@ -781,12 +781,14 @@ sub StartSlivers($$$$)
my $errors = 0;
my $count = 0;
my @tmp = ();
my @failed = ();
foreach my $result (@results) {
my $resource = $resources[$count];
if ($result != 0) {
print STDERR "*** Error starting slivers for $resource\n";
$errors++;
push(@failed, $resource);
}
else {
#
......@@ -802,6 +804,36 @@ sub StartSlivers($$$$)
}
$count++;
}
#
# Set the nodes to TBFAILED to avoid waiting in os_setup.
#
if (@failed) {
foreach my $resource (@failed) {
my $manager_urn = $resource->manager_urn();
my $ticketstr = $resource->Ticket();
my $ticket = GeniXML::Parse($ticketstr);
return -1
if (!defined($ticket));
foreach my $ref (GeniXML::FindNodes("n:node",
$ticket)->get_nodelist()) {
my $vname = GeniXML::GetVirtualId($ref);
my $this_manager_urn = GeniXML::GetManagerId($ref);
next
if (!defined($this_manager_urn) ||
$manager_urn ne $this_manager_urn);
my $node = $experiment->VnameToNode($vname);
next
if (!defined($node));
if ($node->eventstate() ne TBDB_NODESTATE_TBFAILED()) {
$node->SetEventState(TBDB_NODESTATE_TBFAILED());
}
}
}
}
# Everything failed, stop now.
return -1
if (!@tmp);
......@@ -878,7 +910,9 @@ sub WaitForSlivers($$$@)
my $coderef = sub {
my ($resource) = @_;
my $ref;
my $notready = 0;
my $failed = 0;
my $ready = 0;
my $count = 0;
print STDERR "Getting ($$) sliver status for $resource\n";
......@@ -919,19 +953,28 @@ sub WaitForSlivers($$$@)
}
# State was changed in another process.
$node->Refresh();
$count++;
if ($status eq "ready") {
# print statement would be repeated.
# Normal node waiting at this point, for ISUP to arrive.
$ready++;
}
elsif ($status eq "failed") {
# print statement would be repeated.
}
else {
$notready++;
# We want to do something here, to avoid waiting
# for something that failed, but might not report in any
# status. os_setup might wait a really long time for the
# timeout, and that is silly.
#
if ($node->eventstate() ne TBDB_NODESTATE_TBFAILED()) {
$node->SetEventState(TBDB_NODESTATE_TBFAILED());
}
$failed++;
}
}
# Tell the parent to stop if ready or all failed.
if ($ref->{'status'} eq "ready" || $notready == 0) {
# Tell the parent to stop once every node is either ready or failed.
if ($ref->{'status'} eq "ready" || ($failed + $ready) == $count) {
return 0;
}
# Tell the parent not ready.
......@@ -977,10 +1020,10 @@ sub WaitForSlivers($$$@)
if (@resources);
}
#
# If we get here, mark any nodes left over or failed, with TBFAILED.
# If we get here, mark nodes in failed resources, with TBFAILED.
# This will stop the waiting up in os_setup.
#
foreach my $resource (@resources, @failed) {
foreach my $resource (@failed) {
my $manifest = $resource->Manifest();
next
if (!defined($manifest));
......
......@@ -4,6 +4,13 @@
# All rights reserved.
#
#
# NB: Any changes to this document should be reflected in the Wiki page:
# http://users.emulab.net/trac/emulab/wiki/EventAPI
#
The Emulab event system API:
* event_register: Register with the testbed event system
#include <event.h>
......@@ -68,6 +75,7 @@
successful, 0 otherwise. Should only be called by single-threaded
programs.
* event_stop_main: Force event_main to return
#include <event.h>
......@@ -78,6 +86,7 @@
main loop to return, either to check for a completion condition or
to handle other, non event related processing.
* event_notify: Send an event notification
#include <event.h>
......@@ -133,14 +142,17 @@
#include <event.h>
event_notification_t event_notification_alloc(event_handle_t handle,
char *host,
event_type_t type);
Allocate an event notification. The HOST parameter specifies
the hostname of the node that should receive the notification,
or EVENT_HOST_ANY if the notification should go to all hosts.
The TYPE parameter specifies the event type. Returns
a pointer to an event notification structure if the operation
address_tuple_t tuple);
Allocate an event notification destined for a particular target.
The TUPLE parameter describes the target of the notification.
It contains a set of strings for a standard set of Emulab attributes
including the Emulab site and host, the project and experiment names,
the target object type and name, and the event type, group and timeline.
All are optional and will be given default "any" values if not specified
by the caller.
Returns a pointer to an event notification structure if the operation
is successful, 0 otherwise.
......@@ -155,6 +167,17 @@
operation is successful, 0 otherwise.
* event_notification_clone: create a (deep) copy of a notification
event_notification_t
event_notification_clone(event_handle_t handle,
event_notification_t notification);
Create an exact copy of NOTIFICATION.
Returns a pointer to an event notification structure if the operation
is successful, 0 otherwise.
* event_notification_get_*: Get an attribute from an event notification
#include <event.h>
......@@ -181,6 +204,12 @@
char *name,
char *buffer,
int length);
int event_notification_get_opaque_length(event_handle_t handle,
event_notification_t notification,
char *name);
int event_notification_get_string_length(event_handle_t handle,
event_notification_t notification,
char *name);
Get the attribute with name NAME from the event notification
NOTIFICATION.
......@@ -192,6 +221,49 @@
For _opaque and _string: Writes LENGTH bytes into *BUFFER and
returns non-zero if the named attribute is found, 0 otherwise.
The _length functions return the length of the named attribute,
and can be used to size buffers for subsequent get_* calls.
* event_notification_get_<tuple-arg>: extract notification tuple info
int event_notification_get_site(event_handle_t handle,
event_notification_t notification,
void *buffer,
int length);
int event_notification_get_host(event_handle_t handle,
event_notification_t notification,
void *buffer,
int length);
int event_notification_get_expt(event_handle_t handle,
event_notification_t notification,
void *buffer,
int length);
int event_notification_get_objtype(event_handle_t handle,
event_notification_t notification,
void *buffer,
int length);
int event_notification_get_objname(event_handle_t handle,
event_notification_t notification,
void *buffer,
int length);
int event_notification_get_eventtype(event_handle_t handle,
event_notification_t notification,
void *buffer,
int length);
int event_notification_get_group(event_handle_t handle,
event_notification_t notification,
void *buffer,
int length);
int event_notification_get_timeline(event_handle_t handle,
event_notification_t notification,
void *buffer,
int length);
Convenience functions for extracting subscription attributes from a
notification. Wrappers for event_notification_get_string using the
specific NAME value.
* event_notification_put_*: Add an attribute to an event notification
......@@ -220,13 +292,12 @@
Add an attribute with name NAME to the event notification
NOTIFICATION. For _double, _int32, _int64, and _string: The value
is specified in VALUE. For _opaque: The value is specified
in the buffer BUFFER which has length LENGTH. Returns non-zero if
the operation is successful, 0 otherwise.
is specified in VALUE (strings are null-terminated). For _opaque:
The value is specified in the buffer BUFFER which has length LENGTH.
Returns non-zero if the operation is successful, 0 otherwise.
* event_notification_attr_remove: Remove an attribute from an event
notification
* event_notification_remove: Remove an attribute from an event notification
#include <event.h>
......@@ -245,25 +316,38 @@
event_subscription_t event_subscribe(event_handle_t handle,
event_notify_callback_t callback,
event_type_t type,
address_tuple_t tuple,
void *data);
Subscribe to events of type TYPE. Event notifications that match
TYPE will be passed to the callback function CALLBACK; DATA is
an arbitrary pointer that will be passed to the callback function.
Callback functions are of the form
Subscribe to events matching the information in TUPLE, arranging for
CALLBACK to be called with DATA as an argument when an event occurs.
Returns a pointer to an event subscription structure if the operation
is successful, 0 otherwise.
The information in TUPLE is used to create a pubsub boolean expression
that is a conjunction of all the tuple elements. DATA is an arbitrary
pointer that will be passed to the callback function. CALLBACK functions
are of the form:
void callback(event_handle_t handle,
event_notification_t notification,
char *host,
event_type_t type,
void *data);
where HANDLE is the handle to the event server, NOTIFICATION is the
event notification, HOST and TYPE are the respective attributes of
the event notification, and DATA is the arbitrary pointer passed to
event_subscribe. Returns a pointer to an event
subscription structure if the operation is successful, 0 otherwise.
event notification, and DATA is the arbitrary pointer passed to
event_subscribe. The various event_notification_get_* functions can
be applied to the notification to extract information.
* event_unsubscribe: Clear a subscription for an event
#include <event.h>
int event_unsubscribe(event_handle_t handle, event_subscription_t es);
Unregister an interest in the specified subscription. ES is a subscription
object returned from a previous event_subscribe call.
* event_poll: Poll for new events
......@@ -274,6 +358,7 @@
Polls for new events. Calls callbacks for all pending events. Does
not block - simply processes events that are currently queued.
* event_poll_blocking: Poll for new events
#include <event.h>
......@@ -281,10 +366,10 @@
int event_poll_blocking(event_handle_t handle, unsigned int timeout);
Same as event_poll, but blocks waiting for an event. Times out
after the given amount of time (which is in millseconds), or doesn't time
after the given amount of time (which is in milliseconds), or doesn't time
out if 0 is given.
IMPORTANT: elvin uses timeouts internally. So, this function does
NOT guarantee that when it returns, either an event has been
recieved or your timeout has passed. This should not be much of
received or your timeout has passed. This should not be much of
a problem, but you have been warned!
......@@ -481,6 +481,7 @@ sub ConvertRspec($)
my $node = GeniXML::AddElement("node", $root);
foreach my $key ('virtual_id', 'component_urn', 'exclusive',
'component_manager_urn', 'component_manager_uuid',
'virtualization_type', 'virtualization_subtype',
'startup_command', 'tarfiles') {
GeniXML::SetText($key, $node, $ref->{$key})
......
......@@ -671,8 +671,7 @@ sub Create($$$$$$)
$sliver_uuid = $vnode->uuid();
$resource_id = $vnode->node_id();
$hostname = GeniUtil::FindHostname($vnode->node_id());
$sshdport = $vnode->sshdport()
if (!defined($vnode->jailip()));
$sshdport = $vnode->sshdport();
}
else {
$hrn = "${PGENIDOMAIN}." . $node->node_id();
......
......@@ -1346,7 +1346,7 @@ sub Volunteers($)
# Something went wrong with the physnode reboot, so the
# virtnodes are DOA.
#
$node->_setupstatus($libossetup::SETUP_OKAY);
$node->_setupstatus($libossetup::SETUP_FAILED);
$typehandler->WaitDone($node);
next;
}
......
......@@ -3881,7 +3881,8 @@ sub AllocNodes($)
my $vpnodename = $self->solution_v2v()->{$vnodename};
my $vpnode = $self->pnodes()->{$vpnodename};
if ($vpnode->isjailed() || $vpnode->isremotenode()) {
if ($vpnode->isjailed() ||
$vpnode->isremotenode() || $vpnode->_onsharednode()) {
my $pnodename = $self->solution_v2p()->{$vnodename};
my $pnode = $self->pnodes()->{$pnodename};
my $sshdport = nextipportnum($pnode);
......
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2009 University of Utah and the Flux Group.
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
# All rights reserved.
#
use strict;
......@@ -284,6 +284,8 @@ sub AssignLoop()
TBDebugTimeStamp("mapper loop started");
while (1) {
my $gotlock = 0;
chat("Assign run $currentrun\n");
my $prefix = ($debug || $regression ? "$pid-$eid" : "$pid-$eid-$$");
......@@ -297,6 +299,28 @@ sub AssignLoop()
chat("Trying assign on an empty testbed.\n");
}
#
# Serialize with the pool daemon if using shared nodes.
# XXX When using shared nodes, only one can proceed at a
# time through assignment. This is okay for now since few
# experiments are using shared nodes. Eventually needs to be
# a barrier.
#
if ((!($impotent || $regression)) && $vtop->sharednodecount()) {
while (1) {
my $lock_result =
DBQueryWarn("select get_lock('pool_daemon', 10)");
fatal("DB Error tring to get pool_daemon lock")
if (!defined($lock_result));
($gotlock) = $lock_result->fetchrow_array();
last
if ($gotlock);
chat("Waiting for pool daemon lock ...\n");
}
}
#
# RunAssign returns 0 if successful.
# returns -1 if failure, but assign says to stop trying.
......@@ -305,6 +329,11 @@ sub AssignLoop()
#
my $retval = RunAssign($precheck, $prefix);
if ($gotlock) {
DBQueryWarn("select release_lock('pool_daemon')")
or fatal("Could not release the pool lock");
}
# Success!
last
if ($retval == 0);
......
......@@ -153,6 +153,13 @@ while (my ($node_id,$pid,$eid,$vname,$erole) = $db_result->fetchrow_array()) {
($erole eq "boss" || $erole eq "boss+router")) {
push @{$cnames{$node_id}}, "www.$eid.$pid";
}
#
# Special case for inner elab ops; add CNAME for event-server.
#
if (defined($erole) &&
($erole eq "ops" || $erole eq "ops+fs")) {
push @{$cnames{$node_id}}, "event-server.$eid.$pid";
}
}
......
......@@ -19,10 +19,11 @@ sub usage()
"Use the -d option to prevent daemonization\n";
exit(-1);
}
my $optlist = "dn";
my $optlist = "dne";
my $debug = 0;
my $impotent = 0;
my $startup = 0;
my $killme = 0;
my $nofree = 1;
#
# This should run as root.
......@@ -88,10 +89,13 @@ if (@ARGV != 0) {
usage();
}
if (defined($options{"d"})) {
$debug = $options{"d"};
$debug = 1;
}
if (defined($options{"n"})) {
$impotent = $options{"n"};
$impotent = 1;
}
if (defined($options{"e"})) {
$nofree = 1;
}
if (!$impotent && CheckDaemonRunning("pool_daemon")) {
......@@ -164,21 +168,26 @@ if (!defined($image)) {
exit(0);
}
while (1) {
#
# Add a handler for TERM since we really do not want this to be
# interrupted. Just set a flag that will cause it to exit at
# the next loop.
#
sub sigterm()
{
    # TERM handler (installed through $SIG{TERM}). Nothing is torn down
    # here: the handler only reports the signal and raises the $killme
    # flag for the rest of the script to act on.
    print("Got a TERM signal; arranging to exit soon\n");

    # Keep this as the final statement: it sets the flag and is also the
    # sub's implicit return value.
    $killme = 1;
}
$SIG{TERM} = \&sigterm;
while (!$killme) {
my $disabled;
# Use a long period; we do not want the pool to change too fast.
if (!$startup) {
$startup++;
}
else {
sleep(120);
}
print "Pool Daemon running at ".`date`;
if (! TBGetSiteVar("web/nologins", \$disabled) || $disabled) {
print " Skipping this loop cause of nologins\n";
next;
goto loop;
}
Node->FlushAll();
......@@ -196,6 +205,24 @@ while (1) {
my $minpoolsize = TBGetSiteVar("general/minpoolsize");
my $poolnodetype = TBGetSiteVar("general/poolnodetype");
#
# Serialize this part with the mapper.
#
if (!$impotent) {
while (1) {
my $lock_result =
DBQueryWarn("select get_lock('pool_daemon', 5)");
fatal("DB Error tring to get pool_daemon lock")
if (!defined($lock_result));
my ($gotlock) = $lock_result->fetchrow_array();
last
if ($gotlock);
print "Waiting for pool daemon lock ...\n";
}
}
#
# Look to see how each of the nodes is packed. This is
# advisory; we will not know for sure until tables locked
......@@ -220,7 +247,7 @@ while (1) {
next
if ($vnodecount < 0);
if ($vnodecount == 0) {
if ($vnodecount == 0 && !$nofree) {
print "$node no longer has virtual nodes on it.\n";
# Free the node unless we would go below the minpoolsize.
if (scalar(@nodelist) - scalar(keys(%tofree)) > $minpoolsize) {
......@@ -231,6 +258,7 @@ while (1) {
}
# Count up loaded vs. unloaded nodes.
my $factor = $maxsharecount / $vnodecount;
print "$node load factor is $factor\n";
if ($factor < 0.5) {
$unloaded++;
}
......@@ -248,14 +276,14 @@ while (1) {
my $key = (keys(%tofree))[0];
delete($tofree{$key});
}
elsif (scalar(@nodelist) < $maxpoolsize) {
elsif (scalar(@nodelist) < $maxpoolsize) {
$newcount++;
}
}
if (! (keys(%tofree) || $newcount)) {
exit(0)
if ($impotent);
next;
goto loop;
}
#
......@@ -268,7 +296,7 @@ while (1) {
if (!open(NS, ">$tmpfile")) {
notify("Could not create $tmpfile");
next;
goto loop;
}
print NS "# Auto generated by the pool daemon\n\n";
print NS "source tb_compat.tcl\n";
......@@ -311,7 +339,7 @@ while (1) {
chmod(0775, $tmpfile);
exit(0)
if ($impotent);
if ($impotent || $killme);
# Must do this each time before fork.
tblog_new_session();
......@@ -335,7 +363,7 @@ while (1) {
if ($error_data->{'cause'} eq "temp") {
print "Temporary resource shortage; try again later\n";
next;
goto loop;
}
fatal("swapmod failed");
}
......@@ -348,6 +376,12 @@ while (1) {
exec("$SWAPEXP -q -w -n -s modify $pid $eid $tmpfile");
die("Could not exec $SWAPEXP\n");
}
loop:
DBQueryWarn("select release_lock('pool_daemon')")
or fatal("Could not release the pool lock");
# Use a long period; we do not want the pool to change too fast.
sleep(120);
}
#
......
......@@ -199,6 +199,7 @@ if (!defined($experiment)) {
}
my $special = ($pid eq "testbed" || $pid eq "tbres" ||
$pid eq "emulab-ops" || $pid eq "utahstud");
my $newsetup = ($pid eq "testbed");
#
# Print starting message.
......@@ -1285,8 +1286,10 @@ sub doSwapin($) {
#
print "Resetting OS and rebooting.\n";
TBDebugTimeStamp("launching os_setup");
if (!($os_setup_pid = fork())) {
exec("os_setup $pid $eid") or return 1;
if (!($os_setup_pid = fork())) {
my $oscmd = ($newsetup ? "os_setup_new" : "os_setup");
exec("$oscmd $pid $eid") or return 1;
} elsif ($os_setup_pid == -1) {
tberror "Fork failed.";
return 1;
......
......@@ -1137,10 +1137,9 @@ function PAGEHEADER($title, $view = NULL, $extra_headers = NULL,
TBGetVersionInfo($major, $minor, $build);
$versioninfo = "Vers: $major.$minor Build: $build";
$hash = TBGetCommitHash();
if ($hash) {
$commithash = "$hash";
$commithash = TBGetCommitHash();
if (!$commithash) {
$commithash = "";
}
}
echo "<div id='versioninfo'>$versioninfo";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment