All new accounts created on GitLab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

Commit fcc80cee authored by David Johnson

Add an optional arg to TBSetNodeEventState so it can be nonfatal.

Use that in the libosload_virtnode package to retry on failures, instead
of aborting.  This is a cheesy retry strategy to prevent failures on
multi-thousand vnode topologies.

(Default behavior without the arg is still fatal.)
parent a1056149
......@@ -932,24 +932,32 @@ sub TBSetNodeLogEntry($$$$)
#
# Set event state for a node by sending a TBDB_TBEVENT_NODESTATE event
# addressed to $BOSSNODE.
#
# usage: TBSetNodeEventState(char *node, char *state)
# usage: TBSetNodeEventState(char *node, char *state; int fatal)
#
# node  - passed as the event's objname.
# state - passed as the event's eventtype.
# fatal - optional.  When omitted (undef) or true, the event is sent with
#         event::EventSendFatal; when defined and false, it is sent with
#         event::EventSendWarn instead, so the caller gets the result back
#         and can react to a failure (e.g. retry).
#
# The return value is whatever the event::EventSend* call returns.
# Returns 1 if okay.
# Returns 0 if failed.
#
# NOTE(review): this listing is a diff rendered without +/- markers, so the
# usage line, the sub header and the argument-unpacking line each appear
# twice (old form first, new form second), and the unconditional
# EventSendFatal return below is the pre-change body that the if/else
# replaces.
#
sub TBSetNodeEventState($$)
sub TBSetNodeEventState($$;$)
{
my ($node, $state) = @_;
my ($node, $state, $fatal) = @_;
#
# If using the event system, we send out an event for the state daemon to
# pick up. Otherwise, we just set the state in the database ourselves.
#
# NOTE(review): no direct database update is visible in this sub; only the
# event path is implemented here -- confirm whether the fallback described
# above still exists elsewhere.
#
# The event module is loaded at call time via require.
require event;
return event::EventSendFatal(objtype => TBDB_TBEVENT_NODESTATE,
objname => $node,
eventtype => $state,
host => $BOSSNODE);
# Default (no third argument) keeps the original fatal behavior.
if (!defined($fatal) || $fatal) {
return event::EventSendFatal(objtype => TBDB_TBEVENT_NODESTATE,
objname => $node,
eventtype => $state,
host => $BOSSNODE);
}
else {
# Explicitly non-fatal: hand the send result back to the caller.
return event::EventSendWarn(objtype => TBDB_TBEVENT_NODESTATE,
objname => $node,
eventtype => $state,
host => $BOSSNODE);
}
}
#
......
......@@ -2807,8 +2807,21 @@ sub SetupReload($$)
}
}
#
# Need to kick virtnodes so stated picks up the next_op_mode from os_select
TBSetNodeEventState($node_id,TBDB_NODESTATE_SHUTDOWN);
#
# We attempt to re-send failed event sends; failures can be a problem here
# in multi-thousand vnode experiments. Definitely a bit of a hack, but
# the sending rate is highest here.
#
my $chances = 8;
while ($chances > 0
&& !TBSetNodeEventState($node_id,TBDB_NODESTATE_SHUTDOWN,0)) {
tbwarn("$self SetupReload: TBSetNodeEventState failed; waiting and".
" retrying.");
sleep(4);
$chances -= 1;
}
return 0;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment