Commit 7f5fa6bf authored by David Johnson

If we don't flash the switch, restore its osid/partition info.

If a switch reload fails, and we're sure we didn't flash the
switch, then revert the changes to osid info in the nodes table
and the partitions table entries.  This way, we don't lie about
what's actually on the switch.  Otherwise, there could be a
problem on a subsequent reload -- a user could request something
that failed to load last time, but Emulab thinks that's what's
really on the switch, so it doesn't reload... but that wasn't
really what was installed on the switch.  So, this will help
us stay in synch.

Also remove all the os_select script invocations in favor of
using the Node->OSSelect method instead.  Save a fork, save a
life.
parent 7cfa7aa6
......@@ -42,7 +42,6 @@ my $PROJROOT = "@PROJROOT_DIR@";
my $FRISBEELAUNCHER = "$TB/sbin/frisbeelauncher";
my $SUBBOSS_FRISBEELAUNCHER = "$TB/sbin/subboss_frisbeelauncher_wrapper";
my $osselect = "$TB/bin/os_select";
my $TBUISP = "$TB/bin/tbuisp";
#
# Max number of retries (per node) before its deemed fatal. This allows
......@@ -1869,8 +1868,7 @@ sub SetBootOS($$)
print "$self ($nodeobject): Changing default OS to $osinfo\n";
if (!$TESTMODE) {
system("$osselect $defosid $node_id");
if ($?) {
if ($nodeobject->OSSelect($osinfo,"def_boot_osid",$self->debug())) {
tberror "$self ($nodeobject): os_select $defosid failed!";
return -1;
}
......@@ -1879,6 +1877,85 @@ sub SetBootOS($$)
return 0;
}
#
# Save the current osid, def_boot_osid, and partitions info for this node.
# This is not useful to most nodes, but for nodes where we *know* the reboot
# has failed (and for which we don't want to lie about which OS is actually
# on the node *because* we don't always force a reload if we think we know
# what OS is on "disk" -- like for switches!), we can do a reset if we save
# the old info.
#
# Note that none of this is called by default -- subclasses must manually
# invoke if they need it!
#
sub SaveNodeDiskInfo($$)
{
    my ($self, $nodeobject) = @_;
    my $node_id = $nodeobject->node_id();

    #
    # Snapshot what the database currently claims is on this node: its
    # rows in the partitions table, plus the osid and def_boot_osid columns
    # of its nodes table row.  The snapshot is stashed via nodeinfo() under
    # the keys 'old_partitions', 'old_osid' and 'old_def_boot_osid', which
    # are the keys RestoreNodeDiskInfo() reads back.  Always returns 0.
    #
    $self->dprint("SaveNodeDiskInfo($node_id) saving partition and osid");

    #
    # Partitions first: keep every row as a hashref, in the order fetched.
    # Nothing is recorded when the node has no partitions rows.
    #
    my $part_query =
	DBQueryFatal("select node_id,partition,osid,imageid,imagepid " .
		     "from partitions where node_id='$node_id'");
    if (defined($part_query) && $part_query->numrows()) {
	my @saved_parts;
	while (my $part_row = $part_query->fetchrow_hashref()) {
	    push(@saved_parts, $part_row);
	}
	$self->nodeinfo($nodeobject, 'old_partitions', \@saved_parts);
    }

    #
    # Then the two osid columns from the nodes table row.
    #
    my $node_query =
	DBQueryFatal("select osid,def_boot_osid " .
		     "from nodes where node_id='$node_id'");
    if (defined($node_query) && $node_query->numrows()) {
	my $node_row = $node_query->fetchrow_hashref();
	foreach my $column ('osid', 'def_boot_osid') {
	    $self->nodeinfo($nodeobject, "old_$column", $node_row->{$column});
	}
    }

    return 0;
}
#
# Restore osid and partitions info if we saved it off!
#
sub RestoreNodeDiskInfo($$)
{
    my ($self,$nodeobject) = @_;
    my $node_id = $nodeobject->node_id();

    #
    # Put back the partitions rows and the nodes.osid/def_boot_osid values
    # that were stashed with nodeinfo() under 'old_partitions', 'old_osid'
    # and 'old_def_boot_osid'.  Anything that was not stashed is left
    # untouched in the database.  Always returns 0.
    #
    $self->dprint("RestoreNodeDiskInfo($node_id) restoring partition and osid");

    #
    # Render a saved column value as an SQL literal.  A column that was
    # NULL when saved comes back from fetchrow_hashref() as undef; it must
    # go back as NULL, not as the empty string that interpolating undef
    # into '...' would produce.
    # NOTE(review): values are not otherwise escaped; this assumes they
    # are the plain ids/names that were read from these same tables.
    #
    my $sqlval = sub {
	my ($val) = @_;
	return "NULL"
	    if (!defined($val));
	return "'$val'";
    };

    my $partref = $self->nodeinfo($nodeobject,'old_partitions');
    if (defined($partref) && @$partref) {
	# Replace whatever is recorded now with exactly the saved rows.
	DBQueryFatal("delete from partitions where node_id='$node_id'");

	foreach my $rowref (@$partref) {
	    my $values = join(",", "'$node_id'",
			      map { $sqlval->($rowref->{$_}) }
			      qw(partition osid imageid imagepid));

	    DBQueryFatal("insert into partitions " .
			 " (node_id,partition,osid,imageid,imagepid)" .
			 " values" .
			 " ($values)");
	}
    }

    #
    # Only rewrite the nodes row when both values were saved, so a partial
    # snapshot never overwrites one column with a bogus value.
    #
    my $osid = $self->nodeinfo($nodeobject,'old_osid');
    my $def_boot_osid = $self->nodeinfo($nodeobject,'old_def_boot_osid');
    if (defined($osid) && defined($def_boot_osid)) {
	DBQueryFatal("update nodes set osid='$osid'," .
		     " def_boot_osid='$def_boot_osid'" .
		     " where node_id='$node_id'");
    }

    return 0;
}
sub UpdatePartitions($$)
{
my ($self,$nodeobject) = @_;
......@@ -2209,8 +2286,8 @@ sub SetupReload($$)
++$idx;
}
system("$osselect -1 $osid $node_id");
if ($?) {
my $osinfo = OSinfo->Lookup($osid);
if ($nodeobject->OSSelect($osinfo,"next_boot_osid",$self->debug())) {
tberror "$self: os_select $osid failed!";
return -1;
}
......@@ -2440,8 +2517,9 @@ sub Reload($$)
#
TBSetNodeEventState($node_id,TBDB_NODESTATE_RELOADDONE);
system("$osselect $osid $node_id");
if ($?) {
my $osinfo = OSinfo->Lookup($osid);
if ($nodeobject->OSSelect($osinfo,"def_boot_osid",$self->debug())) {
tberror "$self ($nodeobject): os_select $osid failed!";
return -1;
}
......
......@@ -78,7 +78,6 @@ sub AUTOLOAD {
my $TB = "@prefix@";
my $TESTMODE = @TESTMODE@;
my $TBOPS = "@TBOPSEMAIL@";
my $OSSELECT = "$TB/bin/os_select";
my $SSHTB = "$TB/bin/sshtb";
my $POWER = "$TB/bin/power";
my $USERS = "@USERNODE@";
......@@ -199,11 +198,16 @@ sub AddNode($$$$)
#
# Never force node reloads -- only reload if new/old image is actually
# different
# different. So, if they haven't set a force flag, default it 0.
#
if (!defined($self->nodeflag($nodeobject,'force'))) {
$self->nodeflag($nodeobject,'force',0);
}
#
# We never allow switches to be rebooted -- we control that ourselves.
# Power cycling during a flash operation could be disastrous.
#
$self->nodeflag($nodeobject,'noreboot',1);
return $retval;
......@@ -214,7 +218,7 @@ sub GetMaxRetries($)
return 0;
}
sub UpdatePartitions($$)
sub PreSetupReload($$)
{
my ($self,$nodeobject) = @_;
my $node_id = $nodeobject->node_id();
......@@ -252,6 +256,22 @@ sub UpdatePartitions($$)
}
}
#
# If we're really gonna do it, then save our state so we can restore
# if it seems like our load failed.
#
if ($self->nodeflag($nodeobject,'force',1)) {
$self->SaveNodeDiskInfo($nodeobject);
}
return $self->SUPER::PreSetupReload($nodeobject);
}
sub UpdatePartitions($$)
{
my ($self,$nodeobject) = @_;
my $node_id = $nodeobject->node_id();
if ($self->nodeflag($nodeobject,'force',1)) {
return $self->SUPER::UpdatePartitions($nodeobject);
}
......@@ -755,7 +775,8 @@ sub Reload($$)
TBSetNodeEventState($node_id,TBDB_NODESTATE_RELOADING);
# try with tftp
$retval = $self->uploadImageTftp($nodeobject,$filename);
my $didflash = 0;
$retval = $self->uploadImageTftp($nodeobject,$filename,\$didflash);
if ($retval < 0) {
if (defined($self->nodeinfo($nodeobject,'osload_allowxmodem'))
&& $self->nodeinfo($nodeobject,'osload_allowxmodem')) {
......@@ -813,8 +834,8 @@ sub Reload($$)
#
TBSetNodeEventState($node_id,TBDB_NODESTATE_RELOADDONE);
system("$OSSELECT $osid $node_id");
if ($?) {
my $osinfo = OSinfo->Lookup($osid);
if ($nodeobject->OSSelect($osinfo,"def_boot_osid",$self->debug())) {
tberror "$self ($node_id): os_select $osid failed!";
goto failure;
}
......@@ -849,7 +870,10 @@ sub Reload($$)
failure:
;
if (!$didflash) {
$self->RestoreNodeDiskInfo($nodeobject);
}
done:
# and finally remove this guy!
$self->_undoTiptunnel($nodeobject);
......@@ -1320,10 +1344,11 @@ sub writeConfig($$$) {
return 0;
}
sub uploadImageTftp($$$) {
my ($self,$nodeobject,$filename) = @_;
sub uploadImageTftp($$$;$) {
my ($self,$nodeobject,$filename,$flashedref) = @_;
my $node_id = $nodeobject->node_id();
my $retval;
my $didflash = 0;
#
# This must be done from the OS serial command prompt! We need to disable
......@@ -1379,6 +1404,8 @@ sub uploadImageTftp($$$) {
[ '#\s*$', undef, 'line' ] ] });
if ($retval < 0) {
tberror "$self uploadImageTftp($nodeobject): could not upload file via tftp: $retval,$!";
# we have to assume that it happened
$didflash = 1;
goto failure;
}
......@@ -1397,16 +1424,24 @@ sub uploadImageTftp($$$) {
if (defined($errormsg)) {
tberror "$self Reload($node_id): tftp upload failed: $errormsg\n";
$retval = -2;
# we didn't actually flash anything
$didflash = 0;
goto failure;
}
$self->dprint("uploadImageTftp($nodeobject): tftp upload appears successful!");
unlink($tmpfilename);
if (defined($flashedref)) {
$$flashedref = 1;
}
return 0;
failure:
unlink($tmpfilename);
if (defined($flashedref)) {
$$flashedref = $didflash;
}
return -1;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment