#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2006 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
use Getopt::Std;

#
# Reboot a node (or nodes). Will power cycle the node as a last resort.
# Use -e option to reboot all nodes in an experiment.
#
# Exit value is 0 if all nodes reboot okay, or the number of nodes
# that could not be rebooted.
#
sub usage()
{
    #
    # Print the command synopsis and the per-option help text to STDERR,
    # then terminate the script with exit(-1). Never returns.
    #
    # The hidden -r option and the -p option are accepted by the option
    # string but are intentionally not advertised here.
    #
    print(STDERR
	  "Usage: node_reboot [-d] [-f] [-w] [-k] [-b] [-c] node [node ...]\n" .
	  "       node_reboot [-d] [-f] [-w] [-k] [-b] [-c] -e pid,eid\n" .
	  "       node_reboot [-d] [-f] [-w] [-k] [-b] [-c] -a\n" .
	  "Use the -d option to turn on debugging\n" .
	  "Use the -e option to reboot all the nodes in an experiment\n" .
	  "Use the -w option to wait for nodes to come back up\n" .
	  "Use the -k option to power cycle nodes in PXEWAIT mode\n" .
	  "Use the -b option to reboot nodes in PXEWAIT mode\n" .
	  "Use the -a option to reboot all free nodes\n" .
	  "Use the -c option to reconfig nodes instead of rebooting\n" .
	  "Use the -f option to power cycle (and not wait for nodes to die)\n");
    exit(-1);
}
#
# Option string handed to getopts(). Only -e takes a value. The -r switch
# is hidden (not listed by usage()): it runs this in "realmode", ie don't
# send an event, but really do the work instead.
#
my $optlist = "dfe:wrkacbp";

#
# One flag per command line switch. Everything defaults to off, except
# realmode.
#
my ($debug, $powercycle, $waitmode) = (0, 0, 0);	# -d, -f, -w
my ($killmode, $reconfig)           = (0, 0);		# -k, -c
my ($rebootmode, $prepare)          = (0, 0);		# -b, -p

# XXX Temporary, until we make event sending the default.
my $realmode = 1;					# -r

#
# Configure variables. The @...@ tokens are placeholders; presumably they
# are substituted when this .in template is configured -- TODO confirm.
#
my $TB		= "@prefix@";
my $CLIENT_BIN  = "@CLIENT_BINDIR@";

#
# Locals
#
# The list of node names to reboot. This has to be an array: the rest of
# the script does push(@nodes, ...) and finally passes [ @nodes ] to the
# library. Declaring a scalar $nodes here left @nodes as an undeclared
# package global.
#
my @nodes	= ();

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libreboot;

#
# Un-taint the environment (the script runs under -T): pin PATH to a
# fixed list of system directories and remove the shell-related
# variables (see perlsec).
#
$ENV{PATH} = join(':', '/bin', '/sbin', '/usr/bin', '/usr/local/bin');
delete $ENV{$_}
    foreach ('IFS', 'CDPATH', 'ENV', 'BASH_ENV');

# Flush output after every write rather than buffering it.
$OUTPUT_AUTOFLUSH = 1;

# Be careful not to exit on transient error
$libdb::DBQUERY_MAXTRIES = 30;

#
# Refuse to go any further unless the effective uid is root; a copy that
# is not running as root is assumed not to be the real version.
#
die("*** $0:\n" .
    "    Must be root! Maybe its a development version?\n")
    unless ($EUID == 0);

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
# %options is declared lexically; the node selection code that follows in
# this file reads it for the -a and -e switches.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"d"})) {
    $debug = 1;
}
if (defined($options{"b"})) {
    $rebootmode = 1;
}
if (defined($options{"f"})) {
    $powercycle = 1;
}
if (defined($options{"k"})) {
    $killmode = 1;
}
if (defined($options{"w"})) {
    $waitmode = 1;
}
if (defined($options{"p"})) {
    $prepare = 1;
}
if (defined($options{"r"})) {
    # Currently a no-op, since realmode already defaults to 1.
    $realmode = 1;

    #if ($UID && !TBAdmin($UID)) {
    #	die("*** You cannot use real mode!\n");
    #}
}
if (defined($options{"c"})) {
    $reconfig = 1;
}

#
# Build the list of nodes to reboot. Exactly one of three modes applies:
#   -a          all free nodes (admin only, no other arguments allowed)
#   -e pid,eid  all nodes reserved to an experiment (no other arguments)
#   otherwise   the node names given on the command line
#
if (defined($options{"a"})) {
    #
    # Reboot all free nodes
    #
    if ($UID && !TBAdmin($UID)) {
	die("*** $0:\n".
	    "    You not have permission to reboot all free nodes!\n");
    }

    if (@ARGV) {
	usage();
    }

    # A node is free when it has no matching row in the reserved table.
    my $query_result =
	DBQueryFatal("select n.node_id from nodes as n ".
		     "left join reserved as r on r.node_id=n.node_id ".
		     "left join node_types as nt on nt.type=n.type ".
		     "where nt.class='pc' and n.role='testnode' and ".
		     "      r.pid is NULL");

    if ($query_result->numrows == 0) {
	die("*** $0:\n".
	    "    There are no free nodes to reboot\n");
    }

    while (my ($nodeid) = $query_result->fetchrow_array()) {
	push(@nodes, $nodeid);
    }
}
elsif (defined($options{"e"})) {
    #
    # Reboot all nodes in an experiment
    #
    if (@ARGV) {
	usage();
    }

    my $eidmode = $options{"e"};
    my $pid;
    my $eid;

    #
    # The whole argument must be exactly "pid,eid", with both parts
    # non-empty. Anchoring the pattern keeps stray text around a valid
    # looking substring from being silently accepted. The captures
    # contain only [-\w] characters, which is what makes it okay to
    # interpolate them into the query below.
    #
    if ($eidmode =~ /^([-\w]+),([-\w]+)$/) {
	$pid = $1;
	$eid = $2;
    }
    else {
	die("*** $0:\n".
	    "    Invalid argument to -e option: $eidmode\n");
    }

    #
    # Verify permission to muck with this experiment. This is to head off
    # permission problems early; the nodes are individually checked later
    # in the library.
    #
    if ($UID && !TBAdmin($UID) &&
	! TBExptAccessCheck($UID, $pid, $eid, TB_EXPT_MODIFY)) {
	die("*** $0:\n".
	    "    You not have permission to reboot nodes in $pid/$eid!\n");
    }

    my $query_result =
	DBQueryFatal("select node_id from reserved where ".
		     "pid='$pid' and eid='$eid'");

    if ($query_result->numrows == 0) {
	die("*** $0:\n".
            "    There are no nodes reserved in pid/eid $pid/$eid\n");
    }

    #
    # If this is a firewalled experiment, don't reboot the firewall.
    # If you want to reboot the firewall, you have to do it individually.
    #
    # Two reasons.  One is that the firewall is not conceptually part of
    # the experiment and the user should not "be aware" of it.  This
    # is a pretty lame reason because pretty much everywhere else, the
    # firewall IS part of the experiment.  That leads to reason number
    # two: rebooting the firewall causes all other nodes in the experiment
    # to become disconnected until the firewall reboots.  For some machines,
    # that are also rebooting as you recall, not getting PXE info for a
    # significant amount of time causes them to fail to the next boot.
    # For some machines this might mean halting ("Strike any key to continue"),
    # as there is no other boot possibility.  This means ya gotta come back
    # later and reboot all those nodes again.
    #
    # NOTE(review): TBExptFirewall presumably stores the firewall node's
    # name through the reference when it returns true -- confirm in libdb.
    #
    my $firewall = "";
    if (TBExptFirewall($pid, $eid, \$firewall)) {
	warn("WARNING: NOT rebooting firewall node $firewall\n");
    }

    while (my ($nodeid) = $query_result->fetchrow_array()) {
	push(@nodes, $nodeid)
	    if ($nodeid ne $firewall);
    }
}
else {
    #
    # Reboot nodes listed on command line.
    #
    if (!@ARGV) {
	usage();
    }

    # Untaint the nodes.
    foreach my $node ( @ARGV ) {
	if ($node =~ /^([-\w]+)$/) {
	    $node = $1;
	}
	else {
	    die("*** Tainted node name: $node\n");
	}
	if (!TBValidNodeName($node)) {
	    die("*** $0:\n".
		"    Node does not exist: $node\n");
	}
	push(@nodes, $node);
    }
}

#
# Okay, call into the library using a hash of arguments. Whatever
# nodereboot() returns becomes the exit status of the script.
#
# %status is passed by reference but never read here; presumably the
# library fills it in -- TODO confirm.
#
my %status = ();
my %args   = (
    'debug'      => $debug,
    'powercycle' => $powercycle,
    'rebootmode' => $rebootmode,
    'waitmode'   => $waitmode,
    'realmode'   => $realmode,
    'killmode'   => $killmode,
    'reconfig'   => $reconfig,
    'prepare'    => $prepare,
    'nodelist'   => [ @nodes ],
);

exit(nodereboot(\%args, \%status));