node_reboot.in 6.16 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2006 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
5 6
# All rights reserved.
#
7 8 9 10
use English;
use Getopt::Std;

#
11 12
# Reboot a node (or nodes). Will power cycle the node as a last resort.
# Use -e option to reboot all nodes in an experiment.
13
#
14 15
# Exit value is 0 if all nodes reboot okay, or the number of nodes that
# could not be rebooted.
16 17 18
#
sub usage()
{
    # Print the command line synopsis to STDERR and exit; never returns.
    # The -r option is deliberately left out of the text (it is a hidden
    # option). exit(-1) is reported to the parent process as status 255.
    print(STDERR
	  "Usage: node_reboot [-d] [-f] [-w] [-k] [-b] [-c] node [node ...]\n" .
	  "       node_reboot [-d] [-f] [-w] [-k] [-b] [-c] -e pid,eid\n".
	  "       node_reboot [-d] [-f] [-w] [-k] [-b] [-c] -a\n".
	  "Use the -d option to turn on debugging\n" .
	  "Use the -e option to reboot all the nodes in an experiment\n" .
	  "Use the -w option to wait for nodes to come back up\n" .
	  "Use the -k option to power cycle nodes in PXEWAIT mode\n" .
	  "Use the -b option to reboot nodes in PXEWAIT mode\n" .
	  "Use the -a option to reboot all free nodes\n".
	  "Use the -c option to reconfig nodes instead of rebooting\n".
	  "Use the -f option to power cycle (and not wait for nodes to die)\n");
    exit(-1);
}
32 33
# The hidden -r option runs this in "realmode", ie don't send an event, but
# really do the work instead.
34
# Option string for getopts(); only -e takes an argument.
my $optlist     = "dfe:wrkacb";
my $debug       = 0; # -d: turn on debugging.
my $powercycle  = 0; # -f: power cycle (and not wait for nodes to die).
my $waitmode    = 0; # -w: wait for nodes to come back up.
my $realmode    = 1; # XXX Temporary, until we make event sending the default.
my $killmode    = 0; # -k: power cycle nodes in PXEWAIT mode.
my $reconfig    = 0; # -c: reconfig nodes instead of rebooting.
my $rebootmode  = 0; # -b: reboot nodes in PXEWAIT mode.
42 43 44 45 46

#
# Configure variables
#
my $TB		= "@prefix@";
my $CLIENT_BIN  = "@CLIENT_BINDIR@";

# Locals
# IDs of the nodes to operate on. This must be an array: the rest of the
# script pushes onto @nodes and copies it into the library argument hash.
my @nodes	= ();
51 52

#
53
# Testbed Support libraries
54
#
55 56
use lib "@prefix@/lib";
use libdb;
57
use libreboot;
58 59 60 61 62 63

# un-taint path: taint mode (-T) will not run external commands unless PATH
# is set to a known value and these shell-affecting variables are gone.
$ENV{'PATH'} = '/bin:/sbin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off output buffering: flush the selected handle after every write.
$| = 1;

# Be careful not to exit on transient error
# (presumably the number of times libdb retries a failing query -- confirm
# against libdb).
$libdb::DBQUERY_MAXTRIES = 30;

69 70 71 72
#
# We don't want to run this script unless it's the real version, which
# runs with an effective UID of root.
#
if ($EUID != 0) {
    die("*** $0:\n".
        "    Must be root! Maybe its a development version?\n");
}

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"d"})) {
    $debug = 1;
}
if (defined($options{"b"})) {
    $rebootmode = 1;
}
if (defined($options{"f"})) {
    $powercycle = 1;
}
if (defined($options{"k"})) {
    $killmode = 1;
}
if (defined($options{"w"})) {
    $waitmode = 1;
}
if (defined($options{"r"})) {
    # Hidden option. $realmode already defaults to 1, so for now this
    # only makes the request explicit.
    $realmode = 1;

    #if ($UID && !TBAdmin($UID)) {
    #	die("*** You cannot use real mode!\n");
    #}
}
if (defined($options{"c"})) {
    $reconfig = 1;
}
110

111
#
# Build the list of nodes to operate on. There are three mutually exclusive
# sources: all free nodes (-a), all nodes in an experiment (-e pid,eid), or
# the node names given on the command line.
#
if (defined($options{"a"})) {
    #
    # Reboot all free nodes
    #
    if ($UID && !TBAdmin($UID)) {
	die("*** $0:\n".
	    "    You do not have permission to reboot all free nodes!\n");
    }

    # -a takes no node arguments.
    if (@ARGV) {
	usage();
    }
    
    # A node is free when it has no row in the reserved table.
    my $query_result =
	DBQueryFatal("select n.node_id from nodes as n ".
		     "left join reserved as r on r.node_id=n.node_id ".
		     "left join node_types as nt on nt.type=n.type ".
		     "where nt.class='pc' and n.role='testnode' and ".
		     "      r.pid is NULL");

    if ($query_result->numrows == 0) {
	die("*** $0:\n".
	    "    There are no free nodes to reboot\n");
    }

    while (my ($nodeid) = $query_result->fetchrow_array()) {
	push(@nodes, $nodeid);
    }
}
elsif (defined($options{"e"})) {
    #
    # Reboot all nodes in an experiment
    #
    if (@ARGV) {
	usage();
    }

    my $eidmode = $options{"e"};
    my $pid;
    my $eid;
    
    #
    # Untaint the argument. The pattern is anchored and requires both
    # parts to be non-empty, so a malformed argument is rejected instead
    # of having a fragment of it silently extracted. The captures can
    # only contain word characters and dashes, which keeps them safe to
    # interpolate into the query below.
    #
    if ($eidmode =~ /^([-\w]+),([-\w]+)$/) {
	$pid = $1;
	$eid = $2;
    }
    else {
	die("*** $0:\n".
	    "    Invalid argument to -e option: $eidmode\n");
    }

    #
    # Verify permission to muck with this experiment. This is to head off
    # permission problems early; the nodes are individually checked later
    # in the library.
    #
    if ($UID && !TBAdmin($UID) &&
	! TBExptAccessCheck($UID, $pid, $eid, TB_EXPT_MODIFY)) {
	die("*** $0:\n".
	    "    You do not have permission to reboot nodes in $pid/$eid!\n");
    }

    my $query_result =
	DBQueryFatal("select node_id from reserved where ".
		     "pid='$pid' and eid='$eid'");

    if ($query_result->numrows == 0) {
	die("*** $0:\n".
            "    There are no nodes reserved in pid/eid $pid/$eid\n");
    }
    
    #
    # If this is a firewalled experiment, don't reboot the firewall.
    # If you want to reboot the firewall, you have to do it individually.
    #
    # Two reasons.  One is that the firewall is not conceptually part of
    # the experiment and the user should not "be aware" of it.  This
    # is a pretty lame reason because pretty much everywhere else, the
    # firewall IS part of the experiment.  That leads to reason number
    # two: rebooting the firewall causes all other nodes in the experiment
    # to become disconnected until the firewall reboots.  For some machines,
    # that are also rebooting as you recall, not getting PXE info for a
    # significant amount of time causes them to fail to the next boot.
    # For some machines this might mean halting ("Strike any key to continue"),
    # as there is no other boot possibility.  This means ya gotta come back
    # later and reboot all those nodes again.
    #
    my $firewall = "";
    if (TBExptFirewall($pid, $eid, \$firewall)) {
	warn("WARNING: NOT rebooting firewall node $firewall\n");
    }

    # $firewall stays "" when there is no firewall, so nothing is skipped.
    while (my ($nodeid) = $query_result->fetchrow_array()) {
	push(@nodes, $nodeid)
	    if ($nodeid ne $firewall);
    }
}
else {
    #
    # Reboot nodes listed on command line.
    # 
    if (!@ARGV) {
	usage();
    }

    # Untaint the nodes.
    foreach my $node ( @ARGV ) {
	if ($node =~ /^([-\w]+)$/) {
	    $node = $1;
	}
	else {
	    die("*** Tainted node name: $node\n");
	}
	if (!TBValidNodeName($node)) {
	    die("*** $0:\n".
		"    Node does not exist: $node\n");
	}
	push(@nodes, $node);
    }
}

231
#
# Okay, call into the library using a hash of arguments. The return value
# of nodereboot() becomes the exit status of this script.
#
my %args = (
    'debug'      => $debug,
    'powercycle' => $powercycle,
    'rebootmode' => $rebootmode,
    'waitmode'   => $waitmode,
    'realmode'   => $realmode,
    'killmode'   => $killmode,
    'reconfig'   => $reconfig,
    # Pass a copy of the node list rather than a reference to @nodes.
    'nodelist'   => [ @nodes ],
);

# Handed to the library by reference and not examined here; presumably it
# is filled in with per-node results -- confirm against libreboot.
my %status = ();

exit(nodereboot(\%args, \%status));