nfree.in 9.15 KB
Newer Older
1 2
#!/usr/bin/perl -wT
use English;
3 4 5 6 7 8 9 10

#
# nfree - Takes physical node names, and frees them from the experiment they
# are allocated to. If nodes are omitted, frees up all nodes in the given
# experiment. Looks in the scheduled_reloads and next_reserve tables to see
# if this node should be re-reserved into another experiment and/or reloaded,
# rather than being put back into the pool of free nodes
#
11 12 13 14 15 16
#
# Report the command-line synopsis and abort. Never returns: the text is
# raised through die(), and because it ends in a newline perl does not
# append the script name and line number to it.
#
sub usage ()
{
    my @help = (
	"Usage: nfree <pid> <eid> [<node> <node> <...>]\n",
	"Releases all nodes in the specified experiment.\n",
	"If nodes are listed, nfree releases only those nodes.\n",
    );

    die(join("", @help));
}
Mac Newbold's avatar
Mac Newbold committed
17

18 19 20
#
# Configure variables
#
21
my $TB       = "@prefix@";
22 23 24 25 26

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
27
use libdb;
28
use libtestbed;
29

30 31 32 33
my $consetup	= "$TB/libexec/console_setup";
my $reloadpid	= "emulab-ops";
my $pendingeid  = "reloadpending";
my $reloadeid   = "reloading";
Mac Newbold's avatar
Mac Newbold committed
34

35
my @nodes;
Mac Newbold's avatar
Mac Newbold committed
36
my @freed_nodes=();
37 38 39 40 41 42
my $error = 0;

#
# Turn off line buffering on output
#
$| = 1;
Mac Newbold's avatar
Mac Newbold committed
43

44 45 46 47 48 49 50 51 52
#
# Untaint the path
# 
$ENV{'PATH'} = "/bin:/usr/bin:/sbin:/usr/sbin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

if (@ARGV < 2) {
    usage();
}
53 54 55
my $pid = shift;
my $eid = shift;

56
#
57
# Untaint args.
58
#
59 60 61 62 63 64 65 66 67 68 69 70
# A pid/eid may consist only of word characters, '-' and '@'. The value is
# reassigned from the regex capture, which is what clears its taint flag
# under -T; anything else aborts the script.
die("Bad data in pid: $pid.")
    unless ($pid =~ /^([-\@\w]+)$/);
$pid = $1;

die("Bad data in eid: $eid.")
    unless ($eid =~ /^([-\@\w]+)$/);
$eid = $1;
71

72
#
73
# Make sure that the experiment actually exists
74
#
75 76
if (!ExpState($pid,$eid)) {
    die("There is no experiment '$eid' in project '$pid'.\n");
77 78
}

79 80 81 82
#
# Make sure the user has the ability to modify this experiment
#
if (!TBExptAccessCheck($UID, $pid, $eid, TB_EXPT_MODIFY)) {
83
    die("You do not have permission to modify '$eid' in project '$pid'.\n");
84 85
}

86
#
87 88 89 90 91
# Make a list of nodes given on the command line, or get the whole list from
# the DB if none provided. 
#
if (@ARGV) {
    foreach my $n (@ARGV) {
92
	#
93 94 95 96 97 98 99 100
	# Taint check first! Solves silly perl problems.
	# 
	if ($n =~ /^([\w]+)$/) {
	    $n = $1;
	}
	else {
	    die("*** $0:\n".
		"    Bad node name: $n.\n");
101
	}
102

103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
	# Shark hack
	# Any node name containing "sh<digits>" is treated as a shark. The
	# matched "sh<digits>" part names its shelf, and the shelf is queued
	# as a unit instead of the single name that was given.
	if ($n =~ /(sh\d+)/ ) {
	    #
	    # It's a shark - do the whole shelf if its not done already.
	    #
	    my $shelf = $1;
	    # The shelf counts as done when a "<shelf>-<digit>" entry is
	    # already in @nodes. The pattern requires a comma on both sides,
	    # so only entries that are neither first nor last in the joined
	    # list can match.
	    if ( ! (join(",", @nodes) =~ /,$shelf-\d,/)) {
		# Shelf hasn't been done yet...
		# Queue "<shelf>-1" through "<shelf>-8". This inner $n
		# shadows the outer loop variable for the duration of the
		# loop.
		foreach my $n ( 1 .. 8 ) {
		    push(@nodes, "$shelf-$n");
		}
	    }
	    # End shark hack
	}
	else {
	    # its not a shark - just add it in...
	    push(@nodes, $n);
	}
121
    }
122
}
123 124 125 126 127 128 129
else {
    print "Releasing all nodes from experiment '$eid' in project '$pid'.\n";
    @nodes = ExpNodes($pid, $eid);
}

######################################################################
# Step 1
130
#
131 132
# See what nodes need to be freed, and then lock them down by moving
# them to a holding reservation.
133
#
134 135 136 137 138
# We lock just the reserved table. This prevents races between multiple
# invocations of nfree trying to free the same node. Rather than lock
# a zillion tables, move the reservation into a holding pattern. This
# effectively prevents someone else from freeing the same nodes, and from
# someone else allocating the nodes until we are done cleaning things up.
139
#
140 141 142 143 144
# NOTE: My reason for not wanting to lock all those tables (9 in the
# original version) is that library calls will fail since mysql locking
# requires that every table used within the locked area, be locked.
# Of course, who knows what tables the library uses, so thats a silly
# way to go. 
145
#
146 147 148 149 150 151 152
######################################################################

#
# Form a unique temporary EID. I want this to be unique in case something
# goes wrong, and they get left in the DB.
# 
my $lockedeid = "nfree-locked-$PID";
153

154 155 156
DBQueryFatal("lock tables reserved write");

foreach my $n (@nodes) { 
157
    #
158
    # Check to make sure they have actually reserved the nodes.
159
    #
160 161 162
    my $result =
	DBQueryFatal("select * from reserved where node_id='$n' ".
		     "and eid='$eid' and pid='$pid'");
163 164 165 166 167 168
    if ($result->numrows == 0) {
	print "Node '$n' is not reserved by your experiment.\n";
	$error++;
	next;
    }

169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
    #
    # Move to locked reservation. Node is not free, but is no longer
    # owned by the pid/eid, so cannot be mucked with.
    #
    # A node whose update fails is skipped before it can be pushed onto
    # @freed_nodes, so the cleanup pass never sees it. Count the failure
    # so that the final exit($error) is non-zero instead of reporting
    # success for a node that was not freed.
    #
    if (! DBQueryWarn("update reserved " .
		      "set vname=NULL, pid='$reloadpid', eid='$lockedeid' ".
		      "where node_id='$n'")) {
	print "*** WARNING: Error locking down node $n!\n";
	$error++;
	next;
    }

    push(@freed_nodes, $n);
}

DBQueryFatal("unlock tables");

######################################################################
# Step 2
#
# Go through the list of nodes we successfully locked down, and clean
# up the node state (nodes, delays, interfaces, etc). Once that is done,
# move them to whatever new reservations are pending, or free the node.
#
######################################################################

foreach my $n (@freed_nodes) {
195 196 197 198 199 200
    #
    # This little sillyness is for disk reloading. Kill the last reservation.
    #
    DBQueryWarn("delete from last_reservation where node_id='$n'") || $error++;

    #
201
    # Clean out all delays
202
    #
203
    DBQueryWarn("delete from delays where node_id='$n'") || $error++;
204 205 206

    #
    # Find the control net interface for this node type, as well as some
207
    # of the default values for its node type.
208
    #
209 210 211 212 213
    my $result =
	DBQueryFatal("select control_net,osid,node_types.pxe_boot_path " .
		     "from node_types " .
		     "left join nodes on nodes.type=node_types.type " .
		     "where node_id='$n'");
214
    my ($control, $osid, $pxe_boot_path) = $result->fetchrow_array();
215 216 217 218 219 220

    #
    # Clean up interfaces by clearing IPs and/or aliases.
    #
    if (! ($n =~ /sh\d+/)) {
        # Its not a shark, so clean out all IPs except the control net.
221
	DBQueryWarn("update interfaces set IP='' " .
222 223 224 225
		    "where node_id='$n' and card!='$control'") || $error++;
    }
    else {
	# XXX Shark Hack!
226 227
	DBQueryWarn("update interfaces set IPalias='' ".
		    "where node_id='$n'") || $error++;
228
    }
229

230
    #
231 232 233
    # Map the default OSID to something that is actually loaded on the
    # machine. I think this is a hack, but its the best I can come up
    # with right now. 
234
    #
235 236 237 238
    my $mapped_osid = MapNodeOSID($n, $osid);
    if ($mapped_osid) {
	$osid = $mapped_osid;
    }
239 240

    #
241 242 243 244 245
    # Clean up the nodes table so that its in a moderately clean state.
    # 
    DBQueryWarn("update nodes set def_boot_osid='$osid',def_boot_cmd_line='',".
		"def_boot_path='',startupcmd='',rpms='',deltas='', ".
		"tarballs='',pxe_boot_path='$pxe_boot_path', ".
246
		"failureaction='fatal', routertype='none', ".
247
		"next_pxe_boot_path='' where node_id='$n'") || $error++;
248

249
    #
250
    # Clean out the current_reloads table (a just in case measure). 
251 252
    #
    DBQueryWarn("delete from current_reloads where node_id='$n'") || $error++;
253

254
    #
255
    # Now its safe to change the reservation.
256 257
    #
    #
258 259
    # If the node has a reloads entry, change the reservation so that the
    # reload_daemon will pick it up.
260
    #
261 262 263 264 265 266 267
    $result =
	DBQueryFatal("select node_id,image_id from scheduled_reloads " .
		     "where node_id='$n'");

    if ($result->numrows()) {
	print "Moving $n to $reloadpid/$pendingeid.\n";
	
268
	DBQueryWarn("update reserved set ".
269 270 271
		    "pid='$reloadpid',eid='$pendingeid' ".
		    "where node_id='$n'") || $error++;
	next;
272
    }
273

274
    #
275
    # If the node has a next_reserve entry, change the reservation.
276
    #
277 278 279 280 281 282 283 284 285
    # Look for a pending reservation for this node. When one exists, hand
    # the node straight to that pid/eid instead of freeing it.
    $result =
	DBQueryFatal("select node_id,pid,eid from next_reserve ".
		     "where node_id='$n'");

    # Use numrows, the row-count accessor every other result check in this
    # script uses.
    if ($result->numrows()) {
	# The first selected column (node_id) is not needed; skip it.
	my (undef, $next_pid, $next_eid) = $result->fetchrow_array();

	print "Moving $n to $next_pid/$next_eid.\n";

	# Re-point the reservation and clear vname. A failure is counted
	# but does not stop processing of the remaining nodes.
	DBQueryWarn("update reserved set pid='$next_pid',eid='$next_eid'," .
		    "vname=NULL where node_id='$n'")
	    || $error++;

	# Drop the pending entry now that the move has been attempted.
	DBQueryWarn("delete from next_reserve where node_id='$n'")
	    || $error++;

	next;
    }
    
296
    #
297
    # No reloads or reservation changes, so really free the node
298
    #
299 300 301 302 303 304 305 306 307 308 309 310 311
    # This little sillyness is for disk reloading. Remember the last
    # project a node was reserved into.
    #
    DBQuery("insert into last_reservation values ('$n', '$pid')");
    
    print "Releasing node '$n' ... ";
    if (DBQueryWarn("delete from reserved where node_id='$n'")) {
	print "Succeeded.\n";
    }
    else {
	print "Failed!\n";
	$error++;
    }
312 313
}

314 315 316 317 318
######################################################################
# Step 3 - Set up console for freed nodes.
#
# Using a list of freed nodes built earlier, run consetup to reset
# their serial consoles.
319 320 321 322 323 324 325
#
# NOTE: While it may seem like a race to do this after releasing the
# reservation, it really is not. Even if the node is allocated again
# console_setup looks at the current reservation and does the right
# thing, and since nalloc locks the reserved table, ordering will be
# preserved.
# 
326 327
######################################################################

328
if (@freed_nodes) {
    my @conlist    = ();
    my %seen_shelf = ();

    foreach my $n (@freed_nodes) {
	# Shark hack: a shark is represented in the console list by its
	# shelf name ("sh<digits>"), listed once no matter how many nodes
	# of that shelf were freed.
	if ($n =~ /(sh\d+)/) {
	    my $shelf = $1;
	    push(@conlist, $shelf)
		unless ($seen_shelf{$shelf}++);
	}
	# End shark hack
	else {
	    push(@conlist, $n);
	}
    }

    #
    # Finally, run the actual command. The list form of system() execs
    # the program directly, so node names are passed as separate
    # arguments and are never interpreted by a shell.
    #
    system($consetup, @conlist) == 0 ||
	print STDERR "WARNING: $consetup @conlist failed!\n";
}

355
exit($error);
356