nfree.in 11.1 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4 5 6 7

#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2002 University of Utah and the Flux Group.
# All rights reserved.
#
8
use English;
9 10 11 12 13 14 15 16

#
# nfree - Takes physical node names, and frees them from the experiment they
# are allocated to. If nodes are omitted, frees up all nodes in the given
# experiment. Looks in the scheduled_reloads and next_reserve tables to see
# if this node should be re-reserved into another experiment and/or reloaded,
# rather than being put back into the pool of free nodes.
#
17 18 19 20 21 22
#
# Print the command-line synopsis and terminate the script via die().
# Takes no arguments and never returns.
#
sub usage ()
{
    my @text = ("Usage: nfree <pid> <eid> [<node> <node> <...>]",
		"Releases all nodes in the specified experiment.",
		"If nodes are listed, nfree releases only those nodes.");

    # Each line is newline terminated; the trailing newline keeps die()
    # from appending the script name and line number.
    die(join("\n", @text) . "\n");
}
Mac Newbold's avatar
Mac Newbold committed
23

24 25 26
#
# Configure variables
#
27
my $TB       = "@prefix@";
28 29 30 31 32

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
33
use libdb;
34
use libtestbed;
35

36 37 38 39 40
# Helper program run on the freed nodes at the end of the script.
my $consetup	 = "$TB/libexec/console_setup";
# Path of the sched_reload program.
# NOTE(review): not referenced in the visible part of this script - confirm
# whether it is still needed.
my $sched_reload = "$TB/sbin/sched_reload";
# Project that owns the holding and reload experiments named below.
my $reloadpid	 = "emulab-ops";
# Experiment nodes are moved into when they are waiting for a disk reload.
my $pendingeid   = "reloadpending";
# Second reload experiment; nodes released from it (or from $pendingeid)
# do not get a new reload scheduled.
my $reloadeid    = "reloading";
Mac Newbold's avatar
Mac Newbold committed
41

42
my @nodes;
Mac Newbold's avatar
Mac Newbold committed
43
my @freed_nodes=();
44 45 46 47 48 49
my $error = 0;

#
# Turn off line buffering on output
#
$| = 1;
Mac Newbold's avatar
Mac Newbold committed
50

51 52 53 54 55 56 57 58 59
#
# Untaint the path
# 
$ENV{'PATH'} = "/bin:/usr/bin:/sbin:/usr/sbin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

if (@ARGV < 2) {
    usage();
}
60 61 62
my $pid = shift;
my $eid = shift;

63
#
64
# Untaint args.
65
#
66 67 68 69 70 71 72 73 74 75 76 77
# Both identifiers may consist only of word characters, '-' and '@'.
# Reassigning from the capture group is what clears the taint flag.
die("Bad data in pid: $pid.")
    unless ($pid =~ /^([-\@\w]+)$/);
$pid = $1;

die("Bad data in eid: $eid.")
    unless ($eid =~ /^([-\@\w]+)$/);
$eid = $1;
78

79
#
80
# Make sure that the experiment actually exists
81
#
82 83
if (!ExpState($pid,$eid)) {
    die("There is no experiment '$eid' in project '$pid'.\n");
84 85
}

86 87 88 89
#
# Make sure the user has the ability to modify this experiment
#
if (!TBExptAccessCheck($UID, $pid, $eid, TB_EXPT_MODIFY)) {
90
    die("You do not have permission to modify '$eid' in project '$pid'.\n");
91 92
}

93
#
94 95 96 97 98
# Make a list of nodes given on the command line, or get the whole list from
# the DB if none provided. 
#
if (@ARGV) {
    foreach my $n (@ARGV) {
99
	#
100 101
	# Taint check first! Solves silly perl problems.
	# 
102
	if ($n =~ /^([-\w]+)$/) {
103 104 105 106 107
	    $n = $1;
	}
	else {
	    die("*** $0:\n".
		"    Bad node name: $n.\n");
108
	}
109

110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
	# Shark hack
	if ($n =~ /(sh\d+)/ ) {
	    #
	    # A shark was named: queue its whole shelf (members 1 through 8),
	    # unless an earlier argument already queued that shelf.
	    #
	    my $shelf = $1;
	    my $already_queued = grep { /^\Q$shelf\E-\d$/ } @nodes;

	    if (! $already_queued) {
		foreach my $member ( 1 .. 8 ) {
		    push(@nodes, "$shelf-$member");
		}
	    }
	    # End shark hack
	}
	else {
	    # Ordinary node - queue it as given.
	    push(@nodes, $n);
	}
128
    }
129
}
130 131 132 133 134 135 136
else {
    print "Releasing all nodes from experiment '$eid' in project '$pid'.\n";
    @nodes = ExpNodes($pid, $eid);
}

######################################################################
# Step 1
137
#
138 139
# See what nodes need to be freed, and then lock them down by moving
# them to a holding reservation.
140
#
141 142 143 144 145
# We lock just the reserved table. This prevents races between multiple
# invocations of nfree trying to free the same node. Rather than lock
# a zillion tables, move the reservation into a holding pattern. This
# effectively prevents someone else from freeing the same nodes, and
# prevents anyone from allocating the nodes until we are done cleaning
# things up.
146
#
147 148 149 150 151
# NOTE: My reason for not wanting to lock all those tables (9 in the
# original version) is that library calls will fail since mysql locking
# requires that every table used within the locked area, be locked.
# Of course, who knows what tables the library uses, so thats a silly
# way to go. 
152
#
153 154 155 156 157 158 159
######################################################################

#
# Form a unique temporary EID. I want this to be unique in case something
# goes wrong, and they get left in the DB.
# 
my $lockedeid = "nfree-locked-$PID";
160

161 162 163
DBQueryFatal("lock tables reserved write");

foreach my $n (@nodes) { 
164
    #
165
    # Check to make sure they have actually reserved the nodes.
166
    #
167 168 169
    my $result =
	DBQueryFatal("select * from reserved where node_id='$n' ".
		     "and eid='$eid' and pid='$pid'");
170 171 172 173 174 175
    if ($result->numrows == 0) {
	print "Node '$n' is not reserved by your experiment.\n";
	$error++;
	next;
    }

176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
    #
    # Move to locked reservation. Node is not free, but is no longer
    # owned by the pid/eid, so cannot be mucked with.
    #
    # A node that cannot be moved is skipped (it is not added to the list
    # of nodes to clean up), and the failure is counted so that it shows
    # up in the exit status like every other failed update.
    # 
    if (! DBQueryWarn("update reserved " .
		      "set vname=NULL, pid='$reloadpid', eid='$lockedeid' ".
		      "where node_id='$n'")) {
	print "*** WARNING: Error locking down node $n!\n";
	$error++;
	next;
    }

    push(@freed_nodes, $n);
}

DBQueryFatal("unlock tables");

######################################################################
# Step 2
#
# Go through the list of nodes we successfully locked down, and clean
# up the node state (nodes, delays, interfaces, etc). Once that is done,
# move them to whatever new reservations are pending, or free the node.
#
######################################################################

foreach my $n (@freed_nodes) {
202 203
    my $mustclean = 1;
    
204
    #
205
    # Clean out all delays
206
    #
207
    DBQueryWarn("delete from delays where node_id='$n'") || $error++;
208 209 210

    #
    # Find the control net interface for this node type, as well as some
211
    # of the default values for its node type.
212
    #
213
    my $result =
214 215 216 217
	DBQueryFatal("select control_net,osid,node_types.pxe_boot_path, " .
		     " nodes.def_boot_osid,nodes.def_boot_path, ".
		     " node_types.isvirtnode ".
		     " from node_types " .
218 219
		     "left join nodes on nodes.type=node_types.type " .
		     "where node_id='$n'");
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
    my ($control,$osid,$pxe_boot_path,$def_boot_osid,$def_boot_path,$isvirt) =
	$result->fetchrow_array();

    #
    # Decide whether the node's disk must be cleaned. Three cases:
    #
    #  * Virtual nodes are never cleaned or reloaded (VIRTNODE HACK).
    #  * With a default boot OSID and no explicit def_boot_path, take the
    #    mustclean flag from the os_info row of that OSID, if there is one
    #    (an OSKit hack to avoid reloading disks the kernel never touched).
    #  * Otherwise nothing can be figured out, so always clean.
    #
    # This needs to be more general purpose.
    #
    if ($isvirt) {
	$mustclean = 0;
    }
    elsif (defined($def_boot_osid) &&
	   (!defined($def_boot_path) || $def_boot_path eq "")) {
	my $os_query =
	    DBQueryFatal("select mustclean from os_info " .
			 "where osid='$def_boot_osid'");

	# With no matching os_info row, $mustclean keeps its current value.
	($mustclean) = $os_query->fetchrow_array()
	    if ($os_query->numrows());
    }
    else {
	$mustclean = 1;
    }
248 249 250 251 252 253

    #
    # Clean up interfaces by clearing IPs and/or aliases.
    #
    if (! ($n =~ /sh\d+/)) {
        # Its not a shark, so clean out all IPs except the control net.
254
	DBQueryWarn("update interfaces set IP='' " .
255 256 257 258
		    "where node_id='$n' and card!='$control'") || $error++;
    }
    else {
	# XXX Shark Hack!
259 260
	DBQueryWarn("update interfaces set IPalias='' ".
		    "where node_id='$n'") || $error++;
261
    }
262

263
    #
264 265 266
    # Map the default OSID to something that is actually loaded on the
    # machine. I think this is a hack, but its the best I can come up
    # with right now. 
267
    #
268 269 270 271 272
    if (! $isvirt) {
	my $mapped_osid = MapNodeOSID($n, $osid);
	if ($mapped_osid) {
	    $osid = $mapped_osid;
	}
273
    }
274 275

    #
276
    # Clean up the nodes table so that its in a moderately clean state.
277 278
    #

279 280 281 282 283 284 285 286 287
    #foreach $var ( 'osid','pxe_boot_path','n') {
	#print "\$$var = ";
	#if (!defined ${$var}) {
	#    print "(null)";
	#} else {
	#    print ${$var};
        #}
        #print "\n";
    #}
288 289 290
    DBQueryWarn("update nodes set def_boot_osid='$osid',def_boot_cmd_line='',".
		"def_boot_path='',startupcmd='',rpms='',deltas='', ".
		"tarballs='',pxe_boot_path='$pxe_boot_path', ".
291
		"failureaction='fatal', routertype='none', ".
292
		"next_pxe_boot_path='' where node_id='$n'") || $error++;
293

294
    #
295
    # Clean out the current_reloads table (a just in case measure). 
296 297
    #
    DBQueryWarn("delete from current_reloads where node_id='$n'") || $error++;
298

299
    #
300
    # Now its safe to change the reservation.
301 302
    #
    #
303
    # If the node has a next_reserve entry, change the reservation.
304
    #
305 306 307 308 309 310 311 312 313
    $result =
	DBQueryFatal("select node_id,pid,eid from next_reserve ".
		     "where node_id='$n'");

    # A row here means the node is handed straight to the pid/eid stored
    # in next_reserve instead of being freed. numrows() is the accessor
    # used by every other query result check in this script.
    if ($result->numrows()) {
	# The node_id column is selected but not needed; skip it.
	my (undef, $next_pid, $next_eid) = $result->fetchrow_array();
	
	print "Moving $n to $next_pid/$next_eid.\n";

314
	DBQueryWarn("update reserved set pid='$next_pid',eid='$next_eid'," .
315 316 317 318 319
		    "vname=NULL where node_id='$n'")
	    || $error++;
	
	DBQueryWarn("delete from next_reserve where node_id='$n'")
	    || $error++;
320

321 322 323 324 325 326 327
	#
	# This little sillyness is for disk reloading.
	# Kill the last reservation since this path is special.
	#
	DBQueryWarn("delete from last_reservation where node_id='$n'") ||
	    $error++;

328 329 330
	next;
    }
    
331 332 333 334 335 336 337 338 339 340 341 342 343 344
    #
    # If the node has a reloads entry, change the reservation so that the
    # reload_daemon will pick it up.
    #
    $result =
	DBQueryFatal("select node_id,image_id from scheduled_reloads " .
		     "where node_id='$n'");

    if ($result->numrows()) {
	print "Moving $n to $reloadpid/$pendingeid.\n";
	
	DBQueryWarn("update reserved set ".
		    "pid='$reloadpid',eid='$pendingeid' ".
		    "where node_id='$n'") || $error++;
345 346 347 348 349 350 351 352

	#
	# This little sillyness is for disk reloading.
	# Kill the last reservation since this path is special.
	#
	DBQueryWarn("delete from last_reservation where node_id='$n'") ||
	    $error++;

353 354 355
	next;
    }

356
    #
357
    # No reloads or reservation changes, so really free the node
358
    #
359 360
    # Make sure we don't schedule a reload if the node is coming out of
    # the reloading or reloadpending experiments.
361
    #
362 363
    if ($mustclean && (($pid ne $reloadpid) ||
	      (($eid ne $reloadeid) && ($eid ne $pendingeid)))) {
364
	#
365
	# Schedule a reload
366
	#
367 368 369 370 371 372 373 374
	# Look up the node's default image and schedule a reload of it.
	# Either step failing is only warned about, but is counted in
	# $error, the counter the script finally exits with.
	if (my $imageid = DefaultImageID($n)) {
	    if (!TBSetSchedReload($n,$imageid)) {
		print STDERR "WARNING: Unable to schedule reload for $n\n";
		$error++;
	    }
	} else {
	    print STDERR "WARNING: Unable to get default imageid for $n\n";
	    $error++;
375
	}
376 377 378 379 380 381 382 383
	#
	# Let's still stick the node into the reloadpending experiment, even
	# if there were errors, to get it out of the way.
	#
	print "Moving $n to $reloadpid/$pendingeid.\n";
	DBQueryWarn("update reserved set ".
	    "pid='$reloadpid',eid='$pendingeid' ".
	    "where node_id='$n'") || $error++;
384 385 386 387 388 389 390

	#
	# Make SURE that we don't have a last_reservation, since that would
	# cause the node to get picked up by the reload daemon twice
	#
	DBQueryWarn("delete from last_reservation where node_id='$n'") ||
	    $error++;
391
	next;
392
    }
393 394 395 396 397 398 399 400 401
    
    # Nothing else wants the node: drop its reservation row, which returns
    # it to the free pool. Report the outcome and count a failure.
    print "Releasing node '$n' ... ";
    my $released = DBQueryWarn("delete from reserved where node_id='$n'");

    print($released ? "Succeeded.\n" : "Failed!\n");
    $error++
	if (! $released);
402 403
}

404 405 406 407 408
######################################################################
# Step 3 - Set up console for freed nodes.
#
# Using the list of freed nodes built earlier, run consetup to reset
# their serial consoles.
409 410 411 412 413 414 415
#
# NOTE: While it may seem like a race to do this after releasing the
# reservation, it really is not. Even if the node is allocated again
# console_setup looks at the current reservation and does the right
# thing, and since nalloc locks the reserved table, ordering will be
# preserved.
# 
416 417
######################################################################

418
if (@freed_nodes) {
419 420
    # Run console_setup once for all freed nodes. The list form of system()
    # hands each node name over as its own argument without involving a
    # shell, so names are never subject to shell parsing. A failure is
    # only reported; it does not change the exit status.
    system($consetup, @freed_nodes) == 0 ||
	print STDERR "WARNING: $consetup @freed_nodes failed!\n";
Mac Newbold's avatar
Mac Newbold committed
421 422
}

423
exit($error);
424