nfree.in 17.5 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2007 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7
# All rights reserved.
#
8
use English;
Chad Barb's avatar
Chad Barb committed
9
use Getopt::Std;
10 11 12

#
# nfree - Takes physical node names, and frees them from the experiment they
Mike Hibler's avatar
Mike Hibler committed
13
# are allocated to. If nodes are omitted, frees up all nodes in the given
14 15 16 17
# experiment. Looks in the scheduled_reloads and next_reserve tables to see
# if this node should be re-reserved into another experiment and/or reloaded,
# rather than being put back into the pool of free nodes
#
18 19
#
# usage - Report the command line synopsis and terminate.
#
# Takes no arguments and does not return: the synopsis is raised with die(),
# so it goes to STDERR and the script exits with a failure status (or the
# message lands in $@ when called inside an eval).
#
sub usage ()
{
    die("Usage: nfree [-x] [-o] <pid> <eid> [<node> <node> <...>]\n".
        "Releases all nodes in the specified experiment.\n".
        "If nodes are listed, nfree releases only those nodes.\n".
        " '-x' frees all virtual nodes on any physical node that gets freed.\n".
        " '-o' Moves nodes into a oldreserved holding experiment.\n"
        );
}
Mac Newbold's avatar
Mac Newbold committed
27

28
# Configure variables
29
my $TB       = "@prefix@";
30
my $TESTMODE = @TESTMODE@;
31 32 33

# Testbed Support libraries
use lib "@prefix@/lib";
34
use libdb;
35
use libtestbed;
36 37
use User;
use Experiment;
38
use Node;
39

40
# Helper programs, all located under the configured install prefix ($TB).
my $consetup    = "$TB/libexec/console_setup";
my $osselect    = "$TB/bin/os_select";
my $nodereboot  = "$TB/bin/node_reboot";
my $makeconf    = "$TB/sbin/dhcpd_makeconf";

# Project/experiment names that nodes are moved into instead of being freed.
my $reloadpid   = "emulab-ops";
my $pendingeid  = "reloadpending";
my $rppendingeid= "repositionpending";

# Holding experiment used by -o, and the experiment nodes are parked in
# while they are being cleaned up. Called with parens so that a missing
# libdb constant is a hard error rather than a silent bareword string.
my $oldreserved_pid = OLDRESERVED_PID();
my $oldreserved_eid = OLDRESERVED_EID();
my $lockedpid   = NFREELOCKED_PID();
my $lockedeid   = NFREELOCKED_EID();

my @nodes;              # Every node we have been asked to release.
my @freed_nodes=();     # Nodes successfully locked down in Step 1.
my @dynanodes=();       # Dynamic nodes, deleted via the Node library.
my $error = 0;          # Failure count; becomes the exit status.
my %mustzero=();        # node_id => 'mustwipe' value from reserved (or 0).

$| = 1;  # Turn off line buffering on output

# Untaint the path
$ENV{'PATH'} = "/bin:/usr/bin:/sbin:/usr/sbin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

Chad Barb's avatar
Chad Barb committed
64 65 66 67
#
# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments.
#
68
# Option letters accepted by getopts(); neither takes a value.
my $optlist = "xo";

# Filled in by getopts(). Lexical rather than an implicit package global.
my %options = ();

if (! getopts($optlist, \%options)) {
    usage();
}

# -x: also release the virtual nodes hosted on each named physical node.
my $freeDependantVirtuals = defined($options{"x"}) ? 1 : 0;
# -o: move the nodes into the oldreserved holding experiment and stop.
my $moveToOldReserved     = defined($options{"o"}) ? 1 : 0;

# With the options consumed, at least <pid> and <eid> must remain.
if (@ARGV < 2) {
    usage();
}

my $pid = shift;
my $eid = shift;

# Untaint args.
if ($pid =~ /^([-\@\w]+)$/) { $pid = $1; }
else { die("Bad data in pid: $pid."); }
if ($eid =~ /^([-\@\w]+)$/) { $eid = $1; }
else { die("Bad data in eid: $eid."); }
98 99

# Make sure that the experiment actually exists
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    die("There is no experiment '$eid' in project '$pid'.\n");
}
# Need the project index below.
my $pid_idx = $experiment->pid_idx();
my $exptidx = $experiment->idx();

#
# Look up the index of each holding experiment used below. TBExptIDX
# stores the index through the reference and returns false when there
# is no such experiment.
#
# NOTE: despite its name, $old_exptidx is the index of the oldreserved
# holding experiment, not of the experiment being torn down.
#
my $old_exptidx;
if (!TBExptIDX($oldreserved_pid, $oldreserved_eid, \$old_exptidx)) {
    die("No such experiment $oldreserved_pid/$oldreserved_eid!\n");
}

my $locker_exptidx;
if (!TBExptIDX($lockedpid, $lockedeid, \$locker_exptidx)) {
    die("No such experiment $lockedpid/$lockedeid!\n");
}

my $reload_exptidx;
if (!TBExptIDX($reloadpid, $pendingeid, \$reload_exptidx)) {
    die("No such experiment $reloadpid/$pendingeid!\n");
}
# Only in Utah, see below
my $rppend_exptidx;

#
# Verify user and get his DB uid for later.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    die("You ($UID) do not exist!\n");
}
my $user_uid = $this_user->uid();

# Make sure the user has the ability to modify this experiment
if (!$experiment->AccessCheck($this_user, TB_EXPT_MODIFY)) {
    die("You do not have permission to modify '$eid' in project '$pid'.\n");
}

139
# Make a list of nodes given on the command line, or get the whole list from
# the DB if none provided.
if (@ARGV) {
    # Names already queued. A node that is named twice, or that is named
    # and also pulled in by -x, must only be processed once: the second
    # pass would find it already moved out of the experiment and report
    # a bogus "not reserved" error.
    my %queued = ();

    foreach my $n (@ARGV) {
        # Taint check first! Solves silly perl problems.
        if ($n =~ /^([-\w]+)$/) { $n = $1; }
        else { die("*** $0:\n    Bad node name: $n.\n"); }

        push(@nodes, $n)
            unless ($queued{$n}++);

        # if -x was specified, remove any
        # mapping to a node which has a phys_nodeid of $n.
        if ($freeDependantVirtuals) {
            my $result =
                DBQueryFatal("SELECT r.node_id FROM reserved AS r ".
                             "LEFT JOIN nodes AS n ".
                             "ON r.node_id=n.node_id ".
                             "WHERE n.phys_nodeid='$n' AND ".
                             "r.eid='$eid' AND r.pid='$pid'");
            while (my ($dependantVirtual) = $result->fetchrow_array()) {
                if (defined $dependantVirtual && $dependantVirtual ne $n) {
                    push(@nodes, $dependantVirtual)
                        unless ($queued{$dependantVirtual}++);
                }
            }
        }
    }
} else {
    print "Releasing all nodes from experiment '$eid' in project '$pid'.\n";
    @nodes = ExpNodes($pid, $eid);
    # Unless we are only parking nodes (-o), also pick up whatever
    # ExpNodesOldReserved() reports for this experiment.
    if (! $moveToOldReserved ) {
        push( @nodes, ExpNodesOldReserved($pid, $eid) );
    }
}

# Set when dhcpd.conf has to be regenerated (see Step 1 and Step 1b).
my $mustmakeconf = 0;

175 176
######################################################################
# Step 1
177
#
178 179
# See what nodes need to be freed, and then lock them down by moving
# them to a holding reservation.
180
#
181 182 183 184 185
# We lock just the reserved table. This prevents races between multiple
# invocations of nfree trying to free the same node. Rather than lock
# a zillion tables, move the reservation into a holding pattern. This
# effectively prevents someone else from freeing the same nodes, and from
# someone else allocating the nodes until we are done cleaning things up.
186
#
187 188 189 190
# NOTE: My reason for not wanting to lock all those tables (9 in the
# original version) is that library calls will fail since mysql locking
# requires that every table used within the locked area, be locked.
# Of course, who knows what tables the library uses, so thats a silly
191
# way to go.
192
#
193 194 195 196
######################################################################

# Hold the write lock on reserved for the whole of the lock-down pass.
DBQueryFatal("lock tables reserved write");

foreach my $n (@nodes) {
    # Check to make sure they have actually reserved the nodes.
    my $result =
        DBQueryFatal("select * from reserved where node_id='$n' ".
                     "and eid='$eid' and pid='$pid'");
    if ($result->numrows == 0) {
        print "Node '$n' is not reserved by your experiment.\n";
        $error++;
        next;
    }

    #
    # Remember if the node's disk must be zeroed
    #
    my $rowref = $result->fetchrow_hashref();
    if ($rowref->{'mustwipe'}) {
        $mustzero{$n} = $rowref->{'mustwipe'};
    } else {
        $mustzero{$n} = 0;
    }

    if ( $moveToOldReserved ) {
        # Move to holding reservation. Node is not free, but is no longer
        # owned by the pid/eid, so cannot be mucked with.
        #
        # exptidx has to describe the same experiment as pid/eid (the
        # holding experiment, whose index is kept in $old_exptidx), and
        # old_exptidx the same experiment as old_pid/old_eid (the one the
        # node is leaving, $exptidx). These two were previously swapped.
        if (! DBQueryWarn("update reserved " .
                          "set vname='$n', ".
                          " exptidx=$old_exptidx, ".
                          " pid='$oldreserved_pid', ".
                          " eid='$oldreserved_eid', ".
                          " old_exptidx=$exptidx, ".
                          " old_pid='$pid', ".
                          " old_eid='$eid' where node_id='$n'")) {
            print "*** WARNING: Error moving node $n to holding pid/eid: ".
                  "$oldreserved_pid/$oldreserved_eid\n";
            next;
        }
    } else {
        # Move to locked reservation. Node is not free, but is no longer
        # owned by the pid/eid, so cannot be mucked with.
        if (! DBQueryWarn("update reserved " .
                          "set vname='$n', exptidx=$locker_exptidx, ".
                          " pid='$lockedpid', eid='$lockedeid', ".
                          " old_exptidx=0, old_pid='', old_eid='' ".
                          "where node_id='$n'")) {
            print "*** WARNING: Error locking down node $n!\n";
            next;
        }

        # Any node that was part of an elabinelab or plabinelab experiment
        # that is freed requires that we remake the dhcpd.conf file.
        if ($rowref->{'inner_elab_boot'} || $rowref->{'plab_boot'}) {
            if (!DBQueryWarn("update reserved " .
                             "set inner_elab_boot=0, plab_boot=0 " .
                             "where node_id='$n'")) {
                print "*** WARNING: Error clearing elab/plab boot on $n!\n";
            }
            $mustmakeconf = 1;
        }

        # Only nodes that were locked down go on to Step 2.
        push(@freed_nodes, $n);
    }

}

DBQueryFatal("unlock tables");

# We are done if called with a -o
if( $moveToOldReserved ) {
    exit($error);
}

270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
######################################################################
# Step 1b
#
# Ugh.  If we are resetting the PXE boot program for any of the nodes
# we need to clear them and regenerate the dhcpd.conf file now, before
# we start freeing up nodes in Step 2.  If we delayed HUP'ing til after
# Step 2, then nodes might have already been rebooted by the reload
# daemon.
#
######################################################################
if (@freed_nodes > 0) {
    # Quoted, comma separated list for the SQL "in" clauses below.
    my $nodelist = "(" . join(",", map("'$_'", @freed_nodes)) . ")";

    my $result =
        DBQueryFatal("select node_id from nodes where ".
                     "pxe_boot_path is not NULL and node_id in $nodelist");
    # Only touch the table (and dhcpd.conf) if some node has a path set.
    if ($result->num_rows()) {
        DBQueryFatal("update nodes set pxe_boot_path=NULL where ".
                     "node_id in $nodelist");
        $mustmakeconf = 1;
    }
}
if ($mustmakeconf) {
    # List form: the program is run directly, no shell is involved.
    # A failure is reported but does not stop the release.
    system($makeconf, "-i", "-r") == 0 or
        print STDERR "WARNING: $makeconf failed!\n";
}
296

297 298 299 300 301 302 303 304 305 306
######################################################################
# Step 2
#
# Go through the list of nodes we successfully locked down, and clean
# up the node state (nodes, delays, interfaces, etc). Once that is done,
# move them to whatever new reservations are pending, or free the node.
#
######################################################################

foreach my $n (@freed_nodes) {
    # Whether a last_reservation entry is recorded when the node is
    # really freed (see the end of the loop). Defaults to yes.
    my $mustclean = 1;

    # Clean out all delays
    DBQueryWarn("delete from delays where node_id='$n'") || $error++;

    # Find the default values for its node type.
    my $node = Node->Lookup($n);
    if (!defined($node)) {
        print "*** Unable to Lookup node $n\n";
        $error++;
        next;
    }
    my $estate        = $node->eventstate();
    my $isvirt        = $node->isvirtnode();
    my $isdynamic     = $node->isdynamic();
    my $def_boot_osid = $node->def_boot_osid();
    my $osid;
    my $imageable;

    # Both calls return true on failure and fill in the referenced scalar.
    if ($node->default_osid(\$osid) ||
        $node->imageable(\$imageable)) {

        print "*** Unable to get necessary type attributes for $n\n";
        $error++;
        next;
    }
    my $result = DBQueryFatal("select mustclean from os_info ".
                              "where osid='$def_boot_osid'");
    my ($clean) = $result->fetchrow_array();

    # See if the OS it was running was marked as mustclean or not. Basically,
    # this is an OSKit hack to avoid reloading disks that have not been
    # touched by the kernel. If a def_boot_path was set, there is nothing
    # we can figure out, so just reload it. This needs to be more general
    # purpose.
    if ($isvirt || !$imageable) {
        # VIRTNODE HACK: Virtual nodes are special. Do not clean or reload.
        $mustclean = 0;
        $mustzero{$n} = 0;
    }
    elsif (defined($clean)) {
        # If def_boot_osid set, then $clean is defined. Otherwise not set
        # so default to cleaning node.
        $mustclean = $clean;
    }

    #
    # If the node is a dynamic virtual node, just save it for later.
    # We will call into the Node library to delete it.
    #
    if ($isdynamic) {
        push(@dynanodes, $n);
        next;
    }

    if (! $isvirt) {
        # On real nodes, clean out all interfaces except the control net.
        DBQueryWarn("update interfaces set IP='',IPaliases=NULL,mask=NULL,".
                    "       rtabid='0',vnode_id=NULL,current_speed='0' ".
                    "where node_id='$n' and ".
                    "  role='" . TBDB_IFACEROLE_EXPERIMENT() . "'")
            || $error++;
        # And log phys nodes freed from hwdown
        if ($pid eq NODEDEAD_PID() && $eid eq NODEDEAD_EID() && $user_uid) {
            TBSetNodeLogEntry($n, $user_uid, "misc",
                              "'Moved from hwdown; nfree'");
        }
    }

    my $allocFreeState = TBDB_ALLOCSTATE_FREE_DIRTY();

    # Reset the per-experiment settings in the nodes table.
    DBQueryWarn("update nodes set startupcmd='',rpms='',deltas='', ".
                "tarballs='',failureaction='fatal', routertype='none', ".
                "def_boot_cmd_line='',next_boot_cmd_line='', ".
                "temp_boot_osid='',next_boot_osid='', ".
                "update_accounts=0,ipport_next=ipport_low, ".
                "sfshostid=NULL,allocstate='$allocFreeState',boot_errno=0 ".
                "where node_id='$n'") || $error++;

    #
    # If the node is a virtnode, force its state to SHUTDOWN. This is mostly
    # to avoid silly stated warnings for nodes that do not have a physical
    # representation most of the time!
    #
    TBSetNodeEventState($n, TBDB_NODESTATE_SHUTDOWN)
        if ($isvirt && $estate ne TBDB_NODESTATE_SHUTDOWN());

    # Clean out the SFS hostid. What about the other keys?
    DBQueryWarn("update node_hostkeys set ".
                "  sfshostid=NULL ".
                "where node_id='$n'") || $error++;

    # Clean out the current_reloads table (a just in case measure).
    DBQueryWarn("delete from current_reloads where node_id='$n'") || $error++;

    # Reset its port allocation stuff.
    DBQueryWarn("delete from nodeipportnum where node_id='$n'") || $error++;

    # Clean the vinterfaces table for this node.
    DBQueryWarn("delete from vinterfaces where node_id='$n'") || $error++;

    # Clean the interface_settings table for this node.
    DBQueryWarn("delete from interface_settings where node_id='$n'") || $error++;

    # If it's a robot, we need to reset its physical location.
    $result =
        DBQueryFatal("select building,floor,loc_x,loc_y,orientation ".
                     "from node_startloc where node_id='$n'");
    if ($result->num_rows()) {
        while (my ($bldg,$floor,$x,$y,$o) = $result->fetchrow_array()) {
            my $subresult =
                DBQueryFatal("select pixels_per_meter from floorimages ".
                             "where building='$bldg'");
            if ($subresult->num_rows()) {
                my ($pixels_per_meter) = $subresult->fetchrow_array();

                # Scale the start location by pixels_per_meter before
                # storing it in location_info.
                $x = int($x * $pixels_per_meter);
                $y = int($y * $pixels_per_meter);
                DBQueryWarn("update location_info set ".
                            "loc_x=$x,loc_y=$y,orientation=$o ".
                            "where node_id='$n' and building='$bldg' ".
                            "and floor=$floor") || $error++;
            }
            else {
                warn "No building named $bldg in floorimages";
            }
        }
    }
    DBQueryWarn("update nodes set destination_x=NULL,destination_y=NULL,".
                "destination_orientation=NULL where node_id='$n'") || $error++;

    # Now its safe to change the reservation.

    # If the node has a next_reserve entry, change the reservation.
    $result =
        DBQueryFatal("select pid,eid from next_reserve ".
                     "where node_id='$n'");

    if ($result->num_rows()) {
        my ($next_pid, $next_eid) = $result->fetchrow_array();

        print "Moving $n to $next_pid/$next_eid.\n";

        # NOTE(review): dying here leaves this node and the rest of
        # @freed_nodes in the locked holding experiment -- confirm that
        # is the intended failure mode.
        my $next_exptidx;
        if (!TBExptIDX($next_pid, $next_eid, \$next_exptidx)) {
            die("No such experiment $next_pid/$next_eid!\n");
        }

        DBQueryWarn("update reserved set ".
                    " exptidx=$next_exptidx,pid='$next_pid',eid='$next_eid'," .
                    " vname='$n' ".
                    "where node_id='$n'")
            || $error++;

        TBSetNodeHistory($n, TB_NODEHISTORY_OP_MOVE, $UID,
                         $next_pid, $next_eid);

        DBQueryWarn("delete from next_reserve where node_id='$n'")
            || $error++;

        # This little sillyness is for disk reloading.
        # Kill the last reservation since this path is special.
        DBQueryWarn("delete from last_reservation where node_id='$n'") ||
            $error++;

        next;
    }

    # If the node has a reloads entry, change the reservation so that the
    # reload_daemon will pick it up.
    $result =
        DBQueryFatal("select node_id,image_id from scheduled_reloads " .
                     "where node_id='$n'");
    my $inreloads = $result->numrows();

    # XXX
    if ($inreloads && !$imageable) {
        print "WARNING: non-imageable node $n in scheduled_reloads\n";
        $inreloads = 0;
    }

    if (TBNodeType($n) eq "garcia") {
        print "Moving $n to $reloadpid/$rppendingeid.\n";

        # Looked up lazily, the first time a garcia is released.
        if (!defined($rppend_exptidx)) {
            if (!TBExptIDX($reloadpid, $rppendingeid, \$rppend_exptidx)) {
                die("No such experiment $reloadpid/$rppendingeid!\n");
            }
        }

        DBQueryWarn("update reserved set ".
                    " exptidx=$rppend_exptidx, ".
                    " pid='$reloadpid',".
                    " eid='$rppendingeid',vname='$n' ".
                    "where node_id='$n'") ||
            $error++;

        DBQueryWarn("REPLACE INTO scheduled_reloads set node_id='$n'") ||
            $error++;

        TBSetNodeHistory($n, TB_NODEHISTORY_OP_MOVE, $UID,
                         $reloadpid, $rppendingeid);

        # This little sillyness is for disk reloading.
        # Kill the last reservation since this path is special.
        DBQueryWarn("delete from last_reservation where node_id='$n'") ||
            $error++;

        next;
    }
    elsif (!$TESTMODE &&
           ((!$isvirt && $imageable) || # XXX force reload hack!
            $inreloads || $mustzero{$n})) { # XXX Garcia hack
        print "Moving $n to $reloadpid/$pendingeid.\n";

        DBQueryWarn("update reserved set ".
                    " exptidx=$reload_exptidx, ".
                    " pid='$reloadpid',eid='$pendingeid',".
                    " vname='$n' ".
                    "where node_id='$n'") || $error++;

        TBSetNodeHistory($n, TB_NODEHISTORY_OP_MOVE, $UID,
                         $reloadpid, $pendingeid);

        # This little sillyness is for disk reloading.
        # Kill the last reservation since this path is special.
        DBQueryWarn("delete from last_reservation where node_id='$n'") ||
            $error++;

        next;
    }

    # No reloads or reservation changes, so really free the node
    #
    # This little sillyness is for disk reloading. Remember the last
    # project a node was reserved into. At present, there might already
    # be an entry. Eventually, os_setup will look for this and force
    # a reload.
    if ($mustclean) {
        DBQueryWarn("replace into last_reservation (pid_idx,node_id,pid)".
                    "values ($pid_idx, '$n', '$pid')");
    }

    print "Releasing node '$n' ... ";
    if (DBQueryWarn("delete from reserved where node_id='$n'")) {
        print "Succeeded.\n";

        TBSetNodeHistory($n, TB_NODEHISTORY_OP_FREE, $UID, $pid, $eid);
    }
    else {
        print "Failed!\n";
        $error++;
    }
}

562 563 564 565 566
# Hand the dynamic nodes set aside during Step 2 to the Node library;
# nothing is called when none were collected.
Node::DeleteVnodes(@dynanodes)
    if (@dynanodes);

567 568 569 570 571
######################################################################
# Step 3 - Set up console for freed nodes.
#
# Using a list of freed nodes built earlier, run consetup to reset
# their serial consoles.
572 573 574 575 576 577
#
# NOTE: While it may seem like a race to do this after releasing the
# reservation, it really is not. Even if the node is allocated again
# console_setup looks at the current reservation and does the right
# thing, and since nalloc locks the reserved table, ordering will be
# preserved.
578
#
579 580
######################################################################

581
if (@freed_nodes) {
    # List form: each node name is passed as its own argument and no
    # shell is involved. A failure is reported but is not counted in $error.
    system($consetup, @freed_nodes) == 0 or
        print STDERR "WARNING: $consetup @freed_nodes failed!\n";
}

# The exit status is the number of errors counted along the way.
exit($error);
587