mkvnode.pl 30 KB
Newer Older
1 2
#!/usr/bin/perl -w
#
Leigh B Stoller's avatar
Leigh B Stoller committed
3
# Copyright (c) 2009-2015 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
23
#
24 25 26 27 28 29 30 31 32 33 34 35 36

#
# This is the top-level vnode creation script, called via the vnodesetup
# wrapper.  It is os independent, calling into routines defined
# in liblocsetup or elsewhere for os-dependent functionality.  Libraries
# contained in modules named like libvnode_<type>.pm are hooked in to
# obtain setup operations that are specific to the vnode type.
#
# This script was specific to Linux host environments, but has been modified
# to be used under FreeBSD for certain vnode-like containers.  Eventually
# all vnode/jail/etc. setups under any host OS should flow through this.
#

37 38 39 40 41
use strict;
use Getopt::Std;
use English;
use Errno;
use POSIX qw(strftime);
42 43
use POSIX qw(:sys_wait_h);
use POSIX qw(:signal_h);
Leigh B Stoller's avatar
Leigh B Stoller committed
44
use POSIX qw(setsid);
45
use Data::Dumper;
46 47
use Storable;
use vars qw($vnstate);
48 49 50

#
# Print the command line synopsis on stdout and exit with status 1.
# Never returns.
#
sub usage()
{
    # The synopsis line lists every switch accepted via $optlist ("dcs"),
    # matching the per-option help lines below.
    print "Usage: mkvnode [-d] [-c] [-s] vnodeid\n" .
          "  -d   Debug mode.\n" .
          "  -c   Cleanup stale container\n" .
          "  -s   Show state for container\n";
    exit(1);
}
58
my $optlist  = "dcs";
59
my $debug    = 1;
60 61
my $cleanup  = 0;
my $showstate= 0;
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
my $vnodeid;

#
# Turn off line buffering on output
#
$| = 1;

# Drag in path stuff so we can find emulab stuff.
BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }

#
# Load the OS independent support library. It will load the OS dependent
# library and initialize itself. 
# 
use libsetup;
use libtmcc;
78
use libutil;
79 80
use libtestbed;
    
81 82
# Pull in vnode stuff
use libgenvnode;
83 84 85 86
use libvnode;

# Helpers
sub MyFatal($);
87 88 89 90
sub safeLibOp($$$;@);
sub CleanupVM();
sub TearDownStaleVM();
sub StoreState();
91 92 93

# Locals
my $CTRLIPFILE = "/var/emulab/boot/myip";
94 95 96 97 98 99 100 101
my $VMPATH     = "/var/emulab/vms/vminfo";
my $VNDIR;
my $leaveme    = 0;
my $running    = 0;
my $cleaning   = 0;
my $rebooting  = 0;
my $reload     = 0;
my ($vmid,$vmtype,$ret,$err);
102
my $ISXENVM    = (GENVNODETYPE() eq "xen" ? 1 : 0);
103 104 105 106 107

# Flags for leaveme.
my $LEAVEME_REBOOT = 0x1;
my $LEAVEME_HALT   = 0x2;

108 109 110 111 112 113 114 115
#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
116 117 118 119 120 121 122 123
if (defined($options{"d"})) {
    $debug = 1;
}
if (defined($options{"c"})) {
    $cleanup = 1;
}
if (defined($options{"s"})) {
    $showstate = 1;
Leigh B Stoller's avatar
Leigh B Stoller committed
124
    $debug     = 0;
125 126 127
}
usage()
    if (@ARGV != 1);
128

129
$vnodeid = $ARGV[0];
130
$VNDIR   = "$VMPATH/$vnodeid";
131 132 133 134 135 136 137 138 139

#
# Must be root.
# 
if ($UID != 0) {
    die("*** $0:\n".
	"    Must be root to run this script!\n");
}

140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
#
# Deal with VIFROUTING flag from the server. Do this before we switch
# our vnode_id below since it is a physical host attribute. This will
# go away at some point.
#
my %attributes = ();
if (getnodeattributes(\%attributes)) {
    die("*** $0:\n".
	"Could not get node attributes");
}
if (exists($attributes{"xenvifrouting"})) {
    # Gack, tell backend network scripts.
    system("touch $ETCDIR/xenvifrouting");
}

155 156 157
# Tell the library what vnode we are messing with.
libsetup_setvnodeid($vnodeid);

158 159 160
# Can set this after above line. 
my $RUNNING_FILE = CONFDIR() . "/running";

161 162 163 164 165 166 167
#
# Turn on debug timestamps if desired.
#
if ($debug) {
    TBDebugTimeStampsOn();
}

168 169 170 171 172 173 174 175
#
# Remove old state files at boot.
#
if (! -e "/var/run/mkvnode.ready") {
    system("rm -f $VARDIR/vms/*/vnode.state");
    system("touch /var/run/mkvnode.ready");
}

176 177 178 179 180 181 182 183
#
# XXX: for now, support only a single vnode type per phys node.  This is bad,
# but it's the current assumption.  For now, we also assume the nodetype since
# we only have pcvm.  Later, we need to get this info from tmcd so we know
# which lib to load.
#
my @nodetypes = ( GENVNODETYPE() );

184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
#
# Need the domain, but there is no consistent way to get it. Ask tmcc for the
# boss node and parse out the domain. 
#
my ($DOMAINNAME,$BOSSIP) = tmccbossinfo();
die("Could not get bossname from tmcc!")
    if (!defined($DOMAINNAME));

if ($DOMAINNAME =~ /^[-\w]+\.(.*)$/) {
    $DOMAINNAME = $1;
}
else {
    die("Could not parse domain name!");
}
if ($BOSSIP !~ /^\d+\.\d+\.\d+\.\d+$/) {
    die "Bad bossip '$BOSSIP' from bossinfo!";
}

202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
#
# We go through this crap so that we can pull in multiple packages implementing
# the libvnode API so they (hopefully) won't step on our namespace too much.
#
my %libops = ();
foreach my $type (@nodetypes) {
    if ($type =~ /^([\w\d\-]+)$/) {
	$type = $1;
    }
    # load lib and initialize it
    my %ops;
    eval "use libvnode_$type; %ops = %libvnode_${type}::ops";
    if ($@) {
	die "while trying to load 'libvnode_$type': $@";
    }
    if (0 && $debug) {
	print "%ops($type):\n" . Dumper(%ops);
    }
    $libops{$type} = \%ops;
    if ($debug) {
Leigh B Stoller's avatar
Leigh B Stoller committed
222
	$libops{$type}{'setDebug'}->($debug);
223
    }
224 225 226 227
    $libops{$type}{'init'}->();

    # need to do this for each type encountered. 
    TBDebugTimeStampWithDate("starting $type rootPreConfig()");
228
    $libops{$type}{'rootPreConfig'}->($BOSSIP);
229
    TBDebugTimeStampWithDate("finished $type rootPreConfig()");
230 231 232 233 234 235 236
}
# Debug aid: show which vnode type is in use and the operation tables
# that were loaded for it.
if ($debug) {
    print "GENVNODETYPE " . GENVNODETYPE() . "\n";
    # Hand Dumper a reference so the hash is rendered as one nested
    # structure instead of a flat list of unlabelled key/value items.
    print "libops:\n" . Dumper(\%libops);
}


237 238 239 240 241 242 243 244 245 246
#
# This holds the container state set up by the library. There is state
# added here, and state added in the library ("private"). We locally
# redefine this below, so cannot be a lexical.
#
# NOTE: There should be NO state in here that needs to survive reboot.
#       We just remove them all when rebooting. See above.
#
$vnstate = { "private" => {} };

247 248 249 250 251 252 253
#
# Quickie way to show the state.
#
if ($showstate) {
    if (! -e "$VNDIR/vnode.info") {
	fatal("No vnode.info file for $vnodeid");
    }
254 255 256
    my $str = `cat $VNDIR/vnode.info`;
    ($vmid, $vmtype, undef) = ($str =~ /^(\d*) (\w*) ([-\w]*)$/);
    
257 258 259 260 261
    my $tmp = eval { Storable::retrieve("$VNDIR/vnode.state"); };
    if ($@) {
	fatal("$@");
    }
    print Dumper($tmp);
262 263 264 265 266 267 268 269 270 271 272 273

    # So the lib op works.
    $vnstate = $tmp;

    ($ret,$err) = safeLibOp('vnodeState', 1, 0);
    if ($err) {
	fatal("Failed to get status for existing container: $err");
    }
    if ($ret eq VNODE_STATUS_UNKNOWN()) {
	print "Cannot determine status container $vmid.\n";
    }
    print "Domain is $ret\n";
274 275 276
    exit(0);
}

277 278 279 280
#
# In most cases, the vnodeid directory will have been created by the
# caller, and a config file possibly dropped in.  When debugging, we
# have to create it here.
281 282 283 284 285
#
if (! -e $VMPATH) {
    mkdir($VMPATH, 0770) or
	fatal("Could not mkdir $VMPATH: $!");
}
286 287 288 289 290 291 292
chdir($VMPATH) or
    die("Could not chdir to $VMPATH: $!\n");

if (! -e $vnodeid) {
    mkdir($vnodeid, 0770) or
	fatal("Could not mkdir $vnodeid in $VMPATH: $!");
}
293 294 295 296 297 298 299 300
#
# The container description for the library routines. 
#
my %vnconfig = ( "vnodeid"   => $vnodeid,
                 "config"    => undef,
		 "ifconfig"  => undef,
		 "ldconfig"  => undef,
		 "tunconfig" => undef,
301
		 "attributes"=> undef,
302
		 "environment"   => undef,
303
                 "storageconfig" => undef,
304
		 "fwconfig"      => undef,
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
);
# Accessor: return the value stored under the given key in the vnode's
# config hash ($vnconfig{'config'}).
sub VNCONFIG($)
{
    my ($key) = @_;

    return $vnconfig{'config'}->{$key};
}

#
# If cleanup requested, make sure the manager process is not running
# Must do this after the stuff above is defined.
#
if ($cleanup) {
    # This path is in vnodesetup. 
    my $pidfile = "/var/run/tbvnode-${vnodeid}.pid";
    if (-e $pidfile) {
	print STDERR "Manager process still running. Use that instead.\n";
	print STDERR "If the manager is really dead, first rm $pidfile.\n";
	exit(1);
    }
    exit(TearDownStaleVM());
}

#
# Now we can start doing something useful.
#
326
my ($pid, $eid, $vname) = check_nickname();
327 328
my $nodeuuid = getnodeuuid();
$nodeuuid = $vnodeid if (!defined($nodeuuid));
329

330 331 332 333 334 335
#
# Get all the config stuff we need.
#
my %tmp;
my @tmp;
my $tmp;
336
my %attrs;
337
my %envvars;
338 339 340
my $fwinfo;
my @fwrules;
my @fwhosts;
341

342 343 344
fatal("Could not get vnode config for $vnodeid")
    if (getgenvnodeconfig(\%tmp));
$vnconfig{"config"} = \%tmp;
345 346

fatal("getifconfig($vnodeid): $!")
347 348
    if (getifconfig(\@tmp));
$vnconfig{"ifconfig"} = [ @tmp ];
349 350

fatal("getlinkdelayconfig($vnodeid): $!") 
351 352
    if (getlinkdelayconfig(\@tmp));
$vnconfig{"ldconfig"} = [ @tmp ];
353

Leigh B. Stoller's avatar
Leigh B. Stoller committed
354
fatal("gettunnelconfig($vnodeid): $!")
355 356
    if (gettunnelconfig(\$tmp));
$vnconfig{"tunconfig"} = $tmp;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
357

358 359 360 361
fatal("getnodeattributes($vnodeid): $!")
    if (getnodeattributes(\%attrs));
$vnconfig{"attributes"} = \%attrs;

362 363 364 365
fatal("getstorageconfig($vnodeid): $!")
    if (getstorageconfig(\@tmp));
$vnconfig{"storageconfig"} = [ @tmp ];

366 367 368 369
fatal("getenvvars(): $!")
    if (getenvvars(\%envvars));
$vnconfig{"environment"} = \%envvars;

370 371 372 373 374 375 376
fatal("getfwconfig(): $!")
    if (getfwconfig(\$fwinfo, \@fwrules, \@fwhosts));

$vnconfig{"fwconfig"} = {"fwinfo"  => $fwinfo,
			 "fwrules" => \@fwrules,
			 "fwhosts" => \@fwhosts};

377 378 379 380 381
#
# see if we 1) are supposed to be "booting" into the reload mfs, and 2) if
# we have loadinfo.  Need both to reload!
#
fatal("getbootwhat($vnodeid): $!") 
382
    if (getbootwhat(\@tmp));
383 384
if (scalar(@tmp) && exists($tmp[0]->{"WHAT"})) {
    if ($tmp[0]->{"WHAT"} =~ /frisbee-pcvm/) {
385 386 387 388
	#
	# Ok, we're reloading, using the fake frisbee pcvm mfs.
	#
	$reload = 1;
389 390 391 392 393 394
	
	fatal("getloadinfo($vnodeid): $!") 
	    if (getloadinfo(\@tmp));
	if (!scalar(@tmp)) {
	    fatal("vnode $vnodeid in reloading, but got no loadinfo!");
	}
395 396 397 398 399 400 401 402 403 404
	#
	# Loadinfo can now be a list, when loading deltas. Actually, I suppose
	# we could support loading multiple partitions, but other stuff would
	# have to change for that to work, so not going there right now.
	#
	$vnconfig{"reloadinfo"} = \@tmp;
	#
	# But the image we eventually boot is in jailconfig.
	# Sheesh, LVM names cannot include comma or colon. 
	#
Leigh B Stoller's avatar
Leigh B Stoller committed
405
	if (VNCONFIG('IMAGENAME') =~ /^([-\w]+),([-\w]+),([-\w\.]+)$/) {
406 407 408 409 410
	    $vnconfig{"image"}      = "$1-$2-$3";
	}
	elsif (VNCONFIG('IMAGENAME') =~ /^([-\w]+),([-\w]+),([^:]+):(\d+)$/) {
	    $vnconfig{"image"}      = "$1-$2-$3-$4";
	}
411
	else {
412 413 414 415 416 417 418 419
	    fatal("vnode $vnodeid in reloading, but got bogus IMAGENAME " . 
		   VNCONFIG('IMAGENAME') . " from jailconf!");
	}
	#
	# Apply the same transform to each loadinfo so that we do not have
	# to duplicate it in the library.
	#
	foreach my $ref (@tmp) {
Leigh B Stoller's avatar
Leigh B Stoller committed
420
	    if ($ref->{'IMAGEID'} =~ /^([-\w]+),([-\w]+),([-\w\.]+)$/) {
421 422 423 424
		$ref->{'IMAGENAME'} = "$1-$2-$3";
	    }
	    elsif ($ref->{'IMAGEID'} =~ /^([-\w]+),([-\w]+),([^:]+):(\d+)$/) {
		$ref->{'IMAGENAME'} = "$1-$2-$3-$4";
425 426
	    }
	    else {
427
		fatal("Bad IMAGEID in loadinfo");
428 429 430
	    }
	}
    }
431 432 433 434 435 436 437
    elsif ($tmp[0]->{"WHAT"} =~ /^\d*$/) {
	#
	# We are using bootwhat for a much different purpose than intended.
	# It tells us a partition number, but that is meaningless. Look at
	# the jailconfig to see what image should boot. That image better
	# be resident already. 
	#
438 439
	# Sheesh, LVM names cannot include comma or colon.
	#
Leigh B Stoller's avatar
Leigh B Stoller committed
440
	if (VNCONFIG('IMAGENAME') =~ /^([-\w]+),([-\w]+),([-\w\.]+)$/) {
441 442
	    $vnconfig{"image"}      = "$1-$2-$3";
	}
443 444 445
	elsif (VNCONFIG('IMAGENAME') =~ /^([-\w]+),([-\w]+),([^:]+):(\d+)$/) {
	    $vnconfig{"image"}      = "$1-$2-$3-$4";
	}
446 447 448 449
    }
    else {
	# The library will boot the default, whatever that is.
    }
450 451
}

452 453 454 455 456
if ($debug) {
    print "VN Config:\n";
    print Dumper(\%vnconfig);
}

457 458 459 460 461 462
#
# Install a signal handler. We can get signals from vnodesetup.
#
#
# Signal handler. Takes the signal name, records in $leaveme how much of
# the container should be left behind, and then hands off to MyFatal().
#
#   USR1  -> $LEAVEME_HALT   ("halted")
#   USR2  -> $LEAVEME_REBOOT ("rebooted")
#   other -> $leaveme untouched ("killed")
#
sub handler ($) {
    my ($signame) = @_;

    # Signals that change $leaveme, with the word used in the messages.
    my %disposition = (
        'USR1' => [ $LEAVEME_HALT,   "halted"   ],
        'USR2' => [ $LEAVEME_REBOOT, "rebooted" ],
    );

    print STDERR "mkvnode ($PID) caught a SIG${signame}!\n";

    # No more interruptions during teardown.
    foreach my $sig ('INT', 'USR1', 'USR2', 'HUP') {
        $SIG{$sig} = 'IGNORE';
    }

    my $str = "killed";
    if (exists($disposition{$signame})) {
        ($leaveme, $str) = @{ $disposition{$signame} };
    }

    #
    # XXX woeful hack for vnodesetup. Its rebootvnode path ends in
    # hackwaitandexit, which waits for the vnode to be well on its way
    # back up before returning. That makes a vnode reboot behave
    # differently from a pnode reboot (which returns as soon as the node
    # goes down), and Xen vnodes cannot always come back within the 30
    # second libreboot timeout.
    #
    # Touching the "running" file here makes hackwaitandexit return as
    # soon as the vnode is shut down in Xen (or OpenVZ), while leaving
    # the BSD jail case (which does not use this code) alone. This
    # obviously needs to be revisited.
    #
    if ($leaveme && -e "$RUNNING_FILE") {
        mysystem("touch $RUNNING_FILE");
    }

    print STDERR "Container is being $str\n";
    MyFatal("Container has been $str by $signame");
}

505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534
#
# If this file exists, we are rebooting an existing container. But
# need to check if its a stale or aborted container (one that failed
# to setup or teardown) and got left behind. Another wrinkle is shared
# nodes, so we use the node uuid to determine if its another logical
# pcvm with the same name, and needs to be destroyed before setting up.
#
if (-e "$VNDIR/vnode.info") {
    my $uuid;
    my $teardown = 0;

    my $str = `cat $VNDIR/vnode.info`;
    ($vmid, $vmtype, $uuid) = ($str =~ /^(\d*) (\w*) ([-\w]*)$/);

    # Consistency check.
    fatal("No matching file: $VMPATH/vnode.$vmid")
	if (! -e "$VMPATH/vnode.$vmid");
    $str = `cat $VMPATH/vnode.$vmid`;
    chomp($str);
    if ($str ne $vnodeid) {
	fatal("Inconsistent vnodeid in $VMPATH/vnode.$vmid");
    }

    if ($uuid ne $nodeuuid) {
	print "UUID mismatch; tearing down stale vnode $vnodeid\n";
	$teardown = 1;
    }
    elsif ($reload) {
	print "Reload requested, tearing down old vnode\n";
	$teardown = 1;
535 536
    }
    else {
537 538 539 540 541 542 543 544 545 546 547 548 549
	# We (might) need this to discover the state. 
	local $vnstate = { "private" => {} };
	
	if (-e "$VNDIR/vnode.state") {
	    my $tmp = eval { Storable::retrieve("$VNDIR/vnode.state"); };
	    if ($@) {
		print STDERR "$@";
		$teardown = 1;
	    }
	    else {
		$vnstate->{'private'} = $tmp->{'private'};
	    }
	}
550 551 552
	($ret,$err) = safeLibOp('vnodeState', 1, 0);
	if ($err) {
	    fatal("Failed to get status for existing container: $err");
553
	}
554 555 556 557
	if ($ret eq VNODE_STATUS_UNKNOWN()) {
	    print "Cannot determine status container $vmid. Deleting ...\n";
	    $teardown = 1;
	}
558 559 560 561 562
	elsif ($ret eq VNODE_STATUS_MOUNTED()) {
	    print("vnode $vnodeid still mounted. Unmounting then restarting\n");
	    $teardown = 1;
	    $leaveme  = $LEAVEME_REBOOT;
	}
563 564
	elsif ($ret ne VNODE_STATUS_STOPPED()) {
	    fatal("vnode $vnodeid not stopped, not booting!");
565 566
	}
    }
567
    if ($teardown) {
Leigh B Stoller's avatar
Leigh B Stoller committed
568 569 570 571 572 573 574 575 576
	if (TearDownStaleVM()) {
	    #
	    # This really sucks. We have to be careful that the caller
	    # (vnodesetup) does not remove the data directory, or else
	    # we will not be able to come back here next time for cleanup.
	    #
	    print STDERR "Could not tear down stale container\n";
	    exit(1);
	}
577 578 579
	# See MOUNTED case above; we set leaveme to keep the container
	# file systems, but must reset leaveme. 
	$leaveme = 0;
580 581 582 583
    }
    else {
	$rebooting = 1;
    }
584 585
}

Leigh B Stoller's avatar
Leigh B Stoller committed
586 587 588
#
# Install handlers *after* stale container teardown, since we set
# them to IGNORE during the teardown.
589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
# 
# Ignore TERM since we want our caller to catch it first and then send
# it down to us. 
#
$SIG{TERM} = 'IGNORE';
# Halt container and exit. Tear down transient state, leave disk.
$SIG{USR1} = \&handler;
# Halt container and exit. Leave all state intact (we are rebooting).
$SIG{USR2} = \&handler;
# Halt container and exit. Tear down all state including disk.
$SIG{HUP}  = \&handler;
$SIG{INT}  = \&handler;

#
# Initial pre config for the experimental network. We want to make sure
# we can allocate the required devices and whatever else before going
# any further. 
606
#
607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
TBDebugTimeStampWithDate("starting rootPreConfigNetwork()");
$ret = eval {
    $libops{GENVNODETYPE()}{'rootPreConfigNetwork'}->($vnodeid, undef,
	\%vnconfig, $vnstate->{'private'});
};
if ($ret || $@) {
    print STDERR $@
	if ($@);
    
    # If this fails, we require the library to clean up after itself
    # so that we can just exit without worrying about cleanup.
    fatal("rootPreConfigNetwork failed!");
}
TBDebugTimeStampWithDate("finished rootPreConfigNetwork()");

622 623 624 625 626 627 628
if (! -e "$VNDIR/vnode.info") {
    #
    # XXX XXX XXX: need to get this from tmcd!
    # NOTE: we first put the type into vndb so that the create call can go!
    #
    $vmtype = GENVNODETYPE();

Leigh B Stoller's avatar
Leigh B Stoller committed
629
    ($ret,$err) = safeLibOp('vnodeCreate',0,0);
630 631 632 633 634
    if ($err) {
	MyFatal("vnodeCreate failed");
    }
    $vmid = $ret;

635
    mysystem("echo '$vmid $vmtype $nodeuuid' > $VNDIR/vnode.info");
636
    mysystem("echo '$vnodeid' > $VMPATH/vnode.$vmid");
637 638 639

    # bootvnodes wants this to be here...
    mysystem("mkdir -p /var/emulab/jails/$vnodeid");
640
}
641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
else {
    #
    # Restore the state and throw away the private data. 
    #
    if (-e "$VNDIR/vnode.state") {
	my $tmp = eval { Storable::retrieve("$VNDIR/vnode.state"); };
	if ($@) {
	    print STDERR "$@";
	}
	else {
	    # Restore this from the saved state for vnodepreconfig.
	    $vnstate->{'private'}->{'os'} = $tmp->{'os'}
	        if (exists($tmp->{'os'}));
	    $vnstate->{'private'}->{'rootpartition'} = $tmp->{'rootpartition'}
	        if (exists($tmp->{'rootpartition'}));
	}
    }
}
# This state structure is saved to disk for TearDown and Reboot.
660 661 662
$vnstate->{"vmid"}   = $vmid;
$vnstate->{"vmtype"} = $vmtype;
$vnstate->{"uuid"}   = $nodeuuid;
663 664 665 666 667 668
# Save this for reboot. 
$vnstate->{'os'} = $vnstate->{'private'}->{'os'}
    if (exists($vnstate->{'private'}->{'os'}));
$vnstate->{'rootpartition'} = $vnstate->{'private'}->{'rootpartition'}
    if (exists($vnstate->{'private'}->{'rootpartition'}));

669 670 671
# Store the state to disk.
if (StoreState()) {
    MyFatal("Could not store container state to disk");
672 673
}

674 675
my $cnet_mac = (defined(VNCONFIG('CTRLMAC')) ?
		VNCONFIG('CTRLMAC') : ipToMac(VNCONFIG('CTRLIP')));
676 677 678 679 680 681 682 683 684
my $ext_ctrlip = `cat $CTRLIPFILE`;
chomp($ext_ctrlip);
if ($ext_ctrlip !~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
    # cannot/should not really go on if this happens.
    MyFatal("error prior to vnodePreConfigControlNetwork($vnodeid): " . 
	    " could not find valid ip in $CTRLIPFILE!");
}
my $longdomain = "${eid}.${pid}.${DOMAINNAME}";

685 686 687 688 689 690 691
#
# Call back to do things to the container before it boots.
#
#
# Pre-boot hook passed to the vnodePreConfig library op. It is given a
# path and edits files beneath it ($path/etc/...); always returns 0.
#
sub callback($)
{
    my ($path) = @_;

    my $sshdconf = "$path/etc/ssh/sshd_config";
    my $sshdport = VNCONFIG('SSHDPORT');

    #
    # Set up sshd port to listen on. If the vnode has its own IP
    # then listen on both 22 and the per-vnode port.
    #
    if (defined($sshdport) && $sshdport ne "") {
        my @commands = (
            "echo '# EmulabJail' >> $sshdconf",
            "echo '# DO NOT MAKE ANY CHANGES BELOW THIS LINE!' ".
            "      >> $sshdconf",
            "echo 'Port $sshdport' >> $sshdconf",
        );
        if (VNCONFIG('CTRLIP') ne $ext_ctrlip) {
            push(@commands, "echo 'Port 22' >> $sshdconf");
        }
        foreach my $command (@commands) {
            mysystem2($command);
        }
    }

    # Localize the timezone.
    mysystem2("cp -fp /etc/localtime $path/etc");

    return 0;
}

713
# OP: preconfig
714
if (safeLibOp('vnodePreConfig', 1, 1, \&callback)) {
715 716 717 718
    MyFatal("vnodePreConfig failed");
}

# OP: control net preconfig
719 720 721
if (safeLibOp('vnodePreConfigControlNetwork',1,1,
	      VNCONFIG('CTRLIP'),
	      VNCONFIG('CTRLMASK'),$cnet_mac,
722 723 724 725 726
	      $ext_ctrlip,$vname,$longdomain,$DOMAINNAME,$BOSSIP)) {
    MyFatal("vnodePreConfigControlNetwork failed");
}

# OP: exp net preconfig
727
if (safeLibOp('vnodePreConfigExpNetwork', 1, 1)) {
728 729
    MyFatal("vnodePreConfigExpNetwork failed");
}
730
if (safeLibOp('vnodeConfigResources', 1, 1)) {
731 732
    MyFatal("vnodeConfigResources failed");
}
733
if (safeLibOp('vnodeConfigDevices', 1, 1)) {
734 735 736
    MyFatal("vnodeConfigDevices failed");
}

737
#
738
# Route to inner ssh, but not if the IP is routable, no need to.
739
#
740 741
if (defined(VNCONFIG('SSHDPORT')) && VNCONFIG('SSHDPORT') ne "" &&
    !isRoutable(VNCONFIG('CTRLIP'))) {
742 743 744 745 746 747 748 749 750
    my $ref = {};
    $ref->{'ext_ip'}   = $ext_ctrlip;
    $ref->{'ext_port'} = VNCONFIG('SSHDPORT');
    $ref->{'int_ip'}   = VNCONFIG('CTRLIP');
    $ref->{'int_port'} = VNCONFIG('SSHDPORT');
    $ref->{'protocol'} = "tcp";
    
    $vnstate->{'sshd_iprule'} = $ref
	if (libvnode::forwardPort($ref) == 0);
751 752
}

753
#
754
# Start the container. If all goes well, this will exit cleanly, with 
755 756 757
# it running in its new context. Still, lets protect it with a timer
# since it might get hung up inside and we do not want to get stuck here.
#
758 759 760 761 762 763 764 765
#
# Boot the container. For non-Xen types the library's vnodeBoot op runs
# in a forked child while the parent waits under a 180 second alarm;
# for Xen it is invoked directly through safeLibOp().
#
if (!$ISXENVM) {
    my $childpid = fork();
    if (!defined($childpid)) {
        # fork() failed. Do not fall through to the child branch below,
        # which would reset our signal handlers, detach this (parent)
        # process and exit it without any cleanup.
        MyFatal("$vnodeid container startup failed: could not fork: $!");
    }
    elsif ($childpid) {
        # Parent: wait for the boot to finish, killing the child if it
        # takes longer than the alarm allows.
        my $timedout = 0;
        local $SIG{ALRM} = sub { kill("TERM", $childpid); $timedout = 1; };
        alarm 180;
        waitpid($childpid, 0);
        alarm 0;

        #
        # If failure then cleanup.
        #
        if ($? || $timedout) {
            MyFatal("$vnodeid container startup ".
                    ($timedout ? "timed out." : "failed."));
        }
    }
    else {
        #
        # We want to call this as clean as possible.
        #
        $SIG{TERM} = 'DEFAULT';
        $SIG{INT}  = 'DEFAULT';
        $SIG{USR1} = 'DEFAULT';
        $SIG{USR2} = 'DEFAULT';
        $SIG{HUP}  = 'DEFAULT';
        POSIX::setsid();

        if ($libops{$vmtype}{"vnodeBoot"}->($vnodeid, $vmid,
                                            \%vnconfig, $vnstate->{'private'})){
            print STDERR "*** ERROR: vnodeBoot failed\n";
            exit(1);
        }
        exit(0);
    }
}
elsif (safeLibOp('vnodeBoot', 1, 1)) {
    MyFatal("$vnodeid container startup failed.");
}
797
if (safeLibOp('vnodePostConfig', 1, 1)) {
798 799 800
    MyFatal("vnodePostConfig failed");
}
# XXX: need to do this for each type encountered!
801
TBDebugTimeStampWithDate("starting $vmtype rootPostConfig()");
802
$libops{$vmtype}{'rootPostConfig'}->();
803 804 805 806 807 808
TBDebugTimeStampWithDate("finished $vmtype rootPostConfig()");

if ($debug) {
    print "VN State:\n";
    print Dumper($vnstate);
}
809

810 811 812 813
# Store the state to disk.
if (StoreState()) {
    MyFatal("Could not store container state to disk");
}
814
# This is for vnodesetup
815
mysystem("touch $RUNNING_FILE");
816
$running = 1;
817 818

#
819 820 821 822
# This loop is to catch when the container stops. We used to run a sleep
# inside and wait for it to exit, but that is not portable across the
# backends, and the return value did not indicate how it exited. So, lets
# just loop, asking for the status every few seconds. 
823
#
824 825 826 827
# XXX Turn off debugging during this loop to keep the log file from growing.
#
TBDebugTimeStampsOff()
    if ($debug);
828

829 830 831
while (1) {
    sleep(5);
    
832
    #
833 834 835 836
    # If the container exits, either it rebooted from the inside or
    # the physical node is rebooting, or we are actively trying to kill
    # it cause our parent (vnodesetup) told us to. In all cases, we just
    # exit and let the parent decide what to do. 
837
    #
838 839 840 841 842 843
    my ($ret,$err) = safeLibOp('vnodeState', 0, 0);
    if ($err) {
	fatal("*** ERROR: vnodeState: $err\n");
    }
    if ($ret ne VNODE_STATUS_RUNNING()) {
	print "Container is no longer running.\n";
Leigh B Stoller's avatar
Leigh B Stoller committed
844 845 846 847 848 849 850 851 852
	if (!$cleaning) {
	    #
	    # Rebooted from inside, but not cause we told it to, so
	    # leave intact.
	    #
	    # But before we fold, lets wait a moment and check again
	    # since in XEN, the user can type reboot, which causes the
	    # domain to disappear for a while. We do not want to be
	    # fooled by that. Halt is another issue; if the user halts
853
	    # from inside the container it is never coming back and the 
Leigh B Stoller's avatar
Leigh B Stoller committed
854 855 856 857 858 859 860 861 862 863 864 865 866
	    # user has screwed himself. Need to restart from the frontend.
	    #
	    sleep(15);
	    ($ret,$err) = safeLibOp('vnodeState', 0, 0);
	    if ($err) {
		fatal("*** ERROR: vnodeState: $err\n");
	    }
	    if ($ret eq VNODE_STATUS_RUNNING()) {
		print "Container has restarted itself.\n";
		next;
	    }
	    $leaveme = $LEAVEME_REBOOT;
	}
867 868
	last;
    }
869
}
870 871 872
TBDebugTimeStampsOn()
    if ($debug);
exit(CleanupVM());
873 874

#
875 876 877 878
# Teardown a container. This should not be used if the mkvnode process
# is still running; use vnodesetup instead. This is just for the case
# that the manager (vnodesetup,mkvnode) process is gone and the turds
# need to be cleaned up.
879
#
880 881 882 883 884 885 886
#
# Tear down a container left behind by a previous run.
#
# Reads $VNDIR/vnode.info to set $vmid and $vmtype, loads the saved
# state from $VNDIR/vnode.state when that file exists, and then runs
# CleanupVM() with INT/USR1/USR2/HUP ignored.
#
# Returns 0 on success and -1 on failure. Calls fatal() when there is
# no vnode.info file. On success the four signals are left at DEFAULT;
# on failure they are left at IGNORE.
#
sub TearDownStaleVM()
{
    if (! -e "$VNDIR/vnode.info") {
        fatal("TearDownStaleVM: no vnode.info file for $vnodeid");
    }
    my $str = `cat $VNDIR/vnode.info`;
    ($vmid, $vmtype, undef) = ($str =~ /^(\d*) (\w*) ([-\w]*)$/);

    #
    # Load the state. Use a local so that we do not overwrite
    # the outer version. Just a precaution.
    #
    # The state might not exist, but we proceed anyway.
    #
    local $vnstate = { "private" => {} };

    if (-e "$VNDIR/vnode.state") {
        my $saved = eval { Storable::retrieve("$VNDIR/vnode.state"); };
        if ($@) {
            print STDERR "$@";
            return -1;
        }
        # retrieve() reports I/O errors by returning undef instead of
        # dying; do not continue with an undefined state structure.
        if (!defined($saved)) {
            print STDERR "Could not read $VNDIR/vnode.state: $!\n";
            return -1;
        }
        $vnstate = $saved;

        if ($debug) {
            print "vnstate:\n";
            print Dumper($vnstate);
        }
    }

    # No interruptions during stale teardown.
    $SIG{INT}  = 'IGNORE';
    $SIG{USR1} = 'IGNORE';
    $SIG{USR2} = 'IGNORE';
    $SIG{HUP}  = 'IGNORE';

    #
    # If we fail to cleanup, store the state back to disk so that we
    # capture any changes.
    #
    if (CleanupVM()) {
        StoreState();
        return -1;
    }

    #
    # CleanupVM() sets $cleaning on entry but some of its successful
    # return paths do not clear it again. The stale teardown is done at
    # this point, so clear the flag; otherwise the CleanupVM() call made
    # when this container eventually stops dies with "already cleaning".
    #
    $cleaning = 0;

    $SIG{INT}  = 'DEFAULT';
    $SIG{USR1} = 'DEFAULT';
    $SIG{USR2} = 'DEFAULT';
    $SIG{HUP}  = 'DEFAULT';

    return 0;
}

#
# Clean things up.
#
933
sub CleanupVM()
934 935 936 937 938 939 940 941
{
    if ($cleaning) {
	die("*** $0:\n".
	    "    Oops, already cleaning!\n");
    }
    $cleaning = 1;

    # If the container was never built, there is nothing to do.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
942
    return 0
943 944
	if (! -e "$VNDIR/vnode.info" || !defined($vmid));

945 946
    if (exists($vnstate->{'sshd_iprule'})) {
	my $ref = $vnstate->{'sshd_iprule'};
947
	libvnode::removePortForward($ref);
948 949 950
	# Update new state.
	delete($vnstate->{'sshd_iprule'});
	StoreState();
951 952
    }

953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973
    #
    # The tmcc proxy causes teardown problems, no idea why.
    # It used to be killed off from the unmount script, but lets
    # do it here.
    #
    my $PROXYPID = "/var/run/tmccproxy.${vnodeid}.pid";
    if (-e $PROXYPID) {
	my $ppid = `cat $PROXYPID`;
	chomp($ppid);
	# untaint
	if ($ppid =~ /^([-\@\w.]+)$/) {
	    $ppid = $1;
	}
	if (kill('TERM', $ppid) == 0) {
	    print"*** ERROR: Could not kill(TERM) proxy process $ppid: $!\n";
	}
	else {
	    unlink($PROXYPID);
	}
    }

974
    # if not halted, try that first
975
    my ($ret,$err) = safeLibOp('vnodeState', 1, 0);
976 977 978
    if ($err) {
	print STDERR "*** ERROR: vnodeState: ".
	    "failed to cleanup $vnodeid: $err\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
979
	return -1;
980
    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
981
    if ($ret eq VNODE_STATUS_RUNNING()) {
982
	print STDERR "cleanup: $vnodeid not stopped, trying to halt it.\n";
983
	($ret,$err) = safeLibOp('vnodeHalt', 1, 1);
Leigh B. Stoller's avatar
Leigh B. Stoller committed
984 985 986 987 988
	if ($err) {
	    print STDERR "*** ERROR: vnodeHalt: ".
		"failed to halt $vnodeid: $err\n";
	    return -1;
	}
989
    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
990 991
    elsif ($ret eq VNODE_STATUS_MOUNTED()) {
	print STDERR "cleanup: $vnodeid is mounted, trying to unmount it.\n";
992
	($ret,$err) = safeLibOp('vnodeUnmount', 1, 1);
Leigh B. Stoller's avatar
Leigh B. Stoller committed
993 994 995 996 997 998
	if ($err) {
	    print STDERR "*** ERROR: vnodeUnmount: ".
		"failed to unmount $vnodeid: $err\n";
	    return -1;
	}
    }
999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022
    if ($leaveme) {
	if ($leaveme == $LEAVEME_HALT || $leaveme == $LEAVEME_REBOOT) {
	    #
	    # When halting, the disk state is left, but the transient state
	    # is removed since it will get reconstructed later if the vnode
	    # is restarted. This avoids leaking a bunch of stuff in case the
	    # vnode never starts up again. We of course leave the disk, but
	    # that will eventually get cleaned up if the pcvm is reused for
	    # a future experiment.
	    #
	    # XXX Reboot should be different; there is no reason to tear
	    # down the transient state, but we do not handle that yet.
	    # Not hard to add though.
	    #
	    ($ret,$err) = safeLibOp('vnodeTearDown', 1, 1);
	    # Always store in case some progress was made. 
	    StoreState();
	    if ($err) {
		print STDERR "*** ERROR: failed to teardown $vnodeid: $err\n";
		return -1;
	    }
	}
	return 0;
    }
1023 1024

    # now destroy
1025
    ($ret,$err) = safeLibOp('vnodeDestroy', 1, 1);
1026 1027
    if ($err) {
	print STDERR "*** ERROR: failed to destroy $vnodeid: $err\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1028
	return -1;
1029 1030
    }
    unlink("$VNDIR/vnode.info");
1031
    unlink("$VNDIR/vnode.state");
1032
    unlink("$VMPATH/vnode.$vmid");
1033
    $cleaning = 0;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1034
    return 0;
1035 1036 1037 1038 1039 1040 1041 1042 1043
}
    
#
# Fatal error path: run the vnode cleanup, then die with the given
# message. Does not return.
#
sub MyFatal($)
{
    my ($errmsg) = @_;

    #
    # A reboot that never reached the running state must not kill off
    # the container; that might lose user data. Flag it so CleanupVM
    # takes its "leave me" path instead of destroying the vnode.
    #
    if ($rebooting && !$running) {
	$leaveme = $LEAVEME_REBOOT;
    }

    # When debugging, turn timestamps on before the cleanup runs.
    if ($debug) {
	TBDebugTimeStampsOn();
    }

    CleanupVM();

    die("*** $0:\n".
	"    $errmsg\n");
}

#
# Helpers:
#
1062 1063
#
# Call a vnode-type library operation with HUP/INT/USR1/USR2 blocked,
# trapping any exception it throws.
#
# Arguments:
#   $op      - operation name, looked up in $libops{$vmtype}.
#   $autolog - accepted for interface compatibility; currently unused.
#   $autoerr - when true, a true return value from the operation is
#              treated as a failure.
#   @args    - extra arguments passed through to the operation after
#              ($vnodeid, $vmid, \%vnconfig, $vnstate->{'private'}).
#
# Returns:
#   ($ret)       on success, where $ret is the operation's return value.
#   (-1, $err)   if the operation died; $err is the exception.
#   ($ret, $err) if $autoerr is set and the operation returned true.
#
sub safeLibOp($$$;@) {
    my ($op,$autolog,$autoerr,@args) = @_;

    my $sargs = '';
    if (@args > 0) {
	$sargs = join(',',@args);
    }
    TBDebugTimeStampWithDate("starting $vmtype $op($sargs)")
	if ($debug);

    #
    # Block signals that could kill us in the middle of a library call.
    # Might be better to do this down in the library, but this is an
    # easier place to do it. This ensures that if we have to tear down
    # in the middle of setting up, the state is consistent. 
    #
    my $new_sigset = POSIX::SigSet->new(SIGHUP, SIGINT, SIGUSR1, SIGUSR2);
    my $old_sigset = POSIX::SigSet->new;
    my $blocked    = defined(sigprocmask(SIG_BLOCK, $new_sigset, $old_sigset));
    if (!$blocked) {
	print STDERR "sigprocmask (BLOCK) failed!\n";
    }

    #
    # Detect failure from the eval's own return value rather than from
    # the truth of $@, which can be reset while the exception unwinds
    # (e.g. by an eval inside a destructor); otherwise a died operation
    # could be reported as a success.
    #
    my $ret;
    my $ok = eval {
	$ret = $libops{$vmtype}{$op}->($vnodeid, $vmid,
				       \%vnconfig, $vnstate->{'private'},
				       @args);
	1;
    };
    my $err = ($ok ? '' : ($@ || "$op($vnodeid) died without an error message"));

    #
    # Only restore the old mask if we actually saved it. If the block
    # call failed, $old_sigset is still empty and installing it would
    # unblock every signal.
    #
    if ($blocked && !defined(sigprocmask(SIG_SETMASK, $old_sigset))) {
	print STDERR "sigprocmask (UNBLOCK) failed!\n";
    }
    if (!$ok) {
	TBDebugTimeStampWithDate("failed $vmtype $op($sargs): $err")
	    if ($debug);
	return (-1,$err);
    }
    if ($autoerr && $ret) {
	$err = "$op($vnodeid) failed with exit code $ret!";
	TBDebugTimeStampWithDate("failed $vmtype $op($sargs): exited with $ret")
	    if ($debug);
	return ($ret,$err);
    }

    TBDebugTimeStampWithDate("finished $vmtype $op($sargs)")
	if ($debug);

    return $ret;
}
1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127

#
# Save $vnstate to "$VNDIR/vnode.state" with Storable.
#
# Returns 0 on success and -1 on failure, after printing the error to
# STDERR.
#
sub StoreState()
{
    # Store the state to disk.
    print "Storing state to disk ...\n"
	if ($debug);

    my $statefile = "$VNDIR/vnode.state";
    my $ret = eval { Storable::store($vnstate, $statefile); };
    if ($@) {
	print STDERR "$@";
	return -1;
    }
    #
    # store() dies only on serious errors; I/O problems are reported by
    # returning undef, so that has to be checked as well.
    #
    if (!defined($ret)) {
	print STDERR "*** ERROR: Could not store state to $statefile\n";
	return -1;
    }
    return 0;
}