mkvnode.pl 28 KB
Newer Older
1
2
#!/usr/bin/perl -w
#
3
# Copyright (c) 2009-2014 University of Utah and the Flux Group.
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
23
#
24
25
26
27
28
29
30
31
32
33
34
35
36

#
# This is the top-level vnode creation script, called via the vnodesetup
# wrapper.  It is os independent, calling into routines defined
# in liblocsetup or elsewhere for os-dependent functionality.  Libraries
# contained in modules named like libvnode_<type>.pm are hooked in to
# obtain setup operations that are specific to the vnode type.
#
# This script was specific to Linux host environments, but has been modified
# to be used under FreeBSD for certain vnode-like containers.  Eventually
# all vnode/jail/etc. setups under any host OS should flow through this.
#

37
38
39
40
41
use strict;
use Getopt::Std;
use English;
use Errno;
use POSIX qw(strftime);
42
43
use POSIX qw(:sys_wait_h);
use POSIX qw(:signal_h);
Leigh B Stoller's avatar
Leigh B Stoller committed
44
use POSIX qw(setsid);
45
use Data::Dumper;
46
47
use Storable;
use vars qw($vnstate);
48
49
50

#
# Print the command line synopsis to stdout and exit with status 1.
# The synopsis lists every flag accepted by $optlist (d, c and s).
#
sub usage()
{
    print "Usage: mkvnode [-dcs] vnodeid\n" .
          "  -d   Debug mode.\n" .
          "  -c   Cleanup stale container\n" .
          "  -s   Show state for container\n";
    exit(1);
}
58
my $optlist  = "dcs";
59
my $debug    = 1;
60
61
my $cleanup  = 0;
my $showstate= 0;
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
my $vnodeid;

#
# Turn off line buffering on output
#
$| = 1;

# Drag in path stuff so we can find emulab stuff.
BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }

#
# Load the OS independent support library. It will load the OS dependent
# library and initialize itself. 
# 
use libsetup;
use libtmcc;
78
use libutil;
79
80
use libtestbed;
    
81
82
# Pull in vnode stuff
use libgenvnode;
83
84
85
86
use libvnode;

# Helpers
sub MyFatal($);
87
88
89
90
sub safeLibOp($$$;@);
sub CleanupVM();
sub TearDownStaleVM();
sub StoreState();
91
92
93

# Locals
my $CTRLIPFILE = "/var/emulab/boot/myip";
94
95
96
97
98
99
100
101
my $VMPATH     = "/var/emulab/vms/vminfo";
my $VNDIR;
my $leaveme    = 0;
my $running    = 0;
my $cleaning   = 0;
my $rebooting  = 0;
my $reload     = 0;
my ($vmid,$vmtype,$ret,$err);
102
my $ISXENVM    = (GENVNODETYPE() eq "xen" ? 1 : 0);
103
104
105
106
107

# Flags for leaveme.
my $LEAVEME_REBOOT = 0x1;
my $LEAVEME_HALT   = 0x2;

108
109
110
111
112
113
114
115
#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
usage()
    if (! getopts($optlist, \%options));

# -d: debug output.
$debug = 1
    if (defined($options{"d"}));

# -c: tear down a stale container and exit.
$cleanup = 1
    if (defined($options{"c"}));

# -s: just show the saved state; this also turns debugging off.
if (defined($options{"s"})) {
    $showstate = 1;
    $debug     = 0;
}

# Exactly one argument (the vnodeid) must remain.
if (@ARGV != 1) {
    usage();
}
128

129
$vnodeid = $ARGV[0];
130
$VNDIR   = "$VMPATH/$vnodeid";
131
132
133
134
135
136
137
138
139

#
# Must be root.
# 
if ($UID != 0) {
    die("*** $0:\n".
	"    Must be root to run this script!\n");
}

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#
# Deal with VIFROUTING flag from the server. Do this before we switch
# our vnode_id below since it is a physical host attribute. This will
# go away at some point.
#
my %attributes = ();
if (getnodeattributes(\%attributes)) {
    die("*** $0:\n".
	"Could not get node attributes");
}
if (exists($attributes{"xenvifrouting"})) {
    # Gack, tell backend network scripts.
    system("touch $ETCDIR/xenvifrouting");
}

155
156
157
# Tell the library what vnode we are messing with.
libsetup_setvnodeid($vnodeid);

158
159
160
# Can set this after above line. 
my $RUNNING_FILE = CONFDIR() . "/running";

161
162
163
164
165
166
167
#
# Turn on debug timestamps if desired.
#
if ($debug) {
    TBDebugTimeStampsOn();
}

168
169
170
171
172
173
174
175
#
# Remove old state files at boot.
#
if (! -e "/var/run/mkvnode.ready") {
    system("rm -f $VARDIR/vms/*/vnode.state");
    system("touch /var/run/mkvnode.ready");
}

176
177
178
179
180
181
182
183
#
# XXX: for now, support only a single vnode type per phys node.  This is bad,
# but it's the current assumption.  For now, we also assume the nodetype since
# we only have pcvm.  Later, we need to get this info from tmcd so we know 
# lib to load.
#
my @nodetypes = ( GENVNODETYPE() );

184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#
# Need the domain, but no consistent way to do it. Ask tmcc for the
# boss node and parse out the domain. 
#
my ($DOMAINNAME,$BOSSIP) = tmccbossinfo();
die("Could not get bossname from tmcc!")
    if (!defined($DOMAINNAME));

if ($DOMAINNAME =~ /^[-\w]+\.(.*)$/) {
    $DOMAINNAME = $1;
}
else {
    die("Could not parse domain name!");
}
if ($BOSSIP !~ /^\d+\.\d+\.\d+\.\d+$/) {
    die "Bad bossip '$BOSSIP' from bossinfo!";
}

202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#
# We go through this crap so that we can pull in multiple packages implementing
# the libvnode API so they (hopefully) won't step on our namespace too much.
#
my %libops = ();
foreach my $type (@nodetypes) {
    if ($type =~ /^([\w\d\-]+)$/) {
	$type = $1;
    }
    # load lib and initialize it
    my %ops;
    eval "use libvnode_$type; %ops = %libvnode_${type}::ops";
    if ($@) {
	die "while trying to load 'libvnode_$type': $@";
    }
    if (0 && $debug) {
	print "%ops($type):\n" . Dumper(%ops);
    }
    $libops{$type} = \%ops;
    if ($debug) {
Leigh B Stoller's avatar
Leigh B Stoller committed
222
	$libops{$type}{'setDebug'}->($debug);
223
    }
224
225
226
227
    $libops{$type}{'init'}->();

    # need to do this for each type encountered. 
    TBDebugTimeStampWithDate("starting $type rootPreConfig()");
228
    $libops{$type}{'rootPreConfig'}->($BOSSIP);
229
    TBDebugTimeStampWithDate("finished $type rootPreConfig()");
230
231
232
233
234
235
236
}
if ($debug) {
    print "GENVNODETYPE " . GENVNODETYPE() . "\n";
    print "libops:\n" . Dumper(%libops);
}


237
238
239
240
241
242
243
244
245
246
#
# This holds the container state set up by the library. There is state
# added here, and state added in the library ("private"). We locally
# redefine this below, so cannot be a lexical.
#
# NOTE: There should be NO state in here that needs to survive reboot.
#       We just remove them all when rebooting. See above.
#
$vnstate = { "private" => {} };

247
248
249
250
251
252
253
#
# Quickie way to show the state.
#
if ($showstate) {
    if (! -e "$VNDIR/vnode.info") {
	fatal("No vnode.info file for $vnodeid");
    }
254
255
256
    my $str = `cat $VNDIR/vnode.info`;
    ($vmid, $vmtype, undef) = ($str =~ /^(\d*) (\w*) ([-\w]*)$/);
    
257
258
259
260
261
    my $tmp = eval { Storable::retrieve("$VNDIR/vnode.state"); };
    if ($@) {
	fatal("$@");
    }
    print Dumper($tmp);
262
263
264
265
266
267
268
269
270
271
272
273

    # So the lib op works.
    $vnstate = $tmp;

    ($ret,$err) = safeLibOp('vnodeState', 1, 0);
    if ($err) {
	fatal("Failed to get status for existing container: $err");
    }
    if ($ret eq VNODE_STATUS_UNKNOWN()) {
	print "Cannot determine status container $vmid.\n";
    }
    print "Domain is $ret\n";
274
275
276
    exit(0);
}

277
278
279
280
#
# In most cases, the vnodeid directory will have been created by the
# caller, and a config file possibly dropped in.  When debugging, we
# have to create it here.
281
282
283
284
285
#
if (! -e $VMPATH) {
    mkdir($VMPATH, 0770) or
	fatal("Could not mkdir $VMPATH: $!");
}
286
287
288
289
290
291
292
chdir($VMPATH) or
    die("Could not chdir to $VMPATH: $!\n");

if (! -e $vnodeid) {
    mkdir($vnodeid, 0770) or
	fatal("Could not mkdir $vnodeid in $VMPATH: $!");
}
293
294
295
296
297
298
299
300
#
# The container description for the library routines. 
#
my %vnconfig = ( "vnodeid"   => $vnodeid,
                 "config"    => undef,
		 "ifconfig"  => undef,
		 "ldconfig"  => undef,
		 "tunconfig" => undef,
301
		 "attributes"=> undef,
302
		 "environment"   => undef,
303
                 "storageconfig" => undef,
304
		 "fwconfig"      => undef,
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
);
#
# Look up one key in the hash stored under $vnconfig{'config'} and
# return its value (undef if the key is not there).
#
sub VNCONFIG($)
{
    my ($key) = @_;

    return $vnconfig{'config'}->{$key};
}

#
# If cleanup requested, make sure the manager process is not running
# Must do this after the stuff above is defined.
#
if ($cleanup) {
    # This path is in vnodesetup. 
    my $pidfile = "/var/run/tbvnode-${vnodeid}.pid";
    if (-e $pidfile) {
	print STDERR "Manager process still running. Use that instead.\n";
	print STDERR "If the manager is really dead, first rm $pidfile.\n";
	exit(1);
    }
    exit(TearDownStaleVM());
}

#
# Now we can start doing something useful.
#
326
my ($pid, $eid, $vname) = check_nickname();
327
328
my $nodeuuid = getnodeuuid();
$nodeuuid = $vnodeid if (!defined($nodeuuid));
329

330
331
332
333
334
335
#
# Get all the config stuff we need.
#
my %tmp;
my @tmp;
my $tmp;
336
my %attrs;
337
my %envvars;
338
339
340
my $fwinfo;
my @fwrules;
my @fwhosts;
341

342
343
344
fatal("Could not get vnode config for $vnodeid")
    if (getgenvnodeconfig(\%tmp));
$vnconfig{"config"} = \%tmp;
345
346

fatal("getifconfig($vnodeid): $!")
347
348
    if (getifconfig(\@tmp));
$vnconfig{"ifconfig"} = [ @tmp ];
349
350

fatal("getlinkdelayconfig($vnodeid): $!") 
351
352
    if (getlinkdelayconfig(\@tmp));
$vnconfig{"ldconfig"} = [ @tmp ];
353

Leigh B. Stoller's avatar
Leigh B. Stoller committed
354
fatal("gettunnelconfig($vnodeid): $!")
355
356
    if (gettunnelconfig(\$tmp));
$vnconfig{"tunconfig"} = $tmp;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
357

358
359
360
361
fatal("getnodeattributes($vnodeid): $!")
    if (getnodeattributes(\%attrs));
$vnconfig{"attributes"} = \%attrs;

362
363
364
365
fatal("getstorageconfig($vnodeid): $!")
    if (getstorageconfig(\@tmp));
$vnconfig{"storageconfig"} = [ @tmp ];

366
367
368
369
fatal("getenvvars(): $!")
    if (getenvvars(\%envvars));
$vnconfig{"environment"} = \%envvars;

370
371
372
373
374
375
376
fatal("getfwconfig(): $!")
    if (getfwconfig(\$fwinfo, \@fwrules, \@fwhosts));

$vnconfig{"fwconfig"} = {"fwinfo"  => $fwinfo,
			 "fwrules" => \@fwrules,
			 "fwhosts" => \@fwhosts};

377
if ($debug) {
378
379
    print "VN Config:\n";
    print Dumper(\%vnconfig);
380
381
}

382
383
384
385
386
#
# see if we 1) are supposed to be "booting" into the reload mfs, and 2) if
# we have loadinfo.  Need both to reload!
#
fatal("getbootwhat($vnodeid): $!") 
387
    if (getbootwhat(\@tmp));
388
389
if (scalar(@tmp) && exists($tmp[0]->{"WHAT"})) {
    if ($tmp[0]->{"WHAT"} =~ /frisbee-pcvm/) {
390
391
392
393
	#
	# Ok, we're reloading, using the fake frisbee pcvm mfs.
	#
	$reload = 1;
394
395
396
397
398
399
400
	
	fatal("getloadinfo($vnodeid): $!") 
	    if (getloadinfo(\@tmp));
	if (!scalar(@tmp)) {
	    fatal("vnode $vnodeid in reloading, but got no loadinfo!");
	}
	else {
Leigh B Stoller's avatar
Leigh B Stoller committed
401
402
	    if ($tmp[0]->{"IMAGEID"} =~
		/^([-\d\w]+),([-\d\w]+),([-\d\w\.]+)$/) {
403
404
405
406
407
408
409
410
411
		$vnconfig{"reloadinfo"} = $tmp[0];
		$vnconfig{"image"}      = "$1-$2-$3";
	    }
	    else {
		fatal("vnode $vnodeid in reloading, but got bogus IMAGEID " . 
		      $tmp[0]->{"IMAGEID"} . " from loadinfo!");
	    }
	}
    }
412
413
414
415
416
417
418
419
420
421
422
423
424
425
    elsif ($tmp[0]->{"WHAT"} =~ /^\d*$/) {
	#
	# We are using bootwhat for a much different purpose than intended.
	# It tells us a partition number, but that is meaningless. Look at
	# the jailconfig to see what image should boot. That image better
	# be resident already. 
	#
	if (VNCONFIG('IMAGENAME') =~ /^([-\w]+),([-\w]+),([-\w]+)$/) {
	    $vnconfig{"image"}      = "$1-$2-$3";
	}
    }
    else {
	# The library will boot the default, whatever that is.
    }
426
427
428
429
430
431
432
433
}

#
# Install a signal handler. We can get signals from vnodesetup.
#
sub handler ($) {
    my ($signame) = @_;

    #
    # Signal handler. USR1 means halt, USR2 means reboot; any other
    # signal routed here is treated as a kill. Never returns normally
    # since it finishes by calling MyFatal().
    #
    print STDERR "mkvnode ($PID) caught a SIG${signame}!\n";

    # No more interruptions during teardown.
    $SIG{$_} = 'IGNORE'
        foreach (qw(INT USR1 USR2 HUP));

    # Signals that leave (part of) the container behind, and how we
    # describe them in the log. $leaveme is only touched for these.
    my %leave_for = (
        'USR1' => [ $LEAVEME_HALT,   "halted"   ],
        'USR2' => [ $LEAVEME_REBOOT, "rebooted" ],
    );
    my $what = "killed";
    if (exists($leave_for{$signame})) {
        ($leaveme, $what) = @{ $leave_for{$signame} };
    }

    #
    # XXX hack for vnodesetup. At the end of rebootvnode, vnodesetup
    # calls hackwaitandexit, which waits for the vnode to be well on its
    # way back up before returning. That makes vnode reboot semantics
    # differ from a pnode, where reboot returns as soon as the node goes
    # down, and Xen vnodes cannot always come back within the 30 second
    # libreboot timeout.
    #
    # Touching the "running" file here makes hackwaitandexit return once
    # the vnode is shut down in Xen (or OpenVZ), while leaving the BSD
    # jail case (which does not use this code) alone. This needs to be
    # revisited.
    #
    if ($leaveme && -e "$RUNNING_FILE") {
        mysystem("touch $RUNNING_FILE");
    }

    print STDERR "Container is being $what\n";
    MyFatal("Container has been $what by $signame");
}

476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
#
# If this file exists, we are rebooting an existing container. But
# need to check if its a stale or aborted container (one that failed
# to setup or teardown) and got left behind. Another wrinkle is shared
# nodes, so we use the node uuid to determine if its another logical
# pcvm with the same name, and needs to be destroyed before setting up.
#
if (-e "$VNDIR/vnode.info") {
    my $uuid;
    my $teardown = 0;

    my $str = `cat $VNDIR/vnode.info`;
    ($vmid, $vmtype, $uuid) = ($str =~ /^(\d*) (\w*) ([-\w]*)$/);

    # Consistency check.
    fatal("No matching file: $VMPATH/vnode.$vmid")
	if (! -e "$VMPATH/vnode.$vmid");
    $str = `cat $VMPATH/vnode.$vmid`;
    chomp($str);
    if ($str ne $vnodeid) {
	fatal("Inconsistent vnodeid in $VMPATH/vnode.$vmid");
    }

    if ($uuid ne $nodeuuid) {
	print "UUID mismatch; tearing down stale vnode $vnodeid\n";
	$teardown = 1;
    }
    elsif ($reload) {
	print "Reload requested, tearing down old vnode\n";
	$teardown = 1;
506
507
    }
    else {
508
509
510
511
512
513
514
515
516
517
518
519
520
	# We (might) need this to discover the state. 
	local $vnstate = { "private" => {} };
	
	if (-e "$VNDIR/vnode.state") {
	    my $tmp = eval { Storable::retrieve("$VNDIR/vnode.state"); };
	    if ($@) {
		print STDERR "$@";
		$teardown = 1;
	    }
	    else {
		$vnstate->{'private'} = $tmp->{'private'};
	    }
	}
521
522
523
	($ret,$err) = safeLibOp('vnodeState', 1, 0);
	if ($err) {
	    fatal("Failed to get status for existing container: $err");
524
	}
525
526
527
528
	if ($ret eq VNODE_STATUS_UNKNOWN()) {
	    print "Cannot determine status container $vmid. Deleting ...\n";
	    $teardown = 1;
	}
529
530
531
532
533
	elsif ($ret eq VNODE_STATUS_MOUNTED()) {
	    print("vnode $vnodeid still mounted. Unmounting then restarting\n");
	    $teardown = 1;
	    $leaveme  = $LEAVEME_REBOOT;
	}
534
535
	elsif ($ret ne VNODE_STATUS_STOPPED()) {
	    fatal("vnode $vnodeid not stopped, not booting!");
536
537
	}
    }
538
539
540
    if ($teardown) {
	TearDownStaleVM() == 0
	    or fatal("Could not tear down stale container");
541
542
543
	# See MOUNTED case above; we set leaveme to keep the container
	# file systems, but must reset leaveme. 
	$leaveme = 0;
544
545
546
547
    }
    else {
	$rebooting = 1;
    }
548
549
}

Leigh B Stoller's avatar
Leigh B Stoller committed
550
551
552
#
# Install handlers *after* the stale container teardown, since we set
# them to IGNORE during the teardown.
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
# 
# Ignore TERM since we want our caller to catch it first and then send
# it down to us. 
#
$SIG{TERM} = 'IGNORE';
# Halt container and exit. Tear down transient state, leave disk.
$SIG{USR1} = \&handler;
# Halt container and exit. Leave all state intact (we are rebooting).
$SIG{USR2} = \&handler;
# Halt container and exit. Tear down all state including disk.
$SIG{HUP}  = \&handler;
$SIG{INT}  = \&handler;

#
# Initial pre config for the experimental network. We want to make sure
# we can allocate the required devices and whatever else before going
# any further. 
570
#
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
TBDebugTimeStampWithDate("starting rootPreConfigNetwork()");
$ret = eval {
    $libops{GENVNODETYPE()}{'rootPreConfigNetwork'}->($vnodeid, undef,
	\%vnconfig, $vnstate->{'private'});
};
if ($ret || $@) {
    print STDERR $@
	if ($@);
    
    # If this fails, we require the library to clean up after itself
    # so that we can just exit without worrying about cleanup.
    fatal("rootPreConfigNetwork failed!");
}
TBDebugTimeStampWithDate("finished rootPreConfigNetwork()");

586
587
588
589
590
591
592
if (! -e "$VNDIR/vnode.info") {
    #
    # XXX XXX XXX: need to get this from tmcd!
    # NOTE: we first put the type into vndb so that the create call can go!
    #
    $vmtype = GENVNODETYPE();

Leigh B Stoller's avatar
Leigh B Stoller committed
593
    ($ret,$err) = safeLibOp('vnodeCreate',0,0);
594
595
596
597
598
    if ($err) {
	MyFatal("vnodeCreate failed");
    }
    $vmid = $ret;

599
    mysystem("echo '$vmid $vmtype $nodeuuid' > $VNDIR/vnode.info");
600
    mysystem("echo '$vnodeid' > $VMPATH/vnode.$vmid");
601
602
603

    # bootvnodes wants this to be here...
    mysystem("mkdir -p /var/emulab/jails/$vnodeid");
604
}
605
606
607
608
609
610
611
# This state structure is saved to disk for TearDown.
$vnstate->{"vmid"}   = $vmid;
$vnstate->{"vmtype"} = $vmtype;
$vnstate->{"uuid"}   = $nodeuuid;
# Store the state to disk.
if (StoreState()) {
    MyFatal("Could not store container state to disk");
612
613
}

614
615
my $cnet_mac = (defined(VNCONFIG('CTRLMAC')) ?
		VNCONFIG('CTRLMAC') : ipToMac(VNCONFIG('CTRLIP')));
616
617
618
619
620
621
622
623
624
my $ext_ctrlip = `cat $CTRLIPFILE`;
chomp($ext_ctrlip);
if ($ext_ctrlip !~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
    # cannot/should not really go on if this happens.
    MyFatal("error prior to vnodePreConfigControlNetwork($vnodeid): " . 
	    " could not find valid ip in $CTRLIPFILE!");
}
my $longdomain = "${eid}.${pid}.${DOMAINNAME}";

625
626
627
628
629
630
631
#
# Call back to do things to the container before it boots.
#
sub callback($)
{
    my ($path) = @_;

    #
    # $path is the root of the container filesystem; it is handed to the
    # 'vnodePreConfig' library op together with this callback.
    # Always returns 0.
    #
    my $sshdconf = "$path/etc/ssh/sshd_config";

    #
    # Set up sshd port to listen on. If the vnode has its own IP
    # then listen on both 22 and the per-vnode port.
    #
    # The EmulabJail marker tells us the file was already edited. Use the
    # list form of system() so that $path is interpolated by perl; the
    # single-quoted shell string used previously left "$path" for the
    # shell to expand (to nothing), so the host's own sshd_config was
    # checked instead of the container's.
    #
    if (system("grep", "-q", "-e", "EmulabJail", $sshdconf)) {
        if (defined(VNCONFIG('SSHDPORT')) && VNCONFIG('SSHDPORT') ne "") {
            my $sshdport = VNCONFIG('SSHDPORT');

            mysystem2("echo '# EmulabJail' >> $path/etc/ssh/sshd_config");
            mysystem2("echo 'Port $sshdport' >> $path/etc/ssh/sshd_config");
            if (VNCONFIG('CTRLIP') ne $ext_ctrlip) {
                mysystem2("echo 'Port 22' >> $path/etc/ssh/sshd_config");
            }
        }
    }

    # Localize the timezone.
    mysystem2("cp -fp /etc/localtime $path/etc");

    return 0;
}

653
# OP: preconfig
654
if (safeLibOp('vnodePreConfig', 1, 1, \&callback)) {
655
656
657
658
    MyFatal("vnodePreConfig failed");
}

# OP: control net preconfig
659
660
661
if (safeLibOp('vnodePreConfigControlNetwork',1,1,
	      VNCONFIG('CTRLIP'),
	      VNCONFIG('CTRLMASK'),$cnet_mac,
662
663
664
665
666
	      $ext_ctrlip,$vname,$longdomain,$DOMAINNAME,$BOSSIP)) {
    MyFatal("vnodePreConfigControlNetwork failed");
}

# OP: exp net preconfig
667
if (safeLibOp('vnodePreConfigExpNetwork', 1, 1)) {
668
669
    MyFatal("vnodePreConfigExpNetwork failed");
}
670
if (safeLibOp('vnodeConfigResources', 1, 1)) {
671
672
    MyFatal("vnodeConfigResources failed");
}
673
if (safeLibOp('vnodeConfigDevices', 1, 1)) {
674
675
676
    MyFatal("vnodeConfigDevices failed");
}

677
#
678
# Route to inner ssh, but not if the IP is routable, no need to.
679
#
680
681
if (defined(VNCONFIG('SSHDPORT')) && VNCONFIG('SSHDPORT') ne "" &&
    !isRoutable(VNCONFIG('CTRLIP'))) {
682
683
684
685
686
687
688
689
690
    my $ref = {};
    $ref->{'ext_ip'}   = $ext_ctrlip;
    $ref->{'ext_port'} = VNCONFIG('SSHDPORT');
    $ref->{'int_ip'}   = VNCONFIG('CTRLIP');
    $ref->{'int_port'} = VNCONFIG('SSHDPORT');
    $ref->{'protocol'} = "tcp";
    
    $vnstate->{'sshd_iprule'} = $ref
	if (libvnode::forwardPort($ref) == 0);
691
692
}

693
#
694
# Start the container. If all goes well, this will exit cleanly, with 
695
696
697
698
699
# it running in its new context. Still, lets protect it with a timer
# since it might get hung up inside and we do not want to get stuck here.
#
my $childpid = fork();
if (!defined($childpid)) {
    #
    # fork() returns undef on failure. Without this check we would fall
    # into the child branch below and run vnodeBoot (and then exit) in
    # the manager process itself.
    #
    MyFatal("$vnodeid container startup failed: could not fork: $!");
}
if ($childpid) {
    # Parent: wait for the boot to finish, with a watchdog timer
    # (not armed for Xen VMs).
    my $timedout = 0;
    local $SIG{ALRM} = sub { kill("TERM", $childpid); $timedout = 1; };
    alarm 180
        if (!$ISXENVM);
    waitpid($childpid, 0);
    alarm 0
        if (!$ISXENVM);

    #
    # If failure then cleanup.
    #
    if ($? || $timedout) {
        MyFatal("$vnodeid container startup ".
                ($timedout ? "timed out." : "failed."));
    }
}
else {
    #
    # We want to call this as clean as possible.
    #
    $SIG{TERM} = 'DEFAULT';
    $SIG{INT}  = 'DEFAULT';
    $SIG{USR1} = 'DEFAULT';
    $SIG{USR2} = 'DEFAULT';
    $SIG{HUP}  = 'DEFAULT';
    POSIX::setsid();

    if ($libops{$vmtype}{"vnodeBoot"}->($vnodeid, $vmid,
                                \%vnconfig, $vnstate->{'private'})) {
        print STDERR "*** ERROR: vnodeBoot failed\n";
        exit(1);
    }
    exit(0);
}
734
if (safeLibOp('vnodePostConfig', 1, 1)) {
735
736
737
    MyFatal("vnodePostConfig failed");
}
# XXX: need to do this for each type encountered!
738
TBDebugTimeStampWithDate("starting $vmtype rootPostConfig()");
739
$libops{$vmtype}{'rootPostConfig'}->();
740
741
742
743
744
745
TBDebugTimeStampWithDate("finished $vmtype rootPostConfig()");

if ($debug) {
    print "VN State:\n";
    print Dumper($vnstate);
}
746

747
748
749
750
# Store the state to disk.
if (StoreState()) {
    MyFatal("Could not store container state to disk");
}
751
# This is for vnodesetup
752
mysystem("touch $RUNNING_FILE");
753
$running = 1;
754
755

#
756
757
758
759
# This loop is to catch when the container stops. We used to run a sleep
# inside and wait for it to exit, but that is not portable across the
# backends, and the return value did not indicate how it exited. So, lets
# just loop, asking for the status every few seconds. 
760
#
761
762
763
764
# XXX Turn off debugging during this loop to keep the log file from growing.
#
TBDebugTimeStampsOff()
    if ($debug);
765

766
767
768
while (1) {
    sleep(5);
    
769
    #
770
771
772
773
    # If the container exits, either it rebooted from the inside or
    # the physical node is rebooting, or we are actively trying to kill
    # it cause our parent (vnodesetup) told us to. In all cases, we just
    # exit and let the parent decide what to do. 
774
    #
775
776
777
778
779
780
    my ($ret,$err) = safeLibOp('vnodeState', 0, 0);
    if ($err) {
	fatal("*** ERROR: vnodeState: $err\n");
    }
    if ($ret ne VNODE_STATUS_RUNNING()) {
	print "Container is no longer running.\n";
Leigh B Stoller's avatar
Leigh B Stoller committed
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
	if (!$cleaning) {
	    #
	    # Rebooted from inside, but not cause we told it to, so
	    # leave intact.
	    #
	    # But before we fold, lets wait a moment and check again
	    # since in XEN, the user can type reboot, which causes the
	    # domain to disappear for a while. We do not want to be
	    # fooled by that. Halt is another issue; if the user halts
	    # from inside the container it is never coming back and the
	    # user has screwed himself. Need to restart from the frontend.
	    #
	    sleep(15);
	    ($ret,$err) = safeLibOp('vnodeState', 0, 0);
	    if ($err) {
		fatal("*** ERROR: vnodeState: $err\n");
	    }
	    if ($ret eq VNODE_STATUS_RUNNING()) {
		print "Container has restarted itself.\n";
		next;
	    }
	    $leaveme = $LEAVEME_REBOOT;
	}
804
805
	last;
    }
806
}
807
808
809
TBDebugTimeStampsOn()
    if ($debug);
exit(CleanupVM());
810
811

#
812
813
814
815
# Teardown a container. This should not be used if the mkvnode process
# is still running; use vnodesetup instead. This is just for the case
# that the manager (vnodesetup,mkvnode) process is gone and the turds
# need to be cleaned up.
816
#
817
818
819
820
821
822
823
sub TearDownStaleVM()
{
    #
    # Returns 0 when the container was cleaned up, -1 on failure.
    # Calls fatal() if there is no vnode.info file for the vnode.
    # Sets the file-level $vmid/$vmtype from vnode.info as a side effect.
    #
    if (! -e "$VNDIR/vnode.info") {
        fatal("TearDownStaleVM: no vnode.info file for $vnodeid");
    }
    my $str = `cat $VNDIR/vnode.info`;
    ($vmid, $vmtype, undef) = ($str =~ /^(\d*) (\w*) ([-\w]*)$/);

    #
    # Load the state. Use a local so that we do not overwrite
    # the outer version. Just a precaution.
    #
    # The state might not exist, but we proceed anyway.
    #
    local $vnstate = { "private" => {} };

    if (-e "$VNDIR/vnode.state") {
        my $saved = eval { Storable::retrieve("$VNDIR/vnode.state"); };
        if ($@) {
            print STDERR "$@";
            return -1;
        }
        if (defined($saved)) {
            $vnstate = $saved;
        }
        else {
            #
            # Storable reports ordinary I/O errors (and truncated files)
            # by returning undef instead of dying. Treat that like the
            # "no state" case and keep the empty default, so the library
            # ops still get a 'private' hash to work with.
            #
            print STDERR "Could not read $VNDIR/vnode.state; ".
                "proceeding with empty state\n";
        }
        if ($debug) {
            print "vnstate:\n";
            print Dumper($vnstate);
        }
    }

    # No interruptions during stale teardown.
    $SIG{INT}  = 'IGNORE';
    $SIG{USR1} = 'IGNORE';
    $SIG{USR2} = 'IGNORE';
    $SIG{HUP}  = 'IGNORE';

    #
    # if we fail to cleanup, store the state back to disk so that we
    # capture any changes.
    #
    if (CleanupVM()) {
        StoreState();
        return -1;
    }
    $SIG{INT}  = 'DEFAULT';
    $SIG{USR1} = 'DEFAULT';
    $SIG{USR2} = 'DEFAULT';
    $SIG{HUP}  = 'DEFAULT';

    return 0;
}

#
# Clean things up.
#
870
sub CleanupVM()
{
    #
    # Returns 0 on success and -1 on failure; dies if a cleanup is
    # already in progress. How much is removed depends on $leaveme:
    # when it is set the container is only torn down (vnodeTearDown),
    # otherwise it is destroyed and its bookkeeping files are removed.
    #
    # $cleaning is cleared again on every successful return, so that a
    # later call (for example from MyFatal() after a stale teardown that
    # ran with $leaveme set) does not trip the "already cleaning" check.
    # On failure it is left set, as before.
    #
    if ($cleaning) {
        die("*** $0:\n".
            "    Oops, already cleaning!\n");
    }
    $cleaning = 1;

    # If the container was never built, there is nothing to do.
    if (! -e "$VNDIR/vnode.info" || !defined($vmid)) {
        $cleaning = 0;
        return 0;
    }

    if (exists($vnstate->{'sshd_iprule'})) {
        my $ref = $vnstate->{'sshd_iprule'};
        libvnode::removePortForward($ref);
        # Update new state.
        delete($vnstate->{'sshd_iprule'});
        StoreState();
    }

    #
    # The tmcc proxy causes teardown problems, no idea why.
    # It used to be killed off from the unmount script, but lets
    # do it here.
    #
    my $PROXYPID = "/var/run/tmccproxy.${vnodeid}.pid";
    if (-e $PROXYPID) {
        my $ppid = `cat $PROXYPID`;
        chomp($ppid);
        # untaint
        if ($ppid =~ /^([-\@\w.]+)$/) {
            $ppid = $1;
        }
        if (kill('TERM', $ppid) == 0) {
            print"*** ERROR: Could not kill(TERM) proxy process $ppid: $!\n";
        }
        else {
            unlink($PROXYPID);
        }
    }

    # if not halted, try that first
    my ($ret,$err) = safeLibOp('vnodeState', 1, 0);
    if ($err) {
        print STDERR "*** ERROR: vnodeState: ".
            "failed to cleanup $vnodeid: $err\n";
        return -1;
    }
    if ($ret eq VNODE_STATUS_RUNNING()) {
        print STDERR "cleanup: $vnodeid not stopped, trying to halt it.\n";
        ($ret,$err) = safeLibOp('vnodeHalt', 1, 1);
        if ($err) {
            print STDERR "*** ERROR: vnodeHalt: ".
                "failed to halt $vnodeid: $err\n";
            return -1;
        }
    }
    elsif ($ret eq VNODE_STATUS_MOUNTED()) {
        print STDERR "cleanup: $vnodeid is mounted, trying to unmount it.\n";
        ($ret,$err) = safeLibOp('vnodeUnmount', 1, 1);
        if ($err) {
            print STDERR "*** ERROR: vnodeUnmount: ".
                "failed to unmount $vnodeid: $err\n";
            return -1;
        }
    }
    if ($leaveme) {
        if ($leaveme == $LEAVEME_HALT || $leaveme == $LEAVEME_REBOOT) {
            #
            # When halting, the disk state is left, but the transient state
            # is removed since it will get reconstructed later if the vnode
            # is restarted. This avoids leaking a bunch of stuff in case the
            # vnode never starts up again. We of course leave the disk, but
            # that will eventually get cleaned up if the pcvm is reused for
            # a future experiment.
            #
            # XXX Reboot should be different; there is no reason to tear
            # down the transient state, but we do not handle that yet.
            # Not hard to add though.
            #
            ($ret,$err) = safeLibOp('vnodeTearDown', 1, 1);
            # Always store in case some progress was made.
            StoreState();
            if ($err) {
                print STDERR "*** ERROR: failed to teardown $vnodeid: $err\n";
                return -1;
            }
        }
        $cleaning = 0;
        return 0;
    }

    # now destroy
    ($ret,$err) = safeLibOp('vnodeDestroy', 1, 1);
    if ($err) {
        print STDERR "*** ERROR: failed to destroy $vnodeid: $err\n";
        return -1;
    }
    unlink("$VNDIR/vnode.info");
    unlink("$VNDIR/vnode.state");
    unlink("$VMPATH/vnode.$vmid");
    $cleaning = 0;
    return 0;
}
    
#
# Print error and exit.
#
sub MyFatal($)
{
    my ($msg) = @_;

    #
    # Clean up the container via CleanupVM(), then die with $msg.
    #
    # A container that was being rebooted but never got as far as
    # running must not be killed off, since that might lose user data;
    # flag it to be left in place.
    #
    if ($rebooting && !$running) {
        $leaveme = $LEAVEME_REBOOT;
    }

    # Turn debug timestamps back on before the cleanup.
    if ($debug) {
        TBDebugTimeStampsOn();
    }

    CleanupVM();
    die("*** $0:\n    $msg\n");
}

#
# Helpers:
#
999
1000
sub safeLibOp($$$;@) {
    my ($op,$autolog,$autoerr,@args) = @_;

    #
    # Invoke library operation $op for the current vnode type, with
    # HUP/INT/USR1/USR2 blocked for the duration of the call.
    #
    # Returns:
    #   (-1, $err)    if the operation died; $err is the exception text.
    #   ($ret, $err)  if $autoerr is set and the op returned a true value.
    #   $ret          otherwise (no error element).
    #
    # $autolog is accepted but currently has no effect.
    #
    my $sargs = (@args > 0 ? join(',',@args) : '');

    # Only build the timestamp label when it is going to be used.
    my $tag;
    if ($debug) {
        $tag = "$vmtype $op($sargs)";
        TBDebugTimeStampWithDate("starting ${tag}");
    }

    #
    # Block signals that could kill us in the middle of a library call.
    # Might be better to do this down in the library, but this is an
    # easier place to do it. This ensures that if we have to tear down
    # in the middle of setting up, the state is consistent.
    #
    my $blocked  = POSIX::SigSet->new(SIGHUP, SIGINT, SIGUSR1, SIGUSR2);
    my $previous = POSIX::SigSet->new;
    print STDERR "sigprocmask (BLOCK) failed!\n"
        if (! defined(sigprocmask(SIG_BLOCK, $blocked, $previous)));

    my $ret = eval {
        $libops{$vmtype}{$op}->($vnodeid, $vmid,
                                \%vnconfig, $vnstate->{'private'}, @args);
    };
    # Grab the exception before anything else can clobber $@.
    my $err = $@;

    print STDERR "sigprocmask (UNBLOCK) failed!\n"
        if (! defined(sigprocmask(SIG_SETMASK, $previous)));

    # The operation threw an exception.
    if ($err) {
        TBDebugTimeStampWithDate("failed ${tag}: $err")
            if ($debug);
        return (-1,$err);
    }

    # The operation returned non-zero and the caller wants that
    # turned into an error.
    if ($autoerr && $ret) {
        $err = "$op($vnodeid) failed with exit code $ret!";
        TBDebugTimeStampWithDate("failed ${tag}: exited with $ret")
            if ($debug);
        return ($ret,$err);
    }

    TBDebugTimeStampWithDate("finished ${tag}")
        if ($debug);

    return $ret;
}
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064

#
# Write the container state ($vnstate) to $VNDIR/vnode.state.
# Returns 0 on success, -1 if the state could not be stored.
#
sub StoreState()
{
    # Store the state to disk.
    print "Storing state to disk ...\n"
	if ($debug);
    
    my $ret = eval { Storable::store($vnstate, "$VNDIR/vnode.state"); };
    if ($@) {
	print STDERR "$@";
	return -1;
    }
    #
    # Storable::store() dies only on serious errors; ordinary I/O
    # problems are reported by returning undef, so check that too.
    #
    if (!$ret) {
	print STDERR "Storable::store to $VNDIR/vnode.state failed\n";
	return -1;
    }
    return 0;
}