mkvnode.pl 24.4 KB
Newer Older
1
2
3
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
Leigh B Stoller's avatar
Leigh B Stoller committed
4
# Copyright (c) 2009-2012 University of Utah and the Flux Group.
5
6
7
8
9
10
11
# All rights reserved.
#
use strict;
use Getopt::Std;
use English;
use Errno;
use POSIX qw(strftime);
12
13
use POSIX qw(:sys_wait_h);
use POSIX qw(:signal_h);
14
use Data::Dumper;
15
16
use Storable;
use vars qw($vnstate);
17
18
19
20
21
22

#
# The corollary to mkjail.pl in the freebsd directory ...
#
sub usage()
{
    # Print the command synopsis on stdout and exit with status 1.
    # The synopsis line now names every flag accepted by $optlist ("dcs");
    # it used to mention only -d.
    print "Usage: mkvnode [-d] [-c] [-s] vnodeid\n" .
          "  -d   Debug mode.\n" .
          "  -c   Cleanup stale container\n" .
          "  -s   Show state for container\n" .
          "";
    exit(1);
}
30
# Option string handed to getopts(), then the variables the flags feed.
my $optlist   = "dcs";
# NOTE(review): debug defaults to on, so -d changes nothing and only -s
# turns it off. Presumably deliberate; confirm.
my $debug     = 1;
my ($cleanup, $showstate) = (0, 0);
# The single required positional argument; assigned after option parsing.
my $vnodeid;

#
# Turn off line buffering on output
#
$| = 1;

# Drag in path stuff so we can find emulab stuff.
BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }

#
# Load the OS independent support library. It will load the OS dependent
# library and initialize itself. 
# 
use libsetup;
use libtmcc;
use libtestbed;
51
use liblocsetup;
52
53
54
55
56
57
    
# Pull in libvnode
use libvnode;

# Helpers
sub MyFatal($);
58
59
60
61
sub safeLibOp($$$;@);
sub CleanupVM();
sub TearDownStaleVM();
sub StoreState();
62
63
64

# Locals
# Fixed paths used by this script.
my $VMPATH     = "/var/emulab/vms/vminfo";
my $CTRLIPFILE = "/var/emulab/boot/myip";
my $IPTABLES   = "/sbin/iptables";
# Per-vnode directory under $VMPATH; assigned once the vnodeid is known.
my $VNDIR;
# Script-wide status flags, all initially clear.
my ($leaveme, $running, $cleaning, $rebooting, $reload) = (0, 0, 0, 0, 0);
# Container identity plus scratch return/error values for library calls.
my ($vmid,$vmtype,$ret,$err);

# Flags for leaveme.
my $LEAVEME_REBOOT = 0x1;
my $LEAVEME_HALT   = 0x2;

79
80
81
82
83
84
85
86
#
# Parse command arguments. Once getopts() returns, the only thing left in
# @ARGV should be the required vnodeid.
#
my %options = ();
getopts($optlist, \%options)
    or usage();

$debug   = 1 if (defined($options{"d"}));
$cleanup = 1 if (defined($options{"c"}));
if (defined($options{"s"})) {
    # Showing state also switches debugging off.
    ($showstate, $debug) = (1, 0);
}
if (@ARGV != 1) {
    usage();
}

$vnodeid = $ARGV[0];
$VNDIR   = "$VMPATH/$vnodeid";
102
103
104
105
106
107
108
109
110
111
112
113

#
# Must be root.
#
die("*** $0:\n".
    "    Must be root to run this script!\n")
    if ($UID != 0);

# Tell the library what vnode we are messing with.
libsetup_setvnodeid($vnodeid);

# Per the original author's note, this can only be set after the call above.
my $RUNNING_FILE = CONFDIR() . "/running";

#
# Turn on debug timestamps if desired.
#
TBDebugTimeStampsOn()
    if ($debug);

#
# Remove old state files at boot. The marker file records that this has
# been done, so the removal happens only when the marker is absent.
# NOTE(review): presumably /var/run is emptied on reboot -- confirm.
#
unless (-e "/var/run/mkvnode.ready") {
    system("rm -f $VARDIR/vms/*/vnode.state");
    system("touch /var/run/mkvnode.ready");
}

132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#
# XXX: for now, support only a single vnode type per phys node.  This is bad,
# but it's the current assumption.  For now, we also assume the nodetype since
# we only have pcvm.  Later, we need to get this info from tmcd so we know
# which lib to load.
#
my @nodetypes = ( GENVNODETYPE() );

#
# We go through this so that we can pull in multiple packages implementing
# the libvnode API so they (hopefully) won't step on our namespace too much.
# %libops maps each type name to a reference to that backend's ops table.
#
my %libops = ();
foreach my $type (@nodetypes) {
    # The type name is interpolated into a string eval below, so refuse
    # anything that does not pass the pattern instead of evaluating it.
    if ($type =~ /^([\w\d\-]+)$/) {
        $type = $1;
    }
    else {
        die "Bad vnode type name '$type'";
    }
    # load lib and initialize it
    my %ops;
    eval "use libvnode_$type; %ops = %libvnode_${type}::ops";
    if ($@) {
        die "while trying to load 'libvnode_$type': $@";
    }
    if (0 && $debug) {
        # Pass a reference so Dumper shows one hash, not a flattened list.
        print "%ops($type):\n" . Dumper(\%ops);
    }
    $libops{$type} = \%ops;
    if ($debug) {
        $libops{$type}{'setDebug'}->(1);
    }
    $libops{$type}{'init'}->();

    # Need to do this for each type encountered, so dispatch on the loop's
    # own $type (matching the timestamps) rather than on GENVNODETYPE().
    TBDebugTimeStampWithDate("starting $type rootPreConfig()");
    $libops{$type}{'rootPreConfig'}->();
    TBDebugTimeStampWithDate("finished $type rootPreConfig()");
}
if ($debug) {
    print "GENVNODETYPE " . GENVNODETYPE() . "\n";
    print "libops:\n" . Dumper(\%libops);
}

#
# Need the domain, but there is no consistent way to get it. Ask tmcc for
# the boss node and parse the domain out of its name.
#
my ($DOMAINNAME,$BOSSIP) = tmccbossinfo();
defined($DOMAINNAME)
    or die("Could not get bossname from tmcc!");

# Strip the leading host label; everything after the first dot is the domain.
my ($bossdomain) = ($DOMAINNAME =~ /^[-\w]+\.(.*)$/);
defined($bossdomain)
    or die("Could not parse domain name!");
$DOMAINNAME = $bossdomain;

# The boss address must look like a dotted quad.
die "Bad bossip '$BOSSIP' from bossinfo!"
    unless ($BOSSIP =~ /^\d+\.\d+\.\d+\.\d+$/);

192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#
# Quickie way to show the state: dump the stored vnode.state and exit.
#
if ($showstate) {
    if (! -e "$VNDIR/vnode.info") {
        fatal("No vnode.info file for $vnodeid");
    }
    if (! -e "$VNDIR/vnode.state") {
        fatal("no vnode.state file for $vnodeid");
    }
    my $tmp = eval { Storable::retrieve("$VNDIR/vnode.state"); };
    if ($@) {
        fatal("$@");
    }
    # retrieve() signals a read error by returning undef rather than dying;
    # do not report that as an (empty) state with a zero exit status.
    if (!defined($tmp)) {
        fatal("Could not read $VNDIR/vnode.state: $!");
    }
    print Dumper($tmp);
    exit(0);
}

210
211
212
213
#
# In most cases, the vnodeid directory will have been created by the
# caller, and a config file possibly dropped in.  When debugging, we
# have to create it here.
#
(-e $VMPATH || mkdir($VMPATH, 0770))
    or fatal("Could not mkdir $VMPATH: $!");

chdir($VMPATH)
    or die("Could not chdir to $VMPATH: $!\n");

# Relative path: the working directory is now $VMPATH.
(-e $vnodeid || mkdir($vnodeid, 0770))
    or fatal("Could not mkdir $vnodeid in $VMPATH: $!");
226
227
228
229
230
231
232
233
#
# The container description for the library routines. Every slot other
# than "vnodeid" starts out undef and is filled in further down.
#
my %vnconfig = ( "vnodeid" => $vnodeid );
$vnconfig{$_} = undef
    foreach ("config", "ifconfig", "ldconfig", "tunconfig", "attributes");

# Return the value stored under the given key of the "config" slot.
sub VNCONFIG($)
{
    my ($key) = @_;

    return $vnconfig{'config'}->{$key};
}

#
# If cleanup was requested, make sure the manager process is not running.
# Must do this after the stuff above is defined.
#
if ($cleanup) {
    # This path is in vnodesetup.
    my $pidfile = "/var/run/tbvnode-${vnodeid}.pid";
    if (! -e $pidfile) {
        exit(TearDownStaleVM());
    }
    print STDERR "Manager process still running. Use that instead.\n";
    print STDERR "If the manager is really dead, first rm $pidfile.\n";
    exit(1);
}

#
# This holds the container state set up by the library. There is state
# added here, and state added in the library ("private"). It is locally
# redefined (with local) in TearDownStaleVM, so it cannot be a lexical.
#
# NOTE: There should be NO state in here that needs to survive reboot.
#       The state files are all removed when rebooting.
#
$vnstate = { "private" => {} };

#
# Now we can start doing something useful.
#
my ($pid, $eid, $vname) = check_nickname();

# Fall back to the vnodeid when no node uuid is available.
my $nodeuuid = getnodeuuid();
unless (defined($nodeuuid)) {
    $nodeuuid = $vnodeid;
}
269

270
271
272
273
274
275
#
# Get all the config stuff we need. Each getter fills the container passed
# by reference and returns true on failure.
#
my (%tmp, @tmp, $tmp, %attrs);

if (getgenvnodeconfig(\%tmp)) {
    fatal("Could not get vnode config for $vnodeid");
}
$vnconfig{"config"} = \%tmp;

# The lists are copied because @tmp is reused by the next getter.
if (getifconfig(\@tmp)) {
    fatal("getifconfig($vnodeid): $!");
}
$vnconfig{"ifconfig"} = [ @tmp ];

if (getlinkdelayconfig(\@tmp)) {
    fatal("getlinkdelayconfig($vnodeid): $!");
}
$vnconfig{"ldconfig"} = [ @tmp ];

if (gettunnelconfig(\$tmp)) {
    fatal("gettunnelconfig($vnodeid): $!");
}
$vnconfig{"tunconfig"} = $tmp;

if (getnodeattributes(\%attrs)) {
    fatal("getnodeattributes($vnodeid): $!");
}
$vnconfig{"attributes"} = \%attrs;

print "VN Config:\n", Dumper(\%vnconfig)
    if ($debug);

303
304
305
306
307
#
# See if we 1) are supposed to be "booting" into the reload mfs, and 2) have
# loadinfo.  Both are needed in order to reload!
#
if (getbootwhat(\@tmp)) {
    fatal("getbootwhat($vnodeid): $!");
}
if (scalar(@tmp) && exists($tmp[0]->{"WHAT"})) {
    # Capture this now; @tmp is overwritten by getloadinfo() below.
    my $what = $tmp[0]->{"WHAT"};

    if ($what =~ /frisbee-pcvm/) {
        #
        # Ok, we're reloading, using the fake frisbee pcvm mfs.
        #
        $reload = 1;

        if (getloadinfo(\@tmp)) {
            fatal("getloadinfo($vnodeid): $!");
        }
        if (!scalar(@tmp)) {
            fatal("vnode $vnodeid in reloading, but got no loadinfo!");
        }
        else {
            # IMAGEID is three comma separated tokens; the image name is
            # the same three tokens joined with dashes.
            my @idparts =
                ($tmp[0]->{"IMAGEID"} =~ /^([-\d\w]+),([-\d\w]+),([-\d\w]+)$/);

            if (@idparts) {
                $vnconfig{"reloadinfo"} = $tmp[0];
                $vnconfig{"image"}      = join("-", @idparts);
            }
            else {
                fatal("vnode $vnodeid in reloading, but got bogus IMAGEID " .
                      $tmp[0]->{"IMAGEID"} . " from loadinfo!");
            }
        }
    }
    elsif ($what =~ /^\d*$/) {
        #
        # We are using bootwhat for a much different purpose than intended.
        # It tells us a partition number, but that is meaningless. Look at
        # the jailconfig to see what image should boot. That image better
        # be resident already.
        #
        my @nameparts =
            (VNCONFIG('IMAGENAME') =~ /^([-\w]+),([-\w]+),([-\w]+)$/);

        $vnconfig{"image"} = join("-", @nameparts)
            if (@nameparts);
    }
    # Otherwise the library will boot the default, whatever that is.
}

#
# Signal handler. We can get signals from vnodesetup. Records how the
# container should be left ($leaveme) and then bails out through MyFatal().
#
sub handler ($) {
    my ($signame) = @_;

    # No more interruptions during teardown.
    $SIG{$_} = 'IGNORE'
        foreach ('INT', 'USR1', 'USR2', 'HUP');

    # Per-signal disposition: the $leaveme flag to set and the word used in
    # the final message. Any other signal leaves $leaveme as it was.
    my %disposition = (
        'USR1' => [ $LEAVEME_HALT,   "halted"   ],
        'USR2' => [ $LEAVEME_REBOOT, "rebooted" ],
    );
    my $outcome = "killed";
    if (exists($disposition{$signame})) {
        ($leaveme, $outcome) = @{ $disposition{$signame} };
    }

    #
    # XXX this is a woeful hack for vnodesetup.  At the end of rebootvnode,
    # vnodesetup calls hackwaitandexit, which essentially waits for a vnode
    # to be well on the way back up before it returns.  That call was
    # apparently added for the lighter-weight "reconfigure a vnode" path
    # (as opposed to reboot), but it makes reboot of a vnode behave
    # differently from reboot of a pnode, which returns as soon as the node
    # stops responding (when it goes down, not when it comes back up).
    # This matters because Xen vnodes cannot always "reboot" under the
    # current semantics in less than 30 seconds, which is the timeout in
    # libreboot.
    #
    # Touching the "running" file here forces hackwaitandexit to return
    # when the vnode is shut down in Xen (or OpenVZ), more closely matching
    # the pnode semantics while leaving the BSD jail case (which does not
    # use this code) alone.  This obviously needs to be revisited.
    #
    if ($leaveme && -e "$RUNNING_FILE") {
        mysystem("touch $RUNNING_FILE");
    }

    MyFatal("mkvnode ($PID) caught a SIG${signame}! container $outcome");
}

393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
#
# If this file exists, we are rebooting an existing container. But we
# need to check whether it is a stale or aborted container (one that failed
# to set up or tear down) and got left behind. Another wrinkle is shared
# nodes, so we use the node uuid to determine if it is another logical
# pcvm with the same name that needs to be destroyed before setting up.
#
if (-e "$VNDIR/vnode.info") {
    my $teardown = 0;
    my $uuid;

    # vnode.info holds "<vmid> <vmtype> <uuid>".
    my $info = `cat $VNDIR/vnode.info`;
    ($vmid, $vmtype, $uuid) = ($info =~ /^(\d*) (\w*) ([-\w]*)$/);

    # Consistency check: vnode.<vmid> must exist and name this vnode.
    my $idfile = "$VMPATH/vnode.$vmid";
    unless (-e $idfile) {
        fatal("No matching file: $idfile");
    }
    my $owner = `cat $idfile`;
    chomp($owner);
    fatal("Inconsistent vnodeid in $idfile")
        if ($owner ne $vnodeid);

    if ($uuid ne $nodeuuid) {
        print "UUID mismatch; tearing down stale vnode $vnodeid\n";
        $teardown = 1;
    }
    elsif ($reload) {
        print "Reload requested, tearing down old vnode\n";
        $teardown = 1;
    }
    else {
        # Same node and no reload: only a stopped container may be rebooted.
        ($ret,$err) = safeLibOp('vnodeState', 1, 0);
        fatal("Failed to get status for existing container: $err")
            if ($err);

        if ($ret eq VNODE_STATUS_UNKNOWN()) {
            print "Cannot determine status container $vmid. Deleting ...\n";
            $teardown = 1;
        }
        elsif ($ret ne VNODE_STATUS_STOPPED()) {
            fatal("vnode $vnodeid not stopped, not booting!");
        }
    }

    if (!$teardown) {
        $rebooting = 1;
    }
    elsif (TearDownStaleVM() != 0) {
        fatal("Could not tear down stale container");
    }
}

446
#
# Another wrinkle; tagged vlans might not be set up yet when we get
# here, and we have to know those tags before we can proceed. We
# need to spin, but with signals enabled since we do not want to
# wait forever. Okay to get a signal and die at this point.
#
# NOTE: currently disabled by the constant 0 in the condition.
#
if (0 && @{ $vnconfig{'ifconfig'} }) {
    my $rescan = 1;

    while ($rescan) {
        $rescan = 0;

        foreach my $ifc (@{ $vnconfig{'ifconfig'} }) {
            # Only vlan interfaces that still lack a tag are of interest.
            next
                if ($ifc->{ITYPE} ne "vlan");
            next
                if ($ifc->{VTAG});

            # no tag, wait and ask again.
            my $lan = $ifc->{LAN};
            print STDERR
                "$lan does not have a tag yet. Waiting, then asking again ...\n";

            sleep(5);

            my @fresh = ();
            fatal("getifconfig($vnodeid): $!")
                if (getifconfig(\@fresh));
            $vnconfig{"ifconfig"} = [ @fresh ];

            # Just look through everything again; simple.
            $rescan = 1;
            last;
        }
    }
}

#
# Install handlers *after* the stale container teardown, since that
# teardown sets these signals to IGNORE (and then DEFAULT) itself.
#
# Ignore TERM since we want our caller to catch it first and then send
# it down to us.
#
$SIG{TERM} = 'IGNORE';

#
# All of these halt the container and exit; they differ in what is kept:
#   USR1  tear down transient state, leave disk.
#   USR2  leave all state intact (we are rebooting).
#   HUP   tear down all state including disk.
#   INT   handled by the same routine.
#
$SIG{$_} = \&handler
    foreach ('USR1', 'USR2', 'HUP', 'INT');

#
# Initial pre config for the experimental network. We want to make sure
# we can allocate the required devices and whatever else before going
# any further.
#
TBDebugTimeStampWithDate("starting rootPreConfigNetwork()");
$ret = eval {
    my $preconfig = $libops{GENVNODETYPE()}{'rootPreConfigNetwork'};

    $preconfig->($vnodeid, undef, \%vnconfig, $vnstate->{'private'});
};
if ($ret || $@) {
    if ($@) {
        print STDERR $@;
    }

    # If this fails, we require the library to clean up after itself
    # so that we can just exit without worrying about cleanup.
    fatal("rootPreConfigNetwork failed!");
}
TBDebugTimeStampWithDate("finished rootPreConfigNetwork()");

516
517
518
519
520
521
522
unless (-e "$VNDIR/vnode.info") {
    #
    # XXX XXX XXX: need to get this from tmcd!
    # NOTE: we first put the type into vndb so that the create call can go!
    #
    $vmtype = GENVNODETYPE();

    ($ret,$err) = safeLibOp('vnodeCreate',0,0);
    MyFatal("vnodeCreate failed")
        if ($err);
    $vmid = $ret;

    # Record "<vmid> <vmtype> <uuid>" and the vmid-to-vnodeid mapping.
    mysystem("echo '$vmid $vmtype $nodeuuid' > $VNDIR/vnode.info");
    mysystem("echo '$vnodeid' > $VMPATH/vnode.$vmid");

    # bootvnodes wants this to be here...
    mysystem("mkdir -p /var/emulab/jails/$vnodeid");
}

# This state structure is saved to disk for TearDown.
$vnstate->{"vmid"}   = $vmid;
$vnstate->{"vmtype"} = $vmtype;
$vnstate->{"uuid"}   = $nodeuuid;
# Store the state to disk.
StoreState()
    and MyFatal("Could not store container state to disk");

my $cnet_mac = ipToMac(VNCONFIG('CTRLIP'));

# The external control IP is read from $CTRLIPFILE.
chomp(my $ext_ctrlip = `cat $CTRLIPFILE`);
unless ($ext_ctrlip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
    # cannot/should not really go on if this happens.
    MyFatal("error prior to vnodePreConfigControlNetwork($vnodeid): " .
            " could not find valid ip in $CTRLIPFILE!");
}
my $longdomain = join(".", $eid, $pid, $DOMAINNAME);

554
555
556
557
558
559
560
#
# Call back to do things to the container before it boots.
#
# Argument: $path, the directory prefix under which the container's files
# are reached. Always returns 0.
#
sub callback($)
{
    my ($path) = @_;

    #
    # Set up sshd port to listen on. If the vnode has its own IP
    # then listen on both 22 and the per-vnode port.
    #
    # The EmulabJail marker guards against appending the lines twice.
    # The command must be double-quoted so Perl interpolates $path; with
    # single quotes the shell expanded an unset $path and grep examined
    # the host's own /etc/ssh/sshd_config instead of the container's.
    #
    if (system("grep -q -e EmulabJail $path/etc/ssh/sshd_config")) {
        if (defined(VNCONFIG('SSHDPORT')) && VNCONFIG('SSHDPORT') ne "") {
            my $sshdport = VNCONFIG('SSHDPORT');

            system("echo '# EmulabJail' >> $path/etc/ssh/sshd_config");
            system("echo 'Port $sshdport' >> $path/etc/ssh/sshd_config");
            if (VNCONFIG('CTRLIP') ne $ext_ctrlip) {
                system("echo 'Port 22' >> $path/etc/ssh/sshd_config");
            }
        }
    }
    return 0;
}

579
# OP: preconfig
MyFatal("vnodePreConfig failed")
    if (safeLibOp('vnodePreConfig', 1, 1, \&callback));

# OP: control net preconfig
MyFatal("vnodePreConfigControlNetwork failed")
    if (safeLibOp('vnodePreConfigControlNetwork',1,1,
                  VNCONFIG('CTRLIP'),
                  VNCONFIG('CTRLMASK'),$cnet_mac,
                  $ext_ctrlip,$vname,$longdomain,$DOMAINNAME,$BOSSIP));

# OP: exp net preconfig, then resources, then devices. These take no extra
# arguments, so run them in order and stop at the first failure.
foreach my $op ('vnodePreConfigExpNetwork',
                'vnodeConfigResources',
                'vnodeConfigDevices') {
    MyFatal("$op failed")
        if (safeLibOp($op, 1, 1));
}

603
604
605
#
# Route to inner sshd: DNAT the per-vnode sshd port on the external
# control IP to the same port on the vnode's control IP.
#
if (defined(VNCONFIG('SSHDPORT')) && VNCONFIG('SSHDPORT') ne "") {
    my $sshdport  = VNCONFIG('SSHDPORT');
    my $ctrlip    = VNCONFIG('CTRLIP');
    my $installed = 0;

    # Retry a few times cause of iptables locking stupidity.
    for (my $i = 0; $i < 10; $i++) {
        system("$IPTABLES -v -t nat -A PREROUTING -p tcp -d $ext_ctrlip ".
               "--dport $sshdport -j DNAT ".
               "--to-destination $ctrlip:$sshdport");

        if ($? == 0) {
            # Remember the rule so that CleanupVM() can delete it later.
            my $ref = {};
            $ref->{'port'}       = $sshdport;
            $ref->{'ctrlip'}     = $ctrlip;
            $ref->{'ext_ctrlip'} = $ext_ctrlip;
            $vnstate->{'sshd_iprule'} = $ref;
            $installed = 1;
            last;
        }
        sleep(2);
    }
    # Still best effort, but no longer silent when every attempt failed.
    print STDERR "*** WARNING: could not install sshd DNAT rule for ".
        "$vnodeid (port $sshdport)\n"
        if (!$installed);
}

628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
#
# Start the container. If all goes well, this will exit cleanly, with
# it running in its new context. Still, lets protect it with a timer
# since it might get hung up inside and we do not want to get stuck here.
#
my $childpid = fork();
if (!defined($childpid)) {
    # fork() returns undef on failure. Without this check the parent fell
    # into the child branch below, ran vnodeBoot itself and then exited.
    MyFatal("$vnodeid container startup could not fork: $!");
}
if ($childpid) {
    # Parent: give the child 30 seconds, then send it a TERM.
    local $SIG{ALRM} = sub { kill("TERM", $childpid); };
    alarm 30;
    waitpid($childpid, 0);
    alarm 0;

    #
    # If failure then cleanup.
    #
    if ($?) {
        MyFatal("$vnodeid container startup exited with $?");
    }
}
else {
    # Child: TERM is ignored in the parent; restore it so the timer works.
    $SIG{TERM} = 'DEFAULT';

    if (safeLibOp('vnodeBoot', 1, 1)) {
        print STDERR "*** ERROR: vnodeBoot failed\n";
        exit(1);
    }
    exit(0);
}
656
MyFatal("vnodePostConfig failed")
    if (safeLibOp('vnodePostConfig', 1, 1));

# XXX: need to do this for each type encountered!
TBDebugTimeStampWithDate("starting $vmtype rootPostConfig()");
$libops{$vmtype}{'rootPostConfig'}->();
TBDebugTimeStampWithDate("finished $vmtype rootPostConfig()");

print "VN State:\n", Dumper($vnstate)
    if ($debug);

# Store the state to disk.
StoreState()
    and MyFatal("Could not store container state to disk");

# This is for vnodesetup
mysystem("touch $RUNNING_FILE");
$running = 1;
676
677

#
# This loop is to catch when the container stops. We used to run a sleep
# inside and wait for it to exit, but that is not portable across the
# backends, and the return value did not indicate how it exited. So, lets
# just loop, asking for the status every few seconds.
#
# XXX Turn off debugging during this loop to keep the log file from growing.
#
if ($debug) {
    TBDebugTimeStampsOff();
}

my $container_up = 1;
while ($container_up) {
    sleep(5);

    #
    # If the container exits, either it rebooted from the inside or
    # the physical node is rebooting, or we are actively trying to kill
    # it cause our parent (vnodesetup) told us to. In all cases, we just
    # exit and let the parent decide what to do.
    #
    my ($state, $failure) = safeLibOp('vnodeState', 0, 0);
    fatal("*** ERROR: vnodeState: $failure\n")
        if ($failure);
    next
        if ($state eq VNODE_STATUS_RUNNING());

    print "Container is no longer running.\n";
    # Rebooted from inside, but not cause we told it to, so leave intact.
    $leaveme = $LEAVEME_REBOOT
        unless ($cleaning);
    $container_up = 0;
}
if ($debug) {
    TBDebugTimeStampsOn();
}
exit(CleanupVM());
712
713

#
# Teardown a container. This should not be used if the mkvnode process
# is still running; use vnodesetup instead. This is just for the case
# that the manager (vnodesetup,mkvnode) process is gone and the leftovers
# need to be cleaned up.
#
# Returns 0 on success and -1 on failure.
#
sub TearDownStaleVM()
{
    (-e "$VNDIR/vnode.info")
        or fatal("TearDownStaleVM: no vnode.info file for $vnodeid");

    # vnode.info holds "<vmid> <vmtype> <uuid>"; the uuid is not needed.
    my $info = `cat $VNDIR/vnode.info`;
    ($vmid, $vmtype) = ($info =~ /^(\d*) (\w*) ([-\w]*)$/);

    #
    # Load the state. Use a local so that we do not overwrite
    # the outer version. Just a precaution.
    #
    # The state might not exist, but we proceed anyway.
    #
    local $vnstate = { "private" => {} };

    my $statefile = "$VNDIR/vnode.state";
    if (-e $statefile) {
        $vnstate = eval { Storable::retrieve($statefile); };
        if ($@) {
            print STDERR "$@";
            return -1;
        }
        print "vnstate:\n", Dumper($vnstate)
            if ($debug);
    }

    # No interruptions during stale teardown.
    my @guarded = ('INT', 'USR1', 'USR2', 'HUP');
    $SIG{$_} = 'IGNORE'
        foreach (@guarded);

    #
    # If we fail to clean up, store the state back to disk so that we
    # capture any changes. The signals stay ignored on this path.
    #
    if (CleanupVM()) {
        StoreState();
        return -1;
    }
    $SIG{$_} = 'DEFAULT'
        foreach (@guarded);

    return 0;
}

#
# Clean things up: remove the sshd DNAT rule, stop or unmount the
# container, then either tear down transient state ($leaveme set) or
# destroy the container and its bookkeeping files.
#
# Returns 0 on success and -1 on failure. Dies if called re-entrantly.
#
sub CleanupVM()
{
    die("*** $0:\n".
        "    Oops, already cleaning!\n")
        if ($cleaning);
    $cleaning = 1;

    # If the container was never built, there is nothing to do.
    unless ((-e "$VNDIR/vnode.info") && defined($vmid)) {
        return 0;
    }

    if (exists($vnstate->{'sshd_iprule'})) {
        my $rule        = $vnstate->{'sshd_iprule'};
        my $sshdport    = $rule->{'port'};
        my $ctrlip      = $rule->{'ctrlip'};
        my $ext_ctrlip  = $rule->{'ext_ctrlip'};

        # Retry a few times cause of iptables locking stupidity.
        foreach my $attempt (1 .. 10) {
            system("$IPTABLES -v -t nat -D PREROUTING -p tcp -d $ext_ctrlip ".
                   "--dport $sshdport -j DNAT ".
                   "--to-destination $ctrlip:$sshdport");
            last
                if ($? == 0);
            sleep(2);
        }
        # Update new state. The rule is forgotten whether or not the
        # deletion succeeded.
        delete($vnstate->{'sshd_iprule'});
        StoreState();
    }

    # if not halted, try that first
    my ($state, $failure) = safeLibOp('vnodeState', 1, 0);
    if ($failure) {
        print STDERR "*** ERROR: vnodeState: failed to cleanup $vnodeid: $failure\n";
        return -1;
    }
    if ($state eq VNODE_STATUS_RUNNING()) {
        print STDERR "cleanup: $vnodeid not stopped, trying to halt it.\n";
        ($state, $failure) = safeLibOp('vnodeHalt', 1, 1);
        if ($failure) {
            print STDERR "*** ERROR: vnodeHalt: failed to halt $vnodeid: $failure\n";
            return -1;
        }
    }
    elsif ($state eq VNODE_STATUS_MOUNTED()) {
        print STDERR "cleanup: $vnodeid is mounted, trying to unmount it.\n";
        ($state, $failure) = safeLibOp('vnodeUnmount', 1, 1);
        if ($failure) {
            print STDERR "*** ERROR: vnodeUnmount: failed to unmount $vnodeid: $failure\n";
            return -1;
        }
    }

    if ($leaveme) {
        my $keepdisk = ($leaveme == $LEAVEME_HALT ||
                        $leaveme == $LEAVEME_REBOOT);
        return 0
            if (!$keepdisk);

        #
        # When halting, the disk state is left, but the transient state
        # is removed since it will get reconstructed later if the vnode
        # is restarted. This avoids leaking a bunch of stuff in case the
        # vnode never starts up again. We of course leave the disk, but
        # that will eventually get cleaned up if the pcvm is reused for
        # a future experiment.
        #
        # XXX Reboot should be different; there is no reason to tear
        # down the transient state, but we do not handle that yet.
        # Not hard to add though.
        #
        ($state, $failure) = safeLibOp('vnodeTearDown', 1, 1);
        # Always store in case some progress was made.
        StoreState();
        if ($failure) {
            print STDERR "*** ERROR: failed to teardown $vnodeid: $failure\n";
            return -1;
        }
        return 0;
    }

    # now destroy
    ($state, $failure) = safeLibOp('vnodeDestroy', 1, 1);
    if ($failure) {
        print STDERR "*** ERROR: failed to destroy $vnodeid: $failure\n";
        return -1;
    }
    unlink($_)
        foreach ("$VNDIR/vnode.info",
                 "$VNDIR/vnode.state",
                 "$VMPATH/vnode.$vmid");
    # Only this fully successful destroy path clears the guard again.
    $cleaning = 0;
    return 0;
}
    
#
# Clean up the container, then print the error and die.
#
sub MyFatal($)
{
    my ($msg) = @_;

    #
    # If rebooting but never got a chance to run, we do not want
    # to kill off the container. Might lose user data.
    #
    if ($rebooting && !$running) {
        $leaveme = $LEAVEME_REBOOT;
    }

    CleanupVM();
    die("*** $0:\n    $msg\n");
}

#
# Helpers:
#
# safeLibOp(op, autolog, autoerr, args...) invokes the named operation from
# the $vmtype backend with signals blocked and exceptions trapped.
#
#   op       key into the backend's ops table.
#   autolog  accepted, but nothing is currently done with it.
#   autoerr  when true, a true return value from the op counts as failure.
#   args     extra arguments appended to the standard four.
#
# Returns (-1, error) if the op died, (value, error) if autoerr tripped,
# and otherwise just the op's return value. Callers that use the result
# in scalar context rely on the failure returns being true there.
#
sub safeLibOp($$$;@) {
    my ($op,$autolog,$autoerr,@args) = @_;

    my $sargs = (@args > 0) ? join(',',@args) : '';

    if ($debug) {
        TBDebugTimeStampWithDate("starting $vmtype $op($sargs)");
    }

    #
    # Block signals that could kill us in the middle of a library call.
    # Might be better to do this down in the library, but this is an
    # easier place to do it. This ensures that if we have to tear down
    # in the middle of setting up, the state is consistent.
    #
    my $blocked = POSIX::SigSet->new(SIGHUP, SIGINT, SIGUSR1, SIGUSR2);
    my $saved   = POSIX::SigSet->new;
    defined(sigprocmask(SIG_BLOCK, $blocked, $saved))
        or print STDERR "sigprocmask (BLOCK) failed!\n";

    my $status = eval {
        $libops{$vmtype}{$op}->($vnodeid, $vmid,
                                \%vnconfig, $vnstate->{'private'}, @args);
    };
    # Grab $@ right away, before anything else can clobber it.
    my $failure = $@;

    defined(sigprocmask(SIG_SETMASK, $saved))
        or print STDERR "sigprocmask (UNBLOCK) failed!\n";

    # The op died.
    if ($failure) {
        if ($debug) {
            TBDebugTimeStampWithDate("failed $vmtype $op($sargs): $failure");
        }
        return (-1,$failure);
    }

    # The op returned a true value and the caller wants that as an error.
    if ($autoerr && $status) {
        $failure = "$op($vnodeid) failed with exit code $status!";
        if ($debug) {
            TBDebugTimeStampWithDate("failed $vmtype $op($sargs): exited with $status");
        }
        return ($status,$failure);
    }

    if ($debug) {
        TBDebugTimeStampWithDate("finished $vmtype $op($sargs)");
    }

    return $status;
}
941
942
943
944
945
946
947
948
949
950
951
952
953
954

#
# Write $vnstate to $VNDIR/vnode.state.
#
# Returns 0 on success and -1 on failure, with the reason on STDERR.
#
sub StoreState()
{
    # Store the state to disk.
    print "Storing state to disk ...\n"
        if ($debug);

    my $stored = eval { Storable::store($vnstate, "$VNDIR/vnode.state"); };
    if ($@) {
        print STDERR "$@";
        return -1;
    }
    # store() reports I/O problems by returning undef rather than dying,
    # so a false result has to be treated as a failure too.
    if (!$stored) {
        print STDERR "Could not store state to $VNDIR/vnode.state: $!\n";
        return -1;
    }
    return 0;
}