#!/bin/sh
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2007 University of Utah and the Flux Group.
# All rights reserved.
#
# Optional flag argument says "do not reboot"
#

# Directory searched for the MBR images (mbr<version>.dd).
MBR_PATH=/etc/emulab

# Reboot when done unless the one and only argument is -noreboot.
reboot=1
if [ $# -eq 1 ] && [ "$1" = "-noreboot" ]; then
    reboot=0
fi

#
# Amount of memory in MB to leave for everyone else in the system.  If you
# get out-of-memory or vm_pager error while running frisbee, increase this.
#
RESIDMEM=32

# Take BINDIR/BOOTDIR/ETCDIR from paths.sh when it is readable, otherwise
# fall back to fixed defaults.
if [ -r /etc/emulab/paths.sh ]; then
	. /etc/emulab/paths.sh
else
	BINDIR=/etc/testbed
	BOOTDIR=/etc/testbed
	ETCDIR=/etc/testbed
fi

#
# Extract the value of one KEY=VALUE word from a whitespace-separated list.
#
#	$1  list of KEY=VALUE words
#	$2  key to look up; it is spliced into a sed pattern, so it must
#	    not contain regex metacharacters or '/'
#
# Prints the value of every word whose key is exactly $2, one per line,
# and nothing if the key is absent.
#
get_value()
{
	local data="$1"
	local key="$2"

	# $data is deliberately left unquoted so the shell splits it into
	# words.  printf prints one word per line and, unlike echo, never
	# treats a leading "-n"/"-e" word or a backslash specially.
	printf '%s\n' $data | sed -n "s/^$key=//p"
}

#
# Update the MBR of the given disk to the indicated "version."
#
# XXX this is somewhat of a hack right now.  We recognize two
# versions of the MBR:
#	v1 (partition 1 size 6281352)
#	v2 (partition 1 size 12305790)
# Currently we only install a new MBR if the existing one is the
# wrong size, just in case the user has customized the boot program.
#
#	$1  disk device
#	$2  wanted MBR version; the image is $MBR_PATH/mbr<version>.dd
#
# Returns 0 when the MBR already has the wanted version or is left alone
# because partition 1 has an unrecognized size; 255 when the disk cannot
# be read, the MBR image is missing or the image cannot be written;
# otherwise the exit status of the final fdisk run.
#
tweakmbr()
{
	local disk="$1"
	local new_mbr_ver="$2"
	local cur_mbr_ver=''
	local size=''

	if ! dd if="$disk" of=/dev/null bs=512 count=1 2>/dev/null; then
		echo "WARNING: could not read from $disk, MBR not changed"
		return 255
	fi

	# Pull the size column of partition 1 out of fdisk's listing.
	# printf feeds the commands because "echo -e" is not understood by
	# every /bin/sh.
	size=$(printf 'u\np\nq\n' | fdisk "$disk" 2> /dev/null |
		sed -n "s#^${disk}1 *. *[0-9]* *[0-9]* *\([0-9]*\).*\$#\1#p")

	# Doubled before comparing with the sizes listed above (presumably
	# fdisk prints 1K blocks and the table is in 512-byte sectors --
	# TODO confirm).  Skipped when nothing was found so that expr is
	# not run on an empty operand.
	if [ -n "$size" ]; then
		size=$(expr "$size" '*' 2)
	fi

	case "$size" in
		6281352)
			cur_mbr_ver=1
			;;
		12305790)
			cur_mbr_ver=2
			;;
		'')
			echo "Found no MBR on $disk, installing version $new_mbr_ver"
			;;
		*)
			echo "WARNING: custom MBR on $disk, not changed"
			return 0
			;;
	esac

	# An empty requested version never counts as a match; it falls
	# through to the "cannot find MBR version" warning below.
	if [ -n "$new_mbr_ver" ] && [ "$cur_mbr_ver" = "$new_mbr_ver" ]; then
		return 0
	fi

	if ! [ -r "$MBR_PATH/mbr${new_mbr_ver}.dd" ]; then
		echo "WARNING: cannot find MBR version $new_mbr_ver, not installed"
		return 255
	fi

	echo "Installing MBR version $new_mbr_ver ..."
	if ! dd if="$MBR_PATH/mbr${new_mbr_ver}.dd" of="$disk" bs=512 count=1; then
		echo "WARNING: could not write MBR version $new_mbr_ver to $disk"
		return 255
	fi

	# Linux won't re-read the partition table unless told to do so.
	# hdparm could be used for this, but it may not be installed.
	# fdisk tells the kernel to re-read the table after writing it
	# to disk, so we'll just use that.
	echo "Re-reading partition table ..."
	echo w | fdisk "$disk" > /dev/null 2>&1
}

#
# Print the names of all entries under /sys/block that start with "sd"
# or "hd", separated by single spaces.  Prints an empty line when there
# are none.
#
find_disks() {
	local disks=''
	local d

	for d in /sys/block/[sh]d*; do
		# When nothing matches, the shell hands back the pattern
		# itself; do not report that as a disk.
		[ -e "$d" ] || continue
		disks="$disks ${d##*/}"
	done

	# Unquoted on purpose: drops the leading separator.
	echo $disks
}

# Wipe out the superblocks on any partitions by zeroing the first 16 sectors.
# This implementation doesn't suffer the limitations of the FreeBSD one in that:
# A) All partitions get zapped, even if not DOS MBR partitions
# B) We can zap the partition device since Linux isn't overprotective of superblocks,
#    unlike FreeBSD.
#
#	$1  disk name or device path; only the part after the last '/'
#	    is used
#
# Partitions are taken from /sys/block/<disk>/<disk>*.  Always returns 0;
# the result of each dd is ignored.
zapsuperblocks()
{
	local disk="${1##*/}"
	local part

	echo "Invalidating old potential superblocks on $disk"
	for part in /sys/block/"$disk"/"$disk"*; do
		# With no partitions the pattern comes back unexpanded;
		# writing to it would create a stray file named
		# /dev/<disk>* instead of touching a device.
		[ -e "$part" ] || continue
		dd if=/dev/zero of="/dev/${part##*/}" bs=512 count=16 > /dev/null 2>&1
	done

	return 0
}

# FIXME shouldn't hard code "/images"
#
# Write one image onto a disk or slice.
#
#	$1  image source:
#	      ADDR:PORT or frisbee://ADDR:PORT   download with frisbee
#	      http://SERVER/PATH (or https)      fetch into /images with wget
#	      /PATH or file://PATH               local image file
#	$2  target disk device
#	$3  slice number, 0 for the whole disk
#	$4  non-zero adds -z to the frisbee/imageunzip options
#	$5  optional partition type, passed to frisbee as -D (slices only)
#
# Base options come from FRISBEE_OPTS and IMAGEUNZIP_OPTS; the tools are
# run from BINDIR.  Returns 1 for an unusable address, a missing /images
# directory, or a failed mkextrafs/wget; otherwise the exit status of
# frisbee or imageunzip.
#
write_image()
{
	local address="$1"
	local disk="$2"
	local slice="${3:-0}"
	local zfill="${4:-0}"
	local ptype="$5"
	local port=""
	local imagefile=""
	local server=""
	local filename=""
	local protocol=""
	local rc=0
	local frisbee_opts="$FRISBEE_OPTS"
	local imageunzip_opts="$IMAGEUNZIP_OPTS"

	# ZFILL==1: use frisbee
	# ZFILL==2: separate disk-wipe pass (not yet implemented)
	if [ "$zfill" -ne 0 ]; then
		frisbee_opts="$frisbee_opts -z"
		imageunzip_opts="$imageunzip_opts -z"
	fi

	if [ "$slice" -ne 0 ]; then
		frisbee_opts="$frisbee_opts -s $slice"
		imageunzip_opts="$imageunzip_opts -s $slice"
		if [ -n "$ptype" ]; then
			frisbee_opts="$frisbee_opts -D $ptype"
		fi
	fi

	protocol=${address%%://*}
	if [ "$protocol" = "$address" ]; then
		# No scheme given: an absolute path is a local file,
		# anything else is a frisbee ADDR:PORT.
		case $address in
			/*) protocol=file ;;
			*) protocol=frisbee ;;
		esac
	else
		# Drop "scheme://" so the parsing below only sees what
		# follows it.
		address=${address#*://}
	fi

	case $protocol in
		frisbee)
			port=${address##*:}
			if [ "$port" = "$address" ]; then
				echo "*** WARNING: no port specified for frisbee"
				return 1
			fi
			address=${address%%:*}
			;;
		http|https)
			server=${address%%/*}
			filename=${address#*/}

			if ! [ -d /images ]; then
				echo "Need to create or mount /images directory!"
				return 1
			fi

			if ! $BINDIR/mkextrafs.pl -f "$disk" /images; then
		    		echo "Could not create /images partition"
				return 1
			fi

			wget -nv -N -P /images \
				"$protocol://$server/$filename"
			rc=$?
			if [ $rc -eq 0 ]; then
				echo "wget succeeded getting the image"
			else
				echo "wget failed, status $rc"
				return 1
			fi
			imagefile=/images/${filename##*/}
			;;
		file)
			# Exactly one leading slash, whether or not the
			# address already had one.
			imagefile=/${address#/}
			;;
		*)
			echo "*** WARNING: Unsupported protocol $protocol!"
			return 1
			;;
	esac

	$BINDIR/tmcc state RELOADING

	if [ "$protocol" = frisbee ]; then
		# The option strings are left unquoted so they split into
		# separate arguments.
		$BINDIR/frisbee -m "$address" -p "$port" $frisbee_opts "$disk"
		rc=$?

		if [ $rc -ne 0 ]; then
			echo "Frisbee run failed, status $rc"
			return $rc
		fi

		echo "Frisbee run finished"
		rc=0
	else
		$BINDIR/imageunzip $imageunzip_opts "$imagefile" "$disk"
		rc=$?
	fi

	if mount | grep /images > /dev/null; then
		umount /images
	fi

	return $rc
}

#
# Load one image as described by its loadinfo.
#
# Arguments: KEY=VALUE words.  Keys read: ADDR (required), PARTOS,
#	PART (default 0), DISK (default ad0), ZFILL (default 0), ACPI,
#	ASF, MBRVERS, PREPARE.
# Globals: reads BINDIR and IS_REMOTE; reads and sets FIRSTMBR, the MBR
#	version of the first image handled.
# Returns 1 when ADDR is missing or the image could not be written.
#
handle_loadinfo()
{
	local LOADINFO="$*"
	local ADDRESS PARTOS PARTITION DISK ZFILL ACPI ASF MBR PREPARE
	local FREEBSD_DISK=''
	local PTYPE=''
	local rc=0

	ADDRESS=$(get_value "$LOADINFO" ADDR)
	PARTOS=$(get_value "$LOADINFO" PARTOS)
	PARTITION=$(get_value "$LOADINFO" PART)
	PARTITION=${PARTITION:-0}

	DISK=$(get_value "$LOADINFO" DISK)
	DISK=${DISK:-ad0}

	ZFILL=$(get_value "$LOADINFO" ZFILL)
	ZFILL=${ZFILL:-0}

	ACPI=$(get_value "$LOADINFO" ACPI)
	ASF=$(get_value "$LOADINFO" ASF)
	MBR=$(get_value "$LOADINFO" MBRVERS)
	# A missing PREPARE means "no"; defaulting it keeps the numeric
	# test below from failing on an empty operand.
	PREPARE=$(get_value "$LOADINFO" PREPARE)
	PREPARE=${PREPARE:-0}

	if [ -z "$ADDRESS" ]; then
		echo "Unable to get address for loading image"
		return 1
	fi

	FREEBSD_DISK=$DISK
	# Convert from the FreeBSD device names to Linux device names
	# if necessary.
	case $DISK in
		[hs]d[a-z])
			;;
		*)
			DISK=$($BINDIR/freebsd_to_linux_disk "$DISK" 0)
			# Only the first word of the helper's output is used.
			DISK=${DISK%% *}
			;;
	esac

	if [ "$PARTITION" -ne 0 ]; then
		case $PARTOS in
			FreeBSD) PTYPE=165 ;;
			OpenBSD) PTYPE=166 ;;
			Fedora|Linux)   PTYPE=131 ;;
		esac
	fi

	# For slice images, ensure that the MBR is the correct version
	# and replace if not.
	if [ -z "$FIRSTMBR" ]; then
		if [ "$PARTITION" != "0" ]; then
			tweakmbr "/dev/$DISK" "$MBR"
			echo "Resizing final disk partition"
			growdisk -vW "/dev/$DISK"
		fi
		FIRSTMBR=$MBR
	else
		if [ "$MBR" != "$FIRSTMBR" ]; then
			# NOTE(review): NUM does not appear to be set anywhere
			# in this script, so the image number prints empty --
			# confirm.
			echo "MBR Mismatch: First MBR is \"$FIRSTMBR\" while image #$NUM is \"$MBR\""
		fi
	fi

	# If not zeroing the disk and we are loading a full disk image
	# we need to ensure that we at least invalidate any old superblocks
	# that might leak through (most likely in partition 4 which isn't
	# touched by our current image).  We do this before running frisbee
	# so that any legit filesystems loaded from the image work.

	# Since we do it before frisbee, we are counting on the current
	# MBR being the same as the MBR being layed down.  While not
	# a reasonable assumption in general, it mostly works in our
	# environment and at least won't hurt anything if not true.
	if [ "$PREPARE" -eq 1 ] ||
	   { [ "$IS_REMOTE" -eq 0 ] && [ "$PARTITION" -eq 0 ] &&
	     [ "$ZFILL" -eq 0 ]; }; then
		zapsuperblocks "/dev/$DISK"
	fi

	write_image "$ADDRESS" "/dev/$DISK" "$PARTITION" "$ZFILL" "$PTYPE"
	rc=$?
	if [ $rc -ne 0 ]; then
		echo "Failed to write image to disk, status $rc"
		# Return rather than exit so the caller's own failure
		# handling runs.
		return 1
	fi

	# we resize the 4th partition after writing the image
	# if we used a full disk image.  Otherwise, the resize
	# happens before the image write in case we need the
	# extra partition for an image fetched via HTTP.
	if [ "$PARTITION" -eq 0 ]; then
		echo "Resizing final disk partition"
		growdisk -vW "/dev/$DISK"
	fi

	echo "Adjusting slice-related files"
	export SLICEFIX_ACPI=$ACPI
	export SLICEFIX_ASF=$ASF
	$BINDIR/slicefix "$PARTITION" "$FREEBSD_DISK"
	echo "Image load complete at $(date)"
}

#
# Fetch the loadinfo from tmcc into /tmp/loadinfo.out.
#
#	$1  optional number of attempts, made one second apart (default 30)
#
# Returns 0 as soon as an attempt leaves a non-empty file, 1 when every
# attempt came back empty.
#
get_loadinfo()
{
	# Occasionally there is some delay before tmcd reports back valid
	# loadinfo, so try repeatedly (for 30 seconds by default) and give
	# up if we don't get any data.
	local tries_left="${1:-30}"

	while [ "$tries_left" -gt 0 ]; do
		# Just write it out to a tempfile to avoid extra nasty
		# shell script hacks.
		$BINDIR/tmcc loadinfo > /tmp/loadinfo.out
		if [ -s /tmp/loadinfo.out ]; then
			return 0
		fi
		sleep 1
		tries_left=$((tries_left - 1))
	done

	return 1
}

# See if we can map drive names to BIOS numbers via EDD
if [ -x "$BINDIR/get_edd_map.pl" ]; then
	"$BINDIR/get_edd_map.pl" > "$BOOTDIR/edd_map"
fi

# Behave a little different on widearea nodes.
IS_REMOTE=0
[ -e "$ETCDIR/isrem" ] && IS_REMOTE=1

$BINDIR/tmcc state RELOADSETUP

# The boss IP address is the last word of the bossinfo reply.
BOSSINFO=$($BINDIR/tmcc bossinfo)
BOSSIP=${BOSSINFO##* }
STATUS=$($BINDIR/tmcc status)

echo "Trying to get loadinfo data... "
if ! get_loadinfo; then
	echo "*** Failed to get loadinfo data" 1>&2
	exit 2
fi

echo "Got loadinfo data"

# For testing purposes.
#BOSSINFO='boss.emulab.net 155.101.128.70'
#LOADINFO='ADDR=234.5.6.69:4444'

# FIXME shouldn't hardcode path
if [ -x /usr/sbin/ntpdate ]; then
	/usr/sbin/ntpdate -b "$BOSSIP" >/dev/null 2>&1
fi

# Enable IPoD
if [ -r "$BINDIR/rc.ipod" ]; then
	. "$BINDIR/rc.ipod"
fi

# Try to map disks to BIOS drive numbers via EDD
# The map is created now before we touch any disks
# since we may need to use the MBR to determine
# which disk is which.
# Only run the helper when it exists: the redirection alone would
# otherwise truncate a map already written by get_edd_map.pl.
if [ -x "$BINDIR/get_edd_map" ]; then
	"$BINDIR/get_edd_map" > "$BOOTDIR/edd_map" 2>/dev/null
fi

FRISBEE_OPTS="-S $BOSSIP"

if [ -e "$BOOTDIR/myip" ]; then
	FRISBEE_OPTS="$FRISBEE_OPTS -i $(cat "$BOOTDIR/myip")"
fi
IMAGEUNZIP_OPTS="-o -O -W 32"

# When the status reports the node as allocated to emulab-ops/reloading,
# clear the partition boot blocks and then the first 16 sectors (MBR
# included) of every disk that has nothing mounted from it.
case $STATUS in
	*ALLOCATED=emulab-ops/reloading*)
		disks=$(find_disks)
		for d in $disks; do
			#[ $d = $DISK ] && continue
			if ! mount | grep "^/dev/$d" > /dev/null; then
				zapsuperblocks /dev/$d
				echo "Invalidating MBR on /dev/$d"
				dd if=/dev/zero of=/dev/$d bs=512 count=16
			fi
		done
		;;
esac

# Load every image listed in the loadinfo file, one line per image.
# FIRSTMBR starts out empty so that handle_loadinfo treats the first
# line as the first image.
FIRSTMBR=''
# The file is read on descriptor 3, not stdin: the commands run by
# handle_loadinfo inherit stdin and could otherwise swallow the
# remaining lines.
while read -r line <&3; do
	# $line is unquoted so each KEY=VALUE word is its own argument.
	if ! handle_loadinfo $line; then
		echo "Failed to load disk, dropping to login prompt at $(date)" 1>&2
		exit 1
	fi
done 3< /tmp/loadinfo.out
rm -f /tmp/loadinfo.out

#
# If requested to reboot, do so.
#
# Note: there is a race condition with stated here.
# If we reboot immediately after sending RELOADDONE,
# it is possible that, under heavy server load, we will
# finish the reboot and reach the bootinfo stage before
# stated gets and processes our RELOADDONE.  So now we
# wait around after sending the RELOADDONE.  stated should
# force us to reboot when the transition takes place.
# For backward compatibility we use a new state: RELOADDONEV2.
# For paranoia we just wait around for awhile and then
# reboot anyway, just in case stated's reboot fails for
# some reason.
#
if [ "$reboot" -eq 1 ]; then
    $BINDIR/tmcc state RELOADDONEV2
    echo "Waiting for server to reboot us ..."
    # Widearea nodes (IS_REMOTE=1) wait a shorter time before giving up
    # on the server.
    if [ "$IS_REMOTE" -eq 1 ]; then
	sleep 30
    else
	sleep 240
    fi
    echo "No response from server, rebooting myself ..."
    /sbin/reboot
    sleep 100
else
    $BINDIR/tmcc state RELOADDONE
fi

#echo "Failed to load disk, dropping to login prompt at `date`"
#exit 1