recovery.c 24.5 KB
Newer Older
Ryusuke Konishi's avatar
Ryusuke Konishi committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 * recovery.c - NILFS recovery logic
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>
 */

#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
26
#include <linux/slab.h>
Ryusuke Konishi's avatar
Ryusuke Konishi committed
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
#include <linux/crc32.h>
#include "nilfs.h"
#include "segment.h"
#include "sufile.h"
#include "page.h"
#include "segbuf.h"

/*
 * Segment check result
 *
 * Result codes used while validating a log or a super root block.
 * The first enumerator has the value 0, which is what nilfs_validate_log()
 * returns on success.  nilfs_warn_segment_error() converts the other
 * codes into a kernel warning and a negative errno.
 */
enum {
	NILFS_SEG_VALID,
	NILFS_SEG_NO_SUPER_ROOT,	/* no super root in the last segment */
	NILFS_SEG_FAIL_IO,		/* a block could not be read */
	NILFS_SEG_FAIL_MAGIC,		/* segment summary magic mismatch */
	NILFS_SEG_FAIL_SEQ,		/* unexpected segment sequence number */
	NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT, /* bad size or CRC of super root */
	NILFS_SEG_FAIL_CHECKSUM_FULL,	/* CRC mismatch over the log payload */
	NILFS_SEG_FAIL_CONSISTENCY,	/* log contents are inconsistent */
};

/*
 * Work structure for recovery: describes one data block found in a
 * data-sync log.  Entries are created by nilfs_scan_dsync_log() and
 * consumed (and freed) by nilfs_recover_dsync_blocks() or
 * dispose_recovery_list().
 */
struct nilfs_recovery_block {
	ino_t ino;		/* Inode number of the file that this block
				   belongs to */
	sector_t blocknr;	/* disk block number the data is read from */
	__u64 vblocknr;		/* virtual block number (from the binfo) */
	unsigned long blkoff;	/* File offset of the data block (per block) */
	struct list_head list;	/* link in the list of blocks to recover */
};


/*
 * nilfs_warn_segment_error - report a segment check failure
 * @err: segment check result (one of the NILFS_SEG_* codes)
 *
 * Prints a warning that describes @err.  Codes without a dedicated
 * message are not logged.
 *
 * Returns -EIO for NILFS_SEG_FAIL_IO and -EINVAL for any other value.
 */
static int nilfs_warn_segment_error(int err)
{
	int errcode = -EINVAL;

	switch (err) {
	case NILFS_SEG_NO_SUPER_ROOT:
		printk(KERN_WARNING
		       "NILFS warning: No super root in the last segment\n");
		break;
	case NILFS_SEG_FAIL_IO:
		printk(KERN_WARNING
		       "NILFS warning: I/O error on loading last segment\n");
		errcode = -EIO;
		break;
	case NILFS_SEG_FAIL_MAGIC:
		printk(KERN_WARNING
		       "NILFS warning: Segment magic number invalid\n");
		break;
	case NILFS_SEG_FAIL_SEQ:
		printk(KERN_WARNING
		       "NILFS warning: Sequence number mismatch\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in super root\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_FULL:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in segment payload\n");
		break;
	case NILFS_SEG_FAIL_CONSISTENCY:
		printk(KERN_WARNING
		       "NILFS warning: Inconsistent segment\n");
		break;
	default:
		/* nothing to report */
		break;
	}
	return errcode;
}

/**
95 96
 * nilfs_compute_checksum - compute checksum of blocks continuously
 * @nilfs: nilfs object
Ryusuke Konishi's avatar
Ryusuke Konishi committed
97 98 99 100 101 102 103
 * @bhs: buffer head of start block
 * @sum: place to store result
 * @offset: offset bytes in the first block
 * @check_bytes: number of bytes to be checked
 * @start: DBN of start block
 * @nblock: number of blocks to be checked
 */
104 105 106 107
static int nilfs_compute_checksum(struct the_nilfs *nilfs,
				  struct buffer_head *bhs, u32 *sum,
				  unsigned long offset, u64 check_bytes,
				  sector_t start, unsigned long nblock)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
108
{
109
	unsigned int blocksize = nilfs->ns_blocksize;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
110 111 112 113 114 115
	unsigned long size;
	u32 crc;

	BUG_ON(offset >= blocksize);
	check_bytes -= offset;
	size = min_t(u64, check_bytes, blocksize - offset);
116
	crc = crc32_le(nilfs->ns_crc_seed,
Ryusuke Konishi's avatar
Ryusuke Konishi committed
117 118 119
		       (unsigned char *)bhs->b_data + offset, size);
	if (--nblock > 0) {
		do {
120 121 122
			struct buffer_head *bh;

			bh = __bread(nilfs->ns_bdev, ++start, blocksize);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
123 124 125 126 127 128 129 130 131 132 133 134 135 136
			if (!bh)
				return -EIO;
			check_bytes -= size;
			size = min_t(u64, check_bytes, blocksize);
			crc = crc32_le(crc, bh->b_data, size);
			brelse(bh);
		} while (--nblock > 0);
	}
	*sum = crc;
	return 0;
}

/**
 * nilfs_read_super_root_block - read super root block
137
 * @nilfs: nilfs object
Ryusuke Konishi's avatar
Ryusuke Konishi committed
138 139 140 141
 * @sr_block: disk block number of the super root block
 * @pbh: address of a buffer_head pointer to return super root buffer
 * @check: CRC check flag
 */
142
int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
Ryusuke Konishi's avatar
Ryusuke Konishi committed
143 144 145 146 147 148 149 150
				struct buffer_head **pbh, int check)
{
	struct buffer_head *bh_sr;
	struct nilfs_super_root *sr;
	u32 crc;
	int ret;

	*pbh = NULL;
151
	bh_sr = __bread(nilfs->ns_bdev, sr_block, nilfs->ns_blocksize);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
152 153 154 155 156 157 158 159 160
	if (unlikely(!bh_sr)) {
		ret = NILFS_SEG_FAIL_IO;
		goto failed;
	}

	sr = (struct nilfs_super_root *)bh_sr->b_data;
	if (check) {
		unsigned bytes = le16_to_cpu(sr->sr_bytes);

161
		if (bytes == 0 || bytes > nilfs->ns_blocksize) {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
162 163 164
			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
			goto failed_bh;
		}
165 166 167
		if (nilfs_compute_checksum(
			    nilfs, bh_sr, &crc, sizeof(sr->sr_sum), bytes,
			    sr_block, 1)) {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
			ret = NILFS_SEG_FAIL_IO;
			goto failed_bh;
		}
		if (crc != le32_to_cpu(sr->sr_sum)) {
			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
			goto failed_bh;
		}
	}
	*pbh = bh_sr;
	return 0;

 failed_bh:
	brelse(bh_sr);

 failed:
	return nilfs_warn_segment_error(ret);
}

/**
187
 * nilfs_read_log_header - read summary header of the specified log
188
 * @nilfs: nilfs object
189 190
 * @start_blocknr: start block number of the log
 * @sum: pointer to return segment summary structure
Ryusuke Konishi's avatar
Ryusuke Konishi committed
191
 */
192 193 194
static struct buffer_head *
nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr,
		      struct nilfs_segment_summary **sum)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
195 196
{
	struct buffer_head *bh_sum;
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214

	bh_sum = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
	if (bh_sum)
		*sum = (struct nilfs_segment_summary *)bh_sum->b_data;
	return bh_sum;
}

/**
 * nilfs_validate_log - verify consistency of log
 * @nilfs: nilfs object
 * @seg_seq: sequence number of segment
 * @bh_sum: buffer head of summary block
 * @sum: segment summary struct
 *
 * Checks, in this order: the summary magic number, the sequence number,
 * the block count and finally the checksum over the whole log.
 *
 * Return: 0 if the log is valid, otherwise one of the positive
 * NILFS_SEG_FAIL_* codes naming the first check that failed.
 */
static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq,
			      struct buffer_head *bh_sum,
			      struct nilfs_segment_summary *sum)
{
	unsigned long nblocks;
	u32 crc;

	if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC)
		return NILFS_SEG_FAIL_MAGIC;

	if (le64_to_cpu(sum->ss_seq) != seg_seq)
		return NILFS_SEG_FAIL_SEQ;

	/* This limits the number of blocks read in the CRC check */
	nblocks = le32_to_cpu(sum->ss_nblocks);
	if (unlikely(nblocks == 0 || nblocks > nilfs->ns_blocks_per_segment))
		return NILFS_SEG_FAIL_CONSISTENCY;

	if (nilfs_compute_checksum(nilfs, bh_sum, &crc,
				   sizeof(sum->ss_datasum),
				   ((u64)nblocks << nilfs->ns_blocksize_bits),
				   bh_sum->b_blocknr, nblocks))
		return NILFS_SEG_FAIL_IO;

	if (crc != le32_to_cpu(sum->ss_datasum))
		return NILFS_SEG_FAIL_CHECKSUM_FULL;

	return 0;
}

247 248 249 250 251 252 253 254 255 256
/**
 * nilfs_read_summary_info - read an item on summary blocks of a log
 * @nilfs: nilfs object
 * @pbh: the current buffer head on summary blocks [in, out]
 * @offset: the current byte offset on summary blocks [in, out]
 * @bytes: byte size of the item to be read
 */
static void *nilfs_read_summary_info(struct the_nilfs *nilfs,
				     struct buffer_head **pbh,
				     unsigned int *offset, unsigned int bytes)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
257 258 259 260 261 262 263 264
{
	void *ptr;
	sector_t blocknr;

	BUG_ON((*pbh)->b_size < *offset);
	if (bytes > (*pbh)->b_size - *offset) {
		blocknr = (*pbh)->b_blocknr;
		brelse(*pbh);
265 266
		*pbh = __bread(nilfs->ns_bdev, blocknr + 1,
			       nilfs->ns_blocksize);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
267 268 269 270 271 272 273 274 275
		if (unlikely(!*pbh))
			return NULL;
		*offset = 0;
	}
	ptr = (*pbh)->b_data + *offset;
	*offset += bytes;
	return ptr;
}

276 277 278 279 280 281 282 283 284 285 286 287
/**
 * nilfs_skip_summary_info - skip items on summary blocks of a log
 * @nilfs: nilfs object
 * @pbh: the current buffer head on summary blocks [in, out]
 * @offset: the current byte offset on summary blocks [in, out]
 * @bytes: byte size of the item to be skipped
 * @count: number of items to be skipped
 */
static void nilfs_skip_summary_info(struct the_nilfs *nilfs,
				    struct buffer_head **pbh,
				    unsigned int *offset, unsigned int bytes,
				    unsigned long count)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
{
	unsigned int rest_item_in_current_block
		= ((*pbh)->b_size - *offset) / bytes;

	if (count <= rest_item_in_current_block) {
		*offset += bytes * count;
	} else {
		sector_t blocknr = (*pbh)->b_blocknr;
		unsigned int nitem_per_block = (*pbh)->b_size / bytes;
		unsigned int bcnt;

		count -= rest_item_in_current_block;
		bcnt = DIV_ROUND_UP(count, nitem_per_block);
		*offset = bytes * (count - (bcnt - 1) * nitem_per_block);

		brelse(*pbh);
304 305
		*pbh = __bread(nilfs->ns_bdev, blocknr + bcnt,
			       nilfs->ns_blocksize);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
306 307 308
	}
}

309 310 311 312
/**
 * nilfs_scan_dsync_log - get block information of a log written for data sync
 * @nilfs: nilfs object
 * @start_blocknr: start block number of the log
 * @sum: log summary information
 * @head: list head to add nilfs_recovery_block struct
 *
 * Walks the finfo/binfo items stored in the summary blocks of the log and
 * appends one nilfs_recovery_block to @head per data block.
 *
 * Return: 0 on success, -EIO if a summary block could not be read, or
 * -ENOMEM if a list entry could not be allocated.  Entries added before
 * a failure stay on @head.
 */
static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
				struct nilfs_segment_summary *sum,
				struct list_head *head)
{
	struct buffer_head *bh;
	unsigned int offset;
	sector_t blocknr;
	u32 nfinfo, sumbytes;
	int err = -EIO;

	nfinfo = le32_to_cpu(sum->ss_nfinfo);
	if (nfinfo == 0)
		return 0;

	/* Payload blocks start right behind the summary blocks */
	sumbytes = le32_to_cpu(sum->ss_sumbytes);
	blocknr = start_blocknr + DIV_ROUND_UP(sumbytes, nilfs->ns_blocksize);

	bh = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
	if (unlikely(!bh))
		return -EIO;

	/* The first finfo follows the summary header */
	offset = le16_to_cpu(sum->ss_bytes);
	for (;;) {
		struct nilfs_finfo *finfo;
		unsigned long nblocks, ndatablk, nnodeblk;
		ino_t ino;

		finfo = nilfs_read_summary_info(nilfs, &bh, &offset,
						sizeof(*finfo));
		if (unlikely(!finfo))
			goto out;

		ino = le64_to_cpu(finfo->fi_ino);
		nblocks = le32_to_cpu(finfo->fi_nblocks);
		ndatablk = le32_to_cpu(finfo->fi_ndatablk);
		nnodeblk = nblocks - ndatablk;

		/* One recovery entry per data block of this file */
		for (; ndatablk > 0; ndatablk--) {
			struct nilfs_binfo_v *binfo;
			struct nilfs_recovery_block *rb;

			binfo = nilfs_read_summary_info(nilfs, &bh, &offset,
							sizeof(*binfo));
			if (unlikely(!binfo))
				goto out;

			rb = kmalloc(sizeof(*rb), GFP_NOFS);
			if (unlikely(!rb)) {
				err = -ENOMEM;
				goto out;
			}
			rb->ino = ino;
			rb->blocknr = blocknr++;
			rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
			rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
			/* rb->list is initialized by list_add_tail() */
			list_add_tail(&rb->list, head);
		}

		if (--nfinfo == 0)
			break;

		/* Step over the node block entries of this file */
		blocknr += nnodeblk; /* always 0 for data sync logs */
		nilfs_skip_summary_info(nilfs, &bh, &offset, sizeof(__le64),
					nnodeblk);
		if (unlikely(!bh))
			goto out;
	}
	err = 0;
 out:
	brelse(bh);   /* brelse(NULL) is just ignored */
	return err;
}

static void dispose_recovery_list(struct list_head *head)
{
	while (!list_empty(head)) {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
390 391 392
		struct nilfs_recovery_block *rb;

		rb = list_first_entry(head, struct nilfs_recovery_block, list);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
393 394 395 396 397
		list_del(&rb->list);
		kfree(rb);
	}
}

398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415
/*
 * List entry that records one segment number.  Entries are allocated by
 * nilfs_segment_list_add() and freed by nilfs_dispose_segment_list() or
 * nilfs_prepare_segment_for_recovery().
 */
struct nilfs_segment_entry {
	struct list_head	list;	/* link in a segment list */
	__u64			segnum;	/* segment number */
};

/*
 * nilfs_segment_list_add - append a segment number to a segment list
 * @head: list head of nilfs_segment_entry entries
 * @segnum: segment number to record
 *
 * Returns 0 on success or -ENOMEM if the entry could not be allocated.
 */
static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
{
	struct nilfs_segment_entry *entry;

	entry = kmalloc(sizeof(*entry), GFP_NOFS);
	if (unlikely(!entry))
		return -ENOMEM;

	INIT_LIST_HEAD(&entry->list);
	entry->segnum = segnum;
	list_add_tail(&entry->list, head);
	return 0;
}

Ryusuke Konishi's avatar
Ryusuke Konishi committed
416 417 418
void nilfs_dispose_segment_list(struct list_head *head)
{
	while (!list_empty(head)) {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
419 420 421
		struct nilfs_segment_entry *ent;

		ent = list_first_entry(head, struct nilfs_segment_entry, list);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
422
		list_del(&ent->list);
423
		kfree(ent);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
424 425 426 427
	}
}

static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
428
					      struct super_block *sb,
Ryusuke Konishi's avatar
Ryusuke Konishi committed
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
					      struct nilfs_recovery_info *ri)
{
	struct list_head *head = &ri->ri_used_segments;
	struct nilfs_segment_entry *ent, *n;
	struct inode *sufile = nilfs->ns_sufile;
	__u64 segnum[4];
	int err;
	int i;

	segnum[0] = nilfs->ns_segnum;
	segnum[1] = nilfs->ns_nextnum;
	segnum[2] = ri->ri_segnum;
	segnum[3] = ri->ri_nextnum;

	/*
	 * Releasing the next segment of the latest super root.
	 * The next segment is invalidated by this recovery.
	 */
	err = nilfs_sufile_free(sufile, segnum[1]);
	if (unlikely(err))
		goto failed;

	for (i = 1; i < 4; i++) {
452 453
		err = nilfs_segment_list_add(head, segnum[i]);
		if (unlikely(err))
Ryusuke Konishi's avatar
Ryusuke Konishi committed
454 455 456 457 458
			goto failed;
	}

	/*
	 * Collecting segments written after the latest super root.
459
	 * These are marked dirty to avoid being reallocated in the next write.
Ryusuke Konishi's avatar
Ryusuke Konishi committed
460 461
	 */
	list_for_each_entry_safe(ent, n, head, list) {
462 463 464 465
		if (ent->segnum != segnum[0]) {
			err = nilfs_sufile_scrap(sufile, ent->segnum);
			if (unlikely(err))
				goto failed;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
466
		}
467
		list_del(&ent->list);
468
		kfree(ent);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
	}

	/* Allocate new segments for recovery */
	err = nilfs_sufile_alloc(sufile, &segnum[0]);
	if (unlikely(err))
		goto failed;

	nilfs->ns_pseg_offset = 0;
	nilfs->ns_seg_seq = ri->ri_seq + 2;
	nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];

 failed:
	/* No need to recover sufile because it will be destroyed on error */
	return err;
}

485
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
Ryusuke Konishi's avatar
Ryusuke Konishi committed
486 487 488 489 490 491
				     struct nilfs_recovery_block *rb,
				     struct page *page)
{
	struct buffer_head *bh_org;
	void *kaddr;

492
	bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
493 494 495
	if (unlikely(!bh_org))
		return -EIO;

496
	kaddr = kmap_atomic(page);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
497
	memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
498
	kunmap_atomic(kaddr);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
499 500 501 502
	brelse(bh_org);
	return 0;
}

503
static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
504
				      struct super_block *sb,
505
				      struct nilfs_root *root,
506 507
				      struct list_head *head,
				      unsigned long *nr_salvaged_blocks)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
508 509 510
{
	struct inode *inode;
	struct nilfs_recovery_block *rb, *n;
511
	unsigned blocksize = nilfs->ns_blocksize;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
512 513 514 515 516
	struct page *page;
	loff_t pos;
	int err = 0, err2 = 0;

	list_for_each_entry_safe(rb, n, head, list) {
517
		inode = nilfs_iget(sb, root, rb->ino);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
518 519 520 521 522 523 524
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			inode = NULL;
			goto failed_inode;
		}

		pos = rb->blkoff << inode->i_blkbits;
525 526 527 528 529
		err = block_write_begin(inode->i_mapping, pos, blocksize,
					0, &page, nilfs_get_block);
		if (unlikely(err)) {
			loff_t isize = inode->i_size;
			if (pos + blocksize > isize)
Marco Stornelli's avatar
Marco Stornelli committed
530 531
				nilfs_write_failed(inode->i_mapping,
							pos + blocksize);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
532
			goto failed_inode;
533
		}
Ryusuke Konishi's avatar
Ryusuke Konishi committed
534

535
		err = nilfs_recovery_copy_block(nilfs, rb, page);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
536 537 538
		if (unlikely(err))
			goto failed_page;

539
		err = nilfs_set_file_dirty(inode, 1);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
		if (unlikely(err))
			goto failed_page;

		block_write_end(NULL, inode->i_mapping, pos, blocksize,
				blocksize, page, NULL);

		unlock_page(page);
		page_cache_release(page);

		(*nr_salvaged_blocks)++;
		goto next;

 failed_page:
		unlock_page(page);
		page_cache_release(page);

 failed_inode:
		printk(KERN_WARNING
		       "NILFS warning: error recovering data block "
		       "(err=%d, ino=%lu, block-offset=%llu)\n",
560 561
		       err, (unsigned long)rb->ino,
		       (unsigned long long)rb->blkoff);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
562 563 564 565 566 567 568 569 570 571 572 573 574
		if (!err2)
			err2 = err;
 next:
		iput(inode); /* iput(NULL) is just ignored */
		list_del_init(&rb->list);
		kfree(rb);
	}
	return err2;
}

/**
 * nilfs_do_roll_forward - salvage logical segments newer than the latest
 * checkpoint
575
 * @nilfs: nilfs object
576
 * @sb: super block instance
Ryusuke Konishi's avatar
Ryusuke Konishi committed
577 578 579
 * @ri: pointer to a nilfs_recovery_info
 */
static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
580
				 struct super_block *sb,
581
				 struct nilfs_root *root,
Ryusuke Konishi's avatar
Ryusuke Konishi committed
582 583
				 struct nilfs_recovery_info *ri)
{
584
	struct buffer_head *bh_sum = NULL;
585
	struct nilfs_segment_summary *sum = NULL;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
586 587 588
	sector_t pseg_start;
	sector_t seg_start, seg_end;  /* Starting/ending DBN of full segment */
	unsigned long nsalvaged_blocks = 0;
589
	unsigned int flags;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
	u64 seg_seq;
	__u64 segnum, nextnum = 0;
	int empty_seg = 0;
	int err = 0, ret;
	LIST_HEAD(dsync_blocks);  /* list of data blocks to be recovered */
	enum {
		RF_INIT_ST,
		RF_DSYNC_ST,   /* scanning data-sync segments */
	};
	int state = RF_INIT_ST;

	pseg_start = ri->ri_lsegs_start;
	seg_seq = ri->ri_lsegs_start_seq;
	segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
	nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

	while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {
607 608 609 610 611 612
		brelse(bh_sum);
		bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
		if (!bh_sum) {
			err = -EIO;
			goto failed;
		}
Ryusuke Konishi's avatar
Ryusuke Konishi committed
613

614
		ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
615 616 617 618 619 620 621
		if (ret) {
			if (ret == NILFS_SEG_FAIL_IO) {
				err = -EIO;
				goto failed;
			}
			goto strayed;
		}
622

623 624
		flags = le16_to_cpu(sum->ss_flags);
		if (flags & NILFS_SS_SR)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
625 626 627
			goto confused;

		/* Found a valid partial segment; do recovery actions */
628 629
		nextnum = nilfs_get_segnum_of_block(nilfs,
						    le64_to_cpu(sum->ss_next));
Ryusuke Konishi's avatar
Ryusuke Konishi committed
630
		empty_seg = 0;
631 632 633
		nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
		if (!(flags & NILFS_SS_GC))
			nilfs->ns_nongc_ctime = nilfs->ns_ctime;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
634 635 636

		switch (state) {
		case RF_INIT_ST:
637 638
			if (!(flags & NILFS_SS_LOGBGN) ||
			    !(flags & NILFS_SS_SYNDT))
Ryusuke Konishi's avatar
Ryusuke Konishi committed
639 640 641 642
				goto try_next_pseg;
			state = RF_DSYNC_ST;
			/* Fall through */
		case RF_DSYNC_ST:
643
			if (!(flags & NILFS_SS_SYNDT))
Ryusuke Konishi's avatar
Ryusuke Konishi committed
644 645
				goto confused;

646
			err = nilfs_scan_dsync_log(nilfs, pseg_start, sum,
647
						   &dsync_blocks);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
648 649
			if (unlikely(err))
				goto failed;
650
			if (flags & NILFS_SS_LOGEND) {
651
				err = nilfs_recover_dsync_blocks(
652
					nilfs, sb, root, &dsync_blocks,
653
					&nsalvaged_blocks);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
654 655 656 657 658 659 660 661 662 663
				if (unlikely(err))
					goto failed;
				state = RF_INIT_ST;
			}
			break; /* Fall through to try_next_pseg */
		}

 try_next_pseg:
		if (pseg_start == ri->ri_lsegs_end)
			break;
664
		pseg_start += le32_to_cpu(sum->ss_nblocks);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 strayed:
		if (pseg_start == ri->ri_lsegs_end)
			break;

 feed_segment:
		/* Looking to the next full segment */
		if (empty_seg++)
			break;
		seg_seq++;
		segnum = nextnum;
		nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
		pseg_start = seg_start;
	}

	if (nsalvaged_blocks) {
		printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
685
		       sb->s_id, nsalvaged_blocks);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
686 687 688
		ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
	}
 out:
689
	brelse(bh_sum);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
690 691 692 693 694 695 696 697 698
	dispose_recovery_list(&dsync_blocks);
	return err;

 confused:
	err = -EINVAL;
 failed:
	printk(KERN_ERR
	       "NILFS (device %s): Error roll-forwarding "
	       "(err=%d, pseg block=%llu). ",
699
	       sb->s_id, err, (unsigned long long)pseg_start);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
700 701 702 703 704 705 706 707 708 709 710 711 712
	goto out;
}

static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
				      struct nilfs_recovery_info *ri)
{
	struct buffer_head *bh;
	int err;

	if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
	    nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
		return;

713
	bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
714 715 716 717 718 719 720 721 722 723 724 725
	BUG_ON(!bh);
	memset(bh->b_data, 0, bh->b_size);
	set_buffer_dirty(bh);
	err = sync_dirty_buffer(bh);
	if (unlikely(err))
		printk(KERN_WARNING
		       "NILFS warning: buffer sync write failed during "
		       "post-cleaning of recovery.\n");
	brelse(bh);
}

/**
726 727
 * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
 * @nilfs: nilfs object
728
 * @sb: super block instance
Ryusuke Konishi's avatar
Ryusuke Konishi committed
729 730 731 732 733 734 735 736 737 738 739 740 741 742 743
 * @ri: pointer to a nilfs_recovery_info struct to store search results.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error code is returned.
 *
 * %-EINVAL - Inconsistent filesystem state.
 *
 * %-EIO - I/O error
 *
 * %-ENOSPC - No space left on device (only in a panic state).
 *
 * %-ERESTARTSYS - Interrupted.
 *
 * %-ENOMEM - Insufficient memory available.
 */
744
int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
745
			      struct super_block *sb,
746
			      struct nilfs_recovery_info *ri)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
747
{
748
	struct nilfs_root *root;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
749 750 751 752 753
	int err;

	if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
		return 0;

754
	err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
755 756 757 758 759 760
	if (unlikely(err)) {
		printk(KERN_ERR
		       "NILFS: error loading the latest checkpoint.\n");
		return err;
	}

761
	err = nilfs_do_roll_forward(nilfs, sb, root, ri);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
762 763 764 765
	if (unlikely(err))
		goto failed;

	if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
766
		err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
767 768 769 770 771 772
		if (unlikely(err)) {
			printk(KERN_ERR "NILFS: Error preparing segments for "
			       "recovery.\n");
			goto failed;
		}

773
		err = nilfs_attach_log_writer(sb, root);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
774 775 776 777
		if (unlikely(err))
			goto failed;

		set_nilfs_discontinued(nilfs);
778 779
		err = nilfs_construct_segment(sb);
		nilfs_detach_log_writer(sb);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
780 781 782 783 784 785 786

		if (unlikely(err)) {
			printk(KERN_ERR "NILFS: Oops! recovery failed. "
			       "(err=%d)\n", err);
			goto failed;
		}

787
		nilfs_finish_roll_forward(nilfs, ri);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
788 789 790
	}

 failed:
791
	nilfs_put_root(root);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809
	return err;
}

/**
 * nilfs_search_super_root - search the latest valid super root
 * @nilfs: the_nilfs
 * @ri: pointer to a nilfs_recovery_info struct to store search results.
 *
 * nilfs_search_super_root() looks for the latest super-root from a partial
 * segment pointed by the superblock.  It sets up struct the_nilfs through
 * this search. It fills nilfs_recovery_info (ri) required for recovery.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error code is returned.
 *
 * %-EINVAL - No valid segment found
 *
 * %-EIO - I/O error
810 811
 *
 * %-ENOMEM - Insufficient memory available.
Ryusuke Konishi's avatar
Ryusuke Konishi committed
812
 */
813
int nilfs_search_super_root(struct the_nilfs *nilfs,
Ryusuke Konishi's avatar
Ryusuke Konishi committed
814 815
			    struct nilfs_recovery_info *ri)
{
816
	struct buffer_head *bh_sum = NULL;
817
	struct nilfs_segment_summary *sum = NULL;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
818 819
	sector_t pseg_start, pseg_end, sr_pseg_start = 0;
	sector_t seg_start, seg_end; /* range of full segment (block number) */
820
	sector_t b, end;
821 822
	unsigned long nblocks;
	unsigned int flags;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
823 824 825 826 827 828 829 830 831 832 833 834 835 836 837
	u64 seg_seq;
	__u64 segnum, nextnum = 0;
	__u64 cno;
	LIST_HEAD(segments);
	int empty_seg = 0, scan_newer = 0;
	int ret;

	pseg_start = nilfs->ns_last_pseg;
	seg_seq = nilfs->ns_last_seq;
	cno = nilfs->ns_last_cno;
	segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);

	/* Calculate range of segment */
	nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

838 839 840
	/* Read ahead segment */
	b = seg_start;
	while (b <= seg_end)
841
		__breadahead(nilfs->ns_bdev, b++, nilfs->ns_blocksize);
842

Ryusuke Konishi's avatar
Ryusuke Konishi committed
843
	for (;;) {
844 845 846 847 848 849 850
		brelse(bh_sum);
		ret = NILFS_SEG_FAIL_IO;
		bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
		if (!bh_sum)
			goto failed;

		ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
851 852 853 854 855
		if (ret) {
			if (ret == NILFS_SEG_FAIL_IO)
				goto failed;
			goto strayed;
		}
856

857 858
		nblocks = le32_to_cpu(sum->ss_nblocks);
		pseg_end = pseg_start + nblocks - 1;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
859 860 861 862 863 864 865 866 867
		if (unlikely(pseg_end > seg_end)) {
			ret = NILFS_SEG_FAIL_CONSISTENCY;
			goto strayed;
		}

		/* A valid partial segment */
		ri->ri_pseg_start = pseg_start;
		ri->ri_seq = seg_seq;
		ri->ri_segnum = segnum;
868 869
		nextnum = nilfs_get_segnum_of_block(nilfs,
						    le64_to_cpu(sum->ss_next));
Ryusuke Konishi's avatar
Ryusuke Konishi committed
870 871 872
		ri->ri_nextnum = nextnum;
		empty_seg = 0;

873 874
		flags = le16_to_cpu(sum->ss_flags);
		if (!(flags & NILFS_SS_SR) && !scan_newer) {
875 876 877 878 879 880 881 882 883 884
			/* This will never happen because a superblock
			   (last_segment) always points to a pseg
			   having a super root. */
			ret = NILFS_SEG_FAIL_CONSISTENCY;
			goto failed;
		}

		if (pseg_start == seg_start) {
			nilfs_get_segment_range(nilfs, nextnum, &b, &end);
			while (b <= end)
885 886
				__breadahead(nilfs->ns_bdev, b++,
					     nilfs->ns_blocksize);
887
		}
888 889
		if (!(flags & NILFS_SS_SR)) {
			if (!ri->ri_lsegs_start && (flags & NILFS_SS_LOGBGN)) {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
890 891 892
				ri->ri_lsegs_start = pseg_start;
				ri->ri_lsegs_start_seq = seg_seq;
			}
893
			if (flags & NILFS_SS_LOGEND)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
894 895 896 897 898 899 900 901 902 903
				ri->ri_lsegs_end = pseg_start;
			goto try_next_pseg;
		}

		/* A valid super root was found. */
		ri->ri_cno = cno++;
		ri->ri_super_root = pseg_end;
		ri->ri_lsegs_start = ri->ri_lsegs_end = 0;

		nilfs_dispose_segment_list(&segments);
904 905
		sr_pseg_start = pseg_start;
		nilfs->ns_pseg_offset = pseg_start + nblocks - seg_start;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
906 907 908
		nilfs->ns_seg_seq = seg_seq;
		nilfs->ns_segnum = segnum;
		nilfs->ns_cno = cno;  /* nilfs->ns_cno = ri->ri_cno + 1 */
909
		nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
910 911 912 913
		nilfs->ns_nextnum = nextnum;

		if (scan_newer)
			ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
914 915 916 917 918
		else {
			if (nilfs->ns_mount_state & NILFS_VALID_FS)
				goto super_root_found;
			scan_newer = 1;
		}
Ryusuke Konishi's avatar
Ryusuke Konishi committed
919 920 921

 try_next_pseg:
		/* Standing on a course, or met an inconsistent state */
922
		pseg_start += nblocks;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 strayed:
		/* Off the trail */
		if (!scan_newer)
			/*
			 * This can happen if a checkpoint was written without
			 * barriers, or as a result of an I/O failure.
			 */
			goto failed;

 feed_segment:
		/* Looking to the next full segment */
		if (empty_seg++)
			goto super_root_found; /* found a valid super root */

941 942
		ret = nilfs_segment_list_add(&segments, segnum);
		if (unlikely(ret))
Ryusuke Konishi's avatar
Ryusuke Konishi committed
943 944 945 946 947 948 949 950 951 952
			goto failed;

		seg_seq++;
		segnum = nextnum;
		nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
		pseg_start = seg_start;
	}

 super_root_found:
	/* Updating pointers relating to the latest checkpoint */
953
	brelse(bh_sum);
954
	list_splice_tail(&segments, &ri->ri_used_segments);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
955 956 957 958 959 960
	nilfs->ns_last_pseg = sr_pseg_start;
	nilfs->ns_last_seq = nilfs->ns_seg_seq;
	nilfs->ns_last_cno = ri->ri_cno;
	return 0;

 failed:
961
	brelse(bh_sum);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
962 963 964
	nilfs_dispose_segment_list(&segments);
	return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
}