btree.c 62.6 KB
Newer Older
Koji Sato's avatar
Koji Sato committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/*
 * btree.c - NILFS B-tree.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Koji Sato <koji@osrg.net>.
 */

#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/pagevec.h>
#include "nilfs.h"
#include "page.h"
#include "btnode.h"
#include "btree.h"
#include "alloc.h"
32
#include "dat.h"
Koji Sato's avatar
Koji Sato committed
33

34 35
static void __nilfs_btree_init(struct nilfs_bmap *bmap);

36
static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
Koji Sato's avatar
Koji Sato committed
37
{
38 39
	struct nilfs_btree_path *path;
	int level = NILFS_BTREE_LEVEL_DATA;
Koji Sato's avatar
Koji Sato committed
40

41 42 43
	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
	if (path == NULL)
		goto out;
Koji Sato's avatar
Koji Sato committed
44

45
	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
Koji Sato's avatar
Koji Sato committed
46 47 48 49 50 51 52
		path[level].bp_bh = NULL;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index = 0;
		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
		path[level].bp_op = NULL;
	}
53 54 55 56 57

out:
	return path;
}

58
static void nilfs_btree_free_path(struct nilfs_btree_path *path)
59
{
60
	int level = NILFS_BTREE_LEVEL_DATA;
Koji Sato's avatar
Koji Sato committed
61

62
	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
63
		brelse(path[level].bp_bh);
64 65

	kmem_cache_free(nilfs_btree_path_cache, path);
Koji Sato's avatar
Koji Sato committed
66 67 68 69 70
}

/*
 * B-tree node operations
 */
71
static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
72 73
				     __u64 ptr, struct buffer_head **bhp)
{
74
	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
75
	struct buffer_head *bh;
76

77 78 79 80 81 82 83
	bh = nilfs_btnode_create_block(btnc, ptr);
	if (!bh)
		return -ENOMEM;

	set_buffer_nilfs_volatile(bh);
	*bhp = bh;
	return 0;
84
}
Koji Sato's avatar
Koji Sato committed
85

86
static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
Koji Sato's avatar
Koji Sato committed
87 88 89 90
{
	return node->bn_flags;
}

91
static void
92
nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
Koji Sato's avatar
Koji Sato committed
93 94 95 96
{
	node->bn_flags = flags;
}

97
static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
Koji Sato's avatar
Koji Sato committed
98
{
99
	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
Koji Sato's avatar
Koji Sato committed
100 101
}

102
static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
Koji Sato's avatar
Koji Sato committed
103 104 105 106
{
	return node->bn_level;
}

107
static void
108
nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
Koji Sato's avatar
Koji Sato committed
109 110 111 112
{
	node->bn_level = level;
}

113
static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
Koji Sato's avatar
Koji Sato committed
114 115 116 117
{
	return le16_to_cpu(node->bn_nchildren);
}

118
static void
119
nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
Koji Sato's avatar
Koji Sato committed
120 121 122 123
{
	node->bn_nchildren = cpu_to_le16(nchildren);
}

124
static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
Koji Sato's avatar
Koji Sato committed
125
{
126
	return 1 << btree->b_inode->i_blkbits;
Koji Sato's avatar
Koji Sato committed
127 128
}

129
static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
Koji Sato's avatar
Koji Sato committed
130
{
131
	return btree->b_nchildren_per_block;
Koji Sato's avatar
Koji Sato committed
132 133
}

134
static __le64 *
135
nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
Koji Sato's avatar
Koji Sato committed
136 137
{
	return (__le64 *)((char *)(node + 1) +
138
			  (nilfs_btree_node_root(node) ?
Koji Sato's avatar
Koji Sato committed
139 140 141
			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
}

142
static __le64 *
143
nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
Koji Sato's avatar
Koji Sato committed
144
{
145
	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
Koji Sato's avatar
Koji Sato committed
146 147
}

148
static __u64
149
nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
Koji Sato's avatar
Koji Sato committed
150
{
151
	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
Koji Sato's avatar
Koji Sato committed
152 153
}

154
static void
155
nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
Koji Sato's avatar
Koji Sato committed
156
{
157
	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
Koji Sato's avatar
Koji Sato committed
158 159
}

160
static __u64
161 162
nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
			 int ncmax)
Koji Sato's avatar
Koji Sato committed
163
{
164
	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
Koji Sato's avatar
Koji Sato committed
165 166
}

167
static void
168 169
nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
			 int ncmax)
Koji Sato's avatar
Koji Sato committed
170
{
171
	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
Koji Sato's avatar
Koji Sato committed
172 173
}

174 175
static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
				  int level, int nchildren, int ncmax,
Koji Sato's avatar
Koji Sato committed
176 177 178 179 180 181
				  const __u64 *keys, const __u64 *ptrs)
{
	__le64 *dkeys;
	__le64 *dptrs;
	int i;

182 183 184
	nilfs_btree_node_set_flags(node, flags);
	nilfs_btree_node_set_level(node, level);
	nilfs_btree_node_set_nchildren(node, nchildren);
Koji Sato's avatar
Koji Sato committed
185

186
	dkeys = nilfs_btree_node_dkeys(node);
187
	dptrs = nilfs_btree_node_dptrs(node, ncmax);
Koji Sato's avatar
Koji Sato committed
188
	for (i = 0; i < nchildren; i++) {
189 190
		dkeys[i] = cpu_to_le64(keys[i]);
		dptrs[i] = cpu_to_le64(ptrs[i]);
Koji Sato's avatar
Koji Sato committed
191 192 193 194
	}
}

/* Assume the buffer heads corresponding to left and right are locked. */
195
static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
Koji Sato's avatar
Koji Sato committed
196
				       struct nilfs_btree_node *right,
197
				       int n, int lncmax, int rncmax)
Koji Sato's avatar
Koji Sato committed
198 199 200 201 202
{
	__le64 *ldkeys, *rdkeys;
	__le64 *ldptrs, *rdptrs;
	int lnchildren, rnchildren;

203
	ldkeys = nilfs_btree_node_dkeys(left);
204
	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
205
	lnchildren = nilfs_btree_node_get_nchildren(left);
Koji Sato's avatar
Koji Sato committed
206

207
	rdkeys = nilfs_btree_node_dkeys(right);
208
	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
209
	rnchildren = nilfs_btree_node_get_nchildren(right);
Koji Sato's avatar
Koji Sato committed
210 211 212 213 214 215 216 217

	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));

	lnchildren += n;
	rnchildren -= n;
218 219
	nilfs_btree_node_set_nchildren(left, lnchildren);
	nilfs_btree_node_set_nchildren(right, rnchildren);
Koji Sato's avatar
Koji Sato committed
220 221 222
}

/* Assume that the buffer heads corresponding to left and right are locked. */
223
static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
Koji Sato's avatar
Koji Sato committed
224
					struct nilfs_btree_node *right,
225
					int n, int lncmax, int rncmax)
Koji Sato's avatar
Koji Sato committed
226 227 228 229 230
{
	__le64 *ldkeys, *rdkeys;
	__le64 *ldptrs, *rdptrs;
	int lnchildren, rnchildren;

231
	ldkeys = nilfs_btree_node_dkeys(left);
232
	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
233
	lnchildren = nilfs_btree_node_get_nchildren(left);
Koji Sato's avatar
Koji Sato committed
234

235
	rdkeys = nilfs_btree_node_dkeys(right);
236
	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
237
	rnchildren = nilfs_btree_node_get_nchildren(right);
Koji Sato's avatar
Koji Sato committed
238 239 240 241 242 243 244 245

	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));

	lnchildren -= n;
	rnchildren += n;
246 247
	nilfs_btree_node_set_nchildren(left, lnchildren);
	nilfs_btree_node_set_nchildren(right, rnchildren);
Koji Sato's avatar
Koji Sato committed
248 249 250
}

/* Assume that the buffer head corresponding to node is locked. */
251 252
static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
				    __u64 key, __u64 ptr, int ncmax)
Koji Sato's avatar
Koji Sato committed
253 254 255 256 257
{
	__le64 *dkeys;
	__le64 *dptrs;
	int nchildren;

258
	dkeys = nilfs_btree_node_dkeys(node);
259
	dptrs = nilfs_btree_node_dptrs(node, ncmax);
260
	nchildren = nilfs_btree_node_get_nchildren(node);
Koji Sato's avatar
Koji Sato committed
261 262 263 264 265 266
	if (index < nchildren) {
		memmove(dkeys + index + 1, dkeys + index,
			(nchildren - index) * sizeof(*dkeys));
		memmove(dptrs + index + 1, dptrs + index,
			(nchildren - index) * sizeof(*dptrs));
	}
267 268
	dkeys[index] = cpu_to_le64(key);
	dptrs[index] = cpu_to_le64(ptr);
Koji Sato's avatar
Koji Sato committed
269
	nchildren++;
270
	nilfs_btree_node_set_nchildren(node, nchildren);
Koji Sato's avatar
Koji Sato committed
271 272 273
}

/* Assume that the buffer head corresponding to node is locked. */
274 275
static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
				    __u64 *keyp, __u64 *ptrp, int ncmax)
Koji Sato's avatar
Koji Sato committed
276 277 278 279 280 281 282
{
	__u64 key;
	__u64 ptr;
	__le64 *dkeys;
	__le64 *dptrs;
	int nchildren;

283
	dkeys = nilfs_btree_node_dkeys(node);
284
	dptrs = nilfs_btree_node_dptrs(node, ncmax);
285 286
	key = le64_to_cpu(dkeys[index]);
	ptr = le64_to_cpu(dptrs[index]);
287
	nchildren = nilfs_btree_node_get_nchildren(node);
Koji Sato's avatar
Koji Sato committed
288 289 290 291 292 293 294 295 296 297 298 299
	if (keyp != NULL)
		*keyp = key;
	if (ptrp != NULL)
		*ptrp = ptr;

	if (index < nchildren - 1) {
		memmove(dkeys + index, dkeys + index + 1,
			(nchildren - index - 1) * sizeof(*dkeys));
		memmove(dptrs + index, dptrs + index + 1,
			(nchildren - index - 1) * sizeof(*dptrs));
	}
	nchildren--;
300
	nilfs_btree_node_set_nchildren(node, nchildren);
Koji Sato's avatar
Koji Sato committed
301 302
}

303
static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
Koji Sato's avatar
Koji Sato committed
304 305 306 307 308 309 310
				   __u64 key, int *indexp)
{
	__u64 nkey;
	int index, low, high, s;

	/* binary search */
	low = 0;
311
	high = nilfs_btree_node_get_nchildren(node) - 1;
Koji Sato's avatar
Koji Sato committed
312 313 314 315
	index = 0;
	s = 0;
	while (low <= high) {
		index = (low + high) / 2;
316
		nkey = nilfs_btree_node_get_key(node, index);
Koji Sato's avatar
Koji Sato committed
317 318 319 320 321 322 323 324 325 326 327 328 329
		if (nkey == key) {
			s = 0;
			goto out;
		} else if (nkey < key) {
			low = index + 1;
			s = -1;
		} else {
			high = index - 1;
			s = 1;
		}
	}

	/* adjust index */
330 331
	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
		if (s > 0 && index > 0)
Koji Sato's avatar
Koji Sato committed
332 333 334 335 336 337 338 339 340 341
			index--;
	} else if (s < 0)
		index++;

 out:
	*indexp = index;

	return s == 0;
}

342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
/**
 * nilfs_btree_node_broken - verify consistency of btree node
 * @node: btree node block to be examined
 * @size: node size (in bytes)
 * @blocknr: block number
 *
 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 */
static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
				   size_t size, sector_t blocknr)
{
	int level, flags, nchildren;
	int ret = 0;

	level = nilfs_btree_node_get_level(node);
	flags = nilfs_btree_node_get_flags(node);
	nchildren = nilfs_btree_node_get_nchildren(node);

	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
		     level >= NILFS_BTREE_LEVEL_MAX ||
		     (flags & NILFS_BTREE_NODE_ROOT) ||
		     nchildren < 0 ||
		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
		printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
		       "level = %d, flags = 0x%x, nchildren = %d\n",
		       (unsigned long long)blocknr, level, flags, nchildren);
		ret = 1;
	}
	return ret;
}

373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
/**
 * nilfs_btree_root_broken - verify consistency of btree root node
 * @node: btree root node to be examined
 * @ino: inode number
 *
 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 */
static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
				   unsigned long ino)
{
	int level, flags, nchildren;
	int ret = 0;

	level = nilfs_btree_node_get_level(node);
	flags = nilfs_btree_node_get_flags(node);
	nchildren = nilfs_btree_node_get_nchildren(node);

	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
391
		     level >= NILFS_BTREE_LEVEL_MAX ||
392 393 394 395 396 397 398 399 400
		     nchildren < 0 ||
		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
		pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
			ino, level, flags, nchildren);
		ret = 1;
	}
	return ret;
}

401 402
int nilfs_btree_broken_node_block(struct buffer_head *bh)
{
403 404 405 406 407 408
	int ret;

	if (buffer_nilfs_checked(bh))
		return 0;

	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
409
				       bh->b_size, bh->b_blocknr);
410 411 412
	if (likely(!ret))
		set_buffer_nilfs_checked(bh);
	return ret;
413 414
}

415
static struct nilfs_btree_node *
416
nilfs_btree_get_root(const struct nilfs_bmap *btree)
Koji Sato's avatar
Koji Sato committed
417
{
418
	return (struct nilfs_btree_node *)btree->b_u.u_data;
Koji Sato's avatar
Koji Sato committed
419 420
}

421
static struct nilfs_btree_node *
422
nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
Koji Sato's avatar
Koji Sato committed
423 424 425 426
{
	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
}

427
static struct nilfs_btree_node *
428
nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
Koji Sato's avatar
Koji Sato committed
429 430 431 432
{
	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
}

433
static int nilfs_btree_height(const struct nilfs_bmap *btree)
Koji Sato's avatar
Koji Sato committed
434
{
435
	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
Koji Sato's avatar
Koji Sato committed
436 437
}

438
static struct nilfs_btree_node *
439
nilfs_btree_get_node(const struct nilfs_bmap *btree,
Koji Sato's avatar
Koji Sato committed
440
		     const struct nilfs_btree_path *path,
441
		     int level, int *ncmaxp)
Koji Sato's avatar
Koji Sato committed
442
{
443 444 445 446 447 448 449 450 451 452
	struct nilfs_btree_node *node;

	if (level == nilfs_btree_height(btree) - 1) {
		node = nilfs_btree_get_root(btree);
		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
	} else {
		node = nilfs_btree_get_nonroot_node(path, level);
		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
	}
	return node;
Koji Sato's avatar
Koji Sato committed
453 454
}

455
static int
456 457 458 459 460 461 462 463 464 465 466
nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
{
	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
		dump_stack();
		printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
		       nilfs_btree_node_get_level(node), level);
		return 1;
	}
	return 0;
}

467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534
struct nilfs_btree_readahead_info {
	struct nilfs_btree_node *node;	/* parent node */
	int max_ra_blocks;		/* max nof blocks to read ahead */
	int index;			/* current index on the parent node */
	int ncmax;			/* nof children in the parent node */
};

static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
				   struct buffer_head **bhp,
				   const struct nilfs_btree_readahead_info *ra)
{
	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
	struct buffer_head *bh, *ra_bh;
	sector_t submit_ptr = 0;
	int ret;

	ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
	if (ret) {
		if (ret != -EEXIST)
			return ret;
		goto out_check;
	}

	if (ra) {
		int i, n;
		__u64 ptr2;

		/* read ahead sibling nodes */
		for (n = ra->max_ra_blocks, i = ra->index + 1;
		     n > 0 && i < ra->ncmax; n--, i++) {
			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);

			ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
							&ra_bh, &submit_ptr);
			if (likely(!ret || ret == -EEXIST))
				brelse(ra_bh);
			else if (ret != -EBUSY)
				break;
			if (!buffer_locked(bh))
				goto out_no_wait;
		}
	}

	wait_on_buffer(bh);

 out_no_wait:
	if (!buffer_uptodate(bh)) {
		brelse(bh);
		return -EIO;
	}

 out_check:
	if (nilfs_btree_broken_node_block(bh)) {
		clear_buffer_uptodate(bh);
		brelse(bh);
		return -EINVAL;
	}

	*bhp = bh;
	return 0;
}

static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
				   struct buffer_head **bhp)
{
	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
}

535
static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
Koji Sato's avatar
Koji Sato committed
536
				 struct nilfs_btree_path *path,
537 538
				 __u64 key, __u64 *ptrp, int minlevel,
				 int readahead)
Koji Sato's avatar
Koji Sato committed
539 540
{
	struct nilfs_btree_node *node;
541
	struct nilfs_btree_readahead_info p, *ra;
Koji Sato's avatar
Koji Sato committed
542
	__u64 ptr;
543
	int level, index, found, ncmax, ret;
Koji Sato's avatar
Koji Sato committed
544 545

	node = nilfs_btree_get_root(btree);
546 547
	level = nilfs_btree_node_get_level(node);
	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
Koji Sato's avatar
Koji Sato committed
548 549
		return -ENOENT;

550
	found = nilfs_btree_node_lookup(node, key, &index);
551 552
	ptr = nilfs_btree_node_get_ptr(node, index,
				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
Koji Sato's avatar
Koji Sato committed
553 554 555
	path[level].bp_bh = NULL;
	path[level].bp_index = index;

556
	ncmax = nilfs_btree_nchildren_per_block(btree);
557

558 559 560 561 562 563 564 565 566 567 568
	while (--level >= minlevel) {
		ra = NULL;
		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
			p.node = nilfs_btree_get_node(btree, path, level + 1,
						      &p.ncmax);
			p.index = index;
			p.max_ra_blocks = 7;
			ra = &p;
		}
		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
					      ra);
Koji Sato's avatar
Koji Sato committed
569 570
		if (ret < 0)
			return ret;
571

572
		node = nilfs_btree_get_nonroot_node(path, level);
573 574
		if (nilfs_btree_bad_node(node, level))
			return -EINVAL;
Koji Sato's avatar
Koji Sato committed
575
		if (!found)
576
			found = nilfs_btree_node_lookup(node, key, &index);
Koji Sato's avatar
Koji Sato committed
577 578
		else
			index = 0;
579
		if (index < ncmax) {
580
			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
581
		} else {
582
			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
Koji Sato's avatar
Koji Sato committed
583 584 585 586 587 588 589 590 591 592 593 594 595 596
			/* insert */
			ptr = NILFS_BMAP_INVALID_PTR;
		}
		path[level].bp_index = index;
	}
	if (!found)
		return -ENOENT;

	if (ptrp != NULL)
		*ptrp = ptr;

	return 0;
}

597
static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
Koji Sato's avatar
Koji Sato committed
598 599 600 601 602
				      struct nilfs_btree_path *path,
				      __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;
	__u64 ptr;
603
	int index, level, ncmax, ret;
Koji Sato's avatar
Koji Sato committed
604 605

	node = nilfs_btree_get_root(btree);
606
	index = nilfs_btree_node_get_nchildren(node) - 1;
Koji Sato's avatar
Koji Sato committed
607 608
	if (index < 0)
		return -ENOENT;
609
	level = nilfs_btree_node_get_level(node);
610 611
	ptr = nilfs_btree_node_get_ptr(node, index,
				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
Koji Sato's avatar
Koji Sato committed
612 613
	path[level].bp_bh = NULL;
	path[level].bp_index = index;
614
	ncmax = nilfs_btree_nchildren_per_block(btree);
Koji Sato's avatar
Koji Sato committed
615 616

	for (level--; level > 0; level--) {
617
		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
Koji Sato's avatar
Koji Sato committed
618 619
		if (ret < 0)
			return ret;
620
		node = nilfs_btree_get_nonroot_node(path, level);
621 622
		if (nilfs_btree_bad_node(node, level))
			return -EINVAL;
623
		index = nilfs_btree_node_get_nchildren(node) - 1;
624
		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
Koji Sato's avatar
Koji Sato committed
625 626 627 628
		path[level].bp_index = index;
	}

	if (keyp != NULL)
629
		*keyp = nilfs_btree_node_get_key(node, index);
Koji Sato's avatar
Koji Sato committed
630 631 632 633 634 635
	if (ptrp != NULL)
		*ptrp = ptr;

	return 0;
}

636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673
/**
 * nilfs_btree_get_next_key - get next valid key from btree path array
 * @btree: bmap struct of btree
 * @path: array of nilfs_btree_path struct
 * @minlevel: start level
 * @nextkey: place to store the next valid key
 *
 * Return Value: If a next key was found, 0 is returned. Otherwise,
 * -ENOENT is returned.
 */
static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
				    const struct nilfs_btree_path *path,
				    int minlevel, __u64 *nextkey)
{
	struct nilfs_btree_node *node;
	int maxlevel = nilfs_btree_height(btree) - 1;
	int index, next_adj, level;

	/* Next index is already set to bp_index for leaf nodes. */
	next_adj = 0;
	for (level = minlevel; level <= maxlevel; level++) {
		if (level == maxlevel)
			node = nilfs_btree_get_root(btree);
		else
			node = nilfs_btree_get_nonroot_node(path, level);

		index = path[level].bp_index + next_adj;
		if (index < nilfs_btree_node_get_nchildren(node)) {
			/* Next key is in this node */
			*nextkey = nilfs_btree_node_get_key(node, index);
			return 0;
		}
		/* For non-leaf nodes, next index is stored at bp_index + 1. */
		next_adj = 1;
	}
	return -ENOENT;
}

674
static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
Koji Sato's avatar
Koji Sato committed
675 676 677 678 679
			      __u64 key, int level, __u64 *ptrp)
{
	struct nilfs_btree_path *path;
	int ret;

680
	path = nilfs_btree_alloc_path();
Koji Sato's avatar
Koji Sato committed
681 682 683
	if (path == NULL)
		return -ENOMEM;

684
	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
Koji Sato's avatar
Koji Sato committed
685

686
	nilfs_btree_free_path(path);
Koji Sato's avatar
Koji Sato committed
687 688 689 690

	return ret;
}

691
static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
692 693 694 695 696 697 698 699
				     __u64 key, __u64 *ptrp, unsigned maxblocks)
{
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	struct inode *dat = NULL;
	__u64 ptr, ptr2;
	sector_t blocknr;
	int level = NILFS_BTREE_LEVEL_NODE_MIN;
700
	int ret, cnt, index, maxlevel, ncmax;
701
	struct nilfs_btree_readahead_info p;
702

703
	path = nilfs_btree_alloc_path();
704 705
	if (path == NULL)
		return -ENOMEM;
706

707
	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
708 709 710
	if (ret < 0)
		goto out;

711 712
	if (NILFS_BMAP_USE_VBN(btree)) {
		dat = nilfs_bmap_get_dat(btree);
713 714 715 716 717 718 719 720 721 722
		ret = nilfs_dat_translate(dat, ptr, &blocknr);
		if (ret < 0)
			goto out;
		ptr = blocknr;
	}
	cnt = 1;
	if (cnt == maxblocks)
		goto end;

	maxlevel = nilfs_btree_height(btree) - 1;
723
	node = nilfs_btree_get_node(btree, path, level, &ncmax);
724 725
	index = path[level].bp_index + 1;
	for (;;) {
726 727
		while (index < nilfs_btree_node_get_nchildren(node)) {
			if (nilfs_btree_node_get_key(node, index) !=
728 729
			    key + cnt)
				goto end;
730
			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
731 732 733 734 735 736 737 738 739 740 741 742 743 744 745
			if (dat) {
				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
				if (ret < 0)
					goto out;
				ptr2 = blocknr;
			}
			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
				goto end;
			index++;
			continue;
		}
		if (level == maxlevel)
			break;

		/* look-up right sibling node */
746 747 748 749 750
		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
		p.index = path[level + 1].bp_index + 1;
		p.max_ra_blocks = 7;
		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
751
			break;
752 753
		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
		path[level + 1].bp_index = p.index;
754 755 756

		brelse(path[level].bp_bh);
		path[level].bp_bh = NULL;
757 758 759

		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
					      &p);
760 761
		if (ret < 0)
			goto out;
762
		node = nilfs_btree_get_nonroot_node(path, level);
763
		ncmax = nilfs_btree_nchildren_per_block(btree);
764 765 766 767 768 769 770
		index = 0;
		path[level].bp_index = index;
	}
 end:
	*ptrp = ptr;
	ret = cnt;
 out:
771
	nilfs_btree_free_path(path);
772 773 774
	return ret;
}

775
static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
Koji Sato's avatar
Koji Sato committed
776 777 778 779 780 781
				    struct nilfs_btree_path *path,
				    int level, __u64 key)
{
	if (level < nilfs_btree_height(btree) - 1) {
		do {
			nilfs_btree_node_set_key(
782
				nilfs_btree_get_nonroot_node(path, level),
Koji Sato's avatar
Koji Sato committed
783 784
				path[level].bp_index, key);
			if (!buffer_dirty(path[level].bp_bh))
785
				mark_buffer_dirty(path[level].bp_bh);
Koji Sato's avatar
Koji Sato committed
786 787 788 789 790 791
		} while ((path[level].bp_index == 0) &&
			 (++level < nilfs_btree_height(btree) - 1));
	}

	/* root */
	if (level == nilfs_btree_height(btree) - 1) {
792
		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
Koji Sato's avatar
Koji Sato committed
793 794 795 796
					 path[level].bp_index, key);
	}
}

797
static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
Koji Sato's avatar
Koji Sato committed
798 799 800 801
				  struct nilfs_btree_path *path,
				  int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;
802
	int ncblk;
Koji Sato's avatar
Koji Sato committed
803 804

	if (level < nilfs_btree_height(btree) - 1) {
805
		node = nilfs_btree_get_nonroot_node(path, level);
806 807 808
		ncblk = nilfs_btree_nchildren_per_block(btree);
		nilfs_btree_node_insert(node, path[level].bp_index,
					*keyp, *ptrp, ncblk);
Koji Sato's avatar
Koji Sato committed
809
		if (!buffer_dirty(path[level].bp_bh))
810
			mark_buffer_dirty(path[level].bp_bh);
Koji Sato's avatar
Koji Sato committed
811 812 813

		if (path[level].bp_index == 0)
			nilfs_btree_promote_key(btree, path, level + 1,
814 815
						nilfs_btree_node_get_key(node,
									 0));
Koji Sato's avatar
Koji Sato committed
816 817
	} else {
		node = nilfs_btree_get_root(btree);
818 819 820
		nilfs_btree_node_insert(node, path[level].bp_index,
					*keyp, *ptrp,
					NILFS_BTREE_ROOT_NCHILDREN_MAX);
Koji Sato's avatar
Koji Sato committed
821 822 823
	}
}

824
static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
Koji Sato's avatar
Koji Sato committed
825 826 827 828
				   struct nilfs_btree_path *path,
				   int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
829
	int nchildren, lnchildren, n, move, ncblk;
Koji Sato's avatar
Koji Sato committed
830

831 832 833 834
	node = nilfs_btree_get_nonroot_node(path, level);
	left = nilfs_btree_get_sib_node(path, level);
	nchildren = nilfs_btree_node_get_nchildren(node);
	lnchildren = nilfs_btree_node_get_nchildren(left);
835
	ncblk = nilfs_btree_nchildren_per_block(btree);
Koji Sato's avatar
Koji Sato committed
836 837 838 839 840 841 842 843 844
	move = 0;

	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
	if (n > path[level].bp_index) {
		/* move insert point */
		n--;
		move = 1;
	}

845
	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
Koji Sato's avatar
Koji Sato committed
846 847

	if (!buffer_dirty(path[level].bp_bh))
848
		mark_buffer_dirty(path[level].bp_bh);
Koji Sato's avatar
Koji Sato committed
849
	if (!buffer_dirty(path[level].bp_sib_bh))
850
		mark_buffer_dirty(path[level].bp_sib_bh);
Koji Sato's avatar
Koji Sato committed
851 852

	nilfs_btree_promote_key(btree, path, level + 1,
853
				nilfs_btree_node_get_key(node, 0));
Koji Sato's avatar
Koji Sato committed
854 855

	if (move) {
856
		brelse(path[level].bp_bh);
Koji Sato's avatar
Koji Sato committed
857 858 859 860 861
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index += lnchildren;
		path[level + 1].bp_index--;
	} else {
862
		brelse(path[level].bp_sib_bh);
Koji Sato's avatar
Koji Sato committed
863 864 865 866 867 868 869
		path[level].bp_sib_bh = NULL;
		path[level].bp_index -= n;
	}

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
}

870
static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
Koji Sato's avatar
Koji Sato committed
871 872 873 874
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
875
	int nchildren, rnchildren, n, move, ncblk;
Koji Sato's avatar
Koji Sato committed
876

877 878 879 880
	node = nilfs_btree_get_nonroot_node(path, level);
	right = nilfs_btree_get_sib_node(path, level);
	nchildren = nilfs_btree_node_get_nchildren(node);
	rnchildren = nilfs_btree_node_get_nchildren(right);
881
	ncblk = nilfs_btree_nchildren_per_block(btree);
Koji Sato's avatar
Koji Sato committed
882 883 884 885 886 887 888 889 890
	move = 0;

	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
	if (n > nchildren - path[level].bp_index) {
		/* move insert point */
		n--;
		move = 1;
	}

891
	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
Koji Sato's avatar
Koji Sato committed
892 893

	if (!buffer_dirty(path[level].bp_bh))
894
		mark_buffer_dirty(path[level].bp_bh);
Koji Sato's avatar
Koji Sato committed
895
	if (!buffer_dirty(path[level].bp_sib_bh))
896
		mark_buffer_dirty(path[level].bp_sib_bh);
Koji Sato's avatar
Koji Sato committed
897 898 899

	path[level + 1].bp_index++;
	nilfs_btree_promote_key(btree, path, level + 1,
900
				nilfs_btree_node_get_key(right, 0));
Koji Sato's avatar
Koji Sato committed
901 902 903
	path[level + 1].bp_index--;

	if (move) {
904
		brelse(path[level].bp_bh);
Koji Sato's avatar
Koji Sato committed
905 906
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
907
		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
Koji Sato's avatar
Koji Sato committed
908 909
		path[level + 1].bp_index++;
	} else {
910
		brelse(path[level].bp_sib_bh);
Koji Sato's avatar
Koji Sato committed
911 912 913 914 915 916
		path[level].bp_sib_bh = NULL;
	}

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
}

917
static void nilfs_btree_split(struct nilfs_bmap *btree,
Koji Sato's avatar
Koji Sato committed
918 919 920 921
			      struct nilfs_btree_path *path,
			      int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
922
	int nchildren, n, move, ncblk;
Koji Sato's avatar
Koji Sato committed
923

924 925 926
	node = nilfs_btree_get_nonroot_node(path, level);
	right = nilfs_btree_get_sib_node(path, level);
	nchildren = nilfs_btree_node_get_nchildren(node);
927
	ncblk = nilfs_btree_nchildren_per_block(btree);
Koji Sato's avatar
Koji Sato committed
928 929 930 931 932 933 934 935
	move = 0;

	n = (nchildren + 1) / 2;
	if (n > nchildren - path[level].bp_index) {
		n--;
		move = 1;
	}

936
	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
Koji Sato's avatar
Koji Sato committed
937 938

	if (!buffer_dirty(path[level].bp_bh))
939
		mark_buffer_dirty(path[level].bp_bh);
Koji Sato's avatar
Koji Sato committed
940
	if (!buffer_dirty(path[level].bp_sib_bh))
941
		mark_buffer_dirty(path[level].bp_sib_bh);
Koji Sato's avatar
Koji Sato committed
942 943

	if (move) {
944
		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
945 946
		nilfs_btree_node_insert(right, path[level].bp_index,
					*keyp, *ptrp, ncblk);
Koji Sato's avatar
Koji Sato committed
947

948
		*keyp = nilfs_btree_node_get_key(right, 0);
Koji Sato's avatar
Koji Sato committed
949