/*
 * segment.c - NILFS segment constructor.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>
 *
 */

#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/crc32.h>
#include <linux/pagevec.h>
35
#include <linux/slab.h>
Ryusuke Konishi's avatar
Ryusuke Konishi committed
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#include "nilfs.h"
#include "btnode.h"
#include "page.h"
#include "segment.h"
#include "sufile.h"
#include "cpfile.h"
#include "ifile.h"
#include "segbuf.h"


/*
 * Segment constructor
 */
/*
 * Size of the locally allocated inode vector (the on-stack ivec[] array
 * that nilfs_dispose_list() fills before releasing the inodes).
 */
#define SC_N_INODEVEC	16

/*
 * Upper limit of the number of segments appended in the collection
 * retry loop.
 */
#define SC_MAX_SEGDELTA 64

/* Construction mode of the segment constructor (values start at 1) */
enum {
	SC_LSEG_SR = 1,	/* Make a logical segment having a super root */
	SC_LSEG_DSYNC,	/* Flush data blocks of a given file and make
			   a logical segment without a super root */
	SC_FLUSH_FILE,	/* Flush data files; this leads to segment writes
			   without creating a checkpoint */
	SC_FLUSH_DAT,	/* Flush the DAT file. This also creates segments
			   without a checkpoint */
};

/*
 * Stage numbers of dirty block collection.
 * The values are consecutive, starting from NILFS_ST_INIT = 0 and ending
 * with NILFS_ST_DONE.
 */
enum {
	NILFS_ST_INIT = 0,
	NILFS_ST_GC,		/* Collecting dirty blocks for GC */
	NILFS_ST_FILE,
	NILFS_ST_IFILE,
	NILFS_ST_CPFILE,
	NILFS_ST_SUFILE,
	NILFS_ST_DAT,
	NILFS_ST_SR,		/* Super root */
	NILFS_ST_DSYNC,		/* Data sync blocks */
	NILFS_ST_DONE,
};

/* State flags of collection (each flag occupies a distinct bit) */
#define NILFS_CF_NODE		0x0001	/* Collecting node blocks */
#define NILFS_CF_IFILE_STARTED	0x0002	/* IFILE stage has started */
82
83
#define NILFS_CF_SUFREED	0x0004	/* Segment usages have been freed */
/* Union of the IFILE_STARTED and SUFREED collection flags */
#define NILFS_CF_HISTORY_MASK	(NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

/*
 * Operations depending on the construction mode and file type.
 *
 * The collect_* callbacks take the segment constructor context, a buffer
 * head and the inode the buffer belongs to, and return an int status (the
 * implementations in this file return 0 on success or a negative error
 * code).  The write_*_binfo callbacks store a block information entry at
 * the segment summary position described by the nilfs_segsum_pointer.
 * A callback may be NULL when a mode does not use it.
 */
struct nilfs_sc_operations {
	/* Collect a dirty data block */
	int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	/* Collect a dirty node block */
	int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	/* Collect a dirty bmap block */
	int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	/* Write the block information of a data block */
	void (*write_data_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
	/* Write the block information of a node block */
	void (*write_node_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
};

/*
 * Forward declarations of static functions that are used in this file
 * before their definitions appear.
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
107
static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

/*
 * Wraparound-aware comparisons of 32-bit sequence counters.
 *
 * Both operands must be of type __u32 (enforced by typecheck()).  The
 * difference is computed in unsigned arithmetic, which wraps modulo 2^32
 * by definition, and only the result is reinterpreted as a signed value.
 * Subtracting two values that were already cast to __s32 could overflow,
 * which is undefined behaviour for signed integers.
 *
 * nilfs_cnt32_gt(a, b) - true if counter @a is ahead of counter @b
 * nilfs_cnt32_ge(a, b) - true if counter @a is ahead of or equal to @b
 * nilfs_cnt32_lt(a, b) - true if counter @a is behind counter @b
 * nilfs_cnt32_le(a, b) - true if counter @a is behind or equal to @b
 */
#define nilfs_cnt32_gt(a, b)   \
	(typecheck(__u32, a) && typecheck(__u32, b) && \
	 ((__s32)((b) - (a)) < 0))
#define nilfs_cnt32_ge(a, b)   \
	(typecheck(__u32, a) && typecheck(__u32, b) && \
	 ((__s32)((a) - (b)) >= 0))
#define nilfs_cnt32_lt(a, b)  nilfs_cnt32_gt(b, a)
#define nilfs_cnt32_le(a, b)  nilfs_cnt32_ge(b, a)

/*
 * nilfs_prepare_segment_lock - hook a transaction context onto current task
 * @ti: caller-supplied transaction info, or NULL to allocate one from
 *      nilfs_transaction_cachep
 *
 * If current->journal_info already carries a NILFS transaction info (its
 * magic number matches), this is a nested call: the nest counter is
 * incremented and its new, positive value is returned.
 *
 * Otherwise a transaction info is initialized and installed in
 * current->journal_info.  A foreign journal_info value is kept in ti_save.
 *
 * Return: 0 for an outermost call, a positive nest count for a nested
 * call, or -ENOMEM if the transaction info could not be allocated.
 */
static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
{
	struct nilfs_transaction_info *owner = current->journal_info;
	void *foreign = NULL;

	if (owner && owner->ti_magic == NILFS_TI_MAGIC)
		return ++owner->ti_count;

	if (owner) {
		/*
		 * If journal_info field is occupied by other FS,
		 * it is saved and will be restored on
		 * nilfs_transaction_commit().
		 */
		printk(KERN_WARNING
		       "NILFS warning: journal info from a different "
		       "FS\n");
		foreign = current->journal_info;
	}

	if (ti) {
		ti->ti_flags = 0;
	} else {
		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
		if (!ti)
			return -ENOMEM;
		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
	}

	ti->ti_count = 0;
	ti->ti_save = foreign;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;
	return 0;
}

/**
 * nilfs_transaction_begin - start indivisible file operations.
 * @sb: super block
 * @ti: nilfs_transaction_info
 * @vacancy_check: flags for vacancy rate checks
 *
 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
 * the segment semaphore, to make a segment construction and write tasks
161
 * exclusive.  The function is used with nilfs_transaction_commit() in pairs.
Ryusuke Konishi's avatar
Ryusuke Konishi committed
162
163
164
165
166
167
 * The region enclosed by these two functions can be nested.  To avoid a
 * deadlock, the semaphore is only acquired or released in the outermost call.
 *
 * This function allocates a nilfs_transaction_info struct to keep context
 * information on it.  It is initialized and hooked onto the current task in
 * the outermost call.  If a pre-allocated struct is given to @ti, it is used
168
 * instead; otherwise a new struct is assigned from a slab.
Ryusuke Konishi's avatar
Ryusuke Konishi committed
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
 *
 * When @vacancy_check flag is set, this function will check the amount of
 * free space, and will wait for the GC to reclaim disk space if low capacity.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-ENOSPC - No space left on device
 */
int nilfs_transaction_begin(struct super_block *sb,
			    struct nilfs_transaction_info *ti,
			    int vacancy_check)
{
	struct the_nilfs *nilfs;
	int ret = nilfs_prepare_segment_lock(ti);

	if (unlikely(ret < 0))
		return ret;
	if (ret > 0)
		return 0;

192
	sb_start_intwrite(sb);
193

194
	nilfs = sb->s_fs_info;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
195
196
197
198
199
200
201
202
203
204
205
206
207
	down_read(&nilfs->ns_segctor_sem);
	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
		up_read(&nilfs->ns_segctor_sem);
		ret = -ENOSPC;
		goto failed;
	}
	return 0;

 failed:
	ti = current->journal_info;
	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
208
	sb_end_intwrite(sb);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
209
210
211
212
	return ret;
}

/**
213
 * nilfs_transaction_commit - commit indivisible file operations.
Ryusuke Konishi's avatar
Ryusuke Konishi committed
214
215
 * @sb: super block
 *
216
217
218
219
220
221
 * nilfs_transaction_commit() releases the read semaphore which is
 * acquired by nilfs_transaction_begin(). This is only performed
 * in outermost call of this function.  If a commit flag is set,
 * nilfs_transaction_commit() sets a timer to start the segment
 * constructor.  If a sync flag is set, it starts construction
 * directly.
Ryusuke Konishi's avatar
Ryusuke Konishi committed
222
 */
223
int nilfs_transaction_commit(struct super_block *sb)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
224
225
{
	struct nilfs_transaction_info *ti = current->journal_info;
226
	struct the_nilfs *nilfs = sb->s_fs_info;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
227
228
229
	int err = 0;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
230
	ti->ti_flags |= NILFS_TI_COMMIT;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
231
232
233
234
	if (ti->ti_count > 0) {
		ti->ti_count--;
		return 0;
	}
235
236
237
	if (nilfs->ns_writer) {
		struct nilfs_sc_info *sci = nilfs->ns_writer;

Ryusuke Konishi's avatar
Ryusuke Konishi committed
238
239
		if (ti->ti_flags & NILFS_TI_COMMIT)
			nilfs_segctor_start_timer(sci);
240
		if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
241
242
			nilfs_segctor_do_flush(sci, 0);
	}
243
	up_read(&nilfs->ns_segctor_sem);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
244
245
246
247
248
249
	current->journal_info = ti->ti_save;

	if (ti->ti_flags & NILFS_TI_SYNC)
		err = nilfs_construct_segment(sb);
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
250
	sb_end_intwrite(sb);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
251
252
253
	return err;
}

254
255
256
void nilfs_transaction_abort(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
257
	struct the_nilfs *nilfs = sb->s_fs_info;
258
259
260
261
262
263

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	if (ti->ti_count > 0) {
		ti->ti_count--;
		return;
	}
264
	up_read(&nilfs->ns_segctor_sem);
265
266
267
268

	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
269
	sb_end_intwrite(sb);
270
271
}

Ryusuke Konishi's avatar
Ryusuke Konishi committed
272
273
void nilfs_relax_pressure_in_lock(struct super_block *sb)
{
274
	struct the_nilfs *nilfs = sb->s_fs_info;
275
	struct nilfs_sc_info *sci = nilfs->ns_writer;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294

	if (!sci || !sci->sc_flush_request)
		return;

	set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
	up_read(&nilfs->ns_segctor_sem);

	down_write(&nilfs->ns_segctor_sem);
	if (sci->sc_flush_request &&
	    test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
		struct nilfs_transaction_info *ti = current->journal_info;

		ti->ti_flags |= NILFS_TI_WRITER;
		nilfs_segctor_do_immediate_flush(sci);
		ti->ti_flags &= ~NILFS_TI_WRITER;
	}
	downgrade_write(&nilfs->ns_segctor_sem);
}

295
static void nilfs_transaction_lock(struct super_block *sb,
Ryusuke Konishi's avatar
Ryusuke Konishi committed
296
297
298
299
				   struct nilfs_transaction_info *ti,
				   int gcflag)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
300
	struct the_nilfs *nilfs = sb->s_fs_info;
301
	struct nilfs_sc_info *sci = nilfs->ns_writer;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
302

303
	WARN_ON(cur_ti);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
304
305
306
307
308
309
310
	ti->ti_flags = NILFS_TI_WRITER;
	ti->ti_count = 0;
	ti->ti_save = cur_ti;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;

	for (;;) {
311
312
		down_write(&nilfs->ns_segctor_sem);
		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
Ryusuke Konishi's avatar
Ryusuke Konishi committed
313
314
			break;

315
		nilfs_segctor_do_immediate_flush(sci);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
316

317
		up_write(&nilfs->ns_segctor_sem);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
318
319
320
321
322
323
		yield();
	}
	if (gcflag)
		ti->ti_flags |= NILFS_TI_GC;
}

324
static void nilfs_transaction_unlock(struct super_block *sb)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
325
326
{
	struct nilfs_transaction_info *ti = current->journal_info;
327
	struct the_nilfs *nilfs = sb->s_fs_info;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
328
329
330
331

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	BUG_ON(ti->ti_count > 0);

332
	up_write(&nilfs->ns_segctor_sem);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
	current->journal_info = ti->ti_save;
}

/*
 * nilfs_segctor_map_segsum_entry - reserve room for a segment summary entry
 * @sci: nilfs_sc_info
 * @ssp: position in the segment summary (buffer head and byte offset)
 * @bytes: size of the entry
 *
 * If the entry does not fit into the rest of the current summary block,
 * @ssp is moved to the beginning of the next summary buffer; it is a bug
 * if the current buffer is the last one.  @ssp is then advanced by @bytes.
 *
 * Return: pointer to the start of the reserved entry.
 */
static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
					    struct nilfs_segsum_pointer *ssp,
					    unsigned bytes)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	unsigned blocksize = sci->sc_super->s_blocksize;
	void *entry;

	if (unlikely(ssp->offset + bytes > blocksize)) {
		/* Entry would cross the block boundary: use the next block */
		ssp->offset = 0;
		BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
					       &segbuf->sb_segsum_buffers));
		ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
	}

	entry = ssp->bh->b_data + ssp->offset;
	ssp->offset += bytes;

	return entry;
}

/**
 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
 * @sci: nilfs_sc_info
 *
 * Resets the current segment buffer with nilfs_segbuf_reset() (passing
 * NILFS_SS_GC as flag while GC is in progress), points both the finfo and
 * the binfo summary pointers to the current end of the summary in the
 * first summary buffer, and clears the per-file block counters.
 *
 * Return: 0 on success, or the error code of nilfs_segbuf_reset().
 */
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	struct buffer_head *first_sum_bh;
	unsigned sum_end;
	unsigned flags = nilfs_doing_gc() ? NILFS_SS_GC : 0;
	int ret;

	ret = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno);
	if (unlikely(ret))
		return ret;

	first_sum_bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	sum_end = segbuf->sb_sum.sumbytes;

	sci->sc_finfo_ptr.bh = first_sum_bh;
	sci->sc_finfo_ptr.offset = sum_end;
	sci->sc_binfo_ptr.bh = first_sum_bh;
	sci->sc_binfo_ptr.offset = sum_end;

	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
	return 0;
}

static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
	sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
	if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
		return -E2BIG; /* The current segment is filled up
				  (internal code) */
	sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
	return nilfs_segctor_reset_segment_buffer(sci);
}

static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	int err;

	if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		segbuf = sci->sc_curseg;
	}
402
	err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
	if (likely(!err))
		segbuf->sb_sum.flags |= NILFS_SS_SR;
	return err;
}

/*
 * Functions for making segment summary and payloads
 */
static int nilfs_segctor_segsum_block_required(
	struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
	unsigned binfo_size)
{
	unsigned blocksize = sci->sc_super->s_blocksize;
	/* Size of finfo and binfo is enough small against blocksize */

	return ssp->offset + binfo_size +
		(!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
		blocksize;
}

static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
				      struct inode *inode)
{
	sci->sc_curseg->sb_sum.nfinfo++;
	sci->sc_binfo_ptr = sci->sc_finfo_ptr;
	nilfs_segctor_map_segsum_entry(
		sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
430

431
432
	if (NILFS_I(inode)->i_root &&
	    !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
433
		set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
434
435
436
437
438
439
440
441
442
	/* skip finfo */
}

static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
				    struct inode *inode)
{
	struct nilfs_finfo *finfo;
	struct nilfs_inode_info *ii;
	struct nilfs_segment_buffer *segbuf;
443
	__u64 cno;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
444
445
446
447
448

	if (sci->sc_blk_cnt == 0)
		return;

	ii = NILFS_I(inode);
449
450
451
452
453
454
455
456

	if (test_bit(NILFS_I_GCINODE, &ii->i_state))
		cno = ii->i_cno;
	else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
		cno = 0;
	else
		cno = sci->sc_cno;

Ryusuke Konishi's avatar
Ryusuke Konishi committed
457
458
459
460
461
	finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
						 sizeof(*finfo));
	finfo->fi_ino = cpu_to_le64(inode->i_ino);
	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
462
	finfo->fi_cno = cpu_to_le64(cno);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514

	segbuf = sci->sc_curseg;
	segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
		sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
	sci->sc_finfo_ptr = sci->sc_binfo_ptr;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
}

/*
 * nilfs_segctor_add_file_block - add a file block to the current log
 * @sci: nilfs_sc_info
 * @bh: buffer head of the block to add
 * @inode: inode the block belongs to
 * @binfo_size: size of the binfo entry to reserve for the block
 *
 * Return: 0 on success, or a negative error code returned by
 * nilfs_segctor_feed_segment() or nilfs_segbuf_extend_segsum().
 */
static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
					struct buffer_head *bh,
					struct inode *inode,
					unsigned binfo_size)
{
	struct nilfs_segment_buffer *segbuf;
	int required, err = 0;

 retry:
	segbuf = sci->sc_curseg;
	/* required is 1 if the binfo needs an additional summary block */
	required = nilfs_segctor_segsum_block_required(
		sci, &sci->sc_binfo_ptr, binfo_size);
	/*
	 * Not enough room for the block (plus the extra summary block, if
	 * any): close the finfo of this file, switch to the next segment
	 * buffer and start over.
	 */
	if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
		nilfs_segctor_end_finfo(sci, inode);
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		goto retry;
	}
	if (unlikely(required)) {
		err = nilfs_segbuf_extend_segsum(segbuf);
		if (unlikely(err))
			goto failed;
	}
	/* First block of this file in the current log: open a new finfo */
	if (sci->sc_blk_cnt == 0)
		nilfs_segctor_begin_finfo(sci, inode);

	/* Only reserve the binfo slot here; the returned pointer is unused */
	nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
	/* Substitution to vblocknr is delayed until update_blocknr() */
	nilfs_segbuf_add_file_buffer(segbuf, bh);
	sci->sc_blk_cnt++;
 failed:
	return err;
}

/*
 * Callback functions that enumerate, mark, and collect dirty blocks
 */
static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
				   struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
515
516
	if (err < 0)
		return err;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
517
518
519
520
521
522
523
524
525
526
527
528

	err = nilfs_segctor_add_file_block(sci, bh, inode,
					   sizeof(struct nilfs_binfo_v));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
529
	return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
530
531
532
533
534
535
}

static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
536
	WARN_ON(!buffer_dirty(bh));
Ryusuke Konishi's avatar
Ryusuke Konishi committed
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
	return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
}

/*
 * nilfs_write_file_data_binfo - store the binfo of a file data block
 * @sci: nilfs_sc_info
 * @ssp: position in the segment summary; advanced past the entry
 * @binfo: block information; its bi_v member is copied
 */
static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	struct nilfs_binfo_v *slot;

	slot = nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*slot));
	*slot = binfo->bi_v;
}

/*
 * nilfs_write_file_node_binfo - store the binfo of a file node block
 * @sci: nilfs_sc_info
 * @ssp: position in the segment summary; advanced past the entry
 * @binfo: block information; only bi_v.bi_vblocknr is stored
 */
static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	__le64 *slot;

	slot = nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*slot));
	*slot = binfo->bi_v.bi_vblocknr;
}

558
static struct nilfs_sc_operations nilfs_sc_file_ops = {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
559
560
561
562
563
564
565
566
567
568
569
570
571
	.collect_data = nilfs_collect_file_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_file_bmap,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = nilfs_write_file_node_binfo,
};

static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
572
573
	if (err < 0)
		return err;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
574
575
576
577
578
579
580
581
582
583

	err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
584
	WARN_ON(!buffer_dirty(bh));
Ryusuke Konishi's avatar
Ryusuke Konishi committed
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
	return nilfs_segctor_add_file_block(sci, bh, inode,
					    sizeof(struct nilfs_binfo_dat));
}

/*
 * nilfs_write_dat_data_binfo - store the binfo of a data block (DAT variant)
 * @sci: nilfs_sc_info
 * @ssp: position in the segment summary; advanced past the entry
 * @binfo: block information; only bi_dat.bi_blkoff is stored
 */
static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	__le64 *slot;

	slot = nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*slot));
	*slot = binfo->bi_dat.bi_blkoff;
}

/*
 * nilfs_write_dat_node_binfo - store the binfo of a node block (DAT variant)
 * @sci: nilfs_sc_info
 * @ssp: position in the segment summary; advanced past the entry
 * @binfo: block information; its bi_dat member is copied
 */
static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	struct nilfs_binfo_dat *slot;

	slot = nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*slot));
	*slot = binfo->bi_dat;
}

607
static struct nilfs_sc_operations nilfs_sc_dat_ops = {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
608
609
610
611
612
613
614
	.collect_data = nilfs_collect_dat_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_dat_bmap,
	.write_data_binfo = nilfs_write_dat_data_binfo,
	.write_node_binfo = nilfs_write_dat_node_binfo,
};

615
static struct nilfs_sc_operations nilfs_sc_dsync_ops = {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
616
617
618
619
620
621
622
	.collect_data = nilfs_collect_file_data,
	.collect_node = NULL,
	.collect_bmap = NULL,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = NULL,
};

623
624
625
626
/*
 * nilfs_lookup_dirty_data_buffers - gather dirty data buffers of an inode
 * @inode: inode whose page cache (i_mapping) is scanned
 * @listp: list the buffers are appended to (linked via b_assoc_buffers)
 * @nlimit: stop after this many buffers have been gathered
 * @start: start byte offset of the range to scan
 * @end: end byte offset of the range to scan
 *
 * Scans the pages tagged dirty in the page cache and appends every buffer
 * that is dirty and not under asynchronous write to @listp, taking a
 * reference on each buffer.  Unless @start is 0 and @end is LLONG_MAX the
 * scan is limited to the pages covering [@start, @end].
 *
 * Return: the number of buffers appended to @listp.
 */
static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
					      struct list_head *listp,
					      size_t nlimit,
					      loff_t start, loff_t end)
{
	struct address_space *mapping = inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = 0, last = ULONG_MAX;
	size_t ndirties = 0;
	int i;

	if (unlikely(start != 0 || end != LLONG_MAX)) {
		/*
		 * A valid range is given for sync-ing data pages. The
		 * range is rounded to per-page; extra dirty buffers
		 * may be included if blocksize < pagesize.
		 */
		index = start >> PAGE_SHIFT;
		last = end >> PAGE_SHIFT;
	}
	pagevec_init(&pvec, 0);
 repeat:
	/* Never ask for pages beyond "last" */
	if (unlikely(index > last) ||
	    !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				min_t(pgoff_t, last - index,
				      PAGEVEC_SIZE - 1) + 1))
		return ndirties;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct buffer_head *bh, *head;
		struct page *page = pvec.pages[i];

		if (unlikely(page->index > last))
			break;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			/*
			 * The page was removed from this address space
			 * (e.g. by truncation) after the lookup; it must
			 * not get buffers attached or be collected.
			 */
			unlock_page(page);
			continue;
		}
		if (!page_has_buffers(page))
			create_empty_buffers(page, 1 << inode->i_blkbits, 0);
		unlock_page(page);

		bh = head = page_buffers(page);
		do {
			if (!buffer_dirty(bh) || buffer_async_write(bh))
				continue;
			get_bh(bh);
			list_add_tail(&bh->b_assoc_buffers, listp);
			ndirties++;
			if (unlikely(ndirties >= nlimit)) {
				pagevec_release(&pvec);
				cond_resched();
				return ndirties;
			}
		} while (bh = bh->b_this_page, bh != head);
	}
	pagevec_release(&pvec);
	cond_resched();
	goto repeat;
}

static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
					    struct list_head *listp)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct address_space *mapping = &ii->i_btnode_cache;
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			bh = head = page_buffers(pvec.pages[i]);
			do {
699
700
				if (buffer_dirty(bh) &&
						!buffer_async_write(bh)) {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
701
702
703
704
705
706
707
708
709
710
711
712
					get_bh(bh);
					list_add_tail(&bh->b_assoc_buffers,
						      listp);
				}
				bh = bh->b_this_page;
			} while (bh != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

713
static void nilfs_dispose_list(struct the_nilfs *nilfs,
Ryusuke Konishi's avatar
Ryusuke Konishi committed
714
715
716
717
718
719
720
			       struct list_head *head, int force)
{
	struct nilfs_inode_info *ii, *n;
	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
	unsigned nv = 0;

	while (!list_empty(head)) {
721
		spin_lock(&nilfs->ns_inode_lock);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
722
723
724
725
726
727
728
729
730
731
		list_for_each_entry_safe(ii, n, head, i_dirty) {
			list_del_init(&ii->i_dirty);
			if (force) {
				if (unlikely(ii->i_bh)) {
					brelse(ii->i_bh);
					ii->i_bh = NULL;
				}
			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
				set_bit(NILFS_I_QUEUED, &ii->i_state);
				list_add_tail(&ii->i_dirty,
732
					      &nilfs->ns_dirty_files);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
733
734
735
736
737
738
				continue;
			}
			ivec[nv++] = ii;
			if (nv == SC_N_INODEVEC)
				break;
		}
739
		spin_unlock(&nilfs->ns_inode_lock);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
740
741
742
743
744
745

		for (pii = ivec; nv > 0; pii++, nv--)
			iput(&(*pii)->vfs_inode);
	}
}

746
747
748
749
750
751
752
753
754
/*
 * nilfs_iput_work_func - work function releasing the queued inodes
 * @work: the sc_iput_work member embedded in a nilfs_sc_info
 *
 * Disposes of the inodes on sc_iput_queue in non-forced mode.
 */
static void nilfs_iput_work_func(struct work_struct *work)
{
	struct nilfs_sc_info *sci;

	sci = container_of(work, struct nilfs_sc_info, sc_iput_work);
	nilfs_dispose_list(sci->sc_super->s_fs_info, &sci->sc_iput_queue, 0);
}

755
756
static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
				     struct nilfs_root *root)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
757
758
759
{
	int ret = 0;

760
	if (nilfs_mdt_fetch_dirty(root->ifile))
Ryusuke Konishi's avatar
Ryusuke Konishi committed
761
762
763
764
765
		ret++;
	if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
		ret++;
	if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
		ret++;
766
767
	if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
		ret++;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
768
769
770
771
772
773
774
	return ret;
}

/*
 * nilfs_segctor_clean - test if the segment constructor has nothing to write
 * @sci: nilfs_sc_info
 *
 * Return: 1 if sc_dirty_files is empty, NILFS_SC_DIRTY is not set, no
 * segments are to be freed and (unless GC is in progress) regardless of
 * sc_gc_inodes, which must be empty during GC; 0 otherwise.
 */
static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
{
	if (!list_empty(&sci->sc_dirty_files))
		return 0;
	if (test_bit(NILFS_SC_DIRTY, &sci->sc_flags))
		return 0;
	if (sci->sc_nfreesegs != 0)
		return 0;

	return !nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes);
}

static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
{
781
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
782
783
	int ret = 0;

784
	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
Ryusuke Konishi's avatar
Ryusuke Konishi committed
785
786
		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);

787
788
	spin_lock(&nilfs->ns_inode_lock);
	if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
Ryusuke Konishi's avatar
Ryusuke Konishi committed
789
790
		ret++;

791
	spin_unlock(&nilfs->ns_inode_lock);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
792
793
794
795
796
	return ret;
}

static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
{
797
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
798

799
	nilfs_mdt_clear_dirty(sci->sc_root->ifile);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
800
801
	nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
	nilfs_mdt_clear_dirty(nilfs->ns_sufile);
802
	nilfs_mdt_clear_dirty(nilfs->ns_dat);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
803
804
805
806
}

static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
{
807
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
808
809
810
811
812
813
814
815
816
817
818
	struct buffer_head *bh_cp;
	struct nilfs_checkpoint *raw_cp;
	int err;

	/* XXX: this interface will be changed */
	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
					  &raw_cp, &bh_cp);
	if (likely(!err)) {
		/* The following code is duplicated with cpfile.  But, it is
		   needed to collect the checkpoint even if it was not newly
		   created */
819
		mark_buffer_dirty(bh_cp);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
820
821
822
		nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
		nilfs_cpfile_put_checkpoint(
			nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
823
824
825
	} else
		WARN_ON(err == -EINVAL || err == -ENOENT);

Ryusuke Konishi's avatar
Ryusuke Konishi committed
826
827
828
829
830
	return err;
}

static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
{
831
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
832
833
834
835
836
837
838
	struct buffer_head *bh_cp;
	struct nilfs_checkpoint *raw_cp;
	int err;

	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
					  &raw_cp, &bh_cp);
	if (unlikely(err)) {
839
		WARN_ON(err == -EINVAL || err == -ENOENT);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
840
841
842
843
844
		goto failed_ibh;
	}
	raw_cp->cp_snapshot_list.ssl_next = 0;
	raw_cp->cp_snapshot_list.ssl_prev = 0;
	raw_cp->cp_inodes_count =
845
		cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
Ryusuke Konishi's avatar
Ryusuke Konishi committed
846
	raw_cp->cp_blocks_count =
847
		cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
Ryusuke Konishi's avatar
Ryusuke Konishi committed
848
849
850
851
	raw_cp->cp_nblk_inc =
		cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
	raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
	raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
852

853
854
855
856
857
	if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		nilfs_checkpoint_clear_minor(raw_cp);
	else
		nilfs_checkpoint_set_minor(raw_cp);

858
859
	nilfs_write_inode_common(sci->sc_root->ifile,
				 &raw_cp->cp_ifile_inode, 1);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
	return 0;

 failed_ibh:
	return err;
}

/*
 * nilfs_fill_in_file_bmap - write an inode's bmap into its on-disk inode
 * @ifile: inode file through which the raw inode is mapped
 * @ii: nilfs inode whose bmap is written
 *
 * Does nothing unless NILFS_I_BMAP is set in ii->i_state.  Otherwise the
 * raw inode is mapped through ii->i_bh (which must not be NULL), the bmap
 * is written into it with nilfs_bmap_write(), and the mapping is released.
 */
static void nilfs_fill_in_file_bmap(struct inode *ifile,
				    struct nilfs_inode_info *ii)
{
	struct buffer_head *inode_bh;
	struct nilfs_inode *disk_inode;

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;

	inode_bh = ii->i_bh;
	BUG_ON(!inode_bh);

	disk_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
					   inode_bh);
	nilfs_bmap_write(ii->i_bmap, disk_inode);
	nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, inode_bh);
}

884
static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
Ryusuke Konishi's avatar
Ryusuke Konishi committed
885
886
887
888
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
889
		nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
890
891
892
893
894
895
896
		set_bit(NILFS_I_COLLECTED, &ii->i_state);
	}
}

static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
897
898
	struct buffer_head *bh_sr;
	struct nilfs_super_root *raw_sr;
899
	unsigned isz, srsz;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
900

901
902
	bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
	raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
903
904
	isz = nilfs->ns_inode_size;
	srsz = NILFS_SR_BYTES(isz);
905

906
	raw_sr->sr_bytes = cpu_to_le16(srsz);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
907
908
909
910
911
	raw_sr->sr_nongc_ctime
		= cpu_to_le64(nilfs_doing_gc() ?
			      nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
	raw_sr->sr_flags = 0;

912
	nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
913
914
915
916
917
				 NILFS_SR_DAT_OFFSET(isz), 1);
	nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
				 NILFS_SR_CPFILE_OFFSET(isz), 1);
	nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
				 NILFS_SR_SUFILE_OFFSET(isz), 1);
918
	memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
}

/*
 * nilfs_redirty_inodes - drop the "collected" mark from listed inodes
 * @head: list of nilfs inodes linked through i_dirty
 *
 * Clears NILFS_I_COLLECTED on every inode of the list that currently has
 * the bit set; inodes without the bit are left untouched.
 */
static void nilfs_redirty_inodes(struct list_head *head)
{
	struct nilfs_inode_info *inode_info;

	list_for_each_entry(inode_info, head, i_dirty) {
		if (!test_bit(NILFS_I_COLLECTED, &inode_info->i_state))
			continue;

		clear_bit(NILFS_I_COLLECTED, &inode_info->i_state);
	}
}

/**
 * nilfs_drop_collected_inodes - finish state handling of collected inodes
 * @head: list of nilfs inodes linked through i_dirty
 *
 * For every inode on the list that has NILFS_I_COLLECTED set, atomically
 * clears that bit, clears NILFS_I_INODE_SYNC and sets NILFS_I_UPDATED.
 * Inodes without NILFS_I_COLLECTED are skipped.
 */
static void nilfs_drop_collected_inodes(struct list_head *head)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, head, i_dirty) {
		if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
			continue;

		clear_bit(NILFS_I_INODE_SYNC, &ii->i_state);
		set_bit(NILFS_I_UPDATED, &ii->i_state);
	}
}

static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
				       struct inode *inode,
				       struct list_head *listp,
				       int (*collect)(struct nilfs_sc_info *,
						      struct buffer_head *,
						      struct inode *))
{
	struct buffer_head *bh, *n;
	int err = 0;

	if (collect) {
		list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
			list_del_init(&bh->b_assoc_buffers);
			err = collect(sci, bh, inode);
			brelse(bh);
			if (unlikely(err))
				goto dispose_buffers;
		}
		return 0;
	}

 dispose_buffers:
	while (!list_empty(listp)) {
Ryusuke Konishi's avatar
Ryusuke Konishi committed
967
968
		bh = list_first_entry(listp, struct buffer_head,
				      b_assoc_buffers);
Ryusuke Konishi's avatar
Ryusuke Konishi committed
969
970
971
972
973
974
		list_del_init(&bh->b_assoc_buffers);
		brelse(bh);
	}
	return err;
}

975
976
977
978
979
980
981
/*
 * nilfs_segctor_buffer_rest - remaining room in the segment buffer
 * @sci: segment constructor object
 *
 * Returns sci->sc_segbuf_nblocks minus the sum of sci->sc_nblk_this_inc
 * and the nblocks count in the summary of the current segment buffer
 * (sci->sc_curseg).  No underflow check is performed here.
 */
static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
{
	/* Remaining number of blocks within segment buffer */
	return sci->sc_segbuf_nblocks -
		(sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks);
}

Ryusuke Konishi's avatar
Ryusuke Konishi committed
982
983
984
985
986
987
static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
				   struct inode *inode,
				   struct nilfs_sc_operations *sc_ops)
{
	LIST_HEAD(data_buffers);
	LIST_HEAD(node_buffers);
988
	int err;
Ryusuke Konishi's avatar
Ryusuke Konishi committed
989
990

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
991
992
993
994
995
996
		size_t n, rest = nilfs_segctor_buffer_rest(sci);

		n = nilfs_lookup_dirty_data_buffers(
			inode, &data_buffers, rest + 1, 0, LLONG_MAX);
		if (n > rest) {
			err = nilfs_segctor_apply_buffers(
Ryusuke Konishi's avatar
Ryusuke Konishi committed
997
				sci, inode, &data_buffers,
998
999
				sc_ops->collect_data);
			BUG_ON(!err); /* always receive -E2BIG or true error */
Ryusuke Konishi's avatar
Ryusuke Konishi committed
1000
			goto break_or_fail;
For faster browsing, not all history is shown. View entire blame