xfs_trans.c 57 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
/*
2
 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3
 * Copyright (C) 2010 Red Hat, Inc.
4
 * All Rights Reserved.
Linus Torvalds's avatar
Linus Torvalds committed
5
 *
6
7
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
Linus Torvalds's avatar
Linus Torvalds committed
8
9
 * published by the Free Software Foundation.
 *
10
11
12
13
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
Linus Torvalds's avatar
Linus Torvalds committed
14
 *
15
16
17
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
Linus Torvalds's avatar
Linus Torvalds committed
18
19
 */
#include "xfs.h"
20
#include "xfs_fs.h"
Linus Torvalds's avatar
Linus Torvalds committed
21
#include "xfs_types.h"
22
#include "xfs_bit.h"
Linus Torvalds's avatar
Linus Torvalds committed
23
#include "xfs_log.h"
24
#include "xfs_inum.h"
Linus Torvalds's avatar
Linus Torvalds committed
25
26
27
28
29
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_error.h"
30
#include "xfs_da_btree.h"
Linus Torvalds's avatar
Linus Torvalds committed
31
#include "xfs_bmap_btree.h"
32
#include "xfs_alloc_btree.h"
Linus Torvalds's avatar
Linus Torvalds committed
33
34
35
#include "xfs_ialloc_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
36
37
38
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
Linus Torvalds's avatar
Linus Torvalds committed
39
40
#include "xfs_bmap.h"
#include "xfs_quota.h"
41
#include "xfs_trans_priv.h"
Linus Torvalds's avatar
Linus Torvalds committed
42
#include "xfs_trans_space.h"
43
#include "xfs_inode_item.h"
44
#include "xfs_trace.h"
Linus Torvalds's avatar
Linus Torvalds committed
45

46
/* Zone that transaction structures in this file are allocated from and freed to. */
kmem_zone_t	*xfs_trans_zone;
47
/* NOTE(review): presumably the zone for log item descriptors; not used in the visible part of this file - confirm. */
kmem_zone_t	*xfs_log_item_desc_zone;
Linus Torvalds's avatar
Linus Torvalds committed
48

49

50
/*
 * Various log reservation values.
 *
 * These are based on the size of the file system block because that is what
 * most transactions manipulate.  Each adds in an additional 128 bytes per
 * item logged to try to account for the overhead of the transaction mechanism.
 *
 * Note:  Most of the reservations underestimate the number of allocation
 * groups into which they could free extents in the xfs_bmap_finish() call.
 * This is because the number in the worst case is quite high and quite
 * unusual.  In order to fix this we need to change xfs_bmap_finish() to free
 * extents in only a single AG at a time.  This will require changes to the
 * EFI code as well, however, so that the EFI for the extents not freed is
 * logged again in each transaction.  See SGI PV #261917.
 *
 * Reservation functions here avoid a huge stack in xfs_trans_init due to
 * register overflow from temporaries in the calculations.
 */


/*
 * In a write transaction we can allocate a maximum of 2
 * extents.  This gives:
 *    the inode getting the new extents: inode size
 *    the inode's bmap btree: max depth * block size
 *    the agfs of the ags from which the extents are allocated: 2 * sector
 *    the superblock free block counter: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 * And the bmap_finish transaction can free bmap blocks in a join:
 *    the agfs of the ags containing the blocks: 2 * sector size
 *    the agfls of the ags containing the blocks: 2 * sector size
 *    the super block free block counter: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
83
84
 */
STATIC uint
85
86
xfs_calc_write_reservation(
	struct xfs_mount	*mp)
87
{
88
89
90
91
92
93
94
95
96
97
98
99
100
	return XFS_DQUOT_LOGRES(mp) +
		MAX((mp->m_sb.sb_inodesize +
		     XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
		     2 * mp->m_sb.sb_sectsize +
		     mp->m_sb.sb_sectsize +
		     XFS_ALLOCFREE_LOG_RES(mp, 2) +
		     128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
			    XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
		    (2 * mp->m_sb.sb_sectsize +
		     2 * mp->m_sb.sb_sectsize +
		     mp->m_sb.sb_sectsize +
		     XFS_ALLOCFREE_LOG_RES(mp, 2) +
		     128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
101
102
}

103
104
105
106
107
108
109
110
111
112
113
114
115
/*
 * In truncating a file we free up to two extents at once.  We can modify:
 *    the inode being truncated: inode size
 *    the inode's bmap btree: (max depth + 1) * block size
 * And the bmap_finish transaction can free the blocks and bmap blocks:
 *    the agf for each of the ags: 4 * sector size
 *    the agfl for each of the ags: 4 * sector size
 *    the super block to reflect the freed blocks: sector size
 *    worst case split in allocation btrees per extent assuming 4 extents:
 *		4 exts * 2 trees * (2 * max depth - 1) * block size
 *    the inode btree: max depth * blocksize
 *    the allocation btrees: 2 trees * (max depth - 1) * block size
 */
116
STATIC uint
117
118
xfs_calc_itruncate_reservation(
	struct xfs_mount	*mp)
119
{
120
121
122
123
124
125
126
127
128
129
130
131
132
	return XFS_DQUOT_LOGRES(mp) +
		MAX((mp->m_sb.sb_inodesize +
		     XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
		     128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
		    (4 * mp->m_sb.sb_sectsize +
		     4 * mp->m_sb.sb_sectsize +
		     mp->m_sb.sb_sectsize +
		     XFS_ALLOCFREE_LOG_RES(mp, 4) +
		     128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
		     128 * 5 +
		     XFS_ALLOCFREE_LOG_RES(mp, 1) +
		     128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
			    XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
133
134
}

135
136
137
138
139
140
141
142
143
144
145
146
/*
 * In renaming a files we can modify:
 *    the four inodes involved: 4 * inode size
 *    the two directory btrees: 2 * (max depth + v2) * dir block size
 *    the two directory bmap btrees: 2 * max depth * block size
 * And the bmap_finish transaction can free dir and bmap blocks (two sets
 *	of bmap blocks) giving:
 *    the agf for the ags in which the blocks live: 3 * sector size
 *    the agfl for the ags in which the blocks live: 3 * sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
 */
147
STATIC uint
148
149
xfs_calc_rename_reservation(
	struct xfs_mount	*mp)
150
{
151
152
153
154
155
156
157
158
159
	return XFS_DQUOT_LOGRES(mp) +
		MAX((4 * mp->m_sb.sb_inodesize +
		     2 * XFS_DIROP_LOG_RES(mp) +
		     128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
		    (3 * mp->m_sb.sb_sectsize +
		     3 * mp->m_sb.sb_sectsize +
		     mp->m_sb.sb_sectsize +
		     XFS_ALLOCFREE_LOG_RES(mp, 3) +
		     128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
160
161
}

162
163
164
165
166
167
168
169
170
171
172
173
/*
 * For creating a link to an inode:
 *    the parent directory inode: inode size
 *    the linked inode: inode size
 *    the directory btree could split: (max depth + v2) * dir block size
 *    the directory bmap btree could join or split: (max depth + v2) * blocksize
 * And the bmap_finish transaction can free some bmap blocks giving:
 *    the agf for the ag in which the blocks live: sector size
 *    the agfl for the ag in which the blocks live: sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
 */
174
STATIC uint
175
176
xfs_calc_link_reservation(
	struct xfs_mount	*mp)
177
{
178
179
180
181
182
183
184
185
186
187
	return XFS_DQUOT_LOGRES(mp) +
		MAX((mp->m_sb.sb_inodesize +
		     mp->m_sb.sb_inodesize +
		     XFS_DIROP_LOG_RES(mp) +
		     128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
		    (mp->m_sb.sb_sectsize +
		     mp->m_sb.sb_sectsize +
		     mp->m_sb.sb_sectsize +
		     XFS_ALLOCFREE_LOG_RES(mp, 1) +
		     128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
188
189
}

190
191
192
193
194
195
196
197
198
199
200
201
/*
 * For removing a directory entry we can modify:
 *    the parent directory inode: inode size
 *    the removed inode: inode size
 *    the directory btree could join: (max depth + v2) * dir block size
 *    the directory bmap btree could join or split: (max depth + v2) * blocksize
 * And the bmap_finish transaction can free the dir and bmap blocks giving:
 *    the agf for the ag in which the blocks live: 2 * sector size
 *    the agfl for the ag in which the blocks live: 2 * sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 */
202
STATIC uint
203
204
xfs_calc_remove_reservation(
	struct xfs_mount	*mp)
205
{
206
207
208
209
210
211
212
213
214
215
	return XFS_DQUOT_LOGRES(mp) +
		MAX((mp->m_sb.sb_inodesize +
		     mp->m_sb.sb_inodesize +
		     XFS_DIROP_LOG_RES(mp) +
		     128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
		    (2 * mp->m_sb.sb_sectsize +
		     2 * mp->m_sb.sb_sectsize +
		     mp->m_sb.sb_sectsize +
		     XFS_ALLOCFREE_LOG_RES(mp, 2) +
		     128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
216
217
}

218
219
220
221
222
223
224
225
226
227
228
229
230
231
/*
 * For symlink we can modify:
 *    the parent directory inode: inode size
 *    the new inode: inode size
 *    the inode btree entry: 1 block
 *    the directory btree: (max depth + v2) * dir block size
 *    the directory inode's bmap btree: (max depth + v2) * block size
 *    the blocks for the symlink: 1 kB
 * Or in the first xact we allocate some inodes giving:
 *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
 *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
 *    the inode btree: max depth * blocksize
 *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
 */
232
STATIC uint
233
234
xfs_calc_symlink_reservation(
	struct xfs_mount	*mp)
235
{
236
237
238
239
240
241
242
243
244
245
246
247
248
	return XFS_DQUOT_LOGRES(mp) +
		MAX((mp->m_sb.sb_inodesize +
		     mp->m_sb.sb_inodesize +
		     XFS_FSB_TO_B(mp, 1) +
		     XFS_DIROP_LOG_RES(mp) +
		     1024 +
		     128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
		    (2 * mp->m_sb.sb_sectsize +
		     XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
		     XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
		     XFS_ALLOCFREE_LOG_RES(mp, 1) +
		     128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
			    XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
249
250
}

251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
/*
 * For create we can modify:
 *    the parent directory inode: inode size
 *    the new inode: inode size
 *    the inode btree entry: block size
 *    the superblock for the nlink flag: sector size
 *    the directory btree: (max depth + v2) * dir block size
 *    the directory inode's bmap btree: (max depth + v2) * block size
 * Or in the first xact we allocate some inodes giving:
 *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
 *    the superblock for the nlink flag: sector size
 *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
 *    the inode btree: max depth * blocksize
 *    the allocation btrees: 2 trees * (max depth - 1) * block size
 */
266
STATIC uint
267
268
xfs_calc_create_reservation(
	struct xfs_mount	*mp)
269
{
270
271
272
273
274
275
276
277
278
279
280
281
282
	return XFS_DQUOT_LOGRES(mp) +
		MAX((mp->m_sb.sb_inodesize +
		     mp->m_sb.sb_inodesize +
		     mp->m_sb.sb_sectsize +
		     XFS_FSB_TO_B(mp, 1) +
		     XFS_DIROP_LOG_RES(mp) +
		     128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
		    (3 * mp->m_sb.sb_sectsize +
		     XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
		     XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
		     XFS_ALLOCFREE_LOG_RES(mp, 1) +
		     128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
			    XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
283
284
}

285
286
287
/*
 * Making a new directory is the same as creating a new file.
 */
288
STATIC uint
289
290
xfs_calc_mkdir_reservation(
	struct xfs_mount	*mp)
291
{
292
	return xfs_calc_create_reservation(mp);
293
294
}

295
296
297
298
299
300
301
302
303
304
/*
 * In freeing an inode we can modify:
 *    the inode being freed: inode size
 *    the super block free inode counter: sector size
 *    the agi hash list and counters: sector size
 *    the inode btree entry: block size
 *    the on disk inode before ours in the agi hash list: inode cluster size
 *    the inode btree: max depth * blocksize
 *    the allocation btrees: 2 trees * (max depth - 1) * block size
 */
305
STATIC uint
306
307
xfs_calc_ifree_reservation(
	struct xfs_mount	*mp)
308
{
309
310
311
312
313
314
315
316
317
318
319
	return XFS_DQUOT_LOGRES(mp) +
		mp->m_sb.sb_inodesize +
		mp->m_sb.sb_sectsize +
		mp->m_sb.sb_sectsize +
		XFS_FSB_TO_B(mp, 1) +
		MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
		    XFS_INODE_CLUSTER_SIZE(mp)) +
		128 * 5 +
		XFS_ALLOCFREE_LOG_RES(mp, 1) +
		128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
		       XFS_ALLOCFREE_LOG_COUNT(mp, 1));
320
321
}

322
323
324
325
/*
 * When only changing the inode we log the inode and possibly the superblock
 * We also add a bit of slop for the transaction stuff.
 */
326
STATIC uint
327
328
xfs_calc_ichange_reservation(
	struct xfs_mount	*mp)
329
{
330
331
332
333
334
	return XFS_DQUOT_LOGRES(mp) +
		mp->m_sb.sb_inodesize +
		mp->m_sb.sb_sectsize +
		512;

335
336
}

337
338
339
340
341
342
/*
 * Growing the data section of the filesystem.
 *	superblock
 *	agi and agf
 *	allocation btrees
 */
343
STATIC uint
344
345
xfs_calc_growdata_reservation(
	struct xfs_mount	*mp)
346
{
347
348
349
	return mp->m_sb.sb_sectsize * 3 +
		XFS_ALLOCFREE_LOG_RES(mp, 1) +
		128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
350
351
}

352
353
354
355
356
357
358
359
360
361
/*
 * Growing the rt section of the filesystem.
 * In the first set of transactions (ALLOC) we allocate space to the
 * bitmap or summary files.
 *	superblock: sector size
 *	agf of the ag from which the extent is allocated: sector size
 *	bmap btree for bitmap/summary inode: max depth * blocksize
 *	bitmap/summary inode: inode size
 *	allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
 */
362
STATIC uint
363
364
xfs_calc_growrtalloc_reservation(
	struct xfs_mount	*mp)
365
{
366
367
368
369
370
371
	return 2 * mp->m_sb.sb_sectsize +
		XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
		mp->m_sb.sb_inodesize +
		XFS_ALLOCFREE_LOG_RES(mp, 1) +
		128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
		       XFS_ALLOCFREE_LOG_COUNT(mp, 1));
372
373
}

374
375
376
377
378
/*
 * Growing the rt section of the filesystem.
 * In the second set of transactions (ZERO) we zero the new metadata blocks.
 *	one bitmap/summary block: blocksize
 */
379
STATIC uint
380
381
xfs_calc_growrtzero_reservation(
	struct xfs_mount	*mp)
382
{
383
	return mp->m_sb.sb_blocksize + 128;
384
385
}

386
387
388
389
390
391
392
393
394
395
/*
 * Growing the rt section of the filesystem.
 * In the third set of transactions (FREE) we update metadata without
 * allocating any new blocks.
 *	superblock: sector size
 *	bitmap inode: inode size
 *	summary inode: inode size
 *	one bitmap block: blocksize
 *	summary blocks: new summary size
 */
396
STATIC uint
397
398
xfs_calc_growrtfree_reservation(
	struct xfs_mount	*mp)
399
{
400
401
402
403
404
	return mp->m_sb.sb_sectsize +
		2 * mp->m_sb.sb_inodesize +
		mp->m_sb.sb_blocksize +
		mp->m_rsumsize +
		128 * 5;
405
406
}

407
408
409
410
/*
 * Logging the inode modification timestamp on a synchronous write.
 *	inode
 */
411
STATIC uint
412
413
xfs_calc_swrite_reservation(
	struct xfs_mount	*mp)
414
{
415
	return mp->m_sb.sb_inodesize + 128;
416
417
}

418
419
420
421
/*
 * Logging the inode mode bits when writing a setuid/setgid file
 *	inode
 */
422
423
424
STATIC uint
xfs_calc_writeid_reservation(xfs_mount_t *mp)
{
425
	return mp->m_sb.sb_inodesize + 128;
426
427
}

428
429
430
431
432
433
434
435
/*
 * Converting the inode from non-attributed to attributed.
 *	the inode being converted: inode size
 *	agf block and superblock (for block allocation)
 *	the new block (directory sized)
 *	bmap blocks for the new directory block
 *	allocation btrees
 */
436
STATIC uint
437
438
xfs_calc_addafork_reservation(
	struct xfs_mount	*mp)
439
{
440
441
442
443
444
445
446
447
	return XFS_DQUOT_LOGRES(mp) +
		mp->m_sb.sb_inodesize +
		mp->m_sb.sb_sectsize * 2 +
		mp->m_dirblksize +
		XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
		XFS_ALLOCFREE_LOG_RES(mp, 1) +
		128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
		       XFS_ALLOCFREE_LOG_COUNT(mp, 1));
448
449
}

450
451
452
453
454
455
456
457
458
459
460
/*
 * Removing the attribute fork of a file
 *    the inode being truncated: inode size
 *    the inode's bmap btree: max depth * block size
 * And the bmap_finish transaction can free the blocks and bmap blocks:
 *    the agf for each of the ags: 4 * sector size
 *    the agfl for each of the ags: 4 * sector size
 *    the super block to reflect the freed blocks: sector size
 *    worst case split in allocation btrees per extent assuming 4 extents:
 *		4 exts * 2 trees * (2 * max depth - 1) * block size
 */
461
STATIC uint
462
463
xfs_calc_attrinval_reservation(
	struct xfs_mount	*mp)
464
{
465
466
467
468
469
470
471
472
	return MAX((mp->m_sb.sb_inodesize +
		    XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
		    128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
		   (4 * mp->m_sb.sb_sectsize +
		    4 * mp->m_sb.sb_sectsize +
		    mp->m_sb.sb_sectsize +
		    XFS_ALLOCFREE_LOG_RES(mp, 4) +
		    128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
473
474
}

475
476
477
478
479
480
481
482
483
484
/*
 * Setting an attribute.
 *	the inode getting the attribute
 *	the superblock for allocations
 *	the agfs extents are allocated from
 *	the attribute btree * max depth
 *	the inode allocation btree
 * Since attribute transaction space is dependent on the size of the attribute,
 * the calculation is done partially at mount time and partially at runtime.
 */
485
STATIC uint
486
487
xfs_calc_attrset_reservation(
	struct xfs_mount	*mp)
488
{
489
490
491
492
493
	return XFS_DQUOT_LOGRES(mp) +
		mp->m_sb.sb_inodesize +
		mp->m_sb.sb_sectsize +
		XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
		128 * (2 + XFS_DA_NODE_MAXDEPTH);
494
495
}

496
497
498
499
500
501
502
503
504
505
506
/*
 * Removing an attribute.
 *    the inode: inode size
 *    the attribute btree could join: max depth * block size
 *    the inode bmap btree could join or split: max depth * block size
 * And the bmap_finish transaction can free the attr blocks freed giving:
 *    the agf for the ag in which the blocks live: 2 * sector size
 *    the agfl for the ag in which the blocks live: 2 * sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 */
507
STATIC uint
508
509
xfs_calc_attrrm_reservation(
	struct xfs_mount	*mp)
510
{
511
512
513
514
515
516
517
518
519
520
521
	return XFS_DQUOT_LOGRES(mp) +
		MAX((mp->m_sb.sb_inodesize +
		     XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
		     XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
		     128 * (1 + XFS_DA_NODE_MAXDEPTH +
			    XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
		    (2 * mp->m_sb.sb_sectsize +
		     2 * mp->m_sb.sb_sectsize +
		     mp->m_sb.sb_sectsize +
		     XFS_ALLOCFREE_LOG_RES(mp, 2) +
		     128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
522
523
}

524
525
526
/*
 * Clearing a bad agino number in an agi hash bucket.
 */
527
STATIC uint
528
529
xfs_calc_clear_agi_bucket_reservation(
	struct xfs_mount	*mp)
530
{
531
	return mp->m_sb.sb_sectsize + 128;
532
533
}

Linus Torvalds's avatar
Linus Torvalds committed
534
535
536
537
538
539
/*
 * Initialize the precomputed transaction reservation values
 * in the mount structure.
 */
void
xfs_trans_init(
540
	struct xfs_mount	*mp)
Linus Torvalds's avatar
Linus Torvalds committed
541
{
542
	struct xfs_trans_reservations *resp = &mp->m_reservations;
Linus Torvalds's avatar
Linus Torvalds committed
543

544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
	resp->tr_write = xfs_calc_write_reservation(mp);
	resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
	resp->tr_rename = xfs_calc_rename_reservation(mp);
	resp->tr_link = xfs_calc_link_reservation(mp);
	resp->tr_remove = xfs_calc_remove_reservation(mp);
	resp->tr_symlink = xfs_calc_symlink_reservation(mp);
	resp->tr_create = xfs_calc_create_reservation(mp);
	resp->tr_mkdir = xfs_calc_mkdir_reservation(mp);
	resp->tr_ifree = xfs_calc_ifree_reservation(mp);
	resp->tr_ichange = xfs_calc_ichange_reservation(mp);
	resp->tr_growdata = xfs_calc_growdata_reservation(mp);
	resp->tr_swrite = xfs_calc_swrite_reservation(mp);
	resp->tr_writeid = xfs_calc_writeid_reservation(mp);
	resp->tr_addafork = xfs_calc_addafork_reservation(mp);
	resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
	resp->tr_attrset = xfs_calc_attrset_reservation(mp);
	resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
	resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
	resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
	resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
	resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
Linus Torvalds's avatar
Linus Torvalds committed
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
}

/*
 * This routine is called to allocate a transaction structure.
 * The type parameter indicates the type of the transaction.  These
 * are enumerated in xfs_trans.h.
 *
 * Calls xfs_wait_for_freeze() with SB_FREEZE_TRANS first, then hands off
 * to _xfs_trans_alloc() with KM_SLEEP, which allocates the structure from
 * the transaction zone, initializes it, and returns it to the caller.
 */
xfs_trans_t *
xfs_trans_alloc(
	xfs_mount_t	*mp,
	uint		type)
{
	xfs_trans_t	*tp;

	xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);

	tp = _xfs_trans_alloc(mp, type, KM_SLEEP);
	return tp;
}

/*
 * Allocate and initialize a transaction structure of the given type for
 * the given mount, without waiting for a freeze.  The mount's active
 * transaction count is incremented before the allocation is attempted.
 *
 * NOTE(review): the allocation result is used without a NULL check, so
 * this assumes callers only pass memflags for which the allocation
 * cannot fail - confirm.
 */
xfs_trans_t *
_xfs_trans_alloc(
	xfs_mount_t	*mp,
	uint		type,
	uint		memflags)
{
	xfs_trans_t	*trans;

	atomic_inc(&mp->m_active_trans);

	trans = kmem_zone_zalloc(xfs_trans_zone, memflags);

	trans->t_mountp = mp;
	trans->t_type = type;
	trans->t_magic = XFS_TRANS_MAGIC;
	INIT_LIST_HEAD(&trans->t_busy);
	INIT_LIST_HEAD(&trans->t_items);

	return trans;
}

603
604
605
606
607
608
/*
 * Free the transaction structure.  If there is more clean up
 * to do when the structure is freed, add it here.
 */
STATIC void
xfs_trans_free(
609
	struct xfs_trans	*tp)
610
{
611
	xfs_alloc_busy_sort(&tp->t_busy);
612
	xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false);
613

614
615
616
617
618
	atomic_dec(&tp->t_mountp->m_active_trans);
	xfs_trans_free_dqinfo(tp);
	kmem_zone_free(xfs_trans_zone, tp);
}

Linus Torvalds's avatar
Linus Torvalds committed
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
/*
 * This is called to create a new transaction which will share the
 * permanent log reservation of the given transaction.  The remaining
 * unused block and rt extent reservations are also inherited.  This
 * implies that the original transaction is no longer allowed to allocate
 * blocks.  Locks and log items, however, are not inherited.  They must
 * be added to the new transaction explicitly.
 */
xfs_trans_t *
xfs_trans_dup(
	xfs_trans_t	*tp)
{
	xfs_trans_t	*new_tp;

	new_tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);

	/* Basic identity of the new transaction, copied from the old one. */
	new_tp->t_magic = XFS_TRANS_MAGIC;
	new_tp->t_type = tp->t_type;
	new_tp->t_mountp = tp->t_mountp;
	INIT_LIST_HEAD(&new_tp->t_items);
	INIT_LIST_HEAD(&new_tp->t_busy);

	/* Only a transaction holding a permanent log reservation can be dup'd. */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(tp->t_ticket != NULL);

	new_tp->t_flags = XFS_TRANS_PERM_LOG_RES |
			  (tp->t_flags & XFS_TRANS_RESERVE);
	new_tp->t_ticket = xfs_log_ticket_get(tp->t_ticket);

	/*
	 * Hand the unused part of the block and rt extent reservations to
	 * the new transaction and trim the old one down to what it used.
	 */
	new_tp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
	tp->t_blk_res = tp->t_blk_res_used;
	new_tp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
	tp->t_rtx_res = tp->t_rtx_res_used;
	new_tp->t_pflags = tp->t_pflags;

	xfs_trans_dup_dqinfo(tp, new_tp);

	atomic_inc(&tp->t_mountp->m_active_trans);
	return new_tp;
}

/*
 * This is called to reserve free disk blocks and log space for the
 * given transaction.  This must be done before allocating any resources
 * within the transaction.
 *
 * This will return ENOSPC if there are not enough blocks or rt extents
 * available; a failure to reserve log space returns the error from
 * xfs_log_reserve().  The only flag examined in the flags parameter is
 * XFS_TRANS_PERM_LOG_RES, which requests a permanent log reservation.
 * If any one of the reservations fails then the ones already taken are
 * backed out.
 *
 * This does not do quota reservations. That typically is done by the
 * caller afterwards.
 */
int
xfs_trans_reserve(
	xfs_trans_t	*tp,
	uint		blocks,
	uint		logspace,
	uint		rtextents,
	uint		flags,
	uint		logcount)
{
	int		use_rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
	int		error = 0;
	int		log_flags;

	/* Mark this thread as being in a transaction */
	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);

	/*
	 * Attempt to reserve the needed disk blocks by decrementing
	 * the number needed from the number available.  This will
	 * fail if the count would go below zero.
	 */
	if (blocks > 0) {
		error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
					  -((int64_t)blocks), use_rsvd);
		if (error != 0) {
			/* Nothing reserved yet: only the flags need undoing. */
			error = XFS_ERROR(ENOSPC);
			goto out_restore;
		}
		tp->t_blk_res += blocks;
	}

	/*
	 * Reserve the log space needed for this transaction.
	 */
	if (logspace > 0) {
		ASSERT((tp->t_log_res == 0) || (tp->t_log_res == logspace));
		ASSERT((tp->t_log_count == 0) ||
			(tp->t_log_count == logcount));
		if (flags & XFS_TRANS_PERM_LOG_RES) {
			log_flags = XFS_LOG_PERM_RESERV;
			tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
		} else {
			ASSERT(tp->t_ticket == NULL);
			ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
			log_flags = 0;
		}

		error = xfs_log_reserve(tp->t_mountp, logspace, logcount,
					&tp->t_ticket,
					XFS_TRANSACTION, log_flags, tp->t_type);
		if (error)
			goto undo_blocks;

		tp->t_log_res = logspace;
		tp->t_log_count = logcount;
	}

	/*
	 * Attempt to reserve the needed realtime extents by decrementing
	 * the number needed from the number available.  This will
	 * fail if the count would go below zero.
	 */
	if (rtextents > 0) {
		error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
					  -((int64_t)rtextents), use_rsvd);
		if (error) {
			error = XFS_ERROR(ENOSPC);
			goto undo_log;
		}
		tp->t_rtx_res += rtextents;
	}

	return 0;

	/*
	 * Error cases jump to one of these labels to undo any
	 * reservations which have already been performed.
	 */
undo_log:
	if (logspace > 0) {
		log_flags = (flags & XFS_TRANS_PERM_LOG_RES) ?
				XFS_LOG_REL_PERM_RESERV : 0;
		xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
		tp->t_ticket = NULL;
		tp->t_log_res = 0;
		tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
	}

undo_blocks:
	if (blocks > 0) {
		xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
					 (int64_t)blocks, use_rsvd);
		tp->t_blk_res = 0;
	}

out_restore:
	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);

	return error;
}

/*
 * Record the indicated change to the given field for application
 * to the file system's superblock when the transaction commits.
 * For now, just store the change in the transaction structure.
 *
 * Mark the transaction structure to indicate that the superblock
 * needs to be updated before committing.
David Chinner's avatar
David Chinner committed
785
786
787
788
789
790
791
792
 *
 * Because we may not be keeping track of allocated/free inodes and
 * used filesystem blocks in the superblock, we do not mark the
 * superblock dirty in this transaction if we modify these fields.
 * We still need to update the transaction deltas so that they get
 * applied to the incore superblock, but we don't want them to
 * cause the superblock to get locked and logged if these are the
 * only fields in the superblock that the transaction modifies.
Linus Torvalds's avatar
Linus Torvalds committed
793
794
795
796
797
 */
void
xfs_trans_mod_sb(
	xfs_trans_t	*tp,
	uint		field,
	int64_t		delta)
{
	xfs_mount_t	*mp = tp->t_mountp;
	uint32_t	dirty = XFS_TRANS_DIRTY | XFS_TRANS_SB_DIRTY;
	int		lazy_counter = 0;

	/*
	 * Fold the requested change into the per-transaction delta for the
	 * given superblock field.  Nothing is written to the superblock
	 * here; the transaction only remembers the accumulated delta and
	 * picks up the matching dirty flags at the bottom of the function.
	 */
	switch (field) {
	case XFS_TRANS_SB_ICOUNT:
		tp->t_icount_delta += delta;
		lazy_counter = 1;
		break;
	case XFS_TRANS_SB_IFREE:
		tp->t_ifree_delta += delta;
		lazy_counter = 1;
		break;
	case XFS_TRANS_SB_FDBLOCKS:
		/*
		 * A negative delta consumes blocks from the transaction's
		 * block reservation.  Account for the usage and check that
		 * the reservation has not been overrun.
		 */
		if (delta < 0) {
			tp->t_blk_res_used += (uint)-delta;
			ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
		}
		tp->t_fdblocks_delta += delta;
		lazy_counter = 1;
		break;
	case XFS_TRANS_SB_RES_FDBLOCKS:
		/*
		 * The in-core superblock counter already reflects this
		 * allocation, so the delta is kept separately and is only
		 * meant for the on-disk superblock.
		 */
		ASSERT(delta < 0);
		tp->t_res_fdblocks_delta += delta;
		lazy_counter = 1;
		break;
	case XFS_TRANS_SB_FREXTENTS:
		/*
		 * A negative delta consumes realtime extents from the
		 * transaction's reservation.  Account for the usage and
		 * check that the reservation has not been overrun.
		 */
		if (delta < 0) {
			tp->t_rtx_res_used += (uint)-delta;
			ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res);
		}
		tp->t_frextents_delta += delta;
		break;
	case XFS_TRANS_SB_RES_FREXTENTS:
		/*
		 * The in-core superblock counter already reflects this
		 * allocation, so the delta is kept separately and is only
		 * meant for the on-disk superblock.
		 */
		ASSERT(delta < 0);
		tp->t_res_frextents_delta += delta;
		break;
	case XFS_TRANS_SB_DBLOCKS:
		ASSERT(delta > 0);
		tp->t_dblocks_delta += delta;
		break;
	case XFS_TRANS_SB_AGCOUNT:
		ASSERT(delta > 0);
		tp->t_agcount_delta += delta;
		break;
	case XFS_TRANS_SB_IMAXPCT:
		tp->t_imaxpct_delta += delta;
		break;
	case XFS_TRANS_SB_REXTSIZE:
		tp->t_rextsize_delta += delta;
		break;
	case XFS_TRANS_SB_RBMBLOCKS:
		tp->t_rbmblocks_delta += delta;
		break;
	case XFS_TRANS_SB_RBLOCKS:
		tp->t_rblocks_delta += delta;
		break;
	case XFS_TRANS_SB_REXTENTS:
		tp->t_rextents_delta += delta;
		break;
	case XFS_TRANS_SB_REXTSLOG:
		tp->t_rextslog_delta += delta;
		break;
	default:
		/* Unknown field: leave the transaction flags untouched. */
		ASSERT(0);
		return;
	}

	/*
	 * With lazy superblock counters enabled, a change to the inode
	 * count, free inode count or free block count does not mark the
	 * superblock dirty in this transaction; only XFS_TRANS_DIRTY is
	 * set for those fields.
	 */
	if (lazy_counter && xfs_sb_version_haslazysbcount(&mp->m_sb))
		dirty &= ~XFS_TRANS_SB_DIRTY;

	tp->t_flags |= dirty;
}

/*
 * xfs_trans_apply_sb_deltas() is called from the commit code.  It joins
 * the superblock buffer to the current transaction and applies to it the
 * deltas accumulated by earlier calls to xfs_trans_mod_sb().
 *
 * Every field that is allowed to change is examined in turn and updated
 * when its delta is non-zero; the modified byte range of the buffer is
 * then logged.
 */
STATIC void
xfs_trans_apply_sb_deltas(
	xfs_trans_t	*tp)
{
	xfs_mount_t	*mp = tp->t_mountp;
	xfs_buf_t	*sb_bp;
	xfs_dsb_t	*dsb;
	size_t		first;
	size_t		last;
	int		log_all = 0;

	sb_bp = xfs_trans_getsb(tp, mp, 0);
	dsb = XFS_BUF_TO_SBP(sb_bp);

	/*
	 * The free block changes recorded against the superblock must add
	 * up to the changes recorded against the AGF counters.
	 */
	ASSERT((tp->t_fdblocks_delta + tp->t_res_fdblocks_delta) ==
	       (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
		tp->t_ag_btree_delta));

	/*
	 * The inode and free block counters are only updated here when
	 * lazy superblock counters are not in use.
	 */
	if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) {
		if (tp->t_icount_delta)
			be64_add_cpu(&dsb->sb_icount, tp->t_icount_delta);
		if (tp->t_ifree_delta)
			be64_add_cpu(&dsb->sb_ifree, tp->t_ifree_delta);
		if (tp->t_fdblocks_delta)
			be64_add_cpu(&dsb->sb_fdblocks, tp->t_fdblocks_delta);
		if (tp->t_res_fdblocks_delta)
			be64_add_cpu(&dsb->sb_fdblocks,
				     tp->t_res_fdblocks_delta);
	}

	/* Both realtime extent deltas land in the same sb_frextents field. */
	if (tp->t_frextents_delta)
		be64_add_cpu(&dsb->sb_frextents, tp->t_frextents_delta);
	if (tp->t_res_frextents_delta)
		be64_add_cpu(&dsb->sb_frextents, tp->t_res_frextents_delta);

	/*
	 * Any change to one of the fields below forces the whole superblock
	 * to be logged rather than just the counter range.
	 */
	if (tp->t_dblocks_delta) {
		be64_add_cpu(&dsb->sb_dblocks, tp->t_dblocks_delta);
		log_all = 1;
	}
	if (tp->t_agcount_delta) {
		be32_add_cpu(&dsb->sb_agcount, tp->t_agcount_delta);
		log_all = 1;
	}
	if (tp->t_imaxpct_delta) {
		/* Updated in place, without a byte-order helper. */
		dsb->sb_imax_pct += tp->t_imaxpct_delta;
		log_all = 1;
	}
	if (tp->t_rextsize_delta) {
		be32_add_cpu(&dsb->sb_rextsize, tp->t_rextsize_delta);
		log_all = 1;
	}
	if (tp->t_rbmblocks_delta) {
		be32_add_cpu(&dsb->sb_rbmblocks, tp->t_rbmblocks_delta);
		log_all = 1;
	}
	if (tp->t_rblocks_delta) {
		be64_add_cpu(&dsb->sb_rblocks, tp->t_rblocks_delta);
		log_all = 1;
	}
	if (tp->t_rextents_delta) {
		be64_add_cpu(&dsb->sb_rextents, tp->t_rextents_delta);
		log_all = 1;
	}
	if (tp->t_rextslog_delta) {
		/* Updated in place, without a byte-order helper. */
		dsb->sb_rextslog += tp->t_rextslog_delta;
		log_all = 1;
	}

	if (log_all) {
		/*
		 * The changed fields are not contiguous, so log the
		 * entire on-disk superblock structure.
		 */
		first = 0;
		last = sizeof(xfs_dsb_t) - 1;
	} else {
		/*
		 * Only the counters were touched.  They sit next to each
		 * other, so a single range from sb_icount through the end
		 * of sb_frextents covers all of them.
		 */
		first = offsetof(xfs_dsb_t, sb_icount);
		last = offsetof(xfs_dsb_t, sb_frextents) +
		       sizeof(dsb->sb_frextents) - 1;
	}
	xfs_trans_log_buf(tp, sb_bp, first, last);
}

/*
 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
 * and apply superblock counter changes to the in-core superblock.  The
 * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
 * applied to the in-core superblock.  The idea is that that has already been
 * done.
 *
 * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
 * However, we have to ensure that we modify each superblock field only
 * once because the application of the delta values may not be atomic. That can
 * lead to ENOSPC races occurring if we have two separate modifications of the
 * free space counter to put back the entire reservation and then take away
 * what we used.
 *
For faster browsing, not all history is shown. View entire blame