Commit 2d896c78 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6

* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6: (37 commits)
  [XFS] Fix lockdep annotations for xfs_lock_inodes
  [LIB]: export radix_tree_preload()
  [XFS] Fix XFS_IOC_FSBULKSTAT{,_SINGLE} & XFS_IOC_FSINUMBERS in compat mode
  [XFS] Compat ioctl handler for handle operations
  [XFS] Compat ioctl handler for XFS_IOC_FSGEOMETRY_V1.
  [XFS] Clean up function name handling in tracing code
  [XFS] Quota inode has no parent.
  [XFS] Concurrent Multi-File Data Streams
  [XFS] Use uninitialized_var macro to stop warning about rtx
  [XFS] XFS should not be looking at filp reference counts
  [XFS] Use is_power_of_2 instead of open coding checks
  [XFS] Reduce shouting by removing unnecessary macros from dir2 code.
  [XFS] Simplify XFS min/max macros.
  [XFS] Kill off xfs_count_bits
  [XFS] Cancel transactions on xfs_itruncate_start error.
  [XFS] Use do_div() on 64 bit types.
  [XFS] Fix remount,readonly path to flush everything correctly.
  [XFS] Cleanup inode extent size hint extraction
  [XFS] Prevent ENOSPC from aborting transactions that need to succeed
  [XFS] Prevent deadlock when flushing inodes on unmount
  ...
parents 2a9915c8 0f1145cc
......@@ -64,6 +64,7 @@ xfs-y += xfs_alloc.o \
xfs_dir2_sf.o \
xfs_error.o \
xfs_extfree_item.o \
xfs_filestream.o \
xfs_fsops.o \
xfs_ialloc.o \
xfs_ialloc_btree.o \
......@@ -77,6 +78,7 @@ xfs-y += xfs_alloc.o \
xfs_log.o \
xfs_log_recover.o \
xfs_mount.o \
xfs_mru_cache.o \
xfs_rename.o \
xfs_trans.o \
xfs_trans_ail.o \
......
......@@ -100,25 +100,6 @@ kmem_zone_destroy(kmem_zone_t *zone)
extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
/*
* Low memory cache shrinkers
*/
typedef struct shrinker *kmem_shaker_t;
typedef int (*kmem_shake_func_t)(int, gfp_t);
static inline kmem_shaker_t
kmem_shake_register(kmem_shake_func_t sfunc)
{
return set_shrinker(DEFAULT_SEEKS, sfunc);
}
static inline void
kmem_shake_deregister(kmem_shaker_t shrinker)
{
remove_shrinker(shrinker);
}
static inline int
kmem_shake_allow(gfp_t gfp_mask)
{
......
......@@ -108,14 +108,19 @@ xfs_page_trace(
/*
* Schedule IO completion handling on a xfsdatad if this was
* the final hold on this ioend.
* the final hold on this ioend. If we are asked to wait,
* flush the workqueue.
*/
STATIC void
xfs_finish_ioend(
xfs_ioend_t *ioend)
xfs_ioend_t *ioend,
int wait)
{
if (atomic_dec_and_test(&ioend->io_remaining))
if (atomic_dec_and_test(&ioend->io_remaining)) {
queue_work(xfsdatad_workqueue, &ioend->io_work);
if (wait)
flush_workqueue(xfsdatad_workqueue);
}
}
/*
......@@ -156,6 +161,8 @@ xfs_setfilesize(
xfs_fsize_t bsize;
ip = xfs_vtoi(ioend->io_vnode);
if (!ip)
return;
ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
ASSERT(ioend->io_type != IOMAP_READ);
......@@ -334,7 +341,7 @@ xfs_end_bio(
bio->bi_end_io = NULL;
bio_put(bio);
xfs_finish_ioend(ioend);
xfs_finish_ioend(ioend, 0);
return 0;
}
......@@ -470,7 +477,7 @@ xfs_submit_ioend(
}
if (bio)
xfs_submit_ioend_bio(ioend, bio);
xfs_finish_ioend(ioend);
xfs_finish_ioend(ioend, 0);
} while ((ioend = next) != NULL);
}
......@@ -1003,6 +1010,8 @@ xfs_page_state_convert(
if (buffer_unwritten(bh) || buffer_delay(bh) ||
((buffer_uptodate(bh) || PageUptodate(page)) &&
!buffer_mapped(bh) && (unmapped || startio))) {
int new_ioend = 0;
/*
* Make sure we don't use a read-only iomap
*/
......@@ -1021,6 +1030,15 @@ xfs_page_state_convert(
}
if (!iomap_valid) {
/*
* if we didn't have a valid mapping then we
* need to ensure that we put the new mapping
* in a new ioend structure. This needs to be
* done to ensure that the ioends correctly
* reflect the block mappings at io completion
* for unwritten extent conversion.
*/
new_ioend = 1;
if (type == IOMAP_NEW) {
size = xfs_probe_cluster(inode,
page, bh, head, 0);
......@@ -1040,7 +1058,7 @@ xfs_page_state_convert(
if (startio) {
xfs_add_to_ioend(inode, bh, offset,
type, &ioend,
!iomap_valid);
new_ioend);
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
......@@ -1416,6 +1434,13 @@ xfs_end_io_direct(
* This is not necessary for synchronous direct I/O, but we do
* it anyway to keep the code uniform and simpler.
*
* Well, if only it were that simple. Because synchronous direct I/O
* requires extent conversion to occur *before* we return to userspace,
* we have to wait for extent conversion to complete. Look at the
* iocb that has been passed to us to determine if this is AIO or
* not. If it is synchronous, tell xfs_finish_ioend() to kick the
* workqueue and wait for it to complete.
*
* The core direct I/O code might be changed to always call the
* completion handler in the future, in which case all this can
* go away.
......@@ -1423,9 +1448,9 @@ xfs_end_io_direct(
ioend->io_offset = offset;
ioend->io_size = size;
if (ioend->io_type == IOMAP_READ) {
xfs_finish_ioend(ioend);
xfs_finish_ioend(ioend, 0);
} else if (private && size > 0) {
xfs_finish_ioend(ioend);
xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
} else {
/*
* A direct I/O write ioend starts it's life in unwritten
......@@ -1434,7 +1459,7 @@ xfs_end_io_direct(
* handler.
*/
INIT_WORK(&ioend->io_work, xfs_end_bio_written);
xfs_finish_ioend(ioend);
xfs_finish_ioend(ioend, 0);
}
/*
......
......@@ -35,7 +35,7 @@
#include <linux/freezer.h>
static kmem_zone_t *xfs_buf_zone;
static kmem_shaker_t xfs_buf_shake;
static struct shrinker *xfs_buf_shake;
STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
......@@ -314,7 +314,7 @@ xfs_buf_free(
ASSERT(list_empty(&bp->b_hash_list));
if (bp->b_flags & _XBF_PAGE_CACHE) {
if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
uint i;
if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
......@@ -323,18 +323,11 @@ xfs_buf_free(
for (i = 0; i < bp->b_page_count; i++) {
struct page *page = bp->b_pages[i];
ASSERT(!PagePrivate(page));
if (bp->b_flags & _XBF_PAGE_CACHE)
ASSERT(!PagePrivate(page));
page_cache_release(page);
}
_xfs_buf_free_pages(bp);
} else if (bp->b_flags & _XBF_KMEM_ALLOC) {
/*
* XXX(hch): bp->b_count_desired might be incorrect (see
* xfs_buf_associate_memory for details), but fortunately
* the Linux version of kmem_free ignores the len argument..
*/
kmem_free(bp->b_addr, bp->b_count_desired);
_xfs_buf_free_pages(bp);
}
xfs_buf_deallocate(bp);
......@@ -764,43 +757,44 @@ xfs_buf_get_noaddr(
size_t len,
xfs_buftarg_t *target)
{
size_t malloc_len = len;
unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
int error, i;
xfs_buf_t *bp;
void *data;
int error;
bp = xfs_buf_allocate(0);
if (unlikely(bp == NULL))
goto fail;
_xfs_buf_initialize(bp, target, 0, len, 0);
try_again:
data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE);
if (unlikely(data == NULL))
error = _xfs_buf_get_pages(bp, page_count, 0);
if (error)
goto fail_free_buf;
/* check whether alignment matches.. */
if ((__psunsigned_t)data !=
((__psunsigned_t)data & ~target->bt_smask)) {
/* .. else double the size and try again */
kmem_free(data, malloc_len);
malloc_len <<= 1;
goto try_again;
for (i = 0; i < page_count; i++) {
bp->b_pages[i] = alloc_page(GFP_KERNEL);
if (!bp->b_pages[i])
goto fail_free_mem;
}
bp->b_flags |= _XBF_PAGES;
error = xfs_buf_associate_memory(bp, data, len);
if (error)
error = _xfs_buf_map_pages(bp, XBF_MAPPED);
if (unlikely(error)) {
printk(KERN_WARNING "%s: failed to map pages\n",
__FUNCTION__);
goto fail_free_mem;
bp->b_flags |= _XBF_KMEM_ALLOC;
}
xfs_buf_unlock(bp);
XB_TRACE(bp, "no_daddr", data);
XB_TRACE(bp, "no_daddr", len);
return bp;
fail_free_mem:
kmem_free(data, malloc_len);
while (--i >= 0)
__free_page(bp->b_pages[i]);
_xfs_buf_free_pages(bp);
fail_free_buf:
xfs_buf_free(bp);
xfs_buf_deallocate(bp);
fail:
return NULL;
}
......@@ -1453,6 +1447,7 @@ xfs_free_buftarg(
int external)
{
xfs_flush_buftarg(btp, 1);
xfs_blkdev_issue_flush(btp);
if (external)
xfs_blkdev_put(btp->bt_bdev);
xfs_free_bufhash(btp);
......@@ -1837,7 +1832,7 @@ xfs_buf_init(void)
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup);
if (!xfs_buf_shake)
goto out_destroy_xfsdatad_workqueue;
......@@ -1859,7 +1854,7 @@ xfs_buf_init(void)
void
xfs_buf_terminate(void)
{
kmem_shake_deregister(xfs_buf_shake);
remove_shrinker(xfs_buf_shake);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(xfs_buf_zone);
......
......@@ -63,7 +63,7 @@ typedef enum {
/* flags used only internally */
_XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
_XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */
_XBF_PAGES = (1 << 18), /* backed by refcounted pages */
_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
_XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
} xfs_buf_flags_t;
......
......@@ -183,15 +183,6 @@ xfs_file_open(
return -bhv_vop_open(vn_from_inode(inode), NULL);
}
STATIC int
xfs_file_close(
struct file *filp,
fl_owner_t id)
{
return -bhv_vop_close(vn_from_inode(filp->f_path.dentry->d_inode), 0,
file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL);
}
STATIC int
xfs_file_release(
struct inode *inode,
......@@ -436,7 +427,6 @@ const struct file_operations xfs_file_operations = {
#endif
.mmap = xfs_file_mmap,
.open = xfs_file_open,
.flush = xfs_file_close,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
#ifdef HAVE_FOP_OPEN_EXEC
......@@ -458,7 +448,6 @@ const struct file_operations xfs_invis_file_operations = {
#endif
.mmap = xfs_file_mmap,
.open = xfs_file_open,
.flush = xfs_file_close,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
};
......
......@@ -46,6 +46,7 @@ xfs_param_t xfs_params = {
.inherit_nosym = { 0, 0, 1 },
.rotorstep = { 1, 1, 255 },
.inherit_nodfrg = { 0, 1, 1 },
.fstrm_timer = { 1, 50, 3600*100},
};
/*
......
......@@ -1019,7 +1019,7 @@ xfs_ioc_bulkstat(
if (cmd == XFS_IOC_FSINUMBERS)
error = xfs_inumbers(mp, &inlast, &count,
bulkreq.ubuffer);
bulkreq.ubuffer, xfs_inumbers_fmt);
else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
error = xfs_bulkstat_single(mp, &inlast,
bulkreq.ubuffer, &done);
......
......@@ -23,10 +23,25 @@
#include <linux/fs.h>
#include <asm/uaccess.h>
#include "xfs.h"
#include "xfs_types.h"
#include "xfs_fs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_dir2_sf.h"
#include "xfs_vfs.h"
#include "xfs_vnode.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_itable.h"
#include "xfs_error.h"
#include "xfs_dfrag.h"
#define _NATIVE_IOC(cmd, type) \
......@@ -34,6 +49,7 @@
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
#define BROKEN_X86_ALIGNMENT
#define _PACKED __attribute__((packed))
/* on ia32 l_start is on a 32-bit boundary */
typedef struct xfs_flock64_32 {
__s16 l_type;
......@@ -75,35 +91,276 @@ xfs_ioctl32_flock(
return (unsigned long)p;
}
typedef struct compat_xfs_fsop_geom_v1 {
__u32 blocksize; /* filesystem (data) block size */
__u32 rtextsize; /* realtime extent size */
__u32 agblocks; /* fsblocks in an AG */
__u32 agcount; /* number of allocation groups */
__u32 logblocks; /* fsblocks in the log */
__u32 sectsize; /* (data) sector size, bytes */
__u32 inodesize; /* inode size in bytes */
__u32 imaxpct; /* max allowed inode space(%) */
__u64 datablocks; /* fsblocks in data subvolume */
__u64 rtblocks; /* fsblocks in realtime subvol */
__u64 rtextents; /* rt extents in realtime subvol*/
__u64 logstart; /* starting fsblock of the log */
unsigned char uuid[16]; /* unique id of the filesystem */
__u32 sunit; /* stripe unit, fsblocks */
__u32 swidth; /* stripe width, fsblocks */
__s32 version; /* structure version */
__u32 flags; /* superblock version flags */
__u32 logsectsize; /* log sector size, bytes */
__u32 rtsectsize; /* realtime sector size, bytes */
__u32 dirblocksize; /* directory block size, bytes */
} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
#define XFS_IOC_FSGEOMETRY_V1_32 \
_IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
{
compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg;
xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p));
if (copy_in_user(p, p32, sizeof(*p32)))
return -EFAULT;
return (unsigned long)p;
}
typedef struct compat_xfs_inogrp {
__u64 xi_startino; /* starting inode number */
__s32 xi_alloccount; /* # bits set in allocmask */
__u64 xi_allocmask; /* mask of allocated inodes */
} __attribute__((packed)) compat_xfs_inogrp_t;
STATIC int xfs_inumbers_fmt_compat(
void __user *ubuffer,
const xfs_inogrp_t *buffer,
long count,
long *written)
{
compat_xfs_inogrp_t *p32 = ubuffer;
long i;
for (i = 0; i < count; i++) {
if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
return -EFAULT;
}
*written = count * sizeof(*p32);
return 0;
}
#else
typedef struct xfs_fsop_bulkreq32 {
#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
#define _PACKED
#endif
/* XFS_IOC_FSBULKSTAT and friends */
typedef struct compat_xfs_bstime {
__s32 tv_sec; /* seconds */
__s32 tv_nsec; /* and nanoseconds */
} compat_xfs_bstime_t;
STATIC int xfs_bstime_store_compat(
compat_xfs_bstime_t __user *p32,
const xfs_bstime_t *p)
{
__s32 sec32;
sec32 = p->tv_sec;
if (put_user(sec32, &p32->tv_sec) ||
put_user(p->tv_nsec, &p32->tv_nsec))
return -EFAULT;
return 0;
}
typedef struct compat_xfs_bstat {
__u64 bs_ino; /* inode number */
__u16 bs_mode; /* type and mode */
__u16 bs_nlink; /* number of links */
__u32 bs_uid; /* user id */
__u32 bs_gid; /* group id */
__u32 bs_rdev; /* device value */
__s32 bs_blksize; /* block size */
__s64 bs_size; /* file size */
compat_xfs_bstime_t bs_atime; /* access time */
compat_xfs_bstime_t bs_mtime; /* modify time */
compat_xfs_bstime_t bs_ctime; /* inode change time */
int64_t bs_blocks; /* number of blocks */
__u32 bs_xflags; /* extended flags */
__s32 bs_extsize; /* extent size */
__s32 bs_extents; /* number of extents */
__u32 bs_gen; /* generation count */
__u16 bs_projid; /* project id */
unsigned char bs_pad[14]; /* pad space, unused */
__u32 bs_dmevmask; /* DMIG event mask */
__u16 bs_dmstate; /* DMIG state info */
__u16 bs_aextents; /* attribute number of extents */
} _PACKED compat_xfs_bstat_t;
STATIC int xfs_bulkstat_one_fmt_compat(
void __user *ubuffer,
const xfs_bstat_t *buffer)
{
compat_xfs_bstat_t __user *p32 = ubuffer;
if (put_user(buffer->bs_ino, &p32->bs_ino) ||
put_user(buffer->bs_mode, &p32->bs_mode) ||
put_user(buffer->bs_nlink, &p32->bs_nlink) ||
put_user(buffer->bs_uid, &p32->bs_uid) ||
put_user(buffer->bs_gid, &p32->bs_gid) ||
put_user(buffer->bs_rdev, &p32->bs_rdev) ||
put_user(buffer->bs_blksize, &p32->bs_blksize) ||
put_user(buffer->bs_size, &p32->bs_size) ||
xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
put_user(buffer->bs_blocks, &p32->bs_blocks) ||
put_user(buffer->bs_xflags, &p32->bs_xflags) ||
put_user(buffer->bs_extsize, &p32->bs_extsize) ||
put_user(buffer->bs_extents, &p32->bs_extents) ||
put_user(buffer->bs_gen, &p32->bs_gen) ||
put_user(buffer->bs_projid, &p32->bs_projid) ||
put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
put_user(buffer->bs_aextents, &p32->bs_aextents))
return -EFAULT;
return sizeof(*p32);
}
typedef struct compat_xfs_fsop_bulkreq {
compat_uptr_t lastip; /* last inode # pointer */
__s32 icount; /* count of entries in buffer */
compat_uptr_t ubuffer; /* user buffer for inode desc. */
__s32 ocount; /* output count pointer */
} xfs_fsop_bulkreq32_t;
compat_uptr_t ocount; /* output count pointer */
} compat_xfs_fsop_bulkreq_t;
STATIC unsigned long
xfs_ioctl32_bulkstat(
unsigned long arg)
#define XFS_IOC_FSBULKSTAT_32 \
_IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
_IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
#define XFS_IOC_FSINUMBERS_32 \
_IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
/* copied from xfs_ioctl.c */
STATIC int
xfs_ioc_bulkstat_compat(
xfs_mount_t *mp,
unsigned int cmd,
void __user *arg)
{
xfs_fsop_bulkreq32_t __user *p32 = (void __user *)arg;
xfs_fsop_bulkreq_t __user *p = compat_alloc_user_space(sizeof(*p));
compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
u32 addr;
xfs_fsop_bulkreq_t bulkreq;
int count; /* # of records returned */
xfs_ino_t inlast; /* last inode number */
int done;
int error;
/* done = 1 if there are more stats to get and if bulkstat */
/* should be called again (unused here, but used in dmapi) */
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
if (get_user(addr, &p32->lastip))
return -EFAULT;
bulkreq.lastip = compat_ptr(addr);
if (get_user(bulkreq.icount, &p32->icount) ||
get_user(addr, &p32->ubuffer))
return -EFAULT;
bulkreq.ubuffer = compat_ptr(addr);
if (get_user(addr, &p32->ocount))
return -EFAULT;
bulkreq.ocount = compat_ptr(addr);
if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
return -XFS_ERROR(EFAULT);
if ((count = bulkreq.icount) <= 0)
return -XFS_ERROR(EINVAL);
if (cmd == XFS_IOC_FSINUMBERS)
error = xfs_inumbers(mp, &inlast, &count,
bulkreq.ubuffer, xfs_inumbers_fmt_compat);
else {
/* declare a var to get a warning in case the type changes */
bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat;
error = xfs_bulkstat(mp, &inlast, &count,
xfs_bulkstat_one, formatter,