Commit aab174f0 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs update from Al Viro:

 - big one - consolidation of descriptor-related logics; almost all of
   that is moved to fs/file.c

   (BTW, I'm seriously tempted to rename the result to fd.c.  As it is,
   we have a situation when file_table.c is about handling of struct
   file and file.c is about handling of descriptor tables; the reasons
   are historical - file_table.c used to be about a static array of
   struct file we used to have way back).

   A lot of stray ends got cleaned up and converted to saner primitives,
   disgusting mess in android/binder.c is still disgusting, but at least
   doesn't poke so much in descriptor table guts anymore.  A bunch of
   relatively minor races got fixed in process, plus an ext4 struct file
   leak.

 - related thing - fget_light() partially unuglified; see fdget() in
   there (and yes, it generates the code as good as we used to have).

 - also related - bits of Cyrill's procfs stuff that got entangled into
   that work; _not_ all of it, just the initial move to fs/proc/fd.c and
   switch of fdinfo to seq_file.

 - Alex's fs/coredump.c spiltoff - the same story, had been easier to
   take that commit than mess with conflicts.  The rest is a separate
   pile, this was just a mechanical code movement.

 - a few misc patches all over the place.  Not all for this cycle,
   there'll be more (and quite a few currently sit in akpm's tree)."

Fix up trivial conflicts in the android binder driver, and some fairly
simple conflicts due to two different changes to the sock_alloc_file()
interface ("take descriptor handling from sock_alloc_file() to callers"
vs "net: Providing protocol type via system.sockprotoname xattr of
/proc/PID/fd entries" adding a dentry name to the socket)

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (72 commits)
  MAX_LFS_FILESIZE should be a loff_t
  compat: fs: Generic compat_sys_sendfile implementation
  fs: push rcu_barrier() from deactivate_locked_super() to filesystems
  btrfs: reada_extent doesn't need kref for refcount
  coredump: move core dump functionality into its own file
  coredump: prevent double-free on an error path in core dumper
  usb/gadget: fix misannotations
  fcntl: fix misannotations
  ceph: don't abuse d_delete() on failure exits
  hypfs: ->d_parent is never NULL or negative
  vfs: delete surplus inode NULL check
  switch simple cases of fget_light to fdget
  new helpers: fdget()/fdput()
  switch o2hb_region_dev_write() to fget_light()
  proc_map_files_readdir(): don't bother with grabbing files
  make get_file() return its argument
  vhost_set_vring(): turn pollstart/pollstop into bool
  switch prctl_set_mm_exe_file() to fget_light()
  switch xfs_find_handle() to fget_light()
  switch xfs_swapext() to fget_light()
  ...
parents ca41cc96 2bd2c194
......@@ -145,27 +145,24 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd,
long __user *, basep)
{
int error;
struct file *file;
struct fd arg = fdget(fd);
struct osf_dirent_callback buf;
error = -EBADF;
file = fget(fd);
if (!file)
goto out;
if (!arg.file)
return -EBADF;
buf.dirent = dirent;
buf.basep = basep;
buf.count = count;
buf.error = 0;
error = vfs_readdir(file, osf_filldir, &buf);
error = vfs_readdir(arg.file, osf_filldir, &buf);
if (error >= 0)
error = buf.error;
if (count != buf.count)
error = count - buf.count;
fput(file);
out:
fdput(arg);
return error;
}
......
......@@ -2306,7 +2306,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
* partially initialize the vma for the sampling buffer
*/
vma->vm_mm = mm;
vma->vm_file = filp;
vma->vm_file = get_file(filp);
vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
......@@ -2345,8 +2345,6 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
goto error;
}
get_file(filp);
/*
* now insert the vma in the vm list for the process, must be
* done with mmap lock held
......@@ -4782,7 +4780,7 @@ recheck:
asmlinkage long
sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
{
struct file *file = NULL;
struct fd f = {NULL, 0};
pfm_context_t *ctx = NULL;
unsigned long flags = 0UL;
void *args_k = NULL;
......@@ -4879,17 +4877,17 @@ restart_args:
ret = -EBADF;
file = fget(fd);
if (unlikely(file == NULL)) {
f = fdget(fd);
if (unlikely(f.file == NULL)) {
DPRINT(("invalid fd %d\n", fd));
goto error_args;
}
if (unlikely(PFM_IS_FILE(file) == 0)) {
if (unlikely(PFM_IS_FILE(f.file) == 0)) {
DPRINT(("fd %d not related to perfmon\n", fd));
goto error_args;
}
ctx = file->private_data;
ctx = f.file->private_data;
if (unlikely(ctx == NULL)) {
DPRINT(("no context for fd %d\n", fd));
goto error_args;
......@@ -4919,8 +4917,8 @@ abort_locked:
if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
error_args:
if (file)
fput(file);
if (f.file)
fdput(f);
kfree(args_k);
......
......@@ -109,33 +109,32 @@ Efault:
int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned int count)
{
struct file * file;
struct fd arg;
struct hpux_dirent __user * lastdirent;
struct getdents_callback buf;
int error = -EBADF;
int error;
file = fget(fd);
if (!file)
goto out;
arg = fdget(fd);
if (!arg.file)
return -EBADF;
buf.current_dir = dirent;
buf.previous = NULL;
buf.count = count;
buf.error = 0;
error = vfs_readdir(file, filldir, &buf);
error = vfs_readdir(arg.file, filldir, &buf);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
if (put_user(file->f_pos, &lastdirent->d_off))
if (put_user(arg.file->f_pos, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
}
fput(file);
out:
fdput(arg);
return error;
}
......
......@@ -189,7 +189,7 @@ SYSCALL_SPU(getcwd)
SYSCALL_SPU(capget)
SYSCALL_SPU(capset)
COMPAT_SYS(sigaltstack)
SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile)
SYSX_SPU(sys_sendfile,compat_sys_sendfile_wrapper,sys_sendfile)
SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
PPC_SYS(vfork)
......@@ -229,7 +229,7 @@ COMPAT_SYS_SPU(sched_setaffinity)
COMPAT_SYS_SPU(sched_getaffinity)
SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
SYS32ONLY(sendfile64)
SYSX(sys_ni_syscall,compat_sys_sendfile64_wrapper,sys_sendfile64)
COMPAT_SYS_SPU(io_setup)
SYSCALL_SPU(io_destroy)
COMPAT_SYS_SPU(io_getevents)
......
......@@ -419,6 +419,7 @@
#define __ARCH_WANT_COMPAT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
#define __ARCH_WANT_SYS_NEWFSTATAT
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
#endif
/*
......
......@@ -143,48 +143,17 @@ long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t pt
* proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
* and the register representation of a signed int (msr in 64-bit mode) is performed.
*/
asmlinkage long compat_sys_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count)
asmlinkage long compat_sys_sendfile_wrapper(u32 out_fd, u32 in_fd,
compat_off_t __user *offset, u32 count)
{
mm_segment_t old_fs = get_fs();
int ret;
off_t of;
off_t __user *up;
if (offset && get_user(of, offset))
return -EFAULT;
/* The __user pointer cast is valid because of the set_fs() */
set_fs(KERNEL_DS);
up = offset ? (off_t __user *) &of : NULL;
ret = sys_sendfile((int)out_fd, (int)in_fd, up, count);
set_fs(old_fs);
if (offset && put_user(of, offset))
return -EFAULT;
return ret;
return compat_sys_sendfile((int)out_fd, (int)in_fd, offset, count);
}
asmlinkage int compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count)
asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd,
compat_loff_t __user *offset, u32 count)
{
mm_segment_t old_fs = get_fs();
int ret;
loff_t lof;
loff_t __user *up;
if (offset && get_user(lof, offset))
return -EFAULT;
/* The __user pointer cast is valid because of the set_fs() */
set_fs(KERNEL_DS);
up = offset ? (loff_t __user *) &lof : NULL;
ret = sys_sendfile64(out_fd, in_fd, up, count);
set_fs(old_fs);
if (offset && put_user(lof, offset))
return -EFAULT;
return ret;
return sys_sendfile((int)out_fd, (int)in_fd,
(off_t __user *)offset, count);
}
long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
......
......@@ -69,8 +69,6 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
umode_t, mode, int, neighbor_fd)
{
long ret;
struct file *neighbor;
int fput_needed;
struct spufs_calls *calls;
calls = spufs_calls_get();
......@@ -78,11 +76,11 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
return -ENOSYS;
if (flags & SPU_CREATE_AFFINITY_SPU) {
struct fd neighbor = fdget(neighbor_fd);
ret = -EBADF;
neighbor = fget_light(neighbor_fd, &fput_needed);
if (neighbor) {
ret = calls->create_thread(name, flags, mode, neighbor);
fput_light(neighbor, fput_needed);
if (neighbor.file) {
ret = calls->create_thread(name, flags, mode, neighbor.file);
fdput(neighbor);
}
} else
ret = calls->create_thread(name, flags, mode, NULL);
......@@ -94,8 +92,7 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
{
long ret;
struct file *filp;
int fput_needed;
struct fd arg;
struct spufs_calls *calls;
calls = spufs_calls_get();
......@@ -103,10 +100,10 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
return -ENOSYS;
ret = -EBADF;
filp = fget_light(fd, &fput_needed);
if (filp) {
ret = calls->spu_run(filp, unpc, ustatus);
fput_light(filp, fput_needed);
arg = fdget(fd);
if (arg.file) {
ret = calls->spu_run(arg.file, unpc, ustatus);
fdput(arg);
}
spufs_calls_put(calls);
......
......@@ -106,6 +106,17 @@ static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
return total;
}
static int match_context(const void *v, struct file *file, unsigned fd)
{
struct spu_context *ctx;
if (file->f_op != &spufs_context_fops)
return 0;
ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx;
if (ctx->flags & SPU_CREATE_NOSCHED)
return 0;
return fd + 1;
}
/*
* The additional architecture-specific notes for Cell are various
* context files in the spu context.
......@@ -115,29 +126,18 @@ static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
* internal functionality to dump them without needing to actually
* open the files.
*/
/*
* descriptor table is not shared, so files can't change or go away.
*/
static struct spu_context *coredump_next_context(int *fd)
{
struct fdtable *fdt = files_fdtable(current->files);
struct file *file;
struct spu_context *ctx = NULL;
for (; *fd < fdt->max_fds; (*fd)++) {
if (!fd_is_open(*fd, fdt))
continue;
file = fcheck(*fd);
if (!file || file->f_op != &spufs_context_fops)
continue;
ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx;
if (ctx->flags & SPU_CREATE_NOSCHED)
continue;
break;
}
return ctx;
int n = iterate_fd(current->files, *fd, match_context, NULL);
if (!n)
return NULL;
*fd = n - 1;
file = fcheck(*fd);
return SPUFS_I(file->f_dentry->d_inode)->i_ctx;
}
int spufs_coredump_extra_notes_size(void)
......
......@@ -72,8 +72,6 @@ static void hypfs_remove(struct dentry *dentry)
struct dentry *parent;
parent = dentry->d_parent;
if (!parent || !parent->d_inode)
return;
mutex_lock(&parent->d_inode->i_mutex);
if (hypfs_positive(dentry)) {
if (S_ISDIR(dentry->d_inode->i_mode))
......
......@@ -447,6 +447,7 @@
#else
#define __ARCH_WANT_COMPAT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
#endif
/*
......
......@@ -90,7 +90,7 @@ SIGN1(sys32_mkdir, sys_mkdir, %o1)
SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5)
SIGN1(sys32_sysfs, compat_sys_sysfs, %o0)
SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1)
SIGN2(sys32_sendfile64, compat_sys_sendfile64, %o0, %o1)
SIGN2(sys32_sendfile64, sys_sendfile, %o0, %o1)
SIGN1(sys32_prctl, sys_prctl, %o0)
SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0)
SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2)
......
......@@ -506,52 +506,6 @@ long compat_sys_fadvise64_64(int fd,
advice);
}
asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
compat_off_t __user *offset,
compat_size_t count)
{
mm_segment_t old_fs = get_fs();
int ret;
off_t of;
if (offset && get_user(of, offset))
return -EFAULT;
set_fs(KERNEL_DS);
ret = sys_sendfile(out_fd, in_fd,
offset ? (off_t __user *) &of : NULL,
count);
set_fs(old_fs);
if (offset && put_user(of, offset))
return -EFAULT;
return ret;
}
asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd,
compat_loff_t __user *offset,
compat_size_t count)
{
mm_segment_t old_fs = get_fs();
int ret;
loff_t lof;
if (offset && get_user(lof, offset))
return -EFAULT;
set_fs(KERNEL_DS);
ret = sys_sendfile64(out_fd, in_fd,
offset ? (loff_t __user *) &lof : NULL,
count);
set_fs(old_fs);
if (offset && put_user(lof, offset))
return -EFAULT;
return ret;
}
/* This is just a version for 32-bit applications which does
* not force O_LARGEFILE on.
*/
......
......@@ -21,6 +21,9 @@
#include <linux/un.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/switch_to.h>
......@@ -118,90 +121,38 @@ void mconsole_log(struct mc_request *req)
mconsole_reply(req, "", 0, 0);
}
/* This is a more convoluted version of mconsole_proc, which has some stability
* problems; however, we need it fixed, because it is expected that UML users
* mount HPPFS instead of procfs on /proc. And we want mconsole_proc to still
* show the real procfs content, not the ones from hppfs.*/
#if 0
void mconsole_proc(struct mc_request *req)
{
struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
struct file *file;
int n;
char *ptr = req->request.data, *buf;
mm_segment_t old_fs = get_fs();
ptr += strlen("proc");
ptr = skip_spaces(ptr);
file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
if (IS_ERR(file)) {
mconsole_reply(req, "Failed to open file", 1, 0);
goto out;
}
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (buf == NULL) {
mconsole_reply(req, "Failed to allocate buffer", 1, 0);
goto out_fput;
}
if (file->f_op->read) {
do {
loff_t pos;
set_fs(KERNEL_DS);
n = vfs_read(file, buf, PAGE_SIZE - 1, &pos);
file_pos_write(file, pos);
set_fs(old_fs);
if (n >= 0) {
buf[n] = '\0';
mconsole_reply(req, buf, 0, (n > 0));
}
else {
mconsole_reply(req, "Read of file failed",
1, 0);
goto out_free;
}
} while (n > 0);
}
else mconsole_reply(req, "", 0, 0);
out_free:
kfree(buf);
out_fput:
fput(file);
out: ;
}
#endif
void mconsole_proc(struct mc_request *req)
{
char path[64];
char *buf;
int len;
int fd;
struct file *file;
int first_chunk = 1;
char *ptr = req->request.data;
ptr += strlen("proc");
ptr = skip_spaces(ptr);
snprintf(path, sizeof(path), "/proc/%s", ptr);
fd = sys_open(path, 0, 0);
if (fd < 0) {
file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
if (IS_ERR(file)) {
mconsole_reply(req, "Failed to open file", 1, 0);
printk(KERN_ERR "open %s: %d\n",path,fd);
printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file));
goto out;
}
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (buf == NULL) {
mconsole_reply(req, "Failed to allocate buffer", 1, 0);
goto out_close;
goto out_fput;
}
for (;;) {
len = sys_read(fd, buf, PAGE_SIZE-1);
do {
loff_t pos;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
len = vfs_read(file, buf, PAGE_SIZE - 1, &pos);
set_fs(old_fs);
file->f_pos = pos;
if (len < 0) {
mconsole_reply(req, "Read of file failed", 1, 0);
goto out_free;
......@@ -211,22 +162,14 @@ void mconsole_proc(struct mc_request *req)
mconsole_reply(req, "\n", 0, 1);
first_chunk = 0;
}
if (len == PAGE_SIZE-1) {
buf[len] = '\0';
mconsole_reply(req, buf, 0, 1);
} else {
buf[len] = '\0';
mconsole_reply(req, buf, 0, 0);
break;
}
}
buf[len] = '\0';
mconsole_reply(req, buf, 0, (len > 0));
} while (len > 0);
out_free:
kfree(buf);
out_close:
sys_close(fd);
out:
/* nothing */;
out_fput:
fput(file);
out: ;
}
#define UML_MCONSOLE_HELPTEXT \
......
......@@ -460,8 +460,7 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
if (vma->vm_file)
fput(vma->vm_file);
vma->vm_file = dmabuf->file;
get_file(vma->vm_file);
vma->vm_file = get_file(dmabuf->file);
vma->vm_pgoff = pgoff;
......
......@@ -1183,7 +1183,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
struct rdma_ucm_migrate_id cmd;
struct rdma_ucm_migrate_resp resp;
struct ucma_context *ctx;
struct file *filp;
struct fd f;
struct ucma_file *cur_file;
int ret = 0;
......@@ -1191,12 +1191,12 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
return -EFAULT;
/* Get current fd to protect against it being closed */
filp = fget(cmd.fd);
if (!filp)
f = fdget(cmd.fd);
if (!f.file)
return -ENOENT;
/* Validate current fd and prevent destruction of id. */
ctx = ucma_get_ctx(filp->private_data, cmd.id);
ctx = ucma_get_ctx(f.file->private_data, cmd.id);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);