Commit 603ba7e4 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs pile #2 from Al Viro:
 "Next pile (and there'll be one or two more).

  The large piece in this one is getting rid of /proc/*/ns/* weirdness;
  among other things, it allows to (finally) make nameidata completely
  opaque outside of fs/namei.c, making for easier further cleanups in
  there"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  coda_venus_readdir(): use file_inode()
  fs/namei.c: fold link_path_walk() call into path_init()
  path_init(): don't bother with LOOKUP_PARENT in argument
  fs/namei.c: new helper (path_cleanup())
  path_init(): store the "base" pointer to file in nameidata itself
  make default ->i_fop have ->open() fail with ENXIO
  make nameidata completely opaque outside of fs/namei.c
  kill proc_ns completely
  take the targets of /proc/*/ns/* symlinks to separate fs
  bury struct proc_ns in fs/proc
  copy address of proc_ns_ops into ns_common
  new helpers: ns_alloc_inum/ns_free_inum
  make proc_ns_operations work with struct ns_common * instead of void *
  switch the rest of proc_ns_operations to working with &...->ns
  netns: switch ->get()/->put()/->install()/->inum() to working with &net->ns
  make mntns ->get()/->put()/->install()/->inum() work with &mnt_ns->ns
  common object embedded into various struct ....ns
parents 31f48fc8 93fe74b2
......@@ -2145,22 +2145,12 @@ doit:
return 0;
}
static int
pfm_no_open(struct inode *irrelevant, struct file *dontcare)
{
DPRINT(("pfm_no_open called\n"));
return -ENXIO;
}
static const struct file_operations pfm_file_ops = {
.llseek = no_llseek,
.read = pfm_read,
.write = pfm_write,
.poll = pfm_poll,
.unlocked_ioctl = pfm_ioctl,
.open = pfm_no_open, /* special open code to disallow open via /proc */
.fasync = pfm_fasync,
.release = pfm_close,
.flush = pfm_flush
......
......@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o \
stack.o fs_struct.o statfs.o fs_pin.o
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
......
......@@ -426,7 +426,6 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx)
struct coda_file_info *cfi;
struct coda_inode_info *cii;
struct file *host_file;
struct dentry *de;
struct venus_dirent *vdir;
unsigned long vdir_size = offsetof(struct venus_dirent, d_name);
unsigned int type;
......@@ -438,8 +437,7 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
de = coda_file->f_path.dentry;
cii = ITOC(de->d_inode);
cii = ITOC(file_inode(coda_file));
vdir = kmalloc(sizeof(*vdir), GFP_KERNEL);
if (!vdir) return -ENOMEM;
......
......@@ -114,6 +114,11 @@ int proc_nr_inodes(struct ctl_table *table, int write,
}
#endif
static int no_open(struct inode *inode, struct file *file)
{
return -ENXIO;
}
/**
* inode_init_always - perform inode structure intialisation
* @sb: superblock inode belongs to
......@@ -125,7 +130,7 @@ int proc_nr_inodes(struct ctl_table *table, int write,
int inode_init_always(struct super_block *sb, struct inode *inode)
{
static const struct inode_operations empty_iops;
static const struct file_operations empty_fops;
static const struct file_operations no_open_fops = {.open = no_open};
struct address_space *const mapping = &inode->i_data;
inode->i_sb = sb;
......@@ -133,7 +138,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_flags = 0;
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
inode->i_fop = &empty_fops;
inode->i_fop = &no_open_fops;
inode->__i_nlink = 1;
inode->i_opflags = 0;
i_uid_write(inode, 0);
......@@ -1798,7 +1803,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
} else if (S_ISFIFO(mode))
inode->i_fop = &pipefifo_fops;
else if (S_ISSOCK(mode))
inode->i_fop = &bad_sock_fops;
; /* leave it no_open_fops */
else
printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
" inode %s:%lu\n", mode, inode->i_sb->s_id,
......
......@@ -147,3 +147,8 @@ extern const struct file_operations pipefifo_fops;
*/
extern void sb_pin_kill(struct super_block *sb);
extern void mnt_pin_kill(struct mount *m);
/*
* fs/nsfs.c
*/
extern struct dentry_operations ns_dentry_operations;
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/ns_common.h>
struct mnt_namespace {
atomic_t count;
unsigned int proc_inum;
struct ns_common ns;
struct mount * root;
struct list_head list;
struct user_namespace *user_ns;
......
......@@ -487,6 +487,19 @@ void path_put(const struct path *path)
}
EXPORT_SYMBOL(path_put);
struct nameidata {
struct path path;
struct qstr last;
struct path root;
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags;
unsigned seq, m_seq;
int last_type;
unsigned depth;
struct file *base;
char *saved_names[MAX_NESTED_LINKS + 1];
};
/*
* Path walking has 2 modes, rcu-walk and ref-walk (see
* Documentation/filesystems/path-lookup.txt). In situations when we can't
......@@ -695,6 +708,18 @@ void nd_jump_link(struct nameidata *nd, struct path *path)
nd->flags |= LOOKUP_JUMPED;
}
void nd_set_link(struct nameidata *nd, char *path)
{
nd->saved_names[nd->depth] = path;
}
EXPORT_SYMBOL(nd_set_link);
char *nd_get_link(struct nameidata *nd)
{
return nd->saved_names[nd->depth];
}
EXPORT_SYMBOL(nd_get_link);
static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
{
struct inode *inode = link->dentry->d_inode;
......@@ -1821,13 +1846,14 @@ static int link_path_walk(const char *name, struct nameidata *nd)
}
static int path_init(int dfd, const char *name, unsigned int flags,
struct nameidata *nd, struct file **fp)
struct nameidata *nd)
{
int retval = 0;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_JUMPED;
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
nd->depth = 0;
nd->base = NULL;
if (flags & LOOKUP_ROOT) {
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
......@@ -1847,7 +1873,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
} else {
path_get(&nd->path);
}
return 0;
goto done;
}
nd->root.mnt = NULL;
......@@ -1897,7 +1923,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
if (f.flags & FDPUT_FPUT)
*fp = f.file;
nd->base = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
rcu_read_lock();
} else {
......@@ -1908,13 +1934,26 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->inode = nd->path.dentry->d_inode;
if (!(flags & LOOKUP_RCU))
return 0;
goto done;
if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
return 0;
goto done;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
rcu_read_unlock();
return -ECHILD;
done:
current->total_link_count = 0;
return link_path_walk(name, nd);
}
static void path_cleanup(struct nameidata *nd)
{
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
path_put(&nd->root);
nd->root.mnt = NULL;
}
if (unlikely(nd->base))
fput(nd->base);
}
static inline int lookup_last(struct nameidata *nd, struct path *path)
......@@ -1930,7 +1969,6 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
struct file *base = NULL;
struct path path;
int err;
......@@ -1948,14 +1986,7 @@ static int path_lookupat(int dfd, const char *name,
* be handled by restarting a traditional ref-walk (which will always
* be able to complete).
*/
err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(err))
goto out;
current->total_link_count = 0;
err = link_path_walk(name, nd);
err = path_init(dfd, name, flags, nd);
if (!err && !(flags & LOOKUP_PARENT)) {
err = lookup_last(nd, &path);
while (err > 0) {
......@@ -1983,14 +2014,7 @@ static int path_lookupat(int dfd, const char *name,
}
}
out:
if (base)
fput(base);
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
path_put(&nd->root);
nd->root.mnt = NULL;
}
path_cleanup(nd);
return err;
}
......@@ -2297,19 +2321,13 @@ out:
static int
path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
{
struct file *base = NULL;
struct nameidata nd;
int err;
err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base);
err = path_init(dfd, name, flags, &nd);
if (unlikely(err))
goto out;
current->total_link_count = 0;
err = link_path_walk(name, &nd);
if (err)
goto out;
err = mountpoint_last(&nd, path);
while (err > 0) {
void *cookie;
......@@ -2325,12 +2343,7 @@ path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags
put_link(&nd, &link, cookie);
}
out:
if (base)
fput(base);
if (nd.root.mnt && !(nd.flags & LOOKUP_ROOT))
path_put(&nd.root);
path_cleanup(&nd);
return err;
}
......@@ -3181,7 +3194,6 @@ out:
static struct file *path_openat(int dfd, struct filename *pathname,
struct nameidata *nd, const struct open_flags *op, int flags)
{
struct file *base = NULL;
struct file *file;
struct path path;
int opened = 0;
......@@ -3198,12 +3210,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
goto out;
}
error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(error))
goto out;
current->total_link_count = 0;
error = link_path_walk(pathname->name, nd);
error = path_init(dfd, pathname->name, flags, nd);
if (unlikely(error))
goto out;
......@@ -3229,10 +3236,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
put_link(nd, &link, cookie);
}
out:
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
path_put(&nd->root);
if (base)
fput(base);
path_cleanup(nd);
if (!(opened & FILE_OPENED)) {
BUG_ON(!error);
put_filp(file);
......
......@@ -1569,17 +1569,13 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
static bool is_mnt_ns_file(struct dentry *dentry)
{
/* Is this a proxy for a mount namespace? */
struct inode *inode = dentry->d_inode;
struct proc_ns *ei;
if (!proc_ns_inode(inode))
return false;
ei = get_proc_ns(inode);
if (ei->ns_ops != &mntns_operations)
return false;
return dentry->d_op == &ns_dentry_operations &&
dentry->d_fsdata == &mntns_operations;
}
return true;
struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
}
static bool mnt_ns_loop(struct dentry *dentry)
......@@ -1591,7 +1587,7 @@ static bool mnt_ns_loop(struct dentry *dentry)
if (!is_mnt_ns_file(dentry))
return false;
mnt_ns = get_proc_ns(dentry->d_inode)->ns;
mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}
......@@ -2020,7 +2016,10 @@ static int do_loopback(struct path *path, const char *old_name,
if (IS_MNT_UNBINDABLE(old))
goto out2;
if (!check_mnt(parent) || !check_mnt(old))
if (!check_mnt(parent))
goto out2;
if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
goto out2;
if (!recurse && has_locked_children(old, old_path.dentry))
......@@ -2640,7 +2639,7 @@ dput_out:
static void free_mnt_ns(struct mnt_namespace *ns)
{
proc_free_inum(ns->proc_inum);
ns_free_inum(&ns->ns);
put_user_ns(ns->user_ns);
kfree(ns);
}
......@@ -2662,11 +2661,12 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
if (!new_ns)
return ERR_PTR(-ENOMEM);
ret = proc_alloc_inum(&new_ns->proc_inum);
ret = ns_alloc_inum(&new_ns->ns);
if (ret) {
kfree(new_ns);
return ERR_PTR(ret);
}
new_ns->ns.ops = &mntns_operations;
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
atomic_set(&new_ns->count, 1);
new_ns->root = NULL;
......@@ -3144,31 +3144,31 @@ found:
return visible;
}
static void *mntns_get(struct task_struct *task)
static struct ns_common *mntns_get(struct task_struct *task)
{
struct mnt_namespace *ns = NULL;
struct ns_common *ns = NULL;
struct nsproxy *nsproxy;
task_lock(task);
nsproxy = task->nsproxy;
if (nsproxy) {
ns = nsproxy->mnt_ns;
get_mnt_ns(ns);
ns = &nsproxy->mnt_ns->ns;
get_mnt_ns(to_mnt_ns(ns));
}
task_unlock(task);
return ns;
}
static void mntns_put(void *ns)
static void mntns_put(struct ns_common *ns)
{
put_mnt_ns(ns);
put_mnt_ns(to_mnt_ns(ns));
}
static int mntns_install(struct nsproxy *nsproxy, void *ns)
static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
struct fs_struct *fs = current->fs;
struct mnt_namespace *mnt_ns = ns;
struct mnt_namespace *mnt_ns = to_mnt_ns(ns);
struct path root;
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
......@@ -3198,17 +3198,10 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
return 0;
}
static unsigned int mntns_inum(void *ns)
{
struct mnt_namespace *mnt_ns = ns;
return mnt_ns->proc_inum;
}
const struct proc_ns_operations mntns_operations = {
.name = "mnt",
.type = CLONE_NEWNS,
.get = mntns_get,
.put = mntns_put,
.install = mntns_install,
.inum = mntns_inum,
};
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/ktime.h>
static struct vfsmount *nsfs_mnt;
static const struct file_operations ns_file_operations = {
.llseek = no_llseek,
};
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
{
struct inode *inode = dentry->d_inode;
const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
ns_ops->name, inode->i_ino);
}
static void ns_prune_dentry(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
if (inode) {
struct ns_common *ns = inode->i_private;
atomic_long_set(&ns->stashed, 0);
}
}
const struct dentry_operations ns_dentry_operations =
{
.d_prune = ns_prune_dentry,
.d_delete = always_delete_dentry,
.d_dname = ns_dname,
};
static void nsfs_evict(struct inode *inode)
{
struct ns_common *ns = inode->i_private;
clear_inode(inode);
ns->ops->put(ns);
}
void *ns_get_path(struct path *path, struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
struct vfsmount *mnt = mntget(nsfs_mnt);
struct qstr qname = { .name = "", };
struct dentry *dentry;
struct inode *inode;
struct ns_common *ns;
unsigned long d;
again:
ns = ns_ops->get(task);
if (!ns) {
mntput(mnt);
return ERR_PTR(-ENOENT);
}
rcu_read_lock();
d = atomic_long_read(&ns->stashed);
if (!d)
goto slow;
dentry = (struct dentry *)d;
if (!lockref_get_not_dead(&dentry->d_lockref))
goto slow;
rcu_read_unlock();
ns_ops->put(ns);
got_it:
path->mnt = mnt;
path->dentry = dentry;
return NULL;
slow:
rcu_read_unlock();
inode = new_inode_pseudo(mnt->mnt_sb);
if (!inode) {
ns_ops->put(ns);
mntput(mnt);
return ERR_PTR(-ENOMEM);
}
inode->i_ino = ns->inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_flags |= S_IMMUTABLE;
inode->i_mode = S_IFREG | S_IRUGO;
inode->i_fop = &ns_file_operations;
inode->i_private = ns;
dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
if (!dentry) {
iput(inode);
mntput(mnt);
return ERR_PTR(-ENOMEM);
}
d_instantiate(dentry, inode);
dentry->d_fsdata = (void *)ns_ops;
d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
if (d) {
d_delete(dentry); /* make sure ->d_prune() does nothing */
dput(dentry);
cpu_relax();
goto again;
}
goto got_it;
}
int ns_get_name(char *buf, size_t size, struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
struct ns_common *ns;
int res = -ENOENT;
ns = ns_ops->get(task);
if (ns) {
res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum);
ns_ops->put(ns);
}
return res;
}
struct file *proc_ns_fget(int fd)
{
struct file *file;
file = fget(fd);
if (!file)
return ERR_PTR(-EBADF);
if (file->f_op != &ns_file_operations)
goto out_invalid;
return file;
out_invalid:
fput(file);
return ERR_PTR(-EINVAL);
}
static const struct super_operations nsfs_ops = {
.statfs = simple_statfs,
.evict_inode = nsfs_evict,
};
static struct dentry *nsfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_pseudo(fs_type, "nsfs:", &nsfs_ops,
&ns_dentry_operations, NSFS_MAGIC);
}
static struct file_system_type nsfs = {
.name = "nsfs",
.mount = nsfs_mount,
.kill_sb = kill_anon_super,
};
void __init nsfs_init(void)
{
nsfs_mnt = kern_mount(&nsfs);
if (IS_ERR(nsfs_mnt))
panic("can't set nsfs up\n");
nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER;
}
......@@ -32,8 +32,6 @@ static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
struct ctl_table_header *head;
const struct proc_ns_operations *ns_ops;
void *ns;
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
......@@ -50,11 +48,6 @@ static void proc_evict_inode(struct inode *inode)
RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
sysctl_head_put(head);
}
/* Release any associated namespace */
ns_ops = PROC_I(inode)->ns.ns_ops;
ns = PROC_I(inode)->ns.ns;
if (ns_ops && ns)
ns_ops->put(ns);
}
static struct kmem_cache * proc_inode_cachep;
......@@ -73,8 +66,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->pde = NULL;
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
ei->ns.ns = NULL;
ei->ns.ns_ops = NULL;
ei->ns_ops = NULL;
inode = &ei->vfs_inode;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
return inode;
......
......@@ -65,7 +65,7 @@ struct proc_inode {
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
struct proc_ns ns;
const struct proc_ns_operations *ns_ops;
struct inode vfs_inode;
};
......
#include <linux/proc_fs.h>
#include <linux/nsproxy.h>
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/utsname.h>
......@@ -34,138 +30,45 @@ static const struct proc_ns_operations *ns_entries[] = {
&mntns_operations,
};
static const struct file_operations ns_file_operations = {
.llseek = no_llseek,
};
static const struct inode_operations ns_inode_operations = {
.setattr = proc_setattr,
};
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
{
struct inode *inode = dentry->d_inode;
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
ns_ops->name, inode->i_ino);
}
const struct dentry_operations ns_dentry_operations =
{
.d_delete = always_delete_dentry,
.d_dname = ns_dname,
};
static struct dentry *proc_ns_get_dentry(struct super_block *sb,
struct task_struct *task, const struct proc_ns_operations *ns_ops)
{
struct dentry *dentry, *result;
struct inode *inode;
struct proc_inode *ei;
struct qstr qname = { .name = "", };
void *ns;
ns = ns_ops->get(task);
if (!ns)
return ERR_PTR(-ENOENT);
dentry = d_alloc_pseudo(sb, &qname);
if (!dentry) {
ns_ops->put(ns);
return ERR_PTR(-ENOMEM);
}
inode = iget_locked(sb, ns_ops->inum(ns));
if (!inode) {
dput(dentry);
ns_ops->put(ns);
return ERR_PTR(-ENOMEM);
}
ei = PROC_I(inode);
if (inode->i_state & I_NEW) {
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_op = &ns_inode_operations;
inode->i_mode = S_IFREG | S_IRUGO;
inode->i_fop = &ns_file_operations;
ei->ns.ns_ops = ns_ops;
ei->ns.ns = ns;
unlock_new_inode(inode);
} else {
ns_ops->put(ns);
}
d_set_d_op(dentry, &ns_dentry_operations);
result = d_instantiate_unique(dentry, inode);
if (result) {
dput(dentry);
dentry = result;
}
return dentry;
}
static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;