Commit 698f415c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ofs-pull-tag-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux

Pull orangefs filesystem from Mike Marshall.

This finally merges the long-pending orangefs filesystem, which has been
much cleaned up with input from Al Viro over the last six months.  From
the documentation file:

 "OrangeFS is an LGPL userspace scale-out parallel storage system.  It
  is ideal for large storage problems faced by HPC, BigData, Streaming
  Video, Genomics, Bioinformatics.

  Orangefs, originally called PVFS, was first developed in 1993 by Walt
  Ligon and Eric Blumer as a parallel file system for Parallel Virtual
  Machine (PVM) as part of a NASA grant to study the I/O patterns of
  parallel programs.

  Orangefs features include:

    - Distributes file data among multiple file servers
    - Supports simultaneous access by multiple clients
    - Stores file data and metadata on servers using local file system
      and access methods
    - Userspace implementation is easy to install and maintain
    - Direct MPI support
    - Stateless"

see Documentation/filesystems/orangefs.txt for more in-depth details.

* tag 'ofs-pull-tag-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux: (174 commits)
  orangefs: fix orangefs_superblock locking
  orangefs: fix do_readv_writev() handling of error halfway through
  orangefs: have ->kill_sb() evict the VFS side of things first
  orangefs: sanitize ->llseek()
  orangefs-bufmap.h: trim unused junk
  orangefs: saner calling conventions for getting a slot
  orangefs_copy_{to,from}_bufmap(): don't pass bufmap pointer
  orangefs: get rid of readdir_handle_s
  ornagefs: ensure that truncate has an up to date inode size
  orangefs: move code which sets i_link to orangefs_inode_getattr
  orangefs: remove needless wrapper around GFP_KERNEL
  orangefs: remove wrapper around mutex_lock(&inode->i_mutex)
  orangefs: refactor inode type or link_target change detection
  orangefs: use new getattr for revalidate and remove old getattr
  orangefs: use new getattr in inode getattr and permission
  orangefs: use new orangefs_inode_getattr to get size in write and llseek
  orangefs: use new orangefs_inode_getattr to create new inodes
  orangefs: rename orangefs_inode_getattr to orangefs_inode_old_getattr
  orangefs: remove inode->i_lock wrapper
  orangefs: put register_chrdev immediately before register_filesystem
  ...
parents b4cec5f6 45996492
What: /sys/fs/orangefs/perf_counters/*
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
Counters and settings for various caches.
Read only.
What: /sys/fs/orangefs/perf_counter_reset
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
echo a 0 or a 1 into perf_counter_reset to
reset all the counters in
/sys/fs/orangefs/perf_counters
except ones with PINT_PERF_PRESERVE set.
What: /sys/fs/orangefs/perf_time_interval_secs
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
Length of perf counter intervals in
seconds.
What: /sys/fs/orangefs/perf_history_size
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
The perf_counters cache statistics have N, or
perf_history_size, samples. The default is
one.
Every perf_time_interval_secs the (first)
samples are reset.
If N is greater than one, the "current" set
of samples is reset, and the samples from the
other N-1 intervals remain available.
What: /sys/fs/orangefs/op_timeout_secs
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
Service operation timeout in seconds.
What: /sys/fs/orangefs/slot_timeout_secs
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
"Slot" timeout in seconds. A "slot"
is an indexed buffer in the shared
memory segment used for communication
between the kernel module and userspace.
Slots are requested and waited for,
the wait times out after slot_timeout_secs.
What: /sys/fs/orangefs/acache/*
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
Attribute cache configurable settings.
What: /sys/fs/orangefs/ncache/*
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
Name cache configurable settings.
What: /sys/fs/orangefs/capcache/*
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
Capability cache configurable settings.
What: /sys/fs/orangefs/ccache/*
Date: Jun 2015
Contact: Mike Marshall <hubcap@omnibond.com>
Description:
Credential cache configurable settings.
This diff is collapsed.
......@@ -8251,6 +8251,14 @@ S: Supported
F: fs/overlayfs/
F: Documentation/filesystems/overlayfs.txt
ORANGEFS FILESYSTEM
M: Mike Marshall <hubcap@omnibond.com>
L: pvfs2-developers@beowulf-underground.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux.git
S: Supported
F: fs/orangefs/
F: Documentation/filesystems/orangefs.txt
P54 WIRELESS DRIVER
M: Christian Lamparter <chunkeey@googlemail.com>
L: linux-wireless@vger.kernel.org
......
......@@ -209,6 +209,7 @@ menuconfig MISC_FILESYSTEMS
if MISC_FILESYSTEMS
source "fs/orangefs/Kconfig"
source "fs/adfs/Kconfig"
source "fs/affs/Kconfig"
source "fs/ecryptfs/Kconfig"
......
......@@ -106,6 +106,7 @@ obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
obj-$(CONFIG_OVERLAY_FS) += overlayfs/
obj-$(CONFIG_ORANGEFS_FS) += orangefs/
obj-$(CONFIG_UDF_FS) += udf/
obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
obj-$(CONFIG_OMFS_FS) += omfs/
......
config ORANGEFS_FS
tristate "ORANGEFS (Powered by PVFS) support"
select FS_POSIX_ACL
help
OrangeFS is a parallel file system designed for use on high-end
computing (HEC) systems.
#
# Makefile for the ORANGEFS filesystem.
#
obj-$(CONFIG_ORANGEFS_FS) += orangefs.o
orangefs-objs := acl.o file.o orangefs-cache.o orangefs-utils.o xattr.o \
dcache.o inode.o orangefs-sysfs.o orangefs-mod.o super.o \
devorangefs-req.o namei.o symlink.o dir.o orangefs-bufmap.o \
orangefs-debugfs.o waitqueue.o
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
#include <linux/posix_acl_xattr.h>
#include <linux/fs_struct.h>
/*
 * Look up the POSIX ACL of the requested type for an inode.
 *
 * The ACL is read from the extended attribute that matches @type and is
 * converted to its in-memory representation.
 *
 * Returns the ACL on success, NULL when the attribute lookup reports
 * -ENODATA or -ENOSYS, and an ERR_PTR() value otherwise (-EINVAL for an
 * unknown @type, -ENOMEM when the scratch buffer cannot be allocated, or
 * whatever the attribute lookup returned).
 */
struct posix_acl *orangefs_get_acl(struct inode *inode, int type)
{
	struct posix_acl *result;
	char *xattr_name;
	char *buf;
	int len;

	if (type == ACL_TYPE_ACCESS) {
		xattr_name = ORANGEFS_XATTR_NAME_ACL_ACCESS;
	} else if (type == ACL_TYPE_DEFAULT) {
		xattr_name = ORANGEFS_XATTR_NAME_ACL_DEFAULT;
	} else {
		gossip_err("orangefs_get_acl: bogus value of type %d\n", type);
		return ERR_PTR(-EINVAL);
	}

	/*
	 * A maximum-sized buffer is allocated up front instead of first
	 * probing for the attribute length, which would cost an extra
	 * round trip.
	 */
	buf = kmalloc(ORANGEFS_MAX_XATTR_VALUELEN, GFP_KERNEL);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	gossip_debug(GOSSIP_ACL_DEBUG,
		     "inode %pU, key %s, type %d\n",
		     get_khandle_from_ino(inode),
		     xattr_name,
		     type);

	len = orangefs_inode_getxattr(inode,
				      "",
				      xattr_name,
				      buf,
				      ORANGEFS_MAX_XATTR_VALUELEN);

	if (len > 0) {
		/* The attribute exists: build the in-memory ACL from it. */
		result = posix_acl_from_xattr(&init_user_ns, buf, len);
	} else if (len == -ENODATA || len == -ENOSYS) {
		/* No ACL stored (or not supported): report "no ACL". */
		result = NULL;
	} else {
		gossip_err("inode %pU retrieving acl's failed with error %d\n",
			   get_khandle_from_ino(inode),
			   len);
		result = ERR_PTR(len);
	}

	/* The scratch buffer is no longer needed on any path. */
	kfree(buf);
	return result;
}
/*
 * Store the POSIX ACL of the given type on an inode.
 *
 * For ACL_TYPE_ACCESS with a non-NULL @acl, the inode mode is first
 * recomputed with posix_acl_equiv_mode() and the inode is marked dirty;
 * when that helper returns 0 the ACL pointer is dropped, so the attribute
 * is written with a NULL value and zero size.
 *
 * The cached ACL for @type is updated only if the extended attribute call
 * succeeds.
 *
 * Returns 0 on success or a negative errno (-EINVAL for an unknown @type,
 * -ENOMEM if the attribute buffer cannot be allocated, or the error from
 * the ACL helpers / attribute call).
 */
int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	const char *name;
	void *value = NULL;
	size_t size = 0;
	int error;

	if (type == ACL_TYPE_ACCESS) {
		name = ORANGEFS_XATTR_NAME_ACL_ACCESS;
		if (acl) {
			umode_t mode = inode->i_mode;

			/*
			 * Check whether the ACL can be expressed with the
			 * traditional file mode permission bits.
			 */
			error = posix_acl_equiv_mode(acl, &mode);
			if (error < 0) {
				gossip_err("%s: posix_acl_equiv_mode err: %d\n",
					   __func__,
					   error);
				return error;
			}

			if (mode != inode->i_mode)
				SetModeFlag(orangefs_inode);
			inode->i_mode = mode;
			mark_inode_dirty_sync(inode);
			if (!error)
				acl = NULL;
		}
	} else if (type == ACL_TYPE_DEFAULT) {
		name = ORANGEFS_XATTR_NAME_ACL_DEFAULT;
	} else {
		gossip_err("%s: invalid type %d!\n", __func__, type);
		return -EINVAL;
	}

	gossip_debug(GOSSIP_ACL_DEBUG,
		     "%s: inode %pU, key %s type %d\n",
		     __func__, get_khandle_from_ino(inode),
		     name,
		     type);

	if (acl) {
		/* Serialise the ACL into its extended attribute form. */
		size = posix_acl_xattr_size(acl->a_count);
		value = kmalloc(size, GFP_KERNEL);
		if (!value)
			return -ENOMEM;

		error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
		if (error < 0) {
			kfree(value);
			return error;
		}
	}

	gossip_debug(GOSSIP_ACL_DEBUG,
		     "%s: name %s, value %p, size %zd, acl %p\n",
		     __func__, name, value, size, acl);

	/*
	 * Write the extended attribute.  If acl is NULL at this point, value
	 * is NULL and size is 0, which translates to a removexattr; that
	 * removal should not complain when the attribute does not exist.
	 */
	error = orangefs_inode_setxattr(inode, "", name, value, size, 0);
	kfree(value);

	if (error == 0)
		set_cached_acl(inode, type, acl);
	return error;
}
int orangefs_init_acl(struct inode *inode, struct inode *dir)
{
struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
struct posix_acl *default_acl, *acl;
umode_t mode = inode->i_mode;
int error = 0;
ClearModeFlag(orangefs_inode);
error = posix_acl_create(dir, &mode, &default_acl, &acl);
if (error)
return error;
if (default_acl) {
error = orangefs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
posix_acl_release(default_acl);
}
if (acl) {
if (!error)
error = orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS);
posix_acl_release(acl);
}
/* If mode of the inode was changed, then do a forcible ->setattr */
if (mode != inode->i_mode) {
SetModeFlag(orangefs_inode);
inode->i_mode = mode;
orangefs_flush_inode(inode);
}
return error;
}
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
/*
* Implementation of dentry (directory cache) functions.
*/
#include "protocol.h"
#include "orangefs-kernel.h"
/* Returns 1 if dentry can still be trusted, else 0. */
static int orangefs_revalidate_lookup(struct dentry *dentry)
{
struct dentry *parent_dentry = dget_parent(dentry);
struct inode *parent_inode = parent_dentry->d_inode;
struct orangefs_inode_s *parent = ORANGEFS_I(parent_inode);
struct inode *inode = dentry->d_inode;
struct orangefs_kernel_op_s *new_op;
int ret = 0;
int err = 0;
gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__);
new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP);
if (!new_op)
goto out_put_parent;
new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
new_op->upcall.req.lookup.parent_refn = parent->refn;
strncpy(new_op->upcall.req.lookup.d_name,
dentry->d_name.name,
ORANGEFS_NAME_MAX);
gossip_debug(GOSSIP_DCACHE_DEBUG,
"%s:%s:%d interrupt flag [%d]\n",
__FILE__,
__func__,
__LINE__,
get_interruptible_flag(parent_inode));
err = service_operation(new_op, "orangefs_lookup",
get_interruptible_flag(parent_inode));
/* Positive dentry: reject if error or not the same inode. */
if (inode) {
if (err) {
gossip_debug(GOSSIP_DCACHE_DEBUG,
"%s:%s:%d lookup failure.\n",
__FILE__, __func__, __LINE__);
goto out_drop;
}
if (!match_handle(new_op->downcall.resp.lookup.refn.khandle,
inode)) {
gossip_debug(GOSSIP_DCACHE_DEBUG,
"%s:%s:%d no match.\n",
__FILE__, __func__, __LINE__);
goto out_drop;
}
/* Negative dentry: reject if success or error other than ENOENT. */
} else {
gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: negative dentry.\n",
__func__);
if (!err || err != -ENOENT) {
if (new_op->downcall.status != 0)
gossip_debug(GOSSIP_DCACHE_DEBUG,
"%s:%s:%d lookup failure.\n",
__FILE__, __func__, __LINE__);
goto out_drop;
}
}
ret = 1;
out_release_op:
op_release(new_op);
out_put_parent:
dput(parent_dentry);
return ret;
out_drop:
gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d revalidate failed\n",
__FILE__, __func__, __LINE__);
goto out_release_op;
}
/*
* Verify that dentry is valid.
*
* Should return 1 if dentry can still be trusted, else 0.
*/
/*
 * ->d_revalidate() implementation.
 *
 * Returns 1 if the dentry can still be trusted, 0 if it cannot, and
 * -ECHILD when called with LOOKUP_RCU set.
 */
static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
{
	int ret;

	if (flags & LOOKUP_RCU)
		return -ECHILD;

	gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: called on dentry %p.\n",
		     __func__, dentry);

	/* The root handle is never looked up again. */
	if (dentry->d_inode && is_root_handle(dentry->d_inode))
		return 1;

	/*
	 * Passing this check means a positive dentry still exists, or a
	 * negative dentry still does not exist.
	 */
	if (!orangefs_revalidate_lookup(dentry))
		return 0;

	/* Only positive dentries need their inode contents validated. */
	if (dentry->d_inode) {
		ret = orangefs_inode_check_changed(dentry->d_inode);
		if (ret < 0) {
			gossip_debug(GOSSIP_DCACHE_DEBUG,
				     "%s:%s:%d getattr failure.\n",
				     __FILE__, __func__, __LINE__);
			return 0;
		}
		if (ret == 0)
			return 0;
	}

	gossip_debug(GOSSIP_DCACHE_DEBUG,
		     "%s: negative dentry or positive dentry and inode valid.\n",
		     __func__);
	return 1;
}
/*
 * Dentry operations table for orangefs; only ->d_revalidate is
 * supplied here.
 */
const struct dentry_operations orangefs_dentry_operations = {
.d_revalidate = orangefs_d_revalidate,
};
This diff is collapsed.
This diff is collapsed.
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
/*
* Definitions of downcalls used in Linux kernel module.
*/
#ifndef __DOWNCALL_H
#define __DOWNCALL_H
/*
* Sanitized the device-client core interaction
* for clean 32-64 bit usage
*/
/* Response payload carried as orangefs_downcall_s.resp.io. */
struct orangefs_io_response {
/* NOTE(review): presumably the amount of data transferred - confirm */
__s64 amt_complete;
};
/* Response payload carried as orangefs_downcall_s.resp.lookup. */
struct orangefs_lookup_response {
/* object reference returned by the lookup */
struct orangefs_object_kref refn;
};
/* Response payload carried as orangefs_downcall_s.resp.create. */
struct orangefs_create_response {
/* NOTE(review): presumably refers to the newly created object - confirm */
struct orangefs_object_kref refn;
};
/* Response payload carried as orangefs_downcall_s.resp.sym. */
struct orangefs_symlink_response {
/* NOTE(review): presumably refers to the newly created symlink - confirm */
struct orangefs_object_kref refn;
};
/* Response payload carried as orangefs_downcall_s.resp.getattr. */
struct orangefs_getattr_response {
struct ORANGEFS_sys_attr_s attributes;
/* fixed-size buffer of ORANGEFS_NAME_MAX bytes for a link target */
char link_target[ORANGEFS_NAME_MAX];
};
/* Response payload carried as orangefs_downcall_s.resp.mkdir. */
struct orangefs_mkdir_response {
/* NOTE(review): presumably refers to the newly created directory - confirm */
struct orangefs_object_kref refn;
};
/*
 * Duplicates some system interface structures so that no extra memory
 * has to be allocated.
 *
 * One directory entry; orangefs_readdir_response_s refers to an array
 * of these through its dirent_array member.
 */
struct orangefs_dirent {
/* entry name */
char *d_name;
/* NOTE(review): presumably the length of d_name - confirm */
int d_length;
/* handle associated with the entry */
struct orangefs_khandle khandle;
};
/*
 * Response payload carried as orangefs_downcall_s.resp.statfs.
 * All members are signed 64-bit values.
 */
struct orangefs_statfs_response {
__s64 block_size;
__s64 blocks_total;
__s64 blocks_avail;
__s64 files_total;
__s64 files_avail;
};
/* Response payload carried as orangefs_downcall_s.resp.fs_mount. */
struct orangefs_fs_mount_response {
__s32 fs_id;
__s32 id;
/* handle of the file system root */
struct orangefs_khandle root_khandle;
};
/*
 * The getxattr response is the attribute value
 * (carried as orangefs_downcall_s.resp.getxattr).
 */
struct orangefs_getxattr_response {
/* NOTE(review): presumably the number of valid bytes in val - confirm */
__s32 val_sz;
/* explicit padding member */
__s32 __pad1;
/* value buffer of ORANGEFS_MAX_XATTR_VALUELEN bytes */
char val[ORANGEFS_MAX_XATTR_VALUELEN];
};
/*
 * The listxattr response is an array of attribute names
 * (carried as orangefs_downcall_s.resp.listxattr).
 */
struct orangefs_listxattr_response {
__s32 returned_count;
/* explicit padding member */
__s32 __pad1;
__u64 token;
/* room for ORANGEFS_MAX_XATTR_LISTLEN names of ORANGEFS_MAX_XATTR_NAMELEN bytes */
char key[ORANGEFS_MAX_XATTR_LISTLEN * ORANGEFS_MAX_XATTR_NAMELEN];
/* NOTE(review): presumably the number of bytes used in key - confirm */
__s32 keylen;
/* explicit padding member */
__s32 __pad2;
/* one length slot per possible list entry */
__s32 lengths[ORANGEFS_MAX_XATTR_LISTLEN];
};
/* Response payload carried as orangefs_downcall_s.resp.param. */
struct orangefs_param_response {
__s64 value;
};
/* Size in bytes of the perf count response buffer. */
#define PERF_COUNT_BUF_SIZE 4096
/* Response payload carried as orangefs_downcall_s.resp.perf_count. */
struct orangefs_perf_count_response {
char buffer[PERF_COUNT_BUF_SIZE];
};
/* Size in bytes of the fs key response buffer. */
#define FS_KEY_BUF_SIZE 4096
/* Response payload carried as orangefs_downcall_s.resp.fs_key. */
struct orangefs_fs_key_response {
/* NOTE(review): presumably the number of valid bytes in fs_key - confirm */
__s32 fs_keylen;
/* explicit padding member */
__s32 __pad1;
char fs_key[FS_KEY_BUF_SIZE];
};
/*
 * A downcall: a type, a status, an optional trailer and a union holding
 * one of the per-operation response payloads defined in this header.
 */
struct orangefs_downcall_s {
/* NOTE(review): presumably selects which member of resp is meaningful - confirm */
__s32 type;
/* status of the operation; callers compare it against 0 */
__s32 status;
/* currently trailer is used only by readdir */
__s64 trailer_size;
char *trailer_buf;
/* per-operation response payload */
union {
struct orangefs_io_response io;
struct orangefs_lookup_response lookup;
struct orangefs_create_response create;
struct orangefs_symlink_response sym;
struct orangefs_getattr_response getattr;
struct orangefs_mkdir_response mkdir;
struct orangefs_statfs_response statfs;
struct orangefs_fs_mount_response fs_mount;
struct orangefs_getxattr_response getxattr;
struct orangefs_listxattr_response listxattr;
struct orangefs_param_response param;
struct orangefs_perf_count_response perf_count;
struct orangefs_fs_key_response fs_key;
} resp;
};
/*
 * Readdir response.
 * NOTE(review): presumably this is what the downcall trailer holds for
 * readdir - confirm against the readdir code.
 */
struct orangefs_readdir_response_s {
__u64 token;
__u64 directory_version;
/* explicit padding member */
__u32 __pad2;
/* NOTE(review): presumably the number of entries in dirent_array - confirm */
__u32 orangefs_dirent_outcount;
struct orangefs_dirent *dirent_array;
};
#endif /* __DOWNCALL_H */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
#ifndef __ORANGEFS_BUFMAP_H
#define __ORANGEFS_BUFMAP_H
/*
 * Interface to the orangefs buffer map.  Only the declarations are
 * visible here; see the implementation file for the exact semantics.
 */

/* Queries on the buffer map (size and shift). */
int orangefs_bufmap_size_query(void);
int orangefs_bufmap_shift_query(void);
/* Set-up from a descriptor, and tear-down, of the buffer map. */
int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc);
void orangefs_bufmap_finalize(void);
void orangefs_bufmap_run_down(void);
/*
 * Get/put pairs.
 * NOTE(review): the get calls presumably return the buffer index that is
 * later handed back to the matching put call - confirm.
 */
int orangefs_bufmap_get(void);
void orangefs_bufmap_put(int buffer_index);
int orangefs_readdir_index_get(void);
void orangefs_readdir_index_put(int buffer_index);
/*
 * Copy helpers taking an iov_iter, a buffer index and a size.
 * NOTE(review): direction is presumably as the names say (from the
 * iovec into the buffer, and from the buffer to the iovec) - confirm.
 */
int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter,
int buffer_index,
size_t size);
int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter,
int buffer_index,
size_t size);
#endif /* __ORANGEFS_BUFMAP_H */
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
#include "protocol.h"
#include "orangefs-kernel.h"
/*
 * Tags assigned to kernel upcall operations.  next_tag_value is
 * accessed with next_tag_value_lock held.
 */
static __u64 next_tag_value;
static DEFINE_SPINLOCK(next_tag_value_lock);
/* the orangefs memory caches */
/* a cache for orangefs upcall/downcall operations (struct orangefs_kernel_op_s) */
static struct kmem_cache *op_cache;
/*
 * Create the slab cache used for orangefs operation structures and reset
 * the upcall tag counter to its starting value.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
int op_cache_initialize(void)
{
	op_cache = kmem_cache_create("orangefs_op_cache",
				     sizeof(struct orangefs_kernel_op_s),
				     0,
				     ORANGEFS_CACHE_CREATE_FLAGS,
				     NULL);
	if (op_cache == NULL) {
		gossip_err("Cannot create orangefs_op_cache\n");
		return -ENOMEM;
	}

	/* Seed the tag counter; it is only touched with the lock held. */
	spin_lock(&next_tag_value_lock);
	next_tag_value = 100;
	spin_unlock(&next_tag_value_lock);

	return 0;
}
/*
 * Destroy the operation cache created by op_cache_initialize().
 * Always returns 0.
 */
int op_cache_finalize(void)
{
kmem_cache_destroy(op_cache);
return 0;
}
/*
 * Map the upcall type of an operation to a printable name.
 *
 * Returns a pointer to a constant string; "OP_UNKNOWN?" is returned when
 * new_op is NULL or its upcall type is not one of the known values.
 */
char *get_opname_string(struct orangefs_kernel_op_s *new_op)
{
	__s32 type;

	if (!new_op)
		return "OP_UNKNOWN?";

	type = new_op->upcall.type;

	if (type == ORANGEFS_VFS_OP_FILE_IO)
		return "OP_FILE_IO";
	if (type == ORANGEFS_VFS_OP_LOOKUP)
		return "OP_LOOKUP";
	if (type == ORANGEFS_VFS_OP_CREATE)
		return "OP_CREATE";
	if (type == ORANGEFS_VFS_OP_GETATTR)
		return "OP_GETATTR";
	if (type == ORANGEFS_VFS_OP_REMOVE)
		return "OP_REMOVE";
	if (type == ORANGEFS_VFS_OP_MKDIR)
		return "OP_MKDIR";
	if (type == ORANGEFS_VFS_OP_READDIR)
		return "OP_READDIR";
	if (type == ORANGEFS_VFS_OP_READDIRPLUS)
		return "OP_READDIRPLUS";
	if (type == ORANGEFS_VFS_OP_SETATTR)
		return "OP_SETATTR";
	if (type == ORANGEFS_VFS_OP_SYMLINK)
		return "OP_SYMLINK";
	if (type == ORANGEFS_VFS_OP_RENAME)
		return "OP_RENAME";
	if (type == ORANGEFS_VFS_OP_STATFS)
		return "OP_STATFS";
	if (type == ORANGEFS_VFS_OP_TRUNCATE)
		return "OP_TRUNCATE";
	if (type == ORANGEFS_VFS_OP_MMAP_RA_FLUSH)
		return "OP_MMAP_RA_FLUSH";
	if (type == ORANGEFS_VFS_OP_FS_MOUNT)
		return "OP_FS_MOUNT";
	if (type == ORANGEFS_VFS_OP_FS_UMOUNT)
		return "OP_FS_UMOUNT";
	if (type == ORANGEFS_VFS_OP_GETXATTR)
		return "OP_GETXATTR";
	if (type == ORANGEFS_VFS_OP_SETXATTR)
		return "OP_SETXATTR";
	if (type == ORANGEFS_VFS_OP_LISTXATTR)
		return "OP_LISTXATTR";
	if (type == ORANGEFS_VFS_OP_REMOVEXATTR)
		return "OP_REMOVEXATTR";
	if (type == ORANGEFS_VFS_OP_PARAM)
		return "OP_PARAM";
	if (type == ORANGEFS_VFS_OP_PERF_COUNT)
		return "OP_PERF_COUNT";
	if (type == ORANGEFS_VFS_OP_CANCEL)
		return "OP_CANCEL";
	if (type == ORANGEFS_VFS_OP_FSYNC)
		return "OP_FSYNC";
	if (type == ORANGEFS_VFS_OP_FSKEY)
		return "OP_FSKEY";

	return "OP_UNKNOWN?";
}
void orangefs_new_tag(struct orangefs_kernel_op_s *op)
{
spin_lock(&next_tag_value_lock);