Commit 04b5da4a authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew Morton)

Merge fixes from Andrew Morton:
 "The nmi patch and watchdog patch aren't actually fixes - they're
  features which needed a few last-minute touchups.

  Otherwise, a rather large batch of fixes - ocfs2 review takes a while
  and I got distracted and missed last week's batch"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (31 commits)
  ocfs2/dlm: do not purge lockres that is queued for assert master
  ocfs2: do not return DLM_MIGRATE_RESPONSE_MASTERY_REF to avoid endless loop during umount
  ocfs2: manually do the iput once ocfs2_add_entry failed in ocfs2_symlink and ocfs2_mknod
  ocfs2: fix a tiny race when running dirop_fileop_racer
  ocfs2/dlm: fix misuse of list_move_tail() in dlm_run_purge_list()
  ocfs2: refcount: take rw_lock in ocfs2_reflink
  ocfs2: revert "ocfs2: fix NULL pointer dereference when dismount and ocfs2rec simultaneously"
  ocfs2: fix deadlock when two nodes are converting same lock from PR to EX and idletimeout closes conn
  ocfs2: should add inode into orphan dir after updating entry in ocfs2_rename()
  mm: fix crashes from mbind() merging vmas
  checkpatch: reduce false positives when checking void function return statements
  ia64: arch/ia64/include/uapi/asm/fcntl.h needs personality.h
  DMA, CMA: fix possible memory leak
  slab: fix oops when reading /proc/slab_allocators
  shmem: fix faulting into a hole while it's punched
  mm: let mm_find_pmd fix buggy race with THP fault
  mm: thp: fix DEBUG_PAGEALLOC oops in copy_page_rep()
  kernel/watchdog.c: print traces for all cpus on lockup detection
  nmi: provide the option to issue an NMI back trace to every cpu but current
  Documentation/accounting/getdelays.c: add missing null-terminate after strncpy call
  ...
parents a497c3ba ac4fef4d
......@@ -9,6 +9,10 @@
Linus
----------
N: Matt Mackall
E: mpm@selenic.com
D: SLOB slab allocator
N: Matti Aarnio
E: mea@nic.funet.fi
D: Alpha systems hacking, IPv6 and other network related stuff
......
......@@ -314,6 +314,7 @@ int main(int argc, char *argv[])
break;
case 'm':
strncpy(cpumask, optarg, sizeof(cpumask));
cpumask[sizeof(cpumask) - 1] = '\0';
maskset = 1;
printf("cpumask %s maskset %d\n", cpumask, maskset);
break;
......
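
For illustration, the guard added above matters because strncpy() does not
NUL-terminate the destination when the source fills it. A minimal standalone
userspace sketch of the same pattern (buffer size and input are made up, not
taken from getdelays.c):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char cpumask[8];
        const char *arg = "0-127,200";            /* longer than the buffer */

        strncpy(cpumask, arg, sizeof(cpumask));   /* fills all 8 bytes, no NUL */
        cpumask[sizeof(cpumask) - 1] = '\0';      /* the added terminator */

        printf("cpumask %s\n", cpumask);          /* safely truncated: "0-127,2" */
        return 0;
    }
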
......@@ -3130,6 +3130,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
[KNL] Should the soft-lockup detector generate panics.
Format: <integer>
softlockup_all_cpu_backtrace=
[KNL] Should the soft-lockup detector generate
backtraces on all cpus.
Format: <integer>
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/laptops/sonypi.txt
......
......@@ -209,15 +209,12 @@ If memory device is found, memory hotplug code will be called.
4.2 Notify memory hot-add event by hand
------------
On powerpc, the firmware does not notify a memory hotplug event to the kernel.
Therefore, "probe" interface is supported to notify the event to the kernel.
This interface depends on CONFIG_ARCH_MEMORY_PROBE.
CONFIG_ARCH_MEMORY_PROBE is supported on powerpc only. On x86, this config
option is disabled by default since ACPI notifies a memory hotplug event to
the kernel, which performs its hotplug operation as the result. Please
enable this option if you need the "probe" interface for testing purposes
on x86.
On some architectures, the firmware may not notify the kernel of a memory
hotplug event. Therefore, the memory "probe" interface is supported to
explicitly notify the kernel. This interface depends on
CONFIG_ARCH_MEMORY_PROBE and can be configured on powerpc, sh, and x86
if hotplug is supported, although for x86 this should be handled by ACPI
notification.
Probe interface is located at
/sys/devices/system/memory/probe
......
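
In practice the interface is used by writing the start physical address of the
new memory block to that file. A hedged userspace sketch (the address below is
only a placeholder and must match a real, block-aligned range on the running
system):

    #include <stdio.h>

    int main(void)
    {
        /* Assumes CONFIG_ARCH_MEMORY_PROBE is set, so the file exists. */
        FILE *f = fopen("/sys/devices/system/memory/probe", "w");

        if (!f) {
            perror("probe");
            return 1;
        }
        fprintf(f, "0x100000000\n");   /* placeholder start address */
        return fclose(f) != 0;
    }
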
......@@ -75,6 +75,7 @@ show up in /proc/sys/kernel:
- shmall
- shmmax [ sysv ipc ]
- shmmni
- softlockup_all_cpu_backtrace
- stop-a [ SPARC only ]
- sysrq ==> Documentation/sysrq.txt
- sysctl_writes_strict
......@@ -783,6 +784,22 @@ via the /proc/sys interface:
==============================================================
softlockup_all_cpu_backtrace:
This value controls the soft lockup detector thread's behavior
when a soft lockup condition is detected as to whether or not
to gather further debug information. If enabled, each cpu will
be issued an NMI and instructed to capture stack trace.
This feature is only applicable for architectures which support
NMI.
0: do nothing. This is the default behavior.
1: on detection capture more debug information.
==============================================================
tainted:
Non-zero if the kernel has been tainted. Numeric values, which
......
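
Because the knob is exported under /proc/sys/kernel (see the list above), it
can be toggled at run time like any other sysctl. A hedged userspace sketch,
assuming a kernel built with the feature on an NMI-capable architecture:

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/sys/kernel/softlockup_all_cpu_backtrace", "w");

        if (!f) {
            perror("softlockup_all_cpu_backtrace");
            return 1;
        }
        fputs("1\n", f);   /* 0 = default, do nothing; 1 = capture traces on lockup */
        return fclose(f) != 0;
    }

The softlockup_all_cpu_backtrace= boot parameter documented earlier sets the
same value at boot time.
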
......@@ -702,7 +702,8 @@ The batch value of each per cpu pagelist is also updated as a result. It is
set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)
The initial value is zero. Kernel does not use this value at boot time to set
the high water marks for each per cpu page list.
the high water marks for each per cpu page list. If the user writes '0' to this
sysctl, it will revert to this default behavior.
==============================================================
......
......@@ -8196,13 +8196,15 @@ S: Maintained
F: drivers/usb/misc/sisusbvga/
SLAB ALLOCATOR
M: Christoph Lameter <cl@linux-foundation.org>
M: Christoph Lameter <cl@linux.com>
M: Pekka Enberg <penberg@kernel.org>
M: Matt Mackall <mpm@selenic.com>
M: David Rientjes <rientjes@google.com>
M: Joonsoo Kim <iamjoonsoo.kim@lge.com>
M: Andrew Morton <akpm@linux-foundation.org>
L: linux-mm@kvack.org
S: Maintained
F: include/linux/sl?b*.h
F: mm/sl?b.c
F: mm/sl?b*
SLEEPABLE READ-COPY UPDATE (SRCU)
M: Lai Jiangshan <laijs@cn.fujitsu.com>
......
......@@ -8,6 +8,7 @@
#define force_o_largefile() \
(personality(current->personality) != PER_LINUX32)
#include <linux/personality.h>
#include <asm-generic/fcntl.h>
#endif /* _ASM_IA64_FCNTL_H */
......@@ -89,7 +89,7 @@ static inline unsigned long get_softint(void)
return retval;
}
void arch_trigger_all_cpu_backtrace(void);
void arch_trigger_all_cpu_backtrace(bool);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
extern void *hardirq_stack[NR_CPUS];
......
......@@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
}
}
void arch_trigger_all_cpu_backtrace(void)
void arch_trigger_all_cpu_backtrace(bool include_self)
{
struct thread_info *tp = current_thread_info();
struct pt_regs *regs = get_irq_regs();
......@@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void)
spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
this_cpu = raw_smp_processor_id();
__global_reg_self(tp, regs, this_cpu);
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
if (include_self)
__global_reg_self(tp, regs, this_cpu);
smp_fetch_global_regs();
for_each_online_cpu(cpu) {
struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg;
struct global_reg_snapshot *gp;
if (!include_self && cpu == this_cpu)
continue;
gp = &global_cpu_snapshot[cpu].reg;
__global_reg_poll(gp);
......@@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void)
static void sysrq_handle_globreg(int key)
{
arch_trigger_all_cpu_backtrace();
arch_trigger_all_cpu_backtrace(true);
}
static struct sysrq_key_op sparc_globalreg_op = {
......
......@@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
extern void init_ISA_irqs(void);
#ifdef CONFIG_X86_LOCAL_APIC
void arch_trigger_all_cpu_backtrace(void);
void arch_trigger_all_cpu_backtrace(bool);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
#endif
......
......@@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
/* "in progress" flag of arch_trigger_all_cpu_backtrace */
static unsigned long backtrace_flag;
void arch_trigger_all_cpu_backtrace(void)
void arch_trigger_all_cpu_backtrace(bool include_self)
{
int i;
int cpu = get_cpu();
if (test_and_set_bit(0, &backtrace_flag))
if (test_and_set_bit(0, &backtrace_flag)) {
/*
* If there is already a trigger_all_cpu_backtrace() in progress
* (backtrace_flag == 1), don't output double cpu dump infos.
*/
put_cpu();
return;
}
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
if (!include_self)
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
printk(KERN_INFO "sending NMI to all CPUs:\n");
apic->send_IPI_all(NMI_VECTOR);
if (!cpumask_empty(to_cpumask(backtrace_mask))) {
pr_info("sending NMI to %s CPUs:\n",
(include_self ? "all" : "other"));
apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
}
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
if (cpumask_empty(to_cpumask(backtrace_mask)))
break;
mdelay(1);
touch_softlockup_watchdog();
}
clear_bit(0, &backtrace_flag);
smp_mb__after_atomic();
put_cpu();
}
static int
......
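
Both the sparc and x86 implementations above now take a bool argument. A brief
in-kernel sketch of the caller-side semantics (illustrative functions only, not
taken from the patch; the declaration comes from the asm/irq.h hunks shown):

    #include <asm/irq.h>

    static void dump_all_cpus(void)
    {
        /* old behaviour: every online cpu, including the caller */
        arch_trigger_all_cpu_backtrace(true);
    }

    static void dump_other_cpus(void)
    {
        /* new option: skip the calling cpu, useful when it has already
         * printed its own stack, e.g. in the soft-lockup detector */
        arch_trigger_all_cpu_backtrace(false);
    }
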
......@@ -176,14 +176,24 @@ static int __init cma_activate_area(struct cma *cma)
base_pfn = pfn;
for (j = pageblock_nr_pages; j; --j, pfn++) {
WARN_ON_ONCE(!pfn_valid(pfn));
/*
* alloc_contig_range requires the pfn range
* specified to be in the same zone. Make this
* simple by forcing the entire CMA resv range
* to be in the same zone.
*/
if (page_zone(pfn_to_page(pfn)) != zone)
return -EINVAL;
goto err;
}
init_cma_reserved_pageblock(pfn_to_page(base_pfn));
} while (--i);
mutex_init(&cma->lock);
return 0;
err:
kfree(cma->bitmap);
return -EINVAL;
}
static struct cma cma_areas[MAX_CMA_AREAS];
......
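
The change above converts an early return into a jump to a cleanup label so the
bitmap allocated earlier in cma_activate_area() is not leaked. A generic,
self-contained illustration of that idiom in plain C (names invented here):

    #include <stdlib.h>

    struct area {
        unsigned long *bitmap;
    };

    static int activate_area(struct area *a, int should_fail)
    {
        a->bitmap = calloc(16, sizeof(*a->bitmap));
        if (!a->bitmap)
            return -1;

        if (should_fail)      /* e.g. the page range crosses a zone boundary */
            goto err;         /* a bare return here would leak the bitmap */

        return 0;
    err:
        free(a->bitmap);
        a->bitmap = NULL;
        return -1;
    }

    int main(void)
    {
        struct area a;
        return activate_area(&a, 1) == -1 ? 0 : 1;   /* exercise the error path */
    }
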
......@@ -601,6 +601,7 @@ static int rtsx_pci_ms_drv_remove(struct platform_device *pdev)
pcr->slots[RTSX_MS_CARD].card_event = NULL;
msh = host->msh;
host->eject = true;
cancel_work_sync(&host->handle_req);
mutex_lock(&host->host_mutex);
if (host->req) {
......
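
The single line added above makes sure a queued request handler has finished
before the removal path starts taking the host apart. A hedged in-kernel sketch
of the ordering (simplified structure, invented names):

    #include <linux/workqueue.h>

    struct card_host {
        struct work_struct handle_req;
        bool eject;
    };

    static void card_host_remove(struct card_host *host)
    {
        host->eject = true;                    /* refuse new requests */
        cancel_work_sync(&host->handle_req);   /* wait out any in-flight work */
        /* only now is it safe to free state the work handler touches */
    }
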
......@@ -331,6 +331,7 @@ struct dlm_lock_resource
u16 state;
char lvb[DLM_LVB_LEN];
unsigned int inflight_locks;
unsigned int inflight_assert_workers;
unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
};
......@@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
......
......@@ -581,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
atomic_set(&res->asts_reserved, 0);
res->migration_pending = 0;
res->inflight_locks = 0;
res->inflight_assert_workers = 0;
res->dlm = dlm;
......@@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
wake_up(&res->wq);
}
void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
assert_spin_locked(&res->spinlock);
res->inflight_assert_workers++;
mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
dlm->name, res->lockname.len, res->lockname.name,
res->inflight_assert_workers);
}
static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
spin_lock(&res->spinlock);
__dlm_lockres_grab_inflight_worker(dlm, res);
spin_unlock(&res->spinlock);
}
static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
assert_spin_locked(&res->spinlock);
BUG_ON(res->inflight_assert_workers == 0);
res->inflight_assert_workers--;
mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
dlm->name, res->lockname.len, res->lockname.name,
res->inflight_assert_workers);
}
static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
spin_lock(&res->spinlock);
__dlm_lockres_drop_inflight_worker(dlm, res);
spin_unlock(&res->spinlock);
}
/*
* lookup a lock resource by name.
* may already exist in the hashtable.
......@@ -1603,7 +1641,8 @@ send_response:
mlog(ML_ERROR, "failed to dispatch assert master work\n");
response = DLM_MASTER_RESP_ERROR;
dlm_lockres_put(res);
}
} else
dlm_lockres_grab_inflight_worker(dlm, res);
} else {
if (res)
dlm_lockres_put(res);
......@@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
dlm_lockres_release_ast(dlm, res);
put:
dlm_lockres_drop_inflight_worker(dlm, res);
dlm_lockres_put(res);
mlog(0, "finished with dlm_assert_master_worker\n");
......@@ -3088,11 +3129,15 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
/* remove it so that only one mle will be found */
__dlm_unlink_mle(dlm, tmp);
__dlm_mle_detach_hb_events(dlm, tmp);
ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
"telling master to get ref for cleared out mle "
"during migration\n", dlm->name, namelen, name,
master, new_master);
if (tmp->type == DLM_MLE_MASTER) {
ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
"telling master to get ref "
"for cleared out mle during "
"migration\n", dlm->name,
namelen, name, master,
new_master);
}
}
spin_unlock(&tmp->spinlock);
}
......
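
The new inflight_assert_workers counter is grabbed when an assert-master worker
is dispatched and dropped when that worker completes, so other code can tell
whether work is still pending on the resource. A simplified sketch of the
pairing (not the ocfs2 code, invented names):

    #include <linux/spinlock.h>

    struct res_like {
        spinlock_t lock;
        unsigned int inflight_assert_workers;
    };

    static void grab_inflight_worker(struct res_like *res)
    {
        spin_lock(&res->lock);
        res->inflight_assert_workers++;   /* worker queued for this resource */
        spin_unlock(&res->lock);
    }

    static void drop_inflight_worker(struct res_like *res)
    {
        spin_lock(&res->lock);
        res->inflight_assert_workers--;   /* that worker has completed */
        spin_unlock(&res->lock);
    }
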
......@@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
mlog_errno(-ENOMEM);
/* retry!? */
BUG();
}
} else
__dlm_lockres_grab_inflight_worker(dlm, res);
} else /* put.. incase we are not the master */
dlm_lockres_put(res);
spin_unlock(&res->spinlock);
......
......@@ -259,12 +259,15 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
* refs on it. */
unused = __dlm_lockres_unused(lockres);
if (!unused ||
(lockres->state & DLM_LOCK_RES_MIGRATING)) {
(lockres->state & DLM_LOCK_RES_MIGRATING) ||
(lockres->inflight_assert_workers != 0)) {
mlog(0, "%s: res %.*s is in use or being remastered, "
"used %d, state %d\n", dlm->name,
lockres->lockname.len, lockres->lockname.name,
!unused, lockres->state);
list_move_tail(&dlm->purge_list, &lockres->purge);
"used %d, state %d, assert master workers %u\n",
dlm->name, lockres->lockname.len,
lockres->lockname.name,
!unused, lockres->state,
lockres->inflight_assert_workers);
list_move_tail(&lockres->purge, &dlm->purge_list);
spin_unlock(&lockres->spinlock);
continue;
}
......
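
A hedged reminder of the API contract behind the one-line fix above:
list_move_tail(entry, head) detaches 'entry' from whatever list it is on and
appends it to 'head'. The buggy call had the arguments swapped, splicing the
dlm purge list head onto the lock resource instead of re-queueing the resource.
Minimal in-kernel sketch (illustrative names):

    #include <linux/list.h>

    static LIST_HEAD(purge_list);

    static void requeue_resource(struct list_head *res_entry)
    {
        /* entry to move comes first, the destination list head second */
        list_move_tail(res_entry, &purge_list);
    }
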
......@@ -191,7 +191,9 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
} else if (status == DLM_RECOVERING ||
status == DLM_MIGRATING ||
status == DLM_FORWARD) {
status == DLM_FORWARD ||
status == DLM_NOLOCKMGR
) {
/* must clear the actions because this unlock
* is about to be retried. cannot free or do
* any list manipulation. */
......@@ -200,7 +202,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
res->lockname.name,
status==DLM_RECOVERING?"recovering":
(status==DLM_MIGRATING?"migrating":
"forward"));
(status == DLM_FORWARD ? "forward" :
"nolockmanager")));
actions = 0;
}
if (flags & LKM_CANCEL)
......@@ -364,7 +367,10 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
* updated state to the recovery master. this thread
* just needs to finish out the operation and call
* the unlockast. */
ret = DLM_NORMAL;
if (dlm_is_node_dead(dlm, owner))
ret = DLM_NORMAL;
else
ret = DLM_NOLOCKMGR;
} else {
/* something bad. this will BUG in ocfs2 */
ret = dlm_err_to_dlm_status(tmpret);
......@@ -638,7 +644,9 @@ retry:
if (status == DLM_RECOVERING ||
status == DLM_MIGRATING ||
status == DLM_FORWARD) {
status == DLM_FORWARD ||
status == DLM_NOLOCKMGR) {
/* We want to go away for a tiny bit to allow recovery
* / migration to complete on this resource. I don't
* know of any wait queue we could sleep on as this
......@@ -650,7 +658,7 @@ retry:
msleep(50);
mlog(0, "retrying unlock due to pending recovery/"
"migration/in-progress\n");
"migration/in-progress/reconnect\n");
goto retry;
}
......
......@@ -205,6 +205,21 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
return inode;
}
static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
struct dentry *dentry, struct inode *inode)
{
struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
ocfs2_lock_res_free(&dl->dl_lockres);
BUG_ON(dl->dl_count != 1);
spin_lock(&dentry_attach_lock);
dentry->d_fsdata = NULL;
spin_unlock(&dentry_attach_lock);
kfree(dl);
iput(inode);
}
static int ocfs2_mknod(struct inode *dir,
struct dentry *dentry,
umode_t mode,
......@@ -231,6 +246,7 @@ static int ocfs2_mknod(struct inode *dir,
sigset_t oldset;
int did_block_signals = 0;
struct posix_acl *default_acl = NULL, *acl = NULL;
struct ocfs2_dentry_lock *dl = NULL;
trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
(unsigned long long)OCFS2_I(dir)->ip_blkno,
......@@ -423,6 +439,8 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
dl = dentry->d_fsdata;
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
&lookup);
......@@ -469,6 +487,9 @@ leave:
* ocfs2_delete_inode will mutex_lock again.
*/
if ((status < 0) && inode) {
if (dl)
ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
clear_nlink(inode);
iput(inode);
......@@ -991,6 +1012,65 @@ leave:
return status;
}
static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
u64 src_inode_no, u64 dest_inode_no)
{
int ret = 0, i = 0;
u64 parent_inode_no = 0;
u64 child_inode_no = src_inode_no;
struct inode *child_inode;
#define MAX_LOOKUP_TIMES 32
while (1) {
child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
if (IS_ERR(child_inode)) {
ret = PTR_ERR(child_inode);
break;
}
ret = ocfs2_inode_lock(child_inode, NULL, 0);
if (ret < 0) {
iput(child_inode);
if (ret != -ENOENT)
mlog_errno(ret);
break;
}
ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
&parent_inode_no);
ocfs2_inode_unlock(child_inode, 0);
iput(child_inode);
if (ret < 0) {
ret = -ENOENT;
break;
}
if (parent_inode_no == dest_inode_no) {
ret = 1;
break;
}
if (parent_inode_no == osb->root_inode->i_ino) {
ret = 0;
break;
}
child_inode_no = parent_inode_no;
if (++i >= MAX_LOOKUP_TIMES) {
mlog(ML_NOTICE, "max lookup times reached, filesystem "
"may have nested directories, "
"src inode: %llu, dest inode: %llu.\n",
(unsigned long long)src_inode_no,
(unsigned long long)dest_inode_no);
ret = 0;
break;
}
}
return ret;
}
/*
* The only place this should be used is rename!
* if they have the same id, then the 1st one is the only one locked.
......@@ -1002,6 +1082,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
struct inode *inode2)
{
int status;
int inode1_is_ancestor, inode2_is_ancestor;
struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
struct buffer_head **tmpbh;
......@@ -1015,9 +1096,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
if (*bh2)
*bh2 = NULL;
/* we always want to lock the one with the lower lockid first. */
/* we always want to lock the one with the lower lockid first.
* and if they are nested, we lock ancestor first */
if (oi1->ip_blkno != oi2->ip_blkno) {
if (oi1->ip_blkno < oi2->ip_blkno) {
inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
oi1->ip_blkno);
if (inode1_is_ancestor < 0) {
status = inode1_is_ancestor;
goto bail;
}
inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
oi2->ip_blkno);
if (inode2_is_ancestor < 0) {
status = inode2_is_ancestor;
goto bail;
}
if ((inode1_is_ancestor == 1) ||
(oi1->ip_blkno < oi2->ip_blkno &&
inode2_is_ancestor == 0)) {
/* switch id1 and id2 around */
tmpbh = bh2;
bh2 = bh1;
......@@ -1098,6 +1196,7 @@ static int ocfs2_rename(struct inode *old_dir,
struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
struct ocfs2_dir_lookup_result target_insert = { NULL, };
bool should_add_orphan = false;