Commit fb1c8f93 authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] spinlock consolidation



This patch (written by me and also containing many suggestions of Arjan van
de Ven) does a major cleanup of the spinlock code.  It does the following
things:

 - consolidates and enhances the spinlock/rwlock debugging code

 - simplifies the asm/spinlock.h files

 - encapsulates the raw spinlock type and moves generic spinlock
   features (such as ->break_lock) into the generic code (see the
   sketch after this list).

 - cleans up the spinlock code hierarchy to get rid of the spaghetti.
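
For illustration, the encapsulated generic type now looks roughly like
this (a sketch of the resulting include/linux/spinlock_types.h; the exact
#ifdef conditions and field order may differ):

typedef struct {
        raw_spinlock_t raw_lock;        /* arch-specific lock word */
#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
        unsigned int break_lock;        /* generic feature, no longer per-arch */
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
        unsigned int magic, owner_cpu;  /* debug-only bookkeeping */
        void *owner;
#endif
} spinlock_t;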

Most notably there's now only a single variant of the debugging code,
located in lib/spinlock_debug.c.  (previously we had one SMP debugging
variant per architecture, plus a separate generic one for UP builds)

Also, I've enhanced the rwlock debugging facility: it now tracks
write-owners.  There is new spinlock-owner/CPU tracking on SMP builds too.
All locks have lockup detection now, which will work for both soft and hard
spin/rwlock lockups.
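
As a rough sketch of how the consolidated code detects lockups
(simplified from lib/spinlock_debug.c; the exact timeout, message text
and the place where the owner fields get updated may differ):

static void __spin_lock_debug(spinlock_t *lock)
{
        int print_once = 1;
        u64 i;

        for (;;) {
                /* spin for roughly one second before complaining: */
                for (i = 0; i < loops_per_jiffy * HZ; i++) {
                        cpu_relax();
                        if (__raw_spin_trylock(&lock->raw_lock)) {
                                /* owner/CPU tracking on SMP builds: */
                                lock->owner = current;
                                lock->owner_cpu = raw_smp_processor_id();
                                return;
                        }
                }
                /* lockup suspected - complain once, keep spinning: */
                if (print_once) {
                        print_once = 0;
                        printk("BUG: spinlock lockup on CPU#%d, %s/%d\n",
                                raw_smp_processor_id(), current->comm,
                                current->pid);
                        dump_stack();
                }
        }
}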

The arch-level include files now only contain the minimally necessary
subset of the spinlock code - all the rest that can be generalized now
lives in the generic headers:

 include/asm-i386/spinlock_types.h       |   16
 include/asm-x86_64/spinlock_types.h     |   16
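
For example, the i386 types header now boils down to roughly this
(a sketch for illustration, not the verbatim file):

/* include/asm-i386/spinlock_types.h (sketch) */
typedef struct {
        volatile unsigned int slock;
} raw_spinlock_t;

#define __RAW_SPIN_LOCK_UNLOCKED        { 1 }

typedef struct {
        volatile unsigned int lock;
} raw_rwlock_t;

#define __RAW_RW_LOCK_UNLOCKED          { RW_LOCK_BIAS }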

I have also split up the various spinlock variants into separate files,
making it easier to see which does what. The new layout is:

   SMP                         |  UP
   ----------------------------|-----------------------------------
   asm/spinlock_types.h        |  linux/spinlock_types_up.h
   linux/spinlock_types.h      |  linux/spinlock_types.h
   asm/spinlock.h              |  linux/spinlock_up.h
   linux/spinlock_api_smp.h    |  linux/spinlock_api_up.h
   linux/spinlock.h            |  linux/spinlock.h

/*
 * here's the role of the various spinlock/rwlock related include files:
 *
 * on SMP builds:
 *
 *  asm/spinlock_types.h: contains the raw_spinlock_t/raw_rwlock_t and the
 *                        initializers
 *
 *  linux/spinlock_types.h:
 *                        defines the generic type and initializers
 *
 *  asm/spinlock.h:       contains the __raw_spin_*()/etc. lowlevel
 *                        implementations, mostly inline assembly code
 *
 *   (also included on UP-debug builds:)
 *
 *  linux/spinlock_api_smp.h:
 *                        contains the prototypes for the _spin_*() APIs.
 *
 *  linux/spinlock.h:     builds the final spin_*() APIs.
 *
 * on UP builds:
 *
 *  linux/spinlock_types_up.h:
 *                        contains the generic, simplified UP spinlock type.
 *                        (which is an empty structure on non-debug builds)
 *
 *  linux/spinlock_types.h:
 *                        defines the generic type and initializers
 *
 *  linux/spinlock_up.h:
 *                        contains the __raw_spin_*()/etc. version of UP
 *                        builds. (which are NOPs on non-debug, non-preempt
 *                        builds)
 *
 *   (included on UP-non-debug builds:)
 *
 *  linux/spinlock_api_up.h:
 *                        builds the _spin_*() APIs.
 *
 *  linux/spinlock.h:     builds the final spin_*() APIs.
 */
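
Put together, a plain spin_lock() on an SMP build now funnels through
these layers roughly as follows (sketch only; the real code also handles
preempt-off sections, the irq/bh variants and the debug case):

/* linux/spinlock.h: */
#define spin_lock(lock)         _spin_lock(lock)

/* prototype in linux/spinlock_api_smp.h, definition in kernel/spinlock.c: */
void __lockfunc _spin_lock(spinlock_t *lock)
{
        preempt_disable();
        _raw_spin_lock(lock);
}

/* linux/spinlock.h, non-debug case (debug builds get the checking
 * version from lib/spinlock_debug.c instead): */
#define _raw_spin_lock(lock)    __raw_spin_lock(&(lock)->raw_lock)

/* asm/spinlock.h: __raw_spin_lock() - the low-level inline-asm implementation */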

All SMP and UP architectures are converted by this patch.

arm, i386, ia64, ppc, ppc64, s390/s390x and x64 were build-tested via
cross-compilers.  m32r, mips, sh and sparc have not been tested yet, but
should be mostly fine.

From: Grant Grundler <grundler@parisc-linux.org>

  Booted and lightly tested on a500-44 (64-bit, SMP kernel, dual CPU).
  Builds 32-bit SMP kernel (not booted or tested).  I did not try to build
  non-SMP kernels.  That should be trivial to fix up later if necessary.

  I converted the bit-ops atomic_hash lock to raw_spinlock_t.  Doing so avoids
  some ugly nesting of linux/*.h and asm/*.h files.  Those particular locks
  are well tested and contained entirely inside arch-specific code.  I do NOT
  expect any new issues to arise with them.

  If someone ever does need to use debug/metrics with them, then they will
  need to unravel this hairball between spinlocks, atomic ops, and bit ops
  that exists only because parisc has exactly one atomic instruction: LDCW
  (load and clear word).
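
For context, the hashed-lock scheme those atomic ops use now looks
roughly like this with the raw type (a sketch of the post-patch
asm-parisc/atomic.h helpers, for illustration only):

/* pick a lock from the hash, based on the address of the datum: */
#define ATOMIC_HASH(a) \
        (&__atomic_hash[(((unsigned long)(a)) / L1_CACHE_BYTES) & \
                        (ATOMIC_HASH_SIZE - 1)])

#define _atomic_spin_lock_irqsave(l, f) do {    \
        raw_spinlock_t *s = ATOMIC_HASH(l);     \
        local_irq_save(f);                      \
        __raw_spin_lock(s);                     \
} while (0)

#define _atomic_spin_unlock_irqrestore(l, f) do {       \
        raw_spinlock_t *s = ATOMIC_HASH(l);             \
        __raw_spin_unlock(s);                           \
        local_irq_restore(f);                           \
} while (0)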

From: "Luck, Tony" <tony.luck@intel.com>

   ia64 fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjanv@infradead.org>
Signed-off-by: Grant Grundler <grundler@parisc-linux.org>
Cc: Matthew Wilcox <willy@debian.org>
Signed-off-by: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Mikael Pettersson <mikpe@csd.uu.se>
Signed-off-by: Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 4327edf6
@@ -185,15 +185,6 @@ EXPORT_SYMBOL(smp_num_cpus);
EXPORT_SYMBOL(smp_call_function);
EXPORT_SYMBOL(smp_call_function_on_cpu);
EXPORT_SYMBOL(_atomic_dec_and_lock);
#ifdef CONFIG_DEBUG_SPINLOCK
EXPORT_SYMBOL(_raw_spin_unlock);
EXPORT_SYMBOL(debug_spin_lock);
EXPORT_SYMBOL(debug_spin_trylock);
#endif
#ifdef CONFIG_DEBUG_RWLOCK
EXPORT_SYMBOL(_raw_write_lock);
EXPORT_SYMBOL(_raw_read_lock);
#endif
EXPORT_SYMBOL(cpu_present_mask);
#endif /* CONFIG_SMP */
......
@@ -989,175 +989,3 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
preempt_enable();
}
#ifdef CONFIG_DEBUG_SPINLOCK
void
_raw_spin_unlock(spinlock_t * lock)
{
mb();
lock->lock = 0;
lock->on_cpu = -1;
lock->previous = NULL;
lock->task = NULL;
lock->base_file = "none";
lock->line_no = 0;
}
void
debug_spin_lock(spinlock_t * lock, const char *base_file, int line_no)
{
long tmp;
long stuck;
void *inline_pc = __builtin_return_address(0);
unsigned long started = jiffies;
int printed = 0;
int cpu = smp_processor_id();
stuck = 1L << 30;
try_again:
/* Use sub-sections to put the actual loop at the end
of this object file's text section so as to perfect
branch prediction. */
__asm__ __volatile__(
"1: ldl_l %0,%1\n"
" subq %2,1,%2\n"
" blbs %0,2f\n"
" or %0,1,%0\n"
" stl_c %0,%1\n"
" beq %0,3f\n"
"4: mb\n"
".subsection 2\n"
"2: ldl %0,%1\n"
" subq %2,1,%2\n"
"3: blt %2,4b\n"
" blbs %0,2b\n"
" br 1b\n"
".previous"
: "=r" (tmp), "=m" (lock->lock), "=r" (stuck)
: "m" (lock->lock), "2" (stuck) : "memory");
if (stuck < 0) {
printk(KERN_WARNING
"%s:%d spinlock stuck in %s at %p(%d)"
" owner %s at %p(%d) %s:%d\n",
base_file, line_no,
current->comm, inline_pc, cpu,
lock->task->comm, lock->previous,
lock->on_cpu, lock->base_file, lock->line_no);
stuck = 1L << 36;
printed = 1;
goto try_again;
}
/* Exiting. Got the lock. */
lock->on_cpu = cpu;
lock->previous = inline_pc;
lock->task = current;
lock->base_file = base_file;
lock->line_no = line_no;
if (printed) {
printk(KERN_WARNING
"%s:%d spinlock grabbed in %s at %p(%d) %ld ticks\n",
base_file, line_no, current->comm, inline_pc,
cpu, jiffies - started);
}
}
int
debug_spin_trylock(spinlock_t * lock, const char *base_file, int line_no)
{
int ret;
if ((ret = !test_and_set_bit(0, lock))) {
lock->on_cpu = smp_processor_id();
lock->previous = __builtin_return_address(0);
lock->task = current;
} else {
lock->base_file = base_file;
lock->line_no = line_no;
}
return ret;
}
#endif /* CONFIG_DEBUG_SPINLOCK */
#ifdef CONFIG_DEBUG_RWLOCK
void _raw_write_lock(rwlock_t * lock)
{
long regx, regy;
int stuck_lock, stuck_reader;
void *inline_pc = __builtin_return_address(0);
try_again:
stuck_lock = 1<<30;
stuck_reader = 1<<30;
__asm__ __volatile__(
"1: ldl_l %1,%0\n"
" blbs %1,6f\n"
" blt %1,8f\n"
" mov 1,%1\n"
" stl_c %1,%0\n"
" beq %1,6f\n"
"4: mb\n"
".subsection 2\n"
"6: blt %3,4b # debug\n"
" subl %3,1,%3 # debug\n"
" ldl %1,%0\n"
" blbs %1,6b\n"
"8: blt %4,4b # debug\n"
" subl %4,1,%4 # debug\n"
" ldl %1,%0\n"
" blt %1,8b\n"
" br 1b\n"
".previous"
: "=m" (*(volatile int *)lock), "=&r" (regx), "=&r" (regy),
"=&r" (stuck_lock), "=&r" (stuck_reader)
: "m" (*(volatile int *)lock), "3" (stuck_lock), "4" (stuck_reader) : "memory");
if (stuck_lock < 0) {
printk(KERN_WARNING "write_lock stuck at %p\n", inline_pc);
goto try_again;
}
if (stuck_reader < 0) {
printk(KERN_WARNING "write_lock stuck on readers at %p\n",
inline_pc);
goto try_again;
}
}
void _raw_read_lock(rwlock_t * lock)
{
long regx;
int stuck_lock;
void *inline_pc = __builtin_return_address(0);
try_again:
stuck_lock = 1<<30;
__asm__ __volatile__(
"1: ldl_l %1,%0;"
" blbs %1,6f;"
" subl %1,2,%1;"
" stl_c %1,%0;"
" beq %1,6f;"
"4: mb\n"
".subsection 2\n"
"6: ldl %1,%0;"
" blt %2,4b # debug\n"
" subl %2,1,%2 # debug\n"
" blbs %1,6b;"
" br 1b\n"
".previous"
: "=m" (*(volatile int *)lock), "=&r" (regx), "=&r" (stuck_lock)
: "m" (*(volatile int *)lock), "2" (stuck_lock) : "memory");
if (stuck_lock < 0) {
printk(KERN_WARNING "read_lock stuck at %p\n", inline_pc);
goto try_again;
}
}
#endif /* CONFIG_DEBUG_RWLOCK */
@@ -491,12 +491,7 @@ init_handler_platform (pal_min_state_area_t *ms,
unw_init_from_interruption(&info, current, pt, sw);
ia64_do_show_stack(&info, NULL);
#ifdef CONFIG_SMP
/* read_trylock() would be handy... */
if (!tasklist_lock.write_lock)
read_lock(&tasklist_lock);
#endif
{
if (read_trylock(&tasklist_lock)) {
struct task_struct *g, *t;
do_each_thread (g, t) {
if (t == current)
@@ -506,10 +506,6 @@ init_handler_platform (pal_min_state_area_t *ms,
show_stack(t, NULL);
} while_each_thread (g, t);
}
#ifdef CONFIG_SMP
if (!tasklist_lock.write_lock)
read_unlock(&tasklist_lock);
#endif
printk("\nINIT dump complete. Please reboot now.\n");
while (1); /* hang city if no debugger */
......
@@ -892,7 +892,6 @@ unsigned long send_IPI_mask_phys(cpumask_t physid_mask, int ipi_num,
int try)
{
spinlock_t *ipilock;
unsigned long flags = 0;
volatile unsigned long *ipicr_addr;
unsigned long ipicr_val;
unsigned long my_physid_mask;
@@ -916,50 +915,27 @@ unsigned long send_IPI_mask_phys(cpumask_t physid_mask, int ipi_num,
* write IPICRi (send IPIi)
* unlock ipi_lock[i]
*/
spin_lock(ipilock);
__asm__ __volatile__ (
";; LOCK ipi_lock[i] \n\t"
";; CHECK IPICRi == 0 \n\t"
".fillinsn \n"
"1: \n\t"
"mvfc %1, psw \n\t"
"clrpsw #0x40 -> nop \n\t"
DCACHE_CLEAR("r4", "r5", "%2")
"lock r4, @%2 \n\t"
"addi r4, #-1 \n\t"
"unlock r4, @%2 \n\t"
"mvtc %1, psw \n\t"
"bnez r4, 2f \n\t"
LOCK_SECTION_START(".balign 4 \n\t")
".fillinsn \n"
"2: \n\t"
"ld r4, @%2 \n\t"
"blez r4, 2b \n\t"
"ld %0, @%1 \n\t"
"and %0, %4 \n\t"
"beqz %0, 2f \n\t"
"bnez %3, 3f \n\t"
"bra 1b \n\t"
LOCK_SECTION_END
";; CHECK IPICRi == 0 \n\t"
".fillinsn \n"
"3: \n\t"
"ld %0, @%3 \n\t"
"and %0, %6 \n\t"
"beqz %0, 4f \n\t"
"bnez %5, 5f \n\t"
"bra 3b \n\t"
";; WRITE IPICRi (send IPIi) \n\t"
".fillinsn \n"
"4: \n\t"
"st %4, @%3 \n\t"
";; UNLOCK ipi_lock[i] \n\t"
"2: \n\t"
"st %2, @%1 \n\t"
".fillinsn \n"
"5: \n\t"
"ldi r4, #1 \n\t"
"st r4, @%2 \n\t"
"3: \n\t"
: "=&r"(ipicr_val)
: "r"(flags), "r"(&ipilock->slock), "r"(ipicr_addr),
"r"(mask), "r"(try), "r"(my_physid_mask)
: "memory", "r4"
#ifdef CONFIG_CHIP_M32700_TS1
, "r5"
#endif /* CONFIG_CHIP_M32700_TS1 */
: "r"(ipicr_addr), "r"(mask), "r"(try), "r"(my_physid_mask)
: "memory"
);
spin_unlock(ipilock);
return ipicr_val;
}
@@ -20,14 +20,7 @@
* has a cmpxchg, and where atomic->value is an int holding
* the value of the atomic (i.e. the high bits aren't used
* for a lock or anything like that).
*
* N.B. ATOMIC_DEC_AND_LOCK gets defined in include/linux/spinlock.h
* if spinlocks are empty and thus atomic_dec_and_lock is defined
* to be atomic_dec_and_test - in that case we don't need it
* defined here as well.
*/
#ifndef ATOMIC_DEC_AND_LOCK
int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
{
int counter;
@@ -52,4 +45,3 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
}
EXPORT_SYMBOL(_atomic_dec_and_lock);
#endif /* ATOMIC_DEC_AND_LOCK */
@@ -5,5 +5,3 @@
lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o
obj-y := iomap.o
lib-$(CONFIG_SMP) += debuglocks.o
@@ -13,8 +13,8 @@
#include <asm/atomic.h>
#ifdef CONFIG_SMP
spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
[0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
[0 ... (ATOMIC_HASH_SIZE-1)] = __RAW_SPIN_LOCK_UNLOCKED
};
#endif
......
/*
* Debugging versions of SMP locking primitives.
*
* Copyright (C) 2004 Thibaut VARENE <varenet@parisc-linux.org>
*
* Some code stollen from alpha & sparc64 ;)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* We use pdc_printf() throughout the file for all output messages, to avoid
* losing messages because of disabled interrupts. Since we're using these
* messages for debugging purposes, it makes sense not to send them to the
* linux console.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/hardirq.h> /* in_interrupt() */
#include <asm/system.h>
#include <asm/hardirq.h> /* in_interrupt() */
#include <asm/pdc.h>
#undef INIT_STUCK
#define INIT_STUCK 1L << 30
#ifdef CONFIG_DEBUG_SPINLOCK
void _dbg_spin_lock(spinlock_t * lock, const char *base_file, int line_no)
{
volatile unsigned int *a;
long stuck = INIT_STUCK;
void *inline_pc = __builtin_return_address(0);
unsigned long started = jiffies;
int printed = 0;
int cpu = smp_processor_id();
try_again:
/* Do the actual locking */
/* <T-Bone> ggg: we can't get stuck on the outter loop?
* <ggg> T-Bone: We can hit the outer loop
* alot if multiple CPUs are constantly racing for a lock
* and the backplane is NOT fair about which CPU sees
* the update first. But it won't hang since every failed
* attempt will drop us back into the inner loop and
* decrement `stuck'.
* <ggg> K-class and some of the others are NOT fair in the HW
* implementation so we could see false positives.
* But fixing the lock contention is easier than
* fixing the HW to be fair.
* <tausq> __ldcw() returns 1 if we get the lock; otherwise we
* spin until the value of the lock changes, or we time out.
*/
mb();
a = __ldcw_align(lock);
while (stuck && (__ldcw(a) == 0))
while ((*a == 0) && --stuck);
mb();
if (unlikely(stuck <= 0)) {
pdc_printf(
"%s:%d: spin_lock(%s/%p) stuck in %s at %p(%d)"
" owned by %s:%d in %s at %p(%d)\n",
base_file, line_no, lock->module, lock,
current->comm, inline_pc, cpu,
lock->bfile, lock->bline, lock->task->comm,
lock->previous, lock->oncpu);
stuck = INIT_STUCK;
printed = 1;
goto try_again;
}
/* Exiting. Got the lock. */
lock->oncpu = cpu;
lock->previous = inline_pc;
lock->task = current;
lock->bfile = (char *)base_file;
lock->bline = line_no;
if (unlikely(printed)) {
pdc_printf(
"%s:%d: spin_lock grabbed in %s at %p(%d) %ld ticks\n",
base_file, line_no, current->comm, inline_pc,
cpu, jiffies - started);
}
}
void _dbg_spin_unlock(spinlock_t * lock, const char *base_file, int line_no)
{
CHECK_LOCK(lock);
volatile unsigned int *a;
mb();
a = __ldcw_align(lock);
if (unlikely((*a != 0) && lock->babble)) {
lock->babble--;
pdc_printf(
"%s:%d: spin_unlock(%s:%p) not locked\n",
base_file, line_no, lock->module, lock);
}
*a = 1;
mb();
}
int _dbg_spin_trylock(spinlock_t * lock, const char *base_file, int line_no)
{
int ret;
volatile unsigned int *a;
mb();
a = __ldcw_align(lock);
ret = (__ldcw(a) != 0);
mb();
if (ret) {
lock->oncpu = smp_processor_id();
lock->previous = __builtin_return_address(0);
lock->task = current;
} else {
lock->bfile = (char *)base_file;
lock->bline = line_no;
}
return ret;
}
#endif /* CONFIG_DEBUG_SPINLOCK */
#ifdef CONFIG_DEBUG_RWLOCK
/* Interrupts trouble detailed explanation, thx Grant:
*
* o writer (wants to modify data) attempts to acquire the rwlock
* o He gets the write lock.
* o Interupts are still enabled, we take an interrupt with the
* write still holding the lock.
* o interrupt handler tries to acquire the rwlock for read.
* o deadlock since the writer can't release it at this point.
*
* In general, any use of spinlocks that competes between "base"
* level and interrupt level code will risk deadlock. Interrupts
* need to be disabled in the base level routines to avoid it.
* Or more precisely, only the IRQ the base level routine
* is competing with for the lock. But it's more efficient/faster
* to just disable all interrupts on that CPU to guarantee
* once it gets the lock it can release it quickly too.
*/
void _dbg_write_lock(rwlock_t *rw, const char *bfile, int bline)
{
void *inline_pc = __builtin_return_address(0);
unsigned long started = jiffies;
long stuck = INIT_STUCK;
int printed = 0;
int cpu = smp_processor_id();
if(unlikely(in_interrupt())) { /* acquiring write lock in interrupt context, bad idea */
pdc_printf("write_lock caller: %s:%d, IRQs enabled,\n", bfile, bline);
BUG();
}
/* Note: if interrupts are disabled (which is most likely), the printk
will never show on the console. We might need a polling method to flush
the dmesg buffer anyhow. */
retry:
_raw_spin_lock(&rw->lock);
if(rw->counter != 0) {
/* this basically never happens */
_raw_spin_unlock(&rw->lock);
stuck--;
if ((unlikely(stuck <= 0)) && (rw->counter < 0)) {
pdc_printf(
"%s:%d: write_lock stuck on writer"
" in %s at %p(%d) %ld ticks\n",
bfile, bline, current->comm, inline_pc,
cpu, jiffies - started);
stuck = INIT_STUCK;
printed = 1;
}
else if (unlikely(stuck <= 0)) {
pdc_printf(
"%s:%d: write_lock stuck on reader"
" in %s at %p(%d) %ld ticks\n",
bfile, bline, current->comm, inline_pc,
cpu, jiffies - started);
stuck = INIT_STUCK;
printed = 1;
}
while(rw->counter != 0);
goto retry;
}
/* got it. now leave without unlocking */
rw->counter = -1; /* remember we are locked */
if (unlikely(printed)) {
pdc_printf(
"%s:%d: write_lock grabbed in %s at %p(%d) %ld ticks\n",
bfile, bline, current->comm, inline_pc,
cpu, jiffies - started);
}
}
int _dbg_write_trylock(rwlock_t *rw, const char *bfile, int bline)
{
#if 0
void *inline_pc = __builtin_return_address(0);
int cpu = smp_processor_id();
#endif
if(unlikely(in_interrupt())) { /* acquiring write lock in interrupt context, bad idea */
pdc_printf("write_lock caller: %s:%d, IRQs enabled,\n", bfile, bline);
BUG();
}
/* Note: if interrupts are disabled (which is most likely), the printk
will never show on the console. We might need a polling method to flush
the dmesg buffer anyhow. */
_raw_spin_lock(&rw->lock);
if(rw->counter != 0) {
/* this basically never happens */
_raw_spin_unlock(&rw->lock);
return 0;
}
/* got it. now leave without unlocking */
rw->counter = -1; /* remember we are locked */
#if 0
pdc_printf("%s:%d: try write_lock grabbed in %s at %p(%d)\n",
bfile, bline, current->comm, inline_pc, cpu);
#endif
return 1;
}
void _dbg_read_lock(rwlock_t * rw, const char *bfile, int bline)
{
#if 0
void *inline_pc = __builtin_return_address(0);
unsigned long started = jiffies;
int cpu = smp_processor_id();
#endif
unsigned long flags;
local_irq_save(flags);
_raw_spin_lock(&rw->lock);
rw->counter++;
#if 0
pdc_printf(
"%s:%d: read_lock grabbed in %s at %p(%d) %ld ticks\n",
bfile, bline, current->comm, inline_pc,
cpu, jiffies - started);
#endif
_raw_spin_unlock(&rw->lock);
local_irq_restore(flags);
}
#endif /* CONFIG_DEBUG_RWLOCK */
@@ -4,6 +4,5 @@
obj-y := checksum.o string.o strcase.o dec_and_lock.o div64.o
obj-$(CONFIG_SMP) += locks.o
obj-$(CONFIG_8xx) += rheap.o
obj-$(CONFIG_CPM2) += rheap.o
@@ -11,14 +11,7 @@
* has a cmpxchg, and where atomic->value is an int holding
* the value of the atomic (i.e. the high bits aren't used
* for a lock or anything like that).