Commit 42682c6c authored by James Hogan

metag: SMP support

Add SMP support for metag. This allows Linux to take control of multiple
hardware threads on a single Meta core, treating them as separate Linux
CPUs.
Signed-off-by: James Hogan <james.hogan@imgtec.com>
parent fdabf525
/*
* Meta cache partition manipulation.
*
* Copyright 2010 Imagination Technologies Ltd.
*/
#ifndef _METAG_CACHEPART_H_
#define _METAG_CACHEPART_H_
/**
 * get_dcache_size() - Get size of data cache.
 *
 * Returns the total size of the data cache, derived from the core
 * configuration register.
 */
unsigned int get_dcache_size(void);
/**
 * get_icache_size() - Get size of code cache.
 *
 * Returns the total size of the instruction cache, derived from the core
 * configuration register.
 */
unsigned int get_icache_size(void);
/**
 * get_global_dcache_size() - Get the size of the thread's global dcache.
 *
 * Returns the size of the current thread's global dcache partition.
 */
unsigned int get_global_dcache_size(void);
/**
 * get_global_icache_size() - Get the size of the thread's global icache.
 *
 * Returns the size of the current thread's global icache partition.
 */
unsigned int get_global_icache_size(void);
/**
 * check_for_cache_aliasing() - Ensure that the bootloader has configured the
 * dcache and icache properly to avoid aliasing
 * @thread_id: Hardware thread ID
 *
 * Logs a message for each cache whose per-thread size, divided by the cache
 * associativity, exceeds the page size. Does nothing when the kernel is built
 * without CONFIG_METAG_META21.
 */
void check_for_cache_aliasing(int thread_id);
#endif
#ifndef __ASM_METAG_CORE_REG_H_
#define __ASM_METAG_CORE_REG_H_
#include <asm/metag_regs.h>
/*
 * Accessors for registers inside a core unit of a given hardware thread.
 * @unit selects the unit, @reg the register number within it, and @thread
 * the hardware thread whose register is accessed.
 */
extern void core_reg_write(int unit, int reg, int thread, unsigned int val);
extern unsigned int core_reg_read(int unit, int reg, int thread);
/*
 * These macros allow direct access from C to any register known to the
 * assembler. Example candidates are TXTACTCYC, TXIDLECYC, and TXPRIVEXT.
 *
 * The register name is pasted textually into the instruction, so @reg must
 * be a bare register name rather than a C expression.
 */
/*
 * __core_reg_get() - read register @reg.
 *
 * Emits "MOV %0,reg" and evaluates to the unsigned int that was read.
 */
#define __core_reg_get(reg) ({ \
unsigned int __grvalue; \
asm volatile("MOV %0," #reg \
: "=r" (__grvalue)); \
__grvalue; \
})
/*
 * __core_reg_set() - write @value to register @reg.
 *
 * @value is evaluated once, converted to unsigned int, and moved into the
 * register with "MOV reg,%0". @reg is pasted textually into the instruction,
 * so it must be a bare register name known to the assembler.
 */
#define __core_reg_set(reg, value) do { \
unsigned int __srvalue = (value); \
asm volatile("MOV " #reg ",%0" \
: \
: "r" (__srvalue)); \
} while (0)
/*
 * __core_reg_swap() - exchange register @reg with @value.
 *
 * Emits "SWAP reg,%0" with @value's contents as a read-write operand, then
 * stores the operand back into @value. @value is therefore both read and
 * assigned, so it must be a modifiable lvalue. @reg is pasted textually into
 * the instruction, so it must be a bare register name known to the assembler.
 */
#define __core_reg_swap(reg, value) do { \
unsigned int __srvalue = (value); \
asm volatile("SWAP " #reg ",%0" \
: "+r" (__srvalue)); \
(value) = __srvalue; \
} while (0)
#endif
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
#include <linux/cpumask.h>
/* The CPU number is read from the cpu field of the current thread_info. */
#define raw_smp_processor_id() (current_thread_info()->cpu)
/* Kinds of inter-processor message. */
enum ipi_msg_type {
IPI_CALL_FUNC,
IPI_CALL_FUNC_SINGLE,
IPI_RESCHEDULE,
};
/* Hooks for sending call-function IPIs to one CPU or to a mask of CPUs. */
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
/* Self-referential define so the preprocessor can detect that the hook exists. */
#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
/* C entry point that the secondary startup assembly branches to. */
asmlinkage void secondary_start_kernel(void);
/* Assembly entry point; its address is programmed as a new thread's PC. */
extern void secondary_startup(void);
#ifdef CONFIG_HOTPLUG_CPU
/* CPU hotplug hooks, only available with CONFIG_HOTPLUG_CPU. */
extern void __cpu_die(unsigned int cpu);
extern int __cpu_disable(void);
extern void cpu_die(void);
#endif
extern void smp_init_cpus(void);
#endif /* __ASM_SMP_H */
#ifndef _ASM_METAG_TOPOLOGY_H
#define _ASM_METAG_TOPOLOGY_H
#ifdef CONFIG_NUMA
/*
 * sched_domains SD_NODE_INIT for Meta machines
 *
 * Expands to a struct sched_domain compound literal with the balancing
 * parameters below. last_balance is initialised from jiffies at the point
 * where the macro is expanded.
 */
#define SD_NODE_INIT (struct sched_domain) { \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 8, \
.max_interval = 32, \
.busy_factor = 32, \
.imbalance_pct = 125, \
.cache_nice_tries = 2, \
.busy_idx = 3, \
.idle_idx = 2, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_FORK \
| SD_BALANCE_EXEC \
| SD_BALANCE_NEWIDLE \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
}
/*
 * Node lookup helpers. Every CPU and every parent node map to node 0, and
 * a PCI bus maps to no node (-1). Each macro still evaluates its argument,
 * cast to void, so that callers do not trip unused-variable warnings.
 *
 * Every argument is wrapped in parentheses so that an arbitrary expression
 * (for example a conditional) can be passed without changing how the
 * expansion parses.
 */
#define cpu_to_node(cpu) ((void)(cpu), 0)
#define parent_node(node) ((void)(node), 0)
#define cpumask_of_node(node) ((void)(node), cpu_online_mask)
#define pcibus_to_node(bus) ((void)(bus), -1)
/* A bus with no node (-1) may use any CPU; otherwise use the node's CPUs. */
#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
				cpu_all_mask : \
				cpumask_of_node(pcibus_to_node(bus)))
#endif
/* mc_capable() always evaluates to 1. */
#define mc_capable() (1)
const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
/* One CPU mask per CPU, indexed by CPU number. */
extern cpumask_t cpu_core_map[NR_CPUS];
/* Pointer to the cpu_core_map entry for @cpu. */
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
/* Included last; NOTE(review): presumably supplies defaults for anything not defined above - confirm. */
#include <asm-generic/topology.h>
#endif /* _ASM_METAG_TOPOLOGY_H */
/*
* Meta cache partition manipulation.
*
* Copyright 2010 Imagination Technologies Ltd.
*/
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/errno.h>
#include <asm/processor.h>
#include <asm/cachepart.h>
#include <asm/metag_isa.h>
#include <asm/metag_mem.h>
/*
 * Address of the data/instruction cache partition register for hardware
 * thread n: the thread 0 register plus n register strides.
 */
#define SYSC_DCPART(n) (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
#define SYSC_ICPART(n) (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
#define CACHE_ASSOCIATIVITY 4 /* 4 way set-associative */
/* Cache selectors passed to get_thread_cache_size(). */
#define ICACHE 0
#define DCACHE 1
/* The CORE_CONFIG2 register is not available on Meta 1 */
#ifdef CONFIG_METAG_META21
/*
 * get_dcache_size() - report the total size of the data cache.
 *
 * The DCSZ field of CORE_CONFIG2 is used as a power-of-two multiplier
 * applied to a base size of 0x1000.
 */
unsigned int get_dcache_size(void)
{
	unsigned int dcsz;

	dcsz = metag_in32(METAC_CORE_CONFIG2) & METAC_CORECFG2_DCSZ_BITS;
	dcsz >>= METAC_CORECFG2_DCSZ_S;

	return 0x1000 << dcsz;
}
/*
 * get_icache_size() - report the total size of the instruction cache.
 *
 * The ICSZ field of CORE_CONFIG2 is used as a power-of-two multiplier
 * applied to a base size of 0x1000.
 */
unsigned int get_icache_size(void)
{
	unsigned int icsz;

	icsz = metag_in32(METAC_CORE_CONFIG2) & METAC_CORE_C2ICSZ_BITS;
	icsz >>= METAC_CORE_C2ICSZ_S;

	return 0x1000 << icsz;
}
/*
 * get_global_dcache_size() - size of the running thread's global dcache
 * partition.
 *
 * Reads the global AND field from this hardware thread's data cache
 * partition register; the partition covers (field + 1) sixteenths of the
 * whole data cache.
 */
unsigned int get_global_dcache_size(void)
{
	unsigned int part_reg = metag_in32(SYSC_DCPART(hard_processor_id()));
	unsigned int and_field;

	and_field = (part_reg & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;

	return (get_dcache_size() * (and_field + 1)) >> 4;
}
/*
 * get_global_icache_size() - size of the running thread's global icache
 * partition.
 *
 * Reads the global AND field from this hardware thread's instruction cache
 * partition register; the partition covers (field + 1) sixteenths of the
 * whole instruction cache.
 */
unsigned int get_global_icache_size(void)
{
	unsigned int part_reg = metag_in32(SYSC_ICPART(hard_processor_id()));
	unsigned int and_field;

	and_field = (part_reg & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;

	return (get_icache_size() * (and_field + 1)) >> 4;
}
/*
 * get_thread_cache_size() - work out how much cache a hardware thread may use.
 * @cache:	Which cache to inspect, ICACHE or DCACHE.
 * @thread_id:	Hardware thread whose partition register is read.
 *
 * Returns the size of the thread's partition of the selected cache, 0 if
 * that cache is disabled, or -1 if the partition register holds a mask value
 * this code does not recognise.
 *
 * The return type is signed so that the -1 error value can be told apart
 * from a real size by the caller.
 */
static int get_thread_cache_size(unsigned int cache, int thread_id)
{
	unsigned int cache_size;
	unsigned int part_mask;
	unsigned int enabled;
	unsigned int offset = 0;

	enabled = (cache == DCACHE ? metag_in32(MMCU_DCACHE_CTRL_ADDR) & 0x1 :
		   metag_in32(MMCU_ICACHE_CTRL_ADDR) & 0x1);
	if (!enabled)
		return 0;

#if PAGE_OFFSET >= LINGLOBAL_BASE
	/* Checking for global cache */
	cache_size = (cache == DCACHE ? get_global_dcache_size() :
		      get_global_icache_size());
	/* The global partition mask is read 8 bits up from the local one */
	offset = 8;
#else
	cache_size = (cache == DCACHE ? get_dcache_size() :
		      get_icache_size());
#endif

	part_mask = (cache == DCACHE ?
		     (metag_in32(SYSC_DCPART(thread_id)) >> offset) & 0xF :
		     (metag_in32(SYSC_ICPART(thread_id)) >> offset) & 0xF);

	/* Each mask value selects a power-of-two fraction of the cache */
	switch (part_mask) {
	case 0xF:
		return cache_size;
	case 0x7:
		return cache_size / 2;
	case 0x3:
		return cache_size / 4;
	case 0x1:
		return cache_size / 8;
	case 0:
		return cache_size / 16;
	default:
		return -1;
	}
}

/*
 * check_for_cache_aliasing() - report cache configurations that can alias.
 * @thread_id:	Hardware thread ID to check.
 *
 * For the instruction and the data cache in turn, compares the size of one
 * way of the thread's cache partition against the page size and logs the
 * details if the way is larger than a page. Disabled caches are skipped, and
 * a cache whose size cannot be determined is reported and then skipped.
 */
void check_for_cache_aliasing(int thread_id)
{
	unsigned int cache_type;

	for (cache_type = ICACHE; cache_type <= DCACHE; cache_type++) {
		const char *name = cache_type ? "DCACHE" : "ICACHE";
		int thread_cache_size =
			get_thread_cache_size(cache_type, thread_id);

		if (thread_cache_size < 0) {
			pr_emerg("Can't read %s cache size\n", name);
			/* No valid size to compare against the page size */
			continue;
		}
		if (thread_cache_size == 0)
			/* Cache is off. No need to check for aliasing */
			continue;

		if (thread_cache_size / CACHE_ASSOCIATIVITY > PAGE_SIZE) {
			pr_emerg("Cache aliasing detected in %s on Thread %d\n",
				 name, thread_id);
			pr_warn("Total %s size: %u bytes\n",
				name,
				cache_type ? get_dcache_size()
					   : get_icache_size());
			pr_warn("Thread %s size: %d bytes\n",
				name, thread_cache_size);
			pr_warn("Page Size: %lu bytes\n", PAGE_SIZE);
		}
	}
}
#else
/*
 * check_for_cache_aliasing() - stub used when the kernel is built without
 * CONFIG_METAG_META21; no check is performed.
 */
void check_for_cache_aliasing(int thread_id)
{
	(void)thread_id;
}
#endif
/*
* Support for reading and writing Meta core internal registers.
*
* Copyright (C) 2011 Imagination Technologies Ltd.
*
*/
#include <linux/delay.h>
#include <linux/export.h>
#include <asm/core_reg.h>
#include <asm/global_lock.h>
#include <asm/hwthread.h>
#include <asm/io.h>
#include <asm/metag_mem.h>
#include <asm/metag_regs.h>
/*
 * Field masks and shifts of the TXUXXRXRQ request word: which unit, which
 * register within the unit, and which hardware thread to access.
 */
#define UNIT_BIT_MASK TXUXXRXRQ_UXX_BITS
#define REG_BIT_MASK TXUXXRXRQ_RX_BITS
#define THREAD_BIT_MASK TXUXXRXRQ_TX_BITS
#define UNIT_SHIFTS TXUXXRXRQ_UXX_S
#define REG_SHIFTS TXUXXRXRQ_RX_S
#define THREAD_SHIFTS TXUXXRXRQ_TX_S
/* Place x in the corresponding field; bits that do not fit are masked off. */
#define UNIT_VAL(x) (((x) << UNIT_SHIFTS) & UNIT_BIT_MASK)
#define REG_VAL(x) (((x) << REG_SHIFTS) & REG_BIT_MASK)
#define THREAD_VAL(x) (((x) << THREAD_SHIFTS) & THREAD_BIT_MASK)
/*
 * core_reg_write() - store a value into a register of a core unit.
 * @unit:	Unit that contains the register.
 * @reg:	Register number within the unit.
 * @thread:	Hardware thread whose register is written.
 * @val:	Value to store.
 *
 * asm/metag_regs.h lists the supported units (TXUPC_ID, TXUTR_ID, ...) and
 * the register numbers within them (TXMASKI_REGNUM, TXPOLLI_REGNUM, ...).
 *
 * Registers of TXUCT_ID are memory mapped and written directly. Every other
 * unit is reached through the TXUXXRXDT/TXUXXRXRQ register pair while the
 * global lock is held.
 */
void core_reg_write(int unit, int reg, int thread, unsigned int val)
{
	unsigned long flags;
	unsigned int request;

	if (unit == TXUCT_ID) {
		/* Memory mapped: no request port transaction required */
		void __iomem *cu_reg = __CU_addr(thread, reg);

		metag_out32(val, cu_reg);
		return;
	}

	request = UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread);

	__global_lock2(flags);

	/* Poll until the port reports ready */
	while ((metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT) == 0)
		udelay(10);

	/* The data goes in first, then the request that names its target */
	metag_out32(val, TXUXXRXDT);
	metag_out32(request, TXUXXRXRQ);

	/* Poll until the port reports ready again, i.e. the write is done */
	while ((metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT) == 0)
		udelay(10);

	__global_unlock2(flags);
}
EXPORT_SYMBOL(core_reg_write);
/*
 * core_reg_read() - fetch the content of a register in a core unit.
 * @unit:	Unit that contains the register.
 * @reg:	Register number within the unit.
 * @thread:	Hardware thread whose register is read.
 *
 * asm/metag_regs.h lists the supported units (TXUPC_ID, TXUTR_ID, ...) and
 * the register numbers within them (TXMASKI_REGNUM, TXPOLLI_REGNUM, ...).
 *
 * Registers of TXUCT_ID are memory mapped and read directly. Every other
 * unit is reached through the TXUXXRXRQ/TXUXXRXDT register pair while the
 * global lock is held.
 *
 * Returns the value of the register.
 */
unsigned int core_reg_read(int unit, int reg, int thread)
{
	unsigned long flags;
	unsigned int request;
	unsigned int result;

	if (unit == TXUCT_ID) {
		/* Memory mapped: no request port transaction required */
		void __iomem *cu_reg = __CU_addr(thread, reg);

		return metag_in32(cu_reg);
	}

	request = UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread) |
		  TXUXXRXRQ_RDnWR_BIT;

	__global_lock2(flags);

	/* Poll until the port reports ready */
	while ((metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT) == 0)
		udelay(10);

	/* Issue the read request for the chosen register */
	metag_out32(request, TXUXXRXRQ);

	/* Poll until the port reports ready again, i.e. the data is there */
	while ((metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT) == 0)
		udelay(10);

	result = metag_in32(TXUXXRXDT);

	__global_unlock2(flags);

	return result;
}
EXPORT_SYMBOL(core_reg_read);
......@@ -43,3 +43,15 @@ __start:
__exit:
XOR TXENABLE,D0Re0,D0Re0
.size __exit,.-__exit
#ifdef CONFIG_SMP
/*
 * Entry point for a secondary hardware thread. Sets up the stack pointer
 * from the secondary_data_stack variable and continues in C.
 */
.global _secondary_startup
.type _secondary_startup,function
_secondary_startup:
/* Build the address of the secondary_data_stack variable in A0StP */
MOVT A0StP,#HI(_secondary_data_stack)
ADD A0StP,A0StP,#LO(_secondary_data_stack)
/* Load the pointer stored in that variable */
GETD A0StP,[A0StP]
/* NOTE(review): presumably steps over the thread_info at the base of the stack page - confirm */
ADD A0StP,A0StP,#THREAD_INFO_SIZE
/* Continue in secondary_start_kernel() */
B _secondary_start_kernel
.size _secondary_startup,.-_secondary_startup
#endif
/*
* Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
*
* Copyright (C) 2002 ARM Limited, All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/cache.h>
#include <linux/profile.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/seq_file.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <asm/cacheflush.h>
#include <asm/cachepart.h>
#include <asm/core_reg.h>
#include <asm/cpu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/hwthread.h>
#include <asm/traps.h>
DECLARE_PER_CPU(PTBI, pTBI);
/*
 * Stack page of the idle task for the CPU currently being brought up.
 * Written by __cpu_up() before the secondary thread is started and cleared
 * again afterwards; the secondary startup code loads its stack pointer from it.
 */
void *secondary_data_stack;
/*
* structures for inter-processor calls
* - A collection of single bit ipi messages.
*/
struct ipi_data {
/* NOTE(review): presumably guards the fields below - confirm against users */
spinlock_t lock;
unsigned long ipi_count;
/* Bitmask of pending messages, one bit per message */
unsigned long bits;
};
/* One ipi_data instance per CPU, with its lock initialised as unlocked. */
static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
.lock = __SPIN_LOCK_UNLOCKED(ipi_data.lock),
};
/* Held by boot_secondary() while it programs the new thread's registers. */
static DEFINE_SPINLOCK(boot_lock);
/*
 * boot_secondary() - program a hardware thread's registers and enable it.
 * @thread:	Hardware thread to start; assumed to be a valid Meta hardware
 *		thread ID.
 * @idle:	Idle task for the new CPU (not used here).
 *
 * The register programming is done with boot_lock held. The function does
 * not wait for the thread to come up. Always returns 0.
 */
int __cpuinit boot_secondary(unsigned int thread, struct task_struct *idle)
{
	u32 txprivext;
	u32 txenable;

	spin_lock(&boot_lock);

	/* Register 0 of the PC unit gets the entry point, register 1 is zeroed */
	core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
	core_reg_write(TXUPC_ID, 1, thread, 0);

	/*
	 * Writing only PSTAT gives the thread privilege and, as a side
	 * effect, clears the potentially problematic status bits (ISTAT,
	 * CBMarker, CBMarkerI, LSM_STEP).
	 */
	core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);

	/* Make sure the minim enable bit is off */
	txprivext = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
	txprivext &= ~0x80;
	core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, txprivext);

	/* Setting ThreadEnable (0x1) in TXENABLE lets the thread run */
	txenable = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
	txenable |= 0x1;
	core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, txenable);

	/* The secondary thread is now starting up */
	spin_unlock(&boot_lock);

	return 0;
}
/*
 * __cpu_up() - bring a secondary CPU online.
 * @cpu:	Logical CPU number to start.
 * @idle:	Idle task whose stack the new CPU will run on.
 *
 * Returns 0 once the CPU is marked online, the error from boot_secondary()
 * if starting the thread failed, or -EIO if the CPU did not come online
 * within one second (HZ jiffies).
 */
int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
{
	unsigned int thread = cpu_2_hwthread_id[cpu];
	unsigned long deadline;
	int err;

	load_pgd(swapper_pg_dir, thread);
	flush_tlb_all();

	/* Publish the idle task's stack for the secondary startup code */
	secondary_data_stack = task_stack_page(idle);
	wmb();

	/* Kick the hardware thread */
	err = boot_secondary(thread, idle);
	if (!err) {
		/* Started: poll for it to appear online, giving up after HZ */
		deadline = jiffies + HZ;
		while (time_before(jiffies, deadline) && !cpu_online(cpu)) {
			udelay(10);
			barrier();
		}

		if (!cpu_online(cpu))
			err = -EIO;
	}

	secondary_data_stack = NULL;

	/* FIXME: We need to clean up the new idle thread. --rmk */
	if (err)
		pr_crit("CPU%u: processor failed to boot\n", cpu);

	return err;
}
#ifdef CONFIG_HOTPLUG_CPU
static DECLARE_COMPLETION(cpu_killed);
/*
 * __cpu_disable() - take the calling CPU out of service.
 *
 * Runs on the processor that is being shut down. Always returns 0.
 */
int __cpuexit __cpu_disable(void)
{
	struct task_struct *task;
	unsigned int this_cpu = smp_processor_id();

	/*
	 * Mark ourselves offline. After this point we can't return, and we
	 * must not schedule until we're ready to give up the cpu.
	 */
	set_cpu_online(this_cpu, false);

	/* Move interrupts over to other CPUs */
	migrate_irqs();

	/* Get rid of user cache and TLB mappings held by this CPU */
	flush_cache_all();
	local_flush_tlb_all();

	/* Drop this CPU from the vm mask of every process that has an mm */
	read_lock(&tasklist_lock);
	for_each_process(task) {
		if (!task->mm)
			continue;
		cpumask_clear_cpu(this_cpu, mm_cpumask(task->mm));
	}
	read_unlock(&tasklist_lock);

	return 0;
}
/*
 * __cpu_die() - wait for a CPU to finish shutting down.
 * @cpu:	CPU that was asked to shut down (used for the error message).
 *
 * Runs on the thread that requested the shutdown. Waits for the cpu_killed
 * completion and logs an error if it is not signalled before the timeout.
 */
void __cpuexit __cpu_die(unsigned int cpu)
{
	unsigned long timeout = msecs_to_jiffies(1);

	if (wait_for_completion_timeout(&cpu_killed, timeout))
		return;

	pr_err("CPU%u: unable to kill\n", cpu);
}
/*
* Called from the idle thread for the CPU which has been shutdown.
*
* Note that we do not return from this function. If this cpu is
* brought online again it will need to run secondary_startup().
*/