Commit 95322526 authored by Lorenzo Pieralisi's avatar Lorenzo Pieralisi

arm64: kernel: cpu_{suspend/resume} implementation

Kernel subsystems like CPU idle and suspend to RAM require a generic
mechanism to suspend a processor, save its context and put it into
a quiescent state. The cpu_{suspend}/{resume} implementation provides
such a framework through a kernel interface allowing to save/restore
registers, flush the context to DRAM and suspend/resume to/from
low-power states where processor context may be lost.

The CPU suspend implementation relies on the suspend protocol registered
in CPU operations to carry out a suspend request after context is
saved and flushed to DRAM. The cpu_suspend interface:

int cpu_suspend(unsigned long arg);

allows to pass an opaque parameter that is handed over to the suspend CPU
operations back-end so that it can take action according to the
semantics attached to it. The arg parameter allows suspend to RAM and CPU
idle drivers to communicate to suspend protocol back-ends; it requires
standardization so that the interface can be reused seamlessly across
systems, paving the way for generic drivers.

Context memory is allocated on the stack, whose address is stashed in a
per-cpu variable to keep track of it and passed to core functions that
save/restore the registers required by the architecture.

Even though, upon successful execution, the cpu_suspend function shuts
down the suspending processor, the warm boot resume mechanism, based
on the cpu_resume function, makes the resume path operate as a
cpu_suspend function return, so that cpu_suspend can be treated as a C
function by the caller, which simplifies coding the PM drivers that rely
on the cpu_suspend API.

Upon context save, the minimal amount of memory is flushed to DRAM so
that it can be retrieved when the MMU is off and caches are not searched.

The suspend CPU operation, depending on the required operations (eg CPU vs
Cluster shutdown) is in charge of flushing the cache hierarchy either
implicitly (by calling firmware implementations like PSCI) or explicitly
by executing the required cache maintainance functions.

Debug exceptions are disabled during cpu_{suspend}/{resume} operations
so that debug registers can be saved and restored properly preventing
preemption from debug agents enabled in the kernel.
Signed-off-by: default avatarLorenzo Pieralisi <>
parent 6732bc65
......@@ -39,6 +39,9 @@ struct device_node;
* from the cpu to be killed.
* @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the
* cpu being killed.
* @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
* to wrong parameters or error conditions. Called from the
* CPU being suspended. Must be called with IRQs disabled.
struct cpu_operations {
const char *name;
......@@ -50,6 +53,9 @@ struct cpu_operations {
int (*cpu_disable)(unsigned int cpu);
void (*cpu_die)(unsigned int cpu);
int (*cpu_suspend)(unsigned long);
extern const struct cpu_operations *cpu_ops[NR_CPUS];
......@@ -15,4 +15,13 @@ struct cpu_suspend_ctx {
u64 ctx_regs[NR_CTX_REGS];
u64 sp;
} __aligned(16);
struct sleep_save_sp {
phys_addr_t *save_ptr_stash;
phys_addr_t save_ptr_stash_phys;
extern void cpu_resume(void);
extern int cpu_suspend(unsigned long);
......@@ -25,6 +25,8 @@
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/cputable.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/vdso_datapage.h>
#include <linux/kbuild.h>
......@@ -137,6 +139,15 @@ int main(void)
DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask));
DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff));
DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
return 0;
#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
* Implementation of MPIDR_EL1 hash algorithm through shifting
* and OR'ing.
* @dst: register containing hash result
* @rs0: register containing affinity level 0 bit shift
* @rs1: register containing affinity level 1 bit shift
* @rs2: register containing affinity level 2 bit shift
* @rs3: register containing affinity level 3 bit shift
* @mpidr: register containing MPIDR_EL1 value
* @mask: register containing MPIDR mask
* Pseudo C-code:
*u32 dst;
*compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) {
* u32 aff0, aff1, aff2, aff3;
* u64 mpidr_masked = mpidr & mask;
* aff0 = mpidr_masked & 0xff;
* aff1 = mpidr_masked & 0xff00;
* aff2 = mpidr_masked & 0xff0000;
* aff2 = mpidr_masked & 0xff00000000;
* dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3);
* Input registers: rs0, rs1, rs2, rs3, mpidr, mask
* Output register: dst
* Note: input and output registers must be disjoint register sets
(eg: a macro instance with mpidr = x1 and dst = x1 is invalid)
.macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask
and \mpidr, \mpidr, \mask // mask out MPIDR bits
and \dst, \mpidr, #0xff // mask=aff0
lsr \dst ,\dst, \rs0 // dst=aff0>>rs0
and \mask, \mpidr, #0xff00 // mask = aff1
lsr \mask ,\mask, \rs1
orr \dst, \dst, \mask // dst|=(aff1>>rs1)
and \mask, \mpidr, #0xff0000 // mask = aff2
lsr \mask ,\mask, \rs2
orr \dst, \dst, \mask // dst|=(aff2>>rs2)
and \mask, \mpidr, #0xff00000000 // mask = aff3
lsr \mask ,\mask, \rs3
orr \dst, \dst, \mask // dst|=(aff3>>rs3)
* Save CPU state for a suspend. This saves callee registers, and allocates
* space on the kernel stack to save the CPU specific registers + some
* other data for resume.
* x0 = suspend finisher argument
stp x29, lr, [sp, #-96]!
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
mov x2, sp
sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx
mov x1, sp
* x1 now points to struct cpu_suspend_ctx allocated on the stack
str x2, [x1, #CPU_CTX_SP]
ldr x2, =sleep_save_sp
ldr x2, [x2, #SLEEP_SAVE_SP_VIRT]
mrs x7, mpidr_el1
ldr x9, =mpidr_hash
ldr x10, [x9, #MPIDR_HASH_MASK]
* Following code relies on the struct mpidr_hash
* members size.
ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS]
ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
add x2, x2, x8, lsl #3
bl __cpu_suspend_finisher
* Never gets here, unless suspend fails.
* Successful cpu_suspend should return from cpu_resume, returning
* through this code path is considered an error
* If the return value is set to 0 force x0 = -EOPNOTSUPP
* to make sure a proper error condition is propagated
cmp x0, #0
mov x3, #-EOPNOTSUPP
csel x0, x3, x0, eq
add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer
ldp x19, x20, [sp, #16]
ldp x21, x22, [sp, #32]
ldp x23, x24, [sp, #48]
ldp x25, x26, [sp, #64]
ldp x27, x28, [sp, #80]
ldp x29, lr, [sp], #96
* x0 must contain the sctlr value retrieved from restored context
ldr x3, =cpu_resume_after_mmu
msr sctlr_el1, x0 // restore sctlr_el1
br x3 // global jump to virtual address
mov x0, #0 // return zero on success
ldp x19, x20, [sp, #16]
ldp x21, x22, [sp, #32]
ldp x23, x24, [sp, #48]
ldp x25, x26, [sp, #64]
ldp x27, x28, [sp, #80]
ldp x29, lr, [sp], #96
bl el2_setup // if in EL2 drop to EL1 cleanly
mrs x1, mpidr_el1
adr x4, mpidr_hash_ptr
ldr x5, [x4]
add x8, x4, x5 // x8 = struct mpidr_hash phys address
/* retrieve mpidr_hash members to compute the hash */
ldr x2, [x8, #MPIDR_HASH_MASK]
ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS]
ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
/* x7 contains hash index, let's use it to grab context pointer */
mov x7, xzr
adr x0, sleep_save_sp
ldr x0, [x0, #SLEEP_SAVE_SP_PHYS]
ldr x0, [x0, x7, lsl #3]
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
adr x1, sleep_idmap_phys
/* load physical address of identity map page table in x1 */
ldr x1, [x1]
mov sp, x2
* cpu_do_resume expects x0 to contain context physical address
* pointer and x1 to contain physical address of 1:1 page tables
bl cpu_do_resume // PC relative jump, MMU off
b cpu_resume_mmu // Resume MMU, never returns
.align 3
* offset of mpidr_hash symbol from current location
* used to obtain run-time mpidr_hash address with MMU off
.quad mpidr_hash - .
* physical address of identity mapped page tables
.type sleep_idmap_phys, #object
.quad 0
* struct sleep_save_sp {
* phys_addr_t *save_ptr_stash;
* phys_addr_t save_ptr_stash_phys;
* };
.type sleep_save_sp, #object
.space SLEEP_SAVE_SP_SZ // struct sleep_save_sp
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/debug-monitors.h>
#include <asm/pgtable.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/tlbflush.h>
extern int __cpu_suspend(unsigned long);
* This is called by __cpu_suspend() to save the state, and do whatever
* flushing is required to ensure that when the CPU goes to sleep we have
* the necessary data available when the caches are not searched.
* @arg: Argument to pass to suspend operations
* @ptr: CPU context virtual address
* @save_ptr: address of the location where the context physical address
* must be saved
int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr,
phys_addr_t *save_ptr)
int cpu = smp_processor_id();
*save_ptr = virt_to_phys(ptr);
* Only flush the context that must be retrieved with the MMU
* off. VA primitives ensure the flush is applied to all
* cache levels so context is pushed to DRAM.
__flush_dcache_area(ptr, sizeof(*ptr));
__flush_dcache_area(save_ptr, sizeof(*save_ptr));
return cpu_ops[cpu]->cpu_suspend(arg);
* cpu_suspend
* @arg: argument to pass to the finisher function
int cpu_suspend(unsigned long arg)
struct mm_struct *mm = current->active_mm;
int ret, cpu = smp_processor_id();
unsigned long flags;
* If cpu_ops have not been registered or suspend
* has not been initialized, cpu_suspend call fails early.
if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
* From this point debug exceptions are disabled to prevent
* updates to mdscr register (saved and restored along with
* general purpose registers) from kernel debuggers.
* mm context saved on the stack, it will be restored when
* the cpu comes out of reset through the identity mapped
* page tables, so that the thread address space is properly
* set-up on function return.
ret = __cpu_suspend(arg);
if (ret == 0) {
cpu_switch_mm(mm->pgd, mm);
* Restore pstate flags. OS lock and mdscr have been already
* restored, so from this point onwards, debugging is fully
* renabled if it was enabled when core started shutdown.
return ret;
extern struct sleep_save_sp sleep_save_sp;
extern phys_addr_t sleep_idmap_phys;
static int cpu_suspend_init(void)
void *ctx_ptr;
/* ctx_ptr is an array of physical addresses */
ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL);
if (WARN_ON(!ctx_ptr))
return -ENOMEM;
sleep_save_sp.save_ptr_stash = ctx_ptr;
sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
sleep_idmap_phys = virt_to_phys(idmap_pg_dir);
__flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
__flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys));
return 0;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment