Commit 82869ac5 authored by James Morse's avatar James Morse Committed by Will Deacon

arm64: kernel: Add support for hibernate/suspend-to-disk

Add support for hibernate/suspend-to-disk.

Suspend borrows code from cpu_suspend() to write cpu state onto the stack,
before calling swsusp_save() to save the memory image.

Restore creates a set of temporary page tables, covering only the
linear map, copies the restore code to a 'safe' page, then uses the copy to
restore the memory image. The copied code executes in the lower half of the
address space, and once complete, restores the original kernel's page
tables. It then calls into cpu_resume(), and follows the normal
cpu_suspend() path back into the suspend code.

To restore a kernel using KASLR, the address of the page tables, and
cpu_resume() are stored in the hibernate arch-header and the el2
vectors are pivotted via the 'safe' page in low memory.
Reviewed-by: default avatarCatalin Marinas <>
Tested-by: Kevin Hilman <> # Tested on Juno R2
Signed-off-by: default avatarJames Morse <>
Signed-off-by: default avatarWill Deacon <>
parent f6cf0545
......@@ -978,6 +978,14 @@ menu "Power management options"
source "kernel/power/Kconfig"
def_bool y
depends on CPU_PM
def_bool y
depends on HIBERNATION
def_bool y
......@@ -40,4 +40,11 @@ extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
extern void cpu_resume(void);
int __cpu_suspend_enter(struct sleep_stack_data *state);
void __cpu_suspend_exit(void);
void _cpu_resume(void);
int swsusp_arch_suspend(void);
int swsusp_arch_resume(void);
int arch_hibernation_header_save(void *addr, unsigned int max_size);
int arch_hibernation_header_restore(void *addr);
......@@ -45,6 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o
arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
......@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
#include <linux/suspend.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
......@@ -124,5 +125,9 @@ int main(void)
DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0));
DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
return 0;
* Hibernate low-level support
* Copyright (C) 2016 ARM Ltd.
* Author: James Morse <>
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <>.
#include <linux/linkage.h>
#include <linux/errno.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/cputype.h>
#include <asm/memory.h>
#include <asm/page.h>
#include <asm/virt.h>
* To prevent the possibility of old and new partial table walks being visible
* in the tlb, switch the ttbr to a zero page when we invalidate the old
* records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
* Even switching to our copied tables will cause a changed output address at
* each stage of the walk.
.macro break_before_make_ttbr_switch zero_page, page_table
msr ttbr1_el1, \zero_page
tlbi vmalle1is
dsb ish
msr ttbr1_el1, \page_table
* Resume from hibernate
* Loads temporary page tables then restores the memory image.
* Finally branches to cpu_resume() to restore the state saved by
* swsusp_arch_suspend().
* Because this code has to be copied to a 'safe' page, it can't call out to
* other functions by PC-relative address. Also remember that it may be
* mid-way through over-writing other functions. For this reason it contains
* code from flush_icache_range() and uses the copy_page() macro.
* This 'safe' page is mapped via ttbr0, and executed from there. This function
* switches to a copy of the linear map in ttbr1, performs the restore, then
* switches ttbr1 to the original kernel's swapper_pg_dir.
* All of memory gets written to, including code. We need to clean the kernel
* text to the Point of Coherence (PoC) before secondary cores can be booted.
* Because the kernel modules and executable pages mapped to user space are
* also written as data, we clean all pages we touch to the Point of
* Unification (PoU).
* x0: physical address of temporary page tables
* x1: physical address of swapper page tables
* x2: address of cpu_resume
* x3: linear map address of restore_pblist in the current kernel
* x4: physical address of __hyp_stub_vectors, or 0
* x5: physical address of a zero page that remains zero after resume
.pushsection ".hibernate_exit.text", "ax"
* We execute from ttbr0, change ttbr1 to our copied linear map tables
* with a break-before-make via the zero page
break_before_make_ttbr_switch x5, x0
mov x21, x1
mov x30, x2
mov x24, x4
mov x25, x5
/* walk the restore_pblist and use copy_page() to over-write memory */
mov x19, x3
1: ldr x10, [x19, #HIBERN_PBE_ORIG]
mov x0, x10
ldr x1, [x19, #HIBERN_PBE_ADDR]
copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
add x1, x10, #PAGE_SIZE
/* Clean the copied page to PoU - based on flush_icache_range() */
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x10, x3
2: dc cvau, x4 /* clean D line / unified line */
add x4, x4, x2
cmp x4, x1
b.lo 2b
ldr x19, [x19, #HIBERN_PBE_NEXT]
cbnz x19, 1b
dsb ish /* wait for PoU cleaning to finish */
/* switch to the restored kernels page tables */
break_before_make_ttbr_switch x25, x21
ic ialluis
dsb ish
cbz x24, 3f /* Do we need to re-initialise EL2? */
hvc #0
3: ret
* Restore the hyp stub.
* This must be done before the hibernate page is unmapped by _cpu_resume(),
* but happens before any of the hyp-stub's code is cleaned to PoC.
* x24: The physical address of __hyp_stub_vectors
msr vbar_el2, x24
.macro invalid_vector label
b \label
invalid_vector el2_sync_invalid
invalid_vector el2_irq_invalid
invalid_vector el2_fiq_invalid
invalid_vector el2_error_invalid
invalid_vector el1_sync_invalid
invalid_vector el1_irq_invalid
invalid_vector el1_fiq_invalid
invalid_vector el1_error_invalid
/* el2 vectors - switch el2 here while we restore the memory image. */
.align 11
ventry el2_sync_invalid // Synchronous EL2t
ventry el2_irq_invalid // IRQ EL2t
ventry el2_fiq_invalid // FIQ EL2t
ventry el2_error_invalid // Error EL2t
ventry el2_sync_invalid // Synchronous EL2h
ventry el2_irq_invalid // IRQ EL2h
ventry el2_fiq_invalid // FIQ EL2h
ventry el2_error_invalid // Error EL2h
ventry el1_sync // Synchronous 64-bit EL1
ventry el1_irq_invalid // IRQ 64-bit EL1
ventry el1_fiq_invalid // FIQ 64-bit EL1
ventry el1_error_invalid // Error 64-bit EL1
ventry el1_sync_invalid // Synchronous 32-bit EL1
ventry el1_irq_invalid // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
* Hibernate support specific for ARM64
* Derived from work on ARM hibernation support by:
* Ubuntu project, hibernation support for mach-dove
* Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
* Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
* Copyright (C) 2006 Rafael J. Wysocki <>
* License terms: GNU General Public License (GPL) version 2
#define pr_fmt(x) "hibernate: " x
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <asm/barrier.h>
#include <asm/cacheflush.h>
#include <asm/irqflags.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/suspend.h>
#include <asm/virt.h>
* Hibernate core relies on this value being 0 on resume, and marks it
* __nosavedata assuming it will keep the resume kernel's '0' value. This
* doesn't happen with either KASLR.
* defined as "__visible int in_suspend __nosavedata" in
* kernel/power/hibernate.c
extern int in_suspend;
/* Find a symbols alias in the linear map */
#define LMADDR(x) phys_to_virt(virt_to_phys(x))
/* Do we need to reset el2? */
#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
* Start/end of the hibernate exit code, this must be copied to a 'safe'
* location in memory, and executed from there.
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
/* temporary el2 vectors in the __hibernate_exit_text section. */
extern char hibernate_el2_vectors[];
/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
extern char __hyp_stub_vectors[];
* Values that may not change over hibernate/resume. We put the build number
* and date in here so that we guarantee not to resume with a different
* kernel.
struct arch_hibernate_hdr_invariants {
char uts_version[__NEW_UTS_LEN + 1];
/* These values need to be know across a hibernate/restore. */
static struct arch_hibernate_hdr {
struct arch_hibernate_hdr_invariants invariants;
/* These are needed to find the relocated kernel if built with kaslr */
phys_addr_t ttbr1_el1;
void (*reenter_kernel)(void);
* We need to know where the __hyp_stub_vectors are after restore to
* re-configure el2.
phys_addr_t __hyp_stub_vectors;
} resume_hdr;
static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
memset(i, 0, sizeof(*i));
memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
int pfn_is_nosave(unsigned long pfn)
unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
void notrace save_processor_state(void)
WARN_ON(num_online_cpus() != 1);
void notrace restore_processor_state(void)
int arch_hibernation_header_save(void *addr, unsigned int max_size)
struct arch_hibernate_hdr *hdr = addr;
if (max_size < sizeof(*hdr))
return -EOVERFLOW;
hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir);
hdr->reenter_kernel = _cpu_resume;
/* We can't use __hyp_get_vectors() because kvm may still be loaded */
if (el2_reset_needed())
hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors);
hdr->__hyp_stub_vectors = 0;
return 0;
int arch_hibernation_header_restore(void *addr)
struct arch_hibernate_hdr_invariants invariants;
struct arch_hibernate_hdr *hdr = addr;
if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
pr_crit("Hibernate image not generated by this kernel!\n");
return -EINVAL;
resume_hdr = *hdr;
return 0;
* Copies length bytes, starting at src_start into an new page,
* perform cache maintentance, then maps it at the specified address low
* address as executable.
* This is used by hibernate to copy the code it needs to execute when
* overwriting the kernel text. This function generates a new set of page
* tables, which it loads into ttbr0.
* Length is provided as we probably only want 4K of data, even on a 64K
* page system.
static int create_safe_exec_page(void *src_start, size_t length,
unsigned long dst_addr,
phys_addr_t *phys_dst_addr,
void *(*allocator)(gfp_t mask),
gfp_t mask)
int rc = 0;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long dst = (unsigned long)allocator(mask);
if (!dst) {
rc = -ENOMEM;
goto out;
memcpy((void *)dst, src_start, length);
flush_icache_range(dst, dst + length);
pgd = pgd_offset_raw(allocator(mask), dst_addr);
if (pgd_none(*pgd)) {
pud = allocator(mask);
if (!pud) {
rc = -ENOMEM;
goto out;
pgd_populate(&init_mm, pgd, pud);
pud = pud_offset(pgd, dst_addr);
if (pud_none(*pud)) {
pmd = allocator(mask);
if (!pmd) {
rc = -ENOMEM;
goto out;
pud_populate(&init_mm, pud, pmd);
pmd = pmd_offset(pud, dst_addr);
if (pmd_none(*pmd)) {
pte = allocator(mask);
if (!pte) {
rc = -ENOMEM;
goto out;
pmd_populate_kernel(&init_mm, pmd, pte);
pte = pte_offset_kernel(pmd, dst_addr);
set_pte(pte, __pte(virt_to_phys((void *)dst) |
/* Load our new page tables */
asm volatile("msr ttbr0_el1, %0;"
"tlbi vmalle1is;"
"dsb ish;"
"isb" : : "r"(virt_to_phys(pgd)));
*phys_dst_addr = virt_to_phys((void *)dst);
return rc;
int swsusp_arch_suspend(void)
int ret = 0;
unsigned long flags;
struct sleep_stack_data state;
if (__cpu_suspend_enter(&state)) {
ret = swsusp_save();
} else {
/* Clean kernel to PoC for secondary core startup */
* Tell the hibernation core that we've just restored
* the memory
in_suspend = 0;
return ret;
static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
unsigned long end)
pte_t *src_pte;
pte_t *dst_pte;
unsigned long addr = start;
dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pte)
return -ENOMEM;
pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
dst_pte = pte_offset_kernel(dst_pmd, start);
src_pte = pte_offset_kernel(src_pmd, start);
do {
if (!pte_none(*src_pte))
* Resume will overwrite areas that may be marked
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY));
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
return 0;
static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
unsigned long end)
pmd_t *src_pmd;
pmd_t *dst_pmd;
unsigned long next;
unsigned long addr = start;
if (pud_none(*dst_pud)) {
dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pmd)
return -ENOMEM;
pud_populate(&init_mm, dst_pud, dst_pmd);
dst_pmd = pmd_offset(dst_pud, start);
src_pmd = pmd_offset(src_pud, start);
do {
next = pmd_addr_end(addr, end);
if (pmd_none(*src_pmd))
if (pmd_table(*src_pmd)) {
if (copy_pte(dst_pmd, src_pmd, addr, next))
return -ENOMEM;
} else {
__pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
} while (dst_pmd++, src_pmd++, addr = next, addr != end);
return 0;
static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
unsigned long end)
pud_t *dst_pud;
pud_t *src_pud;
unsigned long next;
unsigned long addr = start;
if (pgd_none(*dst_pgd)) {
dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pud)
return -ENOMEM;
pgd_populate(&init_mm, dst_pgd, dst_pud);
dst_pud = pud_offset(dst_pgd, start);
src_pud = pud_offset(src_pgd, start);
do {
next = pud_addr_end(addr, end);
if (pud_none(*src_pud))
if (pud_table(*(src_pud))) {
if (copy_pmd(dst_pud, src_pud, addr, next))
return -ENOMEM;
} else {
__pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
} while (dst_pud++, src_pud++, addr = next, addr != end);
return 0;
static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
unsigned long end)
unsigned long next;
unsigned long addr = start;
pgd_t *src_pgd = pgd_offset_k(start);
dst_pgd = pgd_offset_raw(dst_pgd, start);
do {
next = pgd_addr_end(addr, end);
if (pgd_none(*src_pgd))
if (copy_pud(dst_pgd, src_pgd, addr, next))
return -ENOMEM;
} while (dst_pgd++, src_pgd++, addr = next, addr != end);
return 0;
* Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
* Memory allocated by get_safe_page() will be dealt with by the hibernate code,
* we don't need to free it here.
int swsusp_arch_resume(void)
int rc = 0;
void *zero_page;
size_t exit_size;
pgd_t *tmp_pg_dir;