Commit 8d379dad authored by Jan Beulich's avatar Jan Beulich Committed by Andi Kleen
Browse files

[PATCH] annotate arch/x86_64/lib/*.S



Add unwind annotations to arch/x86_64/lib/*.S, and also use the macros
provided by linux/linkage.h where-ever possible.

Some of the alternative instructions handling needed to be adjusted so
that the replacement code would also have valid unwind information.
Signed-off-by: default avatarJan Beulich <jbeulich@novell.com>
Signed-off-by: default avatarAndi Kleen <ak@suse.de>
parent fb2e2848
#include <linux/linkage.h>
#include <asm/dwarf2.h>
/*
* Zero a page.
* rdi page
*/
.globl clear_page
.p2align 4
clear_page:
ALIGN
clear_page_c:
CFI_STARTPROC
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
CFI_ENDPROC
ENDPROC(clear_page)
ENTRY(clear_page)
CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
......@@ -23,28 +35,25 @@ clear_page:
jnz .Lloop
nop
ret
clear_page_end:
CFI_ENDPROC
.Lclear_page_end:
ENDPROC(clear_page)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad clear_page
.quad clear_page_c
.byte X86_FEATURE_REP_GOOD
.byte clear_page_end-clear_page
.byte clear_page_c_end-clear_page_c
.previous
.section .altinstr_replacement,"ax"
clear_page_c:
movl $4096/8,%ecx
xorl %eax,%eax
rep
stosq
ret
clear_page_c_end:
.quad clear_page
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lclear_page_end - clear_page
.byte 2b - 1b
.previous
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
ALIGN
copy_page_c:
CFI_STARTPROC
movl $4096/8,%ecx
rep movsq
ret
CFI_ENDPROC
ENDPROC(copy_page_c)
/* Don't use streaming store because it's better when the target
ends up in cache. */
/* Could vary the prefetch distance based on SMP/UP */
.globl copy_page
.p2align 4
copy_page:
ENTRY(copy_page)
CFI_STARTPROC
subq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET 3*8
movq %rbx,(%rsp)
CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13, 2*8
movl $(4096/64)-5,%ecx
.p2align 4
......@@ -72,30 +88,33 @@ copy_page:
jnz .Loop2
movq (%rsp),%rbx
CFI_RESTORE rbx
movq 1*8(%rsp),%r12
CFI_RESTORE r12
movq 2*8(%rsp),%r13
CFI_RESTORE r13
addq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET -3*8
ret
.Lcopy_page_end:
CFI_ENDPROC
ENDPROC(copy_page)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad copy_page
.quad copy_page_c
.byte X86_FEATURE_REP_GOOD
.byte copy_page_c_end-copy_page_c
.byte copy_page_c_end-copy_page_c
.previous
.section .altinstr_replacement,"ax"
copy_page_c:
movl $4096/8,%ecx
rep
movsq
ret
copy_page_c_end:
.quad copy_page
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lcopy_page_end - copy_page
.byte 2b - 1b
.previous
......@@ -4,6 +4,9 @@
* Functions to copy from and to user space.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#define FIX_ALIGNMENT 1
#include <asm/current.h>
......@@ -12,9 +15,8 @@
#include <asm/cpufeature.h>
/* Standard copy_to_user with segment limit checking */
.globl copy_to_user
.p2align 4
copy_to_user:
ENTRY(copy_to_user)
CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rdi,%rcx
addq %rdx,%rcx
......@@ -25,9 +27,11 @@ copy_to_user:
.byte 0xe9 /* 32bit jump */
.long .Lcug-1f
1:
CFI_ENDPROC
ENDPROC(copy_to_user)
.section .altinstr_replacement,"ax"
3: .byte 0xe9 /* replacement jmp with 8 bit immediate */
3: .byte 0xe9 /* replacement jmp with 32 bit immediate */
.long copy_user_generic_c-1b /* offset */
.previous
.section .altinstructions,"a"
......@@ -40,9 +44,8 @@ copy_to_user:
.previous
/* Standard copy_from_user with segment limit checking */
.globl copy_from_user
.p2align 4
copy_from_user:
ENTRY(copy_from_user)
CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rsi,%rcx
addq %rdx,%rcx
......@@ -50,10 +53,13 @@ copy_from_user:
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_from_user
/* FALL THROUGH to copy_user_generic */
CFI_ENDPROC
ENDPROC(copy_from_user)
.section .fixup,"ax"
/* must zero dest */
bad_from_user:
CFI_STARTPROC
movl %edx,%ecx
xorl %eax,%eax
rep
......@@ -61,6 +67,8 @@ bad_from_user:
bad_to_user:
movl %edx,%eax
ret
CFI_ENDPROC
END(bad_from_user)
.previous
......@@ -75,9 +83,8 @@ bad_to_user:
* Output:
* eax uncopied bytes or 0 if successful.
*/
.globl copy_user_generic
.p2align 4
copy_user_generic:
ENTRY(copy_user_generic)
CFI_STARTPROC
.byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
.byte 0x66,0x90
1:
......@@ -95,6 +102,8 @@ copy_user_generic:
.previous
.Lcug:
pushq %rbx
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbx, 0
xorl %eax,%eax /*zero for the exception handler */
#ifdef FIX_ALIGNMENT
......@@ -168,9 +177,13 @@ copy_user_generic:
decl %ecx
jnz .Lloop_1
CFI_REMEMBER_STATE
.Lende:
popq %rbx
CFI_ADJUST_CFA_OFFSET -8
CFI_RESTORE rbx
ret
CFI_RESTORE_STATE
#ifdef FIX_ALIGNMENT
/* align destination */
......@@ -261,6 +274,9 @@ copy_user_generic:
.Le_zero:
movq %rdx,%rax
jmp .Lende
CFI_ENDPROC
ENDPROC(copy_user_generic)
/* Some CPUs run faster using the string copy instructions.
This is also a lot simpler. Use them when possible.
......@@ -282,6 +298,7 @@ copy_user_generic:
* this please consider this.
*/
copy_user_generic_c:
CFI_STARTPROC
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
......@@ -294,6 +311,8 @@ copy_user_generic_c:
ret
3: lea (%rdx,%rcx,8),%rax
ret
CFI_ENDPROC
END(copy_user_generic_c)
.section __ex_table,"a"
.quad 1b,3b
......
......@@ -5,8 +5,9 @@
* License. See the file COPYING in the main directory of this archive
* for more details. No warranty for anything given at all.
*/
#include <linux/linkage.h>
#include <asm/errno.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>
/*
* Checksum copy with exception handling.
......@@ -53,19 +54,24 @@
.endm
.globl csum_partial_copy_generic
.p2align 4
csum_partial_copy_generic:
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
cmpl $3*64,%edx
jle .Lignore
.Lignore:
subq $7*8,%rsp
CFI_ADJUST_CFA_OFFSET 7*8
movq %rbx,2*8(%rsp)
CFI_REL_OFFSET rbx, 2*8
movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12, 3*8
movq %r14,4*8(%rsp)
CFI_REL_OFFSET r14, 4*8
movq %r13,5*8(%rsp)
CFI_REL_OFFSET r13, 5*8
movq %rbp,6*8(%rsp)
CFI_REL_OFFSET rbp, 6*8
movq %r8,(%rsp)
movq %r9,1*8(%rsp)
......@@ -208,14 +214,22 @@ csum_partial_copy_generic:
addl %ebx,%eax
adcl %r9d,%eax /* carry */
CFI_REMEMBER_STATE
.Lende:
movq 2*8(%rsp),%rbx
CFI_RESTORE rbx
movq 3*8(%rsp),%r12
CFI_RESTORE r12
movq 4*8(%rsp),%r14
CFI_RESTORE r14
movq 5*8(%rsp),%r13
CFI_RESTORE r13
movq 6*8(%rsp),%rbp
CFI_RESTORE rbp
addq $7*8,%rsp
CFI_ADJUST_CFA_OFFSET -7*8
ret
CFI_RESTORE_STATE
/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
......@@ -231,3 +245,5 @@ csum_partial_copy_generic:
jz .Lende
movl $-EFAULT,(%rax)
jmp .Lende
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
......@@ -27,25 +27,26 @@
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/page.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
.text
.p2align 4
.globl __get_user_1
__get_user_1:
ENTRY(__get_user_1)
CFI_STARTPROC
GET_THREAD_INFO(%r8)
cmpq threadinfo_addr_limit(%r8),%rcx
jae bad_get_user
1: movzb (%rcx),%edx
xorl %eax,%eax
ret
CFI_ENDPROC
ENDPROC(__get_user_1)
.p2align 4
.globl __get_user_2
__get_user_2:
ENTRY(__get_user_2)
CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $1,%rcx
jc 20f
......@@ -57,10 +58,11 @@ __get_user_2:
ret
20: decq %rcx
jmp bad_get_user
CFI_ENDPROC
ENDPROC(__get_user_2)
.p2align 4
.globl __get_user_4
__get_user_4:
ENTRY(__get_user_4)
CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $3,%rcx
jc 30f
......@@ -72,10 +74,11 @@ __get_user_4:
ret
30: subq $3,%rcx
jmp bad_get_user
CFI_ENDPROC
ENDPROC(__get_user_4)
.p2align 4
.globl __get_user_8
__get_user_8:
ENTRY(__get_user_8)
CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $7,%rcx
jc 40f
......@@ -87,11 +90,16 @@ __get_user_8:
ret
40: subq $7,%rcx
jmp bad_get_user
CFI_ENDPROC
ENDPROC(__get_user_8)
bad_get_user:
CFI_STARTPROC
xorl %edx,%edx
movq $(-EFAULT),%rax
ret
CFI_ENDPROC
END(bad_get_user)
.section __ex_table,"a"
.quad 1b,bad_get_user
......
......@@ -15,12 +15,16 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
/*
* override generic version in lib/iomap_copy.c
*/
.globl __iowrite32_copy
.p2align 4
__iowrite32_copy:
ENTRY(__iowrite32_copy)
CFI_STARTPROC
movl %edx,%ecx
rep movsd
ret
CFI_ENDPROC
ENDPROC(__iowrite32_copy)
/* Copyright 2002 Andi Kleen */
#include <asm/cpufeature.h>
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
/*
* memcpy - Copy a memory block.
*
......@@ -13,12 +17,26 @@
* rax original destination
*/
.globl __memcpy
.globl memcpy
.p2align 4
__memcpy:
memcpy:
ALIGN
memcpy_c:
CFI_STARTPROC
movq %rdi,%rax
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
rep movsq
movl %edx,%ecx
rep movsb
ret
CFI_ENDPROC
ENDPROC(memcpy_c)
ENTRY(__memcpy)
ENTRY(memcpy)
CFI_STARTPROC
pushq %rbx
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbx, 0
movq %rdi,%rax
movl %edx,%ecx
......@@ -86,36 +104,27 @@ memcpy:
.Lende:
popq %rbx
CFI_ADJUST_CFA_OFFSET -8
CFI_RESTORE rbx
ret
.Lfinal:
CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad memcpy
.quad memcpy_c
.byte X86_FEATURE_REP_GOOD
.byte .Lfinal-memcpy
.byte memcpy_c_end-memcpy_c
.previous
.section .altinstr_replacement,"ax"
/* rdi destination
* rsi source
* rdx count
*/
memcpy_c:
movq %rdi,%rax
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
rep
movsq
movl %edx,%ecx
rep
movsb
ret
memcpy_c_end:
.quad memcpy
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lfinal - memcpy
.byte 2b - 1b
.previous
/* Copyright 2002 Andi Kleen, SuSE Labs */
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
/*
* ISO C memset - set a memory block to a byte value.
*
......@@ -8,11 +13,29 @@
*
* rax original destination
*/
.globl __memset
.globl memset
.p2align 4
memset:
__memset:
ALIGN
memset_c:
CFI_STARTPROC
movq %rdi,%r9
movl %edx,%r8d
andl $7,%r8d
movl %edx,%ecx
shrl $3,%ecx
/* expand byte value */
movzbl %sil,%esi
movabs $0x0101010101010101,%rax
mulq %rsi /* with rax, clobbers rdx */
rep stosq
movl %r8d,%ecx
rep stosb
movq %r9,%rax
ret
CFI_ENDPROC
ENDPROC(memset_c)
ENTRY(memset)
ENTRY(__memset)
CFI_STARTPROC
movq %rdi,%r10
movq %rdx,%r11
......@@ -25,6 +48,7 @@ __memset:
movl %edi,%r9d
andl $7,%r9d
jnz .Lbad_alignment
CFI_REMEMBER_STATE
.Lafter_bad_alignment:
movl %r11d,%ecx
......@@ -75,6 +99,7 @@ __memset:
movq %r10,%rax
ret
CFI_RESTORE_STATE
.Lbad_alignment:
cmpq $7,%r11
jbe .Lhandle_7
......@@ -84,42 +109,26 @@ __memset:
addq %r8,%rdi
subq %r8,%r11
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (memset_c - memset) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad memset
.quad memset_c
.byte X86_FEATURE_REP_GOOD
.byte memset_c_end-memset_c
.byte memset_c_end-memset_c
.previous
.section .altinstr_replacement,"ax"
/* rdi destination
* rsi value
* rdx count
*/
memset_c:
movq %rdi,%r9
movl %edx,%r8d
andl $7,%r8d