Commit 540cd30f authored by Vikram Narayanan's avatar Vikram Narayanan

merge/v4.8/lcd: Update lcd arch libs to v4.8

Signed-off-by: Vikram Narayanan's avatarVikram Narayanan <vikram186@gmail.com>
parent 230dd4fe
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
*/
.weak memcpy
/*
* memcpy - Copy a memory block.
*
......@@ -17,15 +25,11 @@
* Output:
* rax original destination
*/
ENTRY(__memcpy)
ENTRY(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
/*
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax
movq %rdx, %rcx
shrq $3, %rcx
......@@ -34,28 +38,21 @@
movl %edx, %ecx
rep movsb
ret
.Lmemcpy_e:
.previous
ENDPROC(memcpy)
ENDPROC(__memcpy)
/*
* memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
* memcpy_c. Use memcpy_c_e when possible.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
ENTRY(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
.previous
ENDPROC(memcpy_erms)
ENTRY(__memcpy)
ENTRY(memcpy)
CFI_STARTPROC
ENTRY(memcpy_orig)
movq %rdi, %rax
cmpq $0x20, %rdx
......@@ -180,27 +177,121 @@ ENTRY(memcpy)
.Lend:
retq
CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
ENDPROC(memcpy_orig)
#ifndef CONFIG_UML
/*
* memcpy_mcsafe - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
ENTRY(memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
/* Check for bad alignment of source */
testl $7, %esi
/* Already aligned */
jz .L_8byte_aligned
/* Copy one byte at a time until source is 8-byte aligned */
movl %esi, %ecx
andl $7, %ecx
subl $8, %ecx
negl %ecx
subl %ecx, %edx
.L_copy_leading_bytes:
movb (%rsi), %al
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_copy_leading_bytes
.L_8byte_aligned:
/* Figure out how many whole cache lines (64-bytes) to copy */
movl %edx, %ecx
andl $63, %edx
shrl $6, %ecx
jz .L_no_whole_cache_lines
/* Loop copying whole cache lines */
.L_cache_w0: movq (%rsi), %r8
.L_cache_w1: movq 1*8(%rsi), %r9
.L_cache_w2: movq 2*8(%rsi), %r10
.L_cache_w3: movq 3*8(%rsi), %r11
movq %r8, (%rdi)
movq %r9, 1*8(%rdi)
movq %r10, 2*8(%rdi)
movq %r11, 3*8(%rdi)
.L_cache_w4: movq 4*8(%rsi), %r8
.L_cache_w5: movq 5*8(%rsi), %r9
.L_cache_w6: movq 6*8(%rsi), %r10
.L_cache_w7: movq 7*8(%rsi), %r11
movq %r8, 4*8(%rdi)
movq %r9, 5*8(%rdi)
movq %r10, 6*8(%rdi)
movq %r11, 7*8(%rdi)
leaq 64(%rsi), %rsi
leaq 64(%rdi), %rdi
decl %ecx
jnz .L_cache_w0
/* Are there any trailing 8-byte words? */
.L_no_whole_cache_lines:
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
/* Copy trailing words */
.L_copy_trailing_words:
movq (%rsi), %r8
mov %r8, (%rdi)
leaq 8(%rsi), %rsi
leaq 8(%rdi), %rdi
decl %ecx
jnz .L_copy_trailing_words
/* Any trailing bytes? */
.L_no_whole_words:
andl %edx, %edx
jz .L_done_memcpy_trap
/* Copy trailing bytes */
movl %edx, %ecx
.L_copy_trailing_bytes:
movb (%rsi), %al
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_copy_trailing_bytes
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorq %rax, %rax
ret
ENDPROC(memcpy_mcsafe)
.section .fixup, "ax"
/* Return -EFAULT for any failure */
.L_memcpy_mcsafe_fail:
mov $-EFAULT, %rax
ret
/*
* Some CPUs are adding enhanced REP MOVSB/STOSB feature
* If the feature is supported, memcpy_c_e() is the first choice.
* If enhanced rep movsb copy is not available, use fast string copy
* memcpy_c() when possible. This is faster and code is simpler than
* original memcpy().
* Otherwise, original memcpy() is used.
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*
* Replace only beginning, memcpy is used to apply alternatives,
* so it is silly to overwrite itself with nops - reboot is the
* only outcome...
*/
.section .altinstructions, "a"
altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
.previous
_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
#endif
......@@ -5,10 +5,8 @@
* This assembly file is re-written from memmove_64.c file.
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#undef memmove
......@@ -24,8 +22,10 @@
* Output:
* rax: dest
*/
.weak memmove
ENTRY(memmove)
CFI_STARTPROC
ENTRY(__memmove)
/* Handle more 32 bytes in loop */
mov %rdi, %rax
......@@ -41,6 +41,8 @@ ENTRY(memmove)
jg 2f
.Lmemmove_begin_forward:
ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
/*
* movsq instruction have many startup latency
* so we handle small size by general register.
......@@ -203,21 +205,5 @@ ENTRY(memmove)
movb %r11b, (%rdi)
13:
retq
CFI_ENDPROC
.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
/* Forward moving data. */
movq %rdx, %rcx
rep movsb
retq
.Lmemmove_end_forward_efs:
.previous
.section .altinstructions,"a"
altinstruction_entry .Lmemmove_begin_forward, \
.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
.Lmemmove_end_forward-.Lmemmove_begin_forward, \
.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
.previous
ENDPROC(__memmove)
ENDPROC(memmove)
/* Copyright 2002 Andi Kleen, SuSE Labs */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
.weak memset
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
* simpler and shorter than the orignal function as well.
*
* simpler and shorter than the original function as well.
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
*/
ENTRY(memset)
ENTRY(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
*
* Otherwise, use original memset function.
*/
ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memset_erms", X86_FEATURE_ERMS
movq %rdi,%r9
movq %rdx,%rcx
andl $7,%edx
......@@ -31,8 +41,8 @@
rep stosb
movq %r9,%rax
ret
.Lmemset_e:
.previous
ENDPROC(memset)
ENDPROC(__memset)
/*
* ISO C memset - set a memory block to a byte value. This function uses
......@@ -45,20 +55,16 @@
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c_e:
ENTRY(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
.Lmemset_e_e:
.previous
ENDPROC(memset_erms)
ENTRY(memset)
ENTRY(__memset)
CFI_STARTPROC
ENTRY(memset_orig)
movq %rdi,%r10
/* expand byte value */
......@@ -70,7 +76,6 @@ ENTRY(__memset)
movl %edi,%r9d
andl $7,%r9d
jnz .Lbad_alignment
CFI_REMEMBER_STATE
.Lafter_bad_alignment:
movq %rdx,%rcx
......@@ -120,7 +125,6 @@ ENTRY(__memset)
movq %r10,%rax
ret
CFI_RESTORE_STATE
.Lbad_alignment:
cmpq $7,%rdx
jbe .Lhandle_7
......@@ -131,24 +135,4 @@ ENTRY(__memset)
subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)
/* Some CPUs support enhanced REP MOVSB/STOSB feature.
* It is recommended to use this when possible.
*
* If enhanced REP MOVSB/STOSB feature is not available, use fast string
* instructions.
*
* Otherwise, use original memset function.
*
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*/
.section .altinstructions,"a"
altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
.Lfinal-memset,.Lmemset_e-.Lmemset_c
altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
.Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
.previous
ENDPROC(memset_orig)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment