Commit c4acf037 authored by Charles Jacobsen's avatar Charles Jacobsen Committed by Vikram Narayanan

deker-setup: Everything builds with clang.

parent bd890f11
......@@ -62,7 +62,7 @@ struct lcd_boot_info * lcd_get_boot_info(void);
static inline void lcd_dump_boot_info(struct lcd_boot_info *b)
{
unsigned long idx;
unsigned char *bits = (char *)b;
unsigned char *bits = (unsigned char *)b;
/*
* This isn't ideal for printing, but it needs to be useable
* before kmalloc etc. is available. (Can't alloc a string
......
Subproject commit 98456ad1b0e6953bbf884398e6efb71ded965ef5
Subproject commit 58432a5225b4c53bc670f92895a1926d93110083
Subproject commit b7777461c32d70acb7a61898bac2fa49f4a511e9
Subproject commit 56deb16191e604464c1cafd07cecdce95d3e97c5
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>
/*
* memcpy - Copy a memory block.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* rax original destination
*/
/*
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/
.section .altinstr_replacement, "ax", @progbits
/*
 * Replacement body placed in .altinstr_replacement. It copies count/8
 * quadwords with REP MOVSQ and then the remaining count%8 bytes with
 * REP MOVSB. The span .Lmemcpy_c .. .Lmemcpy_e is what the
 * altinstruction_entry for X86_FEATURE_REP_GOOD measures and refers to.
 */
.Lmemcpy_c:
/* rax = original destination (the return value). */
movq %rdi, %rax
/* rcx = count / 8: quadword count consumed by rep movsq. */
movq %rdx, %rcx
shrq $3, %rcx
/* edx = count % 8: leftover bytes, kept aside for the final rep movsb. */
andl $7, %edx
rep movsq
/* rcx is zero after rep movsq; reload it with the leftover byte count. */
movl %edx, %ecx
rep movsb
ret
.Lmemcpy_e:
.previous
/*
* memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
* memcpy_c. Use memcpy_c_e when possible.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/
.section .altinstr_replacement, "ax", @progbits
/*
 * Replacement body placed in .altinstr_replacement. The whole copy is one
 * REP MOVSB over count bytes. The span .Lmemcpy_c_e .. .Lmemcpy_e_e is what
 * the altinstruction_entry for X86_FEATURE_ERMS measures and refers to.
 */
.Lmemcpy_c_e:
/* rax = original destination (the return value). */
movq %rdi, %rax
/* rcx = count: byte count consumed by rep movsb. */
movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
.previous
/*
 * Unrolled memcpy using general-purpose registers only.
 *
 * In:   rdi = destination, rsi = source, rdx = byte count
 * Out:  rax = original destination
 * Uses: rcx, rdx, rsi, rdi, r8-r11 and the flags
 *
 * Both __memcpy and memcpy are declared at this same location.
 * Counts of 32 or more are copied in 32-byte blocks (forward or backward),
 * and whatever remains (0..31 bytes) is finished by .Lhandle_tail.
 */
ENTRY(__memcpy)
ENTRY(memcpy)
CFI_STARTPROC
/* rax = return value; it is not modified again below. */
movq %rdi, %rax
/* Fewer than 32 bytes: skip the block loops entirely. */
cmpq $0x20, %rdx
jb .Lhandle_tail
/*
 * We check whether memory false dependence could occur,
 * then jump to corresponding copy mode.
 * Only the low bytes of the two pointers are compared (signed):
 * source low byte < destination low byte selects the backward copy.
 */
cmp %dil, %sil
jl .Lcopy_backward
/*
 * Pre-bias the count by one block so that the subq at the top of the loop
 * borrows exactly when fewer than 32 bytes will remain after this block.
 */
subq $0x20, %rdx
.Lcopy_forward_loop:
subq $0x20, %rdx
/*
 * Move in blocks of 4x8 bytes:
 * The mov and lea instructions below leave the flags alone, so the jae
 * at the bottom still tests the carry produced by the subq above.
 */
movq 0*8(%rsi), %r8
movq 1*8(%rsi), %r9
movq 2*8(%rsi), %r10
movq 3*8(%rsi), %r11
leaq 4*8(%rsi), %rsi
movq %r8, 0*8(%rdi)
movq %r9, 1*8(%rdi)
movq %r10, 2*8(%rdi)
movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop
/*
 * rdx is now in [-32, -1]; adding 32 gives the remaining byte count
 * (0..31). The 32-bit add also clears the upper half of rdx, which the
 * tail code uses as an index register.
 */
addl $0x20, %edx
jmp .Lhandle_tail
.Lcopy_backward:
/*
 * Calculate copy position to tail.
 * rsi/rdi now point one past the end of each buffer; rdx is pre-biased
 * by one block exactly as in the forward loop.
 */
addq %rdx, %rsi
addq %rdx, %rdi
subq $0x20, %rdx
/*
 * At most 3 ALU operations in one cycle,
 * so append NOPs in the same 16-byte chunk.
 */
.p2align 4
.Lcopy_backward_loop:
/* As in the forward loop, only this subq sets the flags tested by jae. */
subq $0x20, %rdx
movq -1*8(%rsi), %r8
movq -2*8(%rsi), %r9
movq -3*8(%rsi), %r10
movq -4*8(%rsi), %r11
leaq -4*8(%rsi), %rsi
movq %r8, -1*8(%rdi)
movq %r9, -2*8(%rdi)
movq %r10, -3*8(%rdi)
movq %r11, -4*8(%rdi)
leaq -4*8(%rdi), %rdi
jae .Lcopy_backward_loop
/*
 * Calculate copy position to head.
 * edx becomes the number of bytes still uncopied (0..31); they sit at the
 * start of the buffers, so step both pointers back by that amount.
 */
addl $0x20, %edx
subq %rdx, %rsi
subq %rdx, %rdi
.Lhandle_tail:
/* Here rdx < 32, and rsi/rdi point at the first uncopied byte. */
cmpl $16, %edx
jb .Lless_16bytes
/*
 * Move data from 16 bytes to 31 bytes.
 * The first 16 and the last 16 bytes are loaded (the two ranges may
 * overlap) before any store is issued.
 */
movq 0*8(%rsi), %r8
movq 1*8(%rsi), %r9
movq -2*8(%rsi, %rdx), %r10
movq -1*8(%rsi, %rdx), %r11
movq %r8, 0*8(%rdi)
movq %r9, 1*8(%rdi)
movq %r10, -2*8(%rdi, %rdx)
movq %r11, -1*8(%rdi, %rdx)
retq
.p2align 4
.Lless_16bytes:
cmpl $8, %edx
jb .Lless_8bytes
/*
 * Move data from 8 bytes to 15 bytes.
 * First 8 and last 8 bytes, loaded before storing.
 */
movq 0*8(%rsi), %r8
movq -1*8(%rsi, %rdx), %r9
movq %r8, 0*8(%rdi)
movq %r9, -1*8(%rdi, %rdx)
retq
.p2align 4
.Lless_8bytes:
cmpl $4, %edx
jb .Lless_3bytes
/*
 * Move data from 4 bytes to 7 bytes.
 * First 4 and last 4 bytes, loaded before storing.
 */
movl (%rsi), %ecx
movl -4(%rsi, %rdx), %r8d
movl %ecx, (%rdi)
movl %r8d, -4(%rdi, %rdx)
retq
.p2align 4
.Lless_3bytes:
/*
 * 0..3 bytes remain. edx becomes count-1; a borrow means the count was 0
 * and there is nothing to copy.
 */
subl $1, %edx
jb .Lend
/*
 * Move data from 1 bytes to 3 bytes.
 * movzbl does not change the flags, so the jz below still tests the
 * result of the subl: zero means exactly one byte was left.
 */
movzbl (%rsi), %ecx
jz .Lstore_1byte
/* 2 or 3 bytes: copy byte 1 and the last byte (index count-1, now in rdx). */
movzbq 1(%rsi), %r8
movzbq (%rsi, %rdx), %r9
movb %r8b, 1(%rdi)
movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
/* Byte 0 is stored last on every non-empty path. */
movb %cl, (%rdi)
.Lend:
retq
CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
/*
* Some CPUs support the enhanced REP MOVSB/STOSB (ERMS) feature.
* If the feature is supported, memcpy_c_e() is the first choice.
* If enhanced REP MOVSB copy is not available, use the fast string copy
* memcpy_c() when possible. It is faster, and its code is simpler, than
* the original memcpy().
* Otherwise, the original memcpy() is used.
* In the .altinstructions section, the ERMS feature is placed after the
* REP_GOOD feature to implement the right patch order.
*
* Replace only beginning, memcpy is used to apply alternatives,
* so it is silly to overwrite itself with nops - reboot is the
* only outcome...
*/
.section .altinstructions, "a"
/*
 * Two patch records, both targeting the start of memcpy. In each record the
 * same replacement size is passed for the last two arguments, so only the
 * beginning of memcpy is described as replaceable.
 * NOTE(review): argument order is presumably (original, replacement,
 * feature, original length, replacement length) - confirm against the
 * altinstruction_entry macro in <asm/alternative-asm.h>.
 */
/* REP MOVSQ variant, selected by X86_FEATURE_REP_GOOD. */
altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
/* REP MOVSB variant, selected by X86_FEATURE_ERMS; listed second. */
altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
.previous
/*
* Normally compiler builtins are used, but sometimes the compiler calls out
* of line code. Based on asm-i386/string.h.
*
* This assembly file is re-written from memmove_64.c file.
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#undef memmove
/*
* Implement memmove(). This can handle overlap between src and dst.
*
* Input:
* rdi: dest
* rsi: src
* rdx: count
*
* Output:
* rax: dest
*/
/*
 * memmove: copy rdx bytes from rsi to rdi, choosing the copy direction so
 * that overlapping buffers are handled.
 *
 * In:   rdi = dest, rsi = src, rdx = count
 * Out:  rax = dest
 * Uses: rcx, rdx, rsi, rdi, r8-r11 and the flags; the direction flag is set
 *       and cleared again around the backward rep movsq.
 *
 * Numeric labels:
 *    1  tail handling for fewer than 32 remaining bytes
 *    2  entry to the backward copy
 *    3  forward copy through registers (pre-bias), 5 = its loop
 *    4  forward copy with rep movsq
 *    6  backward copy through registers (setup), 8 = its loop
 *    7  backward copy with rep movsq
 *    9..12  progressively smaller tail cases
 *   13  common return
 */
ENTRY(memmove)
CFI_STARTPROC
/* Handle 32 bytes or more in the loops; smaller counts go to the tail. */
mov %rdi, %rax
cmp $0x20, %rdx
jb 1f
/*
 * Decide forward/backward copy mode.
 * src >= dest (signed compare): copy forward. Otherwise compute
 * r8 = src + count; if that lies above dest the source overlaps the
 * destination from below and the copy must run backward.
 */
cmp %rdi, %rsi
jge .Lmemmove_begin_forward
mov %rsi, %r8
add %rdx, %r8
cmp %rdi, %r8
jg 2f
/*
 * .Lmemmove_begin_forward .. .Lmemmove_end_forward is the region named as
 * the original code in the altinstruction_entry at the end of this function.
 */
.Lmemmove_begin_forward:
/*
 * The movsq instruction has a high startup latency,
 * so small sizes are handled with general-purpose registers.
 */
cmp $680, %rdx
jb 3f
/*
 * The movsq instruction is only good for the aligned case.
 * The test used here is equality of the low bytes of src and dest.
 */
cmpb %dil, %sil
je 4f
3:
/*
 * Pre-bias the count by one block so that the sub at the top of the loop
 * borrows exactly when fewer than 32 bytes will remain after this block.
 */
sub $0x20, %rdx
/*
 * We gobble 32 bytes forward in each loop.
 * The mov and lea instructions leave the flags alone, so the jae at the
 * bottom tests the carry produced by the sub at the top.
 */
5:
sub $0x20, %rdx
movq 0*8(%rsi), %r11
movq 1*8(%rsi), %r10
movq 2*8(%rsi), %r9
movq 3*8(%rsi), %r8
leaq 4*8(%rsi), %rsi
movq %r11, 0*8(%rdi)
movq %r10, 1*8(%rdi)
movq %r9, 2*8(%rdi)
movq %r8, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae 5b
/* Undo the bias: rdx = bytes still to copy (0..31). */
addq $0x20, %rdx
jmp 1f
/*
 * Handle data forward by movsq.
 * The last 8 source bytes are read into r11 before the copy and written to
 * the last 8 destination bytes (address kept in r10) afterwards; that store
 * covers the count%8 bytes that rep movsq with rcx = count/8 does not reach.
 */
.p2align 4
4:
movq %rdx, %rcx
movq -8(%rsi, %rdx), %r11
lea -8(%rdi, %rdx), %r10
shrq $3, %rcx
rep movsq
movq %r11, (%r10)
jmp 13f
.Lmemmove_end_forward:
/*
 * Handle data backward by movsq.
 * The first 8 source bytes are read into r11 before the copy and written to
 * the start of the destination (r10) afterwards. rsi/rdi are moved to the
 * last quadword of each buffer and the direction flag is set for the
 * duration of the rep movsq, then cleared again.
 */
.p2align 4
7:
movq %rdx, %rcx
movq (%rsi), %r11
movq %rdi, %r10
leaq -8(%rsi, %rdx), %rsi
leaq -8(%rdi, %rdx), %rdi
shrq $3, %rcx
std
rep movsq
cld
movq %r11, (%r10)
jmp 13f
/*
 * Start to prepare for backward copy.
 * Same selection as the forward case: counts below 680, or pointers whose
 * low bytes differ, use the register loop; otherwise rep movsq at 7.
 */
.p2align 4
2:
cmp $680, %rdx
jb 6f
cmp %dil, %sil
je 7b
6:
/*
 * Calculate copy position to tail.
 * rsi/rdi now point one past the end of each buffer; rdx is pre-biased by
 * one block as in the forward loop.
 */
addq %rdx, %rsi
addq %rdx, %rdi
subq $0x20, %rdx
/*
 * We gobble 32 bytes backward in each loop.
 */
8:
subq $0x20, %rdx
movq -1*8(%rsi), %r11
movq -2*8(%rsi), %r10
movq -3*8(%rsi), %r9
movq -4*8(%rsi), %r8
leaq -4*8(%rsi), %rsi
movq %r11, -1*8(%rdi)
movq %r10, -2*8(%rdi)
movq %r9, -3*8(%rdi)
movq %r8, -4*8(%rdi)
leaq -4*8(%rdi), %rdi
jae 8b
/*
 * Calculate copy position to head.
 * rdx becomes the number of uncopied bytes (0..31); they are at the start
 * of the buffers, so step both pointers back by that amount.
 */
addq $0x20, %rdx
subq %rdx, %rsi
subq %rdx, %rdi
1:
/*
 * Tail: rdx < 32 and rsi/rdi point at the first uncopied byte. Every case
 * below performs all of its loads before its first store.
 */
cmpq $16, %rdx
jb 9f
/*
 * Move data from 16 bytes to 31 bytes.
 * First 16 and last 16 bytes; the two ranges may overlap.
 */
movq 0*8(%rsi), %r11
movq 1*8(%rsi), %r10
movq -2*8(%rsi, %rdx), %r9
movq -1*8(%rsi, %rdx), %r8
movq %r11, 0*8(%rdi)
movq %r10, 1*8(%rdi)
movq %r9, -2*8(%rdi, %rdx)
movq %r8, -1*8(%rdi, %rdx)
jmp 13f
.p2align 4
9:
cmpq $8, %rdx
jb 10f
/*
 * Move data from 8 bytes to 15 bytes.
 */
movq 0*8(%rsi), %r11
movq -1*8(%rsi, %rdx), %r10
movq %r11, 0*8(%rdi)
movq %r10, -1*8(%rdi, %rdx)
jmp 13f
10:
cmpq $4, %rdx
jb 11f
/*
 * Move data from 4 bytes to 7 bytes.
 */
movl (%rsi), %r11d
movl -4(%rsi, %rdx), %r10d
movl %r11d, (%rdi)
movl %r10d, -4(%rdi, %rdx)
jmp 13f
11:
cmp $2, %rdx
jb 12f
/*
 * Move data from 2 bytes to 3 bytes.
 */
movw (%rsi), %r11w
movw -2(%rsi, %rdx), %r10w
movw %r11w, (%rdi)
movw %r10w, -2(%rdi, %rdx)
jmp 13f
12:
cmp $1, %rdx
jb 13f
/*
 * Move data for 1 byte.
 */
movb (%rsi), %r11b
movb %r11b, (%rdi)
13:
retq
CFI_ENDPROC
.section .altinstr_replacement,"ax"
/*
 * Replacement for the forward region: one rep movsb over count bytes.
 * rax already holds dest at this point because it is set before
 * .Lmemmove_begin_forward.
 */
.Lmemmove_begin_forward_efs:
/* Forward moving data. */
movq %rdx, %rcx
rep movsb
retq
.Lmemmove_end_forward_efs:
.previous
.section .altinstructions,"a"
/*
 * Patch record for X86_FEATURE_ERMS: the region .Lmemmove_begin_forward ..
 * .Lmemmove_end_forward is paired with the rep movsb replacement above,
 * each with its own length expression.
 */
altinstruction_entry .Lmemmove_begin_forward, \
.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
.Lmemmove_end_forward-.Lmemmove_begin_forward, \
.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
.previous
ENDPROC(memmove)
/* Copyright 2002 Andi Kleen, SuSE Labs */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string instructions to get better performance than the original function.
* The code is simpler and shorter than the original function as well.
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
/*
 * Replacement body placed in .altinstr_replacement. It fills count/8
 * quadwords with REP STOSQ and then the remaining count%8 bytes with
 * REP STOSB. .Lmemset_c .. .Lmemset_e delimits the replacement.
 */
.Lmemset_c:
/* r9 = original destination, restored into rax before returning. */
movq %rdi,%r9
/* rcx = count / 8 for rep stosq; edx = count % 8 for the trailing bytes. */
movq %rdx,%rcx
andl $7,%edx
shrq $3,%rcx
/*
 * expand byte value
 * Multiplying the zero-extended byte by 0x0101010101010101 replicates it
 * into all eight bytes of rax.
 */
movzbl %sil,%esi
movabs $0x0101010101010101,%rax
imulq %rsi,%rax
rep stosq
/* rcx is zero after rep stosq; reload it with the leftover byte count. */
movl %edx,%ecx
rep stosb
movq %r9,%rax
ret
.Lmemset_e:
.previous
/*
* ISO C memset - set a memory block to a byte value. This function uses
* enhanced rep stosb to override the fast string function.
* The code is simpler and shorter than the fast string function as well.
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
/*
 * Replacement body placed in .altinstr_replacement. The whole fill is one
 * REP STOSB over count bytes. .Lmemset_c_e .. .Lmemset_e_e delimits the
 * replacement.
 */
.Lmemset_c_e:
/* r9 = original destination, restored into rax before returning. */
movq %rdi,%r9
/* al = fill byte (rep stosb stores al); rcx = count. */
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
.Lmemset_e_e:
.previous
/*
 * memset using plain stores.
 *
 * In:   rdi = destination, rsi = fill value (low byte used), rdx = count
 * Out:  rax = original destination
 * Uses: rcx, rdx, rdi, r8, r9, r10 and the flags
 *
 * Both memset and __memset are declared at this same location.
 * Order of work: optionally realign the destination to 8 bytes, fill
 * 64-byte blocks, then whole quadwords, then single bytes.
 */
ENTRY(memset)
ENTRY(__memset)
CFI_STARTPROC
/* r10 = original destination, restored into rax at .Lende. */
movq %rdi,%r10
/*
 * expand byte value
 * Multiplying the zero-extended byte by 0x0101010101010101 replicates it
 * into all eight bytes of rax.
 */
movzbl %sil,%ecx
movabs $0x0101010101010101,%rax
imulq %rcx,%rax
/*
 * align dst
 * r9d = destination & 7; non-zero means the pointer is not 8-byte aligned.
 */
movl %edi,%r9d
andl $7,%r9d
jnz .Lbad_alignment
CFI_REMEMBER_STATE
.Lafter_bad_alignment:
/* rcx = count / 64: number of 64-byte blocks; none means go to the tail. */
movq %rdx,%rcx
shrq $6,%rcx
jz .Lhandle_tail
.p2align 4
.Lloop_64:
/*
 * decq sets ZF for the jnz at the bottom; the mov and lea instructions in
 * between do not modify the flags.
 */
decq %rcx
movq %rax,(%rdi)
movq %rax,8(%rdi)
movq %rax,16(%rdi)
movq %rax,24(%rdi)
movq %rax,32(%rdi)
movq %rax,40(%rdi)
movq %rax,48(%rdi)
movq %rax,56(%rdi)
leaq 64(%rdi),%rdi
jnz .Lloop_64
/* Handle tail in loops. The loops should be faster than hard
to predict jump tables. */
.p2align 4
.Lhandle_tail:
/*
 * ecx = count & 56: the bytes (below 64) that form whole quadwords.
 * Zero means only the final 0..7 single bytes are left.
 */
movl %edx,%ecx
andl $63&(~7),%ecx
jz .Lhandle_7
/* Convert that byte count into a quadword count. */
shrl $3,%ecx
.p2align 4
.Lloop_8:
/* Same flag pattern as .Lloop_64: decl sets ZF, tested by jnz. */
decl %ecx
movq %rax,(%rdi)
leaq 8(%rdi),%rdi
jnz .Lloop_8
.Lhandle_7:
/* edx = count & 7: single bytes still to store. */
andl $7,%edx
jz .Lende
.p2align 4
.Lloop_1:
decl %edx
movb %al,(%rdi)
leaq 1(%rdi),%rdi
jnz .Lloop_1
.Lende:
movq %r10,%rax
ret
CFI_RESTORE_STATE
.Lbad_alignment:
/*
 * Destination is not 8-byte aligned (r9 = destination & 7, non-zero).
 * With 7 bytes or fewer, the byte loop handles everything.
 */
cmpq $7,%rdx
jbe .Lhandle_7
/*
 * Otherwise store one full quadword at the unaligned address, then advance
 * the pointer by r8 = 8 - r9 bytes to the next 8-byte boundary and reduce
 * the count by the same amount.
 */
movq %rax,(%rdi) /* unaligned store */
movq $8,%r8
subq %r9,%r8
addq %r8,%rdi
subq %r8,%rdx
jmp .Lafter_bad_alignment
/* .Lfinal is not referenced by any code visible in this function. */
.Lfinal:
CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)
/* Some CPUs support enhanced REP MOVSB/STOSB feature.
* It is recommended to use this when possible.
*
* If enhanced REP MOVSB/STOSB feature is not available, use fast string
* instructions.
*
* Otherwise, use original memset function.
*
* In the .altinstructions section, the ERMS feature is placed after the
* REP_GOOD feature to implement the right patch order.
*/
.section .altinstructions,"a"