Commit 9c782da4 authored by Peng Tao, committed by Greg Kroah-Hartman

staging/lustre/libcfs: cleanup linux-crypto

We don't need to implement crc32 and crc32pclmul on our own.
The crc32-pclmul support was merged into the 3.8 kernel in commit
78c37d19, no need to keep a local copy in Lustre anymore.
The crc32 implementation is identical to crypto-crc32. So drop
Lustre's private implementation and select kernel crypto in Kconfig.
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 4a1a01ea
......@@ -2,6 +2,14 @@ config LUSTRE_FS
tristate "Lustre file system client support"
depends on STAGING && INET && BROKEN
select LNET
select CRYPTO
select CRYPTO_CRC32
select CRYPTO_CRC32_PCLMUL if X86
select CRYPTO_CRC32C
select CRYPTO_MD5
select CRYPTO_SHA1
select CRYPTO_SHA256
select CRYPTO_SHA512
help
This option enables Lustre file system client support. Choose Y
here if you want to access a Lustre file system cluster. To compile
......
......@@ -5,16 +5,15 @@ libcfs-linux-objs += linux-prim.o linux-cpu.o
libcfs-linux-objs += linux-tcpip.o
libcfs-linux-objs += linux-proc.o linux-curproc.o
libcfs-linux-objs += linux-module.o
libcfs-linux-objs += linux-crypto.o linux-crypto-crc32.o
libcfs-linux-objs += linux-crypto.o
libcfs-linux-objs += linux-crypto-adler.o
libcfs-linux-objs += linux-crypto-crc32pclmul.o
libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
libcfs-all-objs := debug.o fail.o nidstrings.o module.o tracefile.o \
watchdog.o libcfs_string.o hash.o kernel_user_comm.o \
prng.o workitem.o upcall_cache.o libcfs_cpu.o \
libcfs_mem.o libcfs_lock.o crc32-pclmul_asm.o
libcfs_mem.o libcfs_lock.o
libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
......
/* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please visit http://www.xyratex.com/contact if you need additional
* information or have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2012 Xyratex Technology Limited
*
* Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
* calculation.
* CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
* PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
* at:
* http://www.intel.com/products/processor/manuals/
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
* Volume 2B: Instruction Set Reference, N-Z
*
* Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
* Alexander Boyko <Alexander_Boyko@xyratex.com>
*/
/* gcc 4.1.2 does not support the pclmulqdq instruction.
* Use the macro definitions from Linux kernel 2.6.38. */
/* Value stored in \opd when the operand matches none of the register names. */
#define REG_NUM_INVALID 100
/*
 * R32_NUM opd r32
 *
 * Set the assembler symbol \opd to the number (0-7) associated below with
 * the 32-bit general-purpose register named by \r32. \opd is first set to
 * REG_NUM_INVALID and is only overwritten if \r32 compares equal (.ifc, a
 * textual comparison) to one of the listed names.
 */
.macro R32_NUM opd r32
\opd = REG_NUM_INVALID
.ifc \r32,%eax
\opd = 0
.endif
.ifc \r32,%ecx
\opd = 1
.endif
.ifc \r32,%edx
\opd = 2
.endif
.ifc \r32,%ebx
\opd = 3
.endif
.ifc \r32,%esp
\opd = 4
.endif
.ifc \r32,%ebp
\opd = 5
.endif
.ifc \r32,%esi
\opd = 6
.endif
.ifc \r32,%edi
\opd = 7
.endif
.endm
/*
 * XMM_NUM opd xmm
 *
 * Set the assembler symbol \opd to the index (0-15) of the SSE register
 * named by \xmm. \opd is first set to REG_NUM_INVALID and is only
 * overwritten if \xmm compares equal (.ifc, a textual comparison) to one of
 * %xmm0 .. %xmm15.
 */
.macro XMM_NUM opd xmm
\opd = REG_NUM_INVALID
.ifc \xmm,%xmm0
\opd = 0
.endif
.ifc \xmm,%xmm1
\opd = 1
.endif
.ifc \xmm,%xmm2
\opd = 2
.endif
.ifc \xmm,%xmm3
\opd = 3
.endif
.ifc \xmm,%xmm4
\opd = 4
.endif
.ifc \xmm,%xmm5
\opd = 5
.endif
.ifc \xmm,%xmm6
\opd = 6
.endif
.ifc \xmm,%xmm7
\opd = 7
.endif
.ifc \xmm,%xmm8
\opd = 8
.endif
.ifc \xmm,%xmm9
\opd = 9
.endif
.ifc \xmm,%xmm10
\opd = 10
.endif
.ifc \xmm,%xmm11
\opd = 11
.endif
.ifc \xmm,%xmm12
\opd = 12
.endif
.ifc \xmm,%xmm13
\opd = 13
.endif
.ifc \xmm,%xmm14
\opd = 14
.endif
.ifc \xmm,%xmm15
\opd = 15
.endif
.endm
/*
 * PFX_OPD_SIZE
 *
 * Emit the single prefix byte 0x66 that the hand-encoded instructions
 * below (PCLMULQDQ, PEXTRD) place in front of their opcode bytes.
 */
.macro PFX_OPD_SIZE
.byte 0x66
.endm
/*
 * PFX_REX opd1 opd2 W=0
 *
 * Emit a REX prefix byte, but only when one is needed: i.e. when either
 * register number has bit 3 set (register index 8-15) or \W is non-zero.
 * The byte is 0x40 with
 *   bit 0 = bit 3 of \opd1,
 *   bit 2 = bit 3 of \opd2,
 *   bit 3 = \W.
 * The low three bits of each register number are emitted separately by
 * the MODRM macro.
 */
.macro PFX_REX opd1 opd2 W=0
.if ((\opd1 | \opd2) & 8) || \W
.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
.endif
.endm
/*
 * MODRM mod opd1 opd2
 *
 * Emit a ModRM byte: \mod supplies the top bits as-is (callers in this file
 * pass 0xc0), the low three bits of \opd1 go into bits 0-2 and the low
 * three bits of \opd2 go into bits 3-5.
 */
.macro MODRM mod opd1 opd2
.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
.endm
/*
 * PCLMULQDQ imm8 xmm1 xmm2
 *
 * Hand-assemble a register-to-register pclmulqdq for assemblers that do
 * not know the mnemonic. The emitted byte sequence is
 *   0x66, [REX if needed], 0x0f, 0x3a, 0x44, ModRM, imm8
 * where \xmm1 is encoded in the ModRM bits 0-2 and \xmm2 in ModRM bits 3-5.
 * Callers in this file use it in AT&T operand order, i.e. \xmm1 is the
 * source and \xmm2 is the destination that receives the product.
 *
 * NOTE(review): if an operand is not one of %xmm0..%xmm15, XMM_NUM yields
 * REG_NUM_INVALID and a bogus encoding is emitted without any diagnostic.
 */
.macro PCLMULQDQ imm8 xmm1 xmm2
XMM_NUM clmul_opd1 \xmm1
XMM_NUM clmul_opd2 \xmm2
PFX_OPD_SIZE
PFX_REX clmul_opd1 clmul_opd2
.byte 0x0f, 0x3a, 0x44
MODRM 0xc0 clmul_opd1 clmul_opd2
.byte \imm8
.endm
/*
 * PEXTRD imm8 xmm1 reg1
 *
 * Hand-assemble a pextrd from an SSE register into a 32-bit general-purpose
 * register. The emitted byte sequence is
 *   0x66, [REX if needed], 0x0f, 0x3a, 0x16, ModRM, imm8
 * where the general-purpose register \reg1 is encoded in ModRM bits 0-2
 * and the SSE register \xmm1 in ModRM bits 3-5. The only use in this file
 * extracts dword 1 of %xmm1 into %eax as the function result.
 */
.macro PEXTRD imm8 xmm1 reg1
XMM_NUM extrd_opd2 \xmm1
R32_NUM extrd_opd1 \reg1
PFX_OPD_SIZE
PFX_REX extrd_opd1 extrd_opd2
.byte 0x0f, 0x3a, 0x16
MODRM 0xc0 extrd_opd1 extrd_opd2
.byte \imm8
.endm
/*
 * Folding / reduction constants used by crc32_pclmul_le_16.
 * Each .octa is one 128-bit value that is loaded with movdqa, hence the
 * 16-byte alignment requested here.
 */
.align 16
/*
 * [(x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
 * #define CONSTANT_R1 0x154442bd4LL
 *
 * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
 * #define CONSTANT_R2 0x1c6e41596LL
 *
 * Low quadword holds R1, high quadword holds R2.
 * Used by the 64-bytes-per-iteration loop (loop_64).
 */
.Lconstant_R2R1:
.octa 0x00000001c6e415960000000154442bd4
/*
 * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
 * #define CONSTANT_R3 0x1751997d0LL
 *
 * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
 * #define CONSTANT_R4 0x0ccaa009eLL
 *
 * Low quadword holds R3, high quadword holds R4.
 * Used when folding 16 bytes at a time (less_64, loop_16) and by the
 * first step of fold_64.
 */
.Lconstant_R4R3:
.octa 0x00000000ccaa009e00000001751997d0
/*
 * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
 * #define CONSTANT_R5 0x163cd6124LL
 *
 * Used by the final 32-bit fold.
 */
.Lconstant_R5:
.octa 0x00000000000000000000000163cd6124
/* Mask that keeps only the lowest 32 bits of an xmm register. */
.Lconstant_mask32:
.octa 0x000000000000000000000000FFFFFFFF
/*
 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
 *
 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
 * #define CONSTANT_RU 0x1F7011641LL
 *
 * Low quadword holds the polynomial, high quadword holds RU.
 */
.Lconstant_RUpoly:
.octa 0x00000001F701164100000001DB710641
/* xmm register that holds the currently loaded folding constant. */
#define CONSTANT %xmm0
/*
 * Registers in which the three arguments of crc32_pclmul_le_16 arrive.
 * NOTE(review): presumably the x86_64 C calling convention in the first
 * branch and the 32-bit kernel register-argument convention in the second;
 * confirm against the build flags.
 */
#ifdef __x86_64__
#define BUF %rdi
#define LEN %rsi
#define CRC %edx
#else
#define BUF %eax
#define LEN %edx
#define CRC %ecx
#endif
.text
/**
 * crc32_pclmul_le_16 - calculate crc32 by folding with PCLMULQDQ
 *
 * BUF - buffer (16 bytes aligned; it is read with movdqa)
 * LEN - size of buffer in bytes (16 bytes aligned); LEN should be greater
 *       than 63 because the first 64 bytes are loaded unconditionally
 * CRC - initial crc32
 *
 * Returns the crc32 in %eax.
 *
 * uint crc32_pclmul_le_16(unsigned char const *buffer,
 *                         size_t len, uint crc32)
 */
.globl crc32_pclmul_le_16
.align 4, 0x90
crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */
/* Load the first 64 bytes into four 128-bit accumulators. */
movdqa (BUF), %xmm1
movdqa 0x10(BUF), %xmm2
movdqa 0x20(BUF), %xmm3
movdqa 0x30(BUF), %xmm4
/* Mix the initial CRC into the low dword of the first accumulator. */
movd CRC, CONSTANT
pxor CONSTANT, %xmm1
sub $0x40, LEN
add $0x40, BUF
#ifndef __x86_64__
/* This is for position independent code (-fPIC) support for 32bit:
 * %ecx receives the run-time address of the label delta so that the
 * constants can be addressed relative to it. %ecx is also CRC, but CRC
 * has already been consumed by the movd above. */
call delta
delta:
pop %ecx
#endif
/* Fewer than 64 bytes left: skip the 64-byte loop. */
cmp $0x40, LEN
jb less_64
#ifdef __x86_64__
movdqa .Lconstant_R2R1(%rip), CONSTANT
#else
movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
#endif
loop_64:/* 64 bytes Full cache line folding */
prefetchnta 0x40(BUF)
/* Keep a copy of each accumulator: one copy is multiplied with the low
 * quadword of CONSTANT (imm 0x00), the other with the high quadword
 * (imm 0x11), and the two products are XORed together. */
movdqa %xmm1, %xmm5
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
#ifdef __x86_64__
movdqa %xmm4, %xmm8
#endif
PCLMULQDQ 00, CONSTANT, %xmm1
PCLMULQDQ 00, CONSTANT, %xmm2
PCLMULQDQ 00, CONSTANT, %xmm3
#ifdef __x86_64__
PCLMULQDQ 00, CONSTANT, %xmm4
#endif
PCLMULQDQ 0x11, CONSTANT, %xmm5
PCLMULQDQ 0x11, CONSTANT, %xmm6
PCLMULQDQ 0x11, CONSTANT, %xmm7
#ifdef __x86_64__
PCLMULQDQ 0x11, CONSTANT, %xmm8
#endif
pxor %xmm5, %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
#ifdef __x86_64__
pxor %xmm8, %xmm4
#else
/* xmm8 is not available in the 32-bit build, so the fourth accumulator
 * is folded afterwards, reusing xmm5 as the scratch register */
movdqa %xmm4, %xmm5
PCLMULQDQ 00, CONSTANT, %xmm4
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm4
#endif
/* XOR in the next 64 bytes of input. */
pxor (BUF), %xmm1
pxor 0x10(BUF), %xmm2
pxor 0x20(BUF), %xmm3
pxor 0x30(BUF), %xmm4
sub $0x40, LEN
add $0x40, BUF
cmp $0x40, LEN
/* NOTE(review): jge is a signed comparison while the entry check above
 * uses the unsigned jb; the two agree only while LEN has its top bit
 * clear. Confirm callers can never pass such a length. */
jge loop_64
less_64:/* Folding cache line into 128bit */
#ifdef __x86_64__
movdqa .Lconstant_R4R3(%rip), CONSTANT
#else
movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT
#endif
prefetchnta (BUF)
/* Fold xmm1 and XOR in xmm2, then repeat with xmm3 and xmm4, so that the
 * four accumulators collapse into xmm1. */
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm2, %xmm1
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm3, %xmm1
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm4, %xmm1
/* No complete 16-byte chunk left: go straight to the final reduction. */
cmp $0x10, LEN
jb fold_64
loop_16:/* Folding rest buffer into 128bit */
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor (BUF), %xmm1
sub $0x10, LEN
add $0x10, BUF
cmp $0x10, LEN
/* NOTE(review): signed jge paired with the unsigned jb above, as in
 * loop_64. */
jge loop_16
fold_64:
/* perform the last 64 bit fold, also adds 32 zeroes
 * to the input stream */
PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
psrldq $0x08, %xmm1
pxor CONSTANT, %xmm1
/* final 32-bit fold */
movdqa %xmm1, %xmm2
#ifdef __x86_64__
movdqa .Lconstant_R5(%rip), CONSTANT
movdqa .Lconstant_mask32(%rip), %xmm3
#else
movdqa .Lconstant_R5 - delta(%ecx), CONSTANT
movdqa .Lconstant_mask32 - delta(%ecx), %xmm3
#endif
psrldq $0x04, %xmm2
pand %xmm3, %xmm1
PCLMULQDQ 0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
#ifdef __x86_64__
movdqa .Lconstant_RUpoly(%rip), CONSTANT
#else
movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT
#endif
movdqa %xmm1, %xmm2
pand %xmm3, %xmm1
PCLMULQDQ 0x10, CONSTANT, %xmm1
pand %xmm3, %xmm1
PCLMULQDQ 0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
/* The result is dword 1 of xmm1; return it in %eax. */
PEXTRD 0x01, %xmm1, %eax
ret
/* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please visit http://www.xyratex.com/contact if you need additional
* information or have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2012 Xyratex Technology Limited
*/
/*
* These are crypto API shash wrappers around crc32_le().
*/
#include <linux/module.h>
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
/* Block size advertised through .cra_blocksize of the shash_alg below. */
#define CHKSUM_BLOCK_SIZE 1
/* Digest length in bytes (.digestsize): the CRC is emitted as one __le32. */
#define CHKSUM_DIGEST_SIZE 4
/*
 * Local alias for the kernel's crc32_le(): continue the CRC value @crc over
 * the @len bytes starting at @p and return the updated value.
 */
static u32 __crc32_le(u32 crc, unsigned char const *p, size_t len)
{
	u32 updated = crc32_le(crc, p, len);

	return updated;
}
/**
 * Transform init hook: the u32 kept in the tfm context (the seed) starts
 * out as 0. There is no default init with ~0.
 *
 * Always returns 0.
 */
static int crc32_cra_init(struct crypto_tfm *tfm)
{
	u32 *seed = crypto_tfm_ctx(tfm);

	seed[0] = 0;

	return 0;
}
/*
 * Store the seed for subsequent CRC computations.
 *
 * Being able to set the seed allows arbitrary accumulators and a flexible
 * XOR policy: if your algorithm starts with ~0, XOR with ~0 before you set
 * the seed.
 *
 * @key must be exactly sizeof(u32) bytes and is read as a little-endian
 * 32-bit value. Any other @keylen flags CRYPTO_TFM_RES_BAD_KEY_LEN on
 * @hash and returns -EINVAL; on success 0 is returned.
 */
static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
			unsigned int keylen)
{
	u32 *seed = crypto_shash_ctx(hash);

	if (keylen == sizeof(u32)) {
		*seed = le32_to_cpup((__le32 *)key);
		return 0;
	}

	crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
	return -EINVAL;
}
/*
 * Start a new computation: copy the seed held in the tfm context into the
 * running CRC kept in the descriptor context. Always returns 0.
 */
static int crc32_init(struct shash_desc *desc)
{
	const u32 *seed = crypto_shash_ctx(desc->tfm);
	u32 *state = shash_desc_ctx(desc);

	state[0] = seed[0];

	return 0;
}
/*
 * Feed @len bytes at @data into the running CRC stored in the descriptor
 * context. Always returns 0.
 */
static int crc32_update(struct shash_desc *desc, const u8 *data,
			unsigned int len)
{
	u32 *state = shash_desc_ctx(desc);
	u32 next = __crc32_le(*state, data, len);

	*state = next;

	return 0;
}
/*
 * Continue the CRC in *@crcp over @len bytes at @data and write the result
 * to @out as a little-endian 32-bit value. *@crcp itself is not modified.
 *
 * No final XOR with 0xFFFFFFFF is applied, like crc32_le.
 * Always returns 0.
 */
static int __crc32_finup(u32 *crcp, const u8 *data, unsigned int len,
			 u8 *out)
{
	__le32 *digest = (__le32 *)out;
	u32 crc = __crc32_le(*crcp, data, len);

	*digest = cpu_to_le32(crc);

	return 0;
}
/*
 * shash .finup: process the last @len bytes at @data on top of the running
 * CRC in the descriptor context and emit the digest to @out.
 */
static int crc32_finup(struct shash_desc *desc, const u8 *data,
		       unsigned int len, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	return __crc32_finup(state, data, len, out);
}
/*
 * shash .final: write the running CRC from the descriptor context to @out
 * as a little-endian 32-bit value. Always returns 0.
 */
static int crc32_final(struct shash_desc *desc, u8 *out)
{
	const u32 *state = shash_desc_ctx(desc);
	__le32 *digest = (__le32 *)out;

	*digest = cpu_to_le32(*state);

	return 0;
}
/*
 * shash .digest: one-shot CRC of @len bytes at @data, starting from the
 * seed stored in the tfm context, with the digest written to @out.
 */
static int crc32_digest(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	u32 *seed = crypto_shash_ctx(desc->tfm);

	return __crc32_finup(seed, data, len, out);
}
/*
 * shash descriptor that exposes the table-driven crc32_le() as algorithm
 * "crc32" (driver name "crc32-table", priority 100).
 */
static struct shash_alg alg = {
	/* generic crypto_alg part */
	.base = {
		.cra_module = THIS_MODULE,
		.cra_name = "crc32",
		.cra_driver_name = "crc32-table",
		.cra_priority = 100,
		.cra_blocksize = CHKSUM_BLOCK_SIZE,
		/* the tfm context is the u32 seed */
		.cra_ctxsize = sizeof(u32),
		.cra_init = crc32_cra_init,
	},

	/* sizes: the descriptor context is the u32 running CRC */
	.descsize = sizeof(u32),
	.digestsize = CHKSUM_DIGEST_SIZE,

	/* operations */
	.setkey = crc32_setkey,
	.init = crc32_init,
	.update = crc32_update,
	.finup = crc32_finup,
	.final = crc32_final,
	.digest = crc32_digest,
};
/*
 * Register the "crc32" shash algorithm defined in this file.
 * Returns whatever crypto_register_shash() returns.
 */
int cfs_crypto_crc32_register(void)
{
	int rc;

	rc = crypto_register_shash(&alg);

	return rc;
}
EXPORT_SYMBOL(cfs_crypto_crc32_register);
/* Unregister the "crc32" shash algorithm defined in this file. */
void cfs_crypto_crc32_unregister(void)
{
	struct shash_alg *registered = &alg;

	crypto_unregister_shash(registered);
}
EXPORT_SYMBOL(cfs_crypto_crc32_unregister);
/* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please visit http://www.xyratex.com/contact if you need additional
* information or have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2012 Xyratex Technology Limited
*
* Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation.
*
* Author: Alexander Boyko <Alexander_Boyko@xyratex.com>
*/
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
#include <linux/crc32.h>
#include <asm/cpufeature.h>
#include <asm/i387.h>
#include <linux/libcfs/libcfs.h>
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the .cra_blocksize of this file's shash_alg - confirm. */
#define CHKSUM_BLOCK_SIZE 1
/* Digest length in bytes; the digest helpers below emit one __le32. */
#define CHKSUM_DIGEST_SIZE 4
/* minimum size of buffer for crc32_pclmul_le_16 */
#define PCLMUL_MIN_LEN 64L
/* size of xmm register, in bytes; also the alignment enforced on the
 * buffer handed to crc32_pclmul_le_16 */
#define SCALE_F 16L
/* Mask selecting the offset of an address or length within a SCALE_F block. */
#define SCALE_F_MASK (SCALE_F - 1)
/*
 * PCLMULQDQ-based CRC32 routine; only declared here, defined outside this
 * file. crc32_pclmul_le() below only ever calls it with a SCALE_F-aligned
 * @buffer and a @len that is a multiple of SCALE_F and at least
 * PCLMUL_MIN_LEN. @crc32 is the initial CRC; the updated CRC is returned.
 */
u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
/*
 * crc32_pclmul_le - continue a little-endian CRC32 over a buffer, using the
 * PCLMULQDQ routine for the aligned bulk of the data.
 *
 * @crc: CRC value to continue from
 * @p:   start of the data (any alignment)
 * @len: number of bytes at @p
 *
 * Buffers shorter than PCLMUL_MIN_LEN + SCALE_F_MASK bytes are handled
 * entirely by crc32_le(); that bound guarantees at least PCLMUL_MIN_LEN
 * bytes remain after up to SCALE_F_MASK bytes have been consumed to align
 * @p. Otherwise the unaligned head and the sub-SCALE_F tail go through
 * crc32_le() and the aligned middle through crc32_pclmul_le_16(), which is
 * bracketed by kernel_fpu_begin()/kernel_fpu_end() because it uses xmm
 * registers.
 *
 * All length bookkeeping is done in size_t so that a @len that does not fit
 * in 32 bits is not silently truncated.
 *
 * NOTE(review): the function is declared pure although it calls
 * kernel_fpu_begin()/kernel_fpu_end(); confirm that attribute is intended.
 *
 * Returns the updated CRC.
 */
static u32 __attribute__((pure))
crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
{
	size_t head_len;
	size_t aligned_len;
	size_t tail_len;

	if (len < PCLMUL_MIN_LEN + SCALE_F_MASK)
		return crc32_le(crc, p, len);

	head_len = (unsigned long)p & SCALE_F_MASK;
	if (head_len) {
		/* consume bytes up to the next SCALE_F boundary */
		head_len = SCALE_F - head_len;
		crc = crc32_le(crc, p, head_len);
		len -= head_len;
		p += head_len;
	}

	aligned_len = len & ~(size_t)SCALE_F_MASK;
	tail_len = len & SCALE_F_MASK;

	kernel_fpu_begin();
	crc = crc32_pclmul_le_16(p, aligned_len, crc);
	kernel_fpu_end();

	if (tail_len)
		crc = crc32_le(crc, p + aligned_len, tail_len);

	return crc;
}
/*
 * Transform init hook: the u32 kept in the tfm context (the seed) starts
 * out as 0. Always returns 0.
 */
static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
{
	u32 *seed = crypto_tfm_ctx(tfm);

	seed[0] = 0;

	return 0;
}
/*
 * Store the seed for subsequent CRC computations.
 *
 * Being able to set the seed allows arbitrary accumulators and a flexible
 * XOR policy: if your algorithm starts with ~0, XOR with ~0 before you set
 * the seed.
 *
 * @key must be exactly sizeof(u32) bytes and is read as a little-endian
 * 32-bit value. Any other @keylen flags CRYPTO_TFM_RES_BAD_KEY_LEN on
 * @hash and returns -EINVAL; on success 0 is returned.
 */
static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
			       unsigned int keylen)
{
	u32 *seed = crypto_shash_ctx(hash);

	if (keylen == sizeof(u32)) {
		*seed = le32_to_cpup((__le32 *)key);
		return 0;
	}

	crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
	return -EINVAL;
}
/*
 * Start a new computation: copy the seed held in the tfm context into the
 * running CRC kept in the descriptor context. Always returns 0.
 */
static int crc32_pclmul_init(struct shash_desc *desc)
{
	const u32 *seed = crypto_shash_ctx(desc->tfm);
	u32 *state = shash_desc_ctx(desc);

	state[0] = seed[0];

	return 0;
}
/*
 * Feed @len bytes at @data into the running CRC stored in the descriptor
 * context. Always returns 0.
 */
static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	u32 *state = shash_desc_ctx(desc);
	u32 next = crc32_pclmul_le(*state, data, len);

	*state = next;

	return 0;
}
/*
 * Continue the CRC in *@crcp over @len bytes at @data and write the result
 * to @out as a little-endian 32-bit value. *@crcp itself is not modified.
 *
 * No final XOR with 0xFFFFFFFF is applied, like crc32_le.
 * Always returns 0.
 */
static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
				u8 *out)
{
	__le32 *digest = (__le32 *)out;
	u32 crc = crc32_pclmul_le(*crcp, data, len);

	*digest = cpu_to_le32(crc);

	return 0;
}
/*
 * shash .finup-style helper: process the last @len bytes at @data on top of
 * the running CRC in the descriptor context and emit the digest to @out.
 */
static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	return __crc32_pclmul_finup(state, data, len, out);
}
/*
 * Write the running CRC from the descriptor context to @out as a
 * little-endian 32-bit value. Always returns 0.
 */
static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
{
	const u32 *state = shash_desc_ctx(desc);
	__le32 *digest = (__le32 *)out;

	*digest = cpu_to_le32(*state);

	return 0;
}
/*
 * One-shot CRC of @len bytes at @data, starting from the seed stored in the
 * tfm context, with the digest written to @out.
 */
static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
			       unsigned int len, u8 *out)
{
	u32 *seed = crypto_shash_ctx(desc->tfm);

	return __crc32_pclmul_finup(seed, data, len, out);
}