mirror of
https://github.com/openwrt/openwrt.git
synced 2025-01-25 05:47:00 +00:00
c0cb86e1d5
Rather than using the clunky, old, slower wireguard-linux-compat out of tree module, this commit does a patch-by-patch backport of upstream's wireguard to 5.4. This specific backport is in widespread use, being part of SUSE's enterprise kernel, Oracle's enterprise kernel, Google's Android kernel, Gentoo's distro kernel, and probably more I've forgotten about. It's definitely the "more proper" way of adding wireguard to a kernel than the ugly compat.h hell of the wireguard-linux-compat repo. And most importantly for OpenWRT, it allows using the same module configuration code for 5.10 as for 5.4, with no need for bifurcation. These patches are from the backport tree which is maintained in the open here: https://git.zx2c4.com/wireguard-linux/log/?h=backport-5.4.y I'll be sending PRs to update this as needed. Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> (cherry picked from commit 3888fa78802354ab7bbd19b7d061fd80a16ce06b) (cherry picked from commit d54072587146dd0db9bb52b513234d944edabda3) (cherry picked from commit 196f3d586f11d96ba4ab60068cfb12420bcd20fd) (cherry picked from commit 3500fd7938a6d0c0e320295f0aa2fa34b1ebc08d) (cherry picked from commit 23b801d3ba57e34cc609ea40982c7fbed08164e9) (cherry picked from commit 0c0cb97da7f5cc06919449131dd57ed805f8f78d) (cherry picked from commit 2a27f6f90a430342cdbe84806e8b10acff446a2d) Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
252 lines
7.8 KiB
Diff
252 lines
7.8 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Ard Biesheuvel <ardb@kernel.org>
|
|
Date: Fri, 8 Nov 2019 13:22:20 +0100
|
|
Subject: [PATCH] crypto: x86/poly1305 - unify Poly1305 state struct with
|
|
generic code
|
|
|
|
commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream.
|
|
|
|
In preparation of exposing a Poly1305 library interface directly from
|
|
the accelerated x86 driver, align the state descriptor of the x86 code
|
|
with the one used by the generic driver. This is needed to make the
|
|
library interface unified between all implementations.
|
|
|
|
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
|
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
|
---
|
|
arch/x86/crypto/poly1305_glue.c | 88 ++++++++++--------------------
|
|
crypto/poly1305_generic.c | 6 +-
|
|
include/crypto/internal/poly1305.h | 4 +-
|
|
include/crypto/poly1305.h | 18 +++---
|
|
4 files changed, 43 insertions(+), 73 deletions(-)
|
|
|
|
--- a/arch/x86/crypto/poly1305_glue.c
|
|
+++ b/arch/x86/crypto/poly1305_glue.c
|
|
@@ -14,40 +14,14 @@
|
|
#include <linux/module.h>
|
|
#include <asm/simd.h>
|
|
|
|
-struct poly1305_simd_desc_ctx {
|
|
- struct poly1305_desc_ctx base;
|
|
- /* derived key u set? */
|
|
- bool uset;
|
|
-#ifdef CONFIG_AS_AVX2
|
|
- /* derived keys r^3, r^4 set? */
|
|
- bool wset;
|
|
-#endif
|
|
- /* derived Poly1305 key r^2 */
|
|
- u32 u[5];
|
|
- /* ... silently appended r^3 and r^4 when using AVX2 */
|
|
-};
|
|
-
|
|
asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
|
|
const u32 *r, unsigned int blocks);
|
|
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
|
|
unsigned int blocks, const u32 *u);
|
|
-#ifdef CONFIG_AS_AVX2
|
|
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
|
|
unsigned int blocks, const u32 *u);
|
|
-static bool poly1305_use_avx2;
|
|
-#endif
|
|
|
|
-static int poly1305_simd_init(struct shash_desc *desc)
|
|
-{
|
|
- struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
|
|
-
|
|
- sctx->uset = false;
|
|
-#ifdef CONFIG_AS_AVX2
|
|
- sctx->wset = false;
|
|
-#endif
|
|
-
|
|
- return crypto_poly1305_init(desc);
|
|
-}
|
|
+static bool poly1305_use_avx2 __ro_after_init;
|
|
|
|
static void poly1305_simd_mult(u32 *a, const u32 *b)
|
|
{
|
|
@@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, c
|
|
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
|
|
const u8 *src, unsigned int srclen)
|
|
{
|
|
- struct poly1305_simd_desc_ctx *sctx;
|
|
unsigned int blocks, datalen;
|
|
|
|
- BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
|
|
- sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
|
|
-
|
|
if (unlikely(!dctx->sset)) {
|
|
datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
|
|
src += srclen - datalen;
|
|
srclen = datalen;
|
|
}
|
|
|
|
-#ifdef CONFIG_AS_AVX2
|
|
- if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
|
|
- if (unlikely(!sctx->wset)) {
|
|
- if (!sctx->uset) {
|
|
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
|
|
- poly1305_simd_mult(sctx->u, dctx->r.r);
|
|
- sctx->uset = true;
|
|
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
|
|
+ poly1305_use_avx2 &&
|
|
+ srclen >= POLY1305_BLOCK_SIZE * 4) {
|
|
+ if (unlikely(dctx->rset < 4)) {
|
|
+ if (dctx->rset < 2) {
|
|
+ dctx->r[1] = dctx->r[0];
|
|
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
|
|
}
|
|
- memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
|
|
- poly1305_simd_mult(sctx->u + 5, dctx->r.r);
|
|
- memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
|
|
- poly1305_simd_mult(sctx->u + 10, dctx->r.r);
|
|
- sctx->wset = true;
|
|
+ dctx->r[2] = dctx->r[1];
|
|
+ poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
|
|
+ dctx->r[3] = dctx->r[2];
|
|
+ poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
|
|
+ dctx->rset = 4;
|
|
}
|
|
blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
|
|
- poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
|
|
- sctx->u);
|
|
+ poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
|
|
+ dctx->r[1].r);
|
|
src += POLY1305_BLOCK_SIZE * 4 * blocks;
|
|
srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
|
|
}
|
|
-#endif
|
|
+
|
|
if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
|
|
- if (unlikely(!sctx->uset)) {
|
|
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
|
|
- poly1305_simd_mult(sctx->u, dctx->r.r);
|
|
- sctx->uset = true;
|
|
+ if (unlikely(dctx->rset < 2)) {
|
|
+ dctx->r[1] = dctx->r[0];
|
|
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
|
|
+ dctx->rset = 2;
|
|
}
|
|
blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
|
|
- poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
|
|
- sctx->u);
|
|
+ poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
|
|
+ blocks, dctx->r[1].r);
|
|
src += POLY1305_BLOCK_SIZE * 2 * blocks;
|
|
srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
|
|
}
|
|
if (srclen >= POLY1305_BLOCK_SIZE) {
|
|
- poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
|
|
+ poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
|
|
srclen -= POLY1305_BLOCK_SIZE;
|
|
}
|
|
return srclen;
|
|
@@ -159,10 +129,10 @@ static int poly1305_simd_update(struct s
|
|
|
|
static struct shash_alg alg = {
|
|
.digestsize = POLY1305_DIGEST_SIZE,
|
|
- .init = poly1305_simd_init,
|
|
+ .init = crypto_poly1305_init,
|
|
.update = poly1305_simd_update,
|
|
.final = crypto_poly1305_final,
|
|
- .descsize = sizeof(struct poly1305_simd_desc_ctx),
|
|
+ .descsize = sizeof(struct poly1305_desc_ctx),
|
|
.base = {
|
|
.cra_name = "poly1305",
|
|
.cra_driver_name = "poly1305-simd",
|
|
@@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init
|
|
if (!boot_cpu_has(X86_FEATURE_XMM2))
|
|
return -ENODEV;
|
|
|
|
-#ifdef CONFIG_AS_AVX2
|
|
- poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
|
|
+ poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
|
|
+ boot_cpu_has(X86_FEATURE_AVX) &&
|
|
boot_cpu_has(X86_FEATURE_AVX2) &&
|
|
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
|
- alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
|
|
+ alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
|
|
if (poly1305_use_avx2)
|
|
alg.descsize += 10 * sizeof(u32);
|
|
-#endif
|
|
+
|
|
return crypto_register_shash(&alg);
|
|
}
|
|
|
|
--- a/crypto/poly1305_generic.c
|
|
+++ b/crypto/poly1305_generic.c
|
|
@@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_de
|
|
|
|
poly1305_core_init(&dctx->h);
|
|
dctx->buflen = 0;
|
|
- dctx->rset = false;
|
|
+ dctx->rset = 0;
|
|
dctx->sset = false;
|
|
|
|
return 0;
|
|
@@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1
|
|
srclen = datalen;
|
|
}
|
|
|
|
- poly1305_core_blocks(&dctx->h, &dctx->r, src,
|
|
+ poly1305_core_blocks(&dctx->h, dctx->r, src,
|
|
srclen / POLY1305_BLOCK_SIZE, 1);
|
|
}
|
|
|
|
@@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_d
|
|
dctx->buf[dctx->buflen++] = 1;
|
|
memset(dctx->buf + dctx->buflen, 0,
|
|
POLY1305_BLOCK_SIZE - dctx->buflen);
|
|
- poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
|
|
+ poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
|
|
}
|
|
|
|
poly1305_core_emit(&dctx->h, digest);
|
|
--- a/include/crypto/internal/poly1305.h
|
|
+++ b/include/crypto/internal/poly1305.h
|
|
@@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(
|
|
{
|
|
if (!dctx->sset) {
|
|
if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
|
|
- poly1305_core_setkey(&dctx->r, src);
|
|
+ poly1305_core_setkey(dctx->r, src);
|
|
src += POLY1305_BLOCK_SIZE;
|
|
srclen -= POLY1305_BLOCK_SIZE;
|
|
- dctx->rset = true;
|
|
+ dctx->rset = 1;
|
|
}
|
|
if (srclen >= POLY1305_BLOCK_SIZE) {
|
|
dctx->s[0] = get_unaligned_le32(src + 0);
|
|
--- a/include/crypto/poly1305.h
|
|
+++ b/include/crypto/poly1305.h
|
|
@@ -22,20 +22,20 @@ struct poly1305_state {
|
|
};
|
|
|
|
struct poly1305_desc_ctx {
|
|
- /* key */
|
|
- struct poly1305_key r;
|
|
- /* finalize key */
|
|
- u32 s[4];
|
|
- /* accumulator */
|
|
- struct poly1305_state h;
|
|
/* partial buffer */
|
|
u8 buf[POLY1305_BLOCK_SIZE];
|
|
/* bytes used in partial buffer */
|
|
unsigned int buflen;
|
|
- /* r key has been set */
|
|
- bool rset;
|
|
- /* s key has been set */
|
|
+ /* how many keys have been set in r[] */
|
|
+ unsigned short rset;
|
|
+ /* whether s[] has been set */
|
|
bool sset;
|
|
+ /* finalize key */
|
|
+ u32 s[4];
|
|
+ /* accumulator */
|
|
+ struct poly1305_state h;
|
|
+ /* key */
|
|
+ struct poly1305_key r[1];
|
|
};
|
|
|
|
#endif
|