mirror of
https://github.com/openwrt/openwrt.git
synced 2025-04-15 15:07:11 +00:00
generic: 5.15: copy config and patch from 5.10
Copy config and patches from kernel 5.10 to kernel 5.15 Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
This commit is contained in:
parent
13960fb0e0
commit
9a038e7fd1
@ -0,0 +1,30 @@
|
||||
From 13b1ecc3401653a355798eb1dee10cc1608202f4 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Mon, 18 Jan 2016 12:27:49 +0100
|
||||
Subject: [PATCH 33/34] Kbuild: don't hardcode path to awk in
|
||||
scripts/ld-version.sh
|
||||
|
||||
On some systems /usr/bin/awk does not exist, or is broken. Find it via
|
||||
$PATH instead.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
scripts/ld-version.sh | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/scripts/ld-version.sh
|
||||
+++ b/scripts/ld-version.sh
|
||||
@@ -1,6 +1,7 @@
|
||||
-#!/usr/bin/awk -f
|
||||
+#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# extract linker version number from stdin and turn into single number
|
||||
+exec awk '
|
||||
{
|
||||
gsub(".*\\)", "");
|
||||
gsub(".*version ", "");
|
||||
@@ -9,3 +10,4 @@
|
||||
print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
|
||||
exit
|
||||
}
|
||||
+'
|
@ -0,0 +1,21 @@
|
||||
From 173019b66dcc9d68ad9333aa744dad1e369b5aa8 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Sun, 9 Jul 2017 00:26:53 +0200
|
||||
Subject: [PATCH 34/34] kernel: add compile fix for linux 4.9 on x86
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
Makefile | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -507,7 +507,7 @@ KBUILD_LDFLAGS_MODULE :=
|
||||
KBUILD_LDFLAGS :=
|
||||
CLANG_FLAGS :=
|
||||
|
||||
-export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
|
||||
+export ARCH SRCARCH SUBARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
|
||||
export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
|
||||
export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
|
||||
export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
|
@ -0,0 +1,27 @@
|
||||
From 1027a42c25cbf8cfc4ade6503c5110aae04866af Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Daniel=20Gonz=C3=A1lez=20Cabanelas?= <dgcbueu@gmail.com>
|
||||
Date: Fri, 16 Oct 2020 20:22:37 +0200
|
||||
Subject: [PATCH] power: reset: linkstation-poweroff: add missing put_device()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The of_mdio_find_bus() takes a reference to the underlying device
|
||||
structure, we should release that reference using a put_device() call.
|
||||
|
||||
Signed-off-by: Daniel González Cabanelas <dgcbueu@gmail.com>
|
||||
Signed-off-by: Sebastian Reichel <sre@kernel.org>
|
||||
---
|
||||
drivers/power/reset/linkstation-poweroff.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/drivers/power/reset/linkstation-poweroff.c
|
||||
+++ b/drivers/power/reset/linkstation-poweroff.c
|
||||
@@ -113,6 +113,7 @@ static int __init linkstation_poweroff_i
|
||||
return -EPROBE_DEFER;
|
||||
|
||||
phydev = phy_find_first(bus);
|
||||
+ put_device(&bus->dev);
|
||||
if (!phydev)
|
||||
return -EPROBE_DEFER;
|
||||
|
@ -0,0 +1,65 @@
|
||||
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
Date: Tue, 5 Oct 2021 18:54:02 +0200
|
||||
Subject: [PATCH] MIPS: uasm: Enable muhu opcode for MIPS R6
|
||||
|
||||
Enable the 'muhu' instruction, complementing the existing 'mulu', needed
|
||||
to implement a MIPS32 BPF JIT.
|
||||
|
||||
Also fix a typo in the existing definition of 'dmulu'.
|
||||
|
||||
Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
|
||||
|
||||
This patch is a dependency for my 32-bit MIPS eBPF JIT.
|
||||
|
||||
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
---
|
||||
|
||||
--- a/arch/mips/include/asm/uasm.h
|
||||
+++ b/arch/mips/include/asm/uasm.h
|
||||
@@ -145,6 +145,7 @@ Ip_u1(_mtlo);
|
||||
Ip_u3u1u2(_mul);
|
||||
Ip_u1u2(_multu);
|
||||
Ip_u3u1u2(_mulu);
|
||||
+Ip_u3u1u2(_muhu);
|
||||
Ip_u3u1u2(_nor);
|
||||
Ip_u3u1u2(_or);
|
||||
Ip_u2u1u3(_ori);
|
||||
--- a/arch/mips/mm/uasm-mips.c
|
||||
+++ b/arch/mips/mm/uasm-mips.c
|
||||
@@ -90,7 +90,7 @@ static const struct insn insn_table[insn
|
||||
RS | RT | RD},
|
||||
[insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
|
||||
[insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT},
|
||||
- [insn_dmulu] = {M(spec_op, 0, 0, 0, dmult_dmul_op, dmultu_op),
|
||||
+ [insn_dmulu] = {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op),
|
||||
RS | RT | RD},
|
||||
[insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE},
|
||||
[insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE},
|
||||
@@ -150,6 +150,8 @@ static const struct insn insn_table[insn
|
||||
[insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS},
|
||||
[insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op),
|
||||
RS | RT | RD},
|
||||
+ [insn_muhu] = {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op),
|
||||
+ RS | RT | RD},
|
||||
#ifndef CONFIG_CPU_MIPSR6
|
||||
[insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
|
||||
#else
|
||||
--- a/arch/mips/mm/uasm.c
|
||||
+++ b/arch/mips/mm/uasm.c
|
||||
@@ -59,7 +59,7 @@ enum opcode {
|
||||
insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld,
|
||||
insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi,
|
||||
insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0,
|
||||
- insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_nor,
|
||||
+ insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor,
|
||||
insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc,
|
||||
insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll,
|
||||
insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra,
|
||||
@@ -344,6 +344,7 @@ I_u1(_mtlo)
|
||||
I_u3u1u2(_mul)
|
||||
I_u1u2(_multu)
|
||||
I_u3u1u2(_mulu)
|
||||
+I_u3u1u2(_muhu)
|
||||
I_u3u1u2(_nor)
|
||||
I_u3u1u2(_or)
|
||||
I_u2u1u3(_ori)
|
@ -0,0 +1,31 @@
|
||||
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
Date: Tue, 5 Oct 2021 18:54:03 +0200
|
||||
Subject: [PATCH] mips: uasm: Add workaround for Loongson-2F nop CPU errata
|
||||
|
||||
This patch implements a workaround for the Loongson-2F nop errata in generated
|
||||
code, if the existing option CONFIG_CPU_NOP_WORKAROUNDS is set. Before,
|
||||
the binutils option -mfix-loongson2f-nop was enabled, but no workaround
|
||||
was done when emitting MIPS code. Now, the nop pseudo instruction is
|
||||
emitted as "or at,at,zero" instead of the default "sll zero,zero,0". This
|
||||
is consistent with the workaround implemented by binutils.
|
||||
|
||||
Link: https://sourceware.org/legacy-ml/binutils/2009-11/msg00387.html
|
||||
|
||||
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
|
||||
---
|
||||
|
||||
--- a/arch/mips/include/asm/uasm.h
|
||||
+++ b/arch/mips/include/asm/uasm.h
|
||||
@@ -249,7 +249,11 @@ static inline void uasm_l##lb(struct uas
|
||||
#define uasm_i_bnezl(buf, rs, off) uasm_i_bnel(buf, rs, 0, off)
|
||||
#define uasm_i_ehb(buf) uasm_i_sll(buf, 0, 0, 3)
|
||||
#define uasm_i_move(buf, a, b) UASM_i_ADDU(buf, a, 0, b)
|
||||
+#ifdef CONFIG_CPU_NOP_WORKAROUNDS
|
||||
+#define uasm_i_nop(buf) uasm_i_or(buf, 1, 1, 0)
|
||||
+#else
|
||||
#define uasm_i_nop(buf) uasm_i_sll(buf, 0, 0, 0)
|
||||
+#endif
|
||||
#define uasm_i_ssnop(buf) uasm_i_sll(buf, 0, 0, 1)
|
||||
|
||||
static inline void uasm_i_drotr_safe(u32 **p, unsigned int a1,
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,120 @@
|
||||
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
Date: Tue, 5 Oct 2021 18:54:06 +0200
|
||||
Subject: [PATCH] mips: bpf: Add JIT workarounds for CPU errata
|
||||
|
||||
This patch adds workarounds for the following CPU errata to the MIPS
|
||||
eBPF JIT, if enabled in the kernel configuration.
|
||||
|
||||
- R10000 ll/sc weak ordering
|
||||
- Loongson-3 ll/sc weak ordering
|
||||
- Loongson-2F jump hang
|
||||
|
||||
The Loongson-2F nop errata workaround is implemented in uasm, which the JIT uses,
|
||||
so no additional mitigations are needed for that.
|
||||
|
||||
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
|
||||
---
|
||||
|
||||
--- a/arch/mips/net/bpf_jit_comp.c
|
||||
+++ b/arch/mips/net/bpf_jit_comp.c
|
||||
@@ -404,6 +404,7 @@ void emit_alu_r(struct jit_context *ctx,
|
||||
/* Atomic read-modify-write (32-bit) */
|
||||
void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code)
|
||||
{
|
||||
+ LLSC_sync(ctx);
|
||||
emit(ctx, ll, MIPS_R_T9, off, dst);
|
||||
switch (code) {
|
||||
case BPF_ADD:
|
||||
@@ -420,18 +421,19 @@ void emit_atomic_r(struct jit_context *c
|
||||
break;
|
||||
}
|
||||
emit(ctx, sc, MIPS_R_T8, off, dst);
|
||||
- emit(ctx, beqz, MIPS_R_T8, -16);
|
||||
+ emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset);
|
||||
emit(ctx, nop); /* Delay slot */
|
||||
}
|
||||
|
||||
/* Atomic compare-and-exchange (32-bit) */
|
||||
void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off)
|
||||
{
|
||||
+ LLSC_sync(ctx);
|
||||
emit(ctx, ll, MIPS_R_T9, off, dst);
|
||||
emit(ctx, bne, MIPS_R_T9, res, 12);
|
||||
emit(ctx, move, MIPS_R_T8, src); /* Delay slot */
|
||||
emit(ctx, sc, MIPS_R_T8, off, dst);
|
||||
- emit(ctx, beqz, MIPS_R_T8, -20);
|
||||
+ emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset);
|
||||
emit(ctx, move, res, MIPS_R_T9); /* Delay slot */
|
||||
clobber_reg(ctx, res);
|
||||
}
|
||||
--- a/arch/mips/net/bpf_jit_comp.h
|
||||
+++ b/arch/mips/net/bpf_jit_comp.h
|
||||
@@ -87,7 +87,7 @@ struct jit_context {
|
||||
};
|
||||
|
||||
/* Emit the instruction if the JIT memory space has been allocated */
|
||||
-#define emit(ctx, func, ...) \
|
||||
+#define __emit(ctx, func, ...) \
|
||||
do { \
|
||||
if ((ctx)->target != NULL) { \
|
||||
u32 *p = &(ctx)->target[ctx->jit_index]; \
|
||||
@@ -95,6 +95,30 @@ do { \
|
||||
} \
|
||||
(ctx)->jit_index++; \
|
||||
} while (0)
|
||||
+#define emit(...) __emit(__VA_ARGS__)
|
||||
+
|
||||
+/* Workaround for R10000 ll/sc errata */
|
||||
+#ifdef CONFIG_WAR_R10000
|
||||
+#define LLSC_beqz beqzl
|
||||
+#else
|
||||
+#define LLSC_beqz beqz
|
||||
+#endif
|
||||
+
|
||||
+/* Workaround for Loongson-3 ll/sc errata */
|
||||
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
|
||||
+#define LLSC_sync(ctx) emit(ctx, sync, 0)
|
||||
+#define LLSC_offset 4
|
||||
+#else
|
||||
+#define LLSC_sync(ctx)
|
||||
+#define LLSC_offset 0
|
||||
+#endif
|
||||
+
|
||||
+/* Workaround for Loongson-2F jump errata */
|
||||
+#ifdef CONFIG_CPU_JUMP_WORKAROUNDS
|
||||
+#define JALR_MASK 0xffffffffcfffffffULL
|
||||
+#else
|
||||
+#define JALR_MASK (~0ULL)
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* Mark a BPF register as accessed, it needs to be
|
||||
--- a/arch/mips/net/bpf_jit_comp64.c
|
||||
+++ b/arch/mips/net/bpf_jit_comp64.c
|
||||
@@ -375,6 +375,7 @@ static void emit_atomic_r64(struct jit_c
|
||||
u8 t1 = MIPS_R_T6;
|
||||
u8 t2 = MIPS_R_T7;
|
||||
|
||||
+ LLSC_sync(ctx);
|
||||
emit(ctx, lld, t1, off, dst);
|
||||
switch (code) {
|
||||
case BPF_ADD:
|
||||
@@ -391,7 +392,7 @@ static void emit_atomic_r64(struct jit_c
|
||||
break;
|
||||
}
|
||||
emit(ctx, scd, t2, off, dst);
|
||||
- emit(ctx, beqz, t2, -16);
|
||||
+ emit(ctx, LLSC_beqz, t2, -16 - LLSC_offset);
|
||||
emit(ctx, nop); /* Delay slot */
|
||||
}
|
||||
|
||||
@@ -414,7 +415,7 @@ static int emit_call(struct jit_context
|
||||
push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
|
||||
|
||||
/* Emit function call */
|
||||
- emit_mov_i64(ctx, tmp, addr);
|
||||
+ emit_mov_i64(ctx, tmp, addr & JALR_MASK);
|
||||
emit(ctx, jalr, MIPS_R_RA, tmp);
|
||||
emit(ctx, nop); /* Delay slot */
|
||||
|
@ -0,0 +1,61 @@
|
||||
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
Date: Tue, 5 Oct 2021 18:54:07 +0200
|
||||
Subject: [PATCH] mips: bpf: Enable eBPF JITs
|
||||
|
||||
This patch enables the new eBPF JITs for 32-bit and 64-bit MIPS. It also
|
||||
disables the old cBPF JIT so that cBPF programs are converted to use the
|
||||
new JIT.
|
||||
|
||||
Workarounds for R4000 CPU errata are not implemented by the JIT, so the
|
||||
JIT is disabled if any of those workarounds are configured.
|
||||
|
||||
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
---
|
||||
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -3294,6 +3294,7 @@ S: Supported
|
||||
F: arch/arm64/net/
|
||||
|
||||
BPF JIT for MIPS (32-BIT AND 64-BIT)
|
||||
+M: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
M: Paul Burton <paulburton@kernel.org>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -49,7 +49,6 @@ config MIPS
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
|
||||
select HAVE_ASM_MODVERSIONS
|
||||
- select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS
|
||||
select HAVE_CONTEXT_TRACKING
|
||||
select HAVE_TIF_NOHZ
|
||||
select HAVE_C_RECORDMCOUNT
|
||||
@@ -57,7 +56,10 @@ config MIPS
|
||||
select HAVE_DEBUG_STACKOVERFLOW
|
||||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
- select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
|
||||
+ select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
|
||||
+ !CPU_DADDI_WORKAROUNDS && \
|
||||
+ !CPU_R4000_WORKAROUNDS && \
|
||||
+ !CPU_R4400_WORKAROUNDS
|
||||
select HAVE_EXIT_THREAD
|
||||
select HAVE_FAST_GUP
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
--- a/arch/mips/net/Makefile
|
||||
+++ b/arch/mips/net/Makefile
|
||||
@@ -2,9 +2,10 @@
|
||||
# MIPS networking code
|
||||
|
||||
obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
|
||||
+obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o
|
||||
|
||||
ifeq ($(CONFIG_32BIT),y)
|
||||
- obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o bpf_jit_comp32.o
|
||||
+ obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp32.o
|
||||
else
|
||||
- obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
|
||||
+ obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp64.o
|
||||
endif
|
@ -0,0 +1,387 @@
|
||||
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
Date: Tue, 5 Oct 2021 18:54:08 +0200
|
||||
Subject: [PATCH] mips: bpf: Remove old BPF JIT implementations
|
||||
|
||||
This patch removes the old 32-bit cBPF and 64-bit eBPF JIT implementations.
|
||||
They are replaced by a new eBPF implementation that supports both 32-bit
|
||||
and 64-bit MIPS CPUs.
|
||||
|
||||
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
|
||||
---
|
||||
delete mode 100644 arch/mips/net/bpf_jit.c
|
||||
delete mode 100644 arch/mips/net/bpf_jit.h
|
||||
delete mode 100644 arch/mips/net/bpf_jit_asm.S
|
||||
delete mode 100644 arch/mips/net/ebpf_jit.c
|
||||
|
||||
--- a/arch/mips/net/bpf_jit.h
|
||||
+++ /dev/null
|
||||
@@ -1,81 +0,0 @@
|
||||
-/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
-/*
|
||||
- * Just-In-Time compiler for BPF filters on MIPS
|
||||
- *
|
||||
- * Copyright (c) 2014 Imagination Technologies Ltd.
|
||||
- * Author: Markos Chandras <markos.chandras@imgtec.com>
|
||||
- */
|
||||
-
|
||||
-#ifndef BPF_JIT_MIPS_OP_H
|
||||
-#define BPF_JIT_MIPS_OP_H
|
||||
-
|
||||
-/* Registers used by JIT */
|
||||
-#define MIPS_R_ZERO 0
|
||||
-#define MIPS_R_V0 2
|
||||
-#define MIPS_R_A0 4
|
||||
-#define MIPS_R_A1 5
|
||||
-#define MIPS_R_T4 12
|
||||
-#define MIPS_R_T5 13
|
||||
-#define MIPS_R_T6 14
|
||||
-#define MIPS_R_T7 15
|
||||
-#define MIPS_R_S0 16
|
||||
-#define MIPS_R_S1 17
|
||||
-#define MIPS_R_S2 18
|
||||
-#define MIPS_R_S3 19
|
||||
-#define MIPS_R_S4 20
|
||||
-#define MIPS_R_S5 21
|
||||
-#define MIPS_R_S6 22
|
||||
-#define MIPS_R_S7 23
|
||||
-#define MIPS_R_SP 29
|
||||
-#define MIPS_R_RA 31
|
||||
-
|
||||
-/* Conditional codes */
|
||||
-#define MIPS_COND_EQ 0x1
|
||||
-#define MIPS_COND_GE (0x1 << 1)
|
||||
-#define MIPS_COND_GT (0x1 << 2)
|
||||
-#define MIPS_COND_NE (0x1 << 3)
|
||||
-#define MIPS_COND_ALL (0x1 << 4)
|
||||
-/* Conditionals on X register or K immediate */
|
||||
-#define MIPS_COND_X (0x1 << 5)
|
||||
-#define MIPS_COND_K (0x1 << 6)
|
||||
-
|
||||
-#define r_ret MIPS_R_V0
|
||||
-
|
||||
-/*
|
||||
- * Use 2 scratch registers to avoid pipeline interlocks.
|
||||
- * There is no overhead during epilogue and prologue since
|
||||
- * any of the $s0-$s6 registers will only be preserved if
|
||||
- * they are going to actually be used.
|
||||
- */
|
||||
-#define r_skb_hl MIPS_R_S0 /* skb header length */
|
||||
-#define r_skb_data MIPS_R_S1 /* skb actual data */
|
||||
-#define r_off MIPS_R_S2
|
||||
-#define r_A MIPS_R_S3
|
||||
-#define r_X MIPS_R_S4
|
||||
-#define r_skb MIPS_R_S5
|
||||
-#define r_M MIPS_R_S6
|
||||
-#define r_skb_len MIPS_R_S7
|
||||
-#define r_s0 MIPS_R_T4 /* scratch reg 1 */
|
||||
-#define r_s1 MIPS_R_T5 /* scratch reg 2 */
|
||||
-#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */
|
||||
-#define r_tmp MIPS_R_T7 /* No need to preserve this */
|
||||
-#define r_zero MIPS_R_ZERO
|
||||
-#define r_sp MIPS_R_SP
|
||||
-#define r_ra MIPS_R_RA
|
||||
-
|
||||
-#ifndef __ASSEMBLY__
|
||||
-
|
||||
-/* Declare ASM helpers */
|
||||
-
|
||||
-#define DECLARE_LOAD_FUNC(func) \
|
||||
- extern u8 func(unsigned long *skb, int offset); \
|
||||
- extern u8 func##_negative(unsigned long *skb, int offset); \
|
||||
- extern u8 func##_positive(unsigned long *skb, int offset)
|
||||
-
|
||||
-DECLARE_LOAD_FUNC(sk_load_word);
|
||||
-DECLARE_LOAD_FUNC(sk_load_half);
|
||||
-DECLARE_LOAD_FUNC(sk_load_byte);
|
||||
-
|
||||
-#endif
|
||||
-
|
||||
-#endif /* BPF_JIT_MIPS_OP_H */
|
||||
--- a/arch/mips/net/bpf_jit_asm.S
|
||||
+++ /dev/null
|
||||
@@ -1,285 +0,0 @@
|
||||
-/*
|
||||
- * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF
|
||||
- * compiler.
|
||||
- *
|
||||
- * Copyright (C) 2015 Imagination Technologies Ltd.
|
||||
- * Author: Markos Chandras <markos.chandras@imgtec.com>
|
||||
- *
|
||||
- * This program is free software; you can redistribute it and/or modify it
|
||||
- * under the terms of the GNU General Public License as published by the
|
||||
- * Free Software Foundation; version 2 of the License.
|
||||
- */
|
||||
-
|
||||
-#include <asm/asm.h>
|
||||
-#include <asm/isa-rev.h>
|
||||
-#include <asm/regdef.h>
|
||||
-#include "bpf_jit.h"
|
||||
-
|
||||
-/* ABI
|
||||
- *
|
||||
- * r_skb_hl skb header length
|
||||
- * r_skb_data skb data
|
||||
- * r_off(a1) offset register
|
||||
- * r_A BPF register A
|
||||
- * r_X PF register X
|
||||
- * r_skb(a0) *skb
|
||||
- * r_M *scratch memory
|
||||
- * r_skb_le skb length
|
||||
- * r_s0 Scratch register 0
|
||||
- * r_s1 Scratch register 1
|
||||
- *
|
||||
- * On entry:
|
||||
- * a0: *skb
|
||||
- * a1: offset (imm or imm + X)
|
||||
- *
|
||||
- * All non-BPF-ABI registers are free for use. On return, we only
|
||||
- * care about r_ret. The BPF-ABI registers are assumed to remain
|
||||
- * unmodified during the entire filter operation.
|
||||
- */
|
||||
-
|
||||
-#define skb a0
|
||||
-#define offset a1
|
||||
-#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */
|
||||
-
|
||||
- /* We know better :) so prevent assembler reordering etc */
|
||||
- .set noreorder
|
||||
-
|
||||
-#define is_offset_negative(TYPE) \
|
||||
- /* If offset is negative we have more work to do */ \
|
||||
- slti t0, offset, 0; \
|
||||
- bgtz t0, bpf_slow_path_##TYPE##_neg; \
|
||||
- /* Be careful what follows in DS. */
|
||||
-
|
||||
-#define is_offset_in_header(SIZE, TYPE) \
|
||||
- /* Reading from header? */ \
|
||||
- addiu $r_s0, $r_skb_hl, -SIZE; \
|
||||
- slt t0, $r_s0, offset; \
|
||||
- bgtz t0, bpf_slow_path_##TYPE; \
|
||||
-
|
||||
-LEAF(sk_load_word)
|
||||
- is_offset_negative(word)
|
||||
-FEXPORT(sk_load_word_positive)
|
||||
- is_offset_in_header(4, word)
|
||||
- /* Offset within header boundaries */
|
||||
- PTR_ADDU t1, $r_skb_data, offset
|
||||
- .set reorder
|
||||
- lw $r_A, 0(t1)
|
||||
- .set noreorder
|
||||
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
|
||||
-# if MIPS_ISA_REV >= 2
|
||||
- wsbh t0, $r_A
|
||||
- rotr $r_A, t0, 16
|
||||
-# else
|
||||
- sll t0, $r_A, 24
|
||||
- srl t1, $r_A, 24
|
||||
- srl t2, $r_A, 8
|
||||
- or t0, t0, t1
|
||||
- andi t2, t2, 0xff00
|
||||
- andi t1, $r_A, 0xff00
|
||||
- or t0, t0, t2
|
||||
- sll t1, t1, 8
|
||||
- or $r_A, t0, t1
|
||||
-# endif
|
||||
-#endif
|
||||
- jr $r_ra
|
||||
- move $r_ret, zero
|
||||
- END(sk_load_word)
|
||||
-
|
||||
-LEAF(sk_load_half)
|
||||
- is_offset_negative(half)
|
||||
-FEXPORT(sk_load_half_positive)
|
||||
- is_offset_in_header(2, half)
|
||||
- /* Offset within header boundaries */
|
||||
- PTR_ADDU t1, $r_skb_data, offset
|
||||
- lhu $r_A, 0(t1)
|
||||
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
|
||||
-# if MIPS_ISA_REV >= 2
|
||||
- wsbh $r_A, $r_A
|
||||
-# else
|
||||
- sll t0, $r_A, 8
|
||||
- srl t1, $r_A, 8
|
||||
- andi t0, t0, 0xff00
|
||||
- or $r_A, t0, t1
|
||||
-# endif
|
||||
-#endif
|
||||
- jr $r_ra
|
||||
- move $r_ret, zero
|
||||
- END(sk_load_half)
|
||||
-
|
||||
-LEAF(sk_load_byte)
|
||||
- is_offset_negative(byte)
|
||||
-FEXPORT(sk_load_byte_positive)
|
||||
- is_offset_in_header(1, byte)
|
||||
- /* Offset within header boundaries */
|
||||
- PTR_ADDU t1, $r_skb_data, offset
|
||||
- lbu $r_A, 0(t1)
|
||||
- jr $r_ra
|
||||
- move $r_ret, zero
|
||||
- END(sk_load_byte)
|
||||
-
|
||||
-/*
|
||||
- * call skb_copy_bits:
|
||||
- * (prototype in linux/skbuff.h)
|
||||
- *
|
||||
- * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len)
|
||||
- *
|
||||
- * o32 mandates we leave 4 spaces for argument registers in case
|
||||
- * the callee needs to use them. Even though we don't care about
|
||||
- * the argument registers ourselves, we need to allocate that space
|
||||
- * to remain ABI compliant since the callee may want to use that space.
|
||||
- * We also allocate 2 more spaces for $r_ra and our return register (*to).
|
||||
- *
|
||||
- * n64 is a bit different. The *caller* will allocate the space to preserve
|
||||
- * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no
|
||||
- * good reason but it does not matter that much really.
|
||||
- *
|
||||
- * (void *to) is returned in r_s0
|
||||
- *
|
||||
- */
|
||||
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
|
||||
-#define DS_OFFSET(SIZE) (4 * SZREG)
|
||||
-#else
|
||||
-#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE))
|
||||
-#endif
|
||||
-#define bpf_slow_path_common(SIZE) \
|
||||
- /* Quick check. Are we within reasonable boundaries? */ \
|
||||
- LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \
|
||||
- sltu $r_s0, offset, $r_s1; \
|
||||
- beqz $r_s0, fault; \
|
||||
- /* Load 4th argument in DS */ \
|
||||
- LONG_ADDIU a3, zero, SIZE; \
|
||||
- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
|
||||
- PTR_LA t0, skb_copy_bits; \
|
||||
- PTR_S $r_ra, (5 * SZREG)($r_sp); \
|
||||
- /* Assign low slot to a2 */ \
|
||||
- PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \
|
||||
- jalr t0; \
|
||||
- /* Reset our destination slot (DS but it's ok) */ \
|
||||
- INT_S zero, (4 * SZREG)($r_sp); \
|
||||
- /* \
|
||||
- * skb_copy_bits returns 0 on success and -EFAULT \
|
||||
- * on error. Our data live in a2. Do not bother with \
|
||||
- * our data if an error has been returned. \
|
||||
- */ \
|
||||
- /* Restore our frame */ \
|
||||
- PTR_L $r_ra, (5 * SZREG)($r_sp); \
|
||||
- INT_L $r_s0, (4 * SZREG)($r_sp); \
|
||||
- bltz v0, fault; \
|
||||
- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
|
||||
- move $r_ret, zero; \
|
||||
-
|
||||
-NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
|
||||
- bpf_slow_path_common(4)
|
||||
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
|
||||
-# if MIPS_ISA_REV >= 2
|
||||
- wsbh t0, $r_s0
|
||||
- jr $r_ra
|
||||
- rotr $r_A, t0, 16
|
||||
-# else
|
||||
- sll t0, $r_s0, 24
|
||||
- srl t1, $r_s0, 24
|
||||
- srl t2, $r_s0, 8
|
||||
- or t0, t0, t1
|
||||
- andi t2, t2, 0xff00
|
||||
- andi t1, $r_s0, 0xff00
|
||||
- or t0, t0, t2
|
||||
- sll t1, t1, 8
|
||||
- jr $r_ra
|
||||
- or $r_A, t0, t1
|
||||
-# endif
|
||||
-#else
|
||||
- jr $r_ra
|
||||
- move $r_A, $r_s0
|
||||
-#endif
|
||||
-
|
||||
- END(bpf_slow_path_word)
|
||||
-
|
||||
-NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
|
||||
- bpf_slow_path_common(2)
|
||||
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
|
||||
-# if MIPS_ISA_REV >= 2
|
||||
- jr $r_ra
|
||||
- wsbh $r_A, $r_s0
|
||||
-# else
|
||||
- sll t0, $r_s0, 8
|
||||
- andi t1, $r_s0, 0xff00
|
||||
- andi t0, t0, 0xff00
|
||||
- srl t1, t1, 8
|
||||
- jr $r_ra
|
||||
- or $r_A, t0, t1
|
||||
-# endif
|
||||
-#else
|
||||
- jr $r_ra
|
||||
- move $r_A, $r_s0
|
||||
-#endif
|
||||
-
|
||||
- END(bpf_slow_path_half)
|
||||
-
|
||||
-NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp)
|
||||
- bpf_slow_path_common(1)
|
||||
- jr $r_ra
|
||||
- move $r_A, $r_s0
|
||||
-
|
||||
- END(bpf_slow_path_byte)
|
||||
-
|
||||
-/*
|
||||
- * Negative entry points
|
||||
- */
|
||||
- .macro bpf_is_end_of_data
|
||||
- li t0, SKF_LL_OFF
|
||||
- /* Reading link layer data? */
|
||||
- slt t1, offset, t0
|
||||
- bgtz t1, fault
|
||||
- /* Be careful what follows in DS. */
|
||||
- .endm
|
||||
-/*
|
||||
- * call skb_copy_bits:
|
||||
- * (prototype in linux/filter.h)
|
||||
- *
|
||||
- * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
|
||||
- * int k, unsigned int size)
|
||||
- *
|
||||
- * see above (bpf_slow_path_common) for ABI restrictions
|
||||
- */
|
||||
-#define bpf_negative_common(SIZE) \
|
||||
- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
|
||||
- PTR_LA t0, bpf_internal_load_pointer_neg_helper; \
|
||||
- PTR_S $r_ra, (5 * SZREG)($r_sp); \
|
||||
- jalr t0; \
|
||||
- li a2, SIZE; \
|
||||
- PTR_L $r_ra, (5 * SZREG)($r_sp); \
|
||||
- /* Check return pointer */ \
|
||||
- beqz v0, fault; \
|
||||
- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
|
||||
- /* Preserve our pointer */ \
|
||||
- move $r_s0, v0; \
|
||||
- /* Set return value */ \
|
||||
- move $r_ret, zero; \
|
||||
-
|
||||
-bpf_slow_path_word_neg:
|
||||
- bpf_is_end_of_data
|
||||
-NESTED(sk_load_word_negative, (6 * SZREG), $r_sp)
|
||||
- bpf_negative_common(4)
|
||||
- jr $r_ra
|
||||
- lw $r_A, 0($r_s0)
|
||||
- END(sk_load_word_negative)
|
||||
-
|
||||
-bpf_slow_path_half_neg:
|
||||
- bpf_is_end_of_data
|
||||
-NESTED(sk_load_half_negative, (6 * SZREG), $r_sp)
|
||||
- bpf_negative_common(2)
|
||||
- jr $r_ra
|
||||
- lhu $r_A, 0($r_s0)
|
||||
- END(sk_load_half_negative)
|
||||
-
|
||||
-bpf_slow_path_byte_neg:
|
||||
- bpf_is_end_of_data
|
||||
-NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp)
|
||||
- bpf_negative_common(1)
|
||||
- jr $r_ra
|
||||
- lbu $r_A, 0($r_s0)
|
||||
- END(sk_load_byte_negative)
|
||||
-
|
||||
-fault:
|
||||
- jr $r_ra
|
||||
- addiu $r_ret, zero, 1
|
@ -0,0 +1,272 @@
|
||||
From 03662fcd41f4b764857f17b95f9a2a63c24bddd4 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Tue, 3 Nov 2020 17:28:09 +0100
|
||||
Subject: [PATCH 1/2] crypto: arm/chacha-neon - optimize for non-block size
|
||||
multiples
|
||||
|
||||
commit 86cd97ec4b943af35562a74688bc4e909b32c3d1 upstream.
|
||||
|
||||
The current NEON based ChaCha implementation for ARM is optimized for
|
||||
multiples of 4x the ChaCha block size (64 bytes). This makes sense for
|
||||
block encryption, but given that ChaCha is also often used in the
|
||||
context of networking, it makes sense to consider arbitrary length
|
||||
inputs as well.
|
||||
|
||||
For example, WireGuard typically uses 1420 byte packets, and performing
|
||||
ChaCha encryption involves 5 invocations of chacha_4block_xor_neon()
|
||||
and 3 invocations of chacha_block_xor_neon(), where the last one also
|
||||
involves a memcpy() using a buffer on the stack to process the final
|
||||
chunk of 1420 % 64 == 12 bytes.
|
||||
|
||||
Let's optimize for this case as well, by letting chacha_4block_xor_neon()
|
||||
deal with any input size between 64 and 256 bytes, using NEON permutation
|
||||
instructions and overlapping loads and stores. This way, the 140 byte
|
||||
tail of a 1420 byte input buffer can simply be processed in one go.
|
||||
|
||||
This results in the following performance improvements for 1420 byte
|
||||
blocks, without significant impact on power-of-2 input sizes. (Note
|
||||
that Raspberry Pi is widely used in combination with a 32-bit kernel,
|
||||
even though the core is 64-bit capable)
|
||||
|
||||
Cortex-A8 (BeagleBone) : 7%
|
||||
Cortex-A15 (Calxeda Midway) : 21%
|
||||
Cortex-A53 (Raspberry Pi 3) : 3%
|
||||
Cortex-A72 (Raspberry Pi 4) : 19%
|
||||
|
||||
Cc: Eric Biggers <ebiggers@google.com>
|
||||
Cc: "Jason A . Donenfeld" <Jason@zx2c4.com>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-glue.c | 34 +++++------
|
||||
arch/arm/crypto/chacha-neon-core.S | 97 +++++++++++++++++++++++++++---
|
||||
2 files changed, 107 insertions(+), 24 deletions(-)
|
||||
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -23,7 +23,7 @@
|
||||
asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
int nrounds);
|
||||
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
- int nrounds);
|
||||
+ int nrounds, unsigned int nbytes);
|
||||
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
|
||||
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
|
||||
|
||||
@@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8
|
||||
{
|
||||
u8 buf[CHACHA_BLOCK_SIZE];
|
||||
|
||||
- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
||||
- chacha_4block_xor_neon(state, dst, src, nrounds);
|
||||
- bytes -= CHACHA_BLOCK_SIZE * 4;
|
||||
- src += CHACHA_BLOCK_SIZE * 4;
|
||||
- dst += CHACHA_BLOCK_SIZE * 4;
|
||||
- state[12] += 4;
|
||||
- }
|
||||
- while (bytes >= CHACHA_BLOCK_SIZE) {
|
||||
- chacha_block_xor_neon(state, dst, src, nrounds);
|
||||
- bytes -= CHACHA_BLOCK_SIZE;
|
||||
- src += CHACHA_BLOCK_SIZE;
|
||||
- dst += CHACHA_BLOCK_SIZE;
|
||||
- state[12]++;
|
||||
+ while (bytes > CHACHA_BLOCK_SIZE) {
|
||||
+ unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
|
||||
+
|
||||
+ chacha_4block_xor_neon(state, dst, src, nrounds, l);
|
||||
+ bytes -= l;
|
||||
+ src += l;
|
||||
+ dst += l;
|
||||
+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
|
||||
}
|
||||
if (bytes) {
|
||||
- memcpy(buf, src, bytes);
|
||||
- chacha_block_xor_neon(state, buf, buf, nrounds);
|
||||
- memcpy(dst, buf, bytes);
|
||||
+ const u8 *s = src;
|
||||
+ u8 *d = dst;
|
||||
+
|
||||
+ if (bytes != CHACHA_BLOCK_SIZE)
|
||||
+ s = d = memcpy(buf, src, bytes);
|
||||
+ chacha_block_xor_neon(state, d, s, nrounds);
|
||||
+ if (d != dst)
|
||||
+ memcpy(dst, buf, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
--- a/arch/arm/crypto/chacha-neon-core.S
|
||||
+++ b/arch/arm/crypto/chacha-neon-core.S
|
||||
@@ -47,6 +47,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
+#include <asm/cache.h>
|
||||
|
||||
.text
|
||||
.fpu neon
|
||||
@@ -205,7 +206,7 @@ ENDPROC(hchacha_block_neon)
|
||||
|
||||
.align 5
|
||||
ENTRY(chacha_4block_xor_neon)
|
||||
- push {r4-r5}
|
||||
+ push {r4, lr}
|
||||
mov r4, sp // preserve the stack pointer
|
||||
sub ip, sp, #0x20 // allocate a 32 byte buffer
|
||||
bic ip, ip, #0x1f // aligned to 32 bytes
|
||||
@@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon)
|
||||
vld1.32 {q0-q1}, [r0]
|
||||
vld1.32 {q2-q3}, [ip]
|
||||
|
||||
- adr r5, .Lctrinc
|
||||
+ adr lr, .Lctrinc
|
||||
vdup.32 q15, d7[1]
|
||||
vdup.32 q14, d7[0]
|
||||
- vld1.32 {q4}, [r5, :128]
|
||||
+ vld1.32 {q4}, [lr, :128]
|
||||
vdup.32 q13, d6[1]
|
||||
vdup.32 q12, d6[0]
|
||||
vdup.32 q11, d5[1]
|
||||
@@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon)
|
||||
|
||||
// Re-interleave the words in the first two rows of each block (x0..7).
|
||||
// Also add the counter values 0-3 to x12[0-3].
|
||||
- vld1.32 {q8}, [r5, :128] // load counter values 0-3
|
||||
+ vld1.32 {q8}, [lr, :128] // load counter values 0-3
|
||||
vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1)
|
||||
vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3)
|
||||
vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5)
|
||||
@@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon)
|
||||
|
||||
// Re-interleave the words in the last two rows of each block (x8..15).
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
+ mov sp, r4 // restore original stack pointer
|
||||
+ ldr r4, [r4, #8] // load number of bytes
|
||||
vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13)
|
||||
vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15)
|
||||
vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9)
|
||||
@@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon)
|
||||
// XOR the rest of the data with the keystream
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #96
|
||||
veor q0, q0, q8
|
||||
veor q1, q1, q12
|
||||
+ ble .Lle96
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q2
|
||||
veor q1, q1, q6
|
||||
+ ble .Lle128
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q10
|
||||
veor q1, q1, q14
|
||||
+ ble .Lle160
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q4
|
||||
veor q1, q1, q5
|
||||
+ ble .Lle192
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q9
|
||||
veor q1, q1, q13
|
||||
+ ble .Lle224
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q7
|
||||
+ blt .Llt256
|
||||
+.Lout:
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]
|
||||
- mov sp, r4 // restore original stack pointer
|
||||
veor q0, q0, q11
|
||||
veor q1, q1, q15
|
||||
vst1.8 {q0-q1}, [r1]
|
||||
|
||||
- pop {r4-r5}
|
||||
- bx lr
|
||||
+ pop {r4, pc}
|
||||
+
|
||||
+.Lle192:
|
||||
+ vmov q4, q9
|
||||
+ vmov q5, q13
|
||||
+
|
||||
+.Lle160:
|
||||
+ // nothing to do
|
||||
+
|
||||
+.Lfinalblock:
|
||||
+ // Process the final block if processing less than 4 full blocks.
|
||||
+ // Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the
|
||||
+ // previous 32 byte output block that still needs to be written at
|
||||
+ // [r1] in q0-q1.
|
||||
+ beq .Lfullblock
|
||||
+
|
||||
+.Lpartialblock:
|
||||
+ adr lr, .Lpermute + 32
|
||||
+ add r2, r2, r4
|
||||
+ add lr, lr, r4
|
||||
+ add r4, r4, r1
|
||||
+
|
||||
+ vld1.8 {q2-q3}, [lr]
|
||||
+ vld1.8 {q6-q7}, [r2]
|
||||
+
|
||||
+ add r4, r4, #32
|
||||
+
|
||||
+ vtbl.8 d4, {q4-q5}, d4
|
||||
+ vtbl.8 d5, {q4-q5}, d5
|
||||
+ vtbl.8 d6, {q4-q5}, d6
|
||||
+ vtbl.8 d7, {q4-q5}, d7
|
||||
+
|
||||
+ veor q6, q6, q2
|
||||
+ veor q7, q7, q3
|
||||
+
|
||||
+ vst1.8 {q6-q7}, [r4] // overlapping stores
|
||||
+ vst1.8 {q0-q1}, [r1]
|
||||
+ pop {r4, pc}
|
||||
+
|
||||
+.Lfullblock:
|
||||
+ vmov q11, q4
|
||||
+ vmov q15, q5
|
||||
+ b .Lout
|
||||
+.Lle96:
|
||||
+ vmov q4, q2
|
||||
+ vmov q5, q6
|
||||
+ b .Lfinalblock
|
||||
+.Lle128:
|
||||
+ vmov q4, q10
|
||||
+ vmov q5, q14
|
||||
+ b .Lfinalblock
|
||||
+.Lle224:
|
||||
+ vmov q4, q3
|
||||
+ vmov q5, q7
|
||||
+ b .Lfinalblock
|
||||
+.Llt256:
|
||||
+ vmov q4, q11
|
||||
+ vmov q5, q15
|
||||
+ b .Lpartialblock
|
||||
ENDPROC(chacha_4block_xor_neon)
|
||||
+
|
||||
+ .align L1_CACHE_SHIFT
|
||||
+.Lpermute:
|
||||
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
@ -0,0 +1,38 @@
|
||||
From 7f63462faf9eab69132bea9abd48c2c05a93145b Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Sun, 13 Dec 2020 15:39:29 +0100
|
||||
Subject: [PATCH 2/2] crypto: arm/chacha-neon - add missing counter increment
|
||||
|
||||
commit fd16931a2f518a32753920ff20895e5cf04c8ff1 upstream.
|
||||
|
||||
Commit 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block
|
||||
size multiples") refactored the chacha block handling in the glue code in
|
||||
a way that may result in the counter increment being omitted when calling
|
||||
chacha_block_xor_neon() to process a full block. This violates the skcipher
|
||||
API, which requires that the output IV is suitable for handling more input
|
||||
as long as the preceding input has been presented in round multiples of the
|
||||
block size. Also, the same code is exposed via the chacha library interface
|
||||
whose callers may actually rely on this increment to occur even for final
|
||||
blocks that are smaller than the chacha block size.
|
||||
|
||||
So increment the counter after calling chacha_block_xor_neon().
|
||||
|
||||
Fixes: 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block size multiples")
|
||||
Reported-by: Eric Biggers <ebiggers@kernel.org>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-glue.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -60,6 +60,7 @@ static void chacha_doneon(u32 *state, u8
|
||||
chacha_block_xor_neon(state, d, s, nrounds);
|
||||
if (d != dst)
|
||||
memcpy(dst, buf, bytes);
|
||||
+ state[12]++;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,42 @@
|
||||
From a13827e9091c07e25cdeec9a402d74a27e2a1111 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Mon, 22 Feb 2021 17:25:46 +0100
|
||||
Subject: [PATCH] wireguard: peer: put frequently used members above cache
|
||||
lines
|
||||
|
||||
commit 5a0598695634a6bb4126818902dd9140cd9df8b6 upstream.
|
||||
|
||||
The is_dead boolean is checked for every single packet, while the
|
||||
internal_id member is used basically only for pr_debug messages. So it
|
||||
makes sense to hoist up is_dead into some space formerly unused by a
|
||||
struct hole, while demoting internal_id to below the lowest struct
|
||||
cache line.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/peer.h | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/peer.h
|
||||
+++ b/drivers/net/wireguard/peer.h
|
||||
@@ -39,6 +39,7 @@ struct wg_peer {
|
||||
struct prev_queue tx_queue, rx_queue;
|
||||
struct sk_buff_head staged_packet_queue;
|
||||
int serial_work_cpu;
|
||||
+ bool is_dead;
|
||||
struct noise_keypairs keypairs;
|
||||
struct endpoint endpoint;
|
||||
struct dst_cache endpoint_cache;
|
||||
@@ -61,9 +62,8 @@ struct wg_peer {
|
||||
struct rcu_head rcu;
|
||||
struct list_head peer_list;
|
||||
struct list_head allowedips_list;
|
||||
- u64 internal_id;
|
||||
struct napi_struct napi;
|
||||
- bool is_dead;
|
||||
+ u64 internal_id;
|
||||
};
|
||||
|
||||
struct wg_peer *wg_peer_create(struct wg_device *wg,
|
@ -0,0 +1,52 @@
|
||||
From 02d6fdecb9c38de19065f6bed8d5214556fd061d Mon Sep 17 00:00:00 2001
|
||||
From: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Date: Thu, 4 Nov 2021 16:00:40 +0100
|
||||
Subject: regmap: allow to define reg_update_bits for no bus configuration
|
||||
|
||||
Some devices require special handling for reg_update_bits and can't use
|
||||
the normal regmap read write logic. An example is when locking is
|
||||
handled by the device and rmw operations need to be done atomically.
|
||||
Allow to declare a dedicated function in regmap_config for
|
||||
reg_update_bits in no bus configuration.
|
||||
|
||||
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Link: https://lore.kernel.org/r/20211104150040.1260-1-ansuelsmth@gmail.com
|
||||
Signed-off-by: Mark Brown <broonie@kernel.org>
|
||||
---
|
||||
drivers/base/regmap/regmap.c | 1 +
|
||||
include/linux/regmap.h | 7 +++++++
|
||||
2 files changed, 8 insertions(+)
|
||||
|
||||
--- a/drivers/base/regmap/regmap.c
|
||||
+++ b/drivers/base/regmap/regmap.c
|
||||
@@ -842,6 +842,7 @@ struct regmap *__regmap_init(struct devi
|
||||
if (!bus) {
|
||||
map->reg_read = config->reg_read;
|
||||
map->reg_write = config->reg_write;
|
||||
+ map->reg_update_bits = config->reg_update_bits;
|
||||
|
||||
map->defer_caching = false;
|
||||
goto skip_format_initialization;
|
||||
--- a/include/linux/regmap.h
|
||||
+++ b/include/linux/regmap.h
|
||||
@@ -289,6 +289,11 @@ typedef void (*regmap_unlock)(void *);
|
||||
* read operation on a bus such as SPI, I2C, etc. Most of the
|
||||
* devices do not need this.
|
||||
* @reg_write: Same as above for writing.
|
||||
+ * @reg_update_bits: Optional callback that if filled will be used to perform
|
||||
+ * all the update_bits(rmw) operation. Should only be provided
|
||||
+ * if the function require special handling with lock and reg
|
||||
+ * handling and the operation cannot be represented as a simple
|
||||
+ * update_bits operation on a bus such as SPI, I2C, etc.
|
||||
* @fast_io: Register IO is fast. Use a spinlock instead of a mutex
|
||||
* to perform locking. This field is ignored if custom lock/unlock
|
||||
* functions are used (see fields lock/unlock of struct regmap_config).
|
||||
@@ -366,6 +371,8 @@ struct regmap_config {
|
||||
|
||||
int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
|
||||
int (*reg_write)(void *context, unsigned int reg, unsigned int val);
|
||||
+ int (*reg_update_bits)(void *context, unsigned int reg,
|
||||
+ unsigned int mask, unsigned int val);
|
||||
|
||||
bool fast_io;
|
||||
|
@ -0,0 +1,36 @@
|
||||
From 6523061868212473f63812a0c477a161742bed42 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Sat, 27 Feb 2021 13:20:24 +0100
|
||||
Subject: [PATCH] MIPS: select CPU_MIPS64 for remaining MIPS64 CPUs
|
||||
|
||||
The CPU_MIPS64 and CPU_MIPS32 variables are supposed to be able to
|
||||
distinguish broadly between 64-bit and 32-bit MIPS CPUs. However, they
|
||||
weren't selected by the specialty CPUs, Octeon and Loongson, which meant
|
||||
it was possible to hit a weird state of:
|
||||
|
||||
MIPS=y, CONFIG_64BIT=y, CPU_MIPS64=n
|
||||
|
||||
This commit rectifies the issue by having CPU_MIPS64 be selected when
|
||||
the missing Octeon or Loongson models are selected.
|
||||
|
||||
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
|
||||
Cc: Ralf Baechle <ralf@linux-mips.org>
|
||||
Cc: George Cherian <gcherian@marvell.com>
|
||||
Cc: Huacai Chen <chenhuacai@kernel.org>
|
||||
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/mips/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -2088,7 +2088,7 @@ config CPU_MIPS32
|
||||
config CPU_MIPS64
|
||||
bool
|
||||
default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R5 || \
|
||||
- CPU_MIPS64_R6
|
||||
+ CPU_MIPS64_R6 || CPU_LOONGSON64 || CPU_CAVIUM_OCTEON
|
||||
|
||||
#
|
||||
# These indicate the revision of the architecture
|
@ -0,0 +1,36 @@
|
||||
From 7d1531c81c0fb4c93bea8dc316043ad0e4d0c270 Mon Sep 17 00:00:00 2001
|
||||
From: Chuanhong Guo <gch981213@gmail.com>
|
||||
Date: Sun, 25 Oct 2020 23:19:40 +0800
|
||||
Subject: [PATCH] MIPS: zboot: put appended dtb into a section
|
||||
|
||||
This will make a separate section for the dtb appear in the ELF, and we can
|
||||
then use objcopy to patch a dtb into vmlinuz when RAW_APPENDED_DTB
|
||||
is set in kernel config.
|
||||
|
||||
command to patch a dtb:
|
||||
objcopy --set-section-flags=.appended_dtb=alloc,contents \
|
||||
--update-section=.appended_dtb=<target>.dtb vmlinuz vmlinuz-dtb
|
||||
|
||||
Signed-off-by: Chuanhong Guo <gch981213@gmail.com>
|
||||
---
|
||||
arch/mips/boot/compressed/ld.script | 9 ++++++---
|
||||
1 file changed, 6 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/arch/mips/boot/compressed/ld.script
|
||||
+++ b/arch/mips/boot/compressed/ld.script
|
||||
@@ -31,9 +31,12 @@ SECTIONS
|
||||
CONSTRUCTORS
|
||||
. = ALIGN(16);
|
||||
}
|
||||
- __appended_dtb = .;
|
||||
- /* leave space for appended DTB */
|
||||
- . += 0x100000;
|
||||
+
|
||||
+ .appended_dtb : {
|
||||
+ __appended_dtb = .;
|
||||
+ /* leave space for appended DTB */
|
||||
+ . += 0x100000;
|
||||
+ }
|
||||
|
||||
_edata = .;
|
||||
/* End of data section */
|
@ -0,0 +1,106 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Thu, 25 Jan 2018 12:58:55 +0100
|
||||
Subject: [PATCH] netfilter: nft_flow_offload: handle netdevice events from
|
||||
nf_flow_table
|
||||
|
||||
Move the code that deals with device events to the core.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -576,13 +576,41 @@ void nf_flow_table_free(struct nf_flowta
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_table_free);
|
||||
|
||||
+static int nf_flow_table_netdev_event(struct notifier_block *this,
|
||||
+ unsigned long event, void *ptr)
|
||||
+{
|
||||
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||
+
|
||||
+ if (event != NETDEV_DOWN)
|
||||
+ return NOTIFY_DONE;
|
||||
+
|
||||
+ nf_flow_table_cleanup(dev);
|
||||
+
|
||||
+ return NOTIFY_DONE;
|
||||
+}
|
||||
+
|
||||
+static struct notifier_block flow_offload_netdev_notifier = {
|
||||
+ .notifier_call = nf_flow_table_netdev_event,
|
||||
+};
|
||||
+
|
||||
static int __init nf_flow_table_module_init(void)
|
||||
{
|
||||
- return nf_flow_table_offload_init();
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = nf_flow_table_offload_init();
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||
+ if (ret)
|
||||
+ nf_flow_table_offload_exit();
|
||||
+
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static void __exit nf_flow_table_module_exit(void)
|
||||
{
|
||||
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||
nf_flow_table_offload_exit();
|
||||
}
|
||||
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -237,47 +237,14 @@ static struct nft_expr_type nft_flow_off
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
-static int flow_offload_netdev_event(struct notifier_block *this,
|
||||
- unsigned long event, void *ptr)
|
||||
-{
|
||||
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||
-
|
||||
- if (event != NETDEV_DOWN)
|
||||
- return NOTIFY_DONE;
|
||||
-
|
||||
- nf_flow_table_cleanup(dev);
|
||||
-
|
||||
- return NOTIFY_DONE;
|
||||
-}
|
||||
-
|
||||
-static struct notifier_block flow_offload_netdev_notifier = {
|
||||
- .notifier_call = flow_offload_netdev_event,
|
||||
-};
|
||||
-
|
||||
static int __init nft_flow_offload_module_init(void)
|
||||
{
|
||||
- int err;
|
||||
-
|
||||
- err = register_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||
- if (err)
|
||||
- goto err;
|
||||
-
|
||||
- err = nft_register_expr(&nft_flow_offload_type);
|
||||
- if (err < 0)
|
||||
- goto register_expr;
|
||||
-
|
||||
- return 0;
|
||||
-
|
||||
-register_expr:
|
||||
- unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||
-err:
|
||||
- return err;
|
||||
+ return nft_register_expr(&nft_flow_offload_type);
|
||||
}
|
||||
|
||||
static void __exit nft_flow_offload_module_exit(void)
|
||||
{
|
||||
nft_unregister_expr(&nft_flow_offload_type);
|
||||
- unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||
}
|
||||
|
||||
module_init(nft_flow_offload_module_init);
|
@ -0,0 +1,324 @@
|
||||
From 04e9ab75267489224364fa510a88ada83e11c325 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Thu, 10 Dec 2020 18:23:52 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: convert "fixed-partitions" to the
|
||||
json-schema
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This standardizes its documentation, allows validating with Makefile
|
||||
checks and helps writing DTS files.
|
||||
|
||||
Noticeable changes:
|
||||
1. Dropped "Partitions can be represented by sub-nodes of a flash
|
||||
device." as we also support subpartitions (don't have to be part of
|
||||
flash device node)
|
||||
2. Dropped "to Linux" as bindings are meant to be os agnostic.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Link: https://lore.kernel.org/r/20201210172352.31632-1-zajec5@gmail.com
|
||||
Signed-off-by: Rob Herring <robh@kernel.org>
|
||||
---
|
||||
.../devicetree/bindings/mtd/partition.txt | 131 +--------------
|
||||
.../mtd/partitions/fixed-partitions.yaml | 152 ++++++++++++++++++
|
||||
2 files changed, 154 insertions(+), 129 deletions(-)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
|
||||
|
||||
--- a/Documentation/devicetree/bindings/mtd/partition.txt
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partition.txt
|
||||
@@ -24,137 +24,10 @@ another partitioning method.
|
||||
Available bindings are listed in the "partitions" subdirectory.
|
||||
|
||||
|
||||
-Fixed Partitions
|
||||
-================
|
||||
-
|
||||
-Partitions can be represented by sub-nodes of a flash device. This can be used
|
||||
-on platforms which have strong conventions about which portions of a flash are
|
||||
-used for what purposes, but which don't use an on-flash partition table such
|
||||
-as RedBoot.
|
||||
-
|
||||
-The partition table should be a subnode of the flash node and should be named
|
||||
-'partitions'. This node should have the following property:
|
||||
-- compatible : (required) must be "fixed-partitions"
|
||||
-Partitions are then defined in subnodes of the partitions node.
|
||||
+Deprecated: partitions defined in flash node
|
||||
+============================================
|
||||
|
||||
For backwards compatibility partitions as direct subnodes of the flash device are
|
||||
supported. This use is discouraged.
|
||||
NOTE: also for backwards compatibility, direct subnodes that have a compatible
|
||||
string are not considered partitions, as they may be used for other bindings.
|
||||
-
|
||||
-#address-cells & #size-cells must both be present in the partitions subnode of the
|
||||
-flash device. There are two valid values for both:
|
||||
-<1>: for partitions that require a single 32-bit cell to represent their
|
||||
- size/address (aka the value is below 4 GiB)
|
||||
-<2>: for partitions that require two 32-bit cells to represent their
|
||||
- size/address (aka the value is 4 GiB or greater).
|
||||
-
|
||||
-Required properties:
|
||||
-- reg : The partition's offset and size within the flash
|
||||
-
|
||||
-Optional properties:
|
||||
-- label : The label / name for this partition. If omitted, the label is taken
|
||||
- from the node name (excluding the unit address).
|
||||
-- read-only : This parameter, if present, is a hint to Linux that this
|
||||
- partition should only be mounted read-only. This is usually used for flash
|
||||
- partitions containing early-boot firmware images or data which should not be
|
||||
- clobbered.
|
||||
-- lock : Do not unlock the partition at initialization time (not supported on
|
||||
- all devices)
|
||||
-- slc-mode: This parameter, if present, allows one to emulate SLC mode on a
|
||||
- partition attached to an MLC NAND thus making this partition immune to
|
||||
- paired-pages corruptions
|
||||
-
|
||||
-Examples:
|
||||
-
|
||||
-
|
||||
-flash@0 {
|
||||
- partitions {
|
||||
- compatible = "fixed-partitions";
|
||||
- #address-cells = <1>;
|
||||
- #size-cells = <1>;
|
||||
-
|
||||
- partition@0 {
|
||||
- label = "u-boot";
|
||||
- reg = <0x0000000 0x100000>;
|
||||
- read-only;
|
||||
- };
|
||||
-
|
||||
- uimage@100000 {
|
||||
- reg = <0x0100000 0x200000>;
|
||||
- };
|
||||
- };
|
||||
-};
|
||||
-
|
||||
-flash@1 {
|
||||
- partitions {
|
||||
- compatible = "fixed-partitions";
|
||||
- #address-cells = <1>;
|
||||
- #size-cells = <2>;
|
||||
-
|
||||
- /* a 4 GiB partition */
|
||||
- partition@0 {
|
||||
- label = "filesystem";
|
||||
- reg = <0x00000000 0x1 0x00000000>;
|
||||
- };
|
||||
- };
|
||||
-};
|
||||
-
|
||||
-flash@2 {
|
||||
- partitions {
|
||||
- compatible = "fixed-partitions";
|
||||
- #address-cells = <2>;
|
||||
- #size-cells = <2>;
|
||||
-
|
||||
- /* an 8 GiB partition */
|
||||
- partition@0 {
|
||||
- label = "filesystem #1";
|
||||
- reg = <0x0 0x00000000 0x2 0x00000000>;
|
||||
- };
|
||||
-
|
||||
- /* a 4 GiB partition */
|
||||
- partition@200000000 {
|
||||
- label = "filesystem #2";
|
||||
- reg = <0x2 0x00000000 0x1 0x00000000>;
|
||||
- };
|
||||
- };
|
||||
-};
|
||||
-
|
||||
-flash@3 {
|
||||
- partitions {
|
||||
- compatible = "fixed-partitions";
|
||||
- #address-cells = <1>;
|
||||
- #size-cells = <1>;
|
||||
-
|
||||
- partition@0 {
|
||||
- label = "bootloader";
|
||||
- reg = <0x000000 0x100000>;
|
||||
- read-only;
|
||||
- };
|
||||
-
|
||||
- firmware@100000 {
|
||||
- label = "firmware";
|
||||
- reg = <0x100000 0xe00000>;
|
||||
- compatible = "brcm,trx";
|
||||
- };
|
||||
-
|
||||
- calibration@f00000 {
|
||||
- label = "calibration";
|
||||
- reg = <0xf00000 0x100000>;
|
||||
- compatible = "fixed-partitions";
|
||||
- ranges = <0 0xf00000 0x100000>;
|
||||
- #address-cells = <1>;
|
||||
- #size-cells = <1>;
|
||||
-
|
||||
- partition@0 {
|
||||
- label = "wifi0";
|
||||
- reg = <0x000000 0x080000>;
|
||||
- };
|
||||
-
|
||||
- partition@80000 {
|
||||
- label = "wifi1";
|
||||
- reg = <0x080000 0x080000>;
|
||||
- };
|
||||
- };
|
||||
- };
|
||||
-};
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
|
||||
@@ -0,0 +1,152 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/fixed-partitions.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Fixed partitions
|
||||
+
|
||||
+description: |
|
||||
+ This binding can be used on platforms which have strong conventions about
|
||||
+ which portions of a flash are used for what purposes, but which don't use an
|
||||
+ on-flash partition table such as RedBoot.
|
||||
+
|
||||
+ The partition table should be a node named "partitions". Partitions are then
|
||||
+ defined as subnodes.
|
||||
+
|
||||
+maintainers:
|
||||
+ - Rafał Miłecki <rafal@milecki.pl>
|
||||
+
|
||||
+properties:
|
||||
+ compatible:
|
||||
+ const: fixed-partitions
|
||||
+
|
||||
+ "#address-cells": true
|
||||
+
|
||||
+ "#size-cells": true
|
||||
+
|
||||
+patternProperties:
|
||||
+ "@[0-9a-f]+$":
|
||||
+ description: node describing a single flash partition
|
||||
+ type: object
|
||||
+
|
||||
+ properties:
|
||||
+ reg:
|
||||
+ description: partition's offset and size within the flash
|
||||
+ maxItems: 1
|
||||
+
|
||||
+ label:
|
||||
+ description: The label / name for this partition. If omitted, the label
|
||||
+ is taken from the node name (excluding the unit address).
|
||||
+
|
||||
+ read-only:
|
||||
+ description: This parameter, if present, is a hint that this partition
|
||||
+ should only be mounted read-only. This is usually used for flash
|
||||
+ partitions containing early-boot firmware images or data which should
|
||||
+ not be clobbered.
|
||||
+ type: boolean
|
||||
+
|
||||
+ lock:
|
||||
+ description: Do not unlock the partition at initialization time (not
|
||||
+ supported on all devices)
|
||||
+ type: boolean
|
||||
+
|
||||
+ slc-mode:
|
||||
+ description: This parameter, if present, allows one to emulate SLC mode
|
||||
+ on a partition attached to an MLC NAND thus making this partition
|
||||
+ immune to paired-pages corruptions
|
||||
+ type: boolean
|
||||
+
|
||||
+ required:
|
||||
+ - reg
|
||||
+
|
||||
+required:
|
||||
+ - "#address-cells"
|
||||
+ - "#size-cells"
|
||||
+
|
||||
+additionalProperties: true
|
||||
+
|
||||
+examples:
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "u-boot";
|
||||
+ reg = <0x0000000 0x100000>;
|
||||
+ read-only;
|
||||
+ };
|
||||
+
|
||||
+ uimage@100000 {
|
||||
+ reg = <0x0100000 0x200000>;
|
||||
+ };
|
||||
+ };
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <2>;
|
||||
+
|
||||
+ /* a 4 GiB partition */
|
||||
+ partition@0 {
|
||||
+ label = "filesystem";
|
||||
+ reg = <0x00000000 0x1 0x00000000>;
|
||||
+ };
|
||||
+ };
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <2>;
|
||||
+ #size-cells = <2>;
|
||||
+
|
||||
+ /* an 8 GiB partition */
|
||||
+ partition@0 {
|
||||
+ label = "filesystem #1";
|
||||
+ reg = <0x0 0x00000000 0x2 0x00000000>;
|
||||
+ };
|
||||
+
|
||||
+ /* a 4 GiB partition */
|
||||
+ partition@200000000 {
|
||||
+ label = "filesystem #2";
|
||||
+ reg = <0x2 0x00000000 0x1 0x00000000>;
|
||||
+ };
|
||||
+ };
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "bootloader";
|
||||
+ reg = <0x000000 0x100000>;
|
||||
+ read-only;
|
||||
+ };
|
||||
+
|
||||
+ firmware@100000 {
|
||||
+ compatible = "brcm,trx";
|
||||
+ label = "firmware";
|
||||
+ reg = <0x100000 0xe00000>;
|
||||
+ };
|
||||
+
|
||||
+ calibration@f00000 {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ label = "calibration";
|
||||
+ reg = <0xf00000 0x100000>;
|
||||
+ ranges = <0 0xf00000 0x100000>;
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "wifi0";
|
||||
+ reg = <0x000000 0x080000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@80000 {
|
||||
+ label = "wifi1";
|
||||
+ reg = <0x080000 0x080000>;
|
||||
+ };
|
||||
+ };
|
||||
+ };
|
@ -0,0 +1,115 @@
|
||||
From 6418522022c706fd867b00b2571edba48b8fa8c7 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Thu, 11 Feb 2021 23:04:25 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: move partition binding to its own file
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Single partition binding is quite common and may be:
|
||||
1. Used by multiple parsers
|
||||
2. Extended for more specific cases
|
||||
|
||||
Move it to a separate file to avoid code duplication.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Reviewed-by: Rob Herring <robh@kernel.org>
|
||||
Signed-off-by: Richard Weinberger <richard@nod.at>
|
||||
---
|
||||
.../mtd/partitions/fixed-partitions.yaml | 33 +------------
|
||||
.../bindings/mtd/partitions/partition.yaml | 47 +++++++++++++++++++
|
||||
2 files changed, 48 insertions(+), 32 deletions(-)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/partition.yaml
|
||||
|
||||
--- a/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
|
||||
@@ -27,38 +27,7 @@ properties:
|
||||
|
||||
patternProperties:
|
||||
"@[0-9a-f]+$":
|
||||
- description: node describing a single flash partition
|
||||
- type: object
|
||||
-
|
||||
- properties:
|
||||
- reg:
|
||||
- description: partition's offset and size within the flash
|
||||
- maxItems: 1
|
||||
-
|
||||
- label:
|
||||
- description: The label / name for this partition. If omitted, the label
|
||||
- is taken from the node name (excluding the unit address).
|
||||
-
|
||||
- read-only:
|
||||
- description: This parameter, if present, is a hint that this partition
|
||||
- should only be mounted read-only. This is usually used for flash
|
||||
- partitions containing early-boot firmware images or data which should
|
||||
- not be clobbered.
|
||||
- type: boolean
|
||||
-
|
||||
- lock:
|
||||
- description: Do not unlock the partition at initialization time (not
|
||||
- supported on all devices)
|
||||
- type: boolean
|
||||
-
|
||||
- slc-mode:
|
||||
- description: This parameter, if present, allows one to emulate SLC mode
|
||||
- on a partition attached to an MLC NAND thus making this partition
|
||||
- immune to paired-pages corruptions
|
||||
- type: boolean
|
||||
-
|
||||
- required:
|
||||
- - reg
|
||||
+ $ref: "partition.yaml#"
|
||||
|
||||
required:
|
||||
- "#address-cells"
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/partition.yaml
|
||||
@@ -0,0 +1,47 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/partition.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Partition
|
||||
+
|
||||
+description: |
|
||||
+ This binding describes a single flash partition. Each partition must have its
|
||||
+ relative offset and size specified. Depending on partition function extra
|
||||
+ properties can be used.
|
||||
+
|
||||
+maintainers:
|
||||
+ - Rafał Miłecki <rafal@milecki.pl>
|
||||
+
|
||||
+properties:
|
||||
+ reg:
|
||||
+ description: partition's offset and size within the flash
|
||||
+ maxItems: 1
|
||||
+
|
||||
+ label:
|
||||
+ description: The label / name for this partition. If omitted, the label
|
||||
+ is taken from the node name (excluding the unit address).
|
||||
+
|
||||
+ read-only:
|
||||
+ description: This parameter, if present, is a hint that this partition
|
||||
+ should only be mounted read-only. This is usually used for flash
|
||||
+ partitions containing early-boot firmware images or data which should
|
||||
+ not be clobbered.
|
||||
+ type: boolean
|
||||
+
|
||||
+ lock:
|
||||
+ description: Do not unlock the partition at initialization time (not
|
||||
+ supported on all devices)
|
||||
+ type: boolean
|
||||
+
|
||||
+ slc-mode:
|
||||
+ description: This parameter, if present, allows one to emulate SLC mode
|
||||
+ on a partition attached to an MLC NAND thus making this partition
|
||||
+ immune to paired-pages corruptions
|
||||
+ type: boolean
|
||||
+
|
||||
+required:
|
||||
+ - reg
|
||||
+
|
||||
+additionalProperties: true
|
@ -0,0 +1,92 @@
|
||||
From 6e9dff6fe3fbc452f16566e4a7e293b0decefdba Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Thu, 11 Feb 2021 23:04:26 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: add binding for BCM4908 partitions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
BCM4908 uses fixed partitions layout but function of some partitions may
|
||||
vary. Some devices use multiple firmware partitions and those partitions
|
||||
should be marked to let system discover their purpose.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Signed-off-by: Richard Weinberger <richard@nod.at>
|
||||
---
|
||||
.../partitions/brcm,bcm4908-partitions.yaml | 70 +++++++++++++++++++
|
||||
1 file changed, 70 insertions(+)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/brcm,bcm4908-partitions.yaml
|
||||
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/brcm,bcm4908-partitions.yaml
|
||||
@@ -0,0 +1,70 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/brcm,bcm4908-partitions.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Broadcom BCM4908 partitioning
|
||||
+
|
||||
+description: |
|
||||
+ Broadcom BCM4908 CFE bootloader supports two firmware partitions. One is used
|
||||
+ for regular booting, the other is treated as fallback.
|
||||
+
|
||||
+ This binding allows defining all fixed partitions and marking those containing
|
||||
+ firmware. System can use that information e.g. for booting or flashing
|
||||
+ purposes.
|
||||
+
|
||||
+maintainers:
|
||||
+ - Rafał Miłecki <rafal@milecki.pl>
|
||||
+
|
||||
+properties:
|
||||
+ compatible:
|
||||
+ const: brcm,bcm4908-partitions
|
||||
+
|
||||
+ "#address-cells":
|
||||
+ enum: [ 1, 2 ]
|
||||
+
|
||||
+ "#size-cells":
|
||||
+ enum: [ 1, 2 ]
|
||||
+
|
||||
+patternProperties:
|
||||
+ "^partition@[0-9a-f]+$":
|
||||
+ $ref: "partition.yaml#"
|
||||
+ properties:
|
||||
+ compatible:
|
||||
+ const: brcm,bcm4908-firmware
|
||||
+ unevaluatedProperties: false
|
||||
+
|
||||
+required:
|
||||
+ - "#address-cells"
|
||||
+ - "#size-cells"
|
||||
+
|
||||
+additionalProperties: false
|
||||
+
|
||||
+examples:
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "brcm,bcm4908-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "cferom";
|
||||
+ reg = <0x0 0x100000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@100000 {
|
||||
+ compatible = "brcm,bcm4908-firmware";
|
||||
+ reg = <0x100000 0xf00000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@1000000 {
|
||||
+ compatible = "brcm,bcm4908-firmware";
|
||||
+ reg = <0x1000000 0xf00000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@1f00000 {
|
||||
+ label = "calibration";
|
||||
+ reg = <0x1f00000 0x100000>;
|
||||
+ };
|
||||
+ };
|
@ -0,0 +1,654 @@
|
||||
From afbef8efb591792579c633a7c545f914c6165f82 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Thu, 11 Feb 2021 23:04:27 +0100
|
||||
Subject: [PATCH] mtd: parsers: ofpart: support BCM4908 fixed partitions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Some devices use fixed partitioning with some partitions requiring some
|
||||
extra logic. E.g. BCM4908 may have multiple firmware partitions but
|
||||
detecting currently used one requires checking bootloader parameters.
|
||||
|
||||
To support such cases without duplicating a lot of code (without copying
|
||||
most of the ofpart.c code) support for post-parsing callback was added.
|
||||
|
||||
BCM4908 support in ofpart can be enabled using config option and results
|
||||
in compiling & executing a specific callback. It simply reads offset of
|
||||
currently used firmware partition from the DT. Bootloader specifies it
|
||||
using the "brcm_blparms" property.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
---
|
||||
drivers/mtd/parsers/Kconfig | 9 +++
|
||||
drivers/mtd/parsers/Makefile | 2 +
|
||||
drivers/mtd/parsers/ofpart_bcm4908.c | 64 +++++++++++++++++++
|
||||
drivers/mtd/parsers/ofpart_bcm4908.h | 15 +++++
|
||||
.../mtd/parsers/{ofpart.c => ofpart_core.c} | 28 +++++++-
|
||||
5 files changed, 116 insertions(+), 2 deletions(-)
|
||||
create mode 100644 drivers/mtd/parsers/ofpart_bcm4908.c
|
||||
create mode 100644 drivers/mtd/parsers/ofpart_bcm4908.h
|
||||
rename drivers/mtd/parsers/{ofpart.c => ofpart_core.c} (88%)
|
||||
|
||||
--- a/drivers/mtd/parsers/Kconfig
|
||||
+++ b/drivers/mtd/parsers/Kconfig
|
||||
@@ -67,6 +67,15 @@ config MTD_OF_PARTS
|
||||
flash memory node, as described in
|
||||
Documentation/devicetree/bindings/mtd/partition.txt.
|
||||
|
||||
+config MTD_OF_PARTS_BCM4908
|
||||
+ bool "BCM4908 partitioning support"
|
||||
+ depends on MTD_OF_PARTS && (ARCH_BCM4908 || COMPILE_TEST)
|
||||
+ default ARCH_BCM4908
|
||||
+ help
|
||||
+ This provides a partitions parser for BCM4908 family devices
|
||||
+ that can have multiple "firmware" partitions. It takes care of
|
||||
+ finding the currently used one and the backup ones.
|
||||
+
|
||||
config MTD_PARSER_IMAGETAG
|
||||
tristate "Parser for BCM963XX Image Tag format partitions"
|
||||
depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
|
||||
--- a/drivers/mtd/parsers/Makefile
|
||||
+++ b/drivers/mtd/parsers/Makefile
|
||||
@@ -4,6 +4,8 @@ obj-$(CONFIG_MTD_BCM47XX_PARTS) += bcm4
|
||||
obj-$(CONFIG_MTD_BCM63XX_PARTS) += bcm63xxpart.o
|
||||
obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o
|
||||
obj-$(CONFIG_MTD_OF_PARTS) += ofpart.o
|
||||
+ofpart-y += ofpart_core.o
|
||||
+ofpart-$(CONFIG_MTD_OF_PARTS_BCM4908) += ofpart_bcm4908.o
|
||||
obj-$(CONFIG_MTD_PARSER_IMAGETAG) += parser_imagetag.o
|
||||
obj-$(CONFIG_MTD_AFS_PARTS) += afs.o
|
||||
obj-$(CONFIG_MTD_PARSER_TRX) += parser_trx.o
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_bcm4908.c
|
||||
@@ -0,0 +1,64 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+/*
|
||||
+ * Copyright (C) 2021 Rafał Miłecki <rafal@milecki.pl>
|
||||
+ */
|
||||
+
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/mtd/mtd.h>
|
||||
+#include <linux/slab.h>
|
||||
+#include <linux/mtd/partitions.h>
|
||||
+
|
||||
+#include "ofpart_bcm4908.h"
|
||||
+
|
||||
+#define BLPARAMS_FW_OFFSET "NAND_RFS_OFS"
|
||||
+
|
||||
+/**
+ * bcm4908_partitions_fw_offset - look up the offset of the firmware in use
+ *
+ * Walks the strings of the root node's "brcm_blparms" property looking for an
+ * entry of the form BLPARAMS_FW_OFFSET "=<number>" and returns that number
+ * shifted left by 10 bits.
+ *
+ * Return: the offset on success, -ENOENT when the root node or a matching
+ * entry is missing, or the kstrtoul() error when the number cannot be parsed.
+ */
+static long long bcm4908_partitions_fw_offset(void)
+{
+	const size_t len = strlen(BLPARAMS_FW_OFFSET);
+	struct device_node *root;
+	struct property *prop;
+	long long ret = -ENOENT;
+	const char *s;
+
+	root = of_find_node_by_path("/");
+	if (!root)
+		return -ENOENT;
+
+	of_property_for_each_string(root, "brcm_blparms", prop, s) {
+		unsigned long offset;
+		int err;
+
+		/* Only "<BLPARAMS_FW_OFFSET>=<value>" entries are of interest */
+		if (strncmp(s, BLPARAMS_FW_OFFSET, len) || s[len] != '=')
+			continue;
+
+		err = kstrtoul(s + len + 1, 0, &offset);
+		if (err) {
+			pr_err("failed to parse %s\n", s + len + 1);
+			ret = err;
+		} else {
+			ret = offset << 10;
+		}
+
+		/* The first matching entry decides the result */
+		break;
+	}
+
+	/* Drop the reference taken by of_find_node_by_path() */
+	of_node_put(root);
+
+	return ret;
+}
|
||||
+
|
||||
+/**
+ * bcm4908_partitions_post_parse - rename BCM4908 firmware partitions
+ * @mtd: MTD device the partitions belong to (not used here)
+ * @parts: parsed partitions, modified in place
+ * @nr_parts: number of entries in @parts
+ *
+ * Every partition whose node is compatible with "brcm,bcm4908-firmware" is
+ * named "firmware" when its offset equals the one reported by
+ * bcm4908_partitions_fw_offset(), or when that lookup failed; any other such
+ * partition is named "backup". Remaining partitions are left untouched.
+ *
+ * Return: always 0.
+ */
+int bcm4908_partitions_post_parse(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts)
+{
+	const long long active_offset = bcm4908_partitions_fw_offset();
+	int idx;
+
+	for (idx = 0; idx < nr_parts; idx++) {
+		struct mtd_partition *part = &parts[idx];
+
+		if (!of_device_is_compatible(part->of_node, "brcm,bcm4908-firmware"))
+			continue;
+
+		part->name = (active_offset < 0 || part->offset == active_offset) ?
+			     "firmware" : "backup";
+	}
+
+	return 0;
+}
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_bcm4908.h
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+#ifndef __BCM4908_PARTITIONS_H
+#define __BCM4908_PARTITIONS_H
+
+#include <linux/errno.h>
+
+/*
+ * Only pointers to these types are used below, so forward declarations keep
+ * this header usable regardless of what the includer pulled in before it.
+ */
+struct mtd_info;
+struct mtd_partition;
+
+#ifdef CONFIG_MTD_OF_PARTS_BCM4908
+/* Implemented in ofpart_bcm4908.c */
+int bcm4908_partitions_post_parse(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts);
+#else
+/* Stub for builds without CONFIG_MTD_OF_PARTS_BCM4908: reports "not supported" */
+static inline int bcm4908_partitions_post_parse(struct mtd_info *mtd, struct mtd_partition *parts,
+						int nr_parts)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#endif
|
||||
--- a/drivers/mtd/parsers/ofpart.c
|
||||
+++ /dev/null
|
||||
@@ -1,239 +0,0 @@
|
||||
-// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
-/*
|
||||
- * Flash partitions described by the OF (or flattened) device tree
|
||||
- *
|
||||
- * Copyright © 2006 MontaVista Software Inc.
|
||||
- * Author: Vitaly Wool <vwool@ru.mvista.com>
|
||||
- *
|
||||
- * Revised to handle newer style flash binding by:
|
||||
- * Copyright © 2007 David Gibson, IBM Corporation.
|
||||
- */
|
||||
-
|
||||
-#include <linux/module.h>
|
||||
-#include <linux/init.h>
|
||||
-#include <linux/of.h>
|
||||
-#include <linux/mtd/mtd.h>
|
||||
-#include <linux/slab.h>
|
||||
-#include <linux/mtd/partitions.h>
|
||||
-
|
||||
-static bool node_has_compatible(struct device_node *pp)
|
||||
-{
|
||||
- return of_get_property(pp, "compatible", NULL);
|
||||
-}
|
||||
-
|
||||
-static int parse_fixed_partitions(struct mtd_info *master,
|
||||
- const struct mtd_partition **pparts,
|
||||
- struct mtd_part_parser_data *data)
|
||||
-{
|
||||
- struct mtd_partition *parts;
|
||||
- struct device_node *mtd_node;
|
||||
- struct device_node *ofpart_node;
|
||||
- const char *partname;
|
||||
- struct device_node *pp;
|
||||
- int nr_parts, i, ret = 0;
|
||||
- bool dedicated = true;
|
||||
-
|
||||
-
|
||||
- /* Pull of_node from the master device node */
|
||||
- mtd_node = mtd_get_of_node(master);
|
||||
- if (!mtd_node)
|
||||
- return 0;
|
||||
-
|
||||
- ofpart_node = of_get_child_by_name(mtd_node, "partitions");
|
||||
- if (!ofpart_node) {
|
||||
- /*
|
||||
- * We might get here even when ofpart isn't used at all (e.g.,
|
||||
- * when using another parser), so don't be louder than
|
||||
- * KERN_DEBUG
|
||||
- */
|
||||
- pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n",
|
||||
- master->name, mtd_node);
|
||||
- ofpart_node = mtd_node;
|
||||
- dedicated = false;
|
||||
- } else if (!of_device_is_compatible(ofpart_node, "fixed-partitions")) {
|
||||
- /* The 'partitions' subnode might be used by another parser */
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- /* First count the subnodes */
|
||||
- nr_parts = 0;
|
||||
- for_each_child_of_node(ofpart_node, pp) {
|
||||
- if (!dedicated && node_has_compatible(pp))
|
||||
- continue;
|
||||
-
|
||||
- nr_parts++;
|
||||
- }
|
||||
-
|
||||
- if (nr_parts == 0)
|
||||
- return 0;
|
||||
-
|
||||
- parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
|
||||
- if (!parts)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
- i = 0;
|
||||
- for_each_child_of_node(ofpart_node, pp) {
|
||||
- const __be32 *reg;
|
||||
- int len;
|
||||
- int a_cells, s_cells;
|
||||
-
|
||||
- if (!dedicated && node_has_compatible(pp))
|
||||
- continue;
|
||||
-
|
||||
- reg = of_get_property(pp, "reg", &len);
|
||||
- if (!reg) {
|
||||
- if (dedicated) {
|
||||
- pr_debug("%s: ofpart partition %pOF (%pOF) missing reg property.\n",
|
||||
- master->name, pp,
|
||||
- mtd_node);
|
||||
- goto ofpart_fail;
|
||||
- } else {
|
||||
- nr_parts--;
|
||||
- continue;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- a_cells = of_n_addr_cells(pp);
|
||||
- s_cells = of_n_size_cells(pp);
|
||||
- if (len / 4 != a_cells + s_cells) {
|
||||
- pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n",
|
||||
- master->name, pp,
|
||||
- mtd_node);
|
||||
- goto ofpart_fail;
|
||||
- }
|
||||
-
|
||||
- parts[i].offset = of_read_number(reg, a_cells);
|
||||
- parts[i].size = of_read_number(reg + a_cells, s_cells);
|
||||
- parts[i].of_node = pp;
|
||||
-
|
||||
- partname = of_get_property(pp, "label", &len);
|
||||
- if (!partname)
|
||||
- partname = of_get_property(pp, "name", &len);
|
||||
- parts[i].name = partname;
|
||||
-
|
||||
- if (of_get_property(pp, "read-only", &len))
|
||||
- parts[i].mask_flags |= MTD_WRITEABLE;
|
||||
-
|
||||
- if (of_get_property(pp, "lock", &len))
|
||||
- parts[i].mask_flags |= MTD_POWERUP_LOCK;
|
||||
-
|
||||
- if (of_property_read_bool(pp, "slc-mode"))
|
||||
- parts[i].add_flags |= MTD_SLC_ON_MLC_EMULATION;
|
||||
-
|
||||
- i++;
|
||||
- }
|
||||
-
|
||||
- if (!nr_parts)
|
||||
- goto ofpart_none;
|
||||
-
|
||||
- *pparts = parts;
|
||||
- return nr_parts;
|
||||
-
|
||||
-ofpart_fail:
|
||||
- pr_err("%s: error parsing ofpart partition %pOF (%pOF)\n",
|
||||
- master->name, pp, mtd_node);
|
||||
- ret = -EINVAL;
|
||||
-ofpart_none:
|
||||
- of_node_put(pp);
|
||||
- kfree(parts);
|
||||
- return ret;
|
||||
-}
|
||||
-
|
||||
-static const struct of_device_id parse_ofpart_match_table[] = {
|
||||
- { .compatible = "fixed-partitions" },
|
||||
- {},
|
||||
-};
|
||||
-MODULE_DEVICE_TABLE(of, parse_ofpart_match_table);
|
||||
-
|
||||
-static struct mtd_part_parser ofpart_parser = {
|
||||
- .parse_fn = parse_fixed_partitions,
|
||||
- .name = "fixed-partitions",
|
||||
- .of_match_table = parse_ofpart_match_table,
|
||||
-};
|
||||
-
|
||||
-static int parse_ofoldpart_partitions(struct mtd_info *master,
|
||||
- const struct mtd_partition **pparts,
|
||||
- struct mtd_part_parser_data *data)
|
||||
-{
|
||||
- struct mtd_partition *parts;
|
||||
- struct device_node *dp;
|
||||
- int i, plen, nr_parts;
|
||||
- const struct {
|
||||
- __be32 offset, len;
|
||||
- } *part;
|
||||
- const char *names;
|
||||
-
|
||||
- /* Pull of_node from the master device node */
|
||||
- dp = mtd_get_of_node(master);
|
||||
- if (!dp)
|
||||
- return 0;
|
||||
-
|
||||
- part = of_get_property(dp, "partitions", &plen);
|
||||
- if (!part)
|
||||
- return 0; /* No partitions found */
|
||||
-
|
||||
- pr_warn("Device tree uses obsolete partition map binding: %pOF\n", dp);
|
||||
-
|
||||
- nr_parts = plen / sizeof(part[0]);
|
||||
-
|
||||
- parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
|
||||
- if (!parts)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
- names = of_get_property(dp, "partition-names", &plen);
|
||||
-
|
||||
- for (i = 0; i < nr_parts; i++) {
|
||||
- parts[i].offset = be32_to_cpu(part->offset);
|
||||
- parts[i].size = be32_to_cpu(part->len) & ~1;
|
||||
- /* bit 0 set signifies read only partition */
|
||||
- if (be32_to_cpu(part->len) & 1)
|
||||
- parts[i].mask_flags = MTD_WRITEABLE;
|
||||
-
|
||||
- if (names && (plen > 0)) {
|
||||
- int len = strlen(names) + 1;
|
||||
-
|
||||
- parts[i].name = names;
|
||||
- plen -= len;
|
||||
- names += len;
|
||||
- } else {
|
||||
- parts[i].name = "unnamed";
|
||||
- }
|
||||
-
|
||||
- part++;
|
||||
- }
|
||||
-
|
||||
- *pparts = parts;
|
||||
- return nr_parts;
|
||||
-}
|
||||
-
|
||||
-static struct mtd_part_parser ofoldpart_parser = {
|
||||
- .parse_fn = parse_ofoldpart_partitions,
|
||||
- .name = "ofoldpart",
|
||||
-};
|
||||
-
|
||||
-static int __init ofpart_parser_init(void)
|
||||
-{
|
||||
- register_mtd_parser(&ofpart_parser);
|
||||
- register_mtd_parser(&ofoldpart_parser);
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static void __exit ofpart_parser_exit(void)
|
||||
-{
|
||||
- deregister_mtd_parser(&ofpart_parser);
|
||||
- deregister_mtd_parser(&ofoldpart_parser);
|
||||
-}
|
||||
-
|
||||
-module_init(ofpart_parser_init);
|
||||
-module_exit(ofpart_parser_exit);
|
||||
-
|
||||
-MODULE_LICENSE("GPL");
|
||||
-MODULE_DESCRIPTION("Parser for MTD partitioning information in device tree");
|
||||
-MODULE_AUTHOR("Vitaly Wool, David Gibson");
|
||||
-/*
|
||||
- * When MTD core cannot find the requested parser, it tries to load the module
|
||||
- * with the same name. Since we provide the ofoldpart parser, we should have
|
||||
- * the corresponding alias.
|
||||
- */
|
||||
-MODULE_ALIAS("fixed-partitions");
|
||||
-MODULE_ALIAS("ofoldpart");
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_core.c
|
||||
@@ -0,0 +1,263 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
+/*
|
||||
+ * Flash partitions described by the OF (or flattened) device tree
|
||||
+ *
|
||||
+ * Copyright © 2006 MontaVista Software Inc.
|
||||
+ * Author: Vitaly Wool <vwool@ru.mvista.com>
|
||||
+ *
|
||||
+ * Revised to handle newer style flash binding by:
|
||||
+ * Copyright © 2007 David Gibson, IBM Corporation.
|
||||
+ */
|
||||
+
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/mtd/mtd.h>
|
||||
+#include <linux/slab.h>
|
||||
+#include <linux/mtd/partitions.h>
|
||||
+
|
||||
+#include "ofpart_bcm4908.h"
|
||||
+
|
||||
+/*
+ * Optional hooks tied to a specific compatible string. post_parse, when set,
+ * is called by parse_fixed_partitions() on the filled-in partition array.
+ */
+struct fixed_partitions_quirks {
|
||||
+ int (*post_parse)(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts);
|
||||
+};
|
||||
+
|
||||
+/* Quirks referenced by the "brcm,bcm4908-partitions" match table entry */
+struct fixed_partitions_quirks bcm4908_partitions_quirks = {
|
||||
+ .post_parse = bcm4908_partitions_post_parse,
|
||||
+};
|
||||
+
|
||||
+static const struct of_device_id parse_ofpart_match_table[];
|
||||
+
|
||||
+/* Tell whether device tree node @pp carries a "compatible" property. */
+static bool node_has_compatible(struct device_node *pp)
+{
+	const void *compat = of_get_property(pp, "compatible", NULL);
+
+	return compat != NULL;
+}
|
||||
+
|
||||
+/**
+ * parse_fixed_partitions - build a partition table from device tree nodes
+ * @master: MTD device whose device tree node is parsed
+ * @pparts: set to a kcalloc()ed array of partitions on success
+ * @data: not used by this parser
+ *
+ * Partitions are taken from the "partitions" child of the MTD node when that
+ * child exists and matches parse_ofpart_match_table. Without such a child the
+ * direct subnodes of the MTD node are tried instead; in that mode subnodes
+ * with a "compatible" property or without "reg" are skipped.
+ *
+ * Return: number of partitions stored in @pparts (0 when there is nothing to
+ * parse), -ENOMEM on allocation failure, -EINVAL on a malformed partition.
+ */
+static int parse_fixed_partitions(struct mtd_info *master,
+				  const struct mtd_partition **pparts,
+				  struct mtd_part_parser_data *data)
+{
+	const struct fixed_partitions_quirks *quirks;
+	const struct of_device_id *of_id;
+	struct mtd_partition *parts;
+	struct device_node *mtd_node;
+	struct device_node *ofpart_node;
+	const char *partname;
+	struct device_node *pp;
+	int nr_parts, i, ret = 0;
+	bool dedicated = true;
+
+	/* Pull of_node from the master device node */
+	mtd_node = mtd_get_of_node(master);
+	if (!mtd_node)
+		return 0;
+
+	ofpart_node = of_get_child_by_name(mtd_node, "partitions");
+	if (!ofpart_node) {
+		/*
+		 * We might get here even when ofpart isn't used at all (e.g.,
+		 * when using another parser), so don't be louder than
+		 * KERN_DEBUG
+		 */
+		pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n",
+			 master->name, mtd_node);
+		ofpart_node = mtd_node;
+		dedicated = false;
+	}
+
+	of_id = of_match_node(parse_ofpart_match_table, ofpart_node);
+	if (dedicated && !of_id) {
+		/* The 'partitions' subnode might be used by another parser */
+		goto out_put_ofpart;
+	}
+
+	quirks = of_id ? of_id->data : NULL;
+
+	/* First count the subnodes */
+	nr_parts = 0;
+	for_each_child_of_node(ofpart_node, pp) {
+		if (!dedicated && node_has_compatible(pp))
+			continue;
+
+		nr_parts++;
+	}
+
+	if (nr_parts == 0)
+		goto out_put_ofpart;
+
+	parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
+	if (!parts) {
+		ret = -ENOMEM;
+		goto out_put_ofpart;
+	}
+
+	i = 0;
+	for_each_child_of_node(ofpart_node, pp) {
+		const __be32 *reg;
+		int len;
+		int a_cells, s_cells;
+
+		if (!dedicated && node_has_compatible(pp))
+			continue;
+
+		reg = of_get_property(pp, "reg", &len);
+		if (!reg) {
+			if (dedicated) {
+				pr_debug("%s: ofpart partition %pOF (%pOF) missing reg property.\n",
+					 master->name, pp,
+					 mtd_node);
+				goto ofpart_fail;
+			} else {
+				/* Counted in the first pass, but not a partition */
+				nr_parts--;
+				continue;
+			}
+		}
+
+		a_cells = of_n_addr_cells(pp);
+		s_cells = of_n_size_cells(pp);
+		if (len / 4 != a_cells + s_cells) {
+			pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n",
+				 master->name, pp,
+				 mtd_node);
+			goto ofpart_fail;
+		}
+
+		parts[i].offset = of_read_number(reg, a_cells);
+		parts[i].size = of_read_number(reg + a_cells, s_cells);
+		parts[i].of_node = pp;
+
+		/* Prefer "label", fall back to the node's "name" property */
+		partname = of_get_property(pp, "label", &len);
+		if (!partname)
+			partname = of_get_property(pp, "name", &len);
+		parts[i].name = partname;
+
+		if (of_get_property(pp, "read-only", &len))
+			parts[i].mask_flags |= MTD_WRITEABLE;
+
+		if (of_get_property(pp, "lock", &len))
+			parts[i].mask_flags |= MTD_POWERUP_LOCK;
+
+		if (of_property_read_bool(pp, "slc-mode"))
+			parts[i].add_flags |= MTD_SLC_ON_MLC_EMULATION;
+
+		i++;
+	}
+
+	if (!nr_parts)
+		goto ofpart_none;
+
+	if (quirks && quirks->post_parse)
+		quirks->post_parse(master, parts, nr_parts);
+
+	*pparts = parts;
+	ret = nr_parts;
+	goto out_put_ofpart;
+
+ofpart_fail:
+	pr_err("%s: error parsing ofpart partition %pOF (%pOF)\n",
+	       master->name, pp, mtd_node);
+	ret = -EINVAL;
+ofpart_none:
+	of_node_put(pp);
+	kfree(parts);
+out_put_ofpart:
+	/*
+	 * Only the dedicated "partitions" node came from of_get_child_by_name()
+	 * with a reference held; the MTD node itself is merely borrowed.
+	 */
+	if (dedicated)
+		of_node_put(ofpart_node);
+	return ret;
+}
|
||||
+
|
||||
+/*
+ * Compatible strings matched by parse_fixed_partitions() against the node
+ * holding the partitions. An entry's .data, when set, is used there as a
+ * struct fixed_partitions_quirks.
+ */
+static const struct of_device_id parse_ofpart_match_table[] = {
|
||||
+ /* Generic */
|
||||
+ { .compatible = "fixed-partitions" },
|
||||
+ /* Customized */
|
||||
+ { .compatible = "brcm,bcm4908-partitions", .data = &bcm4908_partitions_quirks, },
|
||||
+ {},
|
||||
+};
|
||||
+MODULE_DEVICE_TABLE(of, parse_ofpart_match_table);
|
||||
+
|
||||
+/* Parser named "fixed-partitions", implemented by parse_fixed_partitions() */
+static struct mtd_part_parser ofpart_parser = {
|
||||
+ .parse_fn = parse_fixed_partitions,
|
||||
+ .name = "fixed-partitions",
|
||||
+ .of_match_table = parse_ofpart_match_table,
|
||||
+};
|
||||
+
|
||||
+/*
+ * Parse the obsolete binding in which the MTD node itself carries a
+ * "partitions" property made of (offset, length) pairs of big-endian 32-bit
+ * cells, optionally accompanied by a "partition-names" property holding the
+ * names as consecutive NUL-terminated strings. Bit 0 of a length cell marks
+ * the partition as read-only and is not part of the size.
+ *
+ * Returns the number of partitions stored in *pparts, 0 when the node or the
+ * "partitions" property is missing, or -ENOMEM on allocation failure.
+ */
+static int parse_ofoldpart_partitions(struct mtd_info *master,
+				      const struct mtd_partition **pparts,
+				      struct mtd_part_parser_data *data)
+{
+	const struct {
+		__be32 offset, len;
+	} *entry;
+	struct mtd_partition *parts;
+	struct device_node *dp;
+	const char *name;
+	int idx, nr_parts, prop_len, names_len = 0;
+
+	/* Pull of_node from the master device node */
+	dp = mtd_get_of_node(master);
+	if (!dp)
+		return 0;
+
+	entry = of_get_property(dp, "partitions", &prop_len);
+	if (!entry)
+		return 0; /* No partitions found */
+
+	pr_warn("Device tree uses obsolete partition map binding: %pOF\n", dp);
+
+	nr_parts = prop_len / sizeof(entry[0]);
+
+	parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
+	if (!parts)
+		return -ENOMEM;
+
+	name = of_get_property(dp, "partition-names", &names_len);
+
+	for (idx = 0; idx < nr_parts; idx++, entry++) {
+		struct mtd_partition *p = &parts[idx];
+		u32 raw_len = be32_to_cpu(entry->len);
+		int step;
+
+		p->offset = be32_to_cpu(entry->offset);
+		p->size = raw_len & ~1;
+		/* bit 0 set signifies read only partition */
+		if (raw_len & 1)
+			p->mask_flags = MTD_WRITEABLE;
+
+		/* No names at all, or fewer names than partitions */
+		if (!name || names_len <= 0) {
+			p->name = "unnamed";
+			continue;
+		}
+
+		/* Use the current string, then step past its terminator */
+		step = strlen(name) + 1;
+		p->name = name;
+		names_len -= step;
+		name += step;
+	}
+
+	*pparts = parts;
+	return nr_parts;
+}
|
||||
+
|
||||
+/* Parser named "ofoldpart", implemented by parse_ofoldpart_partitions(); no OF match table is set */
+static struct mtd_part_parser ofoldpart_parser = {
|
||||
+ .parse_fn = parse_ofoldpart_partitions,
|
||||
+ .name = "ofoldpart",
|
||||
+};
|
||||
+
|
||||
+/*
+ * Module init: register both device tree partition parsers. The results of
+ * register_mtd_parser() are not checked; 0 is returned unconditionally.
+ */
+static int __init ofpart_parser_init(void)
|
||||
+{
|
||||
+ register_mtd_parser(&ofpart_parser);
|
||||
+ register_mtd_parser(&ofoldpart_parser);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* Module exit: deregister the two parsers registered by ofpart_parser_init() */
+static void __exit ofpart_parser_exit(void)
|
||||
+{
|
||||
+ deregister_mtd_parser(&ofpart_parser);
|
||||
+ deregister_mtd_parser(&ofoldpart_parser);
|
||||
+}
|
||||
+
|
||||
+module_init(ofpart_parser_init);
|
||||
+module_exit(ofpart_parser_exit);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
+MODULE_DESCRIPTION("Parser for MTD partitioning information in device tree");
|
||||
+MODULE_AUTHOR("Vitaly Wool, David Gibson");
|
||||
+/*
|
||||
+ * When MTD core cannot find the requested parser, it tries to load the module
|
||||
+ * with the same name. Since we provide the ofoldpart parser, we should have
|
||||
+ * the corresponding alias.
|
||||
+ */
|
||||
+MODULE_ALIAS("fixed-partitions");
|
||||
+MODULE_ALIAS("ofoldpart");
|
@ -0,0 +1,69 @@
|
||||
From 2d751203aacf86a1b301a188d8551c7da91043ab Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Tue, 2 Mar 2021 20:00:12 +0100
|
||||
Subject: [PATCH] mtd: parsers: ofpart: limit parsing of deprecated DT syntax
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
For backward compatibility ofpart still supports the old syntax like:
|
||||
spi-flash@0 {
|
||||
compatible = "jedec,spi-nor";
|
||||
reg = <0x0>;
|
||||
|
||||
partition@0 {
|
||||
label = "bootloader";
|
||||
reg = <0x0 0x100000>;
|
||||
};
|
||||
};
|
||||
(without "partitions" subnode).
|
||||
|
||||
There is no reason however to support nested partitions without a clear
|
||||
"compatible" string like:
|
||||
partitions {
|
||||
compatible = "fixed-partitions";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
|
||||
partition@0 {
|
||||
label = "bootloader";
|
||||
reg = <0x0 0x100000>;
|
||||
|
||||
partition@0 {
|
||||
label = "config";
|
||||
reg = <0x80000 0x80000>;
|
||||
};
|
||||
};
|
||||
};
|
||||
(we never officially supported or documented that).
|
||||
|
||||
Make sure ofpart doesn't attempt to parse above.
|
||||
|
||||
Cc: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210302190012.1255-1-zajec5@gmail.com
|
||||
---
|
||||
drivers/mtd/parsers/ofpart_core.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/parsers/ofpart_core.c
|
||||
+++ b/drivers/mtd/parsers/ofpart_core.c
|
||||
@@ -53,7 +53,7 @@ static int parse_fixed_partitions(struct
|
||||
return 0;
|
||||
|
||||
ofpart_node = of_get_child_by_name(mtd_node, "partitions");
|
||||
- if (!ofpart_node) {
|
||||
+ if (!ofpart_node && !master->parent) {
|
||||
/*
|
||||
* We might get here even when ofpart isn't used at all (e.g.,
|
||||
* when using another parser), so don't be louder than
|
||||
@@ -64,6 +64,8 @@ static int parse_fixed_partitions(struct
|
||||
ofpart_node = mtd_node;
|
||||
dedicated = false;
|
||||
}
|
||||
+ if (!ofpart_node)
|
||||
+ return 0;
|
||||
|
||||
of_id = of_match_node(parse_ofpart_match_table, ofpart_node);
|
||||
if (dedicated && !of_id) {
|
@ -0,0 +1,34 @@
|
||||
From b87b6d2d6f540e29c3f98e1572d64e560d73d6c1 Mon Sep 17 00:00:00 2001
|
||||
From: Wei Yongjun <weiyongjun1@huawei.com>
|
||||
Date: Thu, 4 Mar 2021 06:46:00 +0000
|
||||
Subject: [PATCH] mtd: parsers: ofpart: make symbol 'bcm4908_partitions_quirks'
|
||||
static
|
||||
|
||||
The sparse tool complains as follows:
|
||||
|
||||
drivers/mtd/parsers/ofpart_core.c:25:32: warning:
|
||||
symbol 'bcm4908_partitions_quirks' was not declared. Should it be static?
|
||||
|
||||
This symbol is not used outside of ofpart_core.c, so this
|
||||
commit marks it static.
|
||||
|
||||
Fixes: 457da931b608 ("mtd: parsers: ofpart: support BCM4908 fixed partitions")
|
||||
Reported-by: Hulk Robot <hulkci@huawei.com>
|
||||
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210304064600.3279138-1-weiyongjun1@huawei.com
|
||||
---
|
||||
drivers/mtd/parsers/ofpart_core.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/parsers/ofpart_core.c
|
||||
+++ b/drivers/mtd/parsers/ofpart_core.c
|
||||
@@ -22,7 +22,7 @@ struct fixed_partitions_quirks {
|
||||
int (*post_parse)(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts);
|
||||
};
|
||||
|
||||
-struct fixed_partitions_quirks bcm4908_partitions_quirks = {
|
||||
+static struct fixed_partitions_quirks bcm4908_partitions_quirks = {
|
||||
.post_parse = bcm4908_partitions_post_parse,
|
||||
};
|
||||
|
@ -0,0 +1,38 @@
|
||||
From a5d83d6e2bc747b13f347962d4b335d70b23559b Mon Sep 17 00:00:00 2001
|
||||
From: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Date: Fri, 12 Mar 2021 07:28:19 +0100
|
||||
Subject: [PATCH] mtd: core: add nvmem-cells compatible to parse mtd as nvmem
|
||||
cells
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Partitions that contain the nvmem-cells compatible will register
|
||||
their direct subnodes as nvmem cells and the node will be treated as a
|
||||
nvmem provider.
|
||||
|
||||
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Tested-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
---
|
||||
drivers/mtd/mtdcore.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/mtdcore.c
|
||||
+++ b/drivers/mtd/mtdcore.c
|
||||
@@ -531,6 +531,7 @@ static int mtd_nvmem_reg_read(void *priv
|
||||
|
||||
static int mtd_nvmem_add(struct mtd_info *mtd)
|
||||
{
|
||||
+ struct device_node *node = mtd_get_of_node(mtd);
|
||||
struct nvmem_config config = {};
|
||||
|
||||
config.id = -1;
|
||||
@@ -543,7 +544,7 @@ static int mtd_nvmem_add(struct mtd_info
|
||||
config.stride = 1;
|
||||
config.read_only = true;
|
||||
config.root_only = true;
|
||||
- config.no_of_node = true;
|
||||
+ config.no_of_node = !of_device_is_compatible(node, "nvmem-cells");
|
||||
config.priv = mtd;
|
||||
|
||||
mtd->nvmem = nvmem_register(&config);
|
@ -0,0 +1,25 @@
|
||||
From 42645976c3289b03a12f1bd2bc131fd98fc27170 Mon Sep 17 00:00:00 2001
|
||||
From: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Date: Fri, 12 Mar 2021 07:28:20 +0100
|
||||
Subject: [PATCH] devicetree: nvmem: nvmem: drop $nodename restriction
|
||||
|
||||
Drop $nodename restriction as now mtd partition can also be used as
|
||||
nvmem provider.
|
||||
|
||||
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
---
|
||||
Documentation/devicetree/bindings/nvmem/nvmem.yaml | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
--- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml
|
||||
+++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
|
||||
@@ -20,9 +20,6 @@ description: |
|
||||
storage device.
|
||||
|
||||
properties:
|
||||
- $nodename:
|
||||
- pattern: "^(eeprom|efuse|nvram)(@.*|-[0-9a-f])*$"
|
||||
-
|
||||
"#address-cells":
|
||||
const: 1
|
||||
|
@ -0,0 +1,117 @@
|
||||
From 377aa0135dc8489312edd3184d143ce3a89ff7ee Mon Sep 17 00:00:00 2001
|
||||
From: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Date: Fri, 12 Mar 2021 07:28:21 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: Document use of nvmem-cells compatible
|
||||
|
||||
Document nvmem-cells compatible used to treat mtd partitions as a
|
||||
nvmem provider.
|
||||
|
||||
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Reviewed-by: Rob Herring <robh@kernel.org>
|
||||
---
|
||||
.../bindings/mtd/partitions/nvmem-cells.yaml | 99 +++++++++++++++++++
|
||||
1 file changed, 99 insertions(+)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml
|
||||
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml
|
||||
@@ -0,0 +1,99 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/nvmem-cells.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Nvmem cells
|
||||
+
|
||||
+description: |
|
||||
+ Any partition containing the compatible "nvmem-cells" will register as a
|
||||
+ nvmem provider.
|
||||
+ Each direct subnode represents a nvmem cell following the nvmem binding.
|
||||
+ Nvmem binding to declare nvmem-cells can be found in:
|
||||
+ Documentation/devicetree/bindings/nvmem/nvmem.yaml
|
||||
+
|
||||
+maintainers:
|
||||
+ - Ansuel Smith <ansuelsmth@gmail.com>
|
||||
+
|
||||
+allOf:
|
||||
+ - $ref: /schemas/nvmem/nvmem.yaml#
|
||||
+
|
||||
+properties:
|
||||
+ compatible:
|
||||
+ const: nvmem-cells
|
||||
+
|
||||
+required:
|
||||
+ - compatible
|
||||
+
|
||||
+additionalProperties: true
|
||||
+
|
||||
+examples:
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ /* ... */
|
||||
+
|
||||
+ };
|
||||
+ art: art@1200000 {
|
||||
+ compatible = "nvmem-cells";
|
||||
+ reg = <0x1200000 0x0140000>;
|
||||
+ label = "art";
|
||||
+ read-only;
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ macaddr_gmac1: macaddr_gmac1@0 {
|
||||
+ reg = <0x0 0x6>;
|
||||
+ };
|
||||
+
|
||||
+ macaddr_gmac2: macaddr_gmac2@6 {
|
||||
+ reg = <0x6 0x6>;
|
||||
+ };
|
||||
+
|
||||
+ pre_cal_24g: pre_cal_24g@1000 {
|
||||
+ reg = <0x1000 0x2f20>;
|
||||
+ };
|
||||
+
|
||||
+ pre_cal_5g: pre_cal_5g@5000{
|
||||
+ reg = <0x5000 0x2f20>;
|
||||
+ };
|
||||
+ };
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "bootloader";
|
||||
+ reg = <0x000000 0x100000>;
|
||||
+ read-only;
|
||||
+ };
|
||||
+
|
||||
+ firmware@100000 {
|
||||
+ compatible = "brcm,trx";
|
||||
+ label = "firmware";
|
||||
+ reg = <0x100000 0xe00000>;
|
||||
+ };
|
||||
+
|
||||
+ calibration@f00000 {
|
||||
+ compatible = "nvmem-cells";
|
||||
+ label = "calibration";
|
||||
+ reg = <0xf00000 0x100000>;
|
||||
+ ranges = <0 0xf00000 0x100000>;
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ wifi0@0 {
|
||||
+ reg = <0x000000 0x080000>;
|
||||
+ };
|
||||
+
|
||||
+ wifi1@80000 {
|
||||
+ reg = <0x080000 0x080000>;
|
||||
+ };
|
||||
+ };
|
||||
+ };
|
@ -0,0 +1,98 @@
|
||||
From 2fa7294175c76e1ec568aa75c1891fd908728c8d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Fri, 12 Mar 2021 14:49:18 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: add binding for Linksys Northstar
|
||||
partitions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Linksys on Broadcom Northstar devices uses fixed flash layout with
|
||||
multiple firmware partitions.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Reviewed-by: Rob Herring <robh@kernel.org>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210312134919.7767-1-zajec5@gmail.com
|
||||
---
|
||||
.../mtd/partitions/linksys,ns-partitions.yaml | 74 +++++++++++++++++++
|
||||
1 file changed, 74 insertions(+)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/linksys,ns-partitions.yaml
|
||||
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/linksys,ns-partitions.yaml
|
||||
@@ -0,0 +1,74 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/linksys,ns-partitions.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Linksys Northstar partitioning
|
||||
+
|
||||
+description: |
|
||||
+ Linksys devices based on Broadcom Northstar architecture often use two
|
||||
+ firmware partitions. One is used for regular booting, the other is treated as
|
||||
+ fallback.
|
||||
+
|
||||
+ This binding allows defining all fixed partitions and marking those containing
|
||||
+ firmware. System can use that information e.g. for booting or flashing
|
||||
+ purposes.
|
||||
+
|
||||
+maintainers:
|
||||
+ - Rafał Miłecki <rafal@milecki.pl>
|
||||
+
|
||||
+properties:
|
||||
+ compatible:
|
||||
+ const: linksys,ns-partitions
|
||||
+
|
||||
+ "#address-cells":
|
||||
+ enum: [ 1, 2 ]
|
||||
+
|
||||
+ "#size-cells":
|
||||
+ enum: [ 1, 2 ]
|
||||
+
|
||||
+patternProperties:
|
||||
+ "^partition@[0-9a-f]+$":
|
||||
+ $ref: "partition.yaml#"
|
||||
+ properties:
|
||||
+ compatible:
|
||||
+ items:
|
||||
+ - const: linksys,ns-firmware
|
||||
+ - const: brcm,trx
|
||||
+ unevaluatedProperties: false
|
||||
+
|
||||
+required:
|
||||
+ - "#address-cells"
|
||||
+ - "#size-cells"
|
||||
+
|
||||
+additionalProperties: false
|
||||
+
|
||||
+examples:
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "linksys,ns-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "boot";
|
||||
+ reg = <0x0 0x100000>;
|
||||
+ read-only;
|
||||
+ };
|
||||
+
|
||||
+ partition@100000 {
|
||||
+ label = "nvram";
|
||||
+ reg = <0x100000 0x100000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@200000 {
|
||||
+ compatible = "linksys,ns-firmware", "brcm,trx";
|
||||
+ reg = <0x200000 0xf00000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@1100000 {
|
||||
+ compatible = "linksys,ns-firmware", "brcm,trx";
|
||||
+ reg = <0x1100000 0xf00000>;
|
||||
+ };
|
||||
+ };
|
@ -0,0 +1,156 @@
|
||||
From 7134a2d026d942210b4d26d6059c9d979ca7866e Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Fri, 12 Mar 2021 14:49:19 +0100
|
||||
Subject: [PATCH] mtd: parsers: ofpart: support Linksys Northstar partitions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This allows extending ofpart parser with support for Linksys Northstar
|
||||
devices. That support uses recently added quirks mechanism.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210312134919.7767-2-zajec5@gmail.com
|
||||
---
|
||||
drivers/mtd/parsers/Kconfig | 10 +++++
|
||||
drivers/mtd/parsers/Makefile | 1 +
|
||||
drivers/mtd/parsers/ofpart_core.c | 6 +++
|
||||
drivers/mtd/parsers/ofpart_linksys_ns.c | 50 +++++++++++++++++++++++++
|
||||
drivers/mtd/parsers/ofpart_linksys_ns.h | 18 +++++++++
|
||||
5 files changed, 85 insertions(+)
|
||||
create mode 100644 drivers/mtd/parsers/ofpart_linksys_ns.c
|
||||
create mode 100644 drivers/mtd/parsers/ofpart_linksys_ns.h
|
||||
|
||||
--- a/drivers/mtd/parsers/Kconfig
|
||||
+++ b/drivers/mtd/parsers/Kconfig
|
||||
@@ -76,6 +76,16 @@ config MTD_OF_PARTS_BCM4908
|
||||
that can have multiple "firmware" partitions. It takes care of
|
||||
finding currently used one and backup ones.
|
||||
|
||||
+config MTD_OF_PARTS_LINKSYS_NS
|
||||
+ bool "Linksys Northstar partitioning support"
|
||||
+ depends on MTD_OF_PARTS && (ARCH_BCM_5301X || ARCH_BCM4908 || COMPILE_TEST)
|
||||
+ default ARCH_BCM_5301X
|
||||
+ help
|
||||
+ This provides partitions parser for Linksys devices based on Broadcom
|
||||
+ Northstar architecture. Linksys commonly uses fixed flash layout with
|
||||
+ two "firmware" partitions. Currently used firmware has to be detected
|
||||
+ using CFE environment variable.
|
||||
+
|
||||
config MTD_PARSER_IMAGETAG
|
||||
tristate "Parser for BCM963XX Image Tag format partitions"
|
||||
depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
|
||||
--- a/drivers/mtd/parsers/Makefile
|
||||
+++ b/drivers/mtd/parsers/Makefile
|
||||
@@ -6,6 +6,7 @@ obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdl
|
||||
obj-$(CONFIG_MTD_OF_PARTS) += ofpart.o
|
||||
ofpart-y += ofpart_core.o
|
||||
ofpart-$(CONFIG_MTD_OF_PARTS_BCM4908) += ofpart_bcm4908.o
|
||||
+ofpart-$(CONFIG_MTD_OF_PARTS_LINKSYS_NS)+= ofpart_linksys_ns.o
|
||||
obj-$(CONFIG_MTD_PARSER_IMAGETAG) += parser_imagetag.o
|
||||
obj-$(CONFIG_MTD_AFS_PARTS) += afs.o
|
||||
obj-$(CONFIG_MTD_PARSER_TRX) += parser_trx.o
|
||||
--- a/drivers/mtd/parsers/ofpart_core.c
|
||||
+++ b/drivers/mtd/parsers/ofpart_core.c
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <linux/mtd/partitions.h>
|
||||
|
||||
#include "ofpart_bcm4908.h"
|
||||
+#include "ofpart_linksys_ns.h"
|
||||
|
||||
struct fixed_partitions_quirks {
|
||||
int (*post_parse)(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts);
|
||||
@@ -26,6 +27,10 @@ static struct fixed_partitions_quirks bc
|
||||
.post_parse = bcm4908_partitions_post_parse,
|
||||
};
|
||||
|
||||
+static struct fixed_partitions_quirks linksys_ns_partitions_quirks = {
|
||||
+ .post_parse = linksys_ns_partitions_post_parse,
|
||||
+};
|
||||
+
|
||||
static const struct of_device_id parse_ofpart_match_table[];
|
||||
|
||||
static bool node_has_compatible(struct device_node *pp)
|
||||
@@ -167,6 +172,7 @@ static const struct of_device_id parse_o
|
||||
{ .compatible = "fixed-partitions" },
|
||||
/* Customized */
|
||||
{ .compatible = "brcm,bcm4908-partitions", .data = &bcm4908_partitions_quirks, },
|
||||
+ { .compatible = "linksys,ns-partitions", .data = &linksys_ns_partitions_quirks, },
|
||||
{},
|
||||
};
|
||||
MODULE_DEVICE_TABLE(of, parse_ofpart_match_table);
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_linksys_ns.c
|
||||
@@ -0,0 +1,50 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+/*
|
||||
+ * Copyright (C) 2021 Rafał Miłecki <rafal@milecki.pl>
|
||||
+ */
|
||||
+
|
||||
+#include <linux/bcm47xx_nvram.h>
|
||||
+#include <linux/mtd/mtd.h>
|
||||
+#include <linux/mtd/partitions.h>
|
||||
+
|
||||
+#include "ofpart_linksys_ns.h"
|
||||
+
|
||||
+#define NVRAM_BOOT_PART "bootpartition"
|
||||
+
|
||||
+static int ofpart_linksys_ns_bootpartition(void)
|
||||
+{
|
||||
+ char buf[4];
|
||||
+ int bootpartition;
|
||||
+
|
||||
+ /* Check CFE environment variable */
|
||||
+ if (bcm47xx_nvram_getenv(NVRAM_BOOT_PART, buf, sizeof(buf)) > 0) {
|
||||
+ if (!kstrtoint(buf, 0, &bootpartition))
|
||||
+ return bootpartition;
|
||||
+ pr_warn("Failed to parse %s value \"%s\"\n", NVRAM_BOOT_PART,
|
||||
+ buf);
|
||||
+ } else {
|
||||
+ pr_warn("Failed to get NVRAM \"%s\"\n", NVRAM_BOOT_PART);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int linksys_ns_partitions_post_parse(struct mtd_info *mtd,
|
||||
+ struct mtd_partition *parts,
|
||||
+ int nr_parts)
|
||||
+{
|
||||
+ int bootpartition = ofpart_linksys_ns_bootpartition();
|
||||
+ int trx_idx = 0;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < nr_parts; i++) {
|
||||
+ if (of_device_is_compatible(parts[i].of_node, "linksys,ns-firmware")) {
|
||||
+ if (trx_idx++ == bootpartition)
|
||||
+ parts[i].name = "firmware";
|
||||
+ else
|
||||
+ parts[i].name = "backup";
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_linksys_ns.h
|
||||
@@ -0,0 +1,18 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+#ifndef __OFPART_LINKSYS_NS_H
|
||||
+#define __OFPART_LINKSYS_NS_H
|
||||
+
|
||||
+#ifdef CONFIG_MTD_OF_PARTS_LINKSYS_NS
|
||||
+int linksys_ns_partitions_post_parse(struct mtd_info *mtd,
|
||||
+ struct mtd_partition *parts,
|
||||
+ int nr_parts);
|
||||
+#else
|
||||
+static inline int linksys_ns_partitions_post_parse(struct mtd_info *mtd,
|
||||
+ struct mtd_partition *parts,
|
||||
+ int nr_parts)
|
||||
+{
|
||||
+ return -EOPNOTSUPP;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#endif
|
@ -0,0 +1,54 @@
|
||||
From 7e4404113686868858a34210c28ae122e967aa64 Mon Sep 17 00:00:00 2001
|
||||
From: Mauri Sandberg <sandberg@mailfence.com>
|
||||
Date: Tue, 9 Mar 2021 19:48:59 +0200
|
||||
Subject: [PATCH] mtd: cfi_cmdset_0002: Disable buffered writes for AMD chip
|
||||
0x2201
|
||||
|
||||
Buffer writes do not work with AMD chip 0x2201. The chip in question
|
||||
is an AMD/Spansion/Cypress Semiconductor S29GL256N and datasheet [1]
|
||||
talks about writing buffers being possible. While waiting for a neater
|
||||
solution resort to writing word-sized chunks only.
|
||||
|
||||
Without the patch kernel logs will be flooded with entries like below:
|
||||
|
||||
jffs2_scan_eraseblock(): End of filesystem marker found at 0x0
|
||||
jffs2_build_filesystem(): unlocking the mtd device...
|
||||
done.
|
||||
jffs2_build_filesystem(): erasing all blocks after the end marker...
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01ec000a.
|
||||
jffs2: Write clean marker to block at 0x01920000 failed: -5
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01e2000a.
|
||||
jffs2: Write clean marker to block at 0x01880000 failed: -5
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01e0000a.
|
||||
jffs2: Write clean marker to block at 0x01860000 failed: -5
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01dc000a.
|
||||
jffs2: Write clean marker to block at 0x01820000 failed: -5
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01da000a.
|
||||
jffs2: Write clean marker to block at 0x01800000 failed: -5
|
||||
...
|
||||
|
||||
Tested on a Buffalo wzr-hp-g300nh running kernel 5.10.16.
|
||||
|
||||
[1] https://www.cypress.com/file/219941/download
|
||||
or https://datasheetspdf.com/pdf-file/565708/SPANSION/S29GL256N/1
|
||||
|
||||
Signed-off-by: Mauri Sandberg <sandberg@mailfence.com>
|
||||
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
|
||||
Link: https://lore.kernel.org/r/20210309174859.362060-1-sandberg@mailfence.com
|
||||
---
|
||||
drivers/mtd/chips/cfi_cmdset_0002.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
|
||||
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
|
||||
@@ -272,6 +272,10 @@ static void fixup_use_write_buffers(stru
|
||||
{
|
||||
struct map_info *map = mtd->priv;
|
||||
struct cfi_private *cfi = map->fldrv_priv;
|
||||
+
|
||||
+ if (cfi->mfr == CFI_MFR_AMD && cfi->id == 0x2201)
|
||||
+ return;
|
||||
+
|
||||
if (cfi->cfiq->BufWriteTimeoutTyp) {
|
||||
pr_debug("Using buffer write method\n");
|
||||
mtd->_write = cfi_amdstd_write_buffers;
|
@ -0,0 +1,32 @@
|
||||
From a4d82940ff85a7e307953dfa715f65d5ab487e10 Mon Sep 17 00:00:00 2001
|
||||
From: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Date: Sun, 18 Apr 2021 23:46:14 +0200
|
||||
Subject: dt-bindings: mtd: brcm,trx: Add brcm,trx-magic
|
||||
|
||||
This adds the description of an additional property which allows to
|
||||
specify a custom partition parser magic to detect a trx partition.
|
||||
Buffalo has multiple devices which are using the trx format, but with
|
||||
different magic values.
|
||||
|
||||
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Acked-by: Rob Herring <robh@kernel.org>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210418214616.239574-2-hauke@hauke-m.de
|
||||
---
|
||||
.../devicetree/bindings/mtd/partitions/brcm,trx.txt | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
--- a/Documentation/devicetree/bindings/mtd/partitions/brcm,trx.txt
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/brcm,trx.txt
|
||||
@@ -28,6 +28,11 @@ detected by a software parsing TRX heade
|
||||
Required properties:
|
||||
- compatible : (required) must be "brcm,trx"
|
||||
|
||||
+Optional properties:
|
||||
+
|
||||
+- brcm,trx-magic: TRX magic, if it is different from the default magic
|
||||
+ 0x30524448 as a u32.
|
||||
+
|
||||
Example:
|
||||
|
||||
flash@0 {
|
@ -0,0 +1,50 @@
|
||||
From d7f7e04f8b67571a4bf5a0dcd4f9da4214f5262c Mon Sep 17 00:00:00 2001
|
||||
From: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Date: Sun, 18 Apr 2021 23:46:15 +0200
|
||||
Subject: mtd: parsers: trx: Allow to specify brcm,trx-magic in DT
|
||||
|
||||
Buffalo uses a different TRX magic for every device, to be able to use
|
||||
this trx parser, make it possible to specify the TRX magic in device
|
||||
tree. If no TRX magic is specified in device tree, the standard value
|
||||
will be used. This value should only be specified if a vendor chooses to
|
||||
use a non standard TRX magic.
|
||||
|
||||
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210418214616.239574-3-hauke@hauke-m.de
|
||||
---
|
||||
drivers/mtd/parsers/parser_trx.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/parsers/parser_trx.c
|
||||
+++ b/drivers/mtd/parsers/parser_trx.c
|
||||
@@ -51,13 +51,20 @@ static int parser_trx_parse(struct mtd_i
|
||||
const struct mtd_partition **pparts,
|
||||
struct mtd_part_parser_data *data)
|
||||
{
|
||||
+ struct device_node *np = mtd_get_of_node(mtd);
|
||||
struct mtd_partition *parts;
|
||||
struct mtd_partition *part;
|
||||
struct trx_header trx;
|
||||
size_t bytes_read;
|
||||
uint8_t curr_part = 0, i = 0;
|
||||
+ uint32_t trx_magic = TRX_MAGIC;
|
||||
int err;
|
||||
|
||||
+ /* Get different magic from device tree if specified */
|
||||
+ err = of_property_read_u32(np, "brcm,trx-magic", &trx_magic);
|
||||
+ if (err != 0 && err != -EINVAL)
|
||||
+ pr_err("failed to parse \"brcm,trx-magic\" DT attribute, using default: %d\n", err);
|
||||
+
|
||||
parts = kcalloc(TRX_PARSER_MAX_PARTS, sizeof(struct mtd_partition),
|
||||
GFP_KERNEL);
|
||||
if (!parts)
|
||||
@@ -70,7 +77,7 @@ static int parser_trx_parse(struct mtd_i
|
||||
return err;
|
||||
}
|
||||
|
||||
- if (trx.magic != TRX_MAGIC) {
|
||||
+ if (trx.magic != trx_magic) {
|
||||
kfree(parts);
|
||||
return -ENOENT;
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
From 81bb218c829246962a6327c64eec18ddcc049936 Mon Sep 17 00:00:00 2001
|
||||
From: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Date: Sun, 18 Apr 2021 23:46:16 +0200
|
||||
Subject: mtd: parsers: trx: Allow to use TRX parser on Mediatek SoCs
|
||||
|
||||
Buffalo uses the TRX partition format also on Mediatek MT7622 SoCs.
|
||||
|
||||
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210418214616.239574-4-hauke@hauke-m.de
|
||||
---
|
||||
drivers/mtd/parsers/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/parsers/Kconfig
|
||||
+++ b/drivers/mtd/parsers/Kconfig
|
||||
@@ -115,7 +115,7 @@ config MTD_AFS_PARTS
|
||||
|
||||
config MTD_PARSER_TRX
|
||||
tristate "Parser for TRX format partitions"
|
||||
- depends on MTD && (BCM47XX || ARCH_BCM_5301X || COMPILE_TEST)
|
||||
+ depends on MTD && (BCM47XX || ARCH_BCM_5301X || ARCH_MEDIATEK || COMPILE_TEST)
|
||||
help
|
||||
TRX is a firmware format used by Broadcom on their devices. It
|
||||
may contain up to 3/4 partitions (depending on the version).
|
@ -0,0 +1,33 @@
|
||||
From 2365f91c861cbfeef7141c69842848c7b2d3c2db Mon Sep 17 00:00:00 2001
|
||||
From: INAGAKI Hiroshi <musashino.open@gmail.com>
|
||||
Date: Sun, 13 Feb 2022 15:40:44 +0900
|
||||
Subject: [PATCH] mtd: parsers: trx: allow to use on MediaTek MIPS SoCs
|
||||
|
||||
Buffalo sells some router devices which have trx-formatted firmware,
|
||||
based on MediaTek MIPS SoCs. To use parser_trx on those devices, add
|
||||
"RALINK" to dependency and allow to compile for MediaTek MIPS SoCs.
|
||||
|
||||
examples:
|
||||
|
||||
- WCR-1166DS (MT7628)
|
||||
- WSR-1166DHP (MT7621)
|
||||
- WSR-2533DHP (MT7621)
|
||||
|
||||
Signed-off-by: INAGAKI Hiroshi <musashino.open@gmail.com>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20220213064045.1781-1-musashino.open@gmail.com
|
||||
---
|
||||
drivers/mtd/parsers/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/parsers/Kconfig
|
||||
+++ b/drivers/mtd/parsers/Kconfig
|
||||
@@ -115,7 +115,7 @@ config MTD_AFS_PARTS
|
||||
|
||||
config MTD_PARSER_TRX
|
||||
tristate "Parser for TRX format partitions"
|
||||
- depends on MTD && (BCM47XX || ARCH_BCM_5301X || ARCH_MEDIATEK || COMPILE_TEST)
|
||||
+ depends on MTD && (BCM47XX || ARCH_BCM_5301X || ARCH_MEDIATEK || RALINK || COMPILE_TEST)
|
||||
help
|
||||
TRX is a firmware format used by Broadcom on their devices. It
|
||||
may contain up to 3/4 partitions (depending on the version).
|
@ -0,0 +1,25 @@
|
||||
From dcdf415b740923530dc71d89fecc8361078473f5 Mon Sep 17 00:00:00 2001
|
||||
From: Rui Salvaterra <rsalvaterra@gmail.com>
|
||||
Date: Mon, 5 Apr 2021 16:11:55 +0100
|
||||
Subject: [PATCH] ubifs: default to zstd compression
|
||||
|
||||
Compared to lzo and zlib, zstd is the best all-around performer, both in terms
|
||||
of speed and compression ratio. Set it as the default, if available.
|
||||
|
||||
Signed-off-by: Rui Salvaterra <rsalvaterra@gmail.com>
|
||||
---
|
||||
fs/ubifs/sb.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
--- a/fs/ubifs/sb.c
|
||||
+++ b/fs/ubifs/sb.c
|
||||
@@ -53,6 +53,9 @@
|
||||
|
||||
static int get_default_compressor(struct ubifs_info *c)
|
||||
{
|
||||
+ if (ubifs_compr_present(c, UBIFS_COMPR_ZSTD))
|
||||
+ return UBIFS_COMPR_ZSTD;
|
||||
+
|
||||
if (ubifs_compr_present(c, UBIFS_COMPR_LZO))
|
||||
return UBIFS_COMPR_LZO;
|
||||
|
@ -0,0 +1,88 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Mon, 8 Feb 2021 11:34:08 -0800
|
||||
Subject: [PATCH] net: extract napi poll functionality to __napi_poll()
|
||||
|
||||
This commit introduces a new function __napi_poll() which does the main
|
||||
logic of the existing napi_poll() function, and will be called by other
|
||||
functions in later commits.
|
||||
This idea and implementation is done by Felix Fietkau <nbd@nbd.name> and
|
||||
is proposed as part of the patch to move napi work to work_queue
|
||||
context.
|
||||
This commit by itself is a code restructure.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Wei Wang <weiwan@google.com>
|
||||
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -6805,15 +6805,10 @@ void __netif_napi_del(struct napi_struct
|
||||
}
|
||||
EXPORT_SYMBOL(__netif_napi_del);
|
||||
|
||||
-static int napi_poll(struct napi_struct *n, struct list_head *repoll)
|
||||
+static int __napi_poll(struct napi_struct *n, bool *repoll)
|
||||
{
|
||||
- void *have;
|
||||
int work, weight;
|
||||
|
||||
- list_del_init(&n->poll_list);
|
||||
-
|
||||
- have = netpoll_poll_lock(n);
|
||||
-
|
||||
weight = n->weight;
|
||||
|
||||
/* This NAPI_STATE_SCHED test is for avoiding a race
|
||||
@@ -6833,7 +6828,7 @@ static int napi_poll(struct napi_struct
|
||||
n->poll, work, weight);
|
||||
|
||||
if (likely(work < weight))
|
||||
- goto out_unlock;
|
||||
+ return work;
|
||||
|
||||
/* Drivers must not modify the NAPI state if they
|
||||
* consume the entire weight. In such cases this code
|
||||
@@ -6842,7 +6837,7 @@ static int napi_poll(struct napi_struct
|
||||
*/
|
||||
if (unlikely(napi_disable_pending(n))) {
|
||||
napi_complete(n);
|
||||
- goto out_unlock;
|
||||
+ return work;
|
||||
}
|
||||
|
||||
if (n->gro_bitmask) {
|
||||
@@ -6860,12 +6855,29 @@ static int napi_poll(struct napi_struct
|
||||
if (unlikely(!list_empty(&n->poll_list))) {
|
||||
pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
|
||||
n->dev ? n->dev->name : "backlog");
|
||||
- goto out_unlock;
|
||||
+ return work;
|
||||
}
|
||||
|
||||
- list_add_tail(&n->poll_list, repoll);
|
||||
+ *repoll = true;
|
||||
+
|
||||
+ return work;
|
||||
+}
|
||||
+
|
||||
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
|
||||
+{
|
||||
+ bool do_repoll = false;
|
||||
+ void *have;
|
||||
+ int work;
|
||||
+
|
||||
+ list_del_init(&n->poll_list);
|
||||
+
|
||||
+ have = netpoll_poll_lock(n);
|
||||
+
|
||||
+ work = __napi_poll(n, &do_repoll);
|
||||
+
|
||||
+ if (do_repoll)
|
||||
+ list_add_tail(&n->poll_list, repoll);
|
||||
|
||||
-out_unlock:
|
||||
netpoll_poll_unlock(have);
|
||||
|
||||
return work;
|
@ -0,0 +1,261 @@
|
||||
From: Wei Wang <weiwan@google.com>
|
||||
Date: Mon, 8 Feb 2021 11:34:09 -0800
|
||||
Subject: [PATCH] net: implement threaded-able napi poll loop support
|
||||
|
||||
This patch allows running each napi poll loop inside its own
|
||||
kernel thread.
|
||||
The kthread is created during netif_napi_add() if dev->threaded
|
||||
is set. And threaded mode is enabled in napi_enable(). We will
|
||||
provide a way to set dev->threaded and enable threaded mode
|
||||
without a device up/down in the following patch.
|
||||
|
||||
Once that threaded mode is enabled and the kthread is
|
||||
started, napi_schedule() will wake-up such thread instead
|
||||
of scheduling the softirq.
|
||||
|
||||
The threaded poll loop behaves quite like the net_rx_action,
|
||||
but it does not have to manipulate local irqs and uses
|
||||
an explicit scheduling point based on netdev_budget.
|
||||
|
||||
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
Co-developed-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Wei Wang <weiwan@google.com>
|
||||
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -347,6 +347,7 @@ struct napi_struct {
|
||||
struct list_head dev_list;
|
||||
struct hlist_node napi_hash_node;
|
||||
unsigned int napi_id;
|
||||
+ struct task_struct *thread;
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -357,6 +358,7 @@ enum {
|
||||
NAPI_STATE_LISTED, /* NAPI added to system lists */
|
||||
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
|
||||
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
|
||||
+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -367,6 +369,7 @@ enum {
|
||||
NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
|
||||
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
|
||||
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
|
||||
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
|
||||
};
|
||||
|
||||
enum gro_result {
|
||||
@@ -497,20 +500,7 @@ static inline bool napi_complete(struct
|
||||
*/
|
||||
void napi_disable(struct napi_struct *n);
|
||||
|
||||
-/**
|
||||
- * napi_enable - enable NAPI scheduling
|
||||
- * @n: NAPI context
|
||||
- *
|
||||
- * Resume NAPI from being scheduled on this context.
|
||||
- * Must be paired with napi_disable.
|
||||
- */
|
||||
-static inline void napi_enable(struct napi_struct *n)
|
||||
-{
|
||||
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
|
||||
- smp_mb__before_atomic();
|
||||
- clear_bit(NAPI_STATE_SCHED, &n->state);
|
||||
- clear_bit(NAPI_STATE_NPSVC, &n->state);
|
||||
-}
|
||||
+void napi_enable(struct napi_struct *n);
|
||||
|
||||
/**
|
||||
* napi_synchronize - wait until NAPI is not running
|
||||
@@ -1842,6 +1832,8 @@ enum netdev_ml_priv_type {
|
||||
*
|
||||
* @wol_enabled: Wake-on-LAN is enabled
|
||||
*
|
||||
+ * @threaded: napi threaded mode is enabled
|
||||
+ *
|
||||
* @net_notifier_list: List of per-net netdev notifier block
|
||||
* that follow this device when it is moved
|
||||
* to another network namespace.
|
||||
@@ -2161,6 +2153,7 @@ struct net_device {
|
||||
struct lock_class_key *qdisc_running_key;
|
||||
bool proto_down;
|
||||
unsigned wol_enabled:1;
|
||||
+ unsigned threaded:1;
|
||||
|
||||
struct list_head net_notifier_list;
|
||||
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -91,6 +91,7 @@
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/ethtool.h>
|
||||
#include <linux/skbuff.h>
|
||||
+#include <linux/kthread.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_trace.h>
|
||||
#include <net/net_namespace.h>
|
||||
@@ -1500,6 +1501,27 @@ void netdev_notify_peers(struct net_devi
|
||||
}
|
||||
EXPORT_SYMBOL(netdev_notify_peers);
|
||||
|
||||
+static int napi_threaded_poll(void *data);
|
||||
+
|
||||
+static int napi_kthread_create(struct napi_struct *n)
|
||||
+{
|
||||
+ int err = 0;
|
||||
+
|
||||
+ /* Create and wake up the kthread once to put it in
|
||||
+ * TASK_INTERRUPTIBLE mode to avoid the blocked task
|
||||
+ * warning and work with loadavg.
|
||||
+ */
|
||||
+ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
|
||||
+ n->dev->name, n->napi_id);
|
||||
+ if (IS_ERR(n->thread)) {
|
||||
+ err = PTR_ERR(n->thread);
|
||||
+ pr_err("kthread_run failed with err %d\n", err);
|
||||
+ n->thread = NULL;
|
||||
+ }
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
|
||||
{
|
||||
const struct net_device_ops *ops = dev->netdev_ops;
|
||||
@@ -4267,6 +4289,21 @@ int gro_normal_batch __read_mostly = 8;
|
||||
static inline void ____napi_schedule(struct softnet_data *sd,
|
||||
struct napi_struct *napi)
|
||||
{
|
||||
+ struct task_struct *thread;
|
||||
+
|
||||
+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
|
||||
+ /* Paired with smp_mb__before_atomic() in
|
||||
+ * napi_enable(). Use READ_ONCE() to guarantee
|
||||
+ * a complete read on napi->thread. Only call
|
||||
+ * wake_up_process() when it's not NULL.
|
||||
+ */
|
||||
+ thread = READ_ONCE(napi->thread);
|
||||
+ if (thread) {
|
||||
+ wake_up_process(thread);
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
list_add_tail(&napi->poll_list, &sd->poll_list);
|
||||
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
|
||||
}
|
||||
@@ -6758,6 +6795,12 @@ void netif_napi_add(struct net_device *d
|
||||
set_bit(NAPI_STATE_NPSVC, &napi->state);
|
||||
list_add_rcu(&napi->dev_list, &dev->napi_list);
|
||||
napi_hash_add(napi);
|
||||
+ /* Create kthread for this napi if dev->threaded is set.
|
||||
+ * Clear dev->threaded if kthread creation failed so that
|
||||
+ * threaded mode will not be enabled in napi_enable().
|
||||
+ */
|
||||
+ if (dev->threaded && napi_kthread_create(napi))
|
||||
+ dev->threaded = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(netif_napi_add);
|
||||
|
||||
@@ -6774,9 +6817,28 @@ void napi_disable(struct napi_struct *n)
|
||||
hrtimer_cancel(&n->timer);
|
||||
|
||||
clear_bit(NAPI_STATE_DISABLE, &n->state);
|
||||
+ clear_bit(NAPI_STATE_THREADED, &n->state);
|
||||
}
|
||||
EXPORT_SYMBOL(napi_disable);
|
||||
|
||||
+/**
|
||||
+ * napi_enable - enable NAPI scheduling
|
||||
+ * @n: NAPI context
|
||||
+ *
|
||||
+ * Resume NAPI from being scheduled on this context.
|
||||
+ * Must be paired with napi_disable.
|
||||
+ */
|
||||
+void napi_enable(struct napi_struct *n)
|
||||
+{
|
||||
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
|
||||
+ smp_mb__before_atomic();
|
||||
+ clear_bit(NAPI_STATE_SCHED, &n->state);
|
||||
+ clear_bit(NAPI_STATE_NPSVC, &n->state);
|
||||
+ if (n->dev->threaded && n->thread)
|
||||
+ set_bit(NAPI_STATE_THREADED, &n->state);
|
||||
+}
|
||||
+EXPORT_SYMBOL(napi_enable);
|
||||
+
|
||||
static void flush_gro_hash(struct napi_struct *napi)
|
||||
{
|
||||
int i;
|
||||
@@ -6802,6 +6864,11 @@ void __netif_napi_del(struct napi_struct
|
||||
|
||||
flush_gro_hash(napi);
|
||||
napi->gro_bitmask = 0;
|
||||
+
|
||||
+ if (napi->thread) {
|
||||
+ kthread_stop(napi->thread);
|
||||
+ napi->thread = NULL;
|
||||
+ }
|
||||
}
|
||||
EXPORT_SYMBOL(__netif_napi_del);
|
||||
|
||||
@@ -6883,6 +6950,51 @@ static int napi_poll(struct napi_struct
|
||||
return work;
|
||||
}
|
||||
|
||||
+static int napi_thread_wait(struct napi_struct *napi)
|
||||
+{
|
||||
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||
+
|
||||
+ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
||||
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
|
||||
+ WARN_ON(!list_empty(&napi->poll_list));
|
||||
+ __set_current_state(TASK_RUNNING);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ schedule();
|
||||
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||
+ }
|
||||
+ __set_current_state(TASK_RUNNING);
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+static int napi_threaded_poll(void *data)
|
||||
+{
|
||||
+ struct napi_struct *napi = data;
|
||||
+ void *have;
|
||||
+
|
||||
+ while (!napi_thread_wait(napi)) {
|
||||
+ for (;;) {
|
||||
+ bool repoll = false;
|
||||
+
|
||||
+ local_bh_disable();
|
||||
+
|
||||
+ have = netpoll_poll_lock(napi);
|
||||
+ __napi_poll(napi, &repoll);
|
||||
+ netpoll_poll_unlock(have);
|
||||
+
|
||||
+ __kfree_skb_flush();
|
||||
+ local_bh_enable();
|
||||
+
|
||||
+ if (!repoll)
|
||||
+ break;
|
||||
+
|
||||
+ cond_resched();
|
||||
+ }
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static __latent_entropy void net_rx_action(struct softirq_action *h)
|
||||
{
|
||||
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
|
@ -0,0 +1,177 @@
|
||||
From: Wei Wang <weiwan@google.com>
|
||||
Date: Mon, 8 Feb 2021 11:34:10 -0800
|
||||
Subject: [PATCH] net: add sysfs attribute to control napi threaded mode
|
||||
|
||||
This patch adds a new sysfs attribute to the network device class.
|
||||
Said attribute provides a per-device control to enable/disable the
|
||||
threaded mode for all the napi instances of the given network device,
|
||||
without the need for a device up/down.
|
||||
User sets it to 1 or 0 to enable or disable threaded mode.
|
||||
Note: when switching between threaded and the current softirq based mode
|
||||
for a napi instance, it will not immediately take effect if the napi is
|
||||
currently being polled. The mode switch will happen the next time
|
||||
napi_schedule() is called.
|
||||
|
||||
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
Co-developed-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Wei Wang <weiwan@google.com>
|
||||
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/Documentation/ABI/testing/sysfs-class-net
|
||||
+++ b/Documentation/ABI/testing/sysfs-class-net
|
||||
@@ -337,3 +337,18 @@ Contact: netdev@vger.kernel.org
|
||||
Description:
|
||||
32-bit unsigned integer counting the number of times the link has
|
||||
been down
|
||||
+
|
||||
+What: /sys/class/net/<iface>/threaded
|
||||
+Date: Jan 2021
|
||||
+KernelVersion: 5.12
|
||||
+Contact: netdev@vger.kernel.org
|
||||
+Description:
|
||||
+ Boolean value to control the threaded mode per device. User could
|
||||
+ set this value to enable/disable threaded mode for all napi
|
||||
+ belonging to this device, without the need to do device up/down.
|
||||
+
|
||||
+ Possible values:
|
||||
+ == ==================================
|
||||
+ 0 threaded mode disabled for this dev
|
||||
+ 1 threaded mode enabled for this dev
|
||||
+ == ==================================
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -491,6 +491,8 @@ static inline bool napi_complete(struct
|
||||
return napi_complete_done(n, 0);
|
||||
}
|
||||
|
||||
+int dev_set_threaded(struct net_device *dev, bool threaded);
|
||||
+
|
||||
/**
|
||||
* napi_disable - prevent NAPI from scheduling
|
||||
* @n: NAPI context
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -4293,8 +4293,9 @@ static inline void ____napi_schedule(str
|
||||
|
||||
if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
|
||||
/* Paired with smp_mb__before_atomic() in
|
||||
- * napi_enable(). Use READ_ONCE() to guarantee
|
||||
- * a complete read on napi->thread. Only call
|
||||
+ * napi_enable()/dev_set_threaded().
|
||||
+ * Use READ_ONCE() to guarantee a complete
|
||||
+ * read on napi->thread. Only call
|
||||
* wake_up_process() when it's not NULL.
|
||||
*/
|
||||
thread = READ_ONCE(napi->thread);
|
||||
@@ -6768,6 +6769,49 @@ static void init_gro_hash(struct napi_st
|
||||
napi->gro_bitmask = 0;
|
||||
}
|
||||
|
||||
+int dev_set_threaded(struct net_device *dev, bool threaded)
|
||||
+{
|
||||
+ struct napi_struct *napi;
|
||||
+ int err = 0;
|
||||
+
|
||||
+ if (dev->threaded == threaded)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (threaded) {
|
||||
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
|
||||
+ if (!napi->thread) {
|
||||
+ err = napi_kthread_create(napi);
|
||||
+ if (err) {
|
||||
+ threaded = false;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ dev->threaded = threaded;
|
||||
+
|
||||
+ /* Make sure kthread is created before THREADED bit
|
||||
+ * is set.
|
||||
+ */
|
||||
+ smp_mb__before_atomic();
|
||||
+
|
||||
+ /* Setting/unsetting threaded mode on a napi might not immediately
|
||||
+ * take effect, if the current napi instance is actively being
|
||||
+ * polled. In this case, the switch between threaded mode and
|
||||
+ * softirq mode will happen in the next round of napi_schedule().
|
||||
+ * This should not cause hiccups/stalls to the live traffic.
|
||||
+ */
|
||||
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
|
||||
+ if (threaded)
|
||||
+ set_bit(NAPI_STATE_THREADED, &napi->state);
|
||||
+ else
|
||||
+ clear_bit(NAPI_STATE_THREADED, &napi->state);
|
||||
+ }
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
|
||||
int (*poll)(struct napi_struct *, int), int weight)
|
||||
{
|
||||
--- a/net/core/net-sysfs.c
|
||||
+++ b/net/core/net-sysfs.c
|
||||
@@ -587,6 +587,45 @@ static ssize_t phys_switch_id_show(struc
|
||||
}
|
||||
static DEVICE_ATTR_RO(phys_switch_id);
|
||||
|
||||
+static ssize_t threaded_show(struct device *dev,
|
||||
+ struct device_attribute *attr, char *buf)
|
||||
+{
|
||||
+ struct net_device *netdev = to_net_dev(dev);
|
||||
+ ssize_t ret = -EINVAL;
|
||||
+
|
||||
+ if (!rtnl_trylock())
|
||||
+ return restart_syscall();
|
||||
+
|
||||
+ if (dev_isalive(netdev))
|
||||
+ ret = sprintf(buf, fmt_dec, netdev->threaded);
|
||||
+
|
||||
+ rtnl_unlock();
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int modify_napi_threaded(struct net_device *dev, unsigned long val)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ if (list_empty(&dev->napi_list))
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if (val != 0 && val != 1)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ ret = dev_set_threaded(dev, val);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static ssize_t threaded_store(struct device *dev,
|
||||
+ struct device_attribute *attr,
|
||||
+ const char *buf, size_t len)
|
||||
+{
|
||||
+ return netdev_store(dev, attr, buf, len, modify_napi_threaded);
|
||||
+}
|
||||
+static DEVICE_ATTR_RW(threaded);
|
||||
+
|
||||
static struct attribute *net_class_attrs[] __ro_after_init = {
|
||||
&dev_attr_netdev_group.attr,
|
||||
&dev_attr_type.attr,
|
||||
@@ -619,6 +658,7 @@ static struct attribute *net_class_attrs
|
||||
&dev_attr_proto_down.attr,
|
||||
&dev_attr_carrier_up_count.attr,
|
||||
&dev_attr_carrier_down_count.attr,
|
||||
+ &dev_attr_threaded.attr,
|
||||
NULL,
|
||||
};
|
||||
ATTRIBUTE_GROUPS(net_class);
|
@ -0,0 +1,93 @@
|
||||
From: Wei Wang <weiwan@google.com>
|
||||
Date: Mon, 1 Mar 2021 17:21:13 -0800
|
||||
Subject: [PATCH] net: fix race between napi kthread mode and busy poll
|
||||
|
||||
Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
|
||||
determine if the kthread owns this napi and could call napi->poll() on
|
||||
it. However, if socket busy poll is enabled, it is possible that the
|
||||
busy poll thread grabs this SCHED bit (after the previous napi->poll()
|
||||
invokes napi_complete_done() and clears SCHED bit) and tries to poll
|
||||
on the same napi. napi_disable() could grab the SCHED bit as well.
|
||||
This patch tries to fix this race by adding a new bit
|
||||
NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in
|
||||
____napi_schedule() if the threaded mode is enabled, and gets cleared
|
||||
in napi_complete_done(), and we only poll the napi in kthread if this
|
||||
bit is set. This helps distinguish the ownership of the napi between
|
||||
kthread and other scenarios and fixes the race issue.
|
||||
|
||||
Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
|
||||
Reported-by: Martin Zaharinov <micron10@gmail.com>
|
||||
Suggested-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Wei Wang <weiwan@google.com>
|
||||
Cc: Alexander Duyck <alexanderduyck@fb.com>
|
||||
Cc: Eric Dumazet <edumazet@google.com>
|
||||
Cc: Paolo Abeni <pabeni@redhat.com>
|
||||
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -359,6 +359,7 @@ enum {
|
||||
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
|
||||
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
|
||||
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
|
||||
+ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -370,6 +371,7 @@ enum {
|
||||
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
|
||||
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
|
||||
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
|
||||
+ NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
|
||||
};
|
||||
|
||||
enum gro_result {
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -4300,6 +4300,8 @@ static inline void ____napi_schedule(str
|
||||
*/
|
||||
thread = READ_ONCE(napi->thread);
|
||||
if (thread) {
|
||||
+ if (thread->state != TASK_INTERRUPTIBLE)
|
||||
+ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
|
||||
wake_up_process(thread);
|
||||
return;
|
||||
}
|
||||
@@ -6560,7 +6562,8 @@ bool napi_complete_done(struct napi_stru
|
||||
|
||||
WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
|
||||
|
||||
- new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
|
||||
+ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
|
||||
+ NAPIF_STATE_SCHED_THREADED);
|
||||
|
||||
/* If STATE_MISSED was set, leave STATE_SCHED set,
|
||||
* because we will call napi->poll() one more time.
|
||||
@@ -6996,16 +6999,25 @@ static int napi_poll(struct napi_struct
|
||||
|
||||
static int napi_thread_wait(struct napi_struct *napi)
|
||||
{
|
||||
+ bool woken = false;
|
||||
+
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
||||
- if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
|
||||
+ /* Testing SCHED_THREADED bit here to make sure the current
|
||||
+ * kthread owns this napi and could poll on this napi.
|
||||
+ * Testing SCHED bit is not enough because SCHED bit might be
|
||||
+ * set by some other busy poll thread or by napi_disable().
|
||||
+ */
|
||||
+ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
|
||||
WARN_ON(!list_empty(&napi->poll_list));
|
||||
__set_current_state(TASK_RUNNING);
|
||||
return 0;
|
||||
}
|
||||
|
||||
schedule();
|
||||
+ /* woken being true indicates this thread owns this napi. */
|
||||
+ woken = true;
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
@ -0,0 +1,53 @@
|
||||
From: Paolo Abeni <pabeni@redhat.com>
|
||||
Date: Fri, 9 Apr 2021 17:24:17 +0200
|
||||
Subject: [PATCH] net: fix hangup on napi_disable for threaded napi
|
||||
|
||||
napi_disable() is subject to a hangup when the threaded
|
||||
mode is enabled and the napi is under heavy traffic.
|
||||
|
||||
If the relevant napi has been scheduled and the napi_disable()
|
||||
kicks in before the next napi_thread_wait() completes - so
|
||||
that the latter quits due to the napi_disable_pending() condition,
|
||||
the existing code leaves the NAPI_STATE_SCHED bit set and the
|
||||
napi_disable() loop waiting for such bit will hang.
|
||||
|
||||
This patch addresses the issue by dropping the NAPI_STATE_DISABLE
|
||||
bit test in napi_thread_wait(). The later napi_threaded_poll()
|
||||
iteration will take care of clearing the NAPI_STATE_SCHED.
|
||||
|
||||
This also addresses a related problem reported by Jakub:
|
||||
before this patch a napi_disable()/napi_enable() pair killed
|
||||
the napi thread, effectively disabling the threaded mode.
|
||||
On the patched kernel napi_disable() simply stops scheduling
|
||||
the relevant thread.
|
||||
|
||||
v1 -> v2:
|
||||
- let the main napi_threaded_poll() loop clear the SCHED bit
|
||||
|
||||
Reported-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Reviewed-by: Eric Dumazet <edumazet@google.com>
|
||||
Link: https://lore.kernel.org/r/883923fa22745a9589e8610962b7dc59df09fb1f.1617981844.git.pabeni@redhat.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -7003,7 +7003,7 @@ static int napi_thread_wait(struct napi_
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
- while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
||||
+ while (!kthread_should_stop()) {
|
||||
/* Testing SCHED_THREADED bit here to make sure the current
|
||||
* kthread owns this napi and could poll on this napi.
|
||||
* Testing SCHED bit is not enough because SCHED bit might be
|
||||
@@ -7021,6 +7021,7 @@ static int napi_thread_wait(struct napi_
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
+
|
||||
return -1;
|
||||
}
|
||||
|
@ -0,0 +1,52 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Fri, 20 Nov 2020 13:49:13 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add hash offset field to tuple
|
||||
|
||||
Add a placeholder field to calculate hash tuple offset. Similar to
|
||||
2c407aca6497 ("netfilter: conntrack: avoid gcc-10 zero-length-bounds
|
||||
warning").
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -107,6 +107,10 @@ struct flow_offload_tuple {
|
||||
|
||||
u8 l3proto;
|
||||
u8 l4proto;
|
||||
+
|
||||
+ /* All members above are keys for lookups, see flow_offload_hash(). */
|
||||
+ struct { } __hash;
|
||||
+
|
||||
u8 dir;
|
||||
|
||||
u16 mtu;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -191,14 +191,14 @@ static u32 flow_offload_hash(const void
|
||||
{
|
||||
const struct flow_offload_tuple *tuple = data;
|
||||
|
||||
- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||
+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
|
||||
}
|
||||
|
||||
static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
|
||||
{
|
||||
const struct flow_offload_tuple_rhash *tuplehash = data;
|
||||
|
||||
- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
|
||||
}
|
||||
|
||||
static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
|
||||
@@ -207,7 +207,7 @@ static int flow_offload_hash_cmp(struct
|
||||
const struct flow_offload_tuple *tuple = arg->key;
|
||||
const struct flow_offload_tuple_rhash *x = ptr;
|
||||
|
||||
- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
|
||||
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
|
||||
return 1;
|
||||
|
||||
return 0;
|
@ -0,0 +1,98 @@
|
||||
From: Oz Shlomo <ozsh@nvidia.com>
|
||||
Date: Tue, 23 Mar 2021 00:56:19 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: separate replace, destroy and
|
||||
stats to different workqueues
|
||||
|
||||
Currently the flow table offload replace, destroy and stats work items are
|
||||
executed on a single workqueue. As such, DESTROY and STATS commands may
|
||||
be backlogged after a burst of REPLACE work items. This scenario can bloat
|
||||
up memory and may cause active connections to age.
|
||||
|
||||
Instantiate add, del and stats workqueues to avoid backlogs of non-dependent
|
||||
actions. Provide sysfs control over the workqueue attributes, allowing
|
||||
userspace applications to control the workqueue cpumask.
|
||||
|
||||
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
|
||||
Reviewed-by: Paul Blakey <paulb@nvidia.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -13,7 +13,9 @@
|
||||
#include <net/netfilter/nf_conntrack_core.h>
|
||||
#include <net/netfilter/nf_conntrack_tuple.h>
|
||||
|
||||
-static struct workqueue_struct *nf_flow_offload_wq;
|
||||
+static struct workqueue_struct *nf_flow_offload_add_wq;
|
||||
+static struct workqueue_struct *nf_flow_offload_del_wq;
|
||||
+static struct workqueue_struct *nf_flow_offload_stats_wq;
|
||||
|
||||
struct flow_offload_work {
|
||||
struct list_head list;
|
||||
@@ -827,7 +829,12 @@ static void flow_offload_work_handler(st
|
||||
|
||||
static void flow_offload_queue_work(struct flow_offload_work *offload)
|
||||
{
|
||||
- queue_work(nf_flow_offload_wq, &offload->work);
|
||||
+ if (offload->cmd == FLOW_CLS_REPLACE)
|
||||
+ queue_work(nf_flow_offload_add_wq, &offload->work);
|
||||
+ else if (offload->cmd == FLOW_CLS_DESTROY)
|
||||
+ queue_work(nf_flow_offload_del_wq, &offload->work);
|
||||
+ else
|
||||
+ queue_work(nf_flow_offload_stats_wq, &offload->work);
|
||||
}
|
||||
|
||||
static struct flow_offload_work *
|
||||
@@ -899,8 +906,11 @@ void nf_flow_offload_stats(struct nf_flo
|
||||
|
||||
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
|
||||
{
|
||||
- if (nf_flowtable_hw_offload(flowtable))
|
||||
- flush_workqueue(nf_flow_offload_wq);
|
||||
+ if (nf_flowtable_hw_offload(flowtable)) {
|
||||
+ flush_workqueue(nf_flow_offload_add_wq);
|
||||
+ flush_workqueue(nf_flow_offload_del_wq);
|
||||
+ flush_workqueue(nf_flow_offload_stats_wq);
|
||||
+ }
|
||||
}
|
||||
|
||||
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
|
||||
@@ -1013,15 +1023,33 @@ EXPORT_SYMBOL_GPL(nf_flow_table_offload_
|
||||
|
||||
int nf_flow_table_offload_init(void)
|
||||
{
|
||||
- nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
|
||||
- WQ_UNBOUND, 0);
|
||||
- if (!nf_flow_offload_wq)
|
||||
+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
|
||||
+ WQ_UNBOUND | WQ_SYSFS, 0);
|
||||
+ if (!nf_flow_offload_add_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
|
||||
+ WQ_UNBOUND | WQ_SYSFS, 0);
|
||||
+ if (!nf_flow_offload_del_wq)
|
||||
+ goto err_del_wq;
|
||||
+
|
||||
+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
|
||||
+ WQ_UNBOUND | WQ_SYSFS, 0);
|
||||
+ if (!nf_flow_offload_stats_wq)
|
||||
+ goto err_stats_wq;
|
||||
+
|
||||
return 0;
|
||||
+
|
||||
+err_stats_wq:
|
||||
+ destroy_workqueue(nf_flow_offload_del_wq);
|
||||
+err_del_wq:
|
||||
+ destroy_workqueue(nf_flow_offload_add_wq);
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
|
||||
void nf_flow_table_offload_exit(void)
|
||||
{
|
||||
- destroy_workqueue(nf_flow_offload_wq);
|
||||
+ destroy_workqueue(nf_flow_offload_add_wq);
|
||||
+ destroy_workqueue(nf_flow_offload_del_wq);
|
||||
+ destroy_workqueue(nf_flow_offload_stats_wq);
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
From: YueHaibing <yuehaibing@huawei.com>
|
||||
Date: Tue, 23 Mar 2021 00:56:21 +0100
|
||||
Subject: [PATCH] netfilter: conntrack: Remove unused variable
|
||||
declaration
|
||||
|
||||
commit e97c3e278e95 ("tproxy: split off ipv6 defragmentation to a separate
|
||||
module") left behind this.
|
||||
|
||||
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
|
||||
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
|
||||
@@ -4,7 +4,4 @@
|
||||
|
||||
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
|
||||
|
||||
-#include <linux/sysctl.h>
|
||||
-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
|
||||
-
|
||||
#endif /* _NF_CONNTRACK_IPV6_H*/
|
@ -0,0 +1,291 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:22 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: consolidate
|
||||
skb_try_make_writable() call
|
||||
|
||||
Fetch the layer 4 header size to be mangled by NAT when building the
|
||||
tuple, then use it to make writable the network and the transport
|
||||
headers. After this update, the NAT routines now assume that the skbuff
|
||||
area is writable. Do the pointer refetch only after the single
|
||||
skb_try_make_writable() call.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -394,9 +394,6 @@ static int nf_flow_nat_port_tcp(struct s
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||
- return -1;
|
||||
-
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
|
||||
|
||||
@@ -408,9 +405,6 @@ static int nf_flow_nat_port_udp(struct s
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||
- return -1;
|
||||
-
|
||||
udph = (void *)(skb_network_header(skb) + thoff);
|
||||
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||
inet_proto_csum_replace2(&udph->check, skb, port,
|
||||
@@ -446,9 +440,6 @@ int nf_flow_snat_port(const struct flow_
|
||||
struct flow_ports *hdr;
|
||||
__be16 port, new_port;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||
- return -1;
|
||||
-
|
||||
hdr = (void *)(skb_network_header(skb) + thoff);
|
||||
|
||||
switch (dir) {
|
||||
@@ -477,9 +468,6 @@ int nf_flow_dnat_port(const struct flow_
|
||||
struct flow_ports *hdr;
|
||||
__be16 port, new_port;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||
- return -1;
|
||||
-
|
||||
hdr = (void *)(skb_network_header(skb) + thoff);
|
||||
|
||||
switch (dir) {
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -39,9 +39,6 @@ static int nf_flow_nat_ip_tcp(struct sk_
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||
- return -1;
|
||||
-
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
|
||||
|
||||
@@ -53,9 +50,6 @@ static int nf_flow_nat_ip_udp(struct sk_
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||
- return -1;
|
||||
-
|
||||
udph = (void *)(skb_network_header(skb) + thoff);
|
||||
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||
inet_proto_csum_replace4(&udph->check, skb, addr,
|
||||
@@ -136,19 +130,17 @@ static int nf_flow_dnat_ip(const struct
|
||||
}
|
||||
|
||||
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
- unsigned int thoff, enum flow_offload_tuple_dir dir)
|
||||
+ unsigned int thoff, enum flow_offload_tuple_dir dir,
|
||||
+ struct iphdr *iph)
|
||||
{
|
||||
- struct iphdr *iph = ip_hdr(skb);
|
||||
-
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
|
||||
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||
- nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
|
||||
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||
return -1;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
|
||||
(nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||
- nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
|
||||
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
@@ -160,10 +152,10 @@ static bool ip_has_options(unsigned int
|
||||
}
|
||||
|
||||
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
|
||||
- struct flow_offload_tuple *tuple)
|
||||
+ struct flow_offload_tuple *tuple, u32 *hdrsize)
|
||||
{
|
||||
- unsigned int thoff, hdrsize;
|
||||
struct flow_ports *ports;
|
||||
+ unsigned int thoff;
|
||||
struct iphdr *iph;
|
||||
|
||||
if (!pskb_may_pull(skb, sizeof(*iph)))
|
||||
@@ -178,10 +170,10 @@ static int nf_flow_tuple_ip(struct sk_bu
|
||||
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
- hdrsize = sizeof(struct tcphdr);
|
||||
+ *hdrsize = sizeof(struct tcphdr);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- hdrsize = sizeof(struct udphdr);
|
||||
+ *hdrsize = sizeof(struct udphdr);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
@@ -191,7 +183,7 @@ static int nf_flow_tuple_ip(struct sk_bu
|
||||
return -1;
|
||||
|
||||
thoff = iph->ihl * 4;
|
||||
- if (!pskb_may_pull(skb, thoff + hdrsize))
|
||||
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
|
||||
return -1;
|
||||
|
||||
iph = ip_hdr(skb);
|
||||
@@ -252,11 +244,12 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
unsigned int thoff;
|
||||
struct iphdr *iph;
|
||||
__be32 nexthop;
|
||||
+ u32 hdrsize;
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IP))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
|
||||
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
|
||||
return NF_ACCEPT;
|
||||
|
||||
tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||
@@ -271,11 +264,13 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (skb_try_make_writable(skb, sizeof(*iph)))
|
||||
+ iph = ip_hdr(skb);
|
||||
+ thoff = iph->ihl * 4;
|
||||
+ if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
- thoff = ip_hdr(skb)->ihl * 4;
|
||||
- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
|
||||
+ iph = ip_hdr(skb);
|
||||
+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
@@ -285,10 +280,9 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
|
||||
+ if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
|
||||
return NF_DROP;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
ip_decrease_ttl(iph);
|
||||
skb->tstamp = 0;
|
||||
|
||||
@@ -317,9 +311,6 @@ static int nf_flow_nat_ipv6_tcp(struct s
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||
- return -1;
|
||||
-
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
|
||||
new_addr->s6_addr32, true);
|
||||
@@ -333,9 +324,6 @@ static int nf_flow_nat_ipv6_udp(struct s
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||
- return -1;
|
||||
-
|
||||
udph = (void *)(skb_network_header(skb) + thoff);
|
||||
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||
inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
|
||||
@@ -417,31 +405,30 @@ static int nf_flow_dnat_ipv6(const struc
|
||||
|
||||
static int nf_flow_nat_ipv6(const struct flow_offload *flow,
|
||||
struct sk_buff *skb,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct ipv6hdr *ip6h)
|
||||
{
|
||||
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
|
||||
unsigned int thoff = sizeof(*ip6h);
|
||||
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
|
||||
(nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||
- nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
|
||||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||
return -1;
|
||||
|
||||
- ip6h = ipv6_hdr(skb);
|
||||
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
|
||||
(nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||
- nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
|
||||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
|
||||
- struct flow_offload_tuple *tuple)
|
||||
+ struct flow_offload_tuple *tuple, u32 *hdrsize)
|
||||
{
|
||||
- unsigned int thoff, hdrsize;
|
||||
struct flow_ports *ports;
|
||||
struct ipv6hdr *ip6h;
|
||||
+ unsigned int thoff;
|
||||
|
||||
if (!pskb_may_pull(skb, sizeof(*ip6h)))
|
||||
return -1;
|
||||
@@ -450,10 +437,10 @@ static int nf_flow_tuple_ipv6(struct sk_
|
||||
|
||||
switch (ip6h->nexthdr) {
|
||||
case IPPROTO_TCP:
|
||||
- hdrsize = sizeof(struct tcphdr);
|
||||
+ *hdrsize = sizeof(struct tcphdr);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- hdrsize = sizeof(struct udphdr);
|
||||
+ *hdrsize = sizeof(struct udphdr);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
@@ -463,7 +450,7 @@ static int nf_flow_tuple_ipv6(struct sk_
|
||||
return -1;
|
||||
|
||||
thoff = sizeof(*ip6h);
|
||||
- if (!pskb_may_pull(skb, thoff + hdrsize))
|
||||
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
|
||||
return -1;
|
||||
|
||||
ip6h = ipv6_hdr(skb);
|
||||
@@ -493,11 +480,12 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
struct net_device *outdev;
|
||||
struct ipv6hdr *ip6h;
|
||||
struct rt6_info *rt;
|
||||
+ u32 hdrsize;
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IPV6))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
|
||||
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
|
||||
return NF_ACCEPT;
|
||||
|
||||
tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||
@@ -523,13 +511,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
- if (skb_try_make_writable(skb, sizeof(*ip6h)))
|
||||
+ if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
|
||||
+ ip6h = ipv6_hdr(skb);
|
||||
+ if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0)
|
||||
return NF_DROP;
|
||||
|
||||
- ip6h = ipv6_hdr(skb);
|
||||
ip6h->hop_limit--;
|
||||
skb->tstamp = 0;
|
||||
|
@ -0,0 +1,35 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:23 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: move skb_try_make_writable()
|
||||
before NAT in IPv4
|
||||
|
||||
For consistency with the IPv6 flowtable datapath and to make sure the
|
||||
skbuff is writable right before the NAT header updates.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -266,10 +266,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
iph = ip_hdr(skb);
|
||||
thoff = iph->ihl * 4;
|
||||
- if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
- return NF_DROP;
|
||||
-
|
||||
- iph = ip_hdr(skb);
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
@@ -280,6 +276,10 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
+ if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
+ return NF_DROP;
|
||||
+
|
||||
+ iph = ip_hdr(skb);
|
||||
if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
|
||||
return NF_DROP;
|
||||
|
@ -0,0 +1,82 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:24 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: move FLOW_OFFLOAD_DIR_MAX away
|
||||
from enumeration
|
||||
|
||||
This allows removing the default case, which should never happen and
|
||||
that was added to avoid gcc warnings on unhandled FLOW_OFFLOAD_DIR_MAX
|
||||
enumeration case.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -86,8 +86,8 @@ static inline bool nf_flowtable_hw_offlo
|
||||
enum flow_offload_tuple_dir {
|
||||
FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
|
||||
FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
|
||||
- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
|
||||
};
|
||||
+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
|
||||
|
||||
struct flow_offload_tuple {
|
||||
union {
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -453,8 +453,6 @@ int nf_flow_snat_port(const struct flow_
|
||||
new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
|
||||
hdr->dest = new_port;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
|
||||
return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
@@ -481,8 +479,6 @@ int nf_flow_dnat_port(const struct flow_
|
||||
new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
|
||||
hdr->source = new_port;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
|
||||
return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -96,8 +96,6 @@ static int nf_flow_snat_ip(const struct
|
||||
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
|
||||
iph->daddr = new_addr;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
csum_replace4(&iph->check, addr, new_addr);
|
||||
|
||||
@@ -121,8 +119,6 @@ static int nf_flow_dnat_ip(const struct
|
||||
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
|
||||
iph->saddr = new_addr;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
csum_replace4(&iph->check, addr, new_addr);
|
||||
|
||||
@@ -371,8 +367,6 @@ static int nf_flow_snat_ipv6(const struc
|
||||
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
|
||||
ip6h->daddr = new_addr;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
|
||||
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
@@ -396,8 +390,6 @@ static int nf_flow_dnat_ipv6(const struc
|
||||
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
|
||||
ip6h->saddr = new_addr;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
|
||||
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
@ -0,0 +1,394 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:25 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: fast NAT functions never fail
|
||||
|
||||
Simplify existing fast NAT routines by returning void. After the
|
||||
skb_try_make_writable() call consolidation, these routines cannot ever
|
||||
fail.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -228,12 +228,12 @@ void nf_flow_table_free(struct nf_flowta
|
||||
|
||||
void flow_offload_teardown(struct flow_offload *flow);
|
||||
|
||||
-int nf_flow_snat_port(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, enum flow_offload_tuple_dir dir);
|
||||
-int nf_flow_dnat_port(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, enum flow_offload_tuple_dir dir);
|
||||
+void nf_flow_snat_port(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, unsigned int thoff,
|
||||
+ u8 protocol, enum flow_offload_tuple_dir dir);
|
||||
+void nf_flow_dnat_port(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, unsigned int thoff,
|
||||
+ u8 protocol, enum flow_offload_tuple_dir dir);
|
||||
|
||||
struct flow_ports {
|
||||
__be16 source, dest;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -388,20 +388,17 @@ static void nf_flow_offload_work_gc(stru
|
||||
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
|
||||
}
|
||||
|
||||
-
|
||||
-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
- __be16 port, __be16 new_port)
|
||||
+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ __be16 port, __be16 new_port)
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
- __be16 port, __be16 new_port)
|
||||
+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ __be16 port, __be16 new_port)
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
@@ -412,30 +409,24 @@ static int nf_flow_nat_port_udp(struct s
|
||||
if (!udph->check)
|
||||
udph->check = CSUM_MANGLED_0;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, __be16 port, __be16 new_port)
|
||||
+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
|
||||
+ u8 protocol, __be16 port, __be16 new_port)
|
||||
{
|
||||
switch (protocol) {
|
||||
case IPPROTO_TCP:
|
||||
- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
|
||||
break;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-int nf_flow_snat_port(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, enum flow_offload_tuple_dir dir)
|
||||
+void nf_flow_snat_port(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, unsigned int thoff,
|
||||
+ u8 protocol, enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct flow_ports *hdr;
|
||||
__be16 port, new_port;
|
||||
@@ -455,13 +446,13 @@ int nf_flow_snat_port(const struct flow_
|
||||
break;
|
||||
}
|
||||
|
||||
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_snat_port);
|
||||
|
||||
-int nf_flow_dnat_port(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, enum flow_offload_tuple_dir dir)
|
||||
+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
+ unsigned int thoff, u8 protocol,
|
||||
+ enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct flow_ports *hdr;
|
||||
__be16 port, new_port;
|
||||
@@ -481,7 +472,7 @@ int nf_flow_dnat_port(const struct flow_
|
||||
break;
|
||||
}
|
||||
|
||||
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -34,19 +34,17 @@ static int nf_flow_state_check(struct fl
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
- __be32 addr, __be32 new_addr)
|
||||
+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ __be32 addr, __be32 new_addr)
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
- __be32 addr, __be32 new_addr)
|
||||
+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ __be32 addr, __be32 new_addr)
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
@@ -57,31 +55,25 @@ static int nf_flow_nat_ip_udp(struct sk_
|
||||
if (!udph->check)
|
||||
udph->check = CSUM_MANGLED_0;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
|
||||
- unsigned int thoff, __be32 addr,
|
||||
- __be32 new_addr)
|
||||
+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
|
||||
+ unsigned int thoff, __be32 addr,
|
||||
+ __be32 new_addr)
|
||||
{
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
|
||||
break;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
- struct iphdr *iph, unsigned int thoff,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+static void nf_flow_snat_ip(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, struct iphdr *iph,
|
||||
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
__be32 addr, new_addr;
|
||||
|
||||
@@ -99,12 +91,12 @@ static int nf_flow_snat_ip(const struct
|
||||
}
|
||||
csum_replace4(&iph->check, addr, new_addr);
|
||||
|
||||
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||
}
|
||||
|
||||
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
- struct iphdr *iph, unsigned int thoff,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+static void nf_flow_dnat_ip(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, struct iphdr *iph,
|
||||
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
__be32 addr, new_addr;
|
||||
|
||||
@@ -122,24 +114,21 @@ static int nf_flow_dnat_ip(const struct
|
||||
}
|
||||
csum_replace4(&iph->check, addr, new_addr);
|
||||
|
||||
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
unsigned int thoff, enum flow_offload_tuple_dir dir,
|
||||
struct iphdr *iph)
|
||||
{
|
||||
- if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
|
||||
- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||
- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||
- return -1;
|
||||
-
|
||||
- if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
|
||||
- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||
- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||
- return -1;
|
||||
-
|
||||
- return 0;
|
||||
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
|
||||
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
|
||||
+ }
|
||||
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
|
||||
+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
|
||||
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
|
||||
+ }
|
||||
}
|
||||
|
||||
static bool ip_has_options(unsigned int thoff)
|
||||
@@ -276,8 +265,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
return NF_DROP;
|
||||
|
||||
iph = ip_hdr(skb);
|
||||
- if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
|
||||
|
||||
ip_decrease_ttl(iph);
|
||||
skb->tstamp = 0;
|
||||
@@ -301,22 +289,21 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
|
||||
|
||||
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
- struct in6_addr *addr,
|
||||
- struct in6_addr *new_addr)
|
||||
+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ struct in6_addr *addr,
|
||||
+ struct in6_addr *new_addr,
|
||||
+ struct ipv6hdr *ip6h)
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
|
||||
new_addr->s6_addr32, true);
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
- struct in6_addr *addr,
|
||||
- struct in6_addr *new_addr)
|
||||
+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ struct in6_addr *addr,
|
||||
+ struct in6_addr *new_addr)
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
@@ -327,32 +314,26 @@ static int nf_flow_nat_ipv6_udp(struct s
|
||||
if (!udph->check)
|
||||
udph->check = CSUM_MANGLED_0;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
- unsigned int thoff, struct in6_addr *addr,
|
||||
- struct in6_addr *new_addr)
|
||||
+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
+ unsigned int thoff, struct in6_addr *addr,
|
||||
+ struct in6_addr *new_addr)
|
||||
{
|
||||
switch (ip6h->nexthdr) {
|
||||
case IPPROTO_TCP:
|
||||
- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
|
||||
break;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
- unsigned int thoff,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
+ unsigned int thoff,
|
||||
+ enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct in6_addr addr, new_addr;
|
||||
|
||||
@@ -369,13 +350,13 @@ static int nf_flow_snat_ipv6(const struc
|
||||
break;
|
||||
}
|
||||
|
||||
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
}
|
||||
|
||||
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
- unsigned int thoff,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
+ unsigned int thoff,
|
||||
+ enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct in6_addr addr, new_addr;
|
||||
|
||||
@@ -392,27 +373,24 @@ static int nf_flow_dnat_ipv6(const struc
|
||||
break;
|
||||
}
|
||||
|
||||
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct ipv6hdr *ip6h)
|
||||
+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct ipv6hdr *ip6h)
|
||||
{
|
||||
unsigned int thoff = sizeof(*ip6h);
|
||||
|
||||
- if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
|
||||
- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||
- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||
- return -1;
|
||||
-
|
||||
- if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
|
||||
- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||
- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||
- return -1;
|
||||
-
|
||||
- return 0;
|
||||
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
|
||||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
|
||||
+ }
|
||||
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
|
||||
+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
|
||||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
|
||||
+ }
|
||||
}
|
||||
|
||||
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
|
||||
@@ -507,8 +485,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
return NF_DROP;
|
||||
|
||||
ip6h = ipv6_hdr(skb);
|
||||
- if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
|
||||
|
||||
ip6h->hop_limit--;
|
||||
skb->tstamp = 0;
|
@ -0,0 +1,46 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:26 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: call dst_check() to fall back to
|
||||
classic forwarding
|
||||
|
||||
In case the route is stale, pass up the packet to the classic forwarding
|
||||
path for re-evaluation and schedule this flow entry for removal.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -197,14 +197,6 @@ static bool nf_flow_exceeds_mtu(const st
|
||||
return true;
|
||||
}
|
||||
|
||||
-static int nf_flow_offload_dst_check(struct dst_entry *dst)
|
||||
-{
|
||||
- if (unlikely(dst_xfrm(dst)))
|
||||
- return dst_check(dst, 0) ? 0 : -1;
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
|
||||
const struct nf_hook_state *state,
|
||||
struct dst_entry *dst)
|
||||
@@ -256,7 +248,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
|
||||
- if (nf_flow_offload_dst_check(&rt->dst)) {
|
||||
+ if (!dst_check(&rt->dst, 0)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
@@ -476,7 +468,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
|
||||
- if (nf_flow_offload_dst_check(&rt->dst)) {
|
||||
+ if (!dst_check(&rt->dst, 0)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_ACCEPT;
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:27 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: refresh timeout after dst and
|
||||
writable checks
|
||||
|
||||
Refresh the timeout (and retry hardware offload) once the skbuff dst
|
||||
is confirmed to be current and after the skbuff is made writable.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -246,8 +246,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- flow_offload_refresh(flow_table, flow);
|
||||
-
|
||||
if (!dst_check(&rt->dst, 0)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_ACCEPT;
|
||||
@@ -256,6 +254,8 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
+ flow_offload_refresh(flow_table, flow);
|
||||
+
|
||||
iph = ip_hdr(skb);
|
||||
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
|
||||
|
||||
@@ -466,8 +466,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
sizeof(*ip6h)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- flow_offload_refresh(flow_table, flow);
|
||||
-
|
||||
if (!dst_check(&rt->dst, 0)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_ACCEPT;
|
||||
@@ -476,6 +474,8 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
+ flow_offload_refresh(flow_table, flow);
|
||||
+
|
||||
ip6h = ipv6_hdr(skb);
|
||||
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
|
||||
|
@ -0,0 +1,103 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:28 +0100
|
||||
Subject: [PATCH] netfilter: nftables: update table flags from the commit
|
||||
phase
|
||||
|
||||
Do not update table flags from the preparation phase. Store the flags
|
||||
update into the transaction, then update the flags from the commit
|
||||
phase.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_tables.h
|
||||
+++ b/include/net/netfilter/nf_tables.h
|
||||
@@ -1470,13 +1470,16 @@ struct nft_trans_chain {
|
||||
|
||||
struct nft_trans_table {
|
||||
bool update;
|
||||
- bool enable;
|
||||
+ u8 state;
|
||||
+ u32 flags;
|
||||
};
|
||||
|
||||
#define nft_trans_table_update(trans) \
|
||||
(((struct nft_trans_table *)trans->data)->update)
|
||||
-#define nft_trans_table_enable(trans) \
|
||||
- (((struct nft_trans_table *)trans->data)->enable)
|
||||
+#define nft_trans_table_state(trans) \
|
||||
+ (((struct nft_trans_table *)trans->data)->state)
|
||||
+#define nft_trans_table_flags(trans) \
|
||||
+ (((struct nft_trans_table *)trans->data)->flags)
|
||||
|
||||
struct nft_trans_elem {
|
||||
struct nft_set *set;
|
||||
--- a/net/netfilter/nf_tables_api.c
|
||||
+++ b/net/netfilter/nf_tables_api.c
|
||||
@@ -891,6 +891,12 @@ static void nf_tables_table_disable(stru
|
||||
nft_table_disable(net, table, 0);
|
||||
}
|
||||
|
||||
+enum {
|
||||
+ NFT_TABLE_STATE_UNCHANGED = 0,
|
||||
+ NFT_TABLE_STATE_DORMANT,
|
||||
+ NFT_TABLE_STATE_WAKEUP
|
||||
+};
|
||||
+
|
||||
static int nf_tables_updtable(struct nft_ctx *ctx)
|
||||
{
|
||||
struct nft_trans *trans;
|
||||
@@ -914,19 +920,17 @@ static int nf_tables_updtable(struct nft
|
||||
|
||||
if ((flags & NFT_TABLE_F_DORMANT) &&
|
||||
!(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
|
||||
- nft_trans_table_enable(trans) = false;
|
||||
+ nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
|
||||
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
|
||||
ctx->table->flags & NFT_TABLE_F_DORMANT) {
|
||||
- ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
|
||||
ret = nf_tables_table_enable(ctx->net, ctx->table);
|
||||
if (ret >= 0)
|
||||
- nft_trans_table_enable(trans) = true;
|
||||
- else
|
||||
- ctx->table->flags |= NFT_TABLE_F_DORMANT;
|
||||
+ nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
|
||||
}
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
+ nft_trans_table_flags(trans) = flags;
|
||||
nft_trans_table_update(trans) = true;
|
||||
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
|
||||
return 0;
|
||||
@@ -7908,11 +7912,10 @@ static int nf_tables_commit(struct net *
|
||||
switch (trans->msg_type) {
|
||||
case NFT_MSG_NEWTABLE:
|
||||
if (nft_trans_table_update(trans)) {
|
||||
- if (!nft_trans_table_enable(trans)) {
|
||||
- nf_tables_table_disable(net,
|
||||
- trans->ctx.table);
|
||||
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
|
||||
- }
|
||||
+ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
|
||||
+ nf_tables_table_disable(net, trans->ctx.table);
|
||||
+
|
||||
+ trans->ctx.table->flags = nft_trans_table_flags(trans);
|
||||
} else {
|
||||
nft_clear(net, trans->ctx.table);
|
||||
}
|
||||
@@ -8125,11 +8128,9 @@ static int __nf_tables_abort(struct net
|
||||
switch (trans->msg_type) {
|
||||
case NFT_MSG_NEWTABLE:
|
||||
if (nft_trans_table_update(trans)) {
|
||||
- if (nft_trans_table_enable(trans)) {
|
||||
- nf_tables_table_disable(net,
|
||||
- trans->ctx.table);
|
||||
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
|
||||
- }
|
||||
+ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
|
||||
+ nf_tables_table_disable(net, trans->ctx.table);
|
||||
+
|
||||
nft_trans_destroy(trans);
|
||||
} else {
|
||||
list_del_rcu(&trans->ctx.table->list);
|
@ -0,0 +1,170 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:32 +0100
|
||||
Subject: [PATCH] net: resolve forwarding path from virtual netdevice and
|
||||
HW destination address
|
||||
|
||||
This patch adds dev_fill_forward_path() which resolves the path to reach
|
||||
the real netdevice from the IP forwarding side. This function takes as
|
||||
input the netdevice and the destination hardware address and it walks
|
||||
down the devices calling .ndo_fill_forward_path() for each device until
|
||||
the real device is found.
|
||||
|
||||
For instance, assuming the following topology:
|
||||
|
||||
IP forwarding
|
||||
/ \
|
||||
br0 eth0
|
||||
/ \
|
||||
eth1 eth2
|
||||
.
|
||||
.
|
||||
.
|
||||
ethX
|
||||
ab:cd:ef:ab:cd:ef
|
||||
|
||||
where eth1 and eth2 are bridge ports and eth0 provides WAN connectivity.
|
||||
ethX is the interface in another box which is connected to the eth1
|
||||
bridge port.
|
||||
|
||||
For packets going through IP forwarding to br0 whose destination MAC
|
||||
address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
|
||||
following path:
|
||||
|
||||
br0 -> eth1
|
||||
|
||||
.ndo_fill_forward_path for br0 looks up the destination MAC address in
|
||||
the FDB to get the bridge port eth1.
|
||||
|
||||
This information allows creating a fast path that bypasses the classic
|
||||
bridge and IP forwarding paths, so packets go directly from the bridge
|
||||
port eth1 to eth0 (wan interface) and vice versa.
|
||||
|
||||
fast path
|
||||
.------------------------.
|
||||
/ \
|
||||
| IP forwarding |
|
||||
| / \ \/
|
||||
| br0 eth0
|
||||
. / \
|
||||
-> eth1 eth2
|
||||
.
|
||||
.
|
||||
.
|
||||
ethX
|
||||
ab:cd:ef:ab:cd:ef
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -827,6 +827,27 @@ typedef u16 (*select_queue_fallback_t)(s
|
||||
struct sk_buff *skb,
|
||||
struct net_device *sb_dev);
|
||||
|
||||
+enum net_device_path_type {
|
||||
+ DEV_PATH_ETHERNET = 0,
|
||||
+};
|
||||
+
|
||||
+struct net_device_path {
|
||||
+ enum net_device_path_type type;
|
||||
+ const struct net_device *dev;
|
||||
+};
|
||||
+
|
||||
+#define NET_DEVICE_PATH_STACK_MAX 5
|
||||
+
|
||||
+struct net_device_path_stack {
|
||||
+ int num_paths;
|
||||
+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
|
||||
+};
|
||||
+
|
||||
+struct net_device_path_ctx {
|
||||
+ const struct net_device *dev;
|
||||
+ const u8 *daddr;
|
||||
+};
|
||||
+
|
||||
enum tc_setup_type {
|
||||
TC_SETUP_QDISC_MQPRIO,
|
||||
TC_SETUP_CLSU32,
|
||||
@@ -1273,6 +1294,8 @@ struct netdev_net_notifier {
|
||||
* struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
|
||||
* If a device is paired with a peer device, return the peer instance.
|
||||
* The caller must be under RCU read context.
|
||||
+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
|
||||
+ * Get the forwarding path to reach the real device from the HW destination address
|
||||
*/
|
||||
struct net_device_ops {
|
||||
int (*ndo_init)(struct net_device *dev);
|
||||
@@ -1481,6 +1504,8 @@ struct net_device_ops {
|
||||
int (*ndo_tunnel_ctl)(struct net_device *dev,
|
||||
struct ip_tunnel_parm *p, int cmd);
|
||||
struct net_device * (*ndo_get_peer_dev)(struct net_device *dev);
|
||||
+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path);
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -2828,6 +2853,8 @@ void dev_remove_offload(struct packet_of
|
||||
|
||||
int dev_get_iflink(const struct net_device *dev);
|
||||
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
|
||||
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
|
||||
+ struct net_device_path_stack *stack);
|
||||
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
|
||||
unsigned short mask);
|
||||
struct net_device *dev_get_by_name(struct net *net, const char *name);
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -847,6 +847,52 @@ int dev_fill_metadata_dst(struct net_dev
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
|
||||
|
||||
+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
|
||||
+{
|
||||
+ int k = stack->num_paths++;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
|
||||
+ return NULL;
|
||||
+
|
||||
+ return &stack->path[k];
|
||||
+}
|
||||
+
|
||||
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
|
||||
+ struct net_device_path_stack *stack)
|
||||
+{
|
||||
+ const struct net_device *last_dev;
|
||||
+ struct net_device_path_ctx ctx = {
|
||||
+ .dev = dev,
|
||||
+ .daddr = daddr,
|
||||
+ };
|
||||
+ struct net_device_path *path;
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ stack->num_paths = 0;
|
||||
+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
|
||||
+ last_dev = ctx.dev;
|
||||
+ path = dev_fwd_path(stack);
|
||||
+ if (!path)
|
||||
+ return -1;
|
||||
+
|
||||
+ memset(path, 0, sizeof(struct net_device_path));
|
||||
+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
|
||||
+ if (ret < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(last_dev == ctx.dev))
|
||||
+ return -1;
|
||||
+ }
|
||||
+ path = dev_fwd_path(stack);
|
||||
+ if (!path)
|
||||
+ return -1;
|
||||
+ path->type = DEV_PATH_ETHERNET;
|
||||
+ path->dev = ctx.dev;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
|
||||
+
|
||||
/**
|
||||
* __dev_get_by_name - find a device by its name
|
||||
* @net: the applicable net namespace
|
@ -0,0 +1,80 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:33 +0100
|
||||
Subject: [PATCH] net: 8021q: resolve forwarding path for vlan devices
|
||||
|
||||
Add .ndo_fill_forward_path for vlan devices.
|
||||
|
||||
For instance, assuming the following topology:
|
||||
|
||||
                   IP forwarding
|
||||
                  /             \
|
||||
            eth0.100             eth0
|
||||
               |
|
||||
              eth0
|
||||
               .
|
||||
               .
|
||||
               .
|
||||
              ethX
|
||||
       ab:cd:ef:ab:cd:ef
|
||||
|
||||
For packets going through IP forwarding to eth0.100 whose destination
|
||||
MAC address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
|
||||
following path:
|
||||
|
||||
eth0.100 -> eth0
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -829,11 +829,18 @@ typedef u16 (*select_queue_fallback_t)(s
|
||||
|
||||
enum net_device_path_type {
|
||||
DEV_PATH_ETHERNET = 0,
|
||||
+ DEV_PATH_VLAN,
|
||||
};
|
||||
|
||||
struct net_device_path {
|
||||
enum net_device_path_type type;
|
||||
const struct net_device *dev;
|
||||
+ union {
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ } encap;
|
||||
+ };
|
||||
};
|
||||
|
||||
#define NET_DEVICE_PATH_STACK_MAX 5
|
||||
--- a/net/8021q/vlan_dev.c
|
||||
+++ b/net/8021q/vlan_dev.c
|
||||
@@ -770,6 +770,20 @@ static int vlan_dev_get_iflink(const str
|
||||
return real_dev->ifindex;
|
||||
}
|
||||
|
||||
+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
|
||||
+
|
||||
+ path->type = DEV_PATH_VLAN;
|
||||
+ path->encap.id = vlan->vlan_id;
|
||||
+ path->encap.proto = vlan->vlan_proto;
|
||||
+ path->dev = ctx->dev;
|
||||
+ ctx->dev = vlan->real_dev;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static const struct ethtool_ops vlan_ethtool_ops = {
|
||||
.get_link_ksettings = vlan_ethtool_get_link_ksettings,
|
||||
.get_drvinfo = vlan_ethtool_get_drvinfo,
|
||||
@@ -808,6 +822,7 @@ static const struct net_device_ops vlan_
|
||||
#endif
|
||||
.ndo_fix_features = vlan_dev_fix_features,
|
||||
.ndo_get_iflink = vlan_dev_get_iflink,
|
||||
+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
|
||||
};
|
||||
|
||||
static void vlan_dev_free(struct net_device *dev)
|
@ -0,0 +1,62 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:34 +0100
|
||||
Subject: [PATCH] net: bridge: resolve forwarding path for bridge devices
|
||||
|
||||
Add .ndo_fill_forward_path for bridge devices.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -830,6 +830,7 @@ typedef u16 (*select_queue_fallback_t)(s
|
||||
enum net_device_path_type {
|
||||
DEV_PATH_ETHERNET = 0,
|
||||
DEV_PATH_VLAN,
|
||||
+ DEV_PATH_BRIDGE,
|
||||
};
|
||||
|
||||
struct net_device_path {
|
||||
--- a/net/bridge/br_device.c
|
||||
+++ b/net/bridge/br_device.c
|
||||
@@ -398,6 +398,32 @@ static int br_del_slave(struct net_devic
|
||||
return br_del_if(br, slave_dev);
|
||||
}
|
||||
|
||||
+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct net_bridge_fdb_entry *f;
|
||||
+ struct net_bridge_port *dst;
|
||||
+ struct net_bridge *br;
|
||||
+
|
||||
+ if (netif_is_bridge_port(ctx->dev))
|
||||
+ return -1;
|
||||
+
|
||||
+ br = netdev_priv(ctx->dev);
|
||||
+ f = br_fdb_find_rcu(br, ctx->daddr, 0);
|
||||
+ if (!f || !f->dst)
|
||||
+ return -1;
|
||||
+
|
||||
+ dst = READ_ONCE(f->dst);
|
||||
+ if (!dst)
|
||||
+ return -1;
|
||||
+
|
||||
+ path->type = DEV_PATH_BRIDGE;
|
||||
+ path->dev = dst->br->dev;
|
||||
+ ctx->dev = dst->dev;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static const struct ethtool_ops br_ethtool_ops = {
|
||||
.get_drvinfo = br_getinfo,
|
||||
.get_link = ethtool_op_get_link,
|
||||
@@ -432,6 +458,7 @@ static const struct net_device_ops br_ne
|
||||
.ndo_bridge_setlink = br_setlink,
|
||||
.ndo_bridge_dellink = br_dellink,
|
||||
.ndo_features_check = passthru_features_check,
|
||||
+ .ndo_fill_forward_path = br_fill_forward_path,
|
||||
};
|
||||
|
||||
static struct device_type br_type = {
|
@ -0,0 +1,207 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:35 +0100
|
||||
Subject: [PATCH] net: bridge: resolve forwarding path for VLAN tag
|
||||
actions in bridge devices
|
||||
|
||||
Depending on the VLAN settings of the bridge and the port, the bridge can
|
||||
either add or remove a tag. When vlan filtering is enabled, the fdb lookup
|
||||
also needs to know the VLAN tag/proto for the destination address.
|
||||
To provide this, keep track of the stack of VLAN tags for the path in the
|
||||
lookup context.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -841,10 +841,20 @@ struct net_device_path {
|
||||
u16 id;
|
||||
__be16 proto;
|
||||
} encap;
|
||||
+ struct {
|
||||
+ enum {
|
||||
+ DEV_PATH_BR_VLAN_KEEP,
|
||||
+ DEV_PATH_BR_VLAN_TAG,
|
||||
+ DEV_PATH_BR_VLAN_UNTAG,
|
||||
+ } vlan_mode;
|
||||
+ u16 vlan_id;
|
||||
+ __be16 vlan_proto;
|
||||
+ } bridge;
|
||||
};
|
||||
};
|
||||
|
||||
#define NET_DEVICE_PATH_STACK_MAX 5
|
||||
+#define NET_DEVICE_PATH_VLAN_MAX 2
|
||||
|
||||
struct net_device_path_stack {
|
||||
int num_paths;
|
||||
@@ -854,6 +864,12 @@ struct net_device_path_stack {
|
||||
struct net_device_path_ctx {
|
||||
const struct net_device *dev;
|
||||
const u8 *daddr;
|
||||
+
|
||||
+ int num_vlans;
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
|
||||
};
|
||||
|
||||
enum tc_setup_type {
|
||||
--- a/net/8021q/vlan_dev.c
|
||||
+++ b/net/8021q/vlan_dev.c
|
||||
@@ -780,6 +780,12 @@ static int vlan_dev_fill_forward_path(st
|
||||
path->encap.proto = vlan->vlan_proto;
|
||||
path->dev = ctx->dev;
|
||||
ctx->dev = vlan->real_dev;
|
||||
+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
|
||||
+ return -ENOSPC;
|
||||
+
|
||||
+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
|
||||
+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
|
||||
+ ctx->num_vlans++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
--- a/net/bridge/br_device.c
|
||||
+++ b/net/bridge/br_device.c
|
||||
@@ -409,7 +409,10 @@ static int br_fill_forward_path(struct n
|
||||
return -1;
|
||||
|
||||
br = netdev_priv(ctx->dev);
|
||||
- f = br_fdb_find_rcu(br, ctx->daddr, 0);
|
||||
+
|
||||
+ br_vlan_fill_forward_path_pvid(br, ctx, path);
|
||||
+
|
||||
+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
|
||||
if (!f || !f->dst)
|
||||
return -1;
|
||||
|
||||
@@ -417,10 +420,28 @@ static int br_fill_forward_path(struct n
|
||||
if (!dst)
|
||||
return -1;
|
||||
|
||||
+ if (br_vlan_fill_forward_path_mode(br, dst, path))
|
||||
+ return -1;
|
||||
+
|
||||
path->type = DEV_PATH_BRIDGE;
|
||||
path->dev = dst->br->dev;
|
||||
ctx->dev = dst->dev;
|
||||
|
||||
+ switch (path->bridge.vlan_mode) {
|
||||
+ case DEV_PATH_BR_VLAN_TAG:
|
||||
+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
|
||||
+ return -ENOSPC;
|
||||
+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
|
||||
+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
|
||||
+ ctx->num_vlans++;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG:
|
||||
+ ctx->num_vlans--;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_KEEP:
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
--- a/net/bridge/br_private.h
|
||||
+++ b/net/bridge/br_private.h
|
||||
@@ -1093,6 +1093,13 @@ void br_vlan_notify(const struct net_bri
|
||||
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
|
||||
const struct net_bridge_vlan *range_end);
|
||||
|
||||
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
|
||||
+ struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path);
|
||||
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
|
||||
+ struct net_bridge_port *dst,
|
||||
+ struct net_device_path *path);
|
||||
+
|
||||
static inline struct net_bridge_vlan_group *br_vlan_group(
|
||||
const struct net_bridge *br)
|
||||
{
|
||||
@@ -1250,6 +1257,19 @@ static inline int nbp_get_num_vlan_infos
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
|
||||
+ struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
|
||||
+ struct net_bridge_port *dst,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
|
||||
static inline struct net_bridge_vlan_group *br_vlan_group(
|
||||
const struct net_bridge *br)
|
||||
--- a/net/bridge/br_vlan.c
|
||||
+++ b/net/bridge/br_vlan.c
|
||||
@@ -1327,6 +1327,59 @@ int br_vlan_get_pvid_rcu(const struct ne
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
|
||||
|
||||
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
|
||||
+ struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct net_bridge_vlan_group *vg;
|
||||
+ int idx = ctx->num_vlans - 1;
|
||||
+ u16 vid;
|
||||
+
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
|
||||
+
|
||||
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
|
||||
+ return;
|
||||
+
|
||||
+ vg = br_vlan_group(br);
|
||||
+
|
||||
+ if (idx >= 0 &&
|
||||
+ ctx->vlan[idx].proto == br->vlan_proto) {
|
||||
+ vid = ctx->vlan[idx].id;
|
||||
+ } else {
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
|
||||
+ vid = br_get_pvid(vg);
|
||||
+ }
|
||||
+
|
||||
+ path->bridge.vlan_id = vid;
|
||||
+ path->bridge.vlan_proto = br->vlan_proto;
|
||||
+}
|
||||
+
|
||||
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
|
||||
+ struct net_bridge_port *dst,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct net_bridge_vlan_group *vg;
|
||||
+ struct net_bridge_vlan *v;
|
||||
+
|
||||
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
|
||||
+ return 0;
|
||||
+
|
||||
+ vg = nbp_vlan_group_rcu(dst);
|
||||
+ v = br_vlan_find(vg, path->bridge.vlan_id);
|
||||
+ if (!v || !br_vlan_should_use(v))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
|
||||
+ return 0;
|
||||
+
|
||||
+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
|
||||
+ else
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
int br_vlan_get_info(const struct net_device *dev, u16 vid,
|
||||
struct bridge_vlan_info *p_vinfo)
|
||||
{
|
@ -0,0 +1,113 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:36 +0100
|
||||
Subject: [PATCH] net: ppp: resolve forwarding path for bridge pppoe
|
||||
devices
|
||||
|
||||
Pass on the PPPoE session ID, destination hardware address and the real
|
||||
device.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/drivers/net/ppp/ppp_generic.c
|
||||
+++ b/drivers/net/ppp/ppp_generic.c
|
||||
@@ -1466,12 +1466,34 @@ static void ppp_dev_priv_destructor(stru
|
||||
ppp_destroy_interface(ppp);
|
||||
}
|
||||
|
||||
+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct ppp *ppp = netdev_priv(ctx->dev);
|
||||
+ struct ppp_channel *chan;
|
||||
+ struct channel *pch;
|
||||
+
|
||||
+ if (ppp->flags & SC_MULTILINK)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if (list_empty(&ppp->channels))
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ pch = list_first_entry(&ppp->channels, struct channel, clist);
|
||||
+ chan = pch->chan;
|
||||
+ if (!chan->ops->fill_forward_path)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ return chan->ops->fill_forward_path(ctx, path, chan);
|
||||
+}
|
||||
+
|
||||
static const struct net_device_ops ppp_netdev_ops = {
|
||||
.ndo_init = ppp_dev_init,
|
||||
.ndo_uninit = ppp_dev_uninit,
|
||||
.ndo_start_xmit = ppp_start_xmit,
|
||||
.ndo_do_ioctl = ppp_net_ioctl,
|
||||
.ndo_get_stats64 = ppp_get_stats64,
|
||||
+ .ndo_fill_forward_path = ppp_fill_forward_path,
|
||||
};
|
||||
|
||||
static struct device_type ppp_type = {
|
||||
--- a/drivers/net/ppp/pppoe.c
|
||||
+++ b/drivers/net/ppp/pppoe.c
|
||||
@@ -972,8 +972,31 @@ static int pppoe_xmit(struct ppp_channel
|
||||
return __pppoe_xmit(sk, skb);
|
||||
}
|
||||
|
||||
+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path,
|
||||
+ const struct ppp_channel *chan)
|
||||
+{
|
||||
+ struct sock *sk = (struct sock *)chan->private;
|
||||
+ struct pppox_sock *po = pppox_sk(sk);
|
||||
+ struct net_device *dev = po->pppoe_dev;
|
||||
+
|
||||
+ if (sock_flag(sk, SOCK_DEAD) ||
|
||||
+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
|
||||
+ return -1;
|
||||
+
|
||||
+ path->type = DEV_PATH_PPPOE;
|
||||
+ path->encap.proto = htons(ETH_P_PPP_SES);
|
||||
+ path->encap.id = be16_to_cpu(po->num);
|
||||
+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
|
||||
+ path->dev = ctx->dev;
|
||||
+ ctx->dev = dev;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static const struct ppp_channel_ops pppoe_chan_ops = {
|
||||
.start_xmit = pppoe_xmit,
|
||||
+ .fill_forward_path = pppoe_fill_forward_path,
|
||||
};
|
||||
|
||||
static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -831,6 +831,7 @@ enum net_device_path_type {
|
||||
DEV_PATH_ETHERNET = 0,
|
||||
DEV_PATH_VLAN,
|
||||
DEV_PATH_BRIDGE,
|
||||
+ DEV_PATH_PPPOE,
|
||||
};
|
||||
|
||||
struct net_device_path {
|
||||
@@ -840,6 +841,7 @@ struct net_device_path {
|
||||
struct {
|
||||
u16 id;
|
||||
__be16 proto;
|
||||
+ u8 h_dest[ETH_ALEN];
|
||||
} encap;
|
||||
struct {
|
||||
enum {
|
||||
--- a/include/linux/ppp_channel.h
|
||||
+++ b/include/linux/ppp_channel.h
|
||||
@@ -28,6 +28,9 @@ struct ppp_channel_ops {
|
||||
int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
|
||||
/* Handle an ioctl call that has come in via /dev/ppp. */
|
||||
int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
|
||||
+ int (*fill_forward_path)(struct net_device_path_ctx *,
|
||||
+ struct net_device_path *,
|
||||
+ const struct ppp_channel *);
|
||||
};
|
||||
|
||||
struct ppp_channel {
|
@ -0,0 +1,63 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:37 +0100
|
||||
Subject: [PATCH] net: dsa: resolve forwarding path for dsa slave ports
|
||||
|
||||
Add .ndo_fill_forward_path for dsa slave port devices.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -832,6 +832,7 @@ enum net_device_path_type {
|
||||
DEV_PATH_VLAN,
|
||||
DEV_PATH_BRIDGE,
|
||||
DEV_PATH_PPPOE,
|
||||
+ DEV_PATH_DSA,
|
||||
};
|
||||
|
||||
struct net_device_path {
|
||||
@@ -852,6 +853,10 @@ struct net_device_path {
|
||||
u16 vlan_id;
|
||||
__be16 vlan_proto;
|
||||
} bridge;
|
||||
+ struct {
|
||||
+ int port;
|
||||
+ u16 proto;
|
||||
+ } dsa;
|
||||
};
|
||||
};
|
||||
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -1619,6 +1619,21 @@ static struct devlink_port *dsa_slave_ge
|
||||
return dp->ds->devlink ? &dp->devlink_port : NULL;
|
||||
}
|
||||
|
||||
+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
|
||||
+ struct dsa_port *cpu_dp = dp->cpu_dp;
|
||||
+
|
||||
+ path->dev = ctx->dev;
|
||||
+ path->type = DEV_PATH_DSA;
|
||||
+ path->dsa.proto = cpu_dp->tag_ops->proto;
|
||||
+ path->dsa.port = dp->index;
|
||||
+ ctx->dev = cpu_dp->master;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static const struct net_device_ops dsa_slave_netdev_ops = {
|
||||
.ndo_open = dsa_slave_open,
|
||||
.ndo_stop = dsa_slave_close,
|
||||
@@ -1644,6 +1659,7 @@ static const struct net_device_ops dsa_s
|
||||
.ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
|
||||
.ndo_get_devlink_port = dsa_slave_get_devlink_port,
|
||||
.ndo_change_mtu = dsa_slave_change_mtu,
|
||||
+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
|
||||
};
|
||||
|
||||
static struct device_type dsa_type = {
|
@ -0,0 +1,147 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:38 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add xmit path types
|
||||
|
||||
Add the xmit_type field that defines the two supported xmit paths in the
|
||||
flowtable data plane, which are the neighbour and the xfrm xmit paths.
|
||||
This patch prepares for new flowtable xmit path types to come.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -89,6 +89,11 @@ enum flow_offload_tuple_dir {
|
||||
};
|
||||
#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
|
||||
|
||||
+enum flow_offload_xmit_type {
|
||||
+ FLOW_OFFLOAD_XMIT_NEIGH = 0,
|
||||
+ FLOW_OFFLOAD_XMIT_XFRM,
|
||||
+};
|
||||
+
|
||||
struct flow_offload_tuple {
|
||||
union {
|
||||
struct in_addr src_v4;
|
||||
@@ -111,7 +116,8 @@ struct flow_offload_tuple {
|
||||
/* All members above are keys for lookups, see flow_offload_hash(). */
|
||||
struct { } __hash;
|
||||
|
||||
- u8 dir;
|
||||
+ u8 dir:6,
|
||||
+ xmit_type:2;
|
||||
|
||||
u16 mtu;
|
||||
|
||||
@@ -157,7 +163,8 @@ static inline __s32 nf_flow_timeout_delt
|
||||
|
||||
struct nf_flow_route {
|
||||
struct {
|
||||
- struct dst_entry *dst;
|
||||
+ struct dst_entry *dst;
|
||||
+ enum flow_offload_xmit_type xmit_type;
|
||||
} tuple[FLOW_OFFLOAD_DIR_MAX];
|
||||
};
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -95,6 +95,7 @@ static int flow_offload_fill_route(struc
|
||||
}
|
||||
|
||||
flow_tuple->iifidx = other_dst->dev->ifindex;
|
||||
+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
|
||||
flow_tuple->dst_cache = dst;
|
||||
|
||||
return 0;
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -235,8 +235,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||
- outdev = rt->dst.dev;
|
||||
|
||||
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
return NF_ACCEPT;
|
||||
@@ -265,13 +263,16 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
- if (unlikely(dst_xfrm(&rt->dst))) {
|
||||
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
+
|
||||
+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
|
||||
IPCB(skb)->iif = skb->dev->ifindex;
|
||||
IPCB(skb)->flags = IPSKB_FORWARDED;
|
||||
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
|
||||
}
|
||||
|
||||
+ outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||
skb_dst_set_noref(skb, &rt->dst);
|
||||
@@ -456,8 +457,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
|
||||
- outdev = rt->dst.dev;
|
||||
|
||||
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
return NF_ACCEPT;
|
||||
@@ -485,13 +484,16 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
- if (unlikely(dst_xfrm(&rt->dst))) {
|
||||
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
+
|
||||
+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
|
||||
IP6CB(skb)->iif = skb->dev->ifindex;
|
||||
IP6CB(skb)->flags = IP6SKB_FORWARDED;
|
||||
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
|
||||
}
|
||||
|
||||
+ outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||
skb_dst_set_noref(skb, &rt->dst);
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -19,6 +19,22 @@ struct nft_flow_offload {
|
||||
struct nft_flowtable *flowtable;
|
||||
};
|
||||
|
||||
+static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst)
|
||||
+{
|
||||
+ if (dst_xfrm(dst))
|
||||
+ return FLOW_OFFLOAD_XMIT_XFRM;
|
||||
+
|
||||
+ return FLOW_OFFLOAD_XMIT_NEIGH;
|
||||
+}
|
||||
+
|
||||
+static void nft_default_forward_path(struct nf_flow_route *route,
|
||||
+ struct dst_entry *dst_cache,
|
||||
+ enum ip_conntrack_dir dir)
|
||||
+{
|
||||
+ route->tuple[dir].dst = dst_cache;
|
||||
+ route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
|
||||
+}
|
||||
+
|
||||
static int nft_flow_route(const struct nft_pktinfo *pkt,
|
||||
const struct nf_conn *ct,
|
||||
struct nf_flow_route *route,
|
||||
@@ -44,8 +60,8 @@ static int nft_flow_route(const struct n
|
||||
if (!other_dst)
|
||||
return -ENOENT;
|
||||
|
||||
- route->tuple[dir].dst = this_dst;
|
||||
- route->tuple[!dir].dst = other_dst;
|
||||
+ nft_default_forward_path(route, this_dst, dir);
|
||||
+ nft_default_forward_path(route, other_dst, !dir);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,191 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:39 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: use dev_fill_forward_path() to
|
||||
obtain ingress device
|
||||
|
||||
The ingress device in the tuple is obtained from the route in the reply
|
||||
direction. Use dev_fill_forward_path() instead to get the real ingress
|
||||
device for this flow.
|
||||
|
||||
Fall back to use the ingress device that the IP forwarding route
|
||||
provides if:
|
||||
|
||||
- dev_fill_forward_path() finds no real ingress device.
|
||||
- the ingress device that is obtained is not part of the flowtable
|
||||
devices.
|
||||
- this route has a xfrm policy.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -164,6 +164,9 @@ static inline __s32 nf_flow_timeout_delt
|
||||
struct nf_flow_route {
|
||||
struct {
|
||||
struct dst_entry *dst;
|
||||
+ struct {
|
||||
+ u32 ifindex;
|
||||
+ } in;
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
} tuple[FLOW_OFFLOAD_DIR_MAX];
|
||||
};
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -79,7 +79,6 @@ static int flow_offload_fill_route(struc
|
||||
enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
|
||||
- struct dst_entry *other_dst = route->tuple[!dir].dst;
|
||||
struct dst_entry *dst = route->tuple[dir].dst;
|
||||
|
||||
if (!dst_hold_safe(route->tuple[dir].dst))
|
||||
@@ -94,7 +93,7 @@ static int flow_offload_fill_route(struc
|
||||
break;
|
||||
}
|
||||
|
||||
- flow_tuple->iifidx = other_dst->dev->ifindex;
|
||||
+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
|
||||
flow_tuple->xmit_type = route->tuple[dir].xmit_type;
|
||||
flow_tuple->dst_cache = dst;
|
||||
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -31,14 +31,104 @@ static void nft_default_forward_path(str
|
||||
struct dst_entry *dst_cache,
|
||||
enum ip_conntrack_dir dir)
|
||||
{
|
||||
+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
|
||||
route->tuple[dir].dst = dst_cache;
|
||||
route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
|
||||
}
|
||||
|
||||
+static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
|
||||
+ const struct dst_entry *dst_cache,
|
||||
+ const struct nf_conn *ct,
|
||||
+ enum ip_conntrack_dir dir,
|
||||
+ struct net_device_path_stack *stack)
|
||||
+{
|
||||
+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
|
||||
+ struct net_device *dev = dst_cache->dev;
|
||||
+ unsigned char ha[ETH_ALEN];
|
||||
+ struct neighbour *n;
|
||||
+ u8 nud_state;
|
||||
+
|
||||
+ n = dst_neigh_lookup(dst_cache, daddr);
|
||||
+ if (!n)
|
||||
+ return -1;
|
||||
+
|
||||
+ read_lock_bh(&n->lock);
|
||||
+ nud_state = n->nud_state;
|
||||
+ ether_addr_copy(ha, n->ha);
|
||||
+ read_unlock_bh(&n->lock);
|
||||
+ neigh_release(n);
|
||||
+
|
||||
+ if (!(nud_state & NUD_VALID))
|
||||
+ return -1;
|
||||
+
|
||||
+ return dev_fill_forward_path(dev, ha, stack);
|
||||
+}
|
||||
+
|
||||
+struct nft_forward_info {
|
||||
+ const struct net_device *indev;
|
||||
+};
|
||||
+
|
||||
+static void nft_dev_path_info(const struct net_device_path_stack *stack,
|
||||
+ struct nft_forward_info *info)
|
||||
+{
|
||||
+ const struct net_device_path *path;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < stack->num_paths; i++) {
|
||||
+ path = &stack->path[i];
|
||||
+ switch (path->type) {
|
||||
+ case DEV_PATH_ETHERNET:
|
||||
+ info->indev = path->dev;
|
||||
+ break;
|
||||
+ case DEV_PATH_VLAN:
|
||||
+ case DEV_PATH_BRIDGE:
|
||||
+ default:
|
||||
+ info->indev = NULL;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static bool nft_flowtable_find_dev(const struct net_device *dev,
|
||||
+ struct nft_flowtable *ft)
|
||||
+{
|
||||
+ struct nft_hook *hook;
|
||||
+ bool found = false;
|
||||
+
|
||||
+ list_for_each_entry_rcu(hook, &ft->hook_list, list) {
|
||||
+ if (hook->ops.dev != dev)
|
||||
+ continue;
|
||||
+
|
||||
+ found = true;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ return found;
|
||||
+}
|
||||
+
|
||||
+static void nft_dev_forward_path(struct nf_flow_route *route,
|
||||
+ const struct nf_conn *ct,
|
||||
+ enum ip_conntrack_dir dir,
|
||||
+ struct nft_flowtable *ft)
|
||||
+{
|
||||
+ const struct dst_entry *dst = route->tuple[dir].dst;
|
||||
+ struct net_device_path_stack stack;
|
||||
+ struct nft_forward_info info = {};
|
||||
+
|
||||
+ if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
|
||||
+ nft_dev_path_info(&stack, &info);
|
||||
+
|
||||
+ if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
|
||||
+ return;
|
||||
+
|
||||
+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
|
||||
+}
|
||||
+
|
||||
static int nft_flow_route(const struct nft_pktinfo *pkt,
|
||||
const struct nf_conn *ct,
|
||||
struct nf_flow_route *route,
|
||||
- enum ip_conntrack_dir dir)
|
||||
+ enum ip_conntrack_dir dir,
|
||||
+ struct nft_flowtable *ft)
|
||||
{
|
||||
struct dst_entry *this_dst = skb_dst(pkt->skb);
|
||||
struct dst_entry *other_dst = NULL;
|
||||
@@ -63,6 +153,12 @@ static int nft_flow_route(const struct n
|
||||
nft_default_forward_path(route, this_dst, dir);
|
||||
nft_default_forward_path(route, other_dst, !dir);
|
||||
|
||||
+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
|
||||
+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
|
||||
+ nft_dev_forward_path(route, ct, dir, ft);
|
||||
+ nft_dev_forward_path(route, ct, !dir, ft);
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -90,8 +186,8 @@ static void nft_flow_offload_eval(const
|
||||
struct nft_flow_offload *priv = nft_expr_priv(expr);
|
||||
struct nf_flowtable *flowtable = &priv->flowtable->data;
|
||||
struct tcphdr _tcph, *tcph = NULL;
|
||||
+ struct nf_flow_route route = {};
|
||||
enum ip_conntrack_info ctinfo;
|
||||
- struct nf_flow_route route;
|
||||
struct flow_offload *flow;
|
||||
enum ip_conntrack_dir dir;
|
||||
struct nf_conn *ct;
|
||||
@@ -128,7 +224,7 @@ static void nft_flow_offload_eval(const
|
||||
goto out;
|
||||
|
||||
dir = CTINFO2DIR(ctinfo);
|
||||
- if (nft_flow_route(pkt, ct, &route, dir) < 0)
|
||||
+ if (nft_flow_route(pkt, ct, &route, dir, priv->flowtable) < 0)
|
||||
goto err_flow_route;
|
||||
|
||||
flow = flow_offload_alloc(ct);
|
@ -0,0 +1,374 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:40 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: use dev_fill_forward_path() to
|
||||
obtain egress device
|
||||
|
||||
The egress device in the tuple is obtained from the route. Use
|
||||
dev_fill_forward_path() instead to provide the real egress device for
|
||||
this flow whenever this is available.
|
||||
|
||||
The new FLOW_OFFLOAD_XMIT_DIRECT type uses dev_queue_xmit() to transmit
|
||||
ethernet frames. Cache the source and destination hardware address to
|
||||
use dev_queue_xmit() to transfer packets.
|
||||
|
||||
The FLOW_OFFLOAD_XMIT_DIRECT replaces FLOW_OFFLOAD_XMIT_NEIGH if
|
||||
dev_fill_forward_path() finds a direct transmit path.
|
||||
|
||||
In case of topology updates, if the peer is moved to a different bridge port,
|
||||
the connection will time out; reconnecting will result in a new entry with
|
||||
the correct path. Snooping fdb updates would allow for cleaning up stale
|
||||
flowtable entries.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -92,6 +92,7 @@ enum flow_offload_tuple_dir {
|
||||
enum flow_offload_xmit_type {
|
||||
FLOW_OFFLOAD_XMIT_NEIGH = 0,
|
||||
FLOW_OFFLOAD_XMIT_XFRM,
|
||||
+ FLOW_OFFLOAD_XMIT_DIRECT,
|
||||
};
|
||||
|
||||
struct flow_offload_tuple {
|
||||
@@ -120,8 +121,14 @@ struct flow_offload_tuple {
|
||||
xmit_type:2;
|
||||
|
||||
u16 mtu;
|
||||
-
|
||||
- struct dst_entry *dst_cache;
|
||||
+ union {
|
||||
+ struct dst_entry *dst_cache;
|
||||
+ struct {
|
||||
+ u32 ifidx;
|
||||
+ u8 h_source[ETH_ALEN];
|
||||
+ u8 h_dest[ETH_ALEN];
|
||||
+ } out;
|
||||
+ };
|
||||
};
|
||||
|
||||
struct flow_offload_tuple_rhash {
|
||||
@@ -167,6 +174,11 @@ struct nf_flow_route {
|
||||
struct {
|
||||
u32 ifindex;
|
||||
} in;
|
||||
+ struct {
|
||||
+ u32 ifindex;
|
||||
+ u8 h_source[ETH_ALEN];
|
||||
+ u8 h_dest[ETH_ALEN];
|
||||
+ } out;
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
} tuple[FLOW_OFFLOAD_DIR_MAX];
|
||||
};
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -81,9 +81,6 @@ static int flow_offload_fill_route(struc
|
||||
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
|
||||
struct dst_entry *dst = route->tuple[dir].dst;
|
||||
|
||||
- if (!dst_hold_safe(route->tuple[dir].dst))
|
||||
- return -1;
|
||||
-
|
||||
switch (flow_tuple->l3proto) {
|
||||
case NFPROTO_IPV4:
|
||||
flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
|
||||
@@ -94,12 +91,36 @@ static int flow_offload_fill_route(struc
|
||||
}
|
||||
|
||||
flow_tuple->iifidx = route->tuple[dir].in.ifindex;
|
||||
+
|
||||
+ switch (route->tuple[dir].xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
|
||||
+ ETH_ALEN);
|
||||
+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
|
||||
+ ETH_ALEN);
|
||||
+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_XFRM:
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ if (!dst_hold_safe(route->tuple[dir].dst))
|
||||
+ return -1;
|
||||
+
|
||||
+ flow_tuple->dst_cache = dst;
|
||||
+ break;
|
||||
+ }
|
||||
flow_tuple->xmit_type = route->tuple[dir].xmit_type;
|
||||
- flow_tuple->dst_cache = dst;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void nft_flow_dst_release(struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir)
|
||||
+{
|
||||
+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
|
||||
+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
|
||||
+}
|
||||
+
|
||||
int flow_offload_route_init(struct flow_offload *flow,
|
||||
const struct nf_flow_route *route)
|
||||
{
|
||||
@@ -118,7 +139,7 @@ int flow_offload_route_init(struct flow_
|
||||
return 0;
|
||||
|
||||
err_route_reply:
|
||||
- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
|
||||
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -169,8 +190,8 @@ static void flow_offload_fixup_ct(struct
|
||||
|
||||
static void flow_offload_route_release(struct flow_offload *flow)
|
||||
{
|
||||
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
|
||||
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
|
||||
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
|
||||
}
|
||||
|
||||
void flow_offload_free(struct flow_offload *flow)
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -207,6 +207,24 @@ static unsigned int nf_flow_xmit_xfrm(st
|
||||
return NF_STOLEN;
|
||||
}
|
||||
|
||||
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
|
||||
+ const struct flow_offload_tuple_rhash *tuplehash,
|
||||
+ unsigned short type)
|
||||
+{
|
||||
+ struct net_device *outdev;
|
||||
+
|
||||
+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
|
||||
+ if (!outdev)
|
||||
+ return NF_DROP;
|
||||
+
|
||||
+ skb->dev = outdev;
|
||||
+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
|
||||
+ tuplehash->tuple.out.h_source, skb->len);
|
||||
+ dev_queue_xmit(skb);
|
||||
+
|
||||
+ return NF_STOLEN;
|
||||
+}
|
||||
+
|
||||
unsigned int
|
||||
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||
const struct nf_hook_state *state)
|
||||
@@ -222,6 +240,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
struct iphdr *iph;
|
||||
__be32 nexthop;
|
||||
u32 hdrsize;
|
||||
+ int ret;
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IP))
|
||||
return NF_ACCEPT;
|
||||
@@ -244,9 +263,13 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (!dst_check(&rt->dst, 0)) {
|
||||
- flow_offload_teardown(flow);
|
||||
- return NF_ACCEPT;
|
||||
+ if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
+ tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
+ if (!dst_check(&rt->dst, 0)) {
|
||||
+ flow_offload_teardown(flow);
|
||||
+ return NF_ACCEPT;
|
||||
+ }
|
||||
}
|
||||
|
||||
if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
@@ -263,8 +286,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
-
|
||||
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
|
||||
IPCB(skb)->iif = skb->dev->ifindex;
|
||||
@@ -272,13 +293,23 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
|
||||
}
|
||||
|
||||
- outdev = rt->dst.dev;
|
||||
- skb->dev = outdev;
|
||||
- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||
- skb_dst_set_noref(skb, &rt->dst);
|
||||
- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
|
||||
+ switch (tuplehash->tuple.xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ outdev = rt->dst.dev;
|
||||
+ skb->dev = outdev;
|
||||
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||
+ skb_dst_set_noref(skb, &rt->dst);
|
||||
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
|
||||
+ ret = NF_STOLEN;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
|
||||
+ if (ret == NF_DROP)
|
||||
+ flow_offload_teardown(flow);
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
- return NF_STOLEN;
|
||||
+ return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
|
||||
|
||||
@@ -444,6 +475,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
struct ipv6hdr *ip6h;
|
||||
struct rt6_info *rt;
|
||||
u32 hdrsize;
|
||||
+ int ret;
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IPV6))
|
||||
return NF_ACCEPT;
|
||||
@@ -465,9 +497,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
sizeof(*ip6h)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (!dst_check(&rt->dst, 0)) {
|
||||
- flow_offload_teardown(flow);
|
||||
- return NF_ACCEPT;
|
||||
+ if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
+ tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
+ if (!dst_check(&rt->dst, 0)) {
|
||||
+ flow_offload_teardown(flow);
|
||||
+ return NF_ACCEPT;
|
||||
+ }
|
||||
}
|
||||
|
||||
if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
|
||||
@@ -484,8 +520,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
-
|
||||
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
|
||||
IP6CB(skb)->iif = skb->dev->ifindex;
|
||||
@@ -493,12 +527,22 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
|
||||
}
|
||||
|
||||
- outdev = rt->dst.dev;
|
||||
- skb->dev = outdev;
|
||||
- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||
- skb_dst_set_noref(skb, &rt->dst);
|
||||
- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
|
||||
+ switch (tuplehash->tuple.xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ outdev = rt->dst.dev;
|
||||
+ skb->dev = outdev;
|
||||
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||
+ skb_dst_set_noref(skb, &rt->dst);
|
||||
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
|
||||
+ ret = NF_STOLEN;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
|
||||
+ if (ret == NF_DROP)
|
||||
+ flow_offload_teardown(flow);
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
- return NF_STOLEN;
|
||||
+ return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -39,12 +39,11 @@ static void nft_default_forward_path(str
|
||||
static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
|
||||
const struct dst_entry *dst_cache,
|
||||
const struct nf_conn *ct,
|
||||
- enum ip_conntrack_dir dir,
|
||||
+ enum ip_conntrack_dir dir, u8 *ha,
|
||||
struct net_device_path_stack *stack)
|
||||
{
|
||||
const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
|
||||
struct net_device *dev = dst_cache->dev;
|
||||
- unsigned char ha[ETH_ALEN];
|
||||
struct neighbour *n;
|
||||
u8 nud_state;
|
||||
|
||||
@@ -66,27 +65,43 @@ static int nft_dev_fill_forward_path(con
|
||||
|
||||
struct nft_forward_info {
|
||||
const struct net_device *indev;
|
||||
+ const struct net_device *outdev;
|
||||
+ u8 h_source[ETH_ALEN];
|
||||
+ u8 h_dest[ETH_ALEN];
|
||||
+ enum flow_offload_xmit_type xmit_type;
|
||||
};
|
||||
|
||||
static void nft_dev_path_info(const struct net_device_path_stack *stack,
|
||||
- struct nft_forward_info *info)
|
||||
+ struct nft_forward_info *info,
|
||||
+ unsigned char *ha)
|
||||
{
|
||||
const struct net_device_path *path;
|
||||
int i;
|
||||
|
||||
+ memcpy(info->h_dest, ha, ETH_ALEN);
|
||||
+
|
||||
for (i = 0; i < stack->num_paths; i++) {
|
||||
path = &stack->path[i];
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
info->indev = path->dev;
|
||||
+ if (is_zero_ether_addr(info->h_source))
|
||||
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
break;
|
||||
- case DEV_PATH_VLAN:
|
||||
case DEV_PATH_BRIDGE:
|
||||
+ if (is_zero_ether_addr(info->h_source))
|
||||
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
+
|
||||
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
+ break;
|
||||
+ case DEV_PATH_VLAN:
|
||||
default:
|
||||
info->indev = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
+ if (!info->outdev)
|
||||
+ info->outdev = info->indev;
|
||||
}
|
||||
|
||||
static bool nft_flowtable_find_dev(const struct net_device *dev,
|
||||
@@ -114,14 +129,22 @@ static void nft_dev_forward_path(struct
|
||||
const struct dst_entry *dst = route->tuple[dir].dst;
|
||||
struct net_device_path_stack stack;
|
||||
struct nft_forward_info info = {};
|
||||
+ unsigned char ha[ETH_ALEN];
|
||||
|
||||
- if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
|
||||
- nft_dev_path_info(&stack, &info);
|
||||
+ if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
|
||||
+ nft_dev_path_info(&stack, &info, ha);
|
||||
|
||||
if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
|
||||
return;
|
||||
|
||||
route->tuple[!dir].in.ifindex = info.indev->ifindex;
|
||||
+
|
||||
+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
|
||||
+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
||||
+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
|
||||
+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
|
||||
+ route->tuple[dir].xmit_type = info.xmit_type;
|
||||
+ }
|
||||
}
|
||||
|
||||
static int nft_flow_route(const struct nft_pktinfo *pkt,
|
@ -0,0 +1,410 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:41 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add vlan support
|
||||
|
||||
Add the vlan id and protocol to the flow tuple to uniquely identify
|
||||
flows from the receive path. For the transmit path, dev_hard_header() on
|
||||
the vlan device pushes the headers. This patch includes support for two
|
||||
vlan headers (QinQ) from the ingress path.
|
||||
|
||||
Add a generic encap field to the flowtable entry which stores the
|
||||
protocol and the tag id. This allows reusing these fields in the PPPoE
|
||||
support coming in a later patch.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -95,6 +95,8 @@ enum flow_offload_xmit_type {
|
||||
FLOW_OFFLOAD_XMIT_DIRECT,
|
||||
};
|
||||
|
||||
+#define NF_FLOW_TABLE_ENCAP_MAX 2
|
||||
+
|
||||
struct flow_offload_tuple {
|
||||
union {
|
||||
struct in_addr src_v4;
|
||||
@@ -113,13 +115,17 @@ struct flow_offload_tuple {
|
||||
|
||||
u8 l3proto;
|
||||
u8 l4proto;
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
|
||||
/* All members above are keys for lookups, see flow_offload_hash(). */
|
||||
struct { } __hash;
|
||||
|
||||
- u8 dir:6,
|
||||
- xmit_type:2;
|
||||
-
|
||||
+ u8 dir:4,
|
||||
+ xmit_type:2,
|
||||
+ encap_num:2;
|
||||
u16 mtu;
|
||||
union {
|
||||
struct dst_entry *dst_cache;
|
||||
@@ -173,6 +179,11 @@ struct nf_flow_route {
|
||||
struct dst_entry *dst;
|
||||
struct {
|
||||
u32 ifindex;
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
+ u8 num_encaps;
|
||||
} in;
|
||||
struct {
|
||||
u32 ifindex;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -80,6 +80,7 @@ static int flow_offload_fill_route(struc
|
||||
{
|
||||
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
|
||||
struct dst_entry *dst = route->tuple[dir].dst;
|
||||
+ int i, j = 0;
|
||||
|
||||
switch (flow_tuple->l3proto) {
|
||||
case NFPROTO_IPV4:
|
||||
@@ -91,6 +92,12 @@ static int flow_offload_fill_route(struc
|
||||
}
|
||||
|
||||
flow_tuple->iifidx = route->tuple[dir].in.ifindex;
|
||||
+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
|
||||
+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
|
||||
+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
|
||||
+ j++;
|
||||
+ }
|
||||
+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
|
||||
|
||||
switch (route->tuple[dir].xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -136,23 +136,44 @@ static bool ip_has_options(unsigned int
|
||||
return thoff != sizeof(struct iphdr);
|
||||
}
|
||||
|
||||
+static void nf_flow_tuple_encap(struct sk_buff *skb,
|
||||
+ struct flow_offload_tuple *tuple)
|
||||
+{
|
||||
+ int i = 0;
|
||||
+
|
||||
+ if (skb_vlan_tag_present(skb)) {
|
||||
+ tuple->encap[i].id = skb_vlan_tag_get(skb);
|
||||
+ tuple->encap[i].proto = skb->vlan_proto;
|
||||
+ i++;
|
||||
+ }
|
||||
+ if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
+
|
||||
+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
|
||||
+ tuple->encap[i].proto = skb->protocol;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
|
||||
- struct flow_offload_tuple *tuple, u32 *hdrsize)
|
||||
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
|
||||
+ u32 offset)
|
||||
{
|
||||
struct flow_ports *ports;
|
||||
unsigned int thoff;
|
||||
struct iphdr *iph;
|
||||
|
||||
- if (!pskb_may_pull(skb, sizeof(*iph)))
|
||||
+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
|
||||
return -1;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
- thoff = iph->ihl * 4;
|
||||
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
|
||||
+ thoff = (iph->ihl * 4);
|
||||
|
||||
if (ip_is_fragment(iph) ||
|
||||
unlikely(ip_has_options(thoff)))
|
||||
return -1;
|
||||
|
||||
+ thoff += offset;
|
||||
+
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
*hdrsize = sizeof(struct tcphdr);
|
||||
@@ -167,11 +188,10 @@ static int nf_flow_tuple_ip(struct sk_bu
|
||||
if (iph->ttl <= 1)
|
||||
return -1;
|
||||
|
||||
- thoff = iph->ihl * 4;
|
||||
if (!pskb_may_pull(skb, thoff + *hdrsize))
|
||||
return -1;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
|
||||
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||
|
||||
tuple->src_v4.s_addr = iph->saddr;
|
||||
@@ -181,6 +201,7 @@ static int nf_flow_tuple_ip(struct sk_bu
|
||||
tuple->l3proto = AF_INET;
|
||||
tuple->l4proto = iph->protocol;
|
||||
tuple->iifidx = dev->ifindex;
|
||||
+ nf_flow_tuple_encap(skb, tuple);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -207,6 +228,43 @@ static unsigned int nf_flow_xmit_xfrm(st
|
||||
return NF_STOLEN;
|
||||
}
|
||||
|
||||
+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
|
||||
+ u32 *offset)
|
||||
+{
|
||||
+ if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
+ struct vlan_ethhdr *veth;
|
||||
+
|
||||
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
+ if (veth->h_vlan_encapsulated_proto == proto) {
|
||||
+ *offset += VLAN_HLEN;
|
||||
+ return true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static void nf_flow_encap_pop(struct sk_buff *skb,
|
||||
+ struct flow_offload_tuple_rhash *tuplehash)
|
||||
+{
|
||||
+ struct vlan_hdr *vlan_hdr;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
|
||||
+ if (skb_vlan_tag_present(skb)) {
|
||||
+ __vlan_hwaccel_clear_tag(skb);
|
||||
+ continue;
|
||||
+ }
|
||||
+ if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
+ vlan_hdr = (struct vlan_hdr *)skb->data;
|
||||
+ __skb_pull(skb, VLAN_HLEN);
|
||||
+ vlan_set_encap_proto(skb, vlan_hdr);
|
||||
+ skb_reset_network_header(skb);
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
|
||||
const struct flow_offload_tuple_rhash *tuplehash,
|
||||
unsigned short type)
|
||||
@@ -235,17 +293,18 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
enum flow_offload_tuple_dir dir;
|
||||
struct flow_offload *flow;
|
||||
struct net_device *outdev;
|
||||
+ u32 hdrsize, offset = 0;
|
||||
+ unsigned int thoff, mtu;
|
||||
struct rtable *rt;
|
||||
- unsigned int thoff;
|
||||
struct iphdr *iph;
|
||||
__be32 nexthop;
|
||||
- u32 hdrsize;
|
||||
int ret;
|
||||
|
||||
- if (skb->protocol != htons(ETH_P_IP))
|
||||
+ if (skb->protocol != htons(ETH_P_IP) &&
|
||||
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
|
||||
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
|
||||
return NF_ACCEPT;
|
||||
|
||||
tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||
@@ -255,11 +314,12 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
|
||||
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
|
||||
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
- thoff = iph->ihl * 4;
|
||||
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
|
||||
+ thoff = (iph->ihl * 4) + offset;
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
@@ -277,6 +337,9 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
|
||||
+ nf_flow_encap_pop(skb, tuplehash);
|
||||
+ thoff -= offset;
|
||||
+
|
||||
iph = ip_hdr(skb);
|
||||
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
|
||||
|
||||
@@ -418,16 +481,18 @@ static void nf_flow_nat_ipv6(const struc
|
||||
}
|
||||
|
||||
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
|
||||
- struct flow_offload_tuple *tuple, u32 *hdrsize)
|
||||
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
|
||||
+ u32 offset)
|
||||
{
|
||||
struct flow_ports *ports;
|
||||
struct ipv6hdr *ip6h;
|
||||
unsigned int thoff;
|
||||
|
||||
- if (!pskb_may_pull(skb, sizeof(*ip6h)))
|
||||
+ thoff = sizeof(*ip6h) + offset;
|
||||
+ if (!pskb_may_pull(skb, thoff))
|
||||
return -1;
|
||||
|
||||
- ip6h = ipv6_hdr(skb);
|
||||
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
|
||||
|
||||
switch (ip6h->nexthdr) {
|
||||
case IPPROTO_TCP:
|
||||
@@ -443,11 +508,10 @@ static int nf_flow_tuple_ipv6(struct sk_
|
||||
if (ip6h->hop_limit <= 1)
|
||||
return -1;
|
||||
|
||||
- thoff = sizeof(*ip6h);
|
||||
if (!pskb_may_pull(skb, thoff + *hdrsize))
|
||||
return -1;
|
||||
|
||||
- ip6h = ipv6_hdr(skb);
|
||||
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
|
||||
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||
|
||||
tuple->src_v6 = ip6h->saddr;
|
||||
@@ -457,6 +521,7 @@ static int nf_flow_tuple_ipv6(struct sk_
|
||||
tuple->l3proto = AF_INET6;
|
||||
tuple->l4proto = ip6h->nexthdr;
|
||||
tuple->iifidx = dev->ifindex;
|
||||
+ nf_flow_tuple_encap(skb, tuple);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -472,15 +537,17 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
const struct in6_addr *nexthop;
|
||||
struct flow_offload *flow;
|
||||
struct net_device *outdev;
|
||||
+ unsigned int thoff, mtu;
|
||||
+ u32 hdrsize, offset = 0;
|
||||
struct ipv6hdr *ip6h;
|
||||
struct rt6_info *rt;
|
||||
- u32 hdrsize;
|
||||
int ret;
|
||||
|
||||
- if (skb->protocol != htons(ETH_P_IPV6))
|
||||
+ if (skb->protocol != htons(ETH_P_IPV6) &&
|
||||
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
|
||||
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
|
||||
return NF_ACCEPT;
|
||||
|
||||
tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||
@@ -490,11 +557,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
|
||||
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
|
||||
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
|
||||
- sizeof(*ip6h)))
|
||||
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
|
||||
+ thoff = sizeof(*ip6h) + offset;
|
||||
+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
@@ -506,11 +575,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
}
|
||||
}
|
||||
|
||||
- if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
|
||||
+ if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
|
||||
+ nf_flow_encap_pop(skb, tuplehash);
|
||||
+
|
||||
ip6h = ipv6_hdr(skb);
|
||||
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
|
||||
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -66,6 +66,11 @@ static int nft_dev_fill_forward_path(con
|
||||
struct nft_forward_info {
|
||||
const struct net_device *indev;
|
||||
const struct net_device *outdev;
|
||||
+ struct id {
|
||||
+ __u16 id;
|
||||
+ __be16 proto;
|
||||
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
+ u8 num_encaps;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
@@ -84,9 +89,23 @@ static void nft_dev_path_info(const stru
|
||||
path = &stack->path[i];
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
+ case DEV_PATH_VLAN:
|
||||
info->indev = path->dev;
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
+
|
||||
+ if (path->type == DEV_PATH_ETHERNET)
|
||||
+ break;
|
||||
+
|
||||
+ /* DEV_PATH_VLAN */
|
||||
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
||||
+ info->indev = NULL;
|
||||
+ break;
|
||||
+ }
|
||||
+ info->outdev = path->dev;
|
||||
+ info->encap[info->num_encaps].id = path->encap.id;
|
||||
+ info->encap[info->num_encaps].proto = path->encap.proto;
|
||||
+ info->num_encaps++;
|
||||
break;
|
||||
case DEV_PATH_BRIDGE:
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
@@ -94,7 +113,6 @@ static void nft_dev_path_info(const stru
|
||||
|
||||
info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
break;
|
||||
- case DEV_PATH_VLAN:
|
||||
default:
|
||||
info->indev = NULL;
|
||||
break;
|
||||
@@ -130,6 +148,7 @@ static void nft_dev_forward_path(struct
|
||||
struct net_device_path_stack stack;
|
||||
struct nft_forward_info info = {};
|
||||
unsigned char ha[ETH_ALEN];
|
||||
+ int i;
|
||||
|
||||
if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
|
||||
nft_dev_path_info(&stack, &info, ha);
|
||||
@@ -138,6 +157,11 @@ static void nft_dev_forward_path(struct
|
||||
return;
|
||||
|
||||
route->tuple[!dir].in.ifindex = info.indev->ifindex;
|
||||
+ for (i = 0; i < info.num_encaps; i++) {
|
||||
+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
|
||||
+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
|
||||
+ }
|
||||
+ route->tuple[!dir].in.num_encaps = info.num_encaps;
|
||||
|
||||
if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
|
||||
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
@ -0,0 +1,30 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:42 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add bridge vlan filtering support
|
||||
|
||||
Add the vlan tag when the PVID is set.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -111,6 +111,18 @@ static void nft_dev_path_info(const stru
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
|
||||
+ switch (path->bridge.vlan_mode) {
|
||||
+ case DEV_PATH_BR_VLAN_TAG:
|
||||
+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
|
||||
+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
|
||||
+ info->num_encaps++;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG:
|
||||
+ info->num_encaps--;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_KEEP:
|
||||
+ break;
|
||||
+ }
|
||||
info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
break;
|
||||
default:
|
@ -0,0 +1,145 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:43 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add pppoe support
|
||||
|
||||
Add the PPPoE protocol and session id to the flow tuple using the encap
|
||||
fields to uniquely identify flows from the receive path. For the
|
||||
transmit path, dev_hard_header() on the vlan device pushes the headers.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -7,6 +7,9 @@
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/netdevice.h>
|
||||
+#include <linux/if_ether.h>
|
||||
+#include <linux/if_pppox.h>
|
||||
+#include <linux/ppp_defs.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/ipv6.h>
|
||||
#include <net/ip6_route.h>
|
||||
@@ -139,6 +142,8 @@ static bool ip_has_options(unsigned int
|
||||
static void nf_flow_tuple_encap(struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple)
|
||||
{
|
||||
+ struct vlan_ethhdr *veth;
|
||||
+ struct pppoe_hdr *phdr;
|
||||
int i = 0;
|
||||
|
||||
if (skb_vlan_tag_present(skb)) {
|
||||
@@ -146,11 +151,17 @@ static void nf_flow_tuple_encap(struct s
|
||||
tuple->encap[i].proto = skb->vlan_proto;
|
||||
i++;
|
||||
}
|
||||
- if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
-
|
||||
+ switch (skb->protocol) {
|
||||
+ case htons(ETH_P_8021Q):
|
||||
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
|
||||
tuple->encap[i].proto = skb->protocol;
|
||||
+ break;
|
||||
+ case htons(ETH_P_PPP_SES):
|
||||
+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
|
||||
+ tuple->encap[i].id = ntohs(phdr->sid);
|
||||
+ tuple->encap[i].proto = skb->protocol;
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -228,17 +239,41 @@ static unsigned int nf_flow_xmit_xfrm(st
|
||||
return NF_STOLEN;
|
||||
}
|
||||
|
||||
+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
|
||||
+{
|
||||
+ __be16 proto;
|
||||
+
|
||||
+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
|
||||
+ sizeof(struct pppoe_hdr)));
|
||||
+ switch (proto) {
|
||||
+ case htons(PPP_IP):
|
||||
+ return htons(ETH_P_IP);
|
||||
+ case htons(PPP_IPV6):
|
||||
+ return htons(ETH_P_IPV6);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
|
||||
u32 *offset)
|
||||
{
|
||||
- if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
- struct vlan_ethhdr *veth;
|
||||
+ struct vlan_ethhdr *veth;
|
||||
|
||||
+ switch (skb->protocol) {
|
||||
+ case htons(ETH_P_8021Q):
|
||||
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
if (veth->h_vlan_encapsulated_proto == proto) {
|
||||
*offset += VLAN_HLEN;
|
||||
return true;
|
||||
}
|
||||
+ break;
|
||||
+ case htons(ETH_P_PPP_SES):
|
||||
+ if (nf_flow_pppoe_proto(skb) == proto) {
|
||||
+ *offset += PPPOE_SES_HLEN;
|
||||
+ return true;
|
||||
+ }
|
||||
+ break;
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -255,12 +290,18 @@ static void nf_flow_encap_pop(struct sk_
|
||||
__vlan_hwaccel_clear_tag(skb);
|
||||
continue;
|
||||
}
|
||||
- if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
+ switch (skb->protocol) {
|
||||
+ case htons(ETH_P_8021Q):
|
||||
vlan_hdr = (struct vlan_hdr *)skb->data;
|
||||
__skb_pull(skb, VLAN_HLEN);
|
||||
vlan_set_encap_proto(skb, vlan_hdr);
|
||||
skb_reset_network_header(skb);
|
||||
break;
|
||||
+ case htons(ETH_P_PPP_SES):
|
||||
+ skb->protocol = nf_flow_pppoe_proto(skb);
|
||||
+ skb_pull(skb, PPPOE_SES_HLEN);
|
||||
+ skb_reset_network_header(skb);
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
}
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -90,6 +90,7 @@ static void nft_dev_path_info(const stru
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
case DEV_PATH_VLAN:
|
||||
+ case DEV_PATH_PPPOE:
|
||||
info->indev = path->dev;
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
@@ -97,7 +98,7 @@ static void nft_dev_path_info(const stru
|
||||
if (path->type == DEV_PATH_ETHERNET)
|
||||
break;
|
||||
|
||||
- /* DEV_PATH_VLAN */
|
||||
+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
|
||||
if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
||||
info->indev = NULL;
|
||||
break;
|
||||
@@ -106,6 +107,8 @@ static void nft_dev_path_info(const stru
|
||||
info->encap[info->num_encaps].id = path->encap.id;
|
||||
info->encap[info->num_encaps].proto = path->encap.proto;
|
||||
info->num_encaps++;
|
||||
+ if (path->type == DEV_PATH_PPPOE)
|
||||
+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
|
||||
break;
|
||||
case DEV_PATH_BRIDGE:
|
||||
if (is_zero_ether_addr(info->h_source))
|
@ -0,0 +1,32 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:44 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add dsa support
|
||||
|
||||
Replace the master ethernet device by the dsa slave port. Packets coming
|
||||
in from the software ingress path use the dsa slave port as input
|
||||
device.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -89,6 +89,7 @@ static void nft_dev_path_info(const stru
|
||||
path = &stack->path[i];
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
+ case DEV_PATH_DSA:
|
||||
case DEV_PATH_VLAN:
|
||||
case DEV_PATH_PPPOE:
|
||||
info->indev = path->dev;
|
||||
@@ -97,6 +98,10 @@ static void nft_dev_path_info(const stru
|
||||
|
||||
if (path->type == DEV_PATH_ETHERNET)
|
||||
break;
|
||||
+ if (path->type == DEV_PATH_DSA) {
|
||||
+ i = stack->num_paths;
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
/* DEV_PATH_VLAN and DEV_PATH_PPPOE */
|
||||
if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
@ -0,0 +1,107 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:45 +0100
|
||||
Subject: [PATCH] selftests: netfilter: flowtable bridge and vlan support
|
||||
|
||||
This patch adds two new tests to cover bridge and vlan support:
|
||||
|
||||
- Add a bridge device to the Router1 (nsr1) container and attach the
|
||||
veth0 device to the bridge. Set the IP address to the bridge device
|
||||
to exercise the bridge forwarding path.
|
||||
|
||||
- Add vlan encapsulation between the bridge device in the Router1 and
|
||||
one of the sender containers (ns1).
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
|
||||
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
|
||||
@@ -370,6 +370,88 @@ else
|
||||
ip netns exec nsr1 nft list ruleset
|
||||
fi
|
||||
|
||||
+# Another test:
|
||||
+# Add bridge interface br0 to Router1, with NAT enabled.
|
||||
+ip -net nsr1 link add name br0 type bridge
|
||||
+ip -net nsr1 addr flush dev veth0
|
||||
+ip -net nsr1 link set up dev veth0
|
||||
+ip -net nsr1 link set veth0 master br0
|
||||
+ip -net nsr1 addr add 10.0.1.1/24 dev br0
|
||||
+ip -net nsr1 addr add dead:1::1/64 dev br0
|
||||
+ip -net nsr1 link set up dev br0
|
||||
+
|
||||
+ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
|
||||
+
|
||||
+# br0 with NAT enabled.
|
||||
+ip netns exec nsr1 nft -f - <<EOF
|
||||
+flush table ip nat
|
||||
+table ip nat {
|
||||
+ chain prerouting {
|
||||
+ type nat hook prerouting priority 0; policy accept;
|
||||
+ meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
|
||||
+ }
|
||||
+
|
||||
+ chain postrouting {
|
||||
+ type nat hook postrouting priority 0; policy accept;
|
||||
+ meta oifname "veth1" counter masquerade
|
||||
+ }
|
||||
+}
|
||||
+EOF
|
||||
+
|
||||
+if test_tcp_forwarding_nat ns1 ns2; then
|
||||
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
|
||||
+else
|
||||
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
|
||||
+ ip netns exec nsr1 nft list ruleset
|
||||
+ ret=1
|
||||
+fi
|
||||
+
|
||||
+# Another test:
|
||||
+# Add bridge interface br0 to Router1, with NAT and VLAN.
|
||||
+ip -net nsr1 link set veth0 nomaster
|
||||
+ip -net nsr1 link set down dev veth0
|
||||
+ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
|
||||
+ip -net nsr1 link set up dev veth0
|
||||
+ip -net nsr1 link set up dev veth0.10
|
||||
+ip -net nsr1 link set veth0.10 master br0
|
||||
+
|
||||
+ip -net ns1 addr flush dev eth0
|
||||
+ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
|
||||
+ip -net ns1 link set eth0 up
|
||||
+ip -net ns1 link set eth0.10 up
|
||||
+ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
|
||||
+ip -net ns1 route add default via 10.0.1.1
|
||||
+ip -net ns1 addr add dead:1::99/64 dev eth0.10
|
||||
+
|
||||
+if test_tcp_forwarding_nat ns1 ns2; then
|
||||
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
|
||||
+else
|
||||
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
|
||||
+ ip netns exec nsr1 nft list ruleset
|
||||
+ ret=1
|
||||
+fi
|
||||
+
|
||||
+# restore test topology (remove bridge and VLAN)
|
||||
+ip -net nsr1 link set veth0 nomaster
|
||||
+ip -net nsr1 link set veth0 down
|
||||
+ip -net nsr1 link set veth0.10 down
|
||||
+ip -net nsr1 link delete veth0.10 type vlan
|
||||
+ip -net nsr1 link delete br0 type bridge
|
||||
+ip -net ns1 addr flush dev eth0.10
|
||||
+ip -net ns1 link set eth0.10 down
|
||||
+ip -net ns1 link set eth0 down
|
||||
+ip -net ns1 link delete eth0.10 type vlan
|
||||
+
|
||||
+# restore address in ns1 and nsr1
|
||||
+ip -net ns1 link set eth0 up
|
||||
+ip -net ns1 addr add 10.0.1.99/24 dev eth0
|
||||
+ip -net ns1 route add default via 10.0.1.1
|
||||
+ip -net ns1 addr add dead:1::99/64 dev eth0
|
||||
+ip -net ns1 route add default via dead:1::1
|
||||
+ip -net nsr1 addr add 10.0.1.1/24 dev veth0
|
||||
+ip -net nsr1 addr add dead:1::1/64 dev veth0
|
||||
+ip -net nsr1 link set up dev veth0
|
||||
+
|
||||
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
|
||||
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
|
||||
SPI1=$RANDOM
|
@ -0,0 +1,310 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:46 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add offload support for xmit path
|
||||
types
|
||||
|
||||
When the flow tuple xmit_type is set to FLOW_OFFLOAD_XMIT_DIRECT, the
|
||||
dst_cache pointer is not valid, and the h_source/h_dest/ifidx out fields
|
||||
need to be used.
|
||||
|
||||
This patch also adds the FLOW_ACTION_VLAN_PUSH action to pass the VLAN
|
||||
tag to the driver.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -177,28 +177,45 @@ static int flow_offload_eth_src(struct n
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
- const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
|
||||
struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
|
||||
struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
|
||||
- struct net_device *dev;
|
||||
+ const struct flow_offload_tuple *other_tuple, *this_tuple;
|
||||
+ struct net_device *dev = NULL;
|
||||
+ const unsigned char *addr;
|
||||
u32 mask, val;
|
||||
u16 val16;
|
||||
|
||||
- dev = dev_get_by_index(net, tuple->iifidx);
|
||||
- if (!dev)
|
||||
- return -ENOENT;
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
+
|
||||
+ switch (this_tuple->xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ addr = this_tuple->out.h_source;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ dev = dev_get_by_index(net, other_tuple->iifidx);
|
||||
+ if (!dev)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ addr = dev->dev_addr;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
|
||||
mask = ~0xffff0000;
|
||||
- memcpy(&val16, dev->dev_addr, 2);
|
||||
+ memcpy(&val16, addr, 2);
|
||||
val = val16 << 16;
|
||||
flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
|
||||
&val, &mask);
|
||||
|
||||
mask = ~0xffffffff;
|
||||
- memcpy(&val, dev->dev_addr + 2, 4);
|
||||
+ memcpy(&val, addr + 2, 4);
|
||||
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
|
||||
&val, &mask);
|
||||
- dev_put(dev);
|
||||
+
|
||||
+ if (dev)
|
||||
+ dev_put(dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -210,27 +227,40 @@ static int flow_offload_eth_dst(struct n
|
||||
{
|
||||
struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
|
||||
struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
|
||||
- const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
|
||||
+ const struct flow_offload_tuple *other_tuple, *this_tuple;
|
||||
const struct dst_entry *dst_cache;
|
||||
unsigned char ha[ETH_ALEN];
|
||||
struct neighbour *n;
|
||||
+ const void *daddr;
|
||||
u32 mask, val;
|
||||
u8 nud_state;
|
||||
u16 val16;
|
||||
|
||||
- dst_cache = flow->tuplehash[dir].tuple.dst_cache;
|
||||
- n = dst_neigh_lookup(dst_cache, daddr);
|
||||
- if (!n)
|
||||
- return -ENOENT;
|
||||
-
|
||||
- read_lock_bh(&n->lock);
|
||||
- nud_state = n->nud_state;
|
||||
- ether_addr_copy(ha, n->ha);
|
||||
- read_unlock_bh(&n->lock);
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
|
||||
- if (!(nud_state & NUD_VALID)) {
|
||||
+ switch (this_tuple->xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ ether_addr_copy(ha, this_tuple->out.h_dest);
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ daddr = &other_tuple->src_v4;
|
||||
+ dst_cache = this_tuple->dst_cache;
|
||||
+ n = dst_neigh_lookup(dst_cache, daddr);
|
||||
+ if (!n)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ read_lock_bh(&n->lock);
|
||||
+ nud_state = n->nud_state;
|
||||
+ ether_addr_copy(ha, n->ha);
|
||||
+ read_unlock_bh(&n->lock);
|
||||
neigh_release(n);
|
||||
- return -ENOENT;
|
||||
+
|
||||
+ if (!(nud_state & NUD_VALID))
|
||||
+ return -ENOENT;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
mask = ~0xffffffff;
|
||||
@@ -243,7 +273,6 @@ static int flow_offload_eth_dst(struct n
|
||||
val = val16;
|
||||
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
|
||||
&val, &mask);
|
||||
- neigh_release(n);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -465,27 +494,52 @@ static void flow_offload_ipv4_checksum(s
|
||||
}
|
||||
}
|
||||
|
||||
-static void flow_offload_redirect(const struct flow_offload *flow,
|
||||
+static void flow_offload_redirect(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
- struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
- struct rtable *rt;
|
||||
+ const struct flow_offload_tuple *this_tuple, *other_tuple;
|
||||
+ struct flow_action_entry *entry;
|
||||
+ struct net_device *dev;
|
||||
+ int ifindex;
|
||||
+
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
+ switch (this_tuple->xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
+ ifindex = this_tuple->out.ifidx;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ ifindex = other_tuple->iifidx;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||
+ dev = dev_get_by_index(net, ifindex);
|
||||
+ if (!dev)
|
||||
+ return;
|
||||
+
|
||||
+ entry = flow_action_entry_next(flow_rule);
|
||||
entry->id = FLOW_ACTION_REDIRECT;
|
||||
- entry->dev = rt->dst.dev;
|
||||
- dev_hold(rt->dst.dev);
|
||||
+ entry->dev = dev;
|
||||
}
|
||||
|
||||
static void flow_offload_encap_tunnel(const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
+ const struct flow_offload_tuple *this_tuple;
|
||||
struct flow_action_entry *entry;
|
||||
struct dst_entry *dst;
|
||||
|
||||
- dst = flow->tuplehash[dir].tuple.dst_cache;
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
|
||||
+ return;
|
||||
+
|
||||
+ dst = this_tuple->dst_cache;
|
||||
if (dst && dst->lwtstate) {
|
||||
struct ip_tunnel_info *tun_info;
|
||||
|
||||
@@ -502,10 +556,15 @@ static void flow_offload_decap_tunnel(co
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
+ const struct flow_offload_tuple *other_tuple;
|
||||
struct flow_action_entry *entry;
|
||||
struct dst_entry *dst;
|
||||
|
||||
- dst = flow->tuplehash[!dir].tuple.dst_cache;
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
|
||||
+ return;
|
||||
+
|
||||
+ dst = other_tuple->dst_cache;
|
||||
if (dst && dst->lwtstate) {
|
||||
struct ip_tunnel_info *tun_info;
|
||||
|
||||
@@ -517,10 +576,14 @@ static void flow_offload_decap_tunnel(co
|
||||
}
|
||||
}
|
||||
|
||||
-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int
|
||||
+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
+ const struct flow_offload_tuple *other_tuple;
|
||||
+ int i;
|
||||
+
|
||||
flow_offload_decap_tunnel(flow, dir, flow_rule);
|
||||
flow_offload_encap_tunnel(flow, dir, flow_rule);
|
||||
|
||||
@@ -528,6 +591,26 @@ int nf_flow_rule_route_ipv4(struct net *
|
||||
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
|
||||
return -1;
|
||||
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+
|
||||
+ for (i = 0; i < other_tuple->encap_num; i++) {
|
||||
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
+
|
||||
+ entry->id = FLOW_ACTION_VLAN_PUSH;
|
||||
+ entry->vlan.vid = other_tuple->encap[i].id;
|
||||
+ entry->vlan.proto = other_tuple->encap[i].proto;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
+{
|
||||
+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
flow_offload_ipv4_snat(net, flow, dir, flow_rule);
|
||||
flow_offload_port_snat(net, flow, dir, flow_rule);
|
||||
@@ -540,7 +623,7 @@ int nf_flow_rule_route_ipv4(struct net *
|
||||
test_bit(NF_FLOW_DNAT, &flow->flags))
|
||||
flow_offload_ipv4_checksum(net, flow, flow_rule);
|
||||
|
||||
- flow_offload_redirect(flow, dir, flow_rule);
|
||||
+ flow_offload_redirect(net, flow, dir, flow_rule);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -550,11 +633,7 @@ int nf_flow_rule_route_ipv6(struct net *
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
- flow_offload_decap_tunnel(flow, dir, flow_rule);
|
||||
- flow_offload_encap_tunnel(flow, dir, flow_rule);
|
||||
-
|
||||
- if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
|
||||
- flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
|
||||
+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
|
||||
return -1;
|
||||
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
@@ -566,7 +645,7 @@ int nf_flow_rule_route_ipv6(struct net *
|
||||
flow_offload_port_dnat(net, flow, dir, flow_rule);
|
||||
}
|
||||
|
||||
- flow_offload_redirect(flow, dir, flow_rule);
|
||||
+ flow_offload_redirect(net, flow, dir, flow_rule);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -580,10 +659,10 @@ nf_flow_offload_rule_alloc(struct net *n
|
||||
enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
const struct nf_flowtable *flowtable = offload->flowtable;
|
||||
+ const struct flow_offload_tuple *tuple, *other_tuple;
|
||||
const struct flow_offload *flow = offload->flow;
|
||||
- const struct flow_offload_tuple *tuple;
|
||||
+ struct dst_entry *other_dst = NULL;
|
||||
struct nf_flow_rule *flow_rule;
|
||||
- struct dst_entry *other_dst;
|
||||
int err = -ENOMEM;
|
||||
|
||||
flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
|
||||
@@ -599,7 +678,10 @@ nf_flow_offload_rule_alloc(struct net *n
|
||||
flow_rule->rule->match.key = &flow_rule->match.key;
|
||||
|
||||
tuple = &flow->tuplehash[dir].tuple;
|
||||
- other_dst = flow->tuplehash[!dir].tuple.dst_cache;
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
|
||||
+ other_dst = other_tuple->dst_cache;
|
||||
+
|
||||
err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
|
||||
if (err < 0)
|
||||
goto err_flow_match;
|
@ -0,0 +1,114 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:47 +0100
|
||||
Subject: [PATCH] netfilter: nft_flow_offload: use direct xmit if
|
||||
hardware offload is enabled
|
||||
|
||||
If there is a forward path to reach an ethernet device and hardware
|
||||
offload is enabled, then use the direct xmit path.
|
||||
|
||||
Moreover, store the real device in the direct xmit path info since
|
||||
software datapath uses dev_hard_header() to push the layer encapsulation
|
||||
headers while hardware offload refers to the real device.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -131,6 +131,7 @@ struct flow_offload_tuple {
|
||||
struct dst_entry *dst_cache;
|
||||
struct {
|
||||
u32 ifidx;
|
||||
+ u32 hw_ifidx;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
} out;
|
||||
@@ -187,6 +188,7 @@ struct nf_flow_route {
|
||||
} in;
|
||||
struct {
|
||||
u32 ifindex;
|
||||
+ u32 hw_ifindex;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
} out;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -106,6 +106,7 @@ static int flow_offload_fill_route(struc
|
||||
memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
|
||||
ETH_ALEN);
|
||||
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
|
||||
+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
|
||||
break;
|
||||
case FLOW_OFFLOAD_XMIT_XFRM:
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -508,7 +508,7 @@ static void flow_offload_redirect(struct
|
||||
switch (this_tuple->xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
this_tuple = &flow->tuplehash[dir].tuple;
|
||||
- ifindex = this_tuple->out.ifidx;
|
||||
+ ifindex = this_tuple->out.hw_ifidx;
|
||||
break;
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -66,6 +66,7 @@ static int nft_dev_fill_forward_path(con
|
||||
struct nft_forward_info {
|
||||
const struct net_device *indev;
|
||||
const struct net_device *outdev;
|
||||
+ const struct net_device *hw_outdev;
|
||||
struct id {
|
||||
__u16 id;
|
||||
__be16 proto;
|
||||
@@ -76,9 +77,18 @@ struct nft_forward_info {
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
};
|
||||
|
||||
+static bool nft_is_valid_ether_device(const struct net_device *dev)
|
||||
+{
|
||||
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
|
||||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
|
||||
+ return false;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
static void nft_dev_path_info(const struct net_device_path_stack *stack,
|
||||
struct nft_forward_info *info,
|
||||
- unsigned char *ha)
|
||||
+ unsigned char *ha, struct nf_flowtable *flowtable)
|
||||
{
|
||||
const struct net_device_path *path;
|
||||
int i;
|
||||
@@ -140,6 +150,12 @@ static void nft_dev_path_info(const stru
|
||||
}
|
||||
if (!info->outdev)
|
||||
info->outdev = info->indev;
|
||||
+
|
||||
+ info->hw_outdev = info->indev;
|
||||
+
|
||||
+ if (nf_flowtable_hw_offload(flowtable) &&
|
||||
+ nft_is_valid_ether_device(info->indev))
|
||||
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
}
|
||||
|
||||
static bool nft_flowtable_find_dev(const struct net_device *dev,
|
||||
@@ -171,7 +187,7 @@ static void nft_dev_forward_path(struct
|
||||
int i;
|
||||
|
||||
if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
|
||||
- nft_dev_path_info(&stack, &info, ha);
|
||||
+ nft_dev_path_info(&stack, &info, ha, &ft->data);
|
||||
|
||||
if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
|
||||
return;
|
||||
@@ -187,6 +203,7 @@ static void nft_dev_forward_path(struct
|
||||
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
||||
memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
|
||||
route->tuple[dir].out.ifindex = info.outdev->ifindex;
|
||||
+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
|
||||
route->tuple[dir].xmit_type = info.xmit_type;
|
||||
}
|
||||
}
|
@ -0,0 +1,123 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:48 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: bridge vlan hardware offload and
|
||||
switchdev
|
||||
|
||||
The switch might have already added the VLAN tag through PVID hardware
|
||||
offload. Keep this extra VLAN in the flowtable but skip it on egress.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -849,6 +849,7 @@ struct net_device_path {
|
||||
DEV_PATH_BR_VLAN_KEEP,
|
||||
DEV_PATH_BR_VLAN_TAG,
|
||||
DEV_PATH_BR_VLAN_UNTAG,
|
||||
+ DEV_PATH_BR_VLAN_UNTAG_HW,
|
||||
} vlan_mode;
|
||||
u16 vlan_id;
|
||||
__be16 vlan_proto;
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -123,9 +123,10 @@ struct flow_offload_tuple {
|
||||
/* All members above are keys for lookups, see flow_offload_hash(). */
|
||||
struct { } __hash;
|
||||
|
||||
- u8 dir:4,
|
||||
+ u8 dir:2,
|
||||
xmit_type:2,
|
||||
- encap_num:2;
|
||||
+ encap_num:2,
|
||||
+ in_vlan_ingress:2;
|
||||
u16 mtu;
|
||||
union {
|
||||
struct dst_entry *dst_cache;
|
||||
@@ -184,7 +185,8 @@ struct nf_flow_route {
|
||||
u16 id;
|
||||
__be16 proto;
|
||||
} encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
- u8 num_encaps;
|
||||
+ u8 num_encaps:2,
|
||||
+ ingress_vlans:2;
|
||||
} in;
|
||||
struct {
|
||||
u32 ifindex;
|
||||
--- a/net/bridge/br_device.c
|
||||
+++ b/net/bridge/br_device.c
|
||||
@@ -435,6 +435,7 @@ static int br_fill_forward_path(struct n
|
||||
ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
|
||||
ctx->num_vlans++;
|
||||
break;
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
|
||||
case DEV_PATH_BR_VLAN_UNTAG:
|
||||
ctx->num_vlans--;
|
||||
break;
|
||||
--- a/net/bridge/br_vlan.c
|
||||
+++ b/net/bridge/br_vlan.c
|
||||
@@ -1374,6 +1374,8 @@ int br_vlan_fill_forward_path_mode(struc
|
||||
|
||||
if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
|
||||
path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
|
||||
+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
|
||||
else
|
||||
path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -95,6 +95,8 @@ static int flow_offload_fill_route(struc
|
||||
for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
|
||||
flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
|
||||
flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
|
||||
+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
|
||||
+ flow_tuple->in_vlan_ingress |= BIT(j);
|
||||
j++;
|
||||
}
|
||||
flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -594,8 +594,12 @@ nf_flow_rule_route_common(struct net *ne
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
|
||||
for (i = 0; i < other_tuple->encap_num; i++) {
|
||||
- struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
+ struct flow_action_entry *entry;
|
||||
|
||||
+ if (other_tuple->in_vlan_ingress & BIT(i))
|
||||
+ continue;
|
||||
+
|
||||
+ entry = flow_action_entry_next(flow_rule);
|
||||
entry->id = FLOW_ACTION_VLAN_PUSH;
|
||||
entry->vlan.vid = other_tuple->encap[i].id;
|
||||
entry->vlan.proto = other_tuple->encap[i].proto;
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -72,6 +72,7 @@ struct nft_forward_info {
|
||||
__be16 proto;
|
||||
} encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
u8 num_encaps;
|
||||
+ u8 ingress_vlans;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
@@ -130,6 +131,9 @@ static void nft_dev_path_info(const stru
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
|
||||
switch (path->bridge.vlan_mode) {
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
|
||||
+ info->ingress_vlans |= BIT(info->num_encaps - 1);
|
||||
+ break;
|
||||
case DEV_PATH_BR_VLAN_TAG:
|
||||
info->encap[info->num_encaps].id = path->bridge.vlan_id;
|
||||
info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
|
||||
@@ -198,6 +202,7 @@ static void nft_dev_forward_path(struct
|
||||
route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
|
||||
}
|
||||
route->tuple[!dir].in.num_encaps = info.num_encaps;
|
||||
+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
|
||||
|
||||
if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
|
||||
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
@ -0,0 +1,30 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:49 +0100
|
||||
Subject: [PATCH] net: flow_offload: add FLOW_ACTION_PPPOE_PUSH
|
||||
|
||||
Add an action to represent the PPPoE hardware offload support that
|
||||
includes the session ID.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/flow_offload.h
|
||||
+++ b/include/net/flow_offload.h
|
||||
@@ -147,6 +147,7 @@ enum flow_action_id {
|
||||
FLOW_ACTION_MPLS_POP,
|
||||
FLOW_ACTION_MPLS_MANGLE,
|
||||
FLOW_ACTION_GATE,
|
||||
+ FLOW_ACTION_PPPOE_PUSH,
|
||||
NUM_FLOW_ACTIONS,
|
||||
};
|
||||
|
||||
@@ -271,6 +272,9 @@ struct flow_action_entry {
|
||||
u32 num_entries;
|
||||
struct action_gate_entry *entries;
|
||||
} gate;
|
||||
+ struct { /* FLOW_ACTION_PPPOE_PUSH */
|
||||
+ u16 sid;
|
||||
+ } pppoe;
|
||||
};
|
||||
struct flow_action_cookie *cookie; /* user defined action cookie */
|
||||
};
|
@ -0,0 +1,35 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:50 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: support for
|
||||
FLOW_ACTION_PPPOE_PUSH
|
||||
|
||||
Add a PPPoE push action if layer 2 protocol is ETH_P_PPP_SES to add
|
||||
PPPoE flowtable hardware offload support.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -600,9 +600,18 @@ nf_flow_rule_route_common(struct net *ne
|
||||
continue;
|
||||
|
||||
entry = flow_action_entry_next(flow_rule);
|
||||
- entry->id = FLOW_ACTION_VLAN_PUSH;
|
||||
- entry->vlan.vid = other_tuple->encap[i].id;
|
||||
- entry->vlan.proto = other_tuple->encap[i].proto;
|
||||
+
|
||||
+ switch (other_tuple->encap[i].proto) {
|
||||
+ case htons(ETH_P_PPP_SES):
|
||||
+ entry->id = FLOW_ACTION_PPPOE_PUSH;
|
||||
+ entry->pppoe.sid = other_tuple->encap[i].id;
|
||||
+ break;
|
||||
+ case htons(ETH_P_8021Q):
|
||||
+ entry->id = FLOW_ACTION_VLAN_PUSH;
|
||||
+ entry->vlan.vid = other_tuple->encap[i].id;
|
||||
+ entry->vlan.proto = other_tuple->encap[i].proto;
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
|
||||
return 0;
|
@ -0,0 +1,53 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:51 +0100
|
||||
Subject: [PATCH] dsa: slave: add support for TC_SETUP_FT
|
||||
|
||||
The dsa infrastructure provides a well-defined hierarchy of devices,
|
||||
so pass up the call to set up the flow block to the master device. From the
|
||||
software dataplane, the netfilter infrastructure uses the dsa slave
|
||||
devices to refer to the input and output device for the given skbuff.
|
||||
Similarly, the flowtable definition in the ruleset refers to the dsa
|
||||
slave port devices.
|
||||
|
||||
This patch adds the glue code to call ndo_setup_tc with TC_SETUP_FT
|
||||
with the master device via the dsa slave devices.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -1239,14 +1239,32 @@ static int dsa_slave_setup_tc_block(stru
|
||||
}
|
||||
}
|
||||
|
||||
+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
|
||||
+ void *type_data)
|
||||
+{
|
||||
+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
|
||||
+ struct net_device *master = cpu_dp->master;
|
||||
+
|
||||
+ if (!master->netdev_ops->ndo_setup_tc)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
|
||||
+}
|
||||
+
|
||||
static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
|
||||
void *type_data)
|
||||
{
|
||||
struct dsa_port *dp = dsa_slave_to_port(dev);
|
||||
struct dsa_switch *ds = dp->ds;
|
||||
|
||||
- if (type == TC_SETUP_BLOCK)
|
||||
+ switch (type) {
|
||||
+ case TC_SETUP_BLOCK:
|
||||
return dsa_slave_setup_tc_block(dev, type_data);
|
||||
+ case TC_SETUP_FT:
|
||||
+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
if (!ds->ops->port_setup_tc)
|
||||
return -EOPNOTSUPP;
|
@ -0,0 +1,68 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:52 +0100
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: fix parsing packets in GDM
|
||||
|
||||
When using DSA, set the special tag in GDM ingress control to allow the MAC
|
||||
to parse packets properly earlier. This affects rx DMA source port reporting.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/pinctrl/devinfo.h>
|
||||
#include <linux/phylink.h>
|
||||
+#include <net/dsa.h>
|
||||
|
||||
#include "mtk_eth_soc.h"
|
||||
|
||||
@@ -1285,13 +1286,12 @@ static int mtk_poll_rx(struct napi_struc
|
||||
break;
|
||||
|
||||
/* find out which mac the packet come from. values start at 1 */
|
||||
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
|
||||
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) ||
|
||||
+ (trxd.rxd4 & RX_DMA_SPECIAL_TAG))
|
||||
mac = 0;
|
||||
- } else {
|
||||
- mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
|
||||
- RX_DMA_FPORT_MASK;
|
||||
- mac--;
|
||||
- }
|
||||
+ else
|
||||
+ mac = ((trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
|
||||
+ RX_DMA_FPORT_MASK) - 1;
|
||||
|
||||
if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
|
||||
!eth->netdev[mac]))
|
||||
@@ -2254,6 +2254,9 @@ static void mtk_gdm_config(struct mtk_et
|
||||
|
||||
val |= config;
|
||||
|
||||
+ if (!i && eth->netdev[0] && netdev_uses_dsa(eth->netdev[0]))
|
||||
+ val |= MTK_GDMA_SPECIAL_TAG;
|
||||
+
|
||||
mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i));
|
||||
}
|
||||
/* Reset and enable PSE */
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -81,6 +81,7 @@
|
||||
|
||||
/* GDM Exgress Control Register */
|
||||
#define MTK_GDMA_FWD_CFG(x) (0x500 + (x * 0x1000))
|
||||
+#define MTK_GDMA_SPECIAL_TAG BIT(24)
|
||||
#define MTK_GDMA_ICS_EN BIT(22)
|
||||
#define MTK_GDMA_TCS_EN BIT(21)
|
||||
#define MTK_GDMA_UCS_EN BIT(20)
|
||||
@@ -318,6 +319,7 @@
|
||||
#define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
|
||||
#define RX_DMA_FPORT_SHIFT 19
|
||||
#define RX_DMA_FPORT_MASK 0x7
|
||||
+#define RX_DMA_SPECIAL_TAG BIT(22)
|
||||
|
||||
/* PHY Indirect Access Control registers */
|
||||
#define MTK_PHY_IAC 0x10004
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,568 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:54 +0100
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: add flow offloading support
|
||||
|
||||
This adds support for offloading IPv4 routed flows, including SNAT/DNAT,
|
||||
one VLAN, PPPoE and DSA.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/Makefile
|
||||
+++ b/drivers/net/ethernet/mediatek/Makefile
|
||||
@@ -4,5 +4,5 @@
|
||||
#
|
||||
|
||||
obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
|
||||
-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o
|
||||
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
|
||||
obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -2834,6 +2834,7 @@ static const struct net_device_ops mtk_n
|
||||
#ifdef CONFIG_NET_POLL_CONTROLLER
|
||||
.ndo_poll_controller = mtk_poll_controller,
|
||||
#endif
|
||||
+ .ndo_setup_tc = mtk_eth_setup_tc,
|
||||
};
|
||||
|
||||
static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
|
||||
@@ -3092,6 +3093,10 @@ static int mtk_probe(struct platform_dev
|
||||
eth->base + MTK_ETH_PPE_BASE, 2);
|
||||
if (err)
|
||||
goto err_free_dev;
|
||||
+
|
||||
+ err = mtk_eth_offload_init(eth);
|
||||
+ if (err)
|
||||
+ goto err_free_dev;
|
||||
}
|
||||
|
||||
for (i = 0; i < MTK_MAX_DEVS; i++) {
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <linux/u64_stats_sync.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/phylink.h>
|
||||
+#include <linux/rhashtable.h>
|
||||
#include "mtk_ppe.h"
|
||||
|
||||
#define MTK_QDMA_PAGE_SIZE 2048
|
||||
@@ -40,7 +41,8 @@
|
||||
NETIF_F_HW_VLAN_CTAG_RX | \
|
||||
NETIF_F_SG | NETIF_F_TSO | \
|
||||
NETIF_F_TSO6 | \
|
||||
- NETIF_F_IPV6_CSUM)
|
||||
+ NETIF_F_IPV6_CSUM |\
|
||||
+ NETIF_F_HW_TC)
|
||||
#define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
|
||||
#define NEXT_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1))
|
||||
|
||||
@@ -929,6 +931,7 @@ struct mtk_eth {
|
||||
int ip_align;
|
||||
|
||||
struct mtk_ppe ppe;
|
||||
+ struct rhashtable flow_table;
|
||||
};
|
||||
|
||||
/* struct mtk_mac - the structure that holds the info about the MACs of the
|
||||
@@ -973,4 +976,9 @@ int mtk_gmac_sgmii_path_setup(struct mtk
|
||||
int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id);
|
||||
int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
|
||||
|
||||
+int mtk_eth_offload_init(struct mtk_eth *eth);
|
||||
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
|
||||
+ void *type_data);
|
||||
+
|
||||
+
|
||||
#endif /* MTK_ETH_H */
|
||||
--- /dev/null
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
@@ -0,0 +1,485 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+/*
|
||||
+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
|
||||
+ */
|
||||
+
|
||||
+#include <linux/if_ether.h>
|
||||
+#include <linux/rhashtable.h>
|
||||
+#include <linux/if_ether.h>
|
||||
+#include <linux/ip.h>
|
||||
+#include <net/flow_offload.h>
|
||||
+#include <net/pkt_cls.h>
|
||||
+#include <net/dsa.h>
|
||||
+#include "mtk_eth_soc.h"
|
||||
+
|
||||
+struct mtk_flow_data {
|
||||
+ struct ethhdr eth;
|
||||
+
|
||||
+ union {
|
||||
+ struct {
|
||||
+ __be32 src_addr;
|
||||
+ __be32 dst_addr;
|
||||
+ } v4;
|
||||
+ };
|
||||
+
|
||||
+ __be16 src_port;
|
||||
+ __be16 dst_port;
|
||||
+
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ u8 num;
|
||||
+ } vlan;
|
||||
+ struct {
|
||||
+ u16 sid;
|
||||
+ u8 num;
|
||||
+ } pppoe;
|
||||
+};
|
||||
+
|
||||
+struct mtk_flow_entry {
|
||||
+ struct rhash_head node;
|
||||
+ unsigned long cookie;
|
||||
+ u16 hash;
|
||||
+};
|
||||
+
|
||||
+static const struct rhashtable_params mtk_flow_ht_params = {
|
||||
+ .head_offset = offsetof(struct mtk_flow_entry, node),
|
||||
+ .head_offset = offsetof(struct mtk_flow_entry, cookie),
|
||||
+ .key_len = sizeof(unsigned long),
|
||||
+ .automatic_shrinking = true,
|
||||
+};
|
||||
+
|
||||
+static u32
|
||||
+mtk_eth_timestamp(struct mtk_eth *eth)
|
||||
+{
|
||||
+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
|
||||
+ bool egress)
|
||||
+{
|
||||
+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
|
||||
+ data->v4.src_addr, data->src_port,
|
||||
+ data->v4.dst_addr, data->dst_port);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
|
||||
+{
|
||||
+ void *dest = eth + act->mangle.offset;
|
||||
+ const void *src = &act->mangle.val;
|
||||
+
|
||||
+ if (act->mangle.offset > 8)
|
||||
+ return;
|
||||
+
|
||||
+ if (act->mangle.mask == 0xffff) {
|
||||
+ src += 2;
|
||||
+ dest += 2;
|
||||
+ }
|
||||
+
|
||||
+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_mangle_ports(const struct flow_action_entry *act,
|
||||
+ struct mtk_flow_data *data)
|
||||
+{
|
||||
+ u32 val = ntohl(act->mangle.val);
|
||||
+
|
||||
+ switch (act->mangle.offset) {
|
||||
+ case 0:
|
||||
+ if (act->mangle.mask == ~htonl(0xffff))
|
||||
+ data->dst_port = cpu_to_be16(val);
|
||||
+ else
|
||||
+ data->src_port = cpu_to_be16(val >> 16);
|
||||
+ break;
|
||||
+ case 2:
|
||||
+ data->dst_port = cpu_to_be16(val);
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
|
||||
+ struct mtk_flow_data *data)
|
||||
+{
|
||||
+ __be32 *dest;
|
||||
+
|
||||
+ switch (act->mangle.offset) {
|
||||
+ case offsetof(struct iphdr, saddr):
|
||||
+ dest = &data->v4.src_addr;
|
||||
+ break;
|
||||
+ case offsetof(struct iphdr, daddr):
|
||||
+ dest = &data->v4.dst_addr;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ memcpy(dest, &act->mangle.val, sizeof(u32));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_get_dsa_port(struct net_device **dev)
|
||||
+{
|
||||
+#if IS_ENABLED(CONFIG_NET_DSA)
|
||||
+ struct dsa_port *dp;
|
||||
+
|
||||
+ dp = dsa_port_from_netdev(*dev);
|
||||
+ if (IS_ERR(dp))
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ *dev = dp->cpu_dp->master;
|
||||
+
|
||||
+ return dp->index;
|
||||
+#else
|
||||
+ return -ENODEV;
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
|
||||
+ struct net_device *dev)
|
||||
+{
|
||||
+ int pse_port, dsa_port;
|
||||
+
|
||||
+ dsa_port = mtk_flow_get_dsa_port(&dev);
|
||||
+ if (dsa_port >= 0)
|
||||
+ mtk_foe_entry_set_dsa(foe, dsa_port);
|
||||
+
|
||||
+ if (dev == eth->netdev[0])
|
||||
+ pse_port = 1;
|
||||
+ else if (dev == eth->netdev[1])
|
||||
+ pse_port = 2;
|
||||
+ else
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ mtk_foe_entry_set_pse_port(foe, pse_port);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
|
||||
+{
|
||||
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
|
||||
+ struct flow_action_entry *act;
|
||||
+ struct mtk_flow_data data = {};
|
||||
+ struct mtk_foe_entry foe;
|
||||
+ struct net_device *odev = NULL;
|
||||
+ struct mtk_flow_entry *entry;
|
||||
+ int offload_type = 0;
|
||||
+ u16 addr_type = 0;
|
||||
+ u32 timestamp;
|
||||
+ u8 l4proto = 0;
|
||||
+ int err = 0;
|
||||
+ int hash;
|
||||
+ int i;
|
||||
+
|
||||
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
|
||||
+ struct flow_match_meta match;
|
||||
+
|
||||
+ flow_rule_match_meta(rule, &match);
|
||||
+ } else {
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
|
||||
+ struct flow_match_control match;
|
||||
+
|
||||
+ flow_rule_match_control(rule, &match);
|
||||
+ addr_type = match.key->addr_type;
|
||||
+ } else {
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
|
||||
+ struct flow_match_basic match;
|
||||
+
|
||||
+ flow_rule_match_basic(rule, &match);
|
||||
+ l4proto = match.key->ip_proto;
|
||||
+ } else {
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ flow_action_for_each(i, act, &rule->action) {
|
||||
+ switch (act->id) {
|
||||
+ case FLOW_ACTION_MANGLE:
|
||||
+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
|
||||
+ mtk_flow_offload_mangle_eth(act, &data.eth);
|
||||
+ break;
|
||||
+ case FLOW_ACTION_REDIRECT:
|
||||
+ odev = act->dev;
|
||||
+ break;
|
||||
+ case FLOW_ACTION_CSUM:
|
||||
+ break;
|
||||
+ case FLOW_ACTION_VLAN_PUSH:
|
||||
+ if (data.vlan.num == 1 ||
|
||||
+ act->vlan.proto != htons(ETH_P_8021Q))
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ data.vlan.id = act->vlan.vid;
|
||||
+ data.vlan.proto = act->vlan.proto;
|
||||
+ data.vlan.num++;
|
||||
+ break;
|
||||
+ case FLOW_ACTION_PPPOE_PUSH:
|
||||
+ if (data.pppoe.num == 1)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ data.pppoe.sid = act->pppoe.sid;
|
||||
+ data.pppoe.num++;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ switch (addr_type) {
|
||||
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
|
||||
+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (!is_valid_ether_addr(data.eth.h_source) ||
|
||||
+ !is_valid_ether_addr(data.eth.h_dest))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
|
||||
+ data.eth.h_source,
|
||||
+ data.eth.h_dest);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
|
||||
+ struct flow_match_ports ports;
|
||||
+
|
||||
+ flow_rule_match_ports(rule, &ports);
|
||||
+ data.src_port = ports.key->src;
|
||||
+ data.dst_port = ports.key->dst;
|
||||
+ } else {
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
|
||||
+ struct flow_match_ipv4_addrs addrs;
|
||||
+
|
||||
+ flow_rule_match_ipv4_addrs(rule, &addrs);
|
||||
+
|
||||
+ data.v4.src_addr = addrs.key->src;
|
||||
+ data.v4.dst_addr = addrs.key->dst;
|
||||
+
|
||||
+ mtk_flow_set_ipv4_addr(&foe, &data, false);
|
||||
+ }
|
||||
+
|
||||
+ flow_action_for_each(i, act, &rule->action) {
|
||||
+ if (act->id != FLOW_ACTION_MANGLE)
|
||||
+ continue;
|
||||
+
|
||||
+ switch (act->mangle.htype) {
|
||||
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
|
||||
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
|
||||
+ err = mtk_flow_mangle_ports(act, &data);
|
||||
+ break;
|
||||
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
|
||||
+ err = mtk_flow_mangle_ipv4(act, &data);
|
||||
+ break;
|
||||
+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
|
||||
+ /* handled earlier */
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
|
||||
+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
+ if (data.vlan.num == 1) {
|
||||
+ if (data.vlan.proto != htons(ETH_P_8021Q))
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
|
||||
+ }
|
||||
+ if (data.pppoe.num == 1)
|
||||
+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
|
||||
+
|
||||
+ err = mtk_flow_set_output_device(eth, &foe, odev);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
|
||||
+ if (!entry)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ entry->cookie = f->cookie;
|
||||
+ timestamp = mtk_eth_timestamp(eth);
|
||||
+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
|
||||
+ if (hash < 0) {
|
||||
+ err = hash;
|
||||
+ goto free;
|
||||
+ }
|
||||
+
|
||||
+ entry->hash = hash;
|
||||
+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
|
||||
+ mtk_flow_ht_params);
|
||||
+ if (err < 0)
|
||||
+ goto clear_flow;
|
||||
+
|
||||
+ return 0;
|
||||
+clear_flow:
|
||||
+ mtk_foe_entry_clear(&eth->ppe, hash);
|
||||
+free:
|
||||
+ kfree(entry);
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
|
||||
+{
|
||||
+ struct mtk_flow_entry *entry;
|
||||
+
|
||||
+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
|
||||
+ mtk_flow_ht_params);
|
||||
+ if (!entry)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
|
||||
+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
|
||||
+ mtk_flow_ht_params);
|
||||
+ kfree(entry);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
|
||||
+{
|
||||
+ struct mtk_flow_entry *entry;
|
||||
+ int timestamp;
|
||||
+ u32 idle;
|
||||
+
|
||||
+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
|
||||
+ mtk_flow_ht_params);
|
||||
+ if (!entry)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
|
||||
+ if (timestamp < 0)
|
||||
+ return -ETIMEDOUT;
|
||||
+
|
||||
+ idle = mtk_eth_timestamp(eth) - timestamp;
|
||||
+ f->stats.lastused = jiffies - idle * HZ;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
|
||||
+{
|
||||
+ struct flow_cls_offload *cls = type_data;
|
||||
+ struct net_device *dev = cb_priv;
|
||||
+ struct mtk_mac *mac = netdev_priv(dev);
|
||||
+ struct mtk_eth *eth = mac->hw;
|
||||
+
|
||||
+ if (!tc_can_offload(dev))
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if (type != TC_SETUP_CLSFLOWER)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ switch (cls->command) {
|
||||
+ case FLOW_CLS_REPLACE:
|
||||
+ return mtk_flow_offload_replace(eth, cls);
|
||||
+ case FLOW_CLS_DESTROY:
|
||||
+ return mtk_flow_offload_destroy(eth, cls);
|
||||
+ case FLOW_CLS_STATS:
|
||||
+ return mtk_flow_offload_stats(eth, cls);
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
|
||||
+{
|
||||
+ struct mtk_mac *mac = netdev_priv(dev);
|
||||
+ struct mtk_eth *eth = mac->hw;
|
||||
+ static LIST_HEAD(block_cb_list);
|
||||
+ struct flow_block_cb *block_cb;
|
||||
+ flow_setup_cb_t *cb;
|
||||
+
|
||||
+ if (!eth->ppe.foe_table)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ cb = mtk_eth_setup_tc_block_cb;
|
||||
+ f->driver_block_list = &block_cb_list;
|
||||
+
|
||||
+ switch (f->command) {
|
||||
+ case FLOW_BLOCK_BIND:
|
||||
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
|
||||
+ if (block_cb) {
|
||||
+ flow_block_cb_incref(block_cb);
|
||||
+ return 0;
|
||||
+ }
|
||||
+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
|
||||
+ if (IS_ERR(block_cb))
|
||||
+ return PTR_ERR(block_cb);
|
||||
+
|
||||
+ flow_block_cb_add(block_cb, f);
|
||||
+ list_add_tail(&block_cb->driver_list, &block_cb_list);
|
||||
+ return 0;
|
||||
+ case FLOW_BLOCK_UNBIND:
|
||||
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
|
||||
+ if (!block_cb)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ if (flow_block_cb_decref(block_cb)) {
|
||||
+ flow_block_cb_remove(block_cb, f);
|
||||
+ list_del(&block_cb->driver_list);
|
||||
+ }
|
||||
+ return 0;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
|
||||
+ void *type_data)
|
||||
+{
|
||||
+ if (type == TC_SETUP_FT)
|
||||
+ return mtk_eth_setup_tc_block(dev, type_data);
|
||||
+
|
||||
+ return -EOPNOTSUPP;
|
||||
+}
|
||||
+
|
||||
+int mtk_eth_offload_init(struct mtk_eth *eth)
|
||||
+{
|
||||
+ if (!eth->ppe.foe_table)
|
||||
+ return 0;
|
||||
+
|
||||
+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
|
||||
+}
|
@ -0,0 +1,236 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:55 +0100
|
||||
Subject: [PATCH] docs: nf_flowtable: update documentation with
|
||||
enhancements
|
||||
|
||||
This patch updates the flowtable documentation to describe recent
|
||||
enhancements:
|
||||
|
||||
- Offload action is available after the first packets go through the
|
||||
classic forwarding path.
|
||||
- IPv4 and IPv6 are supported. Only TCP and UDP layer 4 are supported at
|
||||
this stage.
|
||||
- Tuple has been augmented to track VLAN id and PPPoE session id.
|
||||
- Bridge and IP forwarding integration, including bridge VLAN filtering
|
||||
support.
|
||||
- Hardware offload support.
|
||||
- Describe the [OFFLOAD] and [HW_OFFLOAD] tags in the conntrack table
|
||||
listing.
|
||||
- Replace 'flow offload' by 'flow add' in example rulesets (preferred
|
||||
syntax).
|
||||
- Describe existing cache limitations.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/Documentation/networking/nf_flowtable.rst
|
||||
+++ b/Documentation/networking/nf_flowtable.rst
|
||||
@@ -4,35 +4,38 @@
|
||||
Netfilter's flowtable infrastructure
|
||||
====================================
|
||||
|
||||
-This documentation describes the software flowtable infrastructure available in
|
||||
-Netfilter since Linux kernel 4.16.
|
||||
+This documentation describes the Netfilter flowtable infrastructure which allows
|
||||
+you to define a fastpath through the flowtable datapath. This infrastructure
|
||||
+also provides hardware offload support. The flowtable supports the layer 3
|
||||
+IPv4 and IPv6 and the layer 4 TCP and UDP protocols.
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
-Initial packets follow the classic forwarding path, once the flow enters the
|
||||
-established state according to the conntrack semantics (ie. we have seen traffic
|
||||
-in both directions), then you can decide to offload the flow to the flowtable
|
||||
-from the forward chain via the 'flow offload' action available in nftables.
|
||||
-
|
||||
-Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the
|
||||
-output netdevice via neigh_xmit(), hence, they bypass the classic forwarding
|
||||
-path (the visible effect is that you do not see these packets from any of the
|
||||
-netfilter hooks coming after the ingress). In case of flowtable miss, the packet
|
||||
-follows the classic forward path.
|
||||
-
|
||||
-The flowtable uses a resizable hashtable, lookups are based on the following
|
||||
-7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source
|
||||
-and destination ports and the input interface (useful in case there are several
|
||||
-conntrack zones in place).
|
||||
-
|
||||
-Flowtables are populated via the 'flow offload' nftables action, so the user can
|
||||
-selectively specify what flows are placed into the flow table. Hence, packets
|
||||
-follow the classic forwarding path unless the user explicitly instruct packets
|
||||
-to use this new alternative forwarding path via nftables policy.
|
||||
+Once the first packet of the flow successfully goes through the IP forwarding
|
||||
+path, from the second packet on, you might decide to offload the flow to the
|
||||
+flowtable through your ruleset. The flowtable infrastructure provides a rule
|
||||
+action that allows you to specify when to add a flow to the flowtable.
|
||||
+
|
||||
+A packet that finds a matching entry in the flowtable (ie. flowtable hit) is
|
||||
+transmitted to the output netdevice via neigh_xmit(), hence, packets bypass the
|
||||
+classic IP forwarding path (the visible effect is that you do not see these
|
||||
+packets from any of the Netfilter hooks coming after ingress). In case that
|
||||
+there is no matching entry in the flowtable (ie. flowtable miss), the packet
|
||||
+follows the classic IP forwarding path.
|
||||
+
|
||||
+The flowtable uses a resizable hashtable. Lookups are based on the following
|
||||
+n-tuple selectors: layer 2 protocol encapsulation (VLAN and PPPoE), layer 3
|
||||
+source and destination, layer 4 source and destination ports and the input
|
||||
+interface (useful in case there are several conntrack zones in place).
|
||||
+
|
||||
+The 'flow add' action allows you to populate the flowtable; the user selectively
|
||||
+specifies what flows are placed into the flowtable. Hence, packets follow the
|
||||
+classic IP forwarding path unless the user explicitly instructs flows to use this
|
||||
+new alternative forwarding path via policy.
|
||||
|
||||
-This is represented in Fig.1, which describes the classic forwarding path
|
||||
-including the Netfilter hooks and the flowtable fastpath bypass.
|
||||
+The flowtable datapath is represented in Fig.1, which describes the classic IP
|
||||
+forwarding path including the Netfilter hooks and the flowtable fastpath bypass.
|
||||
|
||||
::
|
||||
|
||||
@@ -67,11 +70,13 @@ including the Netfilter hooks and the fl
|
||||
Fig.1 Netfilter hooks and flowtable interactions
|
||||
|
||||
The flowtable entry also stores the NAT configuration, so all packets are
|
||||
-mangled according to the NAT policy that matches the initial packets that went
|
||||
-through the classic forwarding path. The TTL is decremented before calling
|
||||
-neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding
|
||||
-path given that the transport selectors are missing, therefore flowtable lookup
|
||||
-is not possible.
|
||||
+mangled according to the NAT policy that is specified from the classic IP
|
||||
+forwarding path. The TTL is decremented before calling neigh_xmit(). Fragmented
|
||||
+traffic is passed up to follow the classic IP forwarding path given that the
|
||||
+transport header is missing, in this case, flowtable lookups are not possible.
|
||||
+TCP RST and FIN packets are also passed up to the classic IP forwarding path to
|
||||
+release the flow gracefully. Packets that exceed the MTU are also passed up to
|
||||
+the classic forwarding path to report packet-too-big ICMP errors to the sender.
|
||||
|
||||
Example configuration
|
||||
---------------------
|
||||
@@ -85,7 +90,7 @@ flowtable and add one rule to your forwa
|
||||
}
|
||||
chain y {
|
||||
type filter hook forward priority 0; policy accept;
|
||||
- ip protocol tcp flow offload @f
|
||||
+ ip protocol tcp flow add @f
|
||||
counter packets 0 bytes 0
|
||||
}
|
||||
}
|
||||
@@ -103,6 +108,117 @@ flow is offloaded, you will observe that
|
||||
does not get updated for the packets that are being forwarded through the
|
||||
forwarding bypass.
|
||||
|
||||
+You can identify offloaded flows through the [OFFLOAD] tag when listing your
|
||||
+connection tracking table.
|
||||
+
|
||||
+::
|
||||
+ # conntrack -L
|
||||
+ tcp 6 src=10.141.10.2 dst=192.168.10.2 sport=52728 dport=5201 src=192.168.10.2 dst=192.168.10.1 sport=5201 dport=52728 [OFFLOAD] mark=0 use=2
|
||||
+
|
||||
+
|
||||
+Layer 2 encapsulation
|
||||
+---------------------
|
||||
+
|
||||
+Since Linux kernel 5.13, the flowtable infrastructure discovers the real
|
||||
+netdevice behind VLAN and PPPoE netdevices. The flowtable software datapath
|
||||
+parses the VLAN and PPPoE layer 2 headers to extract the ethertype and the
|
||||
+VLAN ID / PPPoE session ID which are used for the flowtable lookups. The
|
||||
+flowtable datapath also deals with layer 2 decapsulation.
|
||||
+
|
||||
+You do not need to add the PPPoE and the VLAN devices to your flowtable,
|
||||
+instead the real device is sufficient for the flowtable to track your flows.
|
||||
+
|
||||
+Bridge and IP forwarding
|
||||
+------------------------
|
||||
+
|
||||
+Since Linux kernel 5.13, you can add bridge ports to the flowtable. The
|
||||
+flowtable infrastructure discovers the topology behind the bridge device. This
|
||||
+allows the flowtable to define a fastpath bypass between the bridge ports
|
||||
+(represented as eth1 and eth2 in the example figure below) and the gateway
|
||||
+device (represented as eth0) in your switch/router.
|
||||
+
|
||||
+::
|
||||
+ fastpath bypass
|
||||
+ .-------------------------.
|
||||
+ / \
|
||||
+ | IP forwarding |
|
||||
+ | / \ \/
|
||||
+ | br0 eth0 ..... eth0
|
||||
+ . / \ *host B*
|
||||
+ -> eth1 eth2
|
||||
+ . *switch/router*
|
||||
+ .
|
||||
+ .
|
||||
+ eth0
|
||||
+ *host A*
|
||||
+
|
||||
+The flowtable infrastructure also supports bridge VLAN filtering actions
|
||||
+such as PVID and untagged. You can also stack a classic VLAN device on top of
|
||||
+your bridge port.
|
||||
+
|
||||
+If you would like your flowtable to define a fastpath between your bridge
|
||||
+ports and your IP forwarding path, you have to add your bridge ports (as
|
||||
+represented by the real netdevice) to your flowtable definition.
|
||||
+
|
||||
+Counters
|
||||
+--------
|
||||
+
|
||||
+The flowtable can synchronize packet and byte counters with the existing
|
||||
+connection tracking entry by specifying the counter statement in your flowtable
|
||||
+definition, e.g.
|
||||
+
|
||||
+::
|
||||
+ table inet x {
|
||||
+ flowtable f {
|
||||
+ hook ingress priority 0; devices = { eth0, eth1 };
|
||||
+ counter
|
||||
+ }
|
||||
+ ...
|
||||
+ }
|
||||
+
|
||||
+Counter support is available since Linux kernel 5.7.
|
||||
+
|
||||
+Hardware offload
|
||||
+----------------
|
||||
+
|
||||
+If your network device provides hardware offload support, you can turn it on by
|
||||
+means of the 'offload' flag in your flowtable definition, e.g.
|
||||
+
|
||||
+::
|
||||
+ table inet x {
|
||||
+ flowtable f {
|
||||
+ hook ingress priority 0; devices = { eth0, eth1 };
|
||||
+ flags offload;
|
||||
+ }
|
||||
+ ...
|
||||
+ }
|
||||
+
|
||||
+There is a workqueue that adds the flows to the hardware. Note that a few
|
||||
+packets might still run over the flowtable software path until the workqueue has
|
||||
+a chance to offload the flow to the network device.
|
||||
+
|
||||
+You can identify hardware offloaded flows through the [HW_OFFLOAD] tag when
|
||||
+listing your connection tracking table. Please note that the [OFFLOAD] tag
|
||||
+refers to the software offload mode, so there is a distinction between [OFFLOAD]
|
||||
+which refers to the software flowtable fastpath and [HW_OFFLOAD] which refers
|
||||
+to the hardware offload datapath being used by the flow.
|
||||
+
|
||||
+The flowtable hardware offload infrastructure also supports the DSA
|
||||
+(Distributed Switch Architecture).
|
||||
+
|
||||
+Limitations
|
||||
+-----------
|
||||
+
|
||||
+The flowtable behaves like a cache. The flowtable entries might get stale if
|
||||
+either the destination MAC address or the egress netdevice that is used for
|
||||
+transmission changes.
|
||||
+
|
||||
+This might be a problem if:
|
||||
+
|
||||
+- You run the flowtable in software mode and you combine bridge and IP
|
||||
+ forwarding in your setup.
|
||||
+- Hardware offload is enabled.
|
||||
+
|
||||
More reading
|
||||
------------
|
||||
|
@ -0,0 +1,72 @@
|
||||
From c5d66587b8900201e1530b7c18d41e87bd5812f4 Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Date: Thu, 15 Apr 2021 17:37:48 -0700
|
||||
Subject: [PATCH] net: ethernet: mediatek: ppe: fix busy wait loop
|
||||
|
||||
The intention is for the loop to timeout if the body does not succeed.
|
||||
The current logic calls time_is_before_jiffies(timeout) which is false
|
||||
until after the timeout, so the loop body never executes.
|
||||
|
||||
Fix by using readl_poll_timeout as a more standard and less error-prone
|
||||
solution.
|
||||
|
||||
Fixes: ba37b7caf1ed ("net: ethernet: mtk_eth_soc: add support for initializing the PPE")
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Cc: Felix Fietkau <nbd@nbd.name>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_ppe.c | 20 +++++++++-----------
|
||||
drivers/net/ethernet/mediatek/mtk_ppe.h | 1 +
|
||||
2 files changed, 10 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
|
||||
@@ -2,9 +2,8 @@
|
||||
/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
|
||||
|
||||
#include <linux/kernel.h>
|
||||
-#include <linux/jiffies.h>
|
||||
-#include <linux/delay.h>
|
||||
#include <linux/io.h>
|
||||
+#include <linux/iopoll.h>
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include "mtk_ppe.h"
|
||||
@@ -44,18 +43,17 @@ static u32 ppe_clear(struct mtk_ppe *ppe
|
||||
|
||||
static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
|
||||
{
|
||||
- unsigned long timeout = jiffies + HZ;
|
||||
-
|
||||
- while (time_is_before_jiffies(timeout)) {
|
||||
- if (!(ppe_r32(ppe, MTK_PPE_GLO_CFG) & MTK_PPE_GLO_CFG_BUSY))
|
||||
- return 0;
|
||||
+ int ret;
|
||||
+ u32 val;
|
||||
|
||||
- usleep_range(10, 20);
|
||||
- }
|
||||
+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
|
||||
+ !(val & MTK_PPE_GLO_CFG_BUSY),
|
||||
+ 20, MTK_PPE_WAIT_TIMEOUT_US);
|
||||
|
||||
- dev_err(ppe->dev, "PPE table busy");
|
||||
+ if (ret)
|
||||
+ dev_err(ppe->dev, "PPE table busy");
|
||||
|
||||
- return -ETIMEDOUT;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
|
||||
@@ -12,6 +12,7 @@
|
||||
#define MTK_PPE_ENTRIES_SHIFT 3
|
||||
#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
|
||||
#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
|
||||
+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
|
||||
|
||||
#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
|
||||
#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
|
@ -0,0 +1,29 @@
|
||||
From 6ecaf81d4ac6365f9284f9d68d74f7c209e74f98 Mon Sep 17 00:00:00 2001
|
||||
From: DENG Qingfang <dqfext@gmail.com>
|
||||
Date: Sat, 17 Apr 2021 15:29:04 +0800
|
||||
Subject: [PATCH] net: ethernet: mediatek: fix a typo bug in flow offloading
|
||||
|
||||
Issue was traffic problems after a while with increased ping times if
|
||||
flow offload is active. It turns out that key_offset with cookie is
|
||||
needed in rhashtable_params but was re-assigned to head_offset.
|
||||
Fix the assignment.
|
||||
|
||||
Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
|
||||
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
|
||||
Tested-by: Frank Wunderlich <frank-w@public-files.de>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
@@ -44,7 +44,7 @@ struct mtk_flow_entry {
|
||||
|
||||
static const struct rhashtable_params mtk_flow_ht_params = {
|
||||
.head_offset = offsetof(struct mtk_flow_entry, node),
|
||||
- .head_offset = offsetof(struct mtk_flow_entry, cookie),
|
||||
+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
|
||||
.key_len = sizeof(unsigned long),
|
||||
.automatic_shrinking = true,
|
||||
};
|
@ -0,0 +1,38 @@
|
||||
From 5196c417854942e218a59ec87bf7d414b3bd581e Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:20:55 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: unmap RX data before calling
|
||||
build_skb
|
||||
|
||||
Since build_skb accesses the data area (for initializing shinfo), dma unmap
|
||||
needs to happen before that call
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
[Ilya: split build_skb cleanup fix into a separate commit]
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1319,6 +1319,9 @@ static int mtk_poll_rx(struct napi_struc
|
||||
goto release_desc;
|
||||
}
|
||||
|
||||
+ dma_unmap_single(eth->dev, trxd.rxd1,
|
||||
+ ring->buf_size, DMA_FROM_DEVICE);
|
||||
+
|
||||
/* receive data */
|
||||
skb = build_skb(data, ring->frag_size);
|
||||
if (unlikely(!skb)) {
|
||||
@@ -1328,8 +1331,6 @@ static int mtk_poll_rx(struct napi_struc
|
||||
}
|
||||
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
|
||||
|
||||
- dma_unmap_single(eth->dev, trxd.rxd1,
|
||||
- ring->buf_size, DMA_FROM_DEVICE);
|
||||
pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
|
||||
skb->dev = netdev;
|
||||
skb_put(skb, pktlen);
|
@ -0,0 +1,38 @@
|
||||
From 787082ab9f7be4711e52f67c388535eda74a1269 Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Date: Thu, 22 Apr 2021 22:20:56 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: fix build_skb cleanup
|
||||
|
||||
In case build_skb fails, call skb_free_frag on the correct pointer. Also
|
||||
update the DMA structures with the new mapping before exiting, because
|
||||
the mapping was successful
|
||||
|
||||
Suggested-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1325,9 +1325,9 @@ static int mtk_poll_rx(struct napi_struc
|
||||
/* receive data */
|
||||
skb = build_skb(data, ring->frag_size);
|
||||
if (unlikely(!skb)) {
|
||||
- skb_free_frag(new_data);
|
||||
+ skb_free_frag(data);
|
||||
netdev->stats.rx_dropped++;
|
||||
- goto release_desc;
|
||||
+ goto skip_rx;
|
||||
}
|
||||
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
|
||||
|
||||
@@ -1347,6 +1347,7 @@ static int mtk_poll_rx(struct napi_struc
|
||||
skb_record_rx_queue(skb, 0);
|
||||
napi_gro_receive(napi, skb);
|
||||
|
||||
+skip_rx:
|
||||
ring->data[idx] = new_data;
|
||||
rxd->rxd1 = (unsigned int)dma_addr;
|
||||
|
@ -0,0 +1,77 @@
|
||||
From c30c4a82739090a2de4a4e3f245355ea4fb3ec14 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:20:57 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: use napi_consume_skb
|
||||
|
||||
Should improve performance, since it can use bulk free
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 19 ++++++++++++-------
|
||||
1 file changed, 12 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -879,7 +879,8 @@ static int txd_to_idx(struct mtk_tx_ring
|
||||
return ((void *)dma - (void *)ring->dma) / sizeof(*dma);
|
||||
}
|
||||
|
||||
-static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
|
||||
+static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf,
|
||||
+ bool napi)
|
||||
{
|
||||
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
|
||||
if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
|
||||
@@ -911,8 +912,12 @@ static void mtk_tx_unmap(struct mtk_eth
|
||||
|
||||
tx_buf->flags = 0;
|
||||
if (tx_buf->skb &&
|
||||
- (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC))
|
||||
- dev_kfree_skb_any(tx_buf->skb);
|
||||
+ (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC)) {
|
||||
+ if (napi)
|
||||
+ napi_consume_skb(tx_buf->skb, napi);
|
||||
+ else
|
||||
+ dev_kfree_skb_any(tx_buf->skb);
|
||||
+ }
|
||||
tx_buf->skb = NULL;
|
||||
}
|
||||
|
||||
@@ -1090,7 +1095,7 @@ err_dma:
|
||||
tx_buf = mtk_desc_to_tx_buf(ring, itxd);
|
||||
|
||||
/* unmap dma */
|
||||
- mtk_tx_unmap(eth, tx_buf);
|
||||
+ mtk_tx_unmap(eth, tx_buf, false);
|
||||
|
||||
itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
|
||||
if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
|
||||
@@ -1409,7 +1414,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
|
||||
done[mac]++;
|
||||
budget--;
|
||||
}
|
||||
- mtk_tx_unmap(eth, tx_buf);
|
||||
+ mtk_tx_unmap(eth, tx_buf, true);
|
||||
|
||||
ring->last_free = desc;
|
||||
atomic_inc(&ring->free_count);
|
||||
@@ -1446,7 +1451,7 @@ static int mtk_poll_tx_pdma(struct mtk_e
|
||||
budget--;
|
||||
}
|
||||
|
||||
- mtk_tx_unmap(eth, tx_buf);
|
||||
+ mtk_tx_unmap(eth, tx_buf, true);
|
||||
|
||||
desc = &ring->dma[cpu];
|
||||
ring->last_free = desc;
|
||||
@@ -1648,7 +1653,7 @@ static void mtk_tx_clean(struct mtk_eth
|
||||
|
||||
if (ring->buf) {
|
||||
for (i = 0; i < MTK_DMA_SIZE; i++)
|
||||
- mtk_tx_unmap(eth, &ring->buf[i]);
|
||||
+ mtk_tx_unmap(eth, &ring->buf[i], false);
|
||||
kfree(ring->buf);
|
||||
ring->buf = NULL;
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
From 3630d519d7c3eab92567658690e44ffe0517d109 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:20:58 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: reduce MDIO bus access latency
|
||||
|
||||
usleep_range often ends up sleeping much longer than the 10-20us provided
|
||||
as a range here. This causes significant latency in mdio bus accesses,
|
||||
which easily adds multiple seconds to the boot time on MT7621 when polling
|
||||
DSA slave ports.
|
||||
Use cond_resched instead of usleep_range, since the MDIO access does not
|
||||
take much time
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -86,7 +86,7 @@ static int mtk_mdio_busy_wait(struct mtk
|
||||
return 0;
|
||||
if (time_after(jiffies, t_start + PHY_IAC_TIMEOUT))
|
||||
break;
|
||||
- usleep_range(10, 20);
|
||||
+ cond_resched();
|
||||
}
|
||||
|
||||
dev_err(eth->dev, "mdio: MDIO timeout\n");
|
@ -0,0 +1,54 @@
|
||||
From 16ef670789b252b221700adc413497ed2f941d8a Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:20:59 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: remove unnecessary TX queue stops
|
||||
|
||||
When running short on descriptors, only stop the queue for the netdev that
|
||||
tx was attempted for. By the time something tries to send on the other
|
||||
netdev, the ring might have some more room already.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 15 ++-------------
|
||||
1 file changed, 2 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1152,17 +1152,6 @@ static void mtk_wake_queue(struct mtk_et
|
||||
}
|
||||
}
|
||||
|
||||
-static void mtk_stop_queue(struct mtk_eth *eth)
|
||||
-{
|
||||
- int i;
|
||||
-
|
||||
- for (i = 0; i < MTK_MAC_COUNT; i++) {
|
||||
- if (!eth->netdev[i])
|
||||
- continue;
|
||||
- netif_stop_queue(eth->netdev[i]);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
struct mtk_mac *mac = netdev_priv(dev);
|
||||
@@ -1183,7 +1172,7 @@ static netdev_tx_t mtk_start_xmit(struct
|
||||
|
||||
tx_num = mtk_cal_txd_req(skb);
|
||||
if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
|
||||
- mtk_stop_queue(eth);
|
||||
+ netif_stop_queue(dev);
|
||||
netif_err(eth, tx_queued, dev,
|
||||
"Tx Ring full when queue awake!\n");
|
||||
spin_unlock(&eth->page_lock);
|
||||
@@ -1209,7 +1198,7 @@ static netdev_tx_t mtk_start_xmit(struct
|
||||
goto drop;
|
||||
|
||||
if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
|
||||
- mtk_stop_queue(eth);
|
||||
+ netif_stop_queue(dev);
|
||||
|
||||
spin_unlock(&eth->page_lock);
|
||||
|
@ -0,0 +1,37 @@
|
||||
From 59555a8d0dd39bf60b7ca1ba5e7393d293f7398d Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:00 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: use larger burst size for QDMA TX
|
||||
|
||||
Improves tx performance
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -2214,7 +2214,7 @@ static int mtk_start_dma(struct mtk_eth
|
||||
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
|
||||
mtk_w32(eth,
|
||||
MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
|
||||
- MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
|
||||
+ MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO |
|
||||
MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
|
||||
MTK_RX_BT_32DWORDS,
|
||||
MTK_QDMA_GLO_CFG);
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -202,7 +202,7 @@
|
||||
#define MTK_RX_BT_32DWORDS (3 << 11)
|
||||
#define MTK_NDP_CO_PRO BIT(10)
|
||||
#define MTK_TX_WB_DDONE BIT(6)
|
||||
-#define MTK_DMA_SIZE_16DWORDS (2 << 4)
|
||||
+#define MTK_TX_BT_32DWORDS (3 << 4)
|
||||
#define MTK_RX_DMA_BUSY BIT(3)
|
||||
#define MTK_TX_DMA_BUSY BIT(1)
|
||||
#define MTK_RX_DMA_EN BIT(2)
|
@ -0,0 +1,26 @@
|
||||
From 6b4423b258b91032c50a5efca15d3d9bb194ea1d Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:01 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: increase DMA ring sizes
|
||||
|
||||
256 descriptors is not enough for multi-gigabit traffic under load on
|
||||
MT7622. Bump it to 512 to improve performance.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -21,7 +21,7 @@
|
||||
#define MTK_QDMA_PAGE_SIZE 2048
|
||||
#define MTK_MAX_RX_LENGTH 1536
|
||||
#define MTK_TX_DMA_BUF_LEN 0x3fff
|
||||
-#define MTK_DMA_SIZE 256
|
||||
+#define MTK_DMA_SIZE 512
|
||||
#define MTK_NAPI_WEIGHT 64
|
||||
#define MTK_MAC_COUNT 2
|
||||
#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
|
@ -0,0 +1,313 @@
|
||||
From e9229ffd550b2d8c4997c67a501dbc3919fd4e26 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:02 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: implement dynamic interrupt
|
||||
moderation
|
||||
|
||||
Reduces the number of interrupts under load
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
[Ilya: add documentation for new struct fields]
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/Kconfig | 1 +
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 96 +++++++++++++++++++--
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 41 +++++++--
|
||||
3 files changed, 124 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/Kconfig
|
||||
+++ b/drivers/net/ethernet/mediatek/Kconfig
|
||||
@@ -10,6 +10,7 @@ if NET_VENDOR_MEDIATEK
|
||||
config NET_MEDIATEK_SOC
|
||||
tristate "MediaTek SoC Gigabit Ethernet support"
|
||||
select PHYLINK
|
||||
+ select DIMLIB
|
||||
help
|
||||
This driver supports the gigabit ethernet MACs in the
|
||||
MediaTek SoC family.
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1254,12 +1254,13 @@ static void mtk_update_rx_cpu_idx(struct
|
||||
static int mtk_poll_rx(struct napi_struct *napi, int budget,
|
||||
struct mtk_eth *eth)
|
||||
{
|
||||
+ struct dim_sample dim_sample = {};
|
||||
struct mtk_rx_ring *ring;
|
||||
int idx;
|
||||
struct sk_buff *skb;
|
||||
u8 *data, *new_data;
|
||||
struct mtk_rx_dma *rxd, trxd;
|
||||
- int done = 0;
|
||||
+ int done = 0, bytes = 0;
|
||||
|
||||
while (done < budget) {
|
||||
struct net_device *netdev;
|
||||
@@ -1333,6 +1334,7 @@ static int mtk_poll_rx(struct napi_struc
|
||||
else
|
||||
skb_checksum_none_assert(skb);
|
||||
skb->protocol = eth_type_trans(skb, netdev);
|
||||
+ bytes += pktlen;
|
||||
|
||||
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
|
||||
(trxd.rxd2 & RX_DMA_VTAG))
|
||||
@@ -1365,6 +1367,12 @@ rx_done:
|
||||
mtk_update_rx_cpu_idx(eth);
|
||||
}
|
||||
|
||||
+ eth->rx_packets += done;
|
||||
+ eth->rx_bytes += bytes;
|
||||
+ dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes,
|
||||
+ &dim_sample);
|
||||
+ net_dim(&eth->rx_dim, dim_sample);
|
||||
+
|
||||
return done;
|
||||
}
|
||||
|
||||
@@ -1457,6 +1465,7 @@ static int mtk_poll_tx_pdma(struct mtk_e
|
||||
static int mtk_poll_tx(struct mtk_eth *eth, int budget)
|
||||
{
|
||||
struct mtk_tx_ring *ring = &eth->tx_ring;
|
||||
+ struct dim_sample dim_sample = {};
|
||||
unsigned int done[MTK_MAX_DEVS];
|
||||
unsigned int bytes[MTK_MAX_DEVS];
|
||||
int total = 0, i;
|
||||
@@ -1474,8 +1483,14 @@ static int mtk_poll_tx(struct mtk_eth *e
|
||||
continue;
|
||||
netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
|
||||
total += done[i];
|
||||
+ eth->tx_packets += done[i];
|
||||
+ eth->tx_bytes += bytes[i];
|
||||
}
|
||||
|
||||
+ dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes,
|
||||
+ &dim_sample);
|
||||
+ net_dim(&eth->tx_dim, dim_sample);
|
||||
+
|
||||
if (mtk_queue_stopped(eth) &&
|
||||
(atomic_read(&ring->free_count) > ring->thresh))
|
||||
mtk_wake_queue(eth);
|
||||
@@ -2150,6 +2165,7 @@ static irqreturn_t mtk_handle_irq_rx(int
|
||||
{
|
||||
struct mtk_eth *eth = _eth;
|
||||
|
||||
+ eth->rx_events++;
|
||||
if (likely(napi_schedule_prep(&eth->rx_napi))) {
|
||||
__napi_schedule(&eth->rx_napi);
|
||||
mtk_rx_irq_disable(eth, MTK_RX_DONE_INT);
|
||||
@@ -2162,6 +2178,7 @@ static irqreturn_t mtk_handle_irq_tx(int
|
||||
{
|
||||
struct mtk_eth *eth = _eth;
|
||||
|
||||
+ eth->tx_events++;
|
||||
if (likely(napi_schedule_prep(&eth->tx_napi))) {
|
||||
__napi_schedule(&eth->tx_napi);
|
||||
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
|
||||
@@ -2346,6 +2363,9 @@ static int mtk_stop(struct net_device *d
|
||||
napi_disable(&eth->tx_napi);
|
||||
napi_disable(&eth->rx_napi);
|
||||
|
||||
+ cancel_work_sync(&eth->rx_dim.work);
|
||||
+ cancel_work_sync(&eth->tx_dim.work);
|
||||
+
|
||||
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
|
||||
mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
|
||||
mtk_stop_dma(eth, MTK_PDMA_GLO_CFG);
|
||||
@@ -2398,6 +2418,64 @@ err_disable_clks:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static void mtk_dim_rx(struct work_struct *work)
|
||||
+{
|
||||
+ struct dim *dim = container_of(work, struct dim, work);
|
||||
+ struct mtk_eth *eth = container_of(dim, struct mtk_eth, rx_dim);
|
||||
+ struct dim_cq_moder cur_profile;
|
||||
+ u32 val, cur;
|
||||
+
|
||||
+ cur_profile = net_dim_get_rx_moderation(eth->rx_dim.mode,
|
||||
+ dim->profile_ix);
|
||||
+ spin_lock_bh(&eth->dim_lock);
|
||||
+
|
||||
+ val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
|
||||
+ val &= MTK_PDMA_DELAY_TX_MASK;
|
||||
+ val |= MTK_PDMA_DELAY_RX_EN;
|
||||
+
|
||||
+ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
|
||||
+ val |= cur << MTK_PDMA_DELAY_RX_PTIME_SHIFT;
|
||||
+
|
||||
+ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
|
||||
+ val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT;
|
||||
+
|
||||
+ mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
|
||||
+ mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
|
||||
+
|
||||
+ spin_unlock_bh(&eth->dim_lock);
|
||||
+
|
||||
+ dim->state = DIM_START_MEASURE;
|
||||
+}
|
||||
+
|
||||
+static void mtk_dim_tx(struct work_struct *work)
|
||||
+{
|
||||
+ struct dim *dim = container_of(work, struct dim, work);
|
||||
+ struct mtk_eth *eth = container_of(dim, struct mtk_eth, tx_dim);
|
||||
+ struct dim_cq_moder cur_profile;
|
||||
+ u32 val, cur;
|
||||
+
|
||||
+ cur_profile = net_dim_get_tx_moderation(eth->tx_dim.mode,
|
||||
+ dim->profile_ix);
|
||||
+ spin_lock_bh(&eth->dim_lock);
|
||||
+
|
||||
+ val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
|
||||
+ val &= MTK_PDMA_DELAY_RX_MASK;
|
||||
+ val |= MTK_PDMA_DELAY_TX_EN;
|
||||
+
|
||||
+ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
|
||||
+ val |= cur << MTK_PDMA_DELAY_TX_PTIME_SHIFT;
|
||||
+
|
||||
+ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
|
||||
+ val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT;
|
||||
+
|
||||
+ mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
|
||||
+ mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
|
||||
+
|
||||
+ spin_unlock_bh(&eth->dim_lock);
|
||||
+
|
||||
+ dim->state = DIM_START_MEASURE;
|
||||
+}
|
||||
+
|
||||
static int mtk_hw_init(struct mtk_eth *eth)
|
||||
{
|
||||
int i, val, ret;
|
||||
@@ -2419,9 +2497,6 @@ static int mtk_hw_init(struct mtk_eth *e
|
||||
goto err_disable_pm;
|
||||
}
|
||||
|
||||
- /* enable interrupt delay for RX */
|
||||
- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
|
||||
-
|
||||
/* disable delay and normal interrupt */
|
||||
mtk_tx_irq_disable(eth, ~0);
|
||||
mtk_rx_irq_disable(eth, ~0);
|
||||
@@ -2460,11 +2535,11 @@ static int mtk_hw_init(struct mtk_eth *e
|
||||
/* Enable RX VLan Offloading */
|
||||
mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
|
||||
|
||||
- /* enable interrupt delay for RX */
|
||||
- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
|
||||
+ /* set interrupt delays based on current Net DIM sample */
|
||||
+ mtk_dim_rx(&eth->rx_dim.work);
|
||||
+ mtk_dim_tx(&eth->tx_dim.work);
|
||||
|
||||
/* disable delay and normal interrupt */
|
||||
- mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
|
||||
mtk_tx_irq_disable(eth, ~0);
|
||||
mtk_rx_irq_disable(eth, ~0);
|
||||
|
||||
@@ -2969,6 +3044,13 @@ static int mtk_probe(struct platform_dev
|
||||
spin_lock_init(&eth->page_lock);
|
||||
spin_lock_init(&eth->tx_irq_lock);
|
||||
spin_lock_init(&eth->rx_irq_lock);
|
||||
+ spin_lock_init(&eth->dim_lock);
|
||||
+
|
||||
+ eth->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
|
||||
+ INIT_WORK(&eth->rx_dim.work, mtk_dim_rx);
|
||||
+
|
||||
+ eth->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
|
||||
+ INIT_WORK(&eth->tx_dim.work, mtk_dim_tx);
|
||||
|
||||
if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
|
||||
eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/phylink.h>
|
||||
#include <linux/rhashtable.h>
|
||||
+#include <linux/dim.h>
|
||||
#include "mtk_ppe.h"
|
||||
|
||||
#define MTK_QDMA_PAGE_SIZE 2048
|
||||
@@ -136,13 +137,18 @@
|
||||
|
||||
/* PDMA Delay Interrupt Register */
|
||||
#define MTK_PDMA_DELAY_INT 0xa0c
|
||||
+#define MTK_PDMA_DELAY_RX_MASK GENMASK(15, 0)
|
||||
#define MTK_PDMA_DELAY_RX_EN BIT(15)
|
||||
-#define MTK_PDMA_DELAY_RX_PINT 4
|
||||
#define MTK_PDMA_DELAY_RX_PINT_SHIFT 8
|
||||
-#define MTK_PDMA_DELAY_RX_PTIME 4
|
||||
-#define MTK_PDMA_DELAY_RX_DELAY \
|
||||
- (MTK_PDMA_DELAY_RX_EN | MTK_PDMA_DELAY_RX_PTIME | \
|
||||
- (MTK_PDMA_DELAY_RX_PINT << MTK_PDMA_DELAY_RX_PINT_SHIFT))
|
||||
+#define MTK_PDMA_DELAY_RX_PTIME_SHIFT 0
|
||||
+
|
||||
+#define MTK_PDMA_DELAY_TX_MASK GENMASK(31, 16)
|
||||
+#define MTK_PDMA_DELAY_TX_EN BIT(31)
|
||||
+#define MTK_PDMA_DELAY_TX_PINT_SHIFT 24
|
||||
+#define MTK_PDMA_DELAY_TX_PTIME_SHIFT 16
|
||||
+
|
||||
+#define MTK_PDMA_DELAY_PINT_MASK 0x7f
|
||||
+#define MTK_PDMA_DELAY_PTIME_MASK 0xff
|
||||
|
||||
/* PDMA Interrupt Status Register */
|
||||
#define MTK_PDMA_INT_STATUS 0xa20
|
||||
@@ -224,6 +230,7 @@
|
||||
/* QDMA Interrupt Status Register */
|
||||
#define MTK_QDMA_INT_STATUS 0x1A18
|
||||
#define MTK_RX_DONE_DLY BIT(30)
|
||||
+#define MTK_TX_DONE_DLY BIT(28)
|
||||
#define MTK_RX_DONE_INT3 BIT(19)
|
||||
#define MTK_RX_DONE_INT2 BIT(18)
|
||||
#define MTK_RX_DONE_INT1 BIT(17)
|
||||
@@ -233,8 +240,7 @@
|
||||
#define MTK_TX_DONE_INT1 BIT(1)
|
||||
#define MTK_TX_DONE_INT0 BIT(0)
|
||||
#define MTK_RX_DONE_INT MTK_RX_DONE_DLY
|
||||
-#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
|
||||
- MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
|
||||
+#define MTK_TX_DONE_INT MTK_TX_DONE_DLY
|
||||
|
||||
/* QDMA Interrupt grouping registers */
|
||||
#define MTK_QDMA_INT_GRP1 0x1a20
|
||||
@@ -863,6 +869,7 @@ struct mtk_sgmii {
|
||||
* @page_lock: Make sure that register operations are atomic
|
||||
* @tx_irq__lock: Make sure that IRQ register operations are atomic
|
||||
* @rx_irq__lock: Make sure that IRQ register operations are atomic
|
||||
+ * @dim_lock: Make sure that Net DIM operations are atomic
|
||||
* @dummy_dev: we run 2 netdevs on 1 physical DMA ring and need a
|
||||
* dummy for NAPI to work
|
||||
* @netdev: The netdev instances
|
||||
@@ -881,6 +888,14 @@ struct mtk_sgmii {
|
||||
* @rx_ring_qdma: Pointer to the memory holding info about the QDMA RX ring
|
||||
* @tx_napi: The TX NAPI struct
|
||||
* @rx_napi: The RX NAPI struct
|
||||
+ * @rx_events: Net DIM RX event counter
|
||||
+ * @rx_packets: Net DIM RX packet counter
|
||||
+ * @rx_bytes: Net DIM RX byte counter
|
||||
+ * @rx_dim: Net DIM RX context
|
||||
+ * @tx_events: Net DIM TX event counter
|
||||
+ * @tx_packets: Net DIM TX packet counter
|
||||
+ * @tx_bytes: Net DIM TX byte counter
|
||||
+ * @tx_dim: Net DIM TX context
|
||||
* @scratch_ring: Newer SoCs need memory for a second HW managed TX ring
|
||||
* @phy_scratch_ring: physical address of scratch_ring
|
||||
* @scratch_head: The scratch memory that scratch_ring points to.
|
||||
@@ -925,6 +940,18 @@ struct mtk_eth {
|
||||
|
||||
const struct mtk_soc_data *soc;
|
||||
|
||||
+ spinlock_t dim_lock;
|
||||
+
|
||||
+ u32 rx_events;
|
||||
+ u32 rx_packets;
|
||||
+ u32 rx_bytes;
|
||||
+ struct dim rx_dim;
|
||||
+
|
||||
+ u32 tx_events;
|
||||
+ u32 tx_packets;
|
||||
+ u32 tx_bytes;
|
||||
+ struct dim tx_dim;
|
||||
+
|
||||
u32 tx_int_mask_reg;
|
||||
u32 tx_int_status_reg;
|
||||
u32 rx_dma_l4_valid;
|
@ -0,0 +1,73 @@
|
||||
From 4e6bf609569c59b6bd6acf4a607c096cbd820d79 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:03 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: cache HW pointer of last freed TX
|
||||
descriptor
|
||||
|
||||
The value is only updated by the CPU, so it is cheaper to access from the
|
||||
ring data structure than from a hardware register.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++----
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 ++
|
||||
2 files changed, 6 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1385,7 +1385,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
|
||||
struct mtk_tx_buf *tx_buf;
|
||||
u32 cpu, dma;
|
||||
|
||||
- cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
|
||||
+ cpu = ring->last_free_ptr;
|
||||
dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
|
||||
|
||||
desc = mtk_qdma_phys_to_virt(ring, cpu);
|
||||
@@ -1419,6 +1419,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
|
||||
cpu = next_cpu;
|
||||
}
|
||||
|
||||
+ ring->last_free_ptr = cpu;
|
||||
mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
|
||||
|
||||
return budget;
|
||||
@@ -1619,6 +1620,7 @@ static int mtk_tx_alloc(struct mtk_eth *
|
||||
atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
|
||||
ring->next_free = &ring->dma[0];
|
||||
ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
|
||||
+ ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz));
|
||||
ring->thresh = MAX_SKB_FRAGS;
|
||||
|
||||
/* make sure that all changes to the dma ring are flushed before we
|
||||
@@ -1632,9 +1634,7 @@ static int mtk_tx_alloc(struct mtk_eth *
|
||||
mtk_w32(eth,
|
||||
ring->phys + ((MTK_DMA_SIZE - 1) * sz),
|
||||
MTK_QTX_CRX_PTR);
|
||||
- mtk_w32(eth,
|
||||
- ring->phys + ((MTK_DMA_SIZE - 1) * sz),
|
||||
- MTK_QTX_DRX_PTR);
|
||||
+ mtk_w32(eth, ring->last_free_ptr, MTK_QTX_DRX_PTR);
|
||||
mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
|
||||
MTK_QTX_CFG(0));
|
||||
} else {
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -656,6 +656,7 @@ struct mtk_tx_buf {
|
||||
* @phys: The physical addr of tx_buf
|
||||
* @next_free: Pointer to the next free descriptor
|
||||
* @last_free: Pointer to the last free descriptor
|
||||
+ * @last_free_ptr: Hardware pointer value of the last free descriptor
|
||||
* @thresh: The threshold of minimum amount of free descriptors
|
||||
* @free_count: QDMA uses a linked list. Track how many free descriptors
|
||||
* are present
|
||||
@@ -666,6 +667,7 @@ struct mtk_tx_ring {
|
||||
dma_addr_t phys;
|
||||
struct mtk_tx_dma *next_free;
|
||||
struct mtk_tx_dma *last_free;
|
||||
+ u32 last_free_ptr;
|
||||
u16 thresh;
|
||||
atomic_t free_count;
|
||||
int dma_size;
|
@ -0,0 +1,49 @@
|
||||
From 816ac3e6e67bdd78d86226c6eb53619780750e92 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:04 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: only read the full RX descriptor
|
||||
if DMA is done
|
||||
|
||||
Uncached memory access is expensive, and there is no need to access all
|
||||
descriptor words if we can't process them anyway
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 12 ++++++++----
|
||||
1 file changed, 8 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -798,13 +798,18 @@ static inline int mtk_max_buf_size(int f
|
||||
return buf_size;
|
||||
}
|
||||
|
||||
-static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
|
||||
+static inline bool mtk_rx_get_desc(struct mtk_rx_dma *rxd,
|
||||
struct mtk_rx_dma *dma_rxd)
|
||||
{
|
||||
- rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
|
||||
rxd->rxd2 = READ_ONCE(dma_rxd->rxd2);
|
||||
+ if (!(rxd->rxd2 & RX_DMA_DONE))
|
||||
+ return false;
|
||||
+
|
||||
+ rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
|
||||
rxd->rxd3 = READ_ONCE(dma_rxd->rxd3);
|
||||
rxd->rxd4 = READ_ONCE(dma_rxd->rxd4);
|
||||
+
|
||||
+ return true;
|
||||
}
|
||||
|
||||
/* the qdma core needs scratch memory to be setup */
|
||||
@@ -1276,8 +1281,7 @@ static int mtk_poll_rx(struct napi_struc
|
||||
rxd = &ring->dma[idx];
|
||||
data = ring->data[idx];
|
||||
|
||||
- mtk_rx_get_desc(&trxd, rxd);
|
||||
- if (!(trxd.rxd2 & RX_DMA_DONE))
|
||||
+ if (!mtk_rx_get_desc(&trxd, rxd))
|
||||
break;
|
||||
|
||||
/* find out which mac the packet come from. values start at 1 */
|
@ -0,0 +1,39 @@
|
||||
From 16769a8923fad5a5377253bcd76b0e0d64976c73 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:05 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: reduce unnecessary interrupts
|
||||
|
||||
Avoid rearming interrupt if napi_complete returns false
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 9 +++++----
|
||||
1 file changed, 5 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1540,8 +1540,8 @@ static int mtk_napi_tx(struct napi_struc
|
||||
if (status & MTK_TX_DONE_INT)
|
||||
return budget;
|
||||
|
||||
- napi_complete(napi);
|
||||
- mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
|
||||
+ if (napi_complete(napi))
|
||||
+ mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
|
||||
|
||||
return tx_done;
|
||||
}
|
||||
@@ -1574,8 +1574,9 @@ poll_again:
|
||||
remain_budget -= rx_done;
|
||||
goto poll_again;
|
||||
}
|
||||
- napi_complete(napi);
|
||||
- mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
|
||||
+
|
||||
+ if (napi_complete(napi))
|
||||
+ mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
|
||||
|
||||
return rx_done + budget - remain_budget;
|
||||
}
|
@ -0,0 +1,110 @@
|
||||
From db2c7b353db3b3f71b55f9ff4627d8a786446fbe Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Date: Thu, 22 Apr 2021 22:21:06 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: rework NAPI callbacks
|
||||
|
||||
Use napi_complete_done to communicate total TX and RX work done to NAPI.
|
||||
Count total RX work up instead of remaining work down for clarity.
|
||||
Remove unneeded local variables for clarity. Use do {} while instead of
|
||||
goto for clarity.
|
||||
|
||||
Suggested-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 54 +++++++++------------
|
||||
1 file changed, 24 insertions(+), 30 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1517,7 +1517,6 @@ static void mtk_handle_status_irq(struct
|
||||
static int mtk_napi_tx(struct napi_struct *napi, int budget)
|
||||
{
|
||||
struct mtk_eth *eth = container_of(napi, struct mtk_eth, tx_napi);
|
||||
- u32 status, mask;
|
||||
int tx_done = 0;
|
||||
|
||||
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
|
||||
@@ -1526,21 +1525,19 @@ static int mtk_napi_tx(struct napi_struc
|
||||
tx_done = mtk_poll_tx(eth, budget);
|
||||
|
||||
if (unlikely(netif_msg_intr(eth))) {
|
||||
- status = mtk_r32(eth, eth->tx_int_status_reg);
|
||||
- mask = mtk_r32(eth, eth->tx_int_mask_reg);
|
||||
dev_info(eth->dev,
|
||||
- "done tx %d, intr 0x%08x/0x%x\n",
|
||||
- tx_done, status, mask);
|
||||
+ "done tx %d, intr 0x%08x/0x%x\n", tx_done,
|
||||
+ mtk_r32(eth, eth->tx_int_status_reg),
|
||||
+ mtk_r32(eth, eth->tx_int_mask_reg));
|
||||
}
|
||||
|
||||
if (tx_done == budget)
|
||||
return budget;
|
||||
|
||||
- status = mtk_r32(eth, eth->tx_int_status_reg);
|
||||
- if (status & MTK_TX_DONE_INT)
|
||||
+ if (mtk_r32(eth, eth->tx_int_status_reg) & MTK_TX_DONE_INT)
|
||||
return budget;
|
||||
|
||||
- if (napi_complete(napi))
|
||||
+ if (napi_complete_done(napi, tx_done))
|
||||
mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
|
||||
|
||||
return tx_done;
|
||||
@@ -1549,36 +1546,33 @@ static int mtk_napi_tx(struct napi_struc
|
||||
static int mtk_napi_rx(struct napi_struct *napi, int budget)
|
||||
{
|
||||
struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
|
||||
- u32 status, mask;
|
||||
- int rx_done = 0;
|
||||
- int remain_budget = budget;
|
||||
+ int rx_done_total = 0;
|
||||
|
||||
mtk_handle_status_irq(eth);
|
||||
|
||||
-poll_again:
|
||||
- mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
|
||||
- rx_done = mtk_poll_rx(napi, remain_budget, eth);
|
||||
+ do {
|
||||
+ int rx_done;
|
||||
|
||||
- if (unlikely(netif_msg_intr(eth))) {
|
||||
- status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
|
||||
- mask = mtk_r32(eth, MTK_PDMA_INT_MASK);
|
||||
- dev_info(eth->dev,
|
||||
- "done rx %d, intr 0x%08x/0x%x\n",
|
||||
- rx_done, status, mask);
|
||||
- }
|
||||
- if (rx_done == remain_budget)
|
||||
- return budget;
|
||||
+ mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
|
||||
+ rx_done = mtk_poll_rx(napi, budget - rx_done_total, eth);
|
||||
+ rx_done_total += rx_done;
|
||||
+
|
||||
+ if (unlikely(netif_msg_intr(eth))) {
|
||||
+ dev_info(eth->dev,
|
||||
+ "done rx %d, intr 0x%08x/0x%x\n", rx_done,
|
||||
+ mtk_r32(eth, MTK_PDMA_INT_STATUS),
|
||||
+ mtk_r32(eth, MTK_PDMA_INT_MASK));
|
||||
+ }
|
||||
|
||||
- status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
|
||||
- if (status & MTK_RX_DONE_INT) {
|
||||
- remain_budget -= rx_done;
|
||||
- goto poll_again;
|
||||
- }
|
||||
+ if (rx_done_total == budget)
|
||||
+ return budget;
|
||||
+
|
||||
+ } while (mtk_r32(eth, MTK_PDMA_INT_STATUS) & MTK_RX_DONE_INT);
|
||||
|
||||
- if (napi_complete(napi))
|
||||
+ if (napi_complete_done(napi, rx_done_total))
|
||||
mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
|
||||
|
||||
- return rx_done + budget - remain_budget;
|
||||
+ return rx_done_total;
|
||||
}
|
||||
|
||||
static int mtk_tx_alloc(struct mtk_eth *eth)
|
@ -0,0 +1,47 @@
|
||||
From fa817272c37ef78e25dc14e4760ac78a7043a18a Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:07 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: set PPE flow hash as skb hash if
|
||||
present
|
||||
|
||||
This improves GRO performance
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
[Ilya: Use MTK_RXD4_FOE_ENTRY instead of GENMASK(13, 0)]
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/pinctrl/devinfo.h>
|
||||
#include <linux/phylink.h>
|
||||
+#include <linux/jhash.h>
|
||||
#include <net/dsa.h>
|
||||
|
||||
#include "mtk_eth_soc.h"
|
||||
@@ -1271,6 +1272,7 @@ static int mtk_poll_rx(struct napi_struc
|
||||
struct net_device *netdev;
|
||||
unsigned int pktlen;
|
||||
dma_addr_t dma_addr;
|
||||
+ u32 hash;
|
||||
int mac;
|
||||
|
||||
ring = mtk_get_rx_ring(eth);
|
||||
@@ -1340,6 +1342,12 @@ static int mtk_poll_rx(struct napi_struc
|
||||
skb->protocol = eth_type_trans(skb, netdev);
|
||||
bytes += pktlen;
|
||||
|
||||
+ hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
|
||||
+ if (hash != MTK_RXD4_FOE_ENTRY) {
|
||||
+ hash = jhash_1word(hash, 0);
|
||||
+ skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
|
||||
+ }
|
||||
+
|
||||
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
|
||||
(trxd.rxd2 & RX_DMA_VTAG))
|
||||
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
|
@ -0,0 +1,71 @@
|
||||
From 3bc8e0aff23be0526af0dbc7973a8866a08d73f1 Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Date: Thu, 22 Apr 2021 22:21:08 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: use iopoll.h macro for DMA init
|
||||
|
||||
Replace a tight busy-wait loop without a pause with a standard
|
||||
readx_poll_timeout_atomic routine with a 5 us poll period.
|
||||
|
||||
Tested by booting a MT7621 device to ensure the driver initializes
|
||||
properly.
|
||||
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 29 +++++++++------------
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
|
||||
2 files changed, 14 insertions(+), 17 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -2054,25 +2054,22 @@ static int mtk_set_features(struct net_d
|
||||
/* wait for DMA to finish whatever it is doing before we start using it again */
|
||||
static int mtk_dma_busy_wait(struct mtk_eth *eth)
|
||||
{
|
||||
- unsigned long t_start = jiffies;
|
||||
+ unsigned int reg;
|
||||
+ int ret;
|
||||
+ u32 val;
|
||||
|
||||
- while (1) {
|
||||
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
|
||||
- if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) &
|
||||
- (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
|
||||
- return 0;
|
||||
- } else {
|
||||
- if (!(mtk_r32(eth, MTK_PDMA_GLO_CFG) &
|
||||
- (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
|
||||
- return 0;
|
||||
- }
|
||||
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
|
||||
+ reg = MTK_QDMA_GLO_CFG;
|
||||
+ else
|
||||
+ reg = MTK_PDMA_GLO_CFG;
|
||||
|
||||
- if (time_after(jiffies, t_start + MTK_DMA_BUSY_TIMEOUT))
|
||||
- break;
|
||||
- }
|
||||
+ ret = readx_poll_timeout_atomic(__raw_readl, eth->base + reg, val,
|
||||
+ !(val & (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)),
|
||||
+ 5, MTK_DMA_BUSY_TIMEOUT_US);
|
||||
+ if (ret)
|
||||
+ dev_err(eth->dev, "DMA init timeout\n");
|
||||
|
||||
- dev_err(eth->dev, "DMA init timeout\n");
|
||||
- return -1;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static int mtk_dma_init(struct mtk_eth *eth)
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -213,7 +213,7 @@
|
||||
#define MTK_TX_DMA_BUSY BIT(1)
|
||||
#define MTK_RX_DMA_EN BIT(2)
|
||||
#define MTK_TX_DMA_EN BIT(0)
|
||||
-#define MTK_DMA_BUSY_TIMEOUT HZ
|
||||
+#define MTK_DMA_BUSY_TIMEOUT_US 1000000
|
||||
|
||||
/* QDMA Reset Index Register */
|
||||
#define MTK_QDMA_RST_IDX 0x1A08
|
@ -0,0 +1,63 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Sun, 18 Apr 2021 23:11:44 +0200
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: missing mutex
|
||||
|
||||
Patch 2ed37183abb7 ("netfilter: flowtable: separate replace, destroy and
|
||||
stats to different workqueues") splits the workqueue per event type. Add
|
||||
a mutex to serialize updates.
|
||||
|
||||
Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
|
||||
Reported-by: Frank Wunderlich <frank-w@public-files.de>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
@@ -392,6 +392,8 @@ mtk_flow_offload_stats(struct mtk_eth *e
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static DEFINE_MUTEX(mtk_flow_offload_mutex);
|
||||
+
|
||||
static int
|
||||
mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
|
||||
{
|
||||
@@ -399,6 +401,7 @@ mtk_eth_setup_tc_block_cb(enum tc_setup_
|
||||
struct net_device *dev = cb_priv;
|
||||
struct mtk_mac *mac = netdev_priv(dev);
|
||||
struct mtk_eth *eth = mac->hw;
|
||||
+ int err;
|
||||
|
||||
if (!tc_can_offload(dev))
|
||||
return -EOPNOTSUPP;
|
||||
@@ -406,18 +409,24 @@ mtk_eth_setup_tc_block_cb(enum tc_setup_
|
||||
if (type != TC_SETUP_CLSFLOWER)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
+ mutex_lock(&mtk_flow_offload_mutex);
|
||||
switch (cls->command) {
|
||||
case FLOW_CLS_REPLACE:
|
||||
- return mtk_flow_offload_replace(eth, cls);
|
||||
+ err = mtk_flow_offload_replace(eth, cls);
|
||||
+ break;
|
||||
case FLOW_CLS_DESTROY:
|
||||
- return mtk_flow_offload_destroy(eth, cls);
|
||||
+ err = mtk_flow_offload_destroy(eth, cls);
|
||||
+ break;
|
||||
case FLOW_CLS_STATS:
|
||||
- return mtk_flow_offload_stats(eth, cls);
|
||||
+ err = mtk_flow_offload_stats(eth, cls);
|
||||
+ break;
|
||||
default:
|
||||
- return -EOPNOTSUPP;
|
||||
+ err = -EOPNOTSUPP;
|
||||
+ break;
|
||||
}
|
||||
+ mutex_unlock(&mtk_flow_offload_mutex);
|
||||
|
||||
- return 0;
|
||||
+ return err;
|
||||
}
|
||||
|
||||
static int
|
@ -0,0 +1,22 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Sun, 18 Apr 2021 23:11:45 +0200
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: handle VLAN pop action
|
||||
|
||||
Do not hit EOPNOTSUPP when flowtable offload provides a VLAN pop action.
|
||||
|
||||
Fixes: efce49dfe6a8 ("netfilter: flowtable: add vlan pop action offload support")
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
@@ -233,6 +233,8 @@ mtk_flow_offload_replace(struct mtk_eth
|
||||
data.vlan.proto = act->vlan.proto;
|
||||
data.vlan.num++;
|
||||
break;
|
||||
+ case FLOW_ACTION_VLAN_POP:
|
||||
+ break;
|
||||
case FLOW_ACTION_PPPOE_PUSH:
|
||||
if (data.pppoe.num == 1)
|
||||
return -EOPNOTSUPP;
|
@ -0,0 +1,159 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Sun, 28 Mar 2021 23:08:55 +0200
|
||||
Subject: [PATCH] netfilter: flowtable: dst_check() from garbage collector path
|
||||
|
||||
Move dst_check() to the garbage collector path. Stale routes trigger the
|
||||
flow entry teardown state which makes affected flows go back to the
|
||||
classic forwarding path to re-evaluate flow offloading.
|
||||
|
||||
IPv6 requires the dst cookie to work, store it in the flow_tuple,
|
||||
otherwise dst_check() always fails.
|
||||
|
||||
Fixes: e5075c0badaa ("netfilter: flowtable: call dst_check() to fall back to classic forwarding")
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -129,7 +129,10 @@ struct flow_offload_tuple {
|
||||
in_vlan_ingress:2;
|
||||
u16 mtu;
|
||||
union {
|
||||
- struct dst_entry *dst_cache;
|
||||
+ struct {
|
||||
+ struct dst_entry *dst_cache;
|
||||
+ u32 dst_cookie;
|
||||
+ };
|
||||
struct {
|
||||
u32 ifidx;
|
||||
u32 hw_ifidx;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -74,6 +74,18 @@ err_ct_refcnt:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flow_offload_alloc);
|
||||
|
||||
+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
|
||||
+{
|
||||
+ const struct rt6_info *rt;
|
||||
+
|
||||
+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
|
||||
+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
|
||||
+ return rt6_get_cookie(rt);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int flow_offload_fill_route(struct flow_offload *flow,
|
||||
const struct nf_flow_route *route,
|
||||
enum flow_offload_tuple_dir dir)
|
||||
@@ -116,6 +128,7 @@ static int flow_offload_fill_route(struc
|
||||
return -1;
|
||||
|
||||
flow_tuple->dst_cache = dst;
|
||||
+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
|
||||
break;
|
||||
}
|
||||
flow_tuple->xmit_type = route->tuple[dir].xmit_type;
|
||||
@@ -389,11 +402,33 @@ nf_flow_table_iterate(struct nf_flowtabl
|
||||
return err;
|
||||
}
|
||||
|
||||
+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
|
||||
+{
|
||||
+ struct dst_entry *dst;
|
||||
+
|
||||
+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
+ dst = tuple->dst_cache;
|
||||
+ if (!dst_check(dst, tuple->dst_cookie))
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
|
||||
+{
|
||||
+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
|
||||
+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
|
||||
+}
|
||||
+
|
||||
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
|
||||
{
|
||||
struct nf_flowtable *flow_table = data;
|
||||
|
||||
- if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct))
|
||||
+ if (nf_flow_has_expired(flow) ||
|
||||
+ nf_ct_is_dying(flow->ct) ||
|
||||
+ nf_flow_has_stale_dst(flow))
|
||||
set_bit(NF_FLOW_TEARDOWN, &flow->flags);
|
||||
|
||||
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -364,15 +364,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
- tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
- if (!dst_check(&rt->dst, 0)) {
|
||||
- flow_offload_teardown(flow);
|
||||
- return NF_ACCEPT;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
@@ -391,6 +382,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
|
||||
IPCB(skb)->iif = skb->dev->ifindex;
|
||||
IPCB(skb)->flags = IPSKB_FORWARDED;
|
||||
@@ -399,6 +391,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
switch (tuplehash->tuple.xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||
@@ -607,15 +600,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
- tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
- if (!dst_check(&rt->dst, 0)) {
|
||||
- flow_offload_teardown(flow);
|
||||
- return NF_ACCEPT;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
@@ -633,6 +617,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
|
||||
IP6CB(skb)->iif = skb->dev->ifindex;
|
||||
IP6CB(skb)->flags = IP6SKB_FORWARDED;
|
||||
@@ -641,6 +626,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
|
||||
switch (tuplehash->tuple.xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
@ -0,0 +1,94 @@
|
||||
From: Oz Shlomo <ozsh@nvidia.com>
|
||||
Date: Thu, 3 Jun 2021 15:12:33 +0300
|
||||
Subject: [PATCH] netfilter: conntrack: Introduce tcp offload timeout
|
||||
configuration
|
||||
|
||||
TCP connections may be offloaded from nf conntrack to nf flow table.
|
||||
Offloaded connections are aged after 30 seconds of inactivity.
|
||||
Once aged, ownership is returned to conntrack with a hard coded pickup
|
||||
time of 120 seconds, after which the connection may be deleted.
|
||||
The current aging intervals may be too aggressive for some users.
|
||||
|
||||
Provide users with the ability to control the nf flow table offload
|
||||
aging and pickup time intervals via sysctl parameter as a pre-step for
|
||||
configuring the nf flow table GC timeout intervals.
|
||||
|
||||
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
|
||||
Reviewed-by: Paul Blakey <paulb@nvidia.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netns/conntrack.h
|
||||
+++ b/include/net/netns/conntrack.h
|
||||
@@ -27,6 +27,10 @@ struct nf_tcp_net {
|
||||
int tcp_loose;
|
||||
int tcp_be_liberal;
|
||||
int tcp_max_retrans;
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ unsigned int offload_timeout;
|
||||
+ unsigned int offload_pickup;
|
||||
+#endif
|
||||
};
|
||||
|
||||
enum udp_conntrack {
|
||||
--- a/net/netfilter/nf_conntrack_proto_tcp.c
|
||||
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
|
||||
@@ -1438,6 +1438,11 @@ void nf_conntrack_tcp_init_net(struct ne
|
||||
tn->tcp_loose = nf_ct_tcp_loose;
|
||||
tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
|
||||
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
|
||||
+
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ tn->offload_timeout = 30 * HZ;
|
||||
+ tn->offload_pickup = 120 * HZ;
|
||||
+#endif
|
||||
}
|
||||
|
||||
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
|
||||
--- a/net/netfilter/nf_conntrack_standalone.c
|
||||
+++ b/net/netfilter/nf_conntrack_standalone.c
|
||||
@@ -567,6 +567,10 @@ enum nf_ct_sysctl_index {
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
|
||||
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
|
||||
+#endif
|
||||
NF_SYSCTL_CT_PROTO_TCP_LOOSE,
|
||||
NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
|
||||
NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
|
||||
@@ -758,6 +762,20 @@ static struct ctl_table nf_ct_sysctl_tab
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_jiffies,
|
||||
},
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
|
||||
+ .procname = "nf_flowtable_tcp_timeout",
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec_jiffies,
|
||||
+ },
|
||||
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
|
||||
+ .procname = "nf_flowtable_tcp_pickup",
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec_jiffies,
|
||||
+ },
|
||||
+#endif
|
||||
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
|
||||
.procname = "nf_conntrack_tcp_loose",
|
||||
.maxlen = sizeof(int),
|
||||
@@ -967,6 +985,12 @@ static void nf_conntrack_standalone_init
|
||||
XASSIGN(LIBERAL, &tn->tcp_be_liberal);
|
||||
XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
|
||||
#undef XASSIGN
|
||||
+
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
|
||||
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
|
||||
+#endif
|
||||
+
|
||||
}
|
||||
|
||||
static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
|
@ -0,0 +1,92 @@
|
||||
From: Oz Shlomo <ozsh@nvidia.com>
|
||||
Date: Thu, 3 Jun 2021 15:12:34 +0300
|
||||
Subject: [PATCH] netfilter: conntrack: Introduce udp offload timeout
|
||||
configuration
|
||||
|
||||
UDP connections may be offloaded from nf conntrack to nf flow table.
|
||||
Offloaded connections are aged after 30 seconds of inactivity.
|
||||
Once aged, ownership is returned to conntrack with a hard coded pickup
|
||||
time of 30 seconds, after which the connection may be deleted.
|
||||
The current aging intervals may be too aggressive for some users.
|
||||
|
||||
Provide users with the ability to control the nf flow table offload
|
||||
aging and pickup time intervals via sysctl parameter as a pre-step for
|
||||
configuring the nf flow table GC timeout intervals.
|
||||
|
||||
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
|
||||
Reviewed-by: Paul Blakey <paulb@nvidia.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netns/conntrack.h
|
||||
+++ b/include/net/netns/conntrack.h
|
||||
@@ -41,6 +41,10 @@ enum udp_conntrack {
|
||||
|
||||
struct nf_udp_net {
|
||||
unsigned int timeouts[UDP_CT_MAX];
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ unsigned int offload_timeout;
|
||||
+ unsigned int offload_pickup;
|
||||
+#endif
|
||||
};
|
||||
|
||||
struct nf_icmp_net {
|
||||
--- a/net/netfilter/nf_conntrack_proto_udp.c
|
||||
+++ b/net/netfilter/nf_conntrack_proto_udp.c
|
||||
@@ -273,6 +273,11 @@ void nf_conntrack_udp_init_net(struct ne
|
||||
|
||||
for (i = 0; i < UDP_CT_MAX; i++)
|
||||
un->timeouts[i] = udp_timeouts[i];
|
||||
+
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ un->offload_timeout = 30 * HZ;
|
||||
+ un->offload_pickup = 30 * HZ;
|
||||
+#endif
|
||||
}
|
||||
|
||||
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
|
||||
--- a/net/netfilter/nf_conntrack_standalone.c
|
||||
+++ b/net/netfilter/nf_conntrack_standalone.c
|
||||
@@ -576,6 +576,10 @@ enum nf_ct_sysctl_index {
|
||||
NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
|
||||
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
|
||||
+#endif
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
|
||||
#ifdef CONFIG_NF_CT_PROTO_SCTP
|
||||
@@ -810,6 +814,20 @@ static struct ctl_table nf_ct_sysctl_tab
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_jiffies,
|
||||
},
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
|
||||
+ .procname = "nf_flowtable_udp_timeout",
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec_jiffies,
|
||||
+ },
|
||||
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
|
||||
+ .procname = "nf_flowtable_udp_pickup",
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec_jiffies,
|
||||
+ },
|
||||
+#endif
|
||||
[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
|
||||
.procname = "nf_conntrack_icmp_timeout",
|
||||
.maxlen = sizeof(unsigned int),
|
||||
@@ -1078,6 +1096,10 @@ static int nf_conntrack_standalone_init_
|
||||
table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
|
||||
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
|
||||
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
|
||||
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
|
||||
+#endif
|
||||
|
||||
nf_conntrack_standalone_init_tcp_sysctl(net, table);
|
||||
nf_conntrack_standalone_init_sctp_sysctl(net, table);
|
@ -0,0 +1,134 @@
|
||||
From: Oz Shlomo <ozsh@nvidia.com>
|
||||
Date: Thu, 3 Jun 2021 15:12:35 +0300
|
||||
Subject: [PATCH] netfilter: flowtable: Set offload timeouts according to proto
|
||||
values
|
||||
|
||||
Currently the aging period for tcp/udp connections is hard coded to
|
||||
30 seconds. Aged tcp/udp connections configure a hard coded 120/30
|
||||
seconds pickup timeout for conntrack.
|
||||
This configuration may be too aggressive or permissive for some users.
|
||||
|
||||
Dynamically configure the nf flow table GC timeout intervals according
|
||||
to the user defined values.
|
||||
|
||||
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
|
||||
Reviewed-by: Paul Blakey <paulb@nvidia.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -174,6 +174,8 @@ struct flow_offload {
|
||||
#define NF_FLOW_TIMEOUT (30 * HZ)
|
||||
#define nf_flowtable_time_stamp (u32)jiffies
|
||||
|
||||
+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
|
||||
+
|
||||
static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
|
||||
{
|
||||
return (__s32)(timeout - nf_flowtable_time_stamp);
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -175,12 +175,10 @@ static void flow_offload_fixup_tcp(struc
|
||||
tcp->seen[1].td_maxwin = 0;
|
||||
}
|
||||
|
||||
-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
|
||||
-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
|
||||
-
|
||||
static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
|
||||
{
|
||||
const struct nf_conntrack_l4proto *l4proto;
|
||||
+ struct net *net = nf_ct_net(ct);
|
||||
int l4num = nf_ct_protonum(ct);
|
||||
unsigned int timeout;
|
||||
|
||||
@@ -188,12 +186,17 @@ static void flow_offload_fixup_ct_timeou
|
||||
if (!l4proto)
|
||||
return;
|
||||
|
||||
- if (l4num == IPPROTO_TCP)
|
||||
- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
|
||||
- else if (l4num == IPPROTO_UDP)
|
||||
- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
|
||||
- else
|
||||
+ if (l4num == IPPROTO_TCP) {
|
||||
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
|
||||
+
|
||||
+ timeout = tn->offload_pickup;
|
||||
+ } else if (l4num == IPPROTO_UDP) {
|
||||
+ struct nf_udp_net *tn = nf_udp_pernet(net);
|
||||
+
|
||||
+ timeout = tn->offload_pickup;
|
||||
+ } else {
|
||||
return;
|
||||
+ }
|
||||
|
||||
if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
|
||||
WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
|
||||
@@ -265,11 +268,35 @@ static const struct rhashtable_params nf
|
||||
.automatic_shrinking = true,
|
||||
};
|
||||
|
||||
+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
|
||||
+{
|
||||
+ const struct nf_conntrack_l4proto *l4proto;
|
||||
+ unsigned long timeout = NF_FLOW_TIMEOUT;
|
||||
+ struct net *net = nf_ct_net(flow->ct);
|
||||
+ int l4num = nf_ct_protonum(flow->ct);
|
||||
+
|
||||
+ l4proto = nf_ct_l4proto_find(l4num);
|
||||
+ if (!l4proto)
|
||||
+ return timeout;
|
||||
+
|
||||
+ if (l4num == IPPROTO_TCP) {
|
||||
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
|
||||
+
|
||||
+ timeout = tn->offload_timeout;
|
||||
+ } else if (l4num == IPPROTO_UDP) {
|
||||
+ struct nf_udp_net *tn = nf_udp_pernet(net);
|
||||
+
|
||||
+ timeout = tn->offload_timeout;
|
||||
+ }
|
||||
+
|
||||
+ return timeout;
|
||||
+}
|
||||
+
|
||||
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
|
||||
{
|
||||
int err;
|
||||
|
||||
- flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
|
||||
+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
|
||||
|
||||
err = rhashtable_insert_fast(&flow_table->rhashtable,
|
||||
&flow->tuplehash[0].node,
|
||||
@@ -301,7 +328,7 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
|
||||
void flow_offload_refresh(struct nf_flowtable *flow_table,
|
||||
struct flow_offload *flow)
|
||||
{
|
||||
- flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
|
||||
+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
|
||||
|
||||
if (likely(!nf_flowtable_hw_offload(flow_table)))
|
||||
return;
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -885,7 +885,7 @@ static void flow_offload_work_stats(stru
|
||||
|
||||
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
|
||||
offload->flow->timeout = max_t(u64, offload->flow->timeout,
|
||||
- lastused + NF_FLOW_TIMEOUT);
|
||||
+ lastused + flow_offload_get_timeout(offload->flow));
|
||||
|
||||
if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
|
||||
if (stats[0].pkts)
|
||||
@@ -989,7 +989,7 @@ void nf_flow_offload_stats(struct nf_flo
|
||||
__s32 delta;
|
||||
|
||||
delta = nf_flow_timeout_delta(flow->timeout);
|
||||
- if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10))
|
||||
+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
|
||||
return;
|
||||
|
||||
offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
|
@ -0,0 +1,138 @@
|
||||
From 4fd59792097a6b2fb949d41264386a7ecade469e Mon Sep 17 00:00:00 2001
|
||||
From: DENG Qingfang <dqfext@gmail.com>
|
||||
Date: Mon, 25 Jan 2021 12:20:46 +0800
|
||||
Subject: [PATCH] net: ethernet: mediatek: support setting MTU
|
||||
|
||||
MT762x HW, except for MT7628, supports frame length up to 2048
|
||||
(maximum length on GDM), so allow setting MTU up to 2030.
|
||||
|
||||
Also set the default frame length to the hardware default 1518.
|
||||
|
||||
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Link: https://lore.kernel.org/r/20210125042046.5599-1-dqfext@gmail.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 43 ++++++++++++++++++---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 12 ++++--
|
||||
2 files changed, 47 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -355,7 +355,7 @@ static void mtk_mac_config(struct phylin
|
||||
/* Setup gmac */
|
||||
mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
|
||||
mcr_new = mcr_cur;
|
||||
- mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
|
||||
+ mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
|
||||
MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK;
|
||||
|
||||
/* Only update control register when needed! */
|
||||
@@ -782,8 +782,8 @@ static void mtk_get_stats64(struct net_d
|
||||
static inline int mtk_max_frag_size(int mtu)
|
||||
{
|
||||
/* make sure buf_size will be at least MTK_MAX_RX_LENGTH */
|
||||
- if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH)
|
||||
- mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
|
||||
+ if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH_2K)
|
||||
+ mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN;
|
||||
|
||||
return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) +
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
@@ -794,7 +794,7 @@ static inline int mtk_max_buf_size(int f
|
||||
int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN -
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
|
||||
- WARN_ON(buf_size < MTK_MAX_RX_LENGTH);
|
||||
+ WARN_ON(buf_size < MTK_MAX_RX_LENGTH_2K);
|
||||
|
||||
return buf_size;
|
||||
}
|
||||
@@ -2606,6 +2606,35 @@ static void mtk_uninit(struct net_device
|
||||
mtk_rx_irq_disable(eth, ~0);
|
||||
}
|
||||
|
||||
+static int mtk_change_mtu(struct net_device *dev, int new_mtu)
|
||||
+{
|
||||
+ int length = new_mtu + MTK_RX_ETH_HLEN;
|
||||
+ struct mtk_mac *mac = netdev_priv(dev);
|
||||
+ struct mtk_eth *eth = mac->hw;
|
||||
+ u32 mcr_cur, mcr_new;
|
||||
+
|
||||
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
|
||||
+ mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
|
||||
+ mcr_new = mcr_cur & ~MAC_MCR_MAX_RX_MASK;
|
||||
+
|
||||
+ if (length <= 1518)
|
||||
+ mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1518);
|
||||
+ else if (length <= 1536)
|
||||
+ mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1536);
|
||||
+ else if (length <= 1552)
|
||||
+ mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1552);
|
||||
+ else
|
||||
+ mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_2048);
|
||||
+
|
||||
+ if (mcr_new != mcr_cur)
|
||||
+ mtk_w32(mac->hw, mcr_new, MTK_MAC_MCR(mac->id));
|
||||
+ }
|
||||
+
|
||||
+ dev->mtu = new_mtu;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
|
||||
{
|
||||
struct mtk_mac *mac = netdev_priv(dev);
|
||||
@@ -2902,6 +2931,7 @@ static const struct net_device_ops mtk_n
|
||||
.ndo_set_mac_address = mtk_set_mac_address,
|
||||
.ndo_validate_addr = eth_validate_addr,
|
||||
.ndo_do_ioctl = mtk_do_ioctl,
|
||||
+ .ndo_change_mtu = mtk_change_mtu,
|
||||
.ndo_tx_timeout = mtk_tx_timeout,
|
||||
.ndo_get_stats64 = mtk_get_stats64,
|
||||
.ndo_fix_features = mtk_fix_features,
|
||||
@@ -3004,7 +3034,10 @@ static int mtk_add_mac(struct mtk_eth *e
|
||||
eth->netdev[id]->irq = eth->irq[0];
|
||||
eth->netdev[id]->dev.of_node = np;
|
||||
|
||||
- eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
|
||||
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
|
||||
+ eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
|
||||
+ else
|
||||
+ eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN;
|
||||
|
||||
return 0;
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -20,12 +20,13 @@
|
||||
#include "mtk_ppe.h"
|
||||
|
||||
#define MTK_QDMA_PAGE_SIZE 2048
|
||||
-#define MTK_MAX_RX_LENGTH 1536
|
||||
+#define MTK_MAX_RX_LENGTH 1536
|
||||
+#define MTK_MAX_RX_LENGTH_2K 2048
|
||||
#define MTK_TX_DMA_BUF_LEN 0x3fff
|
||||
#define MTK_DMA_SIZE 512
|
||||
#define MTK_NAPI_WEIGHT 64
|
||||
#define MTK_MAC_COUNT 2
|
||||
-#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
|
||||
+#define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN)
|
||||
#define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
|
||||
#define MTK_DMA_DUMMY_DESC 0xffffffff
|
||||
#define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \
|
||||
@@ -352,7 +353,12 @@
|
||||
|
||||
/* Mac control registers */
|
||||
#define MTK_MAC_MCR(x) (0x10100 + (x * 0x100))
|
||||
-#define MAC_MCR_MAX_RX_1536 BIT(24)
|
||||
+#define MAC_MCR_MAX_RX_MASK GENMASK(25, 24)
|
||||
+#define MAC_MCR_MAX_RX(_x) (MAC_MCR_MAX_RX_MASK & ((_x) << 24))
|
||||
+#define MAC_MCR_MAX_RX_1518 0x0
|
||||
+#define MAC_MCR_MAX_RX_1536 0x1
|
||||
+#define MAC_MCR_MAX_RX_1552 0x2
|
||||
+#define MAC_MCR_MAX_RX_2048 0x3
|
||||
#define MAC_MCR_IPG_CFG (BIT(18) | BIT(16))
|
||||
#define MAC_MCR_FORCE_MODE BIT(15)
|
||||
#define MAC_MCR_TX_EN BIT(14)
|
@ -0,0 +1,108 @@
|
||||
From c329e5afb42ff0a88285eb4d8a391a18793e4777 Mon Sep 17 00:00:00 2001
|
||||
From: David Bauer <mail@david-bauer.net>
|
||||
Date: Thu, 15 Apr 2021 03:26:50 +0200
|
||||
Subject: [PATCH] net: phy: at803x: select correct page on config init
|
||||
|
||||
The Atheros AR8031 and AR8033 expose different registers for SGMII/Fiber
|
||||
as well as the copper side of the PHY depending on the BT_BX_REG_SEL bit
|
||||
in the chip configure register.
|
||||
|
||||
The driver assumes the copper side is selected on probe, but this might
|
||||
not be the case depending on which page was last selected by the
|
||||
bootloader. Notably, Ubiquiti UniFi bootloaders show this behavior.
|
||||
|
||||
Select the copper page when probing to circumvent this.
|
||||
|
||||
Signed-off-by: David Bauer <mail@david-bauer.net>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/phy/at803x.c | 50 +++++++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 49 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/phy/at803x.c
|
||||
+++ b/drivers/net/phy/at803x.c
|
||||
@@ -139,6 +139,9 @@
|
||||
#define ATH8035_PHY_ID 0x004dd072
|
||||
#define AT8030_PHY_ID_MASK 0xffffffef
|
||||
|
||||
+#define AT803X_PAGE_FIBER 0
|
||||
+#define AT803X_PAGE_COPPER 1
|
||||
+
|
||||
MODULE_DESCRIPTION("Qualcomm Atheros AR803x PHY driver");
|
||||
MODULE_AUTHOR("Matus Ujhelyi");
|
||||
MODULE_LICENSE("GPL");
|
||||
@@ -190,6 +193,35 @@ static int at803x_debug_reg_mask(struct
|
||||
return phy_write(phydev, AT803X_DEBUG_DATA, val);
|
||||
}
|
||||
|
||||
+static int at803x_write_page(struct phy_device *phydev, int page)
|
||||
+{
|
||||
+ int mask;
|
||||
+ int set;
|
||||
+
|
||||
+ if (page == AT803X_PAGE_COPPER) {
|
||||
+ set = AT803X_BT_BX_REG_SEL;
|
||||
+ mask = 0;
|
||||
+ } else {
|
||||
+ set = 0;
|
||||
+ mask = AT803X_BT_BX_REG_SEL;
|
||||
+ }
|
||||
+
|
||||
+ return __phy_modify(phydev, AT803X_REG_CHIP_CONFIG, mask, set);
|
||||
+}
|
||||
+
|
||||
+static int at803x_read_page(struct phy_device *phydev)
|
||||
+{
|
||||
+ int ccr = __phy_read(phydev, AT803X_REG_CHIP_CONFIG);
|
||||
+
|
||||
+ if (ccr < 0)
|
||||
+ return ccr;
|
||||
+
|
||||
+ if (ccr & AT803X_BT_BX_REG_SEL)
|
||||
+ return AT803X_PAGE_COPPER;
|
||||
+
|
||||
+ return AT803X_PAGE_FIBER;
|
||||
+}
|
||||
+
|
||||
static int at803x_enable_rx_delay(struct phy_device *phydev)
|
||||
{
|
||||
return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, 0,
|
||||
@@ -508,6 +540,7 @@ static int at803x_probe(struct phy_devic
|
||||
{
|
||||
struct device *dev = &phydev->mdio.dev;
|
||||
struct at803x_priv *priv;
|
||||
+ int ret;
|
||||
|
||||
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
|
||||
if (!priv)
|
||||
@@ -515,7 +548,20 @@ static int at803x_probe(struct phy_devic
|
||||
|
||||
phydev->priv = priv;
|
||||
|
||||
- return at803x_parse_dt(phydev);
|
||||
+ ret = at803x_parse_dt(phydev);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ /* Some bootloaders leave the fiber page selected.
|
||||
+ * Switch to the copper page, as otherwise we read
|
||||
+ * the PHY capabilities from the fiber side.
|
||||
+ */
|
||||
+ if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
|
||||
+ ret = phy_select_page(phydev, AT803X_PAGE_COPPER);
|
||||
+ ret = phy_restore_page(phydev, AT803X_PAGE_COPPER, ret);
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static void at803x_remove(struct phy_device *phydev)
|
||||
@@ -1097,6 +1143,8 @@ static struct phy_driver at803x_driver[]
|
||||
.get_wol = at803x_get_wol,
|
||||
.suspend = at803x_suspend,
|
||||
.resume = at803x_resume,
|
||||
+ .read_page = at803x_read_page,
|
||||
+ .write_page = at803x_write_page,
|
||||
/* PHY_GBIT_FEATURES */
|
||||
.read_status = at803x_read_status,
|
||||
.aneg_done = at803x_aneg_done,
|
@ -0,0 +1,73 @@
|
||||
From 8f7e876273e294b732b42af2e5e6bba91d798954 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Walle <michael@walle.cc>
|
||||
Date: Tue, 20 Apr 2021 12:29:29 +0200
|
||||
Subject: [PATCH] net: phy: at803x: fix probe error if copper page is selected
|
||||
|
||||
The commit c329e5afb42f ("net: phy: at803x: select correct page on
|
||||
config init") selects the copper page during probe. This fails if the
|
||||
copper page was already selected. In this case, the value of the copper
|
||||
page (which is 1) is propagated through phy_restore_page() and is
|
||||
finally returned for at803x_probe(). Fix it, by just using the
|
||||
at803x_write_page() directly.
|
||||
|
||||
Also in case of an error, the regulator is not disabled and leads to a
|
||||
WARN_ON() when the probe fails. This couldn't happen before, because
|
||||
at803x_parse_dt() was the last call in at803x_probe(). It is hard to
|
||||
see that the parse_dt() actually enables the regulator. Thus move the
|
||||
regulator_enable() to the probe function and undo it in case of an
|
||||
error.
|
||||
|
||||
Fixes: c329e5afb42f ("net: phy: at803x: select correct page on config init")
|
||||
Signed-off-by: Michael Walle <michael@walle.cc>
|
||||
Reviewed-by: David Bauer <mail@david-bauer.net>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/phy/at803x.c | 23 +++++++++++++++++------
|
||||
1 file changed, 17 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/net/phy/at803x.c
|
||||
+++ b/drivers/net/phy/at803x.c
|
||||
@@ -527,10 +527,6 @@ static int at803x_parse_dt(struct phy_de
|
||||
phydev_err(phydev, "failed to get VDDIO regulator\n");
|
||||
return PTR_ERR(priv->vddio);
|
||||
}
|
||||
-
|
||||
- ret = regulator_enable(priv->vddio);
|
||||
- if (ret < 0)
|
||||
- return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -552,15 +548,30 @@ static int at803x_probe(struct phy_devic
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
+ if (priv->vddio) {
|
||||
+ ret = regulator_enable(priv->vddio);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
/* Some bootloaders leave the fiber page selected.
|
||||
* Switch to the copper page, as otherwise we read
|
||||
* the PHY capabilities from the fiber side.
|
||||
*/
|
||||
if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
|
||||
- ret = phy_select_page(phydev, AT803X_PAGE_COPPER);
|
||||
- ret = phy_restore_page(phydev, AT803X_PAGE_COPPER, ret);
|
||||
+ phy_lock_mdio_bus(phydev);
|
||||
+ ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
|
||||
+ phy_unlock_mdio_bus(phydev);
|
||||
+ if (ret)
|
||||
+ goto err;
|
||||
}
|
||||
|
||||
+ return 0;
|
||||
+
|
||||
+err:
|
||||
+ if (priv->vddio)
|
||||
+ regulator_disable(priv->vddio);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
@ -0,0 +1,56 @@
|
||||
From b1ae3587d16a8c8fc9453e147c8708d6f006ffbb Mon Sep 17 00:00:00 2001
|
||||
From: Bjarni Jonasson <bjarni.jonasson@microchip.com>
|
||||
Date: Wed, 13 Jan 2021 12:56:25 +0100
|
||||
Subject: [PATCH] net: phy: Add 100 base-x mode
|
||||
|
||||
Sparx-5 supports this mode and it is missing in the PHY core.
|
||||
|
||||
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
|
||||
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
Documentation/networking/phy.rst | 5 +++++
|
||||
include/linux/phy.h | 4 ++++
|
||||
2 files changed, 9 insertions(+)
|
||||
|
||||
--- a/Documentation/networking/phy.rst
|
||||
+++ b/Documentation/networking/phy.rst
|
||||
@@ -286,6 +286,11 @@ Some of the interface modes are describe
|
||||
Note: due to legacy usage, some 10GBASE-R usage incorrectly makes
|
||||
use of this definition.
|
||||
|
||||
+``PHY_INTERFACE_MODE_100BASEX``
|
||||
+ This defines IEEE 802.3 Clause 24. The link operates at a fixed data
|
||||
+ rate of 125Mbps using a 4B/5B encoding scheme, resulting in an underlying
|
||||
+ data rate of 100Mbps.
|
||||
+
|
||||
Pause frames / flow control
|
||||
===========================
|
||||
|
||||
--- a/include/linux/phy.h
|
||||
+++ b/include/linux/phy.h
|
||||
@@ -104,6 +104,7 @@ extern const int phy_10gbit_features_arr
|
||||
* @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax
|
||||
* @PHY_INTERFACE_MODE_QSGMII: Quad SGMII
|
||||
* @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII
|
||||
+ * @PHY_INTERFACE_MODE_100BASEX: 100 BaseX
|
||||
* @PHY_INTERFACE_MODE_1000BASEX: 1000 BaseX
|
||||
* @PHY_INTERFACE_MODE_2500BASEX: 2500 BaseX
|
||||
* @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI
|
||||
@@ -135,6 +136,7 @@ typedef enum {
|
||||
PHY_INTERFACE_MODE_MOCA,
|
||||
PHY_INTERFACE_MODE_QSGMII,
|
||||
PHY_INTERFACE_MODE_TRGMII,
|
||||
+ PHY_INTERFACE_MODE_100BASEX,
|
||||
PHY_INTERFACE_MODE_1000BASEX,
|
||||
PHY_INTERFACE_MODE_2500BASEX,
|
||||
PHY_INTERFACE_MODE_RXAUI,
|
||||
@@ -217,6 +219,8 @@ static inline const char *phy_modes(phy_
|
||||
return "usxgmii";
|
||||
case PHY_INTERFACE_MODE_10GKR:
|
||||
return "10gbase-kr";
|
||||
+ case PHY_INTERFACE_MODE_100BASEX:
|
||||
+ return "100base-x";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
From 6e12f35cef6b8a458d7ecf507ae330e0bffaad8c Mon Sep 17 00:00:00 2001
|
||||
From: Bjarni Jonasson <bjarni.jonasson@microchip.com>
|
||||
Date: Wed, 13 Jan 2021 12:56:26 +0100
|
||||
Subject: [PATCH] sfp: add support for 100 base-x SFPs
|
||||
|
||||
Add support for 100Base-FX, 100Base-LX, 100Base-PX and 100Base-BX10 modules.
|
||||
This is needed for Sparx-5 switch.
|
||||
|
||||
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
|
||||
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
drivers/net/phy/sfp-bus.c | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
--- a/drivers/net/phy/sfp-bus.c
|
||||
+++ b/drivers/net/phy/sfp-bus.c
|
||||
@@ -280,6 +280,12 @@ void sfp_parse_support(struct sfp_bus *b
|
||||
br_min <= 1300 && br_max >= 1200)
|
||||
phylink_set(modes, 1000baseX_Full);
|
||||
|
||||
+ /* 100Base-FX, 100Base-LX, 100Base-PX, 100Base-BX10 */
|
||||
+ if (id->base.e100_base_fx || id->base.e100_base_lx)
|
||||
+ phylink_set(modes, 100baseFX_Full);
|
||||
+ if ((id->base.e_base_px || id->base.e_base_bx10) && br_nom == 100)
|
||||
+ phylink_set(modes, 100baseFX_Full);
|
||||
+
|
||||
/* For active or passive cables, select the link modes
|
||||
* based on the bit rates and the cable compliance bytes.
|
||||
*/
|
||||
@@ -399,6 +405,9 @@ phy_interface_t sfp_select_interface(str
|
||||
if (phylink_test(link_modes, 1000baseX_Full))
|
||||
return PHY_INTERFACE_MODE_1000BASEX;
|
||||
|
||||
+ if (phylink_test(link_modes, 100baseFX_Full))
|
||||
+ return PHY_INTERFACE_MODE_100BASEX;
|
||||
+
|
||||
dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
|
||||
|
||||
return PHY_INTERFACE_MODE_NA;
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user