mirror of
https://github.com/openwrt/openwrt.git
synced 2025-01-19 03:06:35 +00:00
generic: 5.15: drop upstream patch
Drop upstream patch from backport dir for kernel 5.15.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
This commit is contained in:
parent
9a038e7fd1
commit
79dfa44733
@ -1,30 +0,0 @@
|
||||
From 13b1ecc3401653a355798eb1dee10cc1608202f4 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Mon, 18 Jan 2016 12:27:49 +0100
|
||||
Subject: [PATCH 33/34] Kbuild: don't hardcode path to awk in
|
||||
scripts/ld-version.sh
|
||||
|
||||
On some systems /usr/bin/awk does not exist, or is broken. Find it via
|
||||
$PATH instead.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
scripts/ld-version.sh | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/scripts/ld-version.sh
|
||||
+++ b/scripts/ld-version.sh
|
||||
@@ -1,6 +1,7 @@
|
||||
-#!/usr/bin/awk -f
|
||||
+#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# extract linker version number from stdin and turn into single number
|
||||
+exec awk '
|
||||
{
|
||||
gsub(".*\\)", "");
|
||||
gsub(".*version ", "");
|
||||
@@ -9,3 +10,4 @@
|
||||
print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
|
||||
exit
|
||||
}
|
||||
+'
|
@ -1,27 +0,0 @@
|
||||
From 1027a42c25cbf8cfc4ade6503c5110aae04866af Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Daniel=20Gonz=C3=A1lez=20Cabanelas?= <dgcbueu@gmail.com>
|
||||
Date: Fri, 16 Oct 2020 20:22:37 +0200
|
||||
Subject: [PATCH] power: reset: linkstation-poweroff: add missing put_device()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The of_mdio_find_bus() takes a reference to the underlying device
|
||||
structure; we should release that reference using a put_device() call.
|
||||
|
||||
Signed-off-by: Daniel González Cabanelas <dgcbueu@gmail.com>
|
||||
Signed-off-by: Sebastian Reichel <sre@kernel.org>
|
||||
---
|
||||
drivers/power/reset/linkstation-poweroff.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/drivers/power/reset/linkstation-poweroff.c
|
||||
+++ b/drivers/power/reset/linkstation-poweroff.c
|
||||
@@ -113,6 +113,7 @@ static int __init linkstation_poweroff_i
|
||||
return -EPROBE_DEFER;
|
||||
|
||||
phydev = phy_find_first(bus);
|
||||
+ put_device(&bus->dev);
|
||||
if (!phydev)
|
||||
return -EPROBE_DEFER;
|
||||
|
@ -1,272 +0,0 @@
|
||||
From 03662fcd41f4b764857f17b95f9a2a63c24bddd4 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Tue, 3 Nov 2020 17:28:09 +0100
|
||||
Subject: [PATCH 1/2] crypto: arm/chacha-neon - optimize for non-block size
|
||||
multiples
|
||||
|
||||
commit 86cd97ec4b943af35562a74688bc4e909b32c3d1 upstream.
|
||||
|
||||
The current NEON based ChaCha implementation for ARM is optimized for
|
||||
multiples of 4x the ChaCha block size (64 bytes). This makes sense for
|
||||
block encryption, but given that ChaCha is also often used in the
|
||||
context of networking, it makes sense to consider arbitrary length
|
||||
inputs as well.
|
||||
|
||||
For example, WireGuard typically uses 1420 byte packets, and performing
|
||||
ChaCha encryption involves 5 invocations of chacha_4block_xor_neon()
|
||||
and 3 invocations of chacha_block_xor_neon(), where the last one also
|
||||
involves a memcpy() using a buffer on the stack to process the final
|
||||
chunk of 1420 % 64 == 12 bytes.
|
||||
|
||||
Let's optimize for this case as well, by letting chacha_4block_xor_neon()
|
||||
deal with any input size between 64 and 256 bytes, using NEON permutation
|
||||
instructions and overlapping loads and stores. This way, the 140 byte
|
||||
tail of a 1420 byte input buffer can simply be processed in one go.
|
||||
|
||||
This results in the following performance improvements for 1420 byte
|
||||
blocks, without significant impact on power-of-2 input sizes. (Note
|
||||
that Raspberry Pi is widely used in combination with a 32-bit kernel,
|
||||
even though the core is 64-bit capable)
|
||||
|
||||
Cortex-A8 (BeagleBone) : 7%
|
||||
Cortex-A15 (Calxeda Midway) : 21%
|
||||
Cortex-A53 (Raspberry Pi 3) : 3%
|
||||
Cortex-A72 (Raspberry Pi 4) : 19%
|
||||
|
||||
Cc: Eric Biggers <ebiggers@google.com>
|
||||
Cc: "Jason A . Donenfeld" <Jason@zx2c4.com>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-glue.c | 34 +++++------
|
||||
arch/arm/crypto/chacha-neon-core.S | 97 +++++++++++++++++++++++++++---
|
||||
2 files changed, 107 insertions(+), 24 deletions(-)
|
||||
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -23,7 +23,7 @@
|
||||
asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
int nrounds);
|
||||
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
- int nrounds);
|
||||
+ int nrounds, unsigned int nbytes);
|
||||
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
|
||||
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
|
||||
|
||||
@@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8
|
||||
{
|
||||
u8 buf[CHACHA_BLOCK_SIZE];
|
||||
|
||||
- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
||||
- chacha_4block_xor_neon(state, dst, src, nrounds);
|
||||
- bytes -= CHACHA_BLOCK_SIZE * 4;
|
||||
- src += CHACHA_BLOCK_SIZE * 4;
|
||||
- dst += CHACHA_BLOCK_SIZE * 4;
|
||||
- state[12] += 4;
|
||||
- }
|
||||
- while (bytes >= CHACHA_BLOCK_SIZE) {
|
||||
- chacha_block_xor_neon(state, dst, src, nrounds);
|
||||
- bytes -= CHACHA_BLOCK_SIZE;
|
||||
- src += CHACHA_BLOCK_SIZE;
|
||||
- dst += CHACHA_BLOCK_SIZE;
|
||||
- state[12]++;
|
||||
+ while (bytes > CHACHA_BLOCK_SIZE) {
|
||||
+ unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
|
||||
+
|
||||
+ chacha_4block_xor_neon(state, dst, src, nrounds, l);
|
||||
+ bytes -= l;
|
||||
+ src += l;
|
||||
+ dst += l;
|
||||
+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
|
||||
}
|
||||
if (bytes) {
|
||||
- memcpy(buf, src, bytes);
|
||||
- chacha_block_xor_neon(state, buf, buf, nrounds);
|
||||
- memcpy(dst, buf, bytes);
|
||||
+ const u8 *s = src;
|
||||
+ u8 *d = dst;
|
||||
+
|
||||
+ if (bytes != CHACHA_BLOCK_SIZE)
|
||||
+ s = d = memcpy(buf, src, bytes);
|
||||
+ chacha_block_xor_neon(state, d, s, nrounds);
|
||||
+ if (d != dst)
|
||||
+ memcpy(dst, buf, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
--- a/arch/arm/crypto/chacha-neon-core.S
|
||||
+++ b/arch/arm/crypto/chacha-neon-core.S
|
||||
@@ -47,6 +47,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
+#include <asm/cache.h>
|
||||
|
||||
.text
|
||||
.fpu neon
|
||||
@@ -205,7 +206,7 @@ ENDPROC(hchacha_block_neon)
|
||||
|
||||
.align 5
|
||||
ENTRY(chacha_4block_xor_neon)
|
||||
- push {r4-r5}
|
||||
+ push {r4, lr}
|
||||
mov r4, sp // preserve the stack pointer
|
||||
sub ip, sp, #0x20 // allocate a 32 byte buffer
|
||||
bic ip, ip, #0x1f // aligned to 32 bytes
|
||||
@@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon)
|
||||
vld1.32 {q0-q1}, [r0]
|
||||
vld1.32 {q2-q3}, [ip]
|
||||
|
||||
- adr r5, .Lctrinc
|
||||
+ adr lr, .Lctrinc
|
||||
vdup.32 q15, d7[1]
|
||||
vdup.32 q14, d7[0]
|
||||
- vld1.32 {q4}, [r5, :128]
|
||||
+ vld1.32 {q4}, [lr, :128]
|
||||
vdup.32 q13, d6[1]
|
||||
vdup.32 q12, d6[0]
|
||||
vdup.32 q11, d5[1]
|
||||
@@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon)
|
||||
|
||||
// Re-interleave the words in the first two rows of each block (x0..7).
|
||||
// Also add the counter values 0-3 to x12[0-3].
|
||||
- vld1.32 {q8}, [r5, :128] // load counter values 0-3
|
||||
+ vld1.32 {q8}, [lr, :128] // load counter values 0-3
|
||||
vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1)
|
||||
vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3)
|
||||
vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5)
|
||||
@@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon)
|
||||
|
||||
// Re-interleave the words in the last two rows of each block (x8..15).
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
+ mov sp, r4 // restore original stack pointer
|
||||
+ ldr r4, [r4, #8] // load number of bytes
|
||||
vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13)
|
||||
vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15)
|
||||
vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9)
|
||||
@@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon)
|
||||
// XOR the rest of the data with the keystream
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #96
|
||||
veor q0, q0, q8
|
||||
veor q1, q1, q12
|
||||
+ ble .Lle96
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q2
|
||||
veor q1, q1, q6
|
||||
+ ble .Lle128
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q10
|
||||
veor q1, q1, q14
|
||||
+ ble .Lle160
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q4
|
||||
veor q1, q1, q5
|
||||
+ ble .Lle192
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q9
|
||||
veor q1, q1, q13
|
||||
+ ble .Lle224
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q7
|
||||
+ blt .Llt256
|
||||
+.Lout:
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]
|
||||
- mov sp, r4 // restore original stack pointer
|
||||
veor q0, q0, q11
|
||||
veor q1, q1, q15
|
||||
vst1.8 {q0-q1}, [r1]
|
||||
|
||||
- pop {r4-r5}
|
||||
- bx lr
|
||||
+ pop {r4, pc}
|
||||
+
|
||||
+.Lle192:
|
||||
+ vmov q4, q9
|
||||
+ vmov q5, q13
|
||||
+
|
||||
+.Lle160:
|
||||
+ // nothing to do
|
||||
+
|
||||
+.Lfinalblock:
|
||||
+ // Process the final block if processing less than 4 full blocks.
|
||||
+ // Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the
|
||||
+ // previous 32 byte output block that still needs to be written at
|
||||
+ // [r1] in q0-q1.
|
||||
+ beq .Lfullblock
|
||||
+
|
||||
+.Lpartialblock:
|
||||
+ adr lr, .Lpermute + 32
|
||||
+ add r2, r2, r4
|
||||
+ add lr, lr, r4
|
||||
+ add r4, r4, r1
|
||||
+
|
||||
+ vld1.8 {q2-q3}, [lr]
|
||||
+ vld1.8 {q6-q7}, [r2]
|
||||
+
|
||||
+ add r4, r4, #32
|
||||
+
|
||||
+ vtbl.8 d4, {q4-q5}, d4
|
||||
+ vtbl.8 d5, {q4-q5}, d5
|
||||
+ vtbl.8 d6, {q4-q5}, d6
|
||||
+ vtbl.8 d7, {q4-q5}, d7
|
||||
+
|
||||
+ veor q6, q6, q2
|
||||
+ veor q7, q7, q3
|
||||
+
|
||||
+ vst1.8 {q6-q7}, [r4] // overlapping stores
|
||||
+ vst1.8 {q0-q1}, [r1]
|
||||
+ pop {r4, pc}
|
||||
+
|
||||
+.Lfullblock:
|
||||
+ vmov q11, q4
|
||||
+ vmov q15, q5
|
||||
+ b .Lout
|
||||
+.Lle96:
|
||||
+ vmov q4, q2
|
||||
+ vmov q5, q6
|
||||
+ b .Lfinalblock
|
||||
+.Lle128:
|
||||
+ vmov q4, q10
|
||||
+ vmov q5, q14
|
||||
+ b .Lfinalblock
|
||||
+.Lle224:
|
||||
+ vmov q4, q3
|
||||
+ vmov q5, q7
|
||||
+ b .Lfinalblock
|
||||
+.Llt256:
|
||||
+ vmov q4, q11
|
||||
+ vmov q5, q15
|
||||
+ b .Lpartialblock
|
||||
ENDPROC(chacha_4block_xor_neon)
|
||||
+
|
||||
+ .align L1_CACHE_SHIFT
|
||||
+.Lpermute:
|
||||
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
@ -1,38 +0,0 @@
|
||||
From 7f63462faf9eab69132bea9abd48c2c05a93145b Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Sun, 13 Dec 2020 15:39:29 +0100
|
||||
Subject: [PATCH 2/2] crypto: arm/chacha-neon - add missing counter increment
|
||||
|
||||
commit fd16931a2f518a32753920ff20895e5cf04c8ff1 upstream.
|
||||
|
||||
Commit 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block
|
||||
size multiples") refactored the chacha block handling in the glue code in
|
||||
a way that may result in the counter increment being omitted when calling
|
||||
chacha_block_xor_neon() to process a full block. This violates the skcipher
|
||||
API, which requires that the output IV is suitable for handling more input
|
||||
as long as the preceding input has been presented in round multiples of the
|
||||
block size. Also, the same code is exposed via the chacha library interface
|
||||
whose callers may actually rely on this increment to occur even for final
|
||||
blocks that are smaller than the chacha block size.
|
||||
|
||||
So increment the counter after calling chacha_block_xor_neon().
|
||||
|
||||
Fixes: 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block size multiples")
|
||||
Reported-by: Eric Biggers <ebiggers@kernel.org>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-glue.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -60,6 +60,7 @@ static void chacha_doneon(u32 *state, u8
|
||||
chacha_block_xor_neon(state, d, s, nrounds);
|
||||
if (d != dst)
|
||||
memcpy(dst, buf, bytes);
|
||||
+ state[12]++;
|
||||
}
|
||||
}
|
||||
|
@ -1,42 +0,0 @@
|
||||
From a13827e9091c07e25cdeec9a402d74a27e2a1111 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Mon, 22 Feb 2021 17:25:46 +0100
|
||||
Subject: [PATCH] wireguard: peer: put frequently used members above cache
|
||||
lines
|
||||
|
||||
commit 5a0598695634a6bb4126818902dd9140cd9df8b6 upstream.
|
||||
|
||||
The is_dead boolean is checked for every single packet, while the
|
||||
internal_id member is used basically only for pr_debug messages. So it
|
||||
makes sense to hoist up is_dead into some space formerly unused by a
|
||||
struct hole, while demoting internal_id to below the lowest struct
|
||||
cache line.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/peer.h | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/peer.h
|
||||
+++ b/drivers/net/wireguard/peer.h
|
||||
@@ -39,6 +39,7 @@ struct wg_peer {
|
||||
struct prev_queue tx_queue, rx_queue;
|
||||
struct sk_buff_head staged_packet_queue;
|
||||
int serial_work_cpu;
|
||||
+ bool is_dead;
|
||||
struct noise_keypairs keypairs;
|
||||
struct endpoint endpoint;
|
||||
struct dst_cache endpoint_cache;
|
||||
@@ -61,9 +62,8 @@ struct wg_peer {
|
||||
struct rcu_head rcu;
|
||||
struct list_head peer_list;
|
||||
struct list_head allowedips_list;
|
||||
- u64 internal_id;
|
||||
struct napi_struct napi;
|
||||
- bool is_dead;
|
||||
+ u64 internal_id;
|
||||
};
|
||||
|
||||
struct wg_peer *wg_peer_create(struct wg_device *wg,
|
@ -1,36 +0,0 @@
|
||||
From 6523061868212473f63812a0c477a161742bed42 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Sat, 27 Feb 2021 13:20:24 +0100
|
||||
Subject: [PATCH] MIPS: select CPU_MIPS64 for remaining MIPS64 CPUs
|
||||
|
||||
The CPU_MIPS64 and CPU_MIPS32 variables are supposed to be able to
|
||||
distinguish broadly between 64-bit and 32-bit MIPS CPUs. However, they
|
||||
weren't selected by the specialty CPUs, Octeon and Loongson, which meant
|
||||
it was possible to hit a weird state of:
|
||||
|
||||
MIPS=y, CONFIG_64BIT=y, CPU_MIPS64=n
|
||||
|
||||
This commit rectifies the issue by having CPU_MIPS64 be selected when
|
||||
the missing Octeon or Loongson models are selected.
|
||||
|
||||
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
|
||||
Cc: Ralf Baechle <ralf@linux-mips.org>
|
||||
Cc: George Cherian <gcherian@marvell.com>
|
||||
Cc: Huacai Chen <chenhuacai@kernel.org>
|
||||
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/mips/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -2088,7 +2088,7 @@ config CPU_MIPS32
|
||||
config CPU_MIPS64
|
||||
bool
|
||||
default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R5 || \
|
||||
- CPU_MIPS64_R6
|
||||
+ CPU_MIPS64_R6 || CPU_LOONGSON64 || CPU_CAVIUM_OCTEON
|
||||
|
||||
#
|
||||
# These indicate the revision of the architecture
|
@ -1,36 +0,0 @@
|
||||
From 7d1531c81c0fb4c93bea8dc316043ad0e4d0c270 Mon Sep 17 00:00:00 2001
|
||||
From: Chuanhong Guo <gch981213@gmail.com>
|
||||
Date: Sun, 25 Oct 2020 23:19:40 +0800
|
||||
Subject: [PATCH] MIPS: zboot: put appended dtb into a section
|
||||
|
||||
This will make a separate section for dtb appear in ELF, and we can
|
||||
then use objcopy to patch a dtb into vmlinuz when RAW_APPENDED_DTB
|
||||
is set in kernel config.
|
||||
|
||||
command to patch a dtb:
|
||||
objcopy --set-section-flags=.appended_dtb=alloc,contents \
|
||||
--update-section=.appended_dtb=<target>.dtb vmlinuz vmlinuz-dtb
|
||||
|
||||
Signed-off-by: Chuanhong Guo <gch981213@gmail.com>
|
||||
---
|
||||
arch/mips/boot/compressed/ld.script | 9 ++++++---
|
||||
1 file changed, 6 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/arch/mips/boot/compressed/ld.script
|
||||
+++ b/arch/mips/boot/compressed/ld.script
|
||||
@@ -31,9 +31,12 @@ SECTIONS
|
||||
CONSTRUCTORS
|
||||
. = ALIGN(16);
|
||||
}
|
||||
- __appended_dtb = .;
|
||||
- /* leave space for appended DTB */
|
||||
- . += 0x100000;
|
||||
+
|
||||
+ .appended_dtb : {
|
||||
+ __appended_dtb = .;
|
||||
+ /* leave space for appended DTB */
|
||||
+ . += 0x100000;
|
||||
+ }
|
||||
|
||||
_edata = .;
|
||||
/* End of data section */
|
@ -1,324 +0,0 @@
|
||||
From 04e9ab75267489224364fa510a88ada83e11c325 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Thu, 10 Dec 2020 18:23:52 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: convert "fixed-partitions" to the
|
||||
json-schema
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This standardizes its documentation, allows validating with Makefile
|
||||
checks and helps writing DTS files.
|
||||
|
||||
Noticeable changes:
|
||||
1. Dropped "Partitions can be represented by sub-nodes of a flash
|
||||
device." as we also support subpartitions (don't have to be part of
|
||||
flash device node)
|
||||
2. Dropped "to Linux" as bindings are meant to be os agnostic.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Link: https://lore.kernel.org/r/20201210172352.31632-1-zajec5@gmail.com
|
||||
Signed-off-by: Rob Herring <robh@kernel.org>
|
||||
---
|
||||
.../devicetree/bindings/mtd/partition.txt | 131 +--------------
|
||||
.../mtd/partitions/fixed-partitions.yaml | 152 ++++++++++++++++++
|
||||
2 files changed, 154 insertions(+), 129 deletions(-)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
|
||||
|
||||
--- a/Documentation/devicetree/bindings/mtd/partition.txt
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partition.txt
|
||||
@@ -24,137 +24,10 @@ another partitioning method.
|
||||
Available bindings are listed in the "partitions" subdirectory.
|
||||
|
||||
|
||||
-Fixed Partitions
|
||||
-================
|
||||
-
|
||||
-Partitions can be represented by sub-nodes of a flash device. This can be used
|
||||
-on platforms which have strong conventions about which portions of a flash are
|
||||
-used for what purposes, but which don't use an on-flash partition table such
|
||||
-as RedBoot.
|
||||
-
|
||||
-The partition table should be a subnode of the flash node and should be named
|
||||
-'partitions'. This node should have the following property:
|
||||
-- compatible : (required) must be "fixed-partitions"
|
||||
-Partitions are then defined in subnodes of the partitions node.
|
||||
+Deprecated: partitions defined in flash node
|
||||
+============================================
|
||||
|
||||
For backwards compatibility partitions as direct subnodes of the flash device are
|
||||
supported. This use is discouraged.
|
||||
NOTE: also for backwards compatibility, direct subnodes that have a compatible
|
||||
string are not considered partitions, as they may be used for other bindings.
|
||||
-
|
||||
-#address-cells & #size-cells must both be present in the partitions subnode of the
|
||||
-flash device. There are two valid values for both:
|
||||
-<1>: for partitions that require a single 32-bit cell to represent their
|
||||
- size/address (aka the value is below 4 GiB)
|
||||
-<2>: for partitions that require two 32-bit cells to represent their
|
||||
- size/address (aka the value is 4 GiB or greater).
|
||||
-
|
||||
-Required properties:
|
||||
-- reg : The partition's offset and size within the flash
|
||||
-
|
||||
-Optional properties:
|
||||
-- label : The label / name for this partition. If omitted, the label is taken
|
||||
- from the node name (excluding the unit address).
|
||||
-- read-only : This parameter, if present, is a hint to Linux that this
|
||||
- partition should only be mounted read-only. This is usually used for flash
|
||||
- partitions containing early-boot firmware images or data which should not be
|
||||
- clobbered.
|
||||
-- lock : Do not unlock the partition at initialization time (not supported on
|
||||
- all devices)
|
||||
-- slc-mode: This parameter, if present, allows one to emulate SLC mode on a
|
||||
- partition attached to an MLC NAND thus making this partition immune to
|
||||
- paired-pages corruptions
|
||||
-
|
||||
-Examples:
|
||||
-
|
||||
-
|
||||
-flash@0 {
|
||||
- partitions {
|
||||
- compatible = "fixed-partitions";
|
||||
- #address-cells = <1>;
|
||||
- #size-cells = <1>;
|
||||
-
|
||||
- partition@0 {
|
||||
- label = "u-boot";
|
||||
- reg = <0x0000000 0x100000>;
|
||||
- read-only;
|
||||
- };
|
||||
-
|
||||
- uimage@100000 {
|
||||
- reg = <0x0100000 0x200000>;
|
||||
- };
|
||||
- };
|
||||
-};
|
||||
-
|
||||
-flash@1 {
|
||||
- partitions {
|
||||
- compatible = "fixed-partitions";
|
||||
- #address-cells = <1>;
|
||||
- #size-cells = <2>;
|
||||
-
|
||||
- /* a 4 GiB partition */
|
||||
- partition@0 {
|
||||
- label = "filesystem";
|
||||
- reg = <0x00000000 0x1 0x00000000>;
|
||||
- };
|
||||
- };
|
||||
-};
|
||||
-
|
||||
-flash@2 {
|
||||
- partitions {
|
||||
- compatible = "fixed-partitions";
|
||||
- #address-cells = <2>;
|
||||
- #size-cells = <2>;
|
||||
-
|
||||
- /* an 8 GiB partition */
|
||||
- partition@0 {
|
||||
- label = "filesystem #1";
|
||||
- reg = <0x0 0x00000000 0x2 0x00000000>;
|
||||
- };
|
||||
-
|
||||
- /* a 4 GiB partition */
|
||||
- partition@200000000 {
|
||||
- label = "filesystem #2";
|
||||
- reg = <0x2 0x00000000 0x1 0x00000000>;
|
||||
- };
|
||||
- };
|
||||
-};
|
||||
-
|
||||
-flash@3 {
|
||||
- partitions {
|
||||
- compatible = "fixed-partitions";
|
||||
- #address-cells = <1>;
|
||||
- #size-cells = <1>;
|
||||
-
|
||||
- partition@0 {
|
||||
- label = "bootloader";
|
||||
- reg = <0x000000 0x100000>;
|
||||
- read-only;
|
||||
- };
|
||||
-
|
||||
- firmware@100000 {
|
||||
- label = "firmware";
|
||||
- reg = <0x100000 0xe00000>;
|
||||
- compatible = "brcm,trx";
|
||||
- };
|
||||
-
|
||||
- calibration@f00000 {
|
||||
- label = "calibration";
|
||||
- reg = <0xf00000 0x100000>;
|
||||
- compatible = "fixed-partitions";
|
||||
- ranges = <0 0xf00000 0x100000>;
|
||||
- #address-cells = <1>;
|
||||
- #size-cells = <1>;
|
||||
-
|
||||
- partition@0 {
|
||||
- label = "wifi0";
|
||||
- reg = <0x000000 0x080000>;
|
||||
- };
|
||||
-
|
||||
- partition@80000 {
|
||||
- label = "wifi1";
|
||||
- reg = <0x080000 0x080000>;
|
||||
- };
|
||||
- };
|
||||
- };
|
||||
-};
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
|
||||
@@ -0,0 +1,152 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/fixed-partitions.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Fixed partitions
|
||||
+
|
||||
+description: |
|
||||
+ This binding can be used on platforms which have strong conventions about
|
||||
+ which portions of a flash are used for what purposes, but which don't use an
|
||||
+ on-flash partition table such as RedBoot.
|
||||
+
|
||||
+ The partition table should be a node named "partitions". Partitions are then
|
||||
+ defined as subnodes.
|
||||
+
|
||||
+maintainers:
|
||||
+ - Rafał Miłecki <rafal@milecki.pl>
|
||||
+
|
||||
+properties:
|
||||
+ compatible:
|
||||
+ const: fixed-partitions
|
||||
+
|
||||
+ "#address-cells": true
|
||||
+
|
||||
+ "#size-cells": true
|
||||
+
|
||||
+patternProperties:
|
||||
+ "@[0-9a-f]+$":
|
||||
+ description: node describing a single flash partition
|
||||
+ type: object
|
||||
+
|
||||
+ properties:
|
||||
+ reg:
|
||||
+ description: partition's offset and size within the flash
|
||||
+ maxItems: 1
|
||||
+
|
||||
+ label:
|
||||
+ description: The label / name for this partition. If omitted, the label
|
||||
+ is taken from the node name (excluding the unit address).
|
||||
+
|
||||
+ read-only:
|
||||
+ description: This parameter, if present, is a hint that this partition
|
||||
+ should only be mounted read-only. This is usually used for flash
|
||||
+ partitions containing early-boot firmware images or data which should
|
||||
+ not be clobbered.
|
||||
+ type: boolean
|
||||
+
|
||||
+ lock:
|
||||
+ description: Do not unlock the partition at initialization time (not
|
||||
+ supported on all devices)
|
||||
+ type: boolean
|
||||
+
|
||||
+ slc-mode:
|
||||
+ description: This parameter, if present, allows one to emulate SLC mode
|
||||
+ on a partition attached to an MLC NAND thus making this partition
|
||||
+ immune to paired-pages corruptions
|
||||
+ type: boolean
|
||||
+
|
||||
+ required:
|
||||
+ - reg
|
||||
+
|
||||
+required:
|
||||
+ - "#address-cells"
|
||||
+ - "#size-cells"
|
||||
+
|
||||
+additionalProperties: true
|
||||
+
|
||||
+examples:
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "u-boot";
|
||||
+ reg = <0x0000000 0x100000>;
|
||||
+ read-only;
|
||||
+ };
|
||||
+
|
||||
+ uimage@100000 {
|
||||
+ reg = <0x0100000 0x200000>;
|
||||
+ };
|
||||
+ };
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <2>;
|
||||
+
|
||||
+ /* a 4 GiB partition */
|
||||
+ partition@0 {
|
||||
+ label = "filesystem";
|
||||
+ reg = <0x00000000 0x1 0x00000000>;
|
||||
+ };
|
||||
+ };
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <2>;
|
||||
+ #size-cells = <2>;
|
||||
+
|
||||
+ /* an 8 GiB partition */
|
||||
+ partition@0 {
|
||||
+ label = "filesystem #1";
|
||||
+ reg = <0x0 0x00000000 0x2 0x00000000>;
|
||||
+ };
|
||||
+
|
||||
+ /* a 4 GiB partition */
|
||||
+ partition@200000000 {
|
||||
+ label = "filesystem #2";
|
||||
+ reg = <0x2 0x00000000 0x1 0x00000000>;
|
||||
+ };
|
||||
+ };
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "bootloader";
|
||||
+ reg = <0x000000 0x100000>;
|
||||
+ read-only;
|
||||
+ };
|
||||
+
|
||||
+ firmware@100000 {
|
||||
+ compatible = "brcm,trx";
|
||||
+ label = "firmware";
|
||||
+ reg = <0x100000 0xe00000>;
|
||||
+ };
|
||||
+
|
||||
+ calibration@f00000 {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ label = "calibration";
|
||||
+ reg = <0xf00000 0x100000>;
|
||||
+ ranges = <0 0xf00000 0x100000>;
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "wifi0";
|
||||
+ reg = <0x000000 0x080000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@80000 {
|
||||
+ label = "wifi1";
|
||||
+ reg = <0x080000 0x080000>;
|
||||
+ };
|
||||
+ };
|
||||
+ };
|
@ -1,115 +0,0 @@
|
||||
From 6418522022c706fd867b00b2571edba48b8fa8c7 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Thu, 11 Feb 2021 23:04:25 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: move partition binding to its own file
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Single partition binding is quite common and may be:
|
||||
1. Used by multiple parsers
|
||||
2. Extended for more specific cases
|
||||
|
||||
Move it to a separate file to avoid code duplication.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Reviewed-by: Rob Herring <robh@kernel.org>
|
||||
Signed-off-by: Richard Weinberger <richard@nod.at>
|
||||
---
|
||||
.../mtd/partitions/fixed-partitions.yaml | 33 +------------
|
||||
.../bindings/mtd/partitions/partition.yaml | 47 +++++++++++++++++++
|
||||
2 files changed, 48 insertions(+), 32 deletions(-)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/partition.yaml
|
||||
|
||||
--- a/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
|
||||
@@ -27,38 +27,7 @@ properties:
|
||||
|
||||
patternProperties:
|
||||
"@[0-9a-f]+$":
|
||||
- description: node describing a single flash partition
|
||||
- type: object
|
||||
-
|
||||
- properties:
|
||||
- reg:
|
||||
- description: partition's offset and size within the flash
|
||||
- maxItems: 1
|
||||
-
|
||||
- label:
|
||||
- description: The label / name for this partition. If omitted, the label
|
||||
- is taken from the node name (excluding the unit address).
|
||||
-
|
||||
- read-only:
|
||||
- description: This parameter, if present, is a hint that this partition
|
||||
- should only be mounted read-only. This is usually used for flash
|
||||
- partitions containing early-boot firmware images or data which should
|
||||
- not be clobbered.
|
||||
- type: boolean
|
||||
-
|
||||
- lock:
|
||||
- description: Do not unlock the partition at initialization time (not
|
||||
- supported on all devices)
|
||||
- type: boolean
|
||||
-
|
||||
- slc-mode:
|
||||
- description: This parameter, if present, allows one to emulate SLC mode
|
||||
- on a partition attached to an MLC NAND thus making this partition
|
||||
- immune to paired-pages corruptions
|
||||
- type: boolean
|
||||
-
|
||||
- required:
|
||||
- - reg
|
||||
+ $ref: "partition.yaml#"
|
||||
|
||||
required:
|
||||
- "#address-cells"
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/partition.yaml
|
||||
@@ -0,0 +1,47 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/partition.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Partition
|
||||
+
|
||||
+description: |
|
||||
+ This binding describes a single flash partition. Each partition must have its
|
||||
+ relative offset and size specified. Depending on partition function extra
|
||||
+ properties can be used.
|
||||
+
|
||||
+maintainers:
|
||||
+ - Rafał Miłecki <rafal@milecki.pl>
|
||||
+
|
||||
+properties:
|
||||
+ reg:
|
||||
+ description: partition's offset and size within the flash
|
||||
+ maxItems: 1
|
||||
+
|
||||
+ label:
|
||||
+ description: The label / name for this partition. If omitted, the label
|
||||
+ is taken from the node name (excluding the unit address).
|
||||
+
|
||||
+ read-only:
|
||||
+ description: This parameter, if present, is a hint that this partition
|
||||
+ should only be mounted read-only. This is usually used for flash
|
||||
+ partitions containing early-boot firmware images or data which should
|
||||
+ not be clobbered.
|
||||
+ type: boolean
|
||||
+
|
||||
+ lock:
|
||||
+ description: Do not unlock the partition at initialization time (not
|
||||
+ supported on all devices)
|
||||
+ type: boolean
|
||||
+
|
||||
+ slc-mode:
|
||||
+ description: This parameter, if present, allows one to emulate SLC mode
|
||||
+ on a partition attached to an MLC NAND thus making this partition
|
||||
+ immune to paired-pages corruptions
|
||||
+ type: boolean
|
||||
+
|
||||
+required:
|
||||
+ - reg
|
||||
+
|
||||
+additionalProperties: true
|
@ -1,92 +0,0 @@
|
||||
From 6e9dff6fe3fbc452f16566e4a7e293b0decefdba Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Thu, 11 Feb 2021 23:04:26 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: add binding for BCM4908 partitions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
BCM4908 uses fixed partitions layout but function of some partitions may
|
||||
vary. Some devices use multiple firmware partitions and those partitions
|
||||
should be marked to let system discover their purpose.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Signed-off-by: Richard Weinberger <richard@nod.at>
|
||||
---
|
||||
.../partitions/brcm,bcm4908-partitions.yaml | 70 +++++++++++++++++++
|
||||
1 file changed, 70 insertions(+)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/brcm,bcm4908-partitions.yaml
|
||||
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/brcm,bcm4908-partitions.yaml
|
||||
@@ -0,0 +1,70 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/brcm,bcm4908-partitions.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Broadcom BCM4908 partitioning
|
||||
+
|
||||
+description: |
|
||||
+ Broadcom BCM4908 CFE bootloader supports two firmware partitions. One is used
|
||||
+ for regular booting, the other is treated as fallback.
|
||||
+
|
||||
+ This binding allows defining all fixed partitions and marking those containing
|
||||
+ firmware. System can use that information e.g. for booting or flashing
|
||||
+ purposes.
|
||||
+
|
||||
+maintainers:
|
||||
+ - Rafał Miłecki <rafal@milecki.pl>
|
||||
+
|
||||
+properties:
|
||||
+ compatible:
|
||||
+ const: brcm,bcm4908-partitions
|
||||
+
|
||||
+ "#address-cells":
|
||||
+ enum: [ 1, 2 ]
|
||||
+
|
||||
+ "#size-cells":
|
||||
+ enum: [ 1, 2 ]
|
||||
+
|
||||
+patternProperties:
|
||||
+ "^partition@[0-9a-f]+$":
|
||||
+ $ref: "partition.yaml#"
|
||||
+ properties:
|
||||
+ compatible:
|
||||
+ const: brcm,bcm4908-firmware
|
||||
+ unevaluatedProperties: false
|
||||
+
|
||||
+required:
|
||||
+ - "#address-cells"
|
||||
+ - "#size-cells"
|
||||
+
|
||||
+additionalProperties: false
|
||||
+
|
||||
+examples:
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "brcm,bcm4908-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "cferom";
|
||||
+ reg = <0x0 0x100000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@100000 {
|
||||
+ compatible = "brcm,bcm4908-firmware";
|
||||
+ reg = <0x100000 0xf00000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@1000000 {
|
||||
+ compatible = "brcm,bcm4908-firmware";
|
||||
+ reg = <0x1000000 0xf00000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@1f00000 {
|
||||
+ label = "calibration";
|
||||
+ reg = <0x1f00000 0x100000>;
|
||||
+ };
|
||||
+ };
|
@ -1,654 +0,0 @@
|
||||
From afbef8efb591792579c633a7c545f914c6165f82 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Thu, 11 Feb 2021 23:04:27 +0100
|
||||
Subject: [PATCH] mtd: parsers: ofpart: support BCM4908 fixed partitions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Some devices use fixed partitioning with some partitions requiring some
|
||||
extra logic. E.g. BCM4908 may have multiple firmware partitions but
|
||||
detecting currently used one requires checking bootloader parameters.
|
||||
|
||||
To support such cases without duplicating a lot of code (without copying
|
||||
most of the ofpart.c code) support for post-parsing callback was added.
|
||||
|
||||
BCM4908 support in ofpart can be enabled using config option and results
|
||||
in compiling & executing a specific callback. It simply reads offset of
|
||||
currently used firmware partition from the DT. Bootloader specifies it
|
||||
using the "brcm_blparms" property.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
---
|
||||
drivers/mtd/parsers/Kconfig | 9 +++
|
||||
drivers/mtd/parsers/Makefile | 2 +
|
||||
drivers/mtd/parsers/ofpart_bcm4908.c | 64 +++++++++++++++++++
|
||||
drivers/mtd/parsers/ofpart_bcm4908.h | 15 +++++
|
||||
.../mtd/parsers/{ofpart.c => ofpart_core.c} | 28 +++++++-
|
||||
5 files changed, 116 insertions(+), 2 deletions(-)
|
||||
create mode 100644 drivers/mtd/parsers/ofpart_bcm4908.c
|
||||
create mode 100644 drivers/mtd/parsers/ofpart_bcm4908.h
|
||||
rename drivers/mtd/parsers/{ofpart.c => ofpart_core.c} (88%)
|
||||
|
||||
--- a/drivers/mtd/parsers/Kconfig
|
||||
+++ b/drivers/mtd/parsers/Kconfig
|
||||
@@ -67,6 +67,15 @@ config MTD_OF_PARTS
|
||||
flash memory node, as described in
|
||||
Documentation/devicetree/bindings/mtd/partition.txt.
|
||||
|
||||
+config MTD_OF_PARTS_BCM4908
|
||||
+ bool "BCM4908 partitioning support"
|
||||
+ depends on MTD_OF_PARTS && (ARCH_BCM4908 || COMPILE_TEST)
|
||||
+ default ARCH_BCM4908
|
||||
+ help
|
||||
+ This provides partitions parser for BCM4908 family devices
|
||||
+ that can have multiple "firmware" partitions. It takes care of
|
||||
+ finding currently used one and backup ones.
|
||||
+
|
||||
config MTD_PARSER_IMAGETAG
|
||||
tristate "Parser for BCM963XX Image Tag format partitions"
|
||||
depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
|
||||
--- a/drivers/mtd/parsers/Makefile
|
||||
+++ b/drivers/mtd/parsers/Makefile
|
||||
@@ -4,6 +4,8 @@ obj-$(CONFIG_MTD_BCM47XX_PARTS) += bcm4
|
||||
obj-$(CONFIG_MTD_BCM63XX_PARTS) += bcm63xxpart.o
|
||||
obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o
|
||||
obj-$(CONFIG_MTD_OF_PARTS) += ofpart.o
|
||||
+ofpart-y += ofpart_core.o
|
||||
+ofpart-$(CONFIG_MTD_OF_PARTS_BCM4908) += ofpart_bcm4908.o
|
||||
obj-$(CONFIG_MTD_PARSER_IMAGETAG) += parser_imagetag.o
|
||||
obj-$(CONFIG_MTD_AFS_PARTS) += afs.o
|
||||
obj-$(CONFIG_MTD_PARSER_TRX) += parser_trx.o
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_bcm4908.c
|
||||
@@ -0,0 +1,64 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+/*
|
||||
+ * Copyright (C) 2021 Rafał Miłecki <rafal@milecki.pl>
|
||||
+ */
|
||||
+
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/mtd/mtd.h>
|
||||
+#include <linux/slab.h>
|
||||
+#include <linux/mtd/partitions.h>
|
||||
+
|
||||
+#include "ofpart_bcm4908.h"
|
||||
+
|
||||
+#define BLPARAMS_FW_OFFSET "NAND_RFS_OFS"
|
||||
+
|
||||
+static long long bcm4908_partitions_fw_offset(void)
|
||||
+{
|
||||
+ struct device_node *root;
|
||||
+ struct property *prop;
|
||||
+ const char *s;
|
||||
+
|
||||
+ root = of_find_node_by_path("/");
|
||||
+ if (!root)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ of_property_for_each_string(root, "brcm_blparms", prop, s) {
|
||||
+ size_t len = strlen(BLPARAMS_FW_OFFSET);
|
||||
+ unsigned long offset;
|
||||
+ int err;
|
||||
+
|
||||
+ if (strncmp(s, BLPARAMS_FW_OFFSET, len) || s[len] != '=')
|
||||
+ continue;
|
||||
+
|
||||
+ err = kstrtoul(s + len + 1, 0, &offset);
|
||||
+ if (err) {
|
||||
+ pr_err("failed to parse %s\n", s + len + 1);
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
+ return offset << 10;
|
||||
+ }
|
||||
+
|
||||
+ return -ENOENT;
|
||||
+}
|
||||
+
|
||||
+int bcm4908_partitions_post_parse(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts)
|
||||
+{
|
||||
+ long long fw_offset;
|
||||
+ int i;
|
||||
+
|
||||
+ fw_offset = bcm4908_partitions_fw_offset();
|
||||
+
|
||||
+ for (i = 0; i < nr_parts; i++) {
|
||||
+ if (of_device_is_compatible(parts[i].of_node, "brcm,bcm4908-firmware")) {
|
||||
+ if (fw_offset < 0 || parts[i].offset == fw_offset)
|
||||
+ parts[i].name = "firmware";
|
||||
+ else
|
||||
+ parts[i].name = "backup";
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_bcm4908.h
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+#ifndef __BCM4908_PARTITIONS_H
|
||||
+#define __BCM4908_PARTITIONS_H
|
||||
+
|
||||
+#ifdef CONFIG_MTD_OF_PARTS_BCM4908
|
||||
+int bcm4908_partitions_post_parse(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts);
|
||||
+#else
|
||||
+static inline int bcm4908_partitions_post_parse(struct mtd_info *mtd, struct mtd_partition *parts,
|
||||
+ int nr_parts)
|
||||
+{
|
||||
+ return -EOPNOTSUPP;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#endif
|
||||
--- a/drivers/mtd/parsers/ofpart.c
|
||||
+++ /dev/null
|
||||
@@ -1,239 +0,0 @@
|
||||
-// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
-/*
|
||||
- * Flash partitions described by the OF (or flattened) device tree
|
||||
- *
|
||||
- * Copyright © 2006 MontaVista Software Inc.
|
||||
- * Author: Vitaly Wool <vwool@ru.mvista.com>
|
||||
- *
|
||||
- * Revised to handle newer style flash binding by:
|
||||
- * Copyright © 2007 David Gibson, IBM Corporation.
|
||||
- */
|
||||
-
|
||||
-#include <linux/module.h>
|
||||
-#include <linux/init.h>
|
||||
-#include <linux/of.h>
|
||||
-#include <linux/mtd/mtd.h>
|
||||
-#include <linux/slab.h>
|
||||
-#include <linux/mtd/partitions.h>
|
||||
-
|
||||
-static bool node_has_compatible(struct device_node *pp)
|
||||
-{
|
||||
- return of_get_property(pp, "compatible", NULL);
|
||||
-}
|
||||
-
|
||||
-static int parse_fixed_partitions(struct mtd_info *master,
|
||||
- const struct mtd_partition **pparts,
|
||||
- struct mtd_part_parser_data *data)
|
||||
-{
|
||||
- struct mtd_partition *parts;
|
||||
- struct device_node *mtd_node;
|
||||
- struct device_node *ofpart_node;
|
||||
- const char *partname;
|
||||
- struct device_node *pp;
|
||||
- int nr_parts, i, ret = 0;
|
||||
- bool dedicated = true;
|
||||
-
|
||||
-
|
||||
- /* Pull of_node from the master device node */
|
||||
- mtd_node = mtd_get_of_node(master);
|
||||
- if (!mtd_node)
|
||||
- return 0;
|
||||
-
|
||||
- ofpart_node = of_get_child_by_name(mtd_node, "partitions");
|
||||
- if (!ofpart_node) {
|
||||
- /*
|
||||
- * We might get here even when ofpart isn't used at all (e.g.,
|
||||
- * when using another parser), so don't be louder than
|
||||
- * KERN_DEBUG
|
||||
- */
|
||||
- pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n",
|
||||
- master->name, mtd_node);
|
||||
- ofpart_node = mtd_node;
|
||||
- dedicated = false;
|
||||
- } else if (!of_device_is_compatible(ofpart_node, "fixed-partitions")) {
|
||||
- /* The 'partitions' subnode might be used by another parser */
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- /* First count the subnodes */
|
||||
- nr_parts = 0;
|
||||
- for_each_child_of_node(ofpart_node, pp) {
|
||||
- if (!dedicated && node_has_compatible(pp))
|
||||
- continue;
|
||||
-
|
||||
- nr_parts++;
|
||||
- }
|
||||
-
|
||||
- if (nr_parts == 0)
|
||||
- return 0;
|
||||
-
|
||||
- parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
|
||||
- if (!parts)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
- i = 0;
|
||||
- for_each_child_of_node(ofpart_node, pp) {
|
||||
- const __be32 *reg;
|
||||
- int len;
|
||||
- int a_cells, s_cells;
|
||||
-
|
||||
- if (!dedicated && node_has_compatible(pp))
|
||||
- continue;
|
||||
-
|
||||
- reg = of_get_property(pp, "reg", &len);
|
||||
- if (!reg) {
|
||||
- if (dedicated) {
|
||||
- pr_debug("%s: ofpart partition %pOF (%pOF) missing reg property.\n",
|
||||
- master->name, pp,
|
||||
- mtd_node);
|
||||
- goto ofpart_fail;
|
||||
- } else {
|
||||
- nr_parts--;
|
||||
- continue;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- a_cells = of_n_addr_cells(pp);
|
||||
- s_cells = of_n_size_cells(pp);
|
||||
- if (len / 4 != a_cells + s_cells) {
|
||||
- pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n",
|
||||
- master->name, pp,
|
||||
- mtd_node);
|
||||
- goto ofpart_fail;
|
||||
- }
|
||||
-
|
||||
- parts[i].offset = of_read_number(reg, a_cells);
|
||||
- parts[i].size = of_read_number(reg + a_cells, s_cells);
|
||||
- parts[i].of_node = pp;
|
||||
-
|
||||
- partname = of_get_property(pp, "label", &len);
|
||||
- if (!partname)
|
||||
- partname = of_get_property(pp, "name", &len);
|
||||
- parts[i].name = partname;
|
||||
-
|
||||
- if (of_get_property(pp, "read-only", &len))
|
||||
- parts[i].mask_flags |= MTD_WRITEABLE;
|
||||
-
|
||||
- if (of_get_property(pp, "lock", &len))
|
||||
- parts[i].mask_flags |= MTD_POWERUP_LOCK;
|
||||
-
|
||||
- if (of_property_read_bool(pp, "slc-mode"))
|
||||
- parts[i].add_flags |= MTD_SLC_ON_MLC_EMULATION;
|
||||
-
|
||||
- i++;
|
||||
- }
|
||||
-
|
||||
- if (!nr_parts)
|
||||
- goto ofpart_none;
|
||||
-
|
||||
- *pparts = parts;
|
||||
- return nr_parts;
|
||||
-
|
||||
-ofpart_fail:
|
||||
- pr_err("%s: error parsing ofpart partition %pOF (%pOF)\n",
|
||||
- master->name, pp, mtd_node);
|
||||
- ret = -EINVAL;
|
||||
-ofpart_none:
|
||||
- of_node_put(pp);
|
||||
- kfree(parts);
|
||||
- return ret;
|
||||
-}
|
||||
-
|
||||
-static const struct of_device_id parse_ofpart_match_table[] = {
|
||||
- { .compatible = "fixed-partitions" },
|
||||
- {},
|
||||
-};
|
||||
-MODULE_DEVICE_TABLE(of, parse_ofpart_match_table);
|
||||
-
|
||||
-static struct mtd_part_parser ofpart_parser = {
|
||||
- .parse_fn = parse_fixed_partitions,
|
||||
- .name = "fixed-partitions",
|
||||
- .of_match_table = parse_ofpart_match_table,
|
||||
-};
|
||||
-
|
||||
-static int parse_ofoldpart_partitions(struct mtd_info *master,
|
||||
- const struct mtd_partition **pparts,
|
||||
- struct mtd_part_parser_data *data)
|
||||
-{
|
||||
- struct mtd_partition *parts;
|
||||
- struct device_node *dp;
|
||||
- int i, plen, nr_parts;
|
||||
- const struct {
|
||||
- __be32 offset, len;
|
||||
- } *part;
|
||||
- const char *names;
|
||||
-
|
||||
- /* Pull of_node from the master device node */
|
||||
- dp = mtd_get_of_node(master);
|
||||
- if (!dp)
|
||||
- return 0;
|
||||
-
|
||||
- part = of_get_property(dp, "partitions", &plen);
|
||||
- if (!part)
|
||||
- return 0; /* No partitions found */
|
||||
-
|
||||
- pr_warn("Device tree uses obsolete partition map binding: %pOF\n", dp);
|
||||
-
|
||||
- nr_parts = plen / sizeof(part[0]);
|
||||
-
|
||||
- parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
|
||||
- if (!parts)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
- names = of_get_property(dp, "partition-names", &plen);
|
||||
-
|
||||
- for (i = 0; i < nr_parts; i++) {
|
||||
- parts[i].offset = be32_to_cpu(part->offset);
|
||||
- parts[i].size = be32_to_cpu(part->len) & ~1;
|
||||
- /* bit 0 set signifies read only partition */
|
||||
- if (be32_to_cpu(part->len) & 1)
|
||||
- parts[i].mask_flags = MTD_WRITEABLE;
|
||||
-
|
||||
- if (names && (plen > 0)) {
|
||||
- int len = strlen(names) + 1;
|
||||
-
|
||||
- parts[i].name = names;
|
||||
- plen -= len;
|
||||
- names += len;
|
||||
- } else {
|
||||
- parts[i].name = "unnamed";
|
||||
- }
|
||||
-
|
||||
- part++;
|
||||
- }
|
||||
-
|
||||
- *pparts = parts;
|
||||
- return nr_parts;
|
||||
-}
|
||||
-
|
||||
-static struct mtd_part_parser ofoldpart_parser = {
|
||||
- .parse_fn = parse_ofoldpart_partitions,
|
||||
- .name = "ofoldpart",
|
||||
-};
|
||||
-
|
||||
-static int __init ofpart_parser_init(void)
|
||||
-{
|
||||
- register_mtd_parser(&ofpart_parser);
|
||||
- register_mtd_parser(&ofoldpart_parser);
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static void __exit ofpart_parser_exit(void)
|
||||
-{
|
||||
- deregister_mtd_parser(&ofpart_parser);
|
||||
- deregister_mtd_parser(&ofoldpart_parser);
|
||||
-}
|
||||
-
|
||||
-module_init(ofpart_parser_init);
|
||||
-module_exit(ofpart_parser_exit);
|
||||
-
|
||||
-MODULE_LICENSE("GPL");
|
||||
-MODULE_DESCRIPTION("Parser for MTD partitioning information in device tree");
|
||||
-MODULE_AUTHOR("Vitaly Wool, David Gibson");
|
||||
-/*
|
||||
- * When MTD core cannot find the requested parser, it tries to load the module
|
||||
- * with the same name. Since we provide the ofoldpart parser, we should have
|
||||
- * the corresponding alias.
|
||||
- */
|
||||
-MODULE_ALIAS("fixed-partitions");
|
||||
-MODULE_ALIAS("ofoldpart");
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_core.c
|
||||
@@ -0,0 +1,263 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
+/*
|
||||
+ * Flash partitions described by the OF (or flattened) device tree
|
||||
+ *
|
||||
+ * Copyright © 2006 MontaVista Software Inc.
|
||||
+ * Author: Vitaly Wool <vwool@ru.mvista.com>
|
||||
+ *
|
||||
+ * Revised to handle newer style flash binding by:
|
||||
+ * Copyright © 2007 David Gibson, IBM Corporation.
|
||||
+ */
|
||||
+
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/mtd/mtd.h>
|
||||
+#include <linux/slab.h>
|
||||
+#include <linux/mtd/partitions.h>
|
||||
+
|
||||
+#include "ofpart_bcm4908.h"
|
||||
+
|
||||
+struct fixed_partitions_quirks {
|
||||
+ int (*post_parse)(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts);
|
||||
+};
|
||||
+
|
||||
+struct fixed_partitions_quirks bcm4908_partitions_quirks = {
|
||||
+ .post_parse = bcm4908_partitions_post_parse,
|
||||
+};
|
||||
+
|
||||
+static const struct of_device_id parse_ofpart_match_table[];
|
||||
+
|
||||
+static bool node_has_compatible(struct device_node *pp)
|
||||
+{
|
||||
+ return of_get_property(pp, "compatible", NULL);
|
||||
+}
|
||||
+
|
||||
+static int parse_fixed_partitions(struct mtd_info *master,
|
||||
+ const struct mtd_partition **pparts,
|
||||
+ struct mtd_part_parser_data *data)
|
||||
+{
|
||||
+ const struct fixed_partitions_quirks *quirks;
|
||||
+ const struct of_device_id *of_id;
|
||||
+ struct mtd_partition *parts;
|
||||
+ struct device_node *mtd_node;
|
||||
+ struct device_node *ofpart_node;
|
||||
+ const char *partname;
|
||||
+ struct device_node *pp;
|
||||
+ int nr_parts, i, ret = 0;
|
||||
+ bool dedicated = true;
|
||||
+
|
||||
+ /* Pull of_node from the master device node */
|
||||
+ mtd_node = mtd_get_of_node(master);
|
||||
+ if (!mtd_node)
|
||||
+ return 0;
|
||||
+
|
||||
+ ofpart_node = of_get_child_by_name(mtd_node, "partitions");
|
||||
+ if (!ofpart_node) {
|
||||
+ /*
|
||||
+ * We might get here even when ofpart isn't used at all (e.g.,
|
||||
+ * when using another parser), so don't be louder than
|
||||
+ * KERN_DEBUG
|
||||
+ */
|
||||
+ pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n",
|
||||
+ master->name, mtd_node);
|
||||
+ ofpart_node = mtd_node;
|
||||
+ dedicated = false;
|
||||
+ }
|
||||
+
|
||||
+ of_id = of_match_node(parse_ofpart_match_table, ofpart_node);
|
||||
+ if (dedicated && !of_id) {
|
||||
+ /* The 'partitions' subnode might be used by another parser */
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ quirks = of_id ? of_id->data : NULL;
|
||||
+
|
||||
+ /* First count the subnodes */
|
||||
+ nr_parts = 0;
|
||||
+ for_each_child_of_node(ofpart_node, pp) {
|
||||
+ if (!dedicated && node_has_compatible(pp))
|
||||
+ continue;
|
||||
+
|
||||
+ nr_parts++;
|
||||
+ }
|
||||
+
|
||||
+ if (nr_parts == 0)
|
||||
+ return 0;
|
||||
+
|
||||
+ parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
|
||||
+ if (!parts)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ i = 0;
|
||||
+ for_each_child_of_node(ofpart_node, pp) {
|
||||
+ const __be32 *reg;
|
||||
+ int len;
|
||||
+ int a_cells, s_cells;
|
||||
+
|
||||
+ if (!dedicated && node_has_compatible(pp))
|
||||
+ continue;
|
||||
+
|
||||
+ reg = of_get_property(pp, "reg", &len);
|
||||
+ if (!reg) {
|
||||
+ if (dedicated) {
|
||||
+ pr_debug("%s: ofpart partition %pOF (%pOF) missing reg property.\n",
|
||||
+ master->name, pp,
|
||||
+ mtd_node);
|
||||
+ goto ofpart_fail;
|
||||
+ } else {
|
||||
+ nr_parts--;
|
||||
+ continue;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ a_cells = of_n_addr_cells(pp);
|
||||
+ s_cells = of_n_size_cells(pp);
|
||||
+ if (len / 4 != a_cells + s_cells) {
|
||||
+ pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n",
|
||||
+ master->name, pp,
|
||||
+ mtd_node);
|
||||
+ goto ofpart_fail;
|
||||
+ }
|
||||
+
|
||||
+ parts[i].offset = of_read_number(reg, a_cells);
|
||||
+ parts[i].size = of_read_number(reg + a_cells, s_cells);
|
||||
+ parts[i].of_node = pp;
|
||||
+
|
||||
+ partname = of_get_property(pp, "label", &len);
|
||||
+ if (!partname)
|
||||
+ partname = of_get_property(pp, "name", &len);
|
||||
+ parts[i].name = partname;
|
||||
+
|
||||
+ if (of_get_property(pp, "read-only", &len))
|
||||
+ parts[i].mask_flags |= MTD_WRITEABLE;
|
||||
+
|
||||
+ if (of_get_property(pp, "lock", &len))
|
||||
+ parts[i].mask_flags |= MTD_POWERUP_LOCK;
|
||||
+
|
||||
+ if (of_property_read_bool(pp, "slc-mode"))
|
||||
+ parts[i].add_flags |= MTD_SLC_ON_MLC_EMULATION;
|
||||
+
|
||||
+ i++;
|
||||
+ }
|
||||
+
|
||||
+ if (!nr_parts)
|
||||
+ goto ofpart_none;
|
||||
+
|
||||
+ if (quirks && quirks->post_parse)
|
||||
+ quirks->post_parse(master, parts, nr_parts);
|
||||
+
|
||||
+ *pparts = parts;
|
||||
+ return nr_parts;
|
||||
+
|
||||
+ofpart_fail:
|
||||
+ pr_err("%s: error parsing ofpart partition %pOF (%pOF)\n",
|
||||
+ master->name, pp, mtd_node);
|
||||
+ ret = -EINVAL;
|
||||
+ofpart_none:
|
||||
+ of_node_put(pp);
|
||||
+ kfree(parts);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static const struct of_device_id parse_ofpart_match_table[] = {
|
||||
+ /* Generic */
|
||||
+ { .compatible = "fixed-partitions" },
|
||||
+ /* Customized */
|
||||
+ { .compatible = "brcm,bcm4908-partitions", .data = &bcm4908_partitions_quirks, },
|
||||
+ {},
|
||||
+};
|
||||
+MODULE_DEVICE_TABLE(of, parse_ofpart_match_table);
|
||||
+
|
||||
+static struct mtd_part_parser ofpart_parser = {
|
||||
+ .parse_fn = parse_fixed_partitions,
|
||||
+ .name = "fixed-partitions",
|
||||
+ .of_match_table = parse_ofpart_match_table,
|
||||
+};
|
||||
+
|
||||
+static int parse_ofoldpart_partitions(struct mtd_info *master,
|
||||
+ const struct mtd_partition **pparts,
|
||||
+ struct mtd_part_parser_data *data)
|
||||
+{
|
||||
+ struct mtd_partition *parts;
|
||||
+ struct device_node *dp;
|
||||
+ int i, plen, nr_parts;
|
||||
+ const struct {
|
||||
+ __be32 offset, len;
|
||||
+ } *part;
|
||||
+ const char *names;
|
||||
+
|
||||
+ /* Pull of_node from the master device node */
|
||||
+ dp = mtd_get_of_node(master);
|
||||
+ if (!dp)
|
||||
+ return 0;
|
||||
+
|
||||
+ part = of_get_property(dp, "partitions", &plen);
|
||||
+ if (!part)
|
||||
+ return 0; /* No partitions found */
|
||||
+
|
||||
+ pr_warn("Device tree uses obsolete partition map binding: %pOF\n", dp);
|
||||
+
|
||||
+ nr_parts = plen / sizeof(part[0]);
|
||||
+
|
||||
+ parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
|
||||
+ if (!parts)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ names = of_get_property(dp, "partition-names", &plen);
|
||||
+
|
||||
+ for (i = 0; i < nr_parts; i++) {
|
||||
+ parts[i].offset = be32_to_cpu(part->offset);
|
||||
+ parts[i].size = be32_to_cpu(part->len) & ~1;
|
||||
+ /* bit 0 set signifies read only partition */
|
||||
+ if (be32_to_cpu(part->len) & 1)
|
||||
+ parts[i].mask_flags = MTD_WRITEABLE;
|
||||
+
|
||||
+ if (names && (plen > 0)) {
|
||||
+ int len = strlen(names) + 1;
|
||||
+
|
||||
+ parts[i].name = names;
|
||||
+ plen -= len;
|
||||
+ names += len;
|
||||
+ } else {
|
||||
+ parts[i].name = "unnamed";
|
||||
+ }
|
||||
+
|
||||
+ part++;
|
||||
+ }
|
||||
+
|
||||
+ *pparts = parts;
|
||||
+ return nr_parts;
|
||||
+}
|
||||
+
|
||||
+static struct mtd_part_parser ofoldpart_parser = {
|
||||
+ .parse_fn = parse_ofoldpart_partitions,
|
||||
+ .name = "ofoldpart",
|
||||
+};
|
||||
+
|
||||
+static int __init ofpart_parser_init(void)
|
||||
+{
|
||||
+ register_mtd_parser(&ofpart_parser);
|
||||
+ register_mtd_parser(&ofoldpart_parser);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void __exit ofpart_parser_exit(void)
|
||||
+{
|
||||
+ deregister_mtd_parser(&ofpart_parser);
|
||||
+ deregister_mtd_parser(&ofoldpart_parser);
|
||||
+}
|
||||
+
|
||||
+module_init(ofpart_parser_init);
|
||||
+module_exit(ofpart_parser_exit);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
+MODULE_DESCRIPTION("Parser for MTD partitioning information in device tree");
|
||||
+MODULE_AUTHOR("Vitaly Wool, David Gibson");
|
||||
+/*
|
||||
+ * When MTD core cannot find the requested parser, it tries to load the module
|
||||
+ * with the same name. Since we provide the ofoldpart parser, we should have
|
||||
+ * the corresponding alias.
|
||||
+ */
|
||||
+MODULE_ALIAS("fixed-partitions");
|
||||
+MODULE_ALIAS("ofoldpart");
|
@ -1,69 +0,0 @@
|
||||
From 2d751203aacf86a1b301a188d8551c7da91043ab Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Tue, 2 Mar 2021 20:00:12 +0100
|
||||
Subject: [PATCH] mtd: parsers: ofpart: limit parsing of deprecated DT syntax
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
For backward compatibility ofpart still supports the old syntax like:
|
||||
spi-flash@0 {
|
||||
compatible = "jedec,spi-nor";
|
||||
reg = <0x0>;
|
||||
|
||||
partition@0 {
|
||||
label = "bootloader";
|
||||
reg = <0x0 0x100000>;
|
||||
};
|
||||
};
|
||||
(without "partitions" subnode).
|
||||
|
||||
There is no reason however to support nested partitions without a clear
|
||||
"compatible" string like:
|
||||
partitions {
|
||||
compatible = "fixed-partitions";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
|
||||
partition@0 {
|
||||
label = "bootloader";
|
||||
reg = <0x0 0x100000>;
|
||||
|
||||
partition@0 {
|
||||
label = "config";
|
||||
reg = <0x80000 0x80000>;
|
||||
};
|
||||
};
|
||||
};
|
||||
(we never officially supported or documented that).
|
||||
|
||||
Make sure ofpart doesn't attempt to parse the above.
|
||||
|
||||
Cc: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210302190012.1255-1-zajec5@gmail.com
|
||||
---
|
||||
drivers/mtd/parsers/ofpart_core.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/parsers/ofpart_core.c
|
||||
+++ b/drivers/mtd/parsers/ofpart_core.c
|
||||
@@ -53,7 +53,7 @@ static int parse_fixed_partitions(struct
|
||||
return 0;
|
||||
|
||||
ofpart_node = of_get_child_by_name(mtd_node, "partitions");
|
||||
- if (!ofpart_node) {
|
||||
+ if (!ofpart_node && !master->parent) {
|
||||
/*
|
||||
* We might get here even when ofpart isn't used at all (e.g.,
|
||||
* when using another parser), so don't be louder than
|
||||
@@ -64,6 +64,8 @@ static int parse_fixed_partitions(struct
|
||||
ofpart_node = mtd_node;
|
||||
dedicated = false;
|
||||
}
|
||||
+ if (!ofpart_node)
|
||||
+ return 0;
|
||||
|
||||
of_id = of_match_node(parse_ofpart_match_table, ofpart_node);
|
||||
if (dedicated && !of_id) {
|
@ -1,34 +0,0 @@
|
||||
From b87b6d2d6f540e29c3f98e1572d64e560d73d6c1 Mon Sep 17 00:00:00 2001
|
||||
From: Wei Yongjun <weiyongjun1@huawei.com>
|
||||
Date: Thu, 4 Mar 2021 06:46:00 +0000
|
||||
Subject: [PATCH] mtd: parsers: ofpart: make symbol 'bcm4908_partitions_quirks'
|
||||
static
|
||||
|
||||
The sparse tool complains as follows:
|
||||
|
||||
drivers/mtd/parsers/ofpart_core.c:25:32: warning:
|
||||
symbol 'bcm4908_partitions_quirks' was not declared. Should it be static?
|
||||
|
||||
This symbol is not used outside of ofpart_core.c, so this
|
||||
commit marks it static.
|
||||
|
||||
Fixes: 457da931b608 ("mtd: parsers: ofpart: support BCM4908 fixed partitions")
|
||||
Reported-by: Hulk Robot <hulkci@huawei.com>
|
||||
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210304064600.3279138-1-weiyongjun1@huawei.com
|
||||
---
|
||||
drivers/mtd/parsers/ofpart_core.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/parsers/ofpart_core.c
|
||||
+++ b/drivers/mtd/parsers/ofpart_core.c
|
||||
@@ -22,7 +22,7 @@ struct fixed_partitions_quirks {
|
||||
int (*post_parse)(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts);
|
||||
};
|
||||
|
||||
-struct fixed_partitions_quirks bcm4908_partitions_quirks = {
|
||||
+static struct fixed_partitions_quirks bcm4908_partitions_quirks = {
|
||||
.post_parse = bcm4908_partitions_post_parse,
|
||||
};
|
||||
|
@ -1,38 +0,0 @@
|
||||
From a5d83d6e2bc747b13f347962d4b335d70b23559b Mon Sep 17 00:00:00 2001
|
||||
From: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Date: Fri, 12 Mar 2021 07:28:19 +0100
|
||||
Subject: [PATCH] mtd: core: add nvmem-cells compatible to parse mtd as nvmem
|
||||
cells
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Partitions that contain the nvmem-cells compatible will register
|
||||
their direct subnodes as nvmem cells and the node will be treated as a
|
||||
nvmem provider.
|
||||
|
||||
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Tested-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
---
|
||||
drivers/mtd/mtdcore.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/mtdcore.c
|
||||
+++ b/drivers/mtd/mtdcore.c
|
||||
@@ -531,6 +531,7 @@ static int mtd_nvmem_reg_read(void *priv
|
||||
|
||||
static int mtd_nvmem_add(struct mtd_info *mtd)
|
||||
{
|
||||
+ struct device_node *node = mtd_get_of_node(mtd);
|
||||
struct nvmem_config config = {};
|
||||
|
||||
config.id = -1;
|
||||
@@ -543,7 +544,7 @@ static int mtd_nvmem_add(struct mtd_info
|
||||
config.stride = 1;
|
||||
config.read_only = true;
|
||||
config.root_only = true;
|
||||
- config.no_of_node = true;
|
||||
+ config.no_of_node = !of_device_is_compatible(node, "nvmem-cells");
|
||||
config.priv = mtd;
|
||||
|
||||
mtd->nvmem = nvmem_register(&config);
|
@ -1,25 +0,0 @@
|
||||
From 42645976c3289b03a12f1bd2bc131fd98fc27170 Mon Sep 17 00:00:00 2001
|
||||
From: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Date: Fri, 12 Mar 2021 07:28:20 +0100
|
||||
Subject: [PATCH] devicetree: nvmem: nvmem: drop $nodename restriction
|
||||
|
||||
Drop $nodename restriction as now mtd partition can also be used as
|
||||
nvmem provider.
|
||||
|
||||
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
---
|
||||
Documentation/devicetree/bindings/nvmem/nvmem.yaml | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
--- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml
|
||||
+++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
|
||||
@@ -20,9 +20,6 @@ description: |
|
||||
storage device.
|
||||
|
||||
properties:
|
||||
- $nodename:
|
||||
- pattern: "^(eeprom|efuse|nvram)(@.*|-[0-9a-f])*$"
|
||||
-
|
||||
"#address-cells":
|
||||
const: 1
|
||||
|
@ -1,117 +0,0 @@
|
||||
From 377aa0135dc8489312edd3184d143ce3a89ff7ee Mon Sep 17 00:00:00 2001
|
||||
From: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Date: Fri, 12 Mar 2021 07:28:21 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: Document use of nvmem-cells compatible
|
||||
|
||||
Document nvmem-cells compatible used to treat mtd partitions as a
|
||||
nvmem provider.
|
||||
|
||||
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Reviewed-by: Rob Herring <robh@kernel.org>
|
||||
---
|
||||
.../bindings/mtd/partitions/nvmem-cells.yaml | 99 +++++++++++++++++++
|
||||
1 file changed, 99 insertions(+)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml
|
||||
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml
|
||||
@@ -0,0 +1,99 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/nvmem-cells.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Nvmem cells
|
||||
+
|
||||
+description: |
|
||||
+ Any partition containing the compatible "nvmem-cells" will register as a
|
||||
+ nvmem provider.
|
||||
+ Each direct subnodes represents a nvmem cell following the nvmem binding.
|
||||
+ Nvmem binding to declare nvmem-cells can be found in:
|
||||
+ Documentation/devicetree/bindings/nvmem/nvmem.yaml
|
||||
+
|
||||
+maintainers:
|
||||
+ - Ansuel Smith <ansuelsmth@gmail.com>
|
||||
+
|
||||
+allOf:
|
||||
+ - $ref: /schemas/nvmem/nvmem.yaml#
|
||||
+
|
||||
+properties:
|
||||
+ compatible:
|
||||
+ const: nvmem-cells
|
||||
+
|
||||
+required:
|
||||
+ - compatible
|
||||
+
|
||||
+additionalProperties: true
|
||||
+
|
||||
+examples:
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ /* ... */
|
||||
+
|
||||
+ };
|
||||
+ art: art@1200000 {
|
||||
+ compatible = "nvmem-cells";
|
||||
+ reg = <0x1200000 0x0140000>;
|
||||
+ label = "art";
|
||||
+ read-only;
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ macaddr_gmac1: macaddr_gmac1@0 {
|
||||
+ reg = <0x0 0x6>;
|
||||
+ };
|
||||
+
|
||||
+ macaddr_gmac2: macaddr_gmac2@6 {
|
||||
+ reg = <0x6 0x6>;
|
||||
+ };
|
||||
+
|
||||
+ pre_cal_24g: pre_cal_24g@1000 {
|
||||
+ reg = <0x1000 0x2f20>;
|
||||
+ };
|
||||
+
|
||||
+ pre_cal_5g: pre_cal_5g@5000{
|
||||
+ reg = <0x5000 0x2f20>;
|
||||
+ };
|
||||
+ };
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "fixed-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "bootloader";
|
||||
+ reg = <0x000000 0x100000>;
|
||||
+ read-only;
|
||||
+ };
|
||||
+
|
||||
+ firmware@100000 {
|
||||
+ compatible = "brcm,trx";
|
||||
+ label = "firmware";
|
||||
+ reg = <0x100000 0xe00000>;
|
||||
+ };
|
||||
+
|
||||
+ calibration@f00000 {
|
||||
+ compatible = "nvmem-cells";
|
||||
+ label = "calibration";
|
||||
+ reg = <0xf00000 0x100000>;
|
||||
+ ranges = <0 0xf00000 0x100000>;
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ wifi0@0 {
|
||||
+ reg = <0x000000 0x080000>;
|
||||
+ };
|
||||
+
|
||||
+ wifi1@80000 {
|
||||
+ reg = <0x080000 0x080000>;
|
||||
+ };
|
||||
+ };
|
||||
+ };
|
@ -1,98 +0,0 @@
|
||||
From 2fa7294175c76e1ec568aa75c1891fd908728c8d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Fri, 12 Mar 2021 14:49:18 +0100
|
||||
Subject: [PATCH] dt-bindings: mtd: add binding for Linksys Northstar
|
||||
partitions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Linksys on Broadcom Northstar devices uses fixed flash layout with
|
||||
multiple firmware partitions.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Reviewed-by: Rob Herring <robh@kernel.org>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210312134919.7767-1-zajec5@gmail.com
|
||||
---
|
||||
.../mtd/partitions/linksys,ns-partitions.yaml | 74 +++++++++++++++++++
|
||||
1 file changed, 74 insertions(+)
|
||||
create mode 100644 Documentation/devicetree/bindings/mtd/partitions/linksys,ns-partitions.yaml
|
||||
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/linksys,ns-partitions.yaml
|
||||
@@ -0,0 +1,74 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/mtd/partitions/linksys,ns-partitions.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Linksys Northstar partitioning
|
||||
+
|
||||
+description: |
|
||||
+ Linksys devices based on Broadcom Northstar architecture often use two
|
||||
+ firmware partitions. One is used for regular booting, the other is treated as
|
||||
+ fallback.
|
||||
+
|
||||
+ This binding allows defining all fixed partitions and marking those containing
|
||||
+ firmware. System can use that information e.g. for booting or flashing
|
||||
+ purposes.
|
||||
+
|
||||
+maintainers:
|
||||
+ - Rafał Miłecki <rafal@milecki.pl>
|
||||
+
|
||||
+properties:
|
||||
+ compatible:
|
||||
+ const: linksys,ns-partitions
|
||||
+
|
||||
+ "#address-cells":
|
||||
+ enum: [ 1, 2 ]
|
||||
+
|
||||
+ "#size-cells":
|
||||
+ enum: [ 1, 2 ]
|
||||
+
|
||||
+patternProperties:
|
||||
+ "^partition@[0-9a-f]+$":
|
||||
+ $ref: "partition.yaml#"
|
||||
+ properties:
|
||||
+ compatible:
|
||||
+ items:
|
||||
+ - const: linksys,ns-firmware
|
||||
+ - const: brcm,trx
|
||||
+ unevaluatedProperties: false
|
||||
+
|
||||
+required:
|
||||
+ - "#address-cells"
|
||||
+ - "#size-cells"
|
||||
+
|
||||
+additionalProperties: false
|
||||
+
|
||||
+examples:
|
||||
+ - |
|
||||
+ partitions {
|
||||
+ compatible = "linksys,ns-partitions";
|
||||
+ #address-cells = <1>;
|
||||
+ #size-cells = <1>;
|
||||
+
|
||||
+ partition@0 {
|
||||
+ label = "boot";
|
||||
+ reg = <0x0 0x100000>;
|
||||
+ read-only;
|
||||
+ };
|
||||
+
|
||||
+ partition@100000 {
|
||||
+ label = "nvram";
|
||||
+ reg = <0x100000 0x100000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@200000 {
|
||||
+ compatible = "linksys,ns-firmware", "brcm,trx";
|
||||
+ reg = <0x200000 0xf00000>;
|
||||
+ };
|
||||
+
|
||||
+ partition@1100000 {
|
||||
+ compatible = "linksys,ns-firmware", "brcm,trx";
|
||||
+ reg = <0x1100000 0xf00000>;
|
||||
+ };
|
||||
+ };
|
@ -1,156 +0,0 @@
|
||||
From 7134a2d026d942210b4d26d6059c9d979ca7866e Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||
Date: Fri, 12 Mar 2021 14:49:19 +0100
|
||||
Subject: [PATCH] mtd: parsers: ofpart: support Linksys Northstar partitions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This allows extending ofpart parser with support for Linksys Northstar
|
||||
devices. That support uses recently added quirks mechanism.
|
||||
|
||||
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210312134919.7767-2-zajec5@gmail.com
|
||||
---
|
||||
drivers/mtd/parsers/Kconfig | 10 +++++
|
||||
drivers/mtd/parsers/Makefile | 1 +
|
||||
drivers/mtd/parsers/ofpart_core.c | 6 +++
|
||||
drivers/mtd/parsers/ofpart_linksys_ns.c | 50 +++++++++++++++++++++++++
|
||||
drivers/mtd/parsers/ofpart_linksys_ns.h | 18 +++++++++
|
||||
5 files changed, 85 insertions(+)
|
||||
create mode 100644 drivers/mtd/parsers/ofpart_linksys_ns.c
|
||||
create mode 100644 drivers/mtd/parsers/ofpart_linksys_ns.h
|
||||
|
||||
--- a/drivers/mtd/parsers/Kconfig
|
||||
+++ b/drivers/mtd/parsers/Kconfig
|
||||
@@ -76,6 +76,16 @@ config MTD_OF_PARTS_BCM4908
|
||||
that can have multiple "firmware" partitions. It takes care of
|
||||
finding currently used one and backup ones.
|
||||
|
||||
+config MTD_OF_PARTS_LINKSYS_NS
|
||||
+ bool "Linksys Northstar partitioning support"
|
||||
+ depends on MTD_OF_PARTS && (ARCH_BCM_5301X || ARCH_BCM4908 || COMPILE_TEST)
|
||||
+ default ARCH_BCM_5301X
|
||||
+ help
|
||||
+ This provides partitions parser for Linksys devices based on Broadcom
|
||||
+ Northstar architecture. Linksys commonly uses fixed flash layout with
|
||||
+ two "firmware" partitions. Currently used firmware has to be detected
|
||||
+ using CFE environment variable.
|
||||
+
|
||||
config MTD_PARSER_IMAGETAG
|
||||
tristate "Parser for BCM963XX Image Tag format partitions"
|
||||
depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
|
||||
--- a/drivers/mtd/parsers/Makefile
|
||||
+++ b/drivers/mtd/parsers/Makefile
|
||||
@@ -6,6 +6,7 @@ obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdl
|
||||
obj-$(CONFIG_MTD_OF_PARTS) += ofpart.o
|
||||
ofpart-y += ofpart_core.o
|
||||
ofpart-$(CONFIG_MTD_OF_PARTS_BCM4908) += ofpart_bcm4908.o
|
||||
+ofpart-$(CONFIG_MTD_OF_PARTS_LINKSYS_NS)+= ofpart_linksys_ns.o
|
||||
obj-$(CONFIG_MTD_PARSER_IMAGETAG) += parser_imagetag.o
|
||||
obj-$(CONFIG_MTD_AFS_PARTS) += afs.o
|
||||
obj-$(CONFIG_MTD_PARSER_TRX) += parser_trx.o
|
||||
--- a/drivers/mtd/parsers/ofpart_core.c
|
||||
+++ b/drivers/mtd/parsers/ofpart_core.c
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <linux/mtd/partitions.h>
|
||||
|
||||
#include "ofpart_bcm4908.h"
|
||||
+#include "ofpart_linksys_ns.h"
|
||||
|
||||
struct fixed_partitions_quirks {
|
||||
int (*post_parse)(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts);
|
||||
@@ -26,6 +27,10 @@ static struct fixed_partitions_quirks bc
|
||||
.post_parse = bcm4908_partitions_post_parse,
|
||||
};
|
||||
|
||||
+static struct fixed_partitions_quirks linksys_ns_partitions_quirks = {
|
||||
+ .post_parse = linksys_ns_partitions_post_parse,
|
||||
+};
|
||||
+
|
||||
static const struct of_device_id parse_ofpart_match_table[];
|
||||
|
||||
static bool node_has_compatible(struct device_node *pp)
|
||||
@@ -167,6 +172,7 @@ static const struct of_device_id parse_o
|
||||
{ .compatible = "fixed-partitions" },
|
||||
/* Customized */
|
||||
{ .compatible = "brcm,bcm4908-partitions", .data = &bcm4908_partitions_quirks, },
|
||||
+ { .compatible = "linksys,ns-partitions", .data = &linksys_ns_partitions_quirks, },
|
||||
{},
|
||||
};
|
||||
MODULE_DEVICE_TABLE(of, parse_ofpart_match_table);
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_linksys_ns.c
|
||||
@@ -0,0 +1,50 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+/*
|
||||
+ * Copyright (C) 2021 Rafał Miłecki <rafal@milecki.pl>
|
||||
+ */
|
||||
+
|
||||
+#include <linux/bcm47xx_nvram.h>
|
||||
+#include <linux/mtd/mtd.h>
|
||||
+#include <linux/mtd/partitions.h>
|
||||
+
|
||||
+#include "ofpart_linksys_ns.h"
|
||||
+
|
||||
+#define NVRAM_BOOT_PART "bootpartition"
|
||||
+
|
||||
+static int ofpart_linksys_ns_bootpartition(void)
|
||||
+{
|
||||
+ char buf[4];
|
||||
+ int bootpartition;
|
||||
+
|
||||
+ /* Check CFE environment variable */
|
||||
+ if (bcm47xx_nvram_getenv(NVRAM_BOOT_PART, buf, sizeof(buf)) > 0) {
|
||||
+ if (!kstrtoint(buf, 0, &bootpartition))
|
||||
+ return bootpartition;
|
||||
+ pr_warn("Failed to parse %s value \"%s\"\n", NVRAM_BOOT_PART,
|
||||
+ buf);
|
||||
+ } else {
|
||||
+ pr_warn("Failed to get NVRAM \"%s\"\n", NVRAM_BOOT_PART);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int linksys_ns_partitions_post_parse(struct mtd_info *mtd,
|
||||
+ struct mtd_partition *parts,
|
||||
+ int nr_parts)
|
||||
+{
|
||||
+ int bootpartition = ofpart_linksys_ns_bootpartition();
|
||||
+ int trx_idx = 0;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < nr_parts; i++) {
|
||||
+ if (of_device_is_compatible(parts[i].of_node, "linksys,ns-firmware")) {
|
||||
+ if (trx_idx++ == bootpartition)
|
||||
+ parts[i].name = "firmware";
|
||||
+ else
|
||||
+ parts[i].name = "backup";
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
--- /dev/null
|
||||
+++ b/drivers/mtd/parsers/ofpart_linksys_ns.h
|
||||
@@ -0,0 +1,18 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+#ifndef __OFPART_LINKSYS_NS_H
|
||||
+#define __OFPART_LINKSYS_NS_H
|
||||
+
|
||||
+#ifdef CONFIG_MTD_OF_PARTS_LINKSYS_NS
|
||||
+int linksys_ns_partitions_post_parse(struct mtd_info *mtd,
|
||||
+ struct mtd_partition *parts,
|
||||
+ int nr_parts);
|
||||
+#else
|
||||
+static inline int linksys_ns_partitions_post_parse(struct mtd_info *mtd,
|
||||
+ struct mtd_partition *parts,
|
||||
+ int nr_parts)
|
||||
+{
|
||||
+ return -EOPNOTSUPP;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#endif
|
@ -1,54 +0,0 @@
|
||||
From 7e4404113686868858a34210c28ae122e967aa64 Mon Sep 17 00:00:00 2001
|
||||
From: Mauri Sandberg <sandberg@mailfence.com>
|
||||
Date: Tue, 9 Mar 2021 19:48:59 +0200
|
||||
Subject: [PATCH] mtd: cfi_cmdset_0002: Disable buffered writes for AMD chip
|
||||
0x2201
|
||||
|
||||
Buffer writes do not work with AMD chip 0x2201. The chip in question
|
||||
is an AMD/Spansion/Cypress Semiconductor S29GL256N and datasheet [1]
|
||||
talks about writing buffers being possible. While waiting for a neater
|
||||
solution resort to writing word-sized chunks only.
|
||||
|
||||
Without the patch kernel logs will be flooded with entries like below:
|
||||
|
||||
jffs2_scan_eraseblock(): End of filesystem marker found at 0x0
|
||||
jffs2_build_filesystem(): unlocking the mtd device...
|
||||
done.
|
||||
jffs2_build_filesystem(): erasing all blocks after the end marker...
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01ec000a.
|
||||
jffs2: Write clean marker to block at 0x01920000 failed: -5
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01e2000a.
|
||||
jffs2: Write clean marker to block at 0x01880000 failed: -5
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01e0000a.
|
||||
jffs2: Write clean marker to block at 0x01860000 failed: -5
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01dc000a.
|
||||
jffs2: Write clean marker to block at 0x01820000 failed: -5
|
||||
MTD do_write_buffer_wait(): software timeout, address:0x01da000a.
|
||||
jffs2: Write clean marker to block at 0x01800000 failed: -5
|
||||
...
|
||||
|
||||
Tested on a Buffalo wzr-hp-g300nh running kernel 5.10.16.
|
||||
|
||||
[1] https://www.cypress.com/file/219941/download
|
||||
or https://datasheetspdf.com/pdf-file/565708/SPANSION/S29GL256N/1
|
||||
|
||||
Signed-off-by: Mauri Sandberg <sandberg@mailfence.com>
|
||||
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
|
||||
Link: https://lore.kernel.org/r/20210309174859.362060-1-sandberg@mailfence.com
|
||||
---
|
||||
drivers/mtd/chips/cfi_cmdset_0002.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
|
||||
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
|
||||
@@ -272,6 +272,10 @@ static void fixup_use_write_buffers(stru
|
||||
{
|
||||
struct map_info *map = mtd->priv;
|
||||
struct cfi_private *cfi = map->fldrv_priv;
|
||||
+
|
||||
+ if (cfi->mfr == CFI_MFR_AMD && cfi->id == 0x2201)
|
||||
+ return;
|
||||
+
|
||||
if (cfi->cfiq->BufWriteTimeoutTyp) {
|
||||
pr_debug("Using buffer write method\n");
|
||||
mtd->_write = cfi_amdstd_write_buffers;
|
@ -1,32 +0,0 @@
|
||||
From a4d82940ff85a7e307953dfa715f65d5ab487e10 Mon Sep 17 00:00:00 2001
|
||||
From: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Date: Sun, 18 Apr 2021 23:46:14 +0200
|
||||
Subject: dt-bindings: mtd: brcm,trx: Add brcm,trx-magic
|
||||
|
||||
This adds the description of an additional property which allows to
|
||||
specify a custom partition parser magic to detect a trx partition.
|
||||
Buffalo has multiple devices which are using the trx format, but with
|
||||
different magic values.
|
||||
|
||||
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Acked-by: Rob Herring <robh@kernel.org>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210418214616.239574-2-hauke@hauke-m.de
|
||||
---
|
||||
.../devicetree/bindings/mtd/partitions/brcm,trx.txt | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
--- a/Documentation/devicetree/bindings/mtd/partitions/brcm,trx.txt
|
||||
+++ b/Documentation/devicetree/bindings/mtd/partitions/brcm,trx.txt
|
||||
@@ -28,6 +28,11 @@ detected by a software parsing TRX heade
|
||||
Required properties:
|
||||
- compatible : (required) must be "brcm,trx"
|
||||
|
||||
+Optional properties:
|
||||
+
|
||||
+- brcm,trx-magic: TRX magic, if it is different from the default magic
|
||||
+ 0x30524448 as a u32.
|
||||
+
|
||||
Example:
|
||||
|
||||
flash@0 {
|
@ -1,50 +0,0 @@
|
||||
From d7f7e04f8b67571a4bf5a0dcd4f9da4214f5262c Mon Sep 17 00:00:00 2001
|
||||
From: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Date: Sun, 18 Apr 2021 23:46:15 +0200
|
||||
Subject: mtd: parsers: trx: Allow to specify brcm,trx-magic in DT
|
||||
|
||||
Buffalo uses a different TRX magic for every device, to be able to use
|
||||
this trx parser, make it possible to specify the TRX magic in device
|
||||
tree. If no TRX magic is specified in device tree, the standard value
|
||||
will be used. This value should only be specified if a vendor chooses to
|
||||
use a non standard TRX magic.
|
||||
|
||||
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210418214616.239574-3-hauke@hauke-m.de
|
||||
---
|
||||
drivers/mtd/parsers/parser_trx.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/parsers/parser_trx.c
|
||||
+++ b/drivers/mtd/parsers/parser_trx.c
|
||||
@@ -51,13 +51,20 @@ static int parser_trx_parse(struct mtd_i
|
||||
const struct mtd_partition **pparts,
|
||||
struct mtd_part_parser_data *data)
|
||||
{
|
||||
+ struct device_node *np = mtd_get_of_node(mtd);
|
||||
struct mtd_partition *parts;
|
||||
struct mtd_partition *part;
|
||||
struct trx_header trx;
|
||||
size_t bytes_read;
|
||||
uint8_t curr_part = 0, i = 0;
|
||||
+ uint32_t trx_magic = TRX_MAGIC;
|
||||
int err;
|
||||
|
||||
+ /* Get different magic from device tree if specified */
|
||||
+ err = of_property_read_u32(np, "brcm,trx-magic", &trx_magic);
|
||||
+ if (err != 0 && err != -EINVAL)
|
||||
+ pr_err("failed to parse \"brcm,trx-magic\" DT attribute, using default: %d\n", err);
|
||||
+
|
||||
parts = kcalloc(TRX_PARSER_MAX_PARTS, sizeof(struct mtd_partition),
|
||||
GFP_KERNEL);
|
||||
if (!parts)
|
||||
@@ -70,7 +77,7 @@ static int parser_trx_parse(struct mtd_i
|
||||
return err;
|
||||
}
|
||||
|
||||
- if (trx.magic != TRX_MAGIC) {
|
||||
+ if (trx.magic != trx_magic) {
|
||||
kfree(parts);
|
||||
return -ENOENT;
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
From 81bb218c829246962a6327c64eec18ddcc049936 Mon Sep 17 00:00:00 2001
|
||||
From: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Date: Sun, 18 Apr 2021 23:46:16 +0200
|
||||
Subject: mtd: parsers: trx: Allow to use TRX parser on Mediatek SoCs
|
||||
|
||||
Buffalo uses the TRX partition format also on Mediatek MT7622 SoCs.
|
||||
|
||||
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
Link: https://lore.kernel.org/linux-mtd/20210418214616.239574-4-hauke@hauke-m.de
|
||||
---
|
||||
drivers/mtd/parsers/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/mtd/parsers/Kconfig
|
||||
+++ b/drivers/mtd/parsers/Kconfig
|
||||
@@ -115,7 +115,7 @@ config MTD_AFS_PARTS
|
||||
|
||||
config MTD_PARSER_TRX
|
||||
tristate "Parser for TRX format partitions"
|
||||
- depends on MTD && (BCM47XX || ARCH_BCM_5301X || COMPILE_TEST)
|
||||
+ depends on MTD && (BCM47XX || ARCH_BCM_5301X || ARCH_MEDIATEK || COMPILE_TEST)
|
||||
help
|
||||
TRX is a firmware format used by Broadcom on their devices. It
|
||||
may contain up to 3/4 partitions (depending on the version).
|
@ -1,25 +0,0 @@
|
||||
From dcdf415b740923530dc71d89fecc8361078473f5 Mon Sep 17 00:00:00 2001
|
||||
From: Rui Salvaterra <rsalvaterra@gmail.com>
|
||||
Date: Mon, 5 Apr 2021 16:11:55 +0100
|
||||
Subject: [PATCH] ubifs: default to zstd compression
|
||||
|
||||
Compared to lzo and zlib, zstd is the best all-around performer, both in terms
|
||||
of speed and compression ratio. Set it as the default, if available.
|
||||
|
||||
Signed-off-by: Rui Salvaterra <rsalvaterra@gmail.com>
|
||||
---
|
||||
fs/ubifs/sb.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
--- a/fs/ubifs/sb.c
|
||||
+++ b/fs/ubifs/sb.c
|
||||
@@ -53,6 +53,9 @@
|
||||
|
||||
static int get_default_compressor(struct ubifs_info *c)
|
||||
{
|
||||
+ if (ubifs_compr_present(c, UBIFS_COMPR_ZSTD))
|
||||
+ return UBIFS_COMPR_ZSTD;
|
||||
+
|
||||
if (ubifs_compr_present(c, UBIFS_COMPR_LZO))
|
||||
return UBIFS_COMPR_LZO;
|
||||
|
@ -1,88 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Mon, 8 Feb 2021 11:34:08 -0800
|
||||
Subject: [PATCH] net: extract napi poll functionality to __napi_poll()
|
||||
|
||||
This commit introduces a new function __napi_poll() which does the main
|
||||
logic of the existing napi_poll() function, and will be called by other
|
||||
functions in later commits.
|
||||
This idea and implementation is done by Felix Fietkau <nbd@nbd.name> and
|
||||
is proposed as part of the patch to move napi work to work_queue
|
||||
context.
|
||||
This commit by itself is a code restructure.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Wei Wang <weiwan@google.com>
|
||||
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -6805,15 +6805,10 @@ void __netif_napi_del(struct napi_struct
|
||||
}
|
||||
EXPORT_SYMBOL(__netif_napi_del);
|
||||
|
||||
-static int napi_poll(struct napi_struct *n, struct list_head *repoll)
|
||||
+static int __napi_poll(struct napi_struct *n, bool *repoll)
|
||||
{
|
||||
- void *have;
|
||||
int work, weight;
|
||||
|
||||
- list_del_init(&n->poll_list);
|
||||
-
|
||||
- have = netpoll_poll_lock(n);
|
||||
-
|
||||
weight = n->weight;
|
||||
|
||||
/* This NAPI_STATE_SCHED test is for avoiding a race
|
||||
@@ -6833,7 +6828,7 @@ static int napi_poll(struct napi_struct
|
||||
n->poll, work, weight);
|
||||
|
||||
if (likely(work < weight))
|
||||
- goto out_unlock;
|
||||
+ return work;
|
||||
|
||||
/* Drivers must not modify the NAPI state if they
|
||||
* consume the entire weight. In such cases this code
|
||||
@@ -6842,7 +6837,7 @@ static int napi_poll(struct napi_struct
|
||||
*/
|
||||
if (unlikely(napi_disable_pending(n))) {
|
||||
napi_complete(n);
|
||||
- goto out_unlock;
|
||||
+ return work;
|
||||
}
|
||||
|
||||
if (n->gro_bitmask) {
|
||||
@@ -6860,12 +6855,29 @@ static int napi_poll(struct napi_struct
|
||||
if (unlikely(!list_empty(&n->poll_list))) {
|
||||
pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
|
||||
n->dev ? n->dev->name : "backlog");
|
||||
- goto out_unlock;
|
||||
+ return work;
|
||||
}
|
||||
|
||||
- list_add_tail(&n->poll_list, repoll);
|
||||
+ *repoll = true;
|
||||
+
|
||||
+ return work;
|
||||
+}
|
||||
+
|
||||
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
|
||||
+{
|
||||
+ bool do_repoll = false;
|
||||
+ void *have;
|
||||
+ int work;
|
||||
+
|
||||
+ list_del_init(&n->poll_list);
|
||||
+
|
||||
+ have = netpoll_poll_lock(n);
|
||||
+
|
||||
+ work = __napi_poll(n, &do_repoll);
|
||||
+
|
||||
+ if (do_repoll)
|
||||
+ list_add_tail(&n->poll_list, repoll);
|
||||
|
||||
-out_unlock:
|
||||
netpoll_poll_unlock(have);
|
||||
|
||||
return work;
|
@ -1,261 +0,0 @@
|
||||
From: Wei Wang <weiwan@google.com>
|
||||
Date: Mon, 8 Feb 2021 11:34:09 -0800
|
||||
Subject: [PATCH] net: implement threaded-able napi poll loop support
|
||||
|
||||
This patch allows running each napi poll loop inside its own
|
||||
kernel thread.
|
||||
The kthread is created during netif_napi_add() if dev->threaded
|
||||
is set. And threaded mode is enabled in napi_enable(). We will
|
||||
provide a way to set dev->threaded and enable threaded mode
|
||||
without a device up/down in the following patch.
|
||||
|
||||
Once threaded mode is enabled and the kthread is
|
||||
started, napi_schedule() will wake-up such thread instead
|
||||
of scheduling the softirq.
|
||||
|
||||
The threaded poll loop behaves quite like the net_rx_action,
|
||||
but it does not have to manipulate local irqs and uses
|
||||
an explicit scheduling point based on netdev_budget.
|
||||
|
||||
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
Co-developed-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Wei Wang <weiwan@google.com>
|
||||
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -347,6 +347,7 @@ struct napi_struct {
|
||||
struct list_head dev_list;
|
||||
struct hlist_node napi_hash_node;
|
||||
unsigned int napi_id;
|
||||
+ struct task_struct *thread;
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -357,6 +358,7 @@ enum {
|
||||
NAPI_STATE_LISTED, /* NAPI added to system lists */
|
||||
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
|
||||
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
|
||||
+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -367,6 +369,7 @@ enum {
|
||||
NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
|
||||
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
|
||||
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
|
||||
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
|
||||
};
|
||||
|
||||
enum gro_result {
|
||||
@@ -497,20 +500,7 @@ static inline bool napi_complete(struct
|
||||
*/
|
||||
void napi_disable(struct napi_struct *n);
|
||||
|
||||
-/**
|
||||
- * napi_enable - enable NAPI scheduling
|
||||
- * @n: NAPI context
|
||||
- *
|
||||
- * Resume NAPI from being scheduled on this context.
|
||||
- * Must be paired with napi_disable.
|
||||
- */
|
||||
-static inline void napi_enable(struct napi_struct *n)
|
||||
-{
|
||||
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
|
||||
- smp_mb__before_atomic();
|
||||
- clear_bit(NAPI_STATE_SCHED, &n->state);
|
||||
- clear_bit(NAPI_STATE_NPSVC, &n->state);
|
||||
-}
|
||||
+void napi_enable(struct napi_struct *n);
|
||||
|
||||
/**
|
||||
* napi_synchronize - wait until NAPI is not running
|
||||
@@ -1842,6 +1832,8 @@ enum netdev_ml_priv_type {
|
||||
*
|
||||
* @wol_enabled: Wake-on-LAN is enabled
|
||||
*
|
||||
+ * @threaded: napi threaded mode is enabled
|
||||
+ *
|
||||
* @net_notifier_list: List of per-net netdev notifier block
|
||||
* that follow this device when it is moved
|
||||
* to another network namespace.
|
||||
@@ -2161,6 +2153,7 @@ struct net_device {
|
||||
struct lock_class_key *qdisc_running_key;
|
||||
bool proto_down;
|
||||
unsigned wol_enabled:1;
|
||||
+ unsigned threaded:1;
|
||||
|
||||
struct list_head net_notifier_list;
|
||||
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -91,6 +91,7 @@
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/ethtool.h>
|
||||
#include <linux/skbuff.h>
|
||||
+#include <linux/kthread.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_trace.h>
|
||||
#include <net/net_namespace.h>
|
||||
@@ -1500,6 +1501,27 @@ void netdev_notify_peers(struct net_devi
|
||||
}
|
||||
EXPORT_SYMBOL(netdev_notify_peers);
|
||||
|
||||
+static int napi_threaded_poll(void *data);
|
||||
+
|
||||
+static int napi_kthread_create(struct napi_struct *n)
|
||||
+{
|
||||
+ int err = 0;
|
||||
+
|
||||
+ /* Create and wake up the kthread once to put it in
|
||||
+ * TASK_INTERRUPTIBLE mode to avoid the blocked task
|
||||
+ * warning and work with loadavg.
|
||||
+ */
|
||||
+ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
|
||||
+ n->dev->name, n->napi_id);
|
||||
+ if (IS_ERR(n->thread)) {
|
||||
+ err = PTR_ERR(n->thread);
|
||||
+ pr_err("kthread_run failed with err %d\n", err);
|
||||
+ n->thread = NULL;
|
||||
+ }
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
|
||||
{
|
||||
const struct net_device_ops *ops = dev->netdev_ops;
|
||||
@@ -4267,6 +4289,21 @@ int gro_normal_batch __read_mostly = 8;
|
||||
static inline void ____napi_schedule(struct softnet_data *sd,
|
||||
struct napi_struct *napi)
|
||||
{
|
||||
+ struct task_struct *thread;
|
||||
+
|
||||
+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
|
||||
+ /* Paired with smp_mb__before_atomic() in
|
||||
+ * napi_enable(). Use READ_ONCE() to guarantee
|
||||
+ * a complete read on napi->thread. Only call
|
||||
+ * wake_up_process() when it's not NULL.
|
||||
+ */
|
||||
+ thread = READ_ONCE(napi->thread);
|
||||
+ if (thread) {
|
||||
+ wake_up_process(thread);
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
list_add_tail(&napi->poll_list, &sd->poll_list);
|
||||
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
|
||||
}
|
||||
@@ -6758,6 +6795,12 @@ void netif_napi_add(struct net_device *d
|
||||
set_bit(NAPI_STATE_NPSVC, &napi->state);
|
||||
list_add_rcu(&napi->dev_list, &dev->napi_list);
|
||||
napi_hash_add(napi);
|
||||
+ /* Create kthread for this napi if dev->threaded is set.
|
||||
+ * Clear dev->threaded if kthread creation failed so that
|
||||
+ * threaded mode will not be enabled in napi_enable().
|
||||
+ */
|
||||
+ if (dev->threaded && napi_kthread_create(napi))
|
||||
+ dev->threaded = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(netif_napi_add);
|
||||
|
||||
@@ -6774,9 +6817,28 @@ void napi_disable(struct napi_struct *n)
|
||||
hrtimer_cancel(&n->timer);
|
||||
|
||||
clear_bit(NAPI_STATE_DISABLE, &n->state);
|
||||
+ clear_bit(NAPI_STATE_THREADED, &n->state);
|
||||
}
|
||||
EXPORT_SYMBOL(napi_disable);
|
||||
|
||||
+/**
|
||||
+ * napi_enable - enable NAPI scheduling
|
||||
+ * @n: NAPI context
|
||||
+ *
|
||||
+ * Resume NAPI from being scheduled on this context.
|
||||
+ * Must be paired with napi_disable.
|
||||
+ */
|
||||
+void napi_enable(struct napi_struct *n)
|
||||
+{
|
||||
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
|
||||
+ smp_mb__before_atomic();
|
||||
+ clear_bit(NAPI_STATE_SCHED, &n->state);
|
||||
+ clear_bit(NAPI_STATE_NPSVC, &n->state);
|
||||
+ if (n->dev->threaded && n->thread)
|
||||
+ set_bit(NAPI_STATE_THREADED, &n->state);
|
||||
+}
|
||||
+EXPORT_SYMBOL(napi_enable);
|
||||
+
|
||||
static void flush_gro_hash(struct napi_struct *napi)
|
||||
{
|
||||
int i;
|
||||
@@ -6802,6 +6864,11 @@ void __netif_napi_del(struct napi_struct
|
||||
|
||||
flush_gro_hash(napi);
|
||||
napi->gro_bitmask = 0;
|
||||
+
|
||||
+ if (napi->thread) {
|
||||
+ kthread_stop(napi->thread);
|
||||
+ napi->thread = NULL;
|
||||
+ }
|
||||
}
|
||||
EXPORT_SYMBOL(__netif_napi_del);
|
||||
|
||||
@@ -6883,6 +6950,51 @@ static int napi_poll(struct napi_struct
|
||||
return work;
|
||||
}
|
||||
|
||||
+static int napi_thread_wait(struct napi_struct *napi)
|
||||
+{
|
||||
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||
+
|
||||
+ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
||||
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
|
||||
+ WARN_ON(!list_empty(&napi->poll_list));
|
||||
+ __set_current_state(TASK_RUNNING);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ schedule();
|
||||
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||
+ }
|
||||
+ __set_current_state(TASK_RUNNING);
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+static int napi_threaded_poll(void *data)
|
||||
+{
|
||||
+ struct napi_struct *napi = data;
|
||||
+ void *have;
|
||||
+
|
||||
+ while (!napi_thread_wait(napi)) {
|
||||
+ for (;;) {
|
||||
+ bool repoll = false;
|
||||
+
|
||||
+ local_bh_disable();
|
||||
+
|
||||
+ have = netpoll_poll_lock(napi);
|
||||
+ __napi_poll(napi, &repoll);
|
||||
+ netpoll_poll_unlock(have);
|
||||
+
|
||||
+ __kfree_skb_flush();
|
||||
+ local_bh_enable();
|
||||
+
|
||||
+ if (!repoll)
|
||||
+ break;
|
||||
+
|
||||
+ cond_resched();
|
||||
+ }
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static __latent_entropy void net_rx_action(struct softirq_action *h)
|
||||
{
|
||||
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
|
@ -1,177 +0,0 @@
|
||||
From: Wei Wang <weiwan@google.com>
|
||||
Date: Mon, 8 Feb 2021 11:34:10 -0800
|
||||
Subject: [PATCH] net: add sysfs attribute to control napi threaded mode
|
||||
|
||||
This patch adds a new sysfs attribute to the network device class.
|
||||
Said attribute provides a per-device control to enable/disable the
|
||||
threaded mode for all the napi instances of the given network device,
|
||||
without the need for a device up/down.
|
||||
User sets it to 1 or 0 to enable or disable threaded mode.
|
||||
Note: when switching between threaded and the current softirq based mode
|
||||
for a napi instance, it will not immediately take effect if the napi is
|
||||
currently being polled. The mode switch will happen the next time
|
||||
napi_schedule() is called.
|
||||
|
||||
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
Co-developed-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Wei Wang <weiwan@google.com>
|
||||
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/Documentation/ABI/testing/sysfs-class-net
|
||||
+++ b/Documentation/ABI/testing/sysfs-class-net
|
||||
@@ -337,3 +337,18 @@ Contact: netdev@vger.kernel.org
|
||||
Description:
|
||||
32-bit unsigned integer counting the number of times the link has
|
||||
been down
|
||||
+
|
||||
+What: /sys/class/net/<iface>/threaded
|
||||
+Date: Jan 2021
|
||||
+KernelVersion: 5.12
|
||||
+Contact: netdev@vger.kernel.org
|
||||
+Description:
|
||||
+ Boolean value to control the threaded mode per device. User could
|
||||
+ set this value to enable/disable threaded mode for all napi
|
||||
+ belonging to this device, without the need to do device up/down.
|
||||
+
|
||||
+ Possible values:
|
||||
+ == ==================================
|
||||
+ 0 threaded mode disabled for this dev
|
||||
+ 1 threaded mode enabled for this dev
|
||||
+ == ==================================
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -491,6 +491,8 @@ static inline bool napi_complete(struct
|
||||
return napi_complete_done(n, 0);
|
||||
}
|
||||
|
||||
+int dev_set_threaded(struct net_device *dev, bool threaded);
|
||||
+
|
||||
/**
|
||||
* napi_disable - prevent NAPI from scheduling
|
||||
* @n: NAPI context
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -4293,8 +4293,9 @@ static inline void ____napi_schedule(str
|
||||
|
||||
if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
|
||||
/* Paired with smp_mb__before_atomic() in
|
||||
- * napi_enable(). Use READ_ONCE() to guarantee
|
||||
- * a complete read on napi->thread. Only call
|
||||
+ * napi_enable()/dev_set_threaded().
|
||||
+ * Use READ_ONCE() to guarantee a complete
|
||||
+ * read on napi->thread. Only call
|
||||
* wake_up_process() when it's not NULL.
|
||||
*/
|
||||
thread = READ_ONCE(napi->thread);
|
||||
@@ -6768,6 +6769,49 @@ static void init_gro_hash(struct napi_st
|
||||
napi->gro_bitmask = 0;
|
||||
}
|
||||
|
||||
+int dev_set_threaded(struct net_device *dev, bool threaded)
|
||||
+{
|
||||
+ struct napi_struct *napi;
|
||||
+ int err = 0;
|
||||
+
|
||||
+ if (dev->threaded == threaded)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (threaded) {
|
||||
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
|
||||
+ if (!napi->thread) {
|
||||
+ err = napi_kthread_create(napi);
|
||||
+ if (err) {
|
||||
+ threaded = false;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ dev->threaded = threaded;
|
||||
+
|
||||
+ /* Make sure kthread is created before THREADED bit
|
||||
+ * is set.
|
||||
+ */
|
||||
+ smp_mb__before_atomic();
|
||||
+
|
||||
+ /* Setting/unsetting threaded mode on a napi might not immediately
|
||||
+ * take effect, if the current napi instance is actively being
|
||||
+ * polled. In this case, the switch between threaded mode and
|
||||
+ * softirq mode will happen in the next round of napi_schedule().
|
||||
+ * This should not cause hiccups/stalls to the live traffic.
|
||||
+ */
|
||||
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
|
||||
+ if (threaded)
|
||||
+ set_bit(NAPI_STATE_THREADED, &napi->state);
|
||||
+ else
|
||||
+ clear_bit(NAPI_STATE_THREADED, &napi->state);
|
||||
+ }
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
|
||||
int (*poll)(struct napi_struct *, int), int weight)
|
||||
{
|
||||
--- a/net/core/net-sysfs.c
|
||||
+++ b/net/core/net-sysfs.c
|
||||
@@ -587,6 +587,45 @@ static ssize_t phys_switch_id_show(struc
|
||||
}
|
||||
static DEVICE_ATTR_RO(phys_switch_id);
|
||||
|
||||
+static ssize_t threaded_show(struct device *dev,
|
||||
+ struct device_attribute *attr, char *buf)
|
||||
+{
|
||||
+ struct net_device *netdev = to_net_dev(dev);
|
||||
+ ssize_t ret = -EINVAL;
|
||||
+
|
||||
+ if (!rtnl_trylock())
|
||||
+ return restart_syscall();
|
||||
+
|
||||
+ if (dev_isalive(netdev))
|
||||
+ ret = sprintf(buf, fmt_dec, netdev->threaded);
|
||||
+
|
||||
+ rtnl_unlock();
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int modify_napi_threaded(struct net_device *dev, unsigned long val)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ if (list_empty(&dev->napi_list))
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if (val != 0 && val != 1)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ ret = dev_set_threaded(dev, val);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static ssize_t threaded_store(struct device *dev,
|
||||
+ struct device_attribute *attr,
|
||||
+ const char *buf, size_t len)
|
||||
+{
|
||||
+ return netdev_store(dev, attr, buf, len, modify_napi_threaded);
|
||||
+}
|
||||
+static DEVICE_ATTR_RW(threaded);
|
||||
+
|
||||
static struct attribute *net_class_attrs[] __ro_after_init = {
|
||||
&dev_attr_netdev_group.attr,
|
||||
&dev_attr_type.attr,
|
||||
@@ -619,6 +658,7 @@ static struct attribute *net_class_attrs
|
||||
&dev_attr_proto_down.attr,
|
||||
&dev_attr_carrier_up_count.attr,
|
||||
&dev_attr_carrier_down_count.attr,
|
||||
+ &dev_attr_threaded.attr,
|
||||
NULL,
|
||||
};
|
||||
ATTRIBUTE_GROUPS(net_class);
|
@ -1,93 +0,0 @@
|
||||
From: Wei Wang <weiwan@google.com>
|
||||
Date: Mon, 1 Mar 2021 17:21:13 -0800
|
||||
Subject: [PATCH] net: fix race between napi kthread mode and busy poll
|
||||
|
||||
Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
|
||||
determine if the kthread owns this napi and could call napi->poll() on
|
||||
it. However, if socket busy poll is enabled, it is possible that the
|
||||
busy poll thread grabs this SCHED bit (after the previous napi->poll()
|
||||
invokes napi_complete_done() and clears SCHED bit) and tries to poll
|
||||
on the same napi. napi_disable() could grab the SCHED bit as well.
|
||||
This patch tries to fix this race by adding a new bit
|
||||
NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in
|
||||
____napi_schedule() if the threaded mode is enabled, and gets cleared
|
||||
in napi_complete_done(), and we only poll the napi in kthread if this
|
||||
bit is set. This helps distinguish the ownership of the napi between
|
||||
kthread and other scenarios and fixes the race issue.
|
||||
|
||||
Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
|
||||
Reported-by: Martin Zaharinov <micron10@gmail.com>
|
||||
Suggested-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Wei Wang <weiwan@google.com>
|
||||
Cc: Alexander Duyck <alexanderduyck@fb.com>
|
||||
Cc: Eric Dumazet <edumazet@google.com>
|
||||
Cc: Paolo Abeni <pabeni@redhat.com>
|
||||
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -359,6 +359,7 @@ enum {
|
||||
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
|
||||
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
|
||||
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
|
||||
+ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -370,6 +371,7 @@ enum {
|
||||
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
|
||||
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
|
||||
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
|
||||
+ NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
|
||||
};
|
||||
|
||||
enum gro_result {
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -4300,6 +4300,8 @@ static inline void ____napi_schedule(str
|
||||
*/
|
||||
thread = READ_ONCE(napi->thread);
|
||||
if (thread) {
|
||||
+ if (thread->state != TASK_INTERRUPTIBLE)
|
||||
+ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
|
||||
wake_up_process(thread);
|
||||
return;
|
||||
}
|
||||
@@ -6560,7 +6562,8 @@ bool napi_complete_done(struct napi_stru
|
||||
|
||||
WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
|
||||
|
||||
- new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
|
||||
+ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
|
||||
+ NAPIF_STATE_SCHED_THREADED);
|
||||
|
||||
/* If STATE_MISSED was set, leave STATE_SCHED set,
|
||||
* because we will call napi->poll() one more time.
|
||||
@@ -6996,16 +6999,25 @@ static int napi_poll(struct napi_struct
|
||||
|
||||
static int napi_thread_wait(struct napi_struct *napi)
|
||||
{
|
||||
+ bool woken = false;
|
||||
+
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
||||
- if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
|
||||
+ /* Testing SCHED_THREADED bit here to make sure the current
|
||||
+ * kthread owns this napi and could poll on this napi.
|
||||
+ * Testing SCHED bit is not enough because SCHED bit might be
|
||||
+ * set by some other busy poll thread or by napi_disable().
|
||||
+ */
|
||||
+ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
|
||||
WARN_ON(!list_empty(&napi->poll_list));
|
||||
__set_current_state(TASK_RUNNING);
|
||||
return 0;
|
||||
}
|
||||
|
||||
schedule();
|
||||
+ /* woken being true indicates this thread owns this napi. */
|
||||
+ woken = true;
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
@ -1,53 +0,0 @@
|
||||
From: Paolo Abeni <pabeni@redhat.com>
|
||||
Date: Fri, 9 Apr 2021 17:24:17 +0200
|
||||
Subject: [PATCH] net: fix hangup on napi_disable for threaded napi
|
||||
|
||||
napi_disable() is subject to a hangup when the threaded
|
||||
mode is enabled and the napi is under heavy traffic.
|
||||
|
||||
If the relevant napi has been scheduled and the napi_disable()
|
||||
kicks in before the next napi_thread_wait() completes - so
|
||||
that the latter quits due to the napi_disable_pending() condition,
|
||||
the existing code leaves the NAPI_STATE_SCHED bit set and the
|
||||
napi_disable() loop waiting for such bit will hang.
|
||||
|
||||
This patch addresses the issue by dropping the NAPI_STATE_DISABLE
|
||||
bit test in napi_thread_wait(). The later napi_threaded_poll()
|
||||
iteration will take care of clearing the NAPI_STATE_SCHED.
|
||||
|
||||
This also addresses a related problem reported by Jakub:
|
||||
before this patch a napi_disable()/napi_enable() pair killed
|
||||
the napi thread, effectively disabling the threaded mode.
|
||||
On the patched kernel napi_disable() simply stops scheduling
|
||||
the relevant thread.
|
||||
|
||||
v1 -> v2:
|
||||
- let the main napi_threaded_poll() loop clear the SCHED bit
|
||||
|
||||
Reported-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Reviewed-by: Eric Dumazet <edumazet@google.com>
|
||||
Link: https://lore.kernel.org/r/883923fa22745a9589e8610962b7dc59df09fb1f.1617981844.git.pabeni@redhat.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -7003,7 +7003,7 @@ static int napi_thread_wait(struct napi_
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
- while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
||||
+ while (!kthread_should_stop()) {
|
||||
/* Testing SCHED_THREADED bit here to make sure the current
|
||||
* kthread owns this napi and could poll on this napi.
|
||||
* Testing SCHED bit is not enough because SCHED bit might be
|
||||
@@ -7021,6 +7021,7 @@ static int napi_thread_wait(struct napi_
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
+
|
||||
return -1;
|
||||
}
|
||||
|
@ -1,52 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Fri, 20 Nov 2020 13:49:13 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add hash offset field to tuple
|
||||
|
||||
Add a placeholder field to calculate hash tuple offset. Similar to
|
||||
2c407aca6497 ("netfilter: conntrack: avoid gcc-10 zero-length-bounds
|
||||
warning").
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -107,6 +107,10 @@ struct flow_offload_tuple {
|
||||
|
||||
u8 l3proto;
|
||||
u8 l4proto;
|
||||
+
|
||||
+ /* All members above are keys for lookups, see flow_offload_hash(). */
|
||||
+ struct { } __hash;
|
||||
+
|
||||
u8 dir;
|
||||
|
||||
u16 mtu;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -191,14 +191,14 @@ static u32 flow_offload_hash(const void
|
||||
{
|
||||
const struct flow_offload_tuple *tuple = data;
|
||||
|
||||
- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||
+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
|
||||
}
|
||||
|
||||
static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
|
||||
{
|
||||
const struct flow_offload_tuple_rhash *tuplehash = data;
|
||||
|
||||
- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
|
||||
}
|
||||
|
||||
static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
|
||||
@@ -207,7 +207,7 @@ static int flow_offload_hash_cmp(struct
|
||||
const struct flow_offload_tuple *tuple = arg->key;
|
||||
const struct flow_offload_tuple_rhash *x = ptr;
|
||||
|
||||
- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
|
||||
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
|
||||
return 1;
|
||||
|
||||
return 0;
|
@ -1,98 +0,0 @@
|
||||
From: Oz Shlomo <ozsh@nvidia.com>
|
||||
Date: Tue, 23 Mar 2021 00:56:19 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: separate replace, destroy and
|
||||
stats to different workqueues
|
||||
|
||||
Currently the flow table offload replace, destroy and stats work items are
|
||||
executed on a single workqueue. As such, DESTROY and STATS commands may
|
||||
be backlogged after a burst of REPLACE work items. This scenario can bloat
|
||||
up memory and may cause active connections to age.
|
||||
|
||||
Instantiate add, del and stats workqueues to avoid backlogs of non-dependent
|
||||
actions. Provide sysfs control over the workqueue attributes, allowing
|
||||
userspace applications to control the workqueue cpumask.
|
||||
|
||||
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
|
||||
Reviewed-by: Paul Blakey <paulb@nvidia.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -13,7 +13,9 @@
|
||||
#include <net/netfilter/nf_conntrack_core.h>
|
||||
#include <net/netfilter/nf_conntrack_tuple.h>
|
||||
|
||||
-static struct workqueue_struct *nf_flow_offload_wq;
|
||||
+static struct workqueue_struct *nf_flow_offload_add_wq;
|
||||
+static struct workqueue_struct *nf_flow_offload_del_wq;
|
||||
+static struct workqueue_struct *nf_flow_offload_stats_wq;
|
||||
|
||||
struct flow_offload_work {
|
||||
struct list_head list;
|
||||
@@ -827,7 +829,12 @@ static void flow_offload_work_handler(st
|
||||
|
||||
static void flow_offload_queue_work(struct flow_offload_work *offload)
|
||||
{
|
||||
- queue_work(nf_flow_offload_wq, &offload->work);
|
||||
+ if (offload->cmd == FLOW_CLS_REPLACE)
|
||||
+ queue_work(nf_flow_offload_add_wq, &offload->work);
|
||||
+ else if (offload->cmd == FLOW_CLS_DESTROY)
|
||||
+ queue_work(nf_flow_offload_del_wq, &offload->work);
|
||||
+ else
|
||||
+ queue_work(nf_flow_offload_stats_wq, &offload->work);
|
||||
}
|
||||
|
||||
static struct flow_offload_work *
|
||||
@@ -899,8 +906,11 @@ void nf_flow_offload_stats(struct nf_flo
|
||||
|
||||
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
|
||||
{
|
||||
- if (nf_flowtable_hw_offload(flowtable))
|
||||
- flush_workqueue(nf_flow_offload_wq);
|
||||
+ if (nf_flowtable_hw_offload(flowtable)) {
|
||||
+ flush_workqueue(nf_flow_offload_add_wq);
|
||||
+ flush_workqueue(nf_flow_offload_del_wq);
|
||||
+ flush_workqueue(nf_flow_offload_stats_wq);
|
||||
+ }
|
||||
}
|
||||
|
||||
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
|
||||
@@ -1013,15 +1023,33 @@ EXPORT_SYMBOL_GPL(nf_flow_table_offload_
|
||||
|
||||
int nf_flow_table_offload_init(void)
|
||||
{
|
||||
- nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
|
||||
- WQ_UNBOUND, 0);
|
||||
- if (!nf_flow_offload_wq)
|
||||
+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
|
||||
+ WQ_UNBOUND | WQ_SYSFS, 0);
|
||||
+ if (!nf_flow_offload_add_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
|
||||
+ WQ_UNBOUND | WQ_SYSFS, 0);
|
||||
+ if (!nf_flow_offload_del_wq)
|
||||
+ goto err_del_wq;
|
||||
+
|
||||
+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
|
||||
+ WQ_UNBOUND | WQ_SYSFS, 0);
|
||||
+ if (!nf_flow_offload_stats_wq)
|
||||
+ goto err_stats_wq;
|
||||
+
|
||||
return 0;
|
||||
+
|
||||
+err_stats_wq:
|
||||
+ destroy_workqueue(nf_flow_offload_del_wq);
|
||||
+err_del_wq:
|
||||
+ destroy_workqueue(nf_flow_offload_add_wq);
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
|
||||
void nf_flow_table_offload_exit(void)
|
||||
{
|
||||
- destroy_workqueue(nf_flow_offload_wq);
|
||||
+ destroy_workqueue(nf_flow_offload_add_wq);
|
||||
+ destroy_workqueue(nf_flow_offload_del_wq);
|
||||
+ destroy_workqueue(nf_flow_offload_stats_wq);
|
||||
}
|
@ -1,22 +0,0 @@
|
||||
From: YueHaibing <yuehaibing@huawei.com>
|
||||
Date: Tue, 23 Mar 2021 00:56:21 +0100
|
||||
Subject: [PATCH] netfilter: conntrack: Remove unused variable
|
||||
declaration
|
||||
|
||||
commit e97c3e278e95 ("tproxy: split off ipv6 defragmentation to a separate
|
||||
module") left behind this.
|
||||
|
||||
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
|
||||
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
|
||||
@@ -4,7 +4,4 @@
|
||||
|
||||
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
|
||||
|
||||
-#include <linux/sysctl.h>
|
||||
-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
|
||||
-
|
||||
#endif /* _NF_CONNTRACK_IPV6_H*/
|
@ -1,291 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:22 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: consolidate
|
||||
skb_try_make_writable() call
|
||||
|
||||
Fetch the layer 4 header size to be mangled by NAT when building the
|
||||
tuple, then use it to make writable the network and the transport
|
||||
headers. After this update, the NAT routines now assume that the skbuff
|
||||
area is writable. Do the pointer refetch only after the single
|
||||
skb_try_make_writable() call.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -394,9 +394,6 @@ static int nf_flow_nat_port_tcp(struct s
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||
- return -1;
|
||||
-
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
|
||||
|
||||
@@ -408,9 +405,6 @@ static int nf_flow_nat_port_udp(struct s
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||
- return -1;
|
||||
-
|
||||
udph = (void *)(skb_network_header(skb) + thoff);
|
||||
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||
inet_proto_csum_replace2(&udph->check, skb, port,
|
||||
@@ -446,9 +440,6 @@ int nf_flow_snat_port(const struct flow_
|
||||
struct flow_ports *hdr;
|
||||
__be16 port, new_port;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||
- return -1;
|
||||
-
|
||||
hdr = (void *)(skb_network_header(skb) + thoff);
|
||||
|
||||
switch (dir) {
|
||||
@@ -477,9 +468,6 @@ int nf_flow_dnat_port(const struct flow_
|
||||
struct flow_ports *hdr;
|
||||
__be16 port, new_port;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||
- return -1;
|
||||
-
|
||||
hdr = (void *)(skb_network_header(skb) + thoff);
|
||||
|
||||
switch (dir) {
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -39,9 +39,6 @@ static int nf_flow_nat_ip_tcp(struct sk_
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||
- return -1;
|
||||
-
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
|
||||
|
||||
@@ -53,9 +50,6 @@ static int nf_flow_nat_ip_udp(struct sk_
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||
- return -1;
|
||||
-
|
||||
udph = (void *)(skb_network_header(skb) + thoff);
|
||||
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||
inet_proto_csum_replace4(&udph->check, skb, addr,
|
||||
@@ -136,19 +130,17 @@ static int nf_flow_dnat_ip(const struct
|
||||
}
|
||||
|
||||
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
- unsigned int thoff, enum flow_offload_tuple_dir dir)
|
||||
+ unsigned int thoff, enum flow_offload_tuple_dir dir,
|
||||
+ struct iphdr *iph)
|
||||
{
|
||||
- struct iphdr *iph = ip_hdr(skb);
|
||||
-
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
|
||||
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||
- nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
|
||||
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||
return -1;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
|
||||
(nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||
- nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
|
||||
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
@@ -160,10 +152,10 @@ static bool ip_has_options(unsigned int
|
||||
}
|
||||
|
||||
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
|
||||
- struct flow_offload_tuple *tuple)
|
||||
+ struct flow_offload_tuple *tuple, u32 *hdrsize)
|
||||
{
|
||||
- unsigned int thoff, hdrsize;
|
||||
struct flow_ports *ports;
|
||||
+ unsigned int thoff;
|
||||
struct iphdr *iph;
|
||||
|
||||
if (!pskb_may_pull(skb, sizeof(*iph)))
|
||||
@@ -178,10 +170,10 @@ static int nf_flow_tuple_ip(struct sk_bu
|
||||
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
- hdrsize = sizeof(struct tcphdr);
|
||||
+ *hdrsize = sizeof(struct tcphdr);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- hdrsize = sizeof(struct udphdr);
|
||||
+ *hdrsize = sizeof(struct udphdr);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
@@ -191,7 +183,7 @@ static int nf_flow_tuple_ip(struct sk_bu
|
||||
return -1;
|
||||
|
||||
thoff = iph->ihl * 4;
|
||||
- if (!pskb_may_pull(skb, thoff + hdrsize))
|
||||
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
|
||||
return -1;
|
||||
|
||||
iph = ip_hdr(skb);
|
||||
@@ -252,11 +244,12 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
unsigned int thoff;
|
||||
struct iphdr *iph;
|
||||
__be32 nexthop;
|
||||
+ u32 hdrsize;
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IP))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
|
||||
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
|
||||
return NF_ACCEPT;
|
||||
|
||||
tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||
@@ -271,11 +264,13 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (skb_try_make_writable(skb, sizeof(*iph)))
|
||||
+ iph = ip_hdr(skb);
|
||||
+ thoff = iph->ihl * 4;
|
||||
+ if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
- thoff = ip_hdr(skb)->ihl * 4;
|
||||
- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
|
||||
+ iph = ip_hdr(skb);
|
||||
+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
@@ -285,10 +280,9 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
|
||||
+ if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
|
||||
return NF_DROP;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
ip_decrease_ttl(iph);
|
||||
skb->tstamp = 0;
|
||||
|
||||
@@ -317,9 +311,6 @@ static int nf_flow_nat_ipv6_tcp(struct s
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||
- return -1;
|
||||
-
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
|
||||
new_addr->s6_addr32, true);
|
||||
@@ -333,9 +324,6 @@ static int nf_flow_nat_ipv6_udp(struct s
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
- if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||
- return -1;
|
||||
-
|
||||
udph = (void *)(skb_network_header(skb) + thoff);
|
||||
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||
inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
|
||||
@@ -417,31 +405,30 @@ static int nf_flow_dnat_ipv6(const struc
|
||||
|
||||
static int nf_flow_nat_ipv6(const struct flow_offload *flow,
|
||||
struct sk_buff *skb,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct ipv6hdr *ip6h)
|
||||
{
|
||||
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
|
||||
unsigned int thoff = sizeof(*ip6h);
|
||||
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
|
||||
(nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||
- nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
|
||||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||
return -1;
|
||||
|
||||
- ip6h = ipv6_hdr(skb);
|
||||
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
|
||||
(nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||
- nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
|
||||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
|
||||
- struct flow_offload_tuple *tuple)
|
||||
+ struct flow_offload_tuple *tuple, u32 *hdrsize)
|
||||
{
|
||||
- unsigned int thoff, hdrsize;
|
||||
struct flow_ports *ports;
|
||||
struct ipv6hdr *ip6h;
|
||||
+ unsigned int thoff;
|
||||
|
||||
if (!pskb_may_pull(skb, sizeof(*ip6h)))
|
||||
return -1;
|
||||
@@ -450,10 +437,10 @@ static int nf_flow_tuple_ipv6(struct sk_
|
||||
|
||||
switch (ip6h->nexthdr) {
|
||||
case IPPROTO_TCP:
|
||||
- hdrsize = sizeof(struct tcphdr);
|
||||
+ *hdrsize = sizeof(struct tcphdr);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- hdrsize = sizeof(struct udphdr);
|
||||
+ *hdrsize = sizeof(struct udphdr);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
@@ -463,7 +450,7 @@ static int nf_flow_tuple_ipv6(struct sk_
|
||||
return -1;
|
||||
|
||||
thoff = sizeof(*ip6h);
|
||||
- if (!pskb_may_pull(skb, thoff + hdrsize))
|
||||
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
|
||||
return -1;
|
||||
|
||||
ip6h = ipv6_hdr(skb);
|
||||
@@ -493,11 +480,12 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
struct net_device *outdev;
|
||||
struct ipv6hdr *ip6h;
|
||||
struct rt6_info *rt;
|
||||
+ u32 hdrsize;
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IPV6))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
|
||||
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
|
||||
return NF_ACCEPT;
|
||||
|
||||
tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||
@@ -523,13 +511,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
- if (skb_try_make_writable(skb, sizeof(*ip6h)))
|
||||
+ if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
|
||||
+ ip6h = ipv6_hdr(skb);
|
||||
+ if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0)
|
||||
return NF_DROP;
|
||||
|
||||
- ip6h = ipv6_hdr(skb);
|
||||
ip6h->hop_limit--;
|
||||
skb->tstamp = 0;
|
||||
|
@ -1,35 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:23 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: move skb_try_make_writable()
|
||||
before NAT in IPv4
|
||||
|
||||
For consistency with the IPv6 flowtable datapath and to make sure the
|
||||
skbuff is writable right before the NAT header updates.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -266,10 +266,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
iph = ip_hdr(skb);
|
||||
thoff = iph->ihl * 4;
|
||||
- if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
- return NF_DROP;
|
||||
-
|
||||
- iph = ip_hdr(skb);
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
@@ -280,6 +276,10 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
+ if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
+ return NF_DROP;
|
||||
+
|
||||
+ iph = ip_hdr(skb);
|
||||
if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
|
||||
return NF_DROP;
|
||||
|
@ -1,82 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:24 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: move FLOW_OFFLOAD_DIR_MAX away
|
||||
from enumeration
|
||||
|
||||
This allows removing the default case which should never happen and
|
||||
that was added to avoid gcc warnings on unhandled FLOW_OFFLOAD_DIR_MAX
|
||||
enumeration case.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -86,8 +86,8 @@ static inline bool nf_flowtable_hw_offlo
|
||||
enum flow_offload_tuple_dir {
|
||||
FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
|
||||
FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
|
||||
- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
|
||||
};
|
||||
+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
|
||||
|
||||
struct flow_offload_tuple {
|
||||
union {
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -453,8 +453,6 @@ int nf_flow_snat_port(const struct flow_
|
||||
new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
|
||||
hdr->dest = new_port;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
|
||||
return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
@@ -481,8 +479,6 @@ int nf_flow_dnat_port(const struct flow_
|
||||
new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
|
||||
hdr->source = new_port;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
|
||||
return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -96,8 +96,6 @@ static int nf_flow_snat_ip(const struct
|
||||
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
|
||||
iph->daddr = new_addr;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
csum_replace4(&iph->check, addr, new_addr);
|
||||
|
||||
@@ -121,8 +119,6 @@ static int nf_flow_dnat_ip(const struct
|
||||
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
|
||||
iph->saddr = new_addr;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
csum_replace4(&iph->check, addr, new_addr);
|
||||
|
||||
@@ -371,8 +367,6 @@ static int nf_flow_snat_ipv6(const struc
|
||||
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
|
||||
ip6h->daddr = new_addr;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
|
||||
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
@@ -396,8 +390,6 @@ static int nf_flow_dnat_ipv6(const struc
|
||||
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
|
||||
ip6h->saddr = new_addr;
|
||||
break;
|
||||
- default:
|
||||
- return -1;
|
||||
}
|
||||
|
||||
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
@ -1,394 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:25 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: fast NAT functions never fail
|
||||
|
||||
Simplify existing fast NAT routines by returning void. After the
|
||||
skb_try_make_writable() call consolidation, these routines cannot ever
|
||||
fail.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -228,12 +228,12 @@ void nf_flow_table_free(struct nf_flowta
|
||||
|
||||
void flow_offload_teardown(struct flow_offload *flow);
|
||||
|
||||
-int nf_flow_snat_port(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, enum flow_offload_tuple_dir dir);
|
||||
-int nf_flow_dnat_port(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, enum flow_offload_tuple_dir dir);
|
||||
+void nf_flow_snat_port(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, unsigned int thoff,
|
||||
+ u8 protocol, enum flow_offload_tuple_dir dir);
|
||||
+void nf_flow_dnat_port(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, unsigned int thoff,
|
||||
+ u8 protocol, enum flow_offload_tuple_dir dir);
|
||||
|
||||
struct flow_ports {
|
||||
__be16 source, dest;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -388,20 +388,17 @@ static void nf_flow_offload_work_gc(stru
|
||||
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
|
||||
}
|
||||
|
||||
-
|
||||
-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
- __be16 port, __be16 new_port)
|
||||
+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ __be16 port, __be16 new_port)
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
- __be16 port, __be16 new_port)
|
||||
+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ __be16 port, __be16 new_port)
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
@@ -412,30 +409,24 @@ static int nf_flow_nat_port_udp(struct s
|
||||
if (!udph->check)
|
||||
udph->check = CSUM_MANGLED_0;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, __be16 port, __be16 new_port)
|
||||
+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
|
||||
+ u8 protocol, __be16 port, __be16 new_port)
|
||||
{
|
||||
switch (protocol) {
|
||||
case IPPROTO_TCP:
|
||||
- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
|
||||
break;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-int nf_flow_snat_port(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, enum flow_offload_tuple_dir dir)
|
||||
+void nf_flow_snat_port(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, unsigned int thoff,
|
||||
+ u8 protocol, enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct flow_ports *hdr;
|
||||
__be16 port, new_port;
|
||||
@@ -455,13 +446,13 @@ int nf_flow_snat_port(const struct flow_
|
||||
break;
|
||||
}
|
||||
|
||||
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_snat_port);
|
||||
|
||||
-int nf_flow_dnat_port(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, unsigned int thoff,
|
||||
- u8 protocol, enum flow_offload_tuple_dir dir)
|
||||
+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
+ unsigned int thoff, u8 protocol,
|
||||
+ enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct flow_ports *hdr;
|
||||
__be16 port, new_port;
|
||||
@@ -481,7 +472,7 @@ int nf_flow_dnat_port(const struct flow_
|
||||
break;
|
||||
}
|
||||
|
||||
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -34,19 +34,17 @@ static int nf_flow_state_check(struct fl
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
- __be32 addr, __be32 new_addr)
|
||||
+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ __be32 addr, __be32 new_addr)
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
- __be32 addr, __be32 new_addr)
|
||||
+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ __be32 addr, __be32 new_addr)
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
@@ -57,31 +55,25 @@ static int nf_flow_nat_ip_udp(struct sk_
|
||||
if (!udph->check)
|
||||
udph->check = CSUM_MANGLED_0;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
|
||||
- unsigned int thoff, __be32 addr,
|
||||
- __be32 new_addr)
|
||||
+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
|
||||
+ unsigned int thoff, __be32 addr,
|
||||
+ __be32 new_addr)
|
||||
{
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
|
||||
break;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
- struct iphdr *iph, unsigned int thoff,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+static void nf_flow_snat_ip(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, struct iphdr *iph,
|
||||
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
__be32 addr, new_addr;
|
||||
|
||||
@@ -99,12 +91,12 @@ static int nf_flow_snat_ip(const struct
|
||||
}
|
||||
csum_replace4(&iph->check, addr, new_addr);
|
||||
|
||||
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||
}
|
||||
|
||||
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
- struct iphdr *iph, unsigned int thoff,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+static void nf_flow_dnat_ip(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, struct iphdr *iph,
|
||||
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
__be32 addr, new_addr;
|
||||
|
||||
@@ -122,24 +114,21 @@ static int nf_flow_dnat_ip(const struct
|
||||
}
|
||||
csum_replace4(&iph->check, addr, new_addr);
|
||||
|
||||
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||
unsigned int thoff, enum flow_offload_tuple_dir dir,
|
||||
struct iphdr *iph)
|
||||
{
|
||||
- if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
|
||||
- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||
- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||
- return -1;
|
||||
-
|
||||
- if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
|
||||
- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||
- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||
- return -1;
|
||||
-
|
||||
- return 0;
|
||||
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
|
||||
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
|
||||
+ }
|
||||
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
|
||||
+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
|
||||
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
|
||||
+ }
|
||||
}
|
||||
|
||||
static bool ip_has_options(unsigned int thoff)
|
||||
@@ -276,8 +265,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
return NF_DROP;
|
||||
|
||||
iph = ip_hdr(skb);
|
||||
- if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
|
||||
|
||||
ip_decrease_ttl(iph);
|
||||
skb->tstamp = 0;
|
||||
@@ -301,22 +289,21 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
|
||||
|
||||
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
- struct in6_addr *addr,
|
||||
- struct in6_addr *new_addr)
|
||||
+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ struct in6_addr *addr,
|
||||
+ struct in6_addr *new_addr,
|
||||
+ struct ipv6hdr *ip6h)
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
|
||||
tcph = (void *)(skb_network_header(skb) + thoff);
|
||||
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
|
||||
new_addr->s6_addr32, true);
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
- struct in6_addr *addr,
|
||||
- struct in6_addr *new_addr)
|
||||
+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
|
||||
+ struct in6_addr *addr,
|
||||
+ struct in6_addr *new_addr)
|
||||
{
|
||||
struct udphdr *udph;
|
||||
|
||||
@@ -327,32 +314,26 @@ static int nf_flow_nat_ipv6_udp(struct s
|
||||
if (!udph->check)
|
||||
udph->check = CSUM_MANGLED_0;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
- unsigned int thoff, struct in6_addr *addr,
|
||||
- struct in6_addr *new_addr)
|
||||
+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
+ unsigned int thoff, struct in6_addr *addr,
|
||||
+ struct in6_addr *new_addr)
|
||||
{
|
||||
switch (ip6h->nexthdr) {
|
||||
case IPPROTO_TCP:
|
||||
- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
|
||||
break;
|
||||
}
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
- unsigned int thoff,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
+ unsigned int thoff,
|
||||
+ enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct in6_addr addr, new_addr;
|
||||
|
||||
@@ -369,13 +350,13 @@ static int nf_flow_snat_ipv6(const struc
|
||||
break;
|
||||
}
|
||||
|
||||
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
}
|
||||
|
||||
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
- unsigned int thoff,
|
||||
- enum flow_offload_tuple_dir dir)
|
||||
+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||
+ unsigned int thoff,
|
||||
+ enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct in6_addr addr, new_addr;
|
||||
|
||||
@@ -392,27 +373,24 @@ static int nf_flow_dnat_ipv6(const struc
|
||||
break;
|
||||
}
|
||||
|
||||
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||
}
|
||||
|
||||
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
|
||||
- struct sk_buff *skb,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct ipv6hdr *ip6h)
|
||||
+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
|
||||
+ struct sk_buff *skb,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct ipv6hdr *ip6h)
|
||||
{
|
||||
unsigned int thoff = sizeof(*ip6h);
|
||||
|
||||
- if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
|
||||
- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||
- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||
- return -1;
|
||||
-
|
||||
- if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
|
||||
- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||
- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||
- return -1;
|
||||
-
|
||||
- return 0;
|
||||
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
|
||||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
|
||||
+ }
|
||||
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
|
||||
+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
|
||||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
|
||||
+ }
|
||||
}
|
||||
|
||||
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
|
||||
@@ -507,8 +485,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
return NF_DROP;
|
||||
|
||||
ip6h = ipv6_hdr(skb);
|
||||
- if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0)
|
||||
- return NF_DROP;
|
||||
+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
|
||||
|
||||
ip6h->hop_limit--;
|
||||
skb->tstamp = 0;
|
@ -1,46 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:26 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: call dst_check() to fall back to
|
||||
classic forwarding
|
||||
|
||||
In case the route is stale, pass up the packet to the classic forwarding
|
||||
path for re-evaluation and schedule this flow entry for removal.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -197,14 +197,6 @@ static bool nf_flow_exceeds_mtu(const st
|
||||
return true;
|
||||
}
|
||||
|
||||
-static int nf_flow_offload_dst_check(struct dst_entry *dst)
|
||||
-{
|
||||
- if (unlikely(dst_xfrm(dst)))
|
||||
- return dst_check(dst, 0) ? 0 : -1;
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
|
||||
const struct nf_hook_state *state,
|
||||
struct dst_entry *dst)
|
||||
@@ -256,7 +248,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
|
||||
- if (nf_flow_offload_dst_check(&rt->dst)) {
|
||||
+ if (!dst_check(&rt->dst, 0)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
@@ -476,7 +468,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
|
||||
- if (nf_flow_offload_dst_check(&rt->dst)) {
|
||||
+ if (!dst_check(&rt->dst, 0)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_ACCEPT;
|
||||
}
|
@ -1,49 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:27 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: refresh timeout after dst and
|
||||
writable checks
|
||||
|
||||
Refresh the timeout (and retry hardware offload) once the skbuff dst
|
||||
is confirmed to be current and after the skbuff is made writable.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -246,8 +246,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- flow_offload_refresh(flow_table, flow);
|
||||
-
|
||||
if (!dst_check(&rt->dst, 0)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_ACCEPT;
|
||||
@@ -256,6 +254,8 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
+ flow_offload_refresh(flow_table, flow);
|
||||
+
|
||||
iph = ip_hdr(skb);
|
||||
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
|
||||
|
||||
@@ -466,8 +466,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
sizeof(*ip6h)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- flow_offload_refresh(flow_table, flow);
|
||||
-
|
||||
if (!dst_check(&rt->dst, 0)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_ACCEPT;
|
||||
@@ -476,6 +474,8 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
+ flow_offload_refresh(flow_table, flow);
|
||||
+
|
||||
ip6h = ipv6_hdr(skb);
|
||||
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
|
||||
|
@ -1,103 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Tue, 23 Mar 2021 00:56:28 +0100
|
||||
Subject: [PATCH] netfilter: nftables: update table flags from the commit
|
||||
phase
|
||||
|
||||
Do not update table flags from the preparation phase. Store the flags
|
||||
update into the transaction, then update the flags from the commit
|
||||
phase.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_tables.h
|
||||
+++ b/include/net/netfilter/nf_tables.h
|
||||
@@ -1470,13 +1470,16 @@ struct nft_trans_chain {
|
||||
|
||||
struct nft_trans_table {
|
||||
bool update;
|
||||
- bool enable;
|
||||
+ u8 state;
|
||||
+ u32 flags;
|
||||
};
|
||||
|
||||
#define nft_trans_table_update(trans) \
|
||||
(((struct nft_trans_table *)trans->data)->update)
|
||||
-#define nft_trans_table_enable(trans) \
|
||||
- (((struct nft_trans_table *)trans->data)->enable)
|
||||
+#define nft_trans_table_state(trans) \
|
||||
+ (((struct nft_trans_table *)trans->data)->state)
|
||||
+#define nft_trans_table_flags(trans) \
|
||||
+ (((struct nft_trans_table *)trans->data)->flags)
|
||||
|
||||
struct nft_trans_elem {
|
||||
struct nft_set *set;
|
||||
--- a/net/netfilter/nf_tables_api.c
|
||||
+++ b/net/netfilter/nf_tables_api.c
|
||||
@@ -891,6 +891,12 @@ static void nf_tables_table_disable(stru
|
||||
nft_table_disable(net, table, 0);
|
||||
}
|
||||
|
||||
+enum {
|
||||
+ NFT_TABLE_STATE_UNCHANGED = 0,
|
||||
+ NFT_TABLE_STATE_DORMANT,
|
||||
+ NFT_TABLE_STATE_WAKEUP
|
||||
+};
|
||||
+
|
||||
static int nf_tables_updtable(struct nft_ctx *ctx)
|
||||
{
|
||||
struct nft_trans *trans;
|
||||
@@ -914,19 +920,17 @@ static int nf_tables_updtable(struct nft
|
||||
|
||||
if ((flags & NFT_TABLE_F_DORMANT) &&
|
||||
!(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
|
||||
- nft_trans_table_enable(trans) = false;
|
||||
+ nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
|
||||
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
|
||||
ctx->table->flags & NFT_TABLE_F_DORMANT) {
|
||||
- ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
|
||||
ret = nf_tables_table_enable(ctx->net, ctx->table);
|
||||
if (ret >= 0)
|
||||
- nft_trans_table_enable(trans) = true;
|
||||
- else
|
||||
- ctx->table->flags |= NFT_TABLE_F_DORMANT;
|
||||
+ nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
|
||||
}
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
+ nft_trans_table_flags(trans) = flags;
|
||||
nft_trans_table_update(trans) = true;
|
||||
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
|
||||
return 0;
|
||||
@@ -7908,11 +7912,10 @@ static int nf_tables_commit(struct net *
|
||||
switch (trans->msg_type) {
|
||||
case NFT_MSG_NEWTABLE:
|
||||
if (nft_trans_table_update(trans)) {
|
||||
- if (!nft_trans_table_enable(trans)) {
|
||||
- nf_tables_table_disable(net,
|
||||
- trans->ctx.table);
|
||||
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
|
||||
- }
|
||||
+ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
|
||||
+ nf_tables_table_disable(net, trans->ctx.table);
|
||||
+
|
||||
+ trans->ctx.table->flags = nft_trans_table_flags(trans);
|
||||
} else {
|
||||
nft_clear(net, trans->ctx.table);
|
||||
}
|
||||
@@ -8125,11 +8128,9 @@ static int __nf_tables_abort(struct net
|
||||
switch (trans->msg_type) {
|
||||
case NFT_MSG_NEWTABLE:
|
||||
if (nft_trans_table_update(trans)) {
|
||||
- if (nft_trans_table_enable(trans)) {
|
||||
- nf_tables_table_disable(net,
|
||||
- trans->ctx.table);
|
||||
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
|
||||
- }
|
||||
+ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
|
||||
+ nf_tables_table_disable(net, trans->ctx.table);
|
||||
+
|
||||
nft_trans_destroy(trans);
|
||||
} else {
|
||||
list_del_rcu(&trans->ctx.table->list);
|
@ -1,170 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:32 +0100
|
||||
Subject: [PATCH] net: resolve forwarding path from virtual netdevice and
|
||||
HW destination address
|
||||
|
||||
This patch adds dev_fill_forward_path() which resolves the path to reach
|
||||
the real netdevice from the IP forwarding side. This function takes as
|
||||
input the netdevice and the destination hardware address and it walks
|
||||
down the devices calling .ndo_fill_forward_path() for each device until
|
||||
the real device is found.
|
||||
|
||||
For instance, assuming the following topology:
|
||||
|
||||
IP forwarding
|
||||
/ \
|
||||
br0 eth0
|
||||
/ \
|
||||
eth1 eth2
|
||||
.
|
||||
.
|
||||
.
|
||||
ethX
|
||||
ab:cd:ef:ab:cd:ef
|
||||
|
||||
where eth1 and eth2 are bridge ports and eth0 provides WAN connectivity.
|
||||
ethX is the interface in another box which is connected to the eth1
|
||||
bridge port.
|
||||
|
||||
For packets going through IP forwarding to br0 whose destination MAC
|
||||
address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
|
||||
following path:
|
||||
|
||||
br0 -> eth1
|
||||
|
||||
.ndo_fill_forward_path for br0 looks up the destination MAC address in the
|
||||
FDB to get the bridge port eth1.
|
||||
|
||||
This information allows creating a fast path that bypasses the classic
|
||||
bridge and IP forwarding paths, so packets go directly from the bridge
|
||||
port eth1 to eth0 (wan interface) and vice versa.
|
||||
|
||||
fast path
|
||||
.------------------------.
|
||||
/ \
|
||||
| IP forwarding |
|
||||
| / \ \/
|
||||
| br0 eth0
|
||||
. / \
|
||||
-> eth1 eth2
|
||||
.
|
||||
.
|
||||
.
|
||||
ethX
|
||||
ab:cd:ef:ab:cd:ef
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -827,6 +827,27 @@ typedef u16 (*select_queue_fallback_t)(s
|
||||
struct sk_buff *skb,
|
||||
struct net_device *sb_dev);
|
||||
|
||||
+enum net_device_path_type {
|
||||
+ DEV_PATH_ETHERNET = 0,
|
||||
+};
|
||||
+
|
||||
+struct net_device_path {
|
||||
+ enum net_device_path_type type;
|
||||
+ const struct net_device *dev;
|
||||
+};
|
||||
+
|
||||
+#define NET_DEVICE_PATH_STACK_MAX 5
|
||||
+
|
||||
+struct net_device_path_stack {
|
||||
+ int num_paths;
|
||||
+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
|
||||
+};
|
||||
+
|
||||
+struct net_device_path_ctx {
|
||||
+ const struct net_device *dev;
|
||||
+ const u8 *daddr;
|
||||
+};
|
||||
+
|
||||
enum tc_setup_type {
|
||||
TC_SETUP_QDISC_MQPRIO,
|
||||
TC_SETUP_CLSU32,
|
||||
@@ -1273,6 +1294,8 @@ struct netdev_net_notifier {
|
||||
* struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
|
||||
* If a device is paired with a peer device, return the peer instance.
|
||||
* The caller must be under RCU read context.
|
||||
+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
|
||||
+ * Get the forwarding path to reach the real device from the HW destination address
|
||||
*/
|
||||
struct net_device_ops {
|
||||
int (*ndo_init)(struct net_device *dev);
|
||||
@@ -1481,6 +1504,8 @@ struct net_device_ops {
|
||||
int (*ndo_tunnel_ctl)(struct net_device *dev,
|
||||
struct ip_tunnel_parm *p, int cmd);
|
||||
struct net_device * (*ndo_get_peer_dev)(struct net_device *dev);
|
||||
+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path);
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -2828,6 +2853,8 @@ void dev_remove_offload(struct packet_of
|
||||
|
||||
int dev_get_iflink(const struct net_device *dev);
|
||||
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
|
||||
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
|
||||
+ struct net_device_path_stack *stack);
|
||||
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
|
||||
unsigned short mask);
|
||||
struct net_device *dev_get_by_name(struct net *net, const char *name);
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -847,6 +847,52 @@ int dev_fill_metadata_dst(struct net_dev
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
|
||||
|
||||
+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
|
||||
+{
|
||||
+ int k = stack->num_paths++;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
|
||||
+ return NULL;
|
||||
+
|
||||
+ return &stack->path[k];
|
||||
+}
|
||||
+
|
||||
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
|
||||
+ struct net_device_path_stack *stack)
|
||||
+{
|
||||
+ const struct net_device *last_dev;
|
||||
+ struct net_device_path_ctx ctx = {
|
||||
+ .dev = dev,
|
||||
+ .daddr = daddr,
|
||||
+ };
|
||||
+ struct net_device_path *path;
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ stack->num_paths = 0;
|
||||
+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
|
||||
+ last_dev = ctx.dev;
|
||||
+ path = dev_fwd_path(stack);
|
||||
+ if (!path)
|
||||
+ return -1;
|
||||
+
|
||||
+ memset(path, 0, sizeof(struct net_device_path));
|
||||
+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
|
||||
+ if (ret < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(last_dev == ctx.dev))
|
||||
+ return -1;
|
||||
+ }
|
||||
+ path = dev_fwd_path(stack);
|
||||
+ if (!path)
|
||||
+ return -1;
|
||||
+ path->type = DEV_PATH_ETHERNET;
|
||||
+ path->dev = ctx.dev;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
|
||||
+
|
||||
/**
|
||||
* __dev_get_by_name - find a device by its name
|
||||
* @net: the applicable net namespace
|
@ -1,80 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:33 +0100
|
||||
Subject: [PATCH] net: 8021q: resolve forwarding path for vlan devices
|
||||
|
||||
Add .ndo_fill_forward_path for vlan devices.
|
||||
|
||||
For instance, assuming the following topology:
|
||||
|
||||
IP forwarding
|
||||
/ \
|
||||
eth0.100 eth0
|
||||
|
|
||||
eth0
|
||||
.
|
||||
.
|
||||
.
|
||||
ethX
|
||||
ab:cd:ef:ab:cd:ef
|
||||
|
||||
For packets going through IP forwarding to eth0.100 whose destination
|
||||
MAC address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
|
||||
following path:
|
||||
|
||||
eth0.100 -> eth0
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -829,11 +829,18 @@ typedef u16 (*select_queue_fallback_t)(s
|
||||
|
||||
enum net_device_path_type {
|
||||
DEV_PATH_ETHERNET = 0,
|
||||
+ DEV_PATH_VLAN,
|
||||
};
|
||||
|
||||
struct net_device_path {
|
||||
enum net_device_path_type type;
|
||||
const struct net_device *dev;
|
||||
+ union {
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ } encap;
|
||||
+ };
|
||||
};
|
||||
|
||||
#define NET_DEVICE_PATH_STACK_MAX 5
|
||||
--- a/net/8021q/vlan_dev.c
|
||||
+++ b/net/8021q/vlan_dev.c
|
||||
@@ -770,6 +770,20 @@ static int vlan_dev_get_iflink(const str
|
||||
return real_dev->ifindex;
|
||||
}
|
||||
|
||||
+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
|
||||
+
|
||||
+ path->type = DEV_PATH_VLAN;
|
||||
+ path->encap.id = vlan->vlan_id;
|
||||
+ path->encap.proto = vlan->vlan_proto;
|
||||
+ path->dev = ctx->dev;
|
||||
+ ctx->dev = vlan->real_dev;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static const struct ethtool_ops vlan_ethtool_ops = {
|
||||
.get_link_ksettings = vlan_ethtool_get_link_ksettings,
|
||||
.get_drvinfo = vlan_ethtool_get_drvinfo,
|
||||
@@ -808,6 +822,7 @@ static const struct net_device_ops vlan_
|
||||
#endif
|
||||
.ndo_fix_features = vlan_dev_fix_features,
|
||||
.ndo_get_iflink = vlan_dev_get_iflink,
|
||||
+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
|
||||
};
|
||||
|
||||
static void vlan_dev_free(struct net_device *dev)
|
@ -1,62 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:34 +0100
|
||||
Subject: [PATCH] net: bridge: resolve forwarding path for bridge devices
|
||||
|
||||
Add .ndo_fill_forward_path for bridge devices.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -830,6 +830,7 @@ typedef u16 (*select_queue_fallback_t)(s
|
||||
enum net_device_path_type {
|
||||
DEV_PATH_ETHERNET = 0,
|
||||
DEV_PATH_VLAN,
|
||||
+ DEV_PATH_BRIDGE,
|
||||
};
|
||||
|
||||
struct net_device_path {
|
||||
--- a/net/bridge/br_device.c
|
||||
+++ b/net/bridge/br_device.c
|
||||
@@ -398,6 +398,32 @@ static int br_del_slave(struct net_devic
|
||||
return br_del_if(br, slave_dev);
|
||||
}
|
||||
|
||||
+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct net_bridge_fdb_entry *f;
|
||||
+ struct net_bridge_port *dst;
|
||||
+ struct net_bridge *br;
|
||||
+
|
||||
+ if (netif_is_bridge_port(ctx->dev))
|
||||
+ return -1;
|
||||
+
|
||||
+ br = netdev_priv(ctx->dev);
|
||||
+ f = br_fdb_find_rcu(br, ctx->daddr, 0);
|
||||
+ if (!f || !f->dst)
|
||||
+ return -1;
|
||||
+
|
||||
+ dst = READ_ONCE(f->dst);
|
||||
+ if (!dst)
|
||||
+ return -1;
|
||||
+
|
||||
+ path->type = DEV_PATH_BRIDGE;
|
||||
+ path->dev = dst->br->dev;
|
||||
+ ctx->dev = dst->dev;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static const struct ethtool_ops br_ethtool_ops = {
|
||||
.get_drvinfo = br_getinfo,
|
||||
.get_link = ethtool_op_get_link,
|
||||
@@ -432,6 +458,7 @@ static const struct net_device_ops br_ne
|
||||
.ndo_bridge_setlink = br_setlink,
|
||||
.ndo_bridge_dellink = br_dellink,
|
||||
.ndo_features_check = passthru_features_check,
|
||||
+ .ndo_fill_forward_path = br_fill_forward_path,
|
||||
};
|
||||
|
||||
static struct device_type br_type = {
|
@ -1,207 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:35 +0100
|
||||
Subject: [PATCH] net: bridge: resolve forwarding path for VLAN tag
|
||||
actions in bridge devices
|
||||
|
||||
Depending on the VLAN settings of the bridge and the port, the bridge can
|
||||
either add or remove a tag. When vlan filtering is enabled, the fdb lookup
|
||||
also needs to know the VLAN tag/proto for the destination address.
|
||||
To provide this, keep track of the stack of VLAN tags for the path in the
|
||||
lookup context.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -841,10 +841,20 @@ struct net_device_path {
|
||||
u16 id;
|
||||
__be16 proto;
|
||||
} encap;
|
||||
+ struct {
|
||||
+ enum {
|
||||
+ DEV_PATH_BR_VLAN_KEEP,
|
||||
+ DEV_PATH_BR_VLAN_TAG,
|
||||
+ DEV_PATH_BR_VLAN_UNTAG,
|
||||
+ } vlan_mode;
|
||||
+ u16 vlan_id;
|
||||
+ __be16 vlan_proto;
|
||||
+ } bridge;
|
||||
};
|
||||
};
|
||||
|
||||
#define NET_DEVICE_PATH_STACK_MAX 5
|
||||
+#define NET_DEVICE_PATH_VLAN_MAX 2
|
||||
|
||||
struct net_device_path_stack {
|
||||
int num_paths;
|
||||
@@ -854,6 +864,12 @@ struct net_device_path_stack {
|
||||
struct net_device_path_ctx {
|
||||
const struct net_device *dev;
|
||||
const u8 *daddr;
|
||||
+
|
||||
+ int num_vlans;
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
|
||||
};
|
||||
|
||||
enum tc_setup_type {
|
||||
--- a/net/8021q/vlan_dev.c
|
||||
+++ b/net/8021q/vlan_dev.c
|
||||
@@ -780,6 +780,12 @@ static int vlan_dev_fill_forward_path(st
|
||||
path->encap.proto = vlan->vlan_proto;
|
||||
path->dev = ctx->dev;
|
||||
ctx->dev = vlan->real_dev;
|
||||
+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
|
||||
+ return -ENOSPC;
|
||||
+
|
||||
+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
|
||||
+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
|
||||
+ ctx->num_vlans++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
--- a/net/bridge/br_device.c
|
||||
+++ b/net/bridge/br_device.c
|
||||
@@ -409,7 +409,10 @@ static int br_fill_forward_path(struct n
|
||||
return -1;
|
||||
|
||||
br = netdev_priv(ctx->dev);
|
||||
- f = br_fdb_find_rcu(br, ctx->daddr, 0);
|
||||
+
|
||||
+ br_vlan_fill_forward_path_pvid(br, ctx, path);
|
||||
+
|
||||
+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
|
||||
if (!f || !f->dst)
|
||||
return -1;
|
||||
|
||||
@@ -417,10 +420,28 @@ static int br_fill_forward_path(struct n
|
||||
if (!dst)
|
||||
return -1;
|
||||
|
||||
+ if (br_vlan_fill_forward_path_mode(br, dst, path))
|
||||
+ return -1;
|
||||
+
|
||||
path->type = DEV_PATH_BRIDGE;
|
||||
path->dev = dst->br->dev;
|
||||
ctx->dev = dst->dev;
|
||||
|
||||
+ switch (path->bridge.vlan_mode) {
|
||||
+ case DEV_PATH_BR_VLAN_TAG:
|
||||
+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
|
||||
+ return -ENOSPC;
|
||||
+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
|
||||
+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
|
||||
+ ctx->num_vlans++;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG:
|
||||
+ ctx->num_vlans--;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_KEEP:
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
--- a/net/bridge/br_private.h
|
||||
+++ b/net/bridge/br_private.h
|
||||
@@ -1093,6 +1093,13 @@ void br_vlan_notify(const struct net_bri
|
||||
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
|
||||
const struct net_bridge_vlan *range_end);
|
||||
|
||||
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
|
||||
+ struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path);
|
||||
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
|
||||
+ struct net_bridge_port *dst,
|
||||
+ struct net_device_path *path);
|
||||
+
|
||||
static inline struct net_bridge_vlan_group *br_vlan_group(
|
||||
const struct net_bridge *br)
|
||||
{
|
||||
@@ -1250,6 +1257,19 @@ static inline int nbp_get_num_vlan_infos
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
|
||||
+ struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
|
||||
+ struct net_bridge_port *dst,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
|
||||
static inline struct net_bridge_vlan_group *br_vlan_group(
|
||||
const struct net_bridge *br)
|
||||
--- a/net/bridge/br_vlan.c
|
||||
+++ b/net/bridge/br_vlan.c
|
||||
@@ -1327,6 +1327,59 @@ int br_vlan_get_pvid_rcu(const struct ne
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
|
||||
|
||||
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
|
||||
+ struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct net_bridge_vlan_group *vg;
|
||||
+ int idx = ctx->num_vlans - 1;
|
||||
+ u16 vid;
|
||||
+
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
|
||||
+
|
||||
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
|
||||
+ return;
|
||||
+
|
||||
+ vg = br_vlan_group(br);
|
||||
+
|
||||
+ if (idx >= 0 &&
|
||||
+ ctx->vlan[idx].proto == br->vlan_proto) {
|
||||
+ vid = ctx->vlan[idx].id;
|
||||
+ } else {
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
|
||||
+ vid = br_get_pvid(vg);
|
||||
+ }
|
||||
+
|
||||
+ path->bridge.vlan_id = vid;
|
||||
+ path->bridge.vlan_proto = br->vlan_proto;
|
||||
+}
|
||||
+
|
||||
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
|
||||
+ struct net_bridge_port *dst,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct net_bridge_vlan_group *vg;
|
||||
+ struct net_bridge_vlan *v;
|
||||
+
|
||||
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
|
||||
+ return 0;
|
||||
+
|
||||
+ vg = nbp_vlan_group_rcu(dst);
|
||||
+ v = br_vlan_find(vg, path->bridge.vlan_id);
|
||||
+ if (!v || !br_vlan_should_use(v))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
|
||||
+ return 0;
|
||||
+
|
||||
+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
|
||||
+ else
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
int br_vlan_get_info(const struct net_device *dev, u16 vid,
|
||||
struct bridge_vlan_info *p_vinfo)
|
||||
{
|
@ -1,113 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:36 +0100
|
||||
Subject: [PATCH] net: ppp: resolve forwarding path for bridge pppoe
|
||||
devices
|
||||
|
||||
Pass on the PPPoE session ID, destination hardware address and the real
|
||||
device.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/drivers/net/ppp/ppp_generic.c
|
||||
+++ b/drivers/net/ppp/ppp_generic.c
|
||||
@@ -1466,12 +1466,34 @@ static void ppp_dev_priv_destructor(stru
|
||||
ppp_destroy_interface(ppp);
|
||||
}
|
||||
|
||||
+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct ppp *ppp = netdev_priv(ctx->dev);
|
||||
+ struct ppp_channel *chan;
|
||||
+ struct channel *pch;
|
||||
+
|
||||
+ if (ppp->flags & SC_MULTILINK)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if (list_empty(&ppp->channels))
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ pch = list_first_entry(&ppp->channels, struct channel, clist);
|
||||
+ chan = pch->chan;
|
||||
+ if (!chan->ops->fill_forward_path)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ return chan->ops->fill_forward_path(ctx, path, chan);
|
||||
+}
|
||||
+
|
||||
static const struct net_device_ops ppp_netdev_ops = {
|
||||
.ndo_init = ppp_dev_init,
|
||||
.ndo_uninit = ppp_dev_uninit,
|
||||
.ndo_start_xmit = ppp_start_xmit,
|
||||
.ndo_do_ioctl = ppp_net_ioctl,
|
||||
.ndo_get_stats64 = ppp_get_stats64,
|
||||
+ .ndo_fill_forward_path = ppp_fill_forward_path,
|
||||
};
|
||||
|
||||
static struct device_type ppp_type = {
|
||||
--- a/drivers/net/ppp/pppoe.c
|
||||
+++ b/drivers/net/ppp/pppoe.c
|
||||
@@ -972,8 +972,31 @@ static int pppoe_xmit(struct ppp_channel
|
||||
return __pppoe_xmit(sk, skb);
|
||||
}
|
||||
|
||||
+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path,
|
||||
+ const struct ppp_channel *chan)
|
||||
+{
|
||||
+ struct sock *sk = (struct sock *)chan->private;
|
||||
+ struct pppox_sock *po = pppox_sk(sk);
|
||||
+ struct net_device *dev = po->pppoe_dev;
|
||||
+
|
||||
+ if (sock_flag(sk, SOCK_DEAD) ||
|
||||
+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
|
||||
+ return -1;
|
||||
+
|
||||
+ path->type = DEV_PATH_PPPOE;
|
||||
+ path->encap.proto = htons(ETH_P_PPP_SES);
|
||||
+ path->encap.id = be16_to_cpu(po->num);
|
||||
+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
|
||||
+ path->dev = ctx->dev;
|
||||
+ ctx->dev = dev;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static const struct ppp_channel_ops pppoe_chan_ops = {
|
||||
.start_xmit = pppoe_xmit,
|
||||
+ .fill_forward_path = pppoe_fill_forward_path,
|
||||
};
|
||||
|
||||
static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -831,6 +831,7 @@ enum net_device_path_type {
|
||||
DEV_PATH_ETHERNET = 0,
|
||||
DEV_PATH_VLAN,
|
||||
DEV_PATH_BRIDGE,
|
||||
+ DEV_PATH_PPPOE,
|
||||
};
|
||||
|
||||
struct net_device_path {
|
||||
@@ -840,6 +841,7 @@ struct net_device_path {
|
||||
struct {
|
||||
u16 id;
|
||||
__be16 proto;
|
||||
+ u8 h_dest[ETH_ALEN];
|
||||
} encap;
|
||||
struct {
|
||||
enum {
|
||||
--- a/include/linux/ppp_channel.h
|
||||
+++ b/include/linux/ppp_channel.h
|
||||
@@ -28,6 +28,9 @@ struct ppp_channel_ops {
|
||||
int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
|
||||
/* Handle an ioctl call that has come in via /dev/ppp. */
|
||||
int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
|
||||
+ int (*fill_forward_path)(struct net_device_path_ctx *,
|
||||
+ struct net_device_path *,
|
||||
+ const struct ppp_channel *);
|
||||
};
|
||||
|
||||
struct ppp_channel {
|
@ -1,63 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:37 +0100
|
||||
Subject: [PATCH] net: dsa: resolve forwarding path for dsa slave ports
|
||||
|
||||
Add .ndo_fill_forward_path for dsa slave port devices.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -832,6 +832,7 @@ enum net_device_path_type {
|
||||
DEV_PATH_VLAN,
|
||||
DEV_PATH_BRIDGE,
|
||||
DEV_PATH_PPPOE,
|
||||
+ DEV_PATH_DSA,
|
||||
};
|
||||
|
||||
struct net_device_path {
|
||||
@@ -852,6 +853,10 @@ struct net_device_path {
|
||||
u16 vlan_id;
|
||||
__be16 vlan_proto;
|
||||
} bridge;
|
||||
+ struct {
|
||||
+ int port;
|
||||
+ u16 proto;
|
||||
+ } dsa;
|
||||
};
|
||||
};
|
||||
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -1619,6 +1619,21 @@ static struct devlink_port *dsa_slave_ge
|
||||
return dp->ds->devlink ? &dp->devlink_port : NULL;
|
||||
}
|
||||
|
||||
+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
+ struct net_device_path *path)
|
||||
+{
|
||||
+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
|
||||
+ struct dsa_port *cpu_dp = dp->cpu_dp;
|
||||
+
|
||||
+ path->dev = ctx->dev;
|
||||
+ path->type = DEV_PATH_DSA;
|
||||
+ path->dsa.proto = cpu_dp->tag_ops->proto;
|
||||
+ path->dsa.port = dp->index;
|
||||
+ ctx->dev = cpu_dp->master;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static const struct net_device_ops dsa_slave_netdev_ops = {
|
||||
.ndo_open = dsa_slave_open,
|
||||
.ndo_stop = dsa_slave_close,
|
||||
@@ -1644,6 +1659,7 @@ static const struct net_device_ops dsa_s
|
||||
.ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
|
||||
.ndo_get_devlink_port = dsa_slave_get_devlink_port,
|
||||
.ndo_change_mtu = dsa_slave_change_mtu,
|
||||
+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
|
||||
};
|
||||
|
||||
static struct device_type dsa_type = {
|
@ -1,147 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:38 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add xmit path types
|
||||
|
||||
Add the xmit_type field that defines the two supported xmit paths in the
|
||||
flowtable data plane, which are the neighbour and the xfrm xmit paths.
|
||||
This patch prepares for new flowtable xmit path types to come.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -89,6 +89,11 @@ enum flow_offload_tuple_dir {
|
||||
};
|
||||
#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
|
||||
|
||||
+enum flow_offload_xmit_type {
|
||||
+ FLOW_OFFLOAD_XMIT_NEIGH = 0,
|
||||
+ FLOW_OFFLOAD_XMIT_XFRM,
|
||||
+};
|
||||
+
|
||||
struct flow_offload_tuple {
|
||||
union {
|
||||
struct in_addr src_v4;
|
||||
@@ -111,7 +116,8 @@ struct flow_offload_tuple {
|
||||
/* All members above are keys for lookups, see flow_offload_hash(). */
|
||||
struct { } __hash;
|
||||
|
||||
- u8 dir;
|
||||
+ u8 dir:6,
|
||||
+ xmit_type:2;
|
||||
|
||||
u16 mtu;
|
||||
|
||||
@@ -157,7 +163,8 @@ static inline __s32 nf_flow_timeout_delt
|
||||
|
||||
struct nf_flow_route {
|
||||
struct {
|
||||
- struct dst_entry *dst;
|
||||
+ struct dst_entry *dst;
|
||||
+ enum flow_offload_xmit_type xmit_type;
|
||||
} tuple[FLOW_OFFLOAD_DIR_MAX];
|
||||
};
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -95,6 +95,7 @@ static int flow_offload_fill_route(struc
|
||||
}
|
||||
|
||||
flow_tuple->iifidx = other_dst->dev->ifindex;
|
||||
+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
|
||||
flow_tuple->dst_cache = dst;
|
||||
|
||||
return 0;
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -235,8 +235,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||
- outdev = rt->dst.dev;
|
||||
|
||||
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
return NF_ACCEPT;
|
||||
@@ -265,13 +263,16 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
- if (unlikely(dst_xfrm(&rt->dst))) {
|
||||
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
+
|
||||
+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
|
||||
IPCB(skb)->iif = skb->dev->ifindex;
|
||||
IPCB(skb)->flags = IPSKB_FORWARDED;
|
||||
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
|
||||
}
|
||||
|
||||
+ outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||
skb_dst_set_noref(skb, &rt->dst);
|
||||
@@ -456,8 +457,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
|
||||
- outdev = rt->dst.dev;
|
||||
|
||||
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
return NF_ACCEPT;
|
||||
@@ -485,13 +484,16 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
- if (unlikely(dst_xfrm(&rt->dst))) {
|
||||
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
+
|
||||
+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
|
||||
IP6CB(skb)->iif = skb->dev->ifindex;
|
||||
IP6CB(skb)->flags = IP6SKB_FORWARDED;
|
||||
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
|
||||
}
|
||||
|
||||
+ outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||
skb_dst_set_noref(skb, &rt->dst);
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -19,6 +19,22 @@ struct nft_flow_offload {
|
||||
struct nft_flowtable *flowtable;
|
||||
};
|
||||
|
||||
+static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst)
|
||||
+{
|
||||
+ if (dst_xfrm(dst))
|
||||
+ return FLOW_OFFLOAD_XMIT_XFRM;
|
||||
+
|
||||
+ return FLOW_OFFLOAD_XMIT_NEIGH;
|
||||
+}
|
||||
+
|
||||
+static void nft_default_forward_path(struct nf_flow_route *route,
|
||||
+ struct dst_entry *dst_cache,
|
||||
+ enum ip_conntrack_dir dir)
|
||||
+{
|
||||
+ route->tuple[dir].dst = dst_cache;
|
||||
+ route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
|
||||
+}
|
||||
+
|
||||
static int nft_flow_route(const struct nft_pktinfo *pkt,
|
||||
const struct nf_conn *ct,
|
||||
struct nf_flow_route *route,
|
||||
@@ -44,8 +60,8 @@ static int nft_flow_route(const struct n
|
||||
if (!other_dst)
|
||||
return -ENOENT;
|
||||
|
||||
- route->tuple[dir].dst = this_dst;
|
||||
- route->tuple[!dir].dst = other_dst;
|
||||
+ nft_default_forward_path(route, this_dst, dir);
|
||||
+ nft_default_forward_path(route, other_dst, !dir);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,191 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:39 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: use dev_fill_forward_path() to
|
||||
obtain ingress device
|
||||
|
||||
The ingress device in the tuple is obtained from the route in the reply
|
||||
direction. Use dev_fill_forward_path() instead to get the real ingress
|
||||
device for this flow.
|
||||
|
||||
Fall back to using the ingress device that the IP forwarding route
|
||||
provides if:
|
||||
|
||||
- dev_fill_forward_path() finds no real ingress device.
|
||||
- the ingress device that is obtained is not part of the flowtable
|
||||
devices.
|
||||
- this route has an xfrm policy.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -164,6 +164,9 @@ static inline __s32 nf_flow_timeout_delt
|
||||
struct nf_flow_route {
|
||||
struct {
|
||||
struct dst_entry *dst;
|
||||
+ struct {
|
||||
+ u32 ifindex;
|
||||
+ } in;
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
} tuple[FLOW_OFFLOAD_DIR_MAX];
|
||||
};
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -79,7 +79,6 @@ static int flow_offload_fill_route(struc
|
||||
enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
|
||||
- struct dst_entry *other_dst = route->tuple[!dir].dst;
|
||||
struct dst_entry *dst = route->tuple[dir].dst;
|
||||
|
||||
if (!dst_hold_safe(route->tuple[dir].dst))
|
||||
@@ -94,7 +93,7 @@ static int flow_offload_fill_route(struc
|
||||
break;
|
||||
}
|
||||
|
||||
- flow_tuple->iifidx = other_dst->dev->ifindex;
|
||||
+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
|
||||
flow_tuple->xmit_type = route->tuple[dir].xmit_type;
|
||||
flow_tuple->dst_cache = dst;
|
||||
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -31,14 +31,104 @@ static void nft_default_forward_path(str
|
||||
struct dst_entry *dst_cache,
|
||||
enum ip_conntrack_dir dir)
|
||||
{
|
||||
+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
|
||||
route->tuple[dir].dst = dst_cache;
|
||||
route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
|
||||
}
|
||||
|
||||
+static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
|
||||
+ const struct dst_entry *dst_cache,
|
||||
+ const struct nf_conn *ct,
|
||||
+ enum ip_conntrack_dir dir,
|
||||
+ struct net_device_path_stack *stack)
|
||||
+{
|
||||
+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
|
||||
+ struct net_device *dev = dst_cache->dev;
|
||||
+ unsigned char ha[ETH_ALEN];
|
||||
+ struct neighbour *n;
|
||||
+ u8 nud_state;
|
||||
+
|
||||
+ n = dst_neigh_lookup(dst_cache, daddr);
|
||||
+ if (!n)
|
||||
+ return -1;
|
||||
+
|
||||
+ read_lock_bh(&n->lock);
|
||||
+ nud_state = n->nud_state;
|
||||
+ ether_addr_copy(ha, n->ha);
|
||||
+ read_unlock_bh(&n->lock);
|
||||
+ neigh_release(n);
|
||||
+
|
||||
+ if (!(nud_state & NUD_VALID))
|
||||
+ return -1;
|
||||
+
|
||||
+ return dev_fill_forward_path(dev, ha, stack);
|
||||
+}
|
||||
+
|
||||
+struct nft_forward_info {
|
||||
+ const struct net_device *indev;
|
||||
+};
|
||||
+
|
||||
+static void nft_dev_path_info(const struct net_device_path_stack *stack,
|
||||
+ struct nft_forward_info *info)
|
||||
+{
|
||||
+ const struct net_device_path *path;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < stack->num_paths; i++) {
|
||||
+ path = &stack->path[i];
|
||||
+ switch (path->type) {
|
||||
+ case DEV_PATH_ETHERNET:
|
||||
+ info->indev = path->dev;
|
||||
+ break;
|
||||
+ case DEV_PATH_VLAN:
|
||||
+ case DEV_PATH_BRIDGE:
|
||||
+ default:
|
||||
+ info->indev = NULL;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static bool nft_flowtable_find_dev(const struct net_device *dev,
|
||||
+ struct nft_flowtable *ft)
|
||||
+{
|
||||
+ struct nft_hook *hook;
|
||||
+ bool found = false;
|
||||
+
|
||||
+ list_for_each_entry_rcu(hook, &ft->hook_list, list) {
|
||||
+ if (hook->ops.dev != dev)
|
||||
+ continue;
|
||||
+
|
||||
+ found = true;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ return found;
|
||||
+}
|
||||
+
|
||||
+static void nft_dev_forward_path(struct nf_flow_route *route,
|
||||
+ const struct nf_conn *ct,
|
||||
+ enum ip_conntrack_dir dir,
|
||||
+ struct nft_flowtable *ft)
|
||||
+{
|
||||
+ const struct dst_entry *dst = route->tuple[dir].dst;
|
||||
+ struct net_device_path_stack stack;
|
||||
+ struct nft_forward_info info = {};
|
||||
+
|
||||
+ if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
|
||||
+ nft_dev_path_info(&stack, &info);
|
||||
+
|
||||
+ if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
|
||||
+ return;
|
||||
+
|
||||
+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
|
||||
+}
|
||||
+
|
||||
static int nft_flow_route(const struct nft_pktinfo *pkt,
|
||||
const struct nf_conn *ct,
|
||||
struct nf_flow_route *route,
|
||||
- enum ip_conntrack_dir dir)
|
||||
+ enum ip_conntrack_dir dir,
|
||||
+ struct nft_flowtable *ft)
|
||||
{
|
||||
struct dst_entry *this_dst = skb_dst(pkt->skb);
|
||||
struct dst_entry *other_dst = NULL;
|
||||
@@ -63,6 +153,12 @@ static int nft_flow_route(const struct n
|
||||
nft_default_forward_path(route, this_dst, dir);
|
||||
nft_default_forward_path(route, other_dst, !dir);
|
||||
|
||||
+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
|
||||
+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
|
||||
+ nft_dev_forward_path(route, ct, dir, ft);
|
||||
+ nft_dev_forward_path(route, ct, !dir, ft);
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -90,8 +186,8 @@ static void nft_flow_offload_eval(const
|
||||
struct nft_flow_offload *priv = nft_expr_priv(expr);
|
||||
struct nf_flowtable *flowtable = &priv->flowtable->data;
|
||||
struct tcphdr _tcph, *tcph = NULL;
|
||||
+ struct nf_flow_route route = {};
|
||||
enum ip_conntrack_info ctinfo;
|
||||
- struct nf_flow_route route;
|
||||
struct flow_offload *flow;
|
||||
enum ip_conntrack_dir dir;
|
||||
struct nf_conn *ct;
|
||||
@@ -128,7 +224,7 @@ static void nft_flow_offload_eval(const
|
||||
goto out;
|
||||
|
||||
dir = CTINFO2DIR(ctinfo);
|
||||
- if (nft_flow_route(pkt, ct, &route, dir) < 0)
|
||||
+ if (nft_flow_route(pkt, ct, &route, dir, priv->flowtable) < 0)
|
||||
goto err_flow_route;
|
||||
|
||||
flow = flow_offload_alloc(ct);
|
@ -1,374 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:40 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: use dev_fill_forward_path() to
|
||||
obtain egress device
|
||||
|
||||
The egress device in the tuple is obtained from the route. Use
|
||||
dev_fill_forward_path() instead to provide the real egress device for
|
||||
this flow whenever this is available.
|
||||
|
||||
The new FLOW_OFFLOAD_XMIT_DIRECT type uses dev_queue_xmit() to transmit
|
||||
ethernet frames. Cache the source and destination hardware address to
|
||||
use dev_queue_xmit() to transfer packets.
|
||||
|
||||
The FLOW_OFFLOAD_XMIT_DIRECT replaces FLOW_OFFLOAD_XMIT_NEIGH if
|
||||
dev_fill_forward_path() finds a direct transmit path.
|
||||
|
||||
In case of topology updates, if the peer is moved to a different bridge port,
|
||||
the connection will time out; reconnecting will result in a new entry with
|
||||
the correct path. Snooping fdb updates would allow for cleaning up stale
|
||||
flowtable entries.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -92,6 +92,7 @@ enum flow_offload_tuple_dir {
|
||||
enum flow_offload_xmit_type {
|
||||
FLOW_OFFLOAD_XMIT_NEIGH = 0,
|
||||
FLOW_OFFLOAD_XMIT_XFRM,
|
||||
+ FLOW_OFFLOAD_XMIT_DIRECT,
|
||||
};
|
||||
|
||||
struct flow_offload_tuple {
|
||||
@@ -120,8 +121,14 @@ struct flow_offload_tuple {
|
||||
xmit_type:2;
|
||||
|
||||
u16 mtu;
|
||||
-
|
||||
- struct dst_entry *dst_cache;
|
||||
+ union {
|
||||
+ struct dst_entry *dst_cache;
|
||||
+ struct {
|
||||
+ u32 ifidx;
|
||||
+ u8 h_source[ETH_ALEN];
|
||||
+ u8 h_dest[ETH_ALEN];
|
||||
+ } out;
|
||||
+ };
|
||||
};
|
||||
|
||||
struct flow_offload_tuple_rhash {
|
||||
@@ -167,6 +174,11 @@ struct nf_flow_route {
|
||||
struct {
|
||||
u32 ifindex;
|
||||
} in;
|
||||
+ struct {
|
||||
+ u32 ifindex;
|
||||
+ u8 h_source[ETH_ALEN];
|
||||
+ u8 h_dest[ETH_ALEN];
|
||||
+ } out;
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
} tuple[FLOW_OFFLOAD_DIR_MAX];
|
||||
};
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -81,9 +81,6 @@ static int flow_offload_fill_route(struc
|
||||
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
|
||||
struct dst_entry *dst = route->tuple[dir].dst;
|
||||
|
||||
- if (!dst_hold_safe(route->tuple[dir].dst))
|
||||
- return -1;
|
||||
-
|
||||
switch (flow_tuple->l3proto) {
|
||||
case NFPROTO_IPV4:
|
||||
flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
|
||||
@@ -94,12 +91,36 @@ static int flow_offload_fill_route(struc
|
||||
}
|
||||
|
||||
flow_tuple->iifidx = route->tuple[dir].in.ifindex;
|
||||
+
|
||||
+ switch (route->tuple[dir].xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
|
||||
+ ETH_ALEN);
|
||||
+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
|
||||
+ ETH_ALEN);
|
||||
+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_XFRM:
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ if (!dst_hold_safe(route->tuple[dir].dst))
|
||||
+ return -1;
|
||||
+
|
||||
+ flow_tuple->dst_cache = dst;
|
||||
+ break;
|
||||
+ }
|
||||
flow_tuple->xmit_type = route->tuple[dir].xmit_type;
|
||||
- flow_tuple->dst_cache = dst;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void nft_flow_dst_release(struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir)
|
||||
+{
|
||||
+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
|
||||
+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
|
||||
+}
|
||||
+
|
||||
int flow_offload_route_init(struct flow_offload *flow,
|
||||
const struct nf_flow_route *route)
|
||||
{
|
||||
@@ -118,7 +139,7 @@ int flow_offload_route_init(struct flow_
|
||||
return 0;
|
||||
|
||||
err_route_reply:
|
||||
- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
|
||||
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -169,8 +190,8 @@ static void flow_offload_fixup_ct(struct
|
||||
|
||||
static void flow_offload_route_release(struct flow_offload *flow)
|
||||
{
|
||||
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
|
||||
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
|
||||
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
|
||||
}
|
||||
|
||||
void flow_offload_free(struct flow_offload *flow)
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -207,6 +207,24 @@ static unsigned int nf_flow_xmit_xfrm(st
|
||||
return NF_STOLEN;
|
||||
}
|
||||
|
||||
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
|
||||
+ const struct flow_offload_tuple_rhash *tuplehash,
|
||||
+ unsigned short type)
|
||||
+{
|
||||
+ struct net_device *outdev;
|
||||
+
|
||||
+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
|
||||
+ if (!outdev)
|
||||
+ return NF_DROP;
|
||||
+
|
||||
+ skb->dev = outdev;
|
||||
+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
|
||||
+ tuplehash->tuple.out.h_source, skb->len);
|
||||
+ dev_queue_xmit(skb);
|
||||
+
|
||||
+ return NF_STOLEN;
|
||||
+}
|
||||
+
|
||||
unsigned int
|
||||
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||
const struct nf_hook_state *state)
|
||||
@@ -222,6 +240,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
struct iphdr *iph;
|
||||
__be32 nexthop;
|
||||
u32 hdrsize;
|
||||
+ int ret;
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IP))
|
||||
return NF_ACCEPT;
|
||||
@@ -244,9 +263,13 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (!dst_check(&rt->dst, 0)) {
|
||||
- flow_offload_teardown(flow);
|
||||
- return NF_ACCEPT;
|
||||
+ if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
+ tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
+ if (!dst_check(&rt->dst, 0)) {
|
||||
+ flow_offload_teardown(flow);
|
||||
+ return NF_ACCEPT;
|
||||
+ }
|
||||
}
|
||||
|
||||
if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
@@ -263,8 +286,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
-
|
||||
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
|
||||
IPCB(skb)->iif = skb->dev->ifindex;
|
||||
@@ -272,13 +293,23 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
|
||||
}
|
||||
|
||||
- outdev = rt->dst.dev;
|
||||
- skb->dev = outdev;
|
||||
- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||
- skb_dst_set_noref(skb, &rt->dst);
|
||||
- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
|
||||
+ switch (tuplehash->tuple.xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ outdev = rt->dst.dev;
|
||||
+ skb->dev = outdev;
|
||||
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||
+ skb_dst_set_noref(skb, &rt->dst);
|
||||
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
|
||||
+ ret = NF_STOLEN;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
|
||||
+ if (ret == NF_DROP)
|
||||
+ flow_offload_teardown(flow);
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
- return NF_STOLEN;
|
||||
+ return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
|
||||
|
||||
@@ -444,6 +475,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
struct ipv6hdr *ip6h;
|
||||
struct rt6_info *rt;
|
||||
u32 hdrsize;
|
||||
+ int ret;
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IPV6))
|
||||
return NF_ACCEPT;
|
||||
@@ -465,9 +497,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
sizeof(*ip6h)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (!dst_check(&rt->dst, 0)) {
|
||||
- flow_offload_teardown(flow);
|
||||
- return NF_ACCEPT;
|
||||
+ if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
+ tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
+ if (!dst_check(&rt->dst, 0)) {
|
||||
+ flow_offload_teardown(flow);
|
||||
+ return NF_ACCEPT;
|
||||
+ }
|
||||
}
|
||||
|
||||
if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
|
||||
@@ -484,8 +520,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
-
|
||||
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
|
||||
IP6CB(skb)->iif = skb->dev->ifindex;
|
||||
@@ -493,12 +527,22 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
|
||||
}
|
||||
|
||||
- outdev = rt->dst.dev;
|
||||
- skb->dev = outdev;
|
||||
- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||
- skb_dst_set_noref(skb, &rt->dst);
|
||||
- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
|
||||
+ switch (tuplehash->tuple.xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ outdev = rt->dst.dev;
|
||||
+ skb->dev = outdev;
|
||||
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||
+ skb_dst_set_noref(skb, &rt->dst);
|
||||
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
|
||||
+ ret = NF_STOLEN;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
|
||||
+ if (ret == NF_DROP)
|
||||
+ flow_offload_teardown(flow);
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
- return NF_STOLEN;
|
||||
+ return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -39,12 +39,11 @@ static void nft_default_forward_path(str
|
||||
static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
|
||||
const struct dst_entry *dst_cache,
|
||||
const struct nf_conn *ct,
|
||||
- enum ip_conntrack_dir dir,
|
||||
+ enum ip_conntrack_dir dir, u8 *ha,
|
||||
struct net_device_path_stack *stack)
|
||||
{
|
||||
const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
|
||||
struct net_device *dev = dst_cache->dev;
|
||||
- unsigned char ha[ETH_ALEN];
|
||||
struct neighbour *n;
|
||||
u8 nud_state;
|
||||
|
||||
@@ -66,27 +65,43 @@ static int nft_dev_fill_forward_path(con
|
||||
|
||||
struct nft_forward_info {
|
||||
const struct net_device *indev;
|
||||
+ const struct net_device *outdev;
|
||||
+ u8 h_source[ETH_ALEN];
|
||||
+ u8 h_dest[ETH_ALEN];
|
||||
+ enum flow_offload_xmit_type xmit_type;
|
||||
};
|
||||
|
||||
static void nft_dev_path_info(const struct net_device_path_stack *stack,
|
||||
- struct nft_forward_info *info)
|
||||
+ struct nft_forward_info *info,
|
||||
+ unsigned char *ha)
|
||||
{
|
||||
const struct net_device_path *path;
|
||||
int i;
|
||||
|
||||
+ memcpy(info->h_dest, ha, ETH_ALEN);
|
||||
+
|
||||
for (i = 0; i < stack->num_paths; i++) {
|
||||
path = &stack->path[i];
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
info->indev = path->dev;
|
||||
+ if (is_zero_ether_addr(info->h_source))
|
||||
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
break;
|
||||
- case DEV_PATH_VLAN:
|
||||
case DEV_PATH_BRIDGE:
|
||||
+ if (is_zero_ether_addr(info->h_source))
|
||||
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
+
|
||||
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
+ break;
|
||||
+ case DEV_PATH_VLAN:
|
||||
default:
|
||||
info->indev = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
+ if (!info->outdev)
|
||||
+ info->outdev = info->indev;
|
||||
}
|
||||
|
||||
static bool nft_flowtable_find_dev(const struct net_device *dev,
|
||||
@@ -114,14 +129,22 @@ static void nft_dev_forward_path(struct
|
||||
const struct dst_entry *dst = route->tuple[dir].dst;
|
||||
struct net_device_path_stack stack;
|
||||
struct nft_forward_info info = {};
|
||||
+ unsigned char ha[ETH_ALEN];
|
||||
|
||||
- if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
|
||||
- nft_dev_path_info(&stack, &info);
|
||||
+ if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
|
||||
+ nft_dev_path_info(&stack, &info, ha);
|
||||
|
||||
if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
|
||||
return;
|
||||
|
||||
route->tuple[!dir].in.ifindex = info.indev->ifindex;
|
||||
+
|
||||
+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
|
||||
+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
||||
+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
|
||||
+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
|
||||
+ route->tuple[dir].xmit_type = info.xmit_type;
|
||||
+ }
|
||||
}
|
||||
|
||||
static int nft_flow_route(const struct nft_pktinfo *pkt,
|
@ -1,410 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:41 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add vlan support
|
||||
|
||||
Add the vlan id and protocol to the flow tuple to uniquely identify
|
||||
flows from the receive path. For the transmit path, dev_hard_header() on
|
||||
the vlan device pushes the headers. This patch includes support for two
|
||||
vlan headers (QinQ) from the ingress path.
|
||||
|
||||
Add a generic encap field to the flowtable entry which stores the
|
||||
protocol and the tag id. This allows reusing these fields in the PPPoE
|
||||
support coming in a later patch.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -95,6 +95,8 @@ enum flow_offload_xmit_type {
|
||||
FLOW_OFFLOAD_XMIT_DIRECT,
|
||||
};
|
||||
|
||||
+#define NF_FLOW_TABLE_ENCAP_MAX 2
|
||||
+
|
||||
struct flow_offload_tuple {
|
||||
union {
|
||||
struct in_addr src_v4;
|
||||
@@ -113,13 +115,17 @@ struct flow_offload_tuple {
|
||||
|
||||
u8 l3proto;
|
||||
u8 l4proto;
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
|
||||
/* All members above are keys for lookups, see flow_offload_hash(). */
|
||||
struct { } __hash;
|
||||
|
||||
- u8 dir:6,
|
||||
- xmit_type:2;
|
||||
-
|
||||
+ u8 dir:4,
|
||||
+ xmit_type:2,
|
||||
+ encap_num:2;
|
||||
u16 mtu;
|
||||
union {
|
||||
struct dst_entry *dst_cache;
|
||||
@@ -173,6 +179,11 @@ struct nf_flow_route {
|
||||
struct dst_entry *dst;
|
||||
struct {
|
||||
u32 ifindex;
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
+ u8 num_encaps;
|
||||
} in;
|
||||
struct {
|
||||
u32 ifindex;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -80,6 +80,7 @@ static int flow_offload_fill_route(struc
|
||||
{
|
||||
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
|
||||
struct dst_entry *dst = route->tuple[dir].dst;
|
||||
+ int i, j = 0;
|
||||
|
||||
switch (flow_tuple->l3proto) {
|
||||
case NFPROTO_IPV4:
|
||||
@@ -91,6 +92,12 @@ static int flow_offload_fill_route(struc
|
||||
}
|
||||
|
||||
flow_tuple->iifidx = route->tuple[dir].in.ifindex;
|
||||
+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
|
||||
+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
|
||||
+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
|
||||
+ j++;
|
||||
+ }
|
||||
+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
|
||||
|
||||
switch (route->tuple[dir].xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -136,23 +136,44 @@ static bool ip_has_options(unsigned int
|
||||
return thoff != sizeof(struct iphdr);
|
||||
}
|
||||
|
||||
+static void nf_flow_tuple_encap(struct sk_buff *skb,
|
||||
+ struct flow_offload_tuple *tuple)
|
||||
+{
|
||||
+ int i = 0;
|
||||
+
|
||||
+ if (skb_vlan_tag_present(skb)) {
|
||||
+ tuple->encap[i].id = skb_vlan_tag_get(skb);
|
||||
+ tuple->encap[i].proto = skb->vlan_proto;
|
||||
+ i++;
|
||||
+ }
|
||||
+ if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
+
|
||||
+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
|
||||
+ tuple->encap[i].proto = skb->protocol;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
|
||||
- struct flow_offload_tuple *tuple, u32 *hdrsize)
|
||||
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
|
||||
+ u32 offset)
|
||||
{
|
||||
struct flow_ports *ports;
|
||||
unsigned int thoff;
|
||||
struct iphdr *iph;
|
||||
|
||||
- if (!pskb_may_pull(skb, sizeof(*iph)))
|
||||
+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
|
||||
return -1;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
- thoff = iph->ihl * 4;
|
||||
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
|
||||
+ thoff = (iph->ihl * 4);
|
||||
|
||||
if (ip_is_fragment(iph) ||
|
||||
unlikely(ip_has_options(thoff)))
|
||||
return -1;
|
||||
|
||||
+ thoff += offset;
|
||||
+
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
*hdrsize = sizeof(struct tcphdr);
|
||||
@@ -167,11 +188,10 @@ static int nf_flow_tuple_ip(struct sk_bu
|
||||
if (iph->ttl <= 1)
|
||||
return -1;
|
||||
|
||||
- thoff = iph->ihl * 4;
|
||||
if (!pskb_may_pull(skb, thoff + *hdrsize))
|
||||
return -1;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
|
||||
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||
|
||||
tuple->src_v4.s_addr = iph->saddr;
|
||||
@@ -181,6 +201,7 @@ static int nf_flow_tuple_ip(struct sk_bu
|
||||
tuple->l3proto = AF_INET;
|
||||
tuple->l4proto = iph->protocol;
|
||||
tuple->iifidx = dev->ifindex;
|
||||
+ nf_flow_tuple_encap(skb, tuple);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -207,6 +228,43 @@ static unsigned int nf_flow_xmit_xfrm(st
|
||||
return NF_STOLEN;
|
||||
}
|
||||
|
||||
+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
|
||||
+ u32 *offset)
|
||||
+{
|
||||
+ if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
+ struct vlan_ethhdr *veth;
|
||||
+
|
||||
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
+ if (veth->h_vlan_encapsulated_proto == proto) {
|
||||
+ *offset += VLAN_HLEN;
|
||||
+ return true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static void nf_flow_encap_pop(struct sk_buff *skb,
|
||||
+ struct flow_offload_tuple_rhash *tuplehash)
|
||||
+{
|
||||
+ struct vlan_hdr *vlan_hdr;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
|
||||
+ if (skb_vlan_tag_present(skb)) {
|
||||
+ __vlan_hwaccel_clear_tag(skb);
|
||||
+ continue;
|
||||
+ }
|
||||
+ if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
+ vlan_hdr = (struct vlan_hdr *)skb->data;
|
||||
+ __skb_pull(skb, VLAN_HLEN);
|
||||
+ vlan_set_encap_proto(skb, vlan_hdr);
|
||||
+ skb_reset_network_header(skb);
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
|
||||
const struct flow_offload_tuple_rhash *tuplehash,
|
||||
unsigned short type)
|
||||
@@ -235,17 +293,18 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
enum flow_offload_tuple_dir dir;
|
||||
struct flow_offload *flow;
|
||||
struct net_device *outdev;
|
||||
+ u32 hdrsize, offset = 0;
|
||||
+ unsigned int thoff, mtu;
|
||||
struct rtable *rt;
|
||||
- unsigned int thoff;
|
||||
struct iphdr *iph;
|
||||
__be32 nexthop;
|
||||
- u32 hdrsize;
|
||||
int ret;
|
||||
|
||||
- if (skb->protocol != htons(ETH_P_IP))
|
||||
+ if (skb->protocol != htons(ETH_P_IP) &&
|
||||
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
|
||||
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
|
||||
return NF_ACCEPT;
|
||||
|
||||
tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||
@@ -255,11 +314,12 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
|
||||
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
|
||||
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- iph = ip_hdr(skb);
|
||||
- thoff = iph->ihl * 4;
|
||||
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
|
||||
+ thoff = (iph->ihl * 4) + offset;
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
@@ -277,6 +337,9 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
|
||||
+ nf_flow_encap_pop(skb, tuplehash);
|
||||
+ thoff -= offset;
|
||||
+
|
||||
iph = ip_hdr(skb);
|
||||
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
|
||||
|
||||
@@ -418,16 +481,18 @@ static void nf_flow_nat_ipv6(const struc
|
||||
}
|
||||
|
||||
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
|
||||
- struct flow_offload_tuple *tuple, u32 *hdrsize)
|
||||
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
|
||||
+ u32 offset)
|
||||
{
|
||||
struct flow_ports *ports;
|
||||
struct ipv6hdr *ip6h;
|
||||
unsigned int thoff;
|
||||
|
||||
- if (!pskb_may_pull(skb, sizeof(*ip6h)))
|
||||
+ thoff = sizeof(*ip6h) + offset;
|
||||
+ if (!pskb_may_pull(skb, thoff))
|
||||
return -1;
|
||||
|
||||
- ip6h = ipv6_hdr(skb);
|
||||
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
|
||||
|
||||
switch (ip6h->nexthdr) {
|
||||
case IPPROTO_TCP:
|
||||
@@ -443,11 +508,10 @@ static int nf_flow_tuple_ipv6(struct sk_
|
||||
if (ip6h->hop_limit <= 1)
|
||||
return -1;
|
||||
|
||||
- thoff = sizeof(*ip6h);
|
||||
if (!pskb_may_pull(skb, thoff + *hdrsize))
|
||||
return -1;
|
||||
|
||||
- ip6h = ipv6_hdr(skb);
|
||||
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
|
||||
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||
|
||||
tuple->src_v6 = ip6h->saddr;
|
||||
@@ -457,6 +521,7 @@ static int nf_flow_tuple_ipv6(struct sk_
|
||||
tuple->l3proto = AF_INET6;
|
||||
tuple->l4proto = ip6h->nexthdr;
|
||||
tuple->iifidx = dev->ifindex;
|
||||
+ nf_flow_tuple_encap(skb, tuple);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -472,15 +537,17 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
const struct in6_addr *nexthop;
|
||||
struct flow_offload *flow;
|
||||
struct net_device *outdev;
|
||||
+ unsigned int thoff, mtu;
|
||||
+ u32 hdrsize, offset = 0;
|
||||
struct ipv6hdr *ip6h;
|
||||
struct rt6_info *rt;
|
||||
- u32 hdrsize;
|
||||
int ret;
|
||||
|
||||
- if (skb->protocol != htons(ETH_P_IPV6))
|
||||
+ if (skb->protocol != htons(ETH_P_IPV6) &&
|
||||
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
|
||||
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
|
||||
return NF_ACCEPT;
|
||||
|
||||
tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||
@@ -490,11 +557,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
|
||||
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
|
||||
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
|
||||
- sizeof(*ip6h)))
|
||||
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
|
||||
+ thoff = sizeof(*ip6h) + offset;
|
||||
+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
@@ -506,11 +575,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
}
|
||||
}
|
||||
|
||||
- if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
|
||||
+ if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
flow_offload_refresh(flow_table, flow);
|
||||
|
||||
+ nf_flow_encap_pop(skb, tuplehash);
|
||||
+
|
||||
ip6h = ipv6_hdr(skb);
|
||||
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
|
||||
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -66,6 +66,11 @@ static int nft_dev_fill_forward_path(con
|
||||
struct nft_forward_info {
|
||||
const struct net_device *indev;
|
||||
const struct net_device *outdev;
|
||||
+ struct id {
|
||||
+ __u16 id;
|
||||
+ __be16 proto;
|
||||
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
+ u8 num_encaps;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
@@ -84,9 +89,23 @@ static void nft_dev_path_info(const stru
|
||||
path = &stack->path[i];
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
+ case DEV_PATH_VLAN:
|
||||
info->indev = path->dev;
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
+
|
||||
+ if (path->type == DEV_PATH_ETHERNET)
|
||||
+ break;
|
||||
+
|
||||
+ /* DEV_PATH_VLAN */
|
||||
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
||||
+ info->indev = NULL;
|
||||
+ break;
|
||||
+ }
|
||||
+ info->outdev = path->dev;
|
||||
+ info->encap[info->num_encaps].id = path->encap.id;
|
||||
+ info->encap[info->num_encaps].proto = path->encap.proto;
|
||||
+ info->num_encaps++;
|
||||
break;
|
||||
case DEV_PATH_BRIDGE:
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
@@ -94,7 +113,6 @@ static void nft_dev_path_info(const stru
|
||||
|
||||
info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
break;
|
||||
- case DEV_PATH_VLAN:
|
||||
default:
|
||||
info->indev = NULL;
|
||||
break;
|
||||
@@ -130,6 +148,7 @@ static void nft_dev_forward_path(struct
|
||||
struct net_device_path_stack stack;
|
||||
struct nft_forward_info info = {};
|
||||
unsigned char ha[ETH_ALEN];
|
||||
+ int i;
|
||||
|
||||
if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
|
||||
nft_dev_path_info(&stack, &info, ha);
|
||||
@@ -138,6 +157,11 @@ static void nft_dev_forward_path(struct
|
||||
return;
|
||||
|
||||
route->tuple[!dir].in.ifindex = info.indev->ifindex;
|
||||
+ for (i = 0; i < info.num_encaps; i++) {
|
||||
+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
|
||||
+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
|
||||
+ }
|
||||
+ route->tuple[!dir].in.num_encaps = info.num_encaps;
|
||||
|
||||
if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
|
||||
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
@ -1,30 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:42 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add bridge vlan filtering support
|
||||
|
||||
Add the vlan tag to the flow encapsulation info when PVID is set on the bridge port.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -111,6 +111,18 @@ static void nft_dev_path_info(const stru
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
|
||||
+ switch (path->bridge.vlan_mode) {
|
||||
+ case DEV_PATH_BR_VLAN_TAG:
|
||||
+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
|
||||
+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
|
||||
+ info->num_encaps++;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG:
|
||||
+ info->num_encaps--;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_KEEP:
|
||||
+ break;
|
||||
+ }
|
||||
info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
break;
|
||||
default:
|
@ -1,145 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:43 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add pppoe support
|
||||
|
||||
Add the PPPoE protocol and session id to the flow tuple using the encap
|
||||
fields to uniquely identify flows from the receive path. For the
|
||||
transmit path, dev_hard_header() on the vlan device pushes the headers.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -7,6 +7,9 @@
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/netdevice.h>
|
||||
+#include <linux/if_ether.h>
|
||||
+#include <linux/if_pppox.h>
|
||||
+#include <linux/ppp_defs.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/ipv6.h>
|
||||
#include <net/ip6_route.h>
|
||||
@@ -139,6 +142,8 @@ static bool ip_has_options(unsigned int
|
||||
static void nf_flow_tuple_encap(struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple)
|
||||
{
|
||||
+ struct vlan_ethhdr *veth;
|
||||
+ struct pppoe_hdr *phdr;
|
||||
int i = 0;
|
||||
|
||||
if (skb_vlan_tag_present(skb)) {
|
||||
@@ -146,11 +151,17 @@ static void nf_flow_tuple_encap(struct s
|
||||
tuple->encap[i].proto = skb->vlan_proto;
|
||||
i++;
|
||||
}
|
||||
- if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
-
|
||||
+ switch (skb->protocol) {
|
||||
+ case htons(ETH_P_8021Q):
|
||||
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
|
||||
tuple->encap[i].proto = skb->protocol;
|
||||
+ break;
|
||||
+ case htons(ETH_P_PPP_SES):
|
||||
+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
|
||||
+ tuple->encap[i].id = ntohs(phdr->sid);
|
||||
+ tuple->encap[i].proto = skb->protocol;
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -228,17 +239,41 @@ static unsigned int nf_flow_xmit_xfrm(st
|
||||
return NF_STOLEN;
|
||||
}
|
||||
|
||||
+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
|
||||
+{
|
||||
+ __be16 proto;
|
||||
+
|
||||
+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
|
||||
+ sizeof(struct pppoe_hdr)));
|
||||
+ switch (proto) {
|
||||
+ case htons(PPP_IP):
|
||||
+ return htons(ETH_P_IP);
|
||||
+ case htons(PPP_IPV6):
|
||||
+ return htons(ETH_P_IPV6);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
|
||||
u32 *offset)
|
||||
{
|
||||
- if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
- struct vlan_ethhdr *veth;
|
||||
+ struct vlan_ethhdr *veth;
|
||||
|
||||
+ switch (skb->protocol) {
|
||||
+ case htons(ETH_P_8021Q):
|
||||
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
if (veth->h_vlan_encapsulated_proto == proto) {
|
||||
*offset += VLAN_HLEN;
|
||||
return true;
|
||||
}
|
||||
+ break;
|
||||
+ case htons(ETH_P_PPP_SES):
|
||||
+ if (nf_flow_pppoe_proto(skb) == proto) {
|
||||
+ *offset += PPPOE_SES_HLEN;
|
||||
+ return true;
|
||||
+ }
|
||||
+ break;
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -255,12 +290,18 @@ static void nf_flow_encap_pop(struct sk_
|
||||
__vlan_hwaccel_clear_tag(skb);
|
||||
continue;
|
||||
}
|
||||
- if (skb->protocol == htons(ETH_P_8021Q)) {
|
||||
+ switch (skb->protocol) {
|
||||
+ case htons(ETH_P_8021Q):
|
||||
vlan_hdr = (struct vlan_hdr *)skb->data;
|
||||
__skb_pull(skb, VLAN_HLEN);
|
||||
vlan_set_encap_proto(skb, vlan_hdr);
|
||||
skb_reset_network_header(skb);
|
||||
break;
|
||||
+ case htons(ETH_P_PPP_SES):
|
||||
+ skb->protocol = nf_flow_pppoe_proto(skb);
|
||||
+ skb_pull(skb, PPPOE_SES_HLEN);
|
||||
+ skb_reset_network_header(skb);
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
}
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -90,6 +90,7 @@ static void nft_dev_path_info(const stru
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
case DEV_PATH_VLAN:
|
||||
+ case DEV_PATH_PPPOE:
|
||||
info->indev = path->dev;
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
@@ -97,7 +98,7 @@ static void nft_dev_path_info(const stru
|
||||
if (path->type == DEV_PATH_ETHERNET)
|
||||
break;
|
||||
|
||||
- /* DEV_PATH_VLAN */
|
||||
+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
|
||||
if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
||||
info->indev = NULL;
|
||||
break;
|
||||
@@ -106,6 +107,8 @@ static void nft_dev_path_info(const stru
|
||||
info->encap[info->num_encaps].id = path->encap.id;
|
||||
info->encap[info->num_encaps].proto = path->encap.proto;
|
||||
info->num_encaps++;
|
||||
+ if (path->type == DEV_PATH_PPPOE)
|
||||
+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
|
||||
break;
|
||||
case DEV_PATH_BRIDGE:
|
||||
if (is_zero_ether_addr(info->h_source))
|
@ -1,32 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:44 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add dsa support
|
||||
|
||||
Replace the master ethernet device by the dsa slave port. Packets coming
|
||||
in from the software ingress path use the dsa slave port as input
|
||||
device.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -89,6 +89,7 @@ static void nft_dev_path_info(const stru
|
||||
path = &stack->path[i];
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
+ case DEV_PATH_DSA:
|
||||
case DEV_PATH_VLAN:
|
||||
case DEV_PATH_PPPOE:
|
||||
info->indev = path->dev;
|
||||
@@ -97,6 +98,10 @@ static void nft_dev_path_info(const stru
|
||||
|
||||
if (path->type == DEV_PATH_ETHERNET)
|
||||
break;
|
||||
+ if (path->type == DEV_PATH_DSA) {
|
||||
+ i = stack->num_paths;
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
/* DEV_PATH_VLAN and DEV_PATH_PPPOE */
|
||||
if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
@ -1,107 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:45 +0100
|
||||
Subject: [PATCH] selftests: netfilter: flowtable bridge and vlan support
|
||||
|
||||
This patch adds two new tests to cover bridge and vlan support:
|
||||
|
||||
- Add a bridge device to the Router1 (nsr1) container and attach the
|
||||
veth0 device to the bridge. Set the IP address to the bridge device
|
||||
to exercise the bridge forwarding path.
|
||||
|
||||
- Add vlan encapsulation between the bridge device in the Router1 and
|
||||
one of the sender containers (ns1).
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
|
||||
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
|
||||
@@ -370,6 +370,88 @@ else
|
||||
ip netns exec nsr1 nft list ruleset
|
||||
fi
|
||||
|
||||
+# Another test:
|
||||
+# Add bridge interface br0 to Router1, with NAT enabled.
|
||||
+ip -net nsr1 link add name br0 type bridge
|
||||
+ip -net nsr1 addr flush dev veth0
|
||||
+ip -net nsr1 link set up dev veth0
|
||||
+ip -net nsr1 link set veth0 master br0
|
||||
+ip -net nsr1 addr add 10.0.1.1/24 dev br0
|
||||
+ip -net nsr1 addr add dead:1::1/64 dev br0
|
||||
+ip -net nsr1 link set up dev br0
|
||||
+
|
||||
+ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
|
||||
+
|
||||
+# br0 with NAT enabled.
|
||||
+ip netns exec nsr1 nft -f - <<EOF
|
||||
+flush table ip nat
|
||||
+table ip nat {
|
||||
+ chain prerouting {
|
||||
+ type nat hook prerouting priority 0; policy accept;
|
||||
+ meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
|
||||
+ }
|
||||
+
|
||||
+ chain postrouting {
|
||||
+ type nat hook postrouting priority 0; policy accept;
|
||||
+ meta oifname "veth1" counter masquerade
|
||||
+ }
|
||||
+}
|
||||
+EOF
|
||||
+
|
||||
+if test_tcp_forwarding_nat ns1 ns2; then
|
||||
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
|
||||
+else
|
||||
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
|
||||
+ ip netns exec nsr1 nft list ruleset
|
||||
+ ret=1
|
||||
+fi
|
||||
+
|
||||
+# Another test:
|
||||
+# Add bridge interface br0 to Router1, with NAT and VLAN.
|
||||
+ip -net nsr1 link set veth0 nomaster
|
||||
+ip -net nsr1 link set down dev veth0
|
||||
+ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
|
||||
+ip -net nsr1 link set up dev veth0
|
||||
+ip -net nsr1 link set up dev veth0.10
|
||||
+ip -net nsr1 link set veth0.10 master br0
|
||||
+
|
||||
+ip -net ns1 addr flush dev eth0
|
||||
+ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
|
||||
+ip -net ns1 link set eth0 up
|
||||
+ip -net ns1 link set eth0.10 up
|
||||
+ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
|
||||
+ip -net ns1 route add default via 10.0.1.1
|
||||
+ip -net ns1 addr add dead:1::99/64 dev eth0.10
|
||||
+
|
||||
+if test_tcp_forwarding_nat ns1 ns2; then
|
||||
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
|
||||
+else
|
||||
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
|
||||
+ ip netns exec nsr1 nft list ruleset
|
||||
+ ret=1
|
||||
+fi
|
||||
+
|
||||
+# restore test topology (remove bridge and VLAN)
|
||||
+ip -net nsr1 link set veth0 nomaster
|
||||
+ip -net nsr1 link set veth0 down
|
||||
+ip -net nsr1 link set veth0.10 down
|
||||
+ip -net nsr1 link delete veth0.10 type vlan
|
||||
+ip -net nsr1 link delete br0 type bridge
|
||||
+ip -net ns1 addr flush dev eth0.10
|
||||
+ip -net ns1 link set eth0.10 down
|
||||
+ip -net ns1 link set eth0 down
|
||||
+ip -net ns1 link delete eth0.10 type vlan
|
||||
+
|
||||
+# restore address in ns1 and nsr1
|
||||
+ip -net ns1 link set eth0 up
|
||||
+ip -net ns1 addr add 10.0.1.99/24 dev eth0
|
||||
+ip -net ns1 route add default via 10.0.1.1
|
||||
+ip -net ns1 addr add dead:1::99/64 dev eth0
|
||||
+ip -net ns1 route add default via dead:1::1
|
||||
+ip -net nsr1 addr add 10.0.1.1/24 dev veth0
|
||||
+ip -net nsr1 addr add dead:1::1/64 dev veth0
|
||||
+ip -net nsr1 link set up dev veth0
|
||||
+
|
||||
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
|
||||
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
|
||||
SPI1=$RANDOM
|
@ -1,310 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:46 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: add offload support for xmit path
|
||||
types
|
||||
|
||||
When the flow tuple xmit_type is set to FLOW_OFFLOAD_XMIT_DIRECT, the
|
||||
dst_cache pointer is not valid, and the h_source/h_dest/ifidx out fields
|
||||
need to be used.
|
||||
|
||||
This patch also adds the FLOW_ACTION_VLAN_PUSH action to pass the VLAN
|
||||
tag to the driver.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -177,28 +177,45 @@ static int flow_offload_eth_src(struct n
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
- const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
|
||||
struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
|
||||
struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
|
||||
- struct net_device *dev;
|
||||
+ const struct flow_offload_tuple *other_tuple, *this_tuple;
|
||||
+ struct net_device *dev = NULL;
|
||||
+ const unsigned char *addr;
|
||||
u32 mask, val;
|
||||
u16 val16;
|
||||
|
||||
- dev = dev_get_by_index(net, tuple->iifidx);
|
||||
- if (!dev)
|
||||
- return -ENOENT;
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
+
|
||||
+ switch (this_tuple->xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ addr = this_tuple->out.h_source;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ dev = dev_get_by_index(net, other_tuple->iifidx);
|
||||
+ if (!dev)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ addr = dev->dev_addr;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
|
||||
mask = ~0xffff0000;
|
||||
- memcpy(&val16, dev->dev_addr, 2);
|
||||
+ memcpy(&val16, addr, 2);
|
||||
val = val16 << 16;
|
||||
flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
|
||||
&val, &mask);
|
||||
|
||||
mask = ~0xffffffff;
|
||||
- memcpy(&val, dev->dev_addr + 2, 4);
|
||||
+ memcpy(&val, addr + 2, 4);
|
||||
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
|
||||
&val, &mask);
|
||||
- dev_put(dev);
|
||||
+
|
||||
+ if (dev)
|
||||
+ dev_put(dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -210,27 +227,40 @@ static int flow_offload_eth_dst(struct n
|
||||
{
|
||||
struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
|
||||
struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
|
||||
- const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
|
||||
+ const struct flow_offload_tuple *other_tuple, *this_tuple;
|
||||
const struct dst_entry *dst_cache;
|
||||
unsigned char ha[ETH_ALEN];
|
||||
struct neighbour *n;
|
||||
+ const void *daddr;
|
||||
u32 mask, val;
|
||||
u8 nud_state;
|
||||
u16 val16;
|
||||
|
||||
- dst_cache = flow->tuplehash[dir].tuple.dst_cache;
|
||||
- n = dst_neigh_lookup(dst_cache, daddr);
|
||||
- if (!n)
|
||||
- return -ENOENT;
|
||||
-
|
||||
- read_lock_bh(&n->lock);
|
||||
- nud_state = n->nud_state;
|
||||
- ether_addr_copy(ha, n->ha);
|
||||
- read_unlock_bh(&n->lock);
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
|
||||
- if (!(nud_state & NUD_VALID)) {
|
||||
+ switch (this_tuple->xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ ether_addr_copy(ha, this_tuple->out.h_dest);
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ daddr = &other_tuple->src_v4;
|
||||
+ dst_cache = this_tuple->dst_cache;
|
||||
+ n = dst_neigh_lookup(dst_cache, daddr);
|
||||
+ if (!n)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ read_lock_bh(&n->lock);
|
||||
+ nud_state = n->nud_state;
|
||||
+ ether_addr_copy(ha, n->ha);
|
||||
+ read_unlock_bh(&n->lock);
|
||||
neigh_release(n);
|
||||
- return -ENOENT;
|
||||
+
|
||||
+ if (!(nud_state & NUD_VALID))
|
||||
+ return -ENOENT;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
mask = ~0xffffffff;
|
||||
@@ -243,7 +273,6 @@ static int flow_offload_eth_dst(struct n
|
||||
val = val16;
|
||||
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
|
||||
&val, &mask);
|
||||
- neigh_release(n);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -465,27 +494,52 @@ static void flow_offload_ipv4_checksum(s
|
||||
}
|
||||
}
|
||||
|
||||
-static void flow_offload_redirect(const struct flow_offload *flow,
|
||||
+static void flow_offload_redirect(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
- struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
- struct rtable *rt;
|
||||
+ const struct flow_offload_tuple *this_tuple, *other_tuple;
|
||||
+ struct flow_action_entry *entry;
|
||||
+ struct net_device *dev;
|
||||
+ int ifindex;
|
||||
+
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
+ switch (this_tuple->xmit_type) {
|
||||
+ case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
+ ifindex = this_tuple->out.ifidx;
|
||||
+ break;
|
||||
+ case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ ifindex = other_tuple->iifidx;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||
+ dev = dev_get_by_index(net, ifindex);
|
||||
+ if (!dev)
|
||||
+ return;
|
||||
+
|
||||
+ entry = flow_action_entry_next(flow_rule);
|
||||
entry->id = FLOW_ACTION_REDIRECT;
|
||||
- entry->dev = rt->dst.dev;
|
||||
- dev_hold(rt->dst.dev);
|
||||
+ entry->dev = dev;
|
||||
}
|
||||
|
||||
static void flow_offload_encap_tunnel(const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
+ const struct flow_offload_tuple *this_tuple;
|
||||
struct flow_action_entry *entry;
|
||||
struct dst_entry *dst;
|
||||
|
||||
- dst = flow->tuplehash[dir].tuple.dst_cache;
|
||||
+ this_tuple = &flow->tuplehash[dir].tuple;
|
||||
+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
|
||||
+ return;
|
||||
+
|
||||
+ dst = this_tuple->dst_cache;
|
||||
if (dst && dst->lwtstate) {
|
||||
struct ip_tunnel_info *tun_info;
|
||||
|
||||
@@ -502,10 +556,15 @@ static void flow_offload_decap_tunnel(co
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
+ const struct flow_offload_tuple *other_tuple;
|
||||
struct flow_action_entry *entry;
|
||||
struct dst_entry *dst;
|
||||
|
||||
- dst = flow->tuplehash[!dir].tuple.dst_cache;
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
|
||||
+ return;
|
||||
+
|
||||
+ dst = other_tuple->dst_cache;
|
||||
if (dst && dst->lwtstate) {
|
||||
struct ip_tunnel_info *tun_info;
|
||||
|
||||
@@ -517,10 +576,14 @@ static void flow_offload_decap_tunnel(co
|
||||
}
|
||||
}
|
||||
|
||||
-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int
|
||||
+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
+ const struct flow_offload_tuple *other_tuple;
|
||||
+ int i;
|
||||
+
|
||||
flow_offload_decap_tunnel(flow, dir, flow_rule);
|
||||
flow_offload_encap_tunnel(flow, dir, flow_rule);
|
||||
|
||||
@@ -528,6 +591,26 @@ int nf_flow_rule_route_ipv4(struct net *
|
||||
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
|
||||
return -1;
|
||||
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+
|
||||
+ for (i = 0; i < other_tuple->encap_num; i++) {
|
||||
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
+
|
||||
+ entry->id = FLOW_ACTION_VLAN_PUSH;
|
||||
+ entry->vlan.vid = other_tuple->encap[i].id;
|
||||
+ entry->vlan.proto = other_tuple->encap[i].proto;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
+{
|
||||
+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
flow_offload_ipv4_snat(net, flow, dir, flow_rule);
|
||||
flow_offload_port_snat(net, flow, dir, flow_rule);
|
||||
@@ -540,7 +623,7 @@ int nf_flow_rule_route_ipv4(struct net *
|
||||
test_bit(NF_FLOW_DNAT, &flow->flags))
|
||||
flow_offload_ipv4_checksum(net, flow, flow_rule);
|
||||
|
||||
- flow_offload_redirect(flow, dir, flow_rule);
|
||||
+ flow_offload_redirect(net, flow, dir, flow_rule);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -550,11 +633,7 @@ int nf_flow_rule_route_ipv6(struct net *
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
- flow_offload_decap_tunnel(flow, dir, flow_rule);
|
||||
- flow_offload_encap_tunnel(flow, dir, flow_rule);
|
||||
-
|
||||
- if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
|
||||
- flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
|
||||
+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
|
||||
return -1;
|
||||
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
@@ -566,7 +645,7 @@ int nf_flow_rule_route_ipv6(struct net *
|
||||
flow_offload_port_dnat(net, flow, dir, flow_rule);
|
||||
}
|
||||
|
||||
- flow_offload_redirect(flow, dir, flow_rule);
|
||||
+ flow_offload_redirect(net, flow, dir, flow_rule);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -580,10 +659,10 @@ nf_flow_offload_rule_alloc(struct net *n
|
||||
enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
const struct nf_flowtable *flowtable = offload->flowtable;
|
||||
+ const struct flow_offload_tuple *tuple, *other_tuple;
|
||||
const struct flow_offload *flow = offload->flow;
|
||||
- const struct flow_offload_tuple *tuple;
|
||||
+ struct dst_entry *other_dst = NULL;
|
||||
struct nf_flow_rule *flow_rule;
|
||||
- struct dst_entry *other_dst;
|
||||
int err = -ENOMEM;
|
||||
|
||||
flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
|
||||
@@ -599,7 +678,10 @@ nf_flow_offload_rule_alloc(struct net *n
|
||||
flow_rule->rule->match.key = &flow_rule->match.key;
|
||||
|
||||
tuple = &flow->tuplehash[dir].tuple;
|
||||
- other_dst = flow->tuplehash[!dir].tuple.dst_cache;
|
||||
+ other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
|
||||
+ other_dst = other_tuple->dst_cache;
|
||||
+
|
||||
err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
|
||||
if (err < 0)
|
||||
goto err_flow_match;
|
@ -1,114 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:47 +0100
|
||||
Subject: [PATCH] netfilter: nft_flow_offload: use direct xmit if
|
||||
hardware offload is enabled
|
||||
|
||||
If there is a forward path to reach an ethernet device and hardware
|
||||
offload is enabled, then use the direct xmit path.
|
||||
|
||||
Moreover, store the real device in the direct xmit path info since
|
||||
software datapath uses dev_hard_header() to push the layer encapsulation
|
||||
headers while hardware offload refers to the real device.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -131,6 +131,7 @@ struct flow_offload_tuple {
|
||||
struct dst_entry *dst_cache;
|
||||
struct {
|
||||
u32 ifidx;
|
||||
+ u32 hw_ifidx;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
} out;
|
||||
@@ -187,6 +188,7 @@ struct nf_flow_route {
|
||||
} in;
|
||||
struct {
|
||||
u32 ifindex;
|
||||
+ u32 hw_ifindex;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
} out;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -106,6 +106,7 @@ static int flow_offload_fill_route(struc
|
||||
memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
|
||||
ETH_ALEN);
|
||||
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
|
||||
+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
|
||||
break;
|
||||
case FLOW_OFFLOAD_XMIT_XFRM:
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -508,7 +508,7 @@ static void flow_offload_redirect(struct
|
||||
switch (this_tuple->xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
this_tuple = &flow->tuplehash[dir].tuple;
|
||||
- ifindex = this_tuple->out.ifidx;
|
||||
+ ifindex = this_tuple->out.hw_ifidx;
|
||||
break;
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -66,6 +66,7 @@ static int nft_dev_fill_forward_path(con
|
||||
struct nft_forward_info {
|
||||
const struct net_device *indev;
|
||||
const struct net_device *outdev;
|
||||
+ const struct net_device *hw_outdev;
|
||||
struct id {
|
||||
__u16 id;
|
||||
__be16 proto;
|
||||
@@ -76,9 +77,18 @@ struct nft_forward_info {
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
};
|
||||
|
||||
+static bool nft_is_valid_ether_device(const struct net_device *dev)
|
||||
+{
|
||||
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
|
||||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
|
||||
+ return false;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
static void nft_dev_path_info(const struct net_device_path_stack *stack,
|
||||
struct nft_forward_info *info,
|
||||
- unsigned char *ha)
|
||||
+ unsigned char *ha, struct nf_flowtable *flowtable)
|
||||
{
|
||||
const struct net_device_path *path;
|
||||
int i;
|
||||
@@ -140,6 +150,12 @@ static void nft_dev_path_info(const stru
|
||||
}
|
||||
if (!info->outdev)
|
||||
info->outdev = info->indev;
|
||||
+
|
||||
+ info->hw_outdev = info->indev;
|
||||
+
|
||||
+ if (nf_flowtable_hw_offload(flowtable) &&
|
||||
+ nft_is_valid_ether_device(info->indev))
|
||||
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
}
|
||||
|
||||
static bool nft_flowtable_find_dev(const struct net_device *dev,
|
||||
@@ -171,7 +187,7 @@ static void nft_dev_forward_path(struct
|
||||
int i;
|
||||
|
||||
if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
|
||||
- nft_dev_path_info(&stack, &info, ha);
|
||||
+ nft_dev_path_info(&stack, &info, ha, &ft->data);
|
||||
|
||||
if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
|
||||
return;
|
||||
@@ -187,6 +203,7 @@ static void nft_dev_forward_path(struct
|
||||
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
||||
memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
|
||||
route->tuple[dir].out.ifindex = info.outdev->ifindex;
|
||||
+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
|
||||
route->tuple[dir].xmit_type = info.xmit_type;
|
||||
}
|
||||
}
|
@ -1,123 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:48 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: bridge vlan hardware offload and
|
||||
switchdev
|
||||
|
||||
The switch might have already added the VLAN tag through PVID hardware
|
||||
offload. Keep this extra VLAN in the flowtable but skip it on egress.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -849,6 +849,7 @@ struct net_device_path {
|
||||
DEV_PATH_BR_VLAN_KEEP,
|
||||
DEV_PATH_BR_VLAN_TAG,
|
||||
DEV_PATH_BR_VLAN_UNTAG,
|
||||
+ DEV_PATH_BR_VLAN_UNTAG_HW,
|
||||
} vlan_mode;
|
||||
u16 vlan_id;
|
||||
__be16 vlan_proto;
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -123,9 +123,10 @@ struct flow_offload_tuple {
|
||||
/* All members above are keys for lookups, see flow_offload_hash(). */
|
||||
struct { } __hash;
|
||||
|
||||
- u8 dir:4,
|
||||
+ u8 dir:2,
|
||||
xmit_type:2,
|
||||
- encap_num:2;
|
||||
+ encap_num:2,
|
||||
+ in_vlan_ingress:2;
|
||||
u16 mtu;
|
||||
union {
|
||||
struct dst_entry *dst_cache;
|
||||
@@ -184,7 +185,8 @@ struct nf_flow_route {
|
||||
u16 id;
|
||||
__be16 proto;
|
||||
} encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
- u8 num_encaps;
|
||||
+ u8 num_encaps:2,
|
||||
+ ingress_vlans:2;
|
||||
} in;
|
||||
struct {
|
||||
u32 ifindex;
|
||||
--- a/net/bridge/br_device.c
|
||||
+++ b/net/bridge/br_device.c
|
||||
@@ -435,6 +435,7 @@ static int br_fill_forward_path(struct n
|
||||
ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
|
||||
ctx->num_vlans++;
|
||||
break;
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
|
||||
case DEV_PATH_BR_VLAN_UNTAG:
|
||||
ctx->num_vlans--;
|
||||
break;
|
||||
--- a/net/bridge/br_vlan.c
|
||||
+++ b/net/bridge/br_vlan.c
|
||||
@@ -1374,6 +1374,8 @@ int br_vlan_fill_forward_path_mode(struc
|
||||
|
||||
if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
|
||||
path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
|
||||
+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
|
||||
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
|
||||
else
|
||||
path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -95,6 +95,8 @@ static int flow_offload_fill_route(struc
|
||||
for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
|
||||
flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
|
||||
flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
|
||||
+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
|
||||
+ flow_tuple->in_vlan_ingress |= BIT(j);
|
||||
j++;
|
||||
}
|
||||
flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -594,8 +594,12 @@ nf_flow_rule_route_common(struct net *ne
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
|
||||
for (i = 0; i < other_tuple->encap_num; i++) {
|
||||
- struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
+ struct flow_action_entry *entry;
|
||||
|
||||
+ if (other_tuple->in_vlan_ingress & BIT(i))
|
||||
+ continue;
|
||||
+
|
||||
+ entry = flow_action_entry_next(flow_rule);
|
||||
entry->id = FLOW_ACTION_VLAN_PUSH;
|
||||
entry->vlan.vid = other_tuple->encap[i].id;
|
||||
entry->vlan.proto = other_tuple->encap[i].proto;
|
||||
--- a/net/netfilter/nft_flow_offload.c
|
||||
+++ b/net/netfilter/nft_flow_offload.c
|
||||
@@ -72,6 +72,7 @@ struct nft_forward_info {
|
||||
__be16 proto;
|
||||
} encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
u8 num_encaps;
|
||||
+ u8 ingress_vlans;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
@@ -130,6 +131,9 @@ static void nft_dev_path_info(const stru
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
|
||||
switch (path->bridge.vlan_mode) {
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
|
||||
+ info->ingress_vlans |= BIT(info->num_encaps - 1);
|
||||
+ break;
|
||||
case DEV_PATH_BR_VLAN_TAG:
|
||||
info->encap[info->num_encaps].id = path->bridge.vlan_id;
|
||||
info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
|
||||
@@ -198,6 +202,7 @@ static void nft_dev_forward_path(struct
|
||||
route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
|
||||
}
|
||||
route->tuple[!dir].in.num_encaps = info.num_encaps;
|
||||
+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
|
||||
|
||||
if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
|
||||
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
@ -1,30 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:49 +0100
|
||||
Subject: [PATCH] net: flow_offload: add FLOW_ACTION_PPPOE_PUSH
|
||||
|
||||
Add an action to represent the PPPoE hardware offload support that
|
||||
includes the session ID.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/flow_offload.h
|
||||
+++ b/include/net/flow_offload.h
|
||||
@@ -147,6 +147,7 @@ enum flow_action_id {
|
||||
FLOW_ACTION_MPLS_POP,
|
||||
FLOW_ACTION_MPLS_MANGLE,
|
||||
FLOW_ACTION_GATE,
|
||||
+ FLOW_ACTION_PPPOE_PUSH,
|
||||
NUM_FLOW_ACTIONS,
|
||||
};
|
||||
|
||||
@@ -271,6 +272,9 @@ struct flow_action_entry {
|
||||
u32 num_entries;
|
||||
struct action_gate_entry *entries;
|
||||
} gate;
|
||||
+ struct { /* FLOW_ACTION_PPPOE_PUSH */
|
||||
+ u16 sid;
|
||||
+ } pppoe;
|
||||
};
|
||||
struct flow_action_cookie *cookie; /* user defined action cookie */
|
||||
};
|
@ -1,35 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:50 +0100
|
||||
Subject: [PATCH] netfilter: flowtable: support for
|
||||
FLOW_ACTION_PPPOE_PUSH
|
||||
|
||||
Add a PPPoE push action if layer 2 protocol is ETH_P_PPP_SES to add
|
||||
PPPoE flowtable hardware offload support.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -600,9 +600,18 @@ nf_flow_rule_route_common(struct net *ne
|
||||
continue;
|
||||
|
||||
entry = flow_action_entry_next(flow_rule);
|
||||
- entry->id = FLOW_ACTION_VLAN_PUSH;
|
||||
- entry->vlan.vid = other_tuple->encap[i].id;
|
||||
- entry->vlan.proto = other_tuple->encap[i].proto;
|
||||
+
|
||||
+ switch (other_tuple->encap[i].proto) {
|
||||
+ case htons(ETH_P_PPP_SES):
|
||||
+ entry->id = FLOW_ACTION_PPPOE_PUSH;
|
||||
+ entry->pppoe.sid = other_tuple->encap[i].id;
|
||||
+ break;
|
||||
+ case htons(ETH_P_8021Q):
|
||||
+ entry->id = FLOW_ACTION_VLAN_PUSH;
|
||||
+ entry->vlan.vid = other_tuple->encap[i].id;
|
||||
+ entry->vlan.proto = other_tuple->encap[i].proto;
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
|
||||
return 0;
|
@ -1,53 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:51 +0100
|
||||
Subject: [PATCH] dsa: slave: add support for TC_SETUP_FT
|
||||
|
||||
The dsa infrastructure provides a well-defined hierarchy of devices, so
|
||||
pass up the call to set up the flow block to the master device. From the
|
||||
software dataplane, the netfilter infrastructure uses the dsa slave
|
||||
devices to refer to the input and output device for the given skbuff.
|
||||
Similarly, the flowtable definition in the ruleset refers to the dsa
|
||||
slave port devices.
|
||||
|
||||
This patch adds the glue code to call ndo_setup_tc with TC_SETUP_FT
|
||||
with the master device via the dsa slave devices.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -1239,14 +1239,32 @@ static int dsa_slave_setup_tc_block(stru
|
||||
}
|
||||
}
|
||||
|
||||
+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
|
||||
+ void *type_data)
|
||||
+{
|
||||
+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
|
||||
+ struct net_device *master = cpu_dp->master;
|
||||
+
|
||||
+ if (!master->netdev_ops->ndo_setup_tc)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
|
||||
+}
|
||||
+
|
||||
static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
|
||||
void *type_data)
|
||||
{
|
||||
struct dsa_port *dp = dsa_slave_to_port(dev);
|
||||
struct dsa_switch *ds = dp->ds;
|
||||
|
||||
- if (type == TC_SETUP_BLOCK)
|
||||
+ switch (type) {
|
||||
+ case TC_SETUP_BLOCK:
|
||||
return dsa_slave_setup_tc_block(dev, type_data);
|
||||
+ case TC_SETUP_FT:
|
||||
+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
if (!ds->ops->port_setup_tc)
|
||||
return -EOPNOTSUPP;
|
@ -1,68 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:52 +0100
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: fix parsing packets in GDM
|
||||
|
||||
When using DSA, set the special tag in GDM ingress control to allow the MAC
|
||||
to parse packets properly earlier. This affects rx DMA source port reporting.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/pinctrl/devinfo.h>
|
||||
#include <linux/phylink.h>
|
||||
+#include <net/dsa.h>
|
||||
|
||||
#include "mtk_eth_soc.h"
|
||||
|
||||
@@ -1285,13 +1286,12 @@ static int mtk_poll_rx(struct napi_struc
|
||||
break;
|
||||
|
||||
/* find out which mac the packet come from. values start at 1 */
|
||||
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
|
||||
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) ||
|
||||
+ (trxd.rxd4 & RX_DMA_SPECIAL_TAG))
|
||||
mac = 0;
|
||||
- } else {
|
||||
- mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
|
||||
- RX_DMA_FPORT_MASK;
|
||||
- mac--;
|
||||
- }
|
||||
+ else
|
||||
+ mac = ((trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
|
||||
+ RX_DMA_FPORT_MASK) - 1;
|
||||
|
||||
if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
|
||||
!eth->netdev[mac]))
|
||||
@@ -2254,6 +2254,9 @@ static void mtk_gdm_config(struct mtk_et
|
||||
|
||||
val |= config;
|
||||
|
||||
+ if (!i && eth->netdev[0] && netdev_uses_dsa(eth->netdev[0]))
|
||||
+ val |= MTK_GDMA_SPECIAL_TAG;
|
||||
+
|
||||
mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i));
|
||||
}
|
||||
/* Reset and enable PSE */
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -81,6 +81,7 @@
|
||||
|
||||
/* GDM Exgress Control Register */
|
||||
#define MTK_GDMA_FWD_CFG(x) (0x500 + (x * 0x1000))
|
||||
+#define MTK_GDMA_SPECIAL_TAG BIT(24)
|
||||
#define MTK_GDMA_ICS_EN BIT(22)
|
||||
#define MTK_GDMA_TCS_EN BIT(21)
|
||||
#define MTK_GDMA_UCS_EN BIT(20)
|
||||
@@ -318,6 +319,7 @@
|
||||
#define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
|
||||
#define RX_DMA_FPORT_SHIFT 19
|
||||
#define RX_DMA_FPORT_MASK 0x7
|
||||
+#define RX_DMA_SPECIAL_TAG BIT(22)
|
||||
|
||||
/* PHY Indirect Access Control registers */
|
||||
#define MTK_PHY_IAC 0x10004
|
File diff suppressed because it is too large
Load Diff
@ -1,568 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 24 Mar 2021 02:30:54 +0100
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: add flow offloading support
|
||||
|
||||
This adds support for offloading IPv4 routed flows, including SNAT/DNAT,
|
||||
one VLAN, PPPoE and DSA.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/Makefile
|
||||
+++ b/drivers/net/ethernet/mediatek/Makefile
|
||||
@@ -4,5 +4,5 @@
|
||||
#
|
||||
|
||||
obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
|
||||
-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o
|
||||
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
|
||||
obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -2834,6 +2834,7 @@ static const struct net_device_ops mtk_n
|
||||
#ifdef CONFIG_NET_POLL_CONTROLLER
|
||||
.ndo_poll_controller = mtk_poll_controller,
|
||||
#endif
|
||||
+ .ndo_setup_tc = mtk_eth_setup_tc,
|
||||
};
|
||||
|
||||
static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
|
||||
@@ -3092,6 +3093,10 @@ static int mtk_probe(struct platform_dev
|
||||
eth->base + MTK_ETH_PPE_BASE, 2);
|
||||
if (err)
|
||||
goto err_free_dev;
|
||||
+
|
||||
+ err = mtk_eth_offload_init(eth);
|
||||
+ if (err)
|
||||
+ goto err_free_dev;
|
||||
}
|
||||
|
||||
for (i = 0; i < MTK_MAX_DEVS; i++) {
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <linux/u64_stats_sync.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/phylink.h>
|
||||
+#include <linux/rhashtable.h>
|
||||
#include "mtk_ppe.h"
|
||||
|
||||
#define MTK_QDMA_PAGE_SIZE 2048
|
||||
@@ -40,7 +41,8 @@
|
||||
NETIF_F_HW_VLAN_CTAG_RX | \
|
||||
NETIF_F_SG | NETIF_F_TSO | \
|
||||
NETIF_F_TSO6 | \
|
||||
- NETIF_F_IPV6_CSUM)
|
||||
+ NETIF_F_IPV6_CSUM |\
|
||||
+ NETIF_F_HW_TC)
|
||||
#define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
|
||||
#define NEXT_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1))
|
||||
|
||||
@@ -929,6 +931,7 @@ struct mtk_eth {
|
||||
int ip_align;
|
||||
|
||||
struct mtk_ppe ppe;
|
||||
+ struct rhashtable flow_table;
|
||||
};
|
||||
|
||||
/* struct mtk_mac - the structure that holds the info about the MACs of the
|
||||
@@ -973,4 +976,9 @@ int mtk_gmac_sgmii_path_setup(struct mtk
|
||||
int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id);
|
||||
int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
|
||||
|
||||
+int mtk_eth_offload_init(struct mtk_eth *eth);
|
||||
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
|
||||
+ void *type_data);
|
||||
+
|
||||
+
|
||||
#endif /* MTK_ETH_H */
|
||||
--- /dev/null
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
@@ -0,0 +1,485 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+/*
|
||||
+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
|
||||
+ */
|
||||
+
|
||||
+#include <linux/if_ether.h>
|
||||
+#include <linux/rhashtable.h>
|
||||
+#include <linux/if_ether.h>
|
||||
+#include <linux/ip.h>
|
||||
+#include <net/flow_offload.h>
|
||||
+#include <net/pkt_cls.h>
|
||||
+#include <net/dsa.h>
|
||||
+#include "mtk_eth_soc.h"
|
||||
+
|
||||
+struct mtk_flow_data {
|
||||
+ struct ethhdr eth;
|
||||
+
|
||||
+ union {
|
||||
+ struct {
|
||||
+ __be32 src_addr;
|
||||
+ __be32 dst_addr;
|
||||
+ } v4;
|
||||
+ };
|
||||
+
|
||||
+ __be16 src_port;
|
||||
+ __be16 dst_port;
|
||||
+
|
||||
+ struct {
|
||||
+ u16 id;
|
||||
+ __be16 proto;
|
||||
+ u8 num;
|
||||
+ } vlan;
|
||||
+ struct {
|
||||
+ u16 sid;
|
||||
+ u8 num;
|
||||
+ } pppoe;
|
||||
+};
|
||||
+
|
||||
+struct mtk_flow_entry {
|
||||
+ struct rhash_head node;
|
||||
+ unsigned long cookie;
|
||||
+ u16 hash;
|
||||
+};
|
||||
+
|
||||
+static const struct rhashtable_params mtk_flow_ht_params = {
|
||||
+ .head_offset = offsetof(struct mtk_flow_entry, node),
|
||||
+ .head_offset = offsetof(struct mtk_flow_entry, cookie),
|
||||
+ .key_len = sizeof(unsigned long),
|
||||
+ .automatic_shrinking = true,
|
||||
+};
|
||||
+
|
||||
+static u32
|
||||
+mtk_eth_timestamp(struct mtk_eth *eth)
|
||||
+{
|
||||
+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
|
||||
+ bool egress)
|
||||
+{
|
||||
+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
|
||||
+ data->v4.src_addr, data->src_port,
|
||||
+ data->v4.dst_addr, data->dst_port);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
|
||||
+{
|
||||
+ void *dest = eth + act->mangle.offset;
|
||||
+ const void *src = &act->mangle.val;
|
||||
+
|
||||
+ if (act->mangle.offset > 8)
|
||||
+ return;
|
||||
+
|
||||
+ if (act->mangle.mask == 0xffff) {
|
||||
+ src += 2;
|
||||
+ dest += 2;
|
||||
+ }
|
||||
+
|
||||
+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_mangle_ports(const struct flow_action_entry *act,
|
||||
+ struct mtk_flow_data *data)
|
||||
+{
|
||||
+ u32 val = ntohl(act->mangle.val);
|
||||
+
|
||||
+ switch (act->mangle.offset) {
|
||||
+ case 0:
|
||||
+ if (act->mangle.mask == ~htonl(0xffff))
|
||||
+ data->dst_port = cpu_to_be16(val);
|
||||
+ else
|
||||
+ data->src_port = cpu_to_be16(val >> 16);
|
||||
+ break;
|
||||
+ case 2:
|
||||
+ data->dst_port = cpu_to_be16(val);
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
|
||||
+ struct mtk_flow_data *data)
|
||||
+{
|
||||
+ __be32 *dest;
|
||||
+
|
||||
+ switch (act->mangle.offset) {
|
||||
+ case offsetof(struct iphdr, saddr):
|
||||
+ dest = &data->v4.src_addr;
|
||||
+ break;
|
||||
+ case offsetof(struct iphdr, daddr):
|
||||
+ dest = &data->v4.dst_addr;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ memcpy(dest, &act->mangle.val, sizeof(u32));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_get_dsa_port(struct net_device **dev)
|
||||
+{
|
||||
+#if IS_ENABLED(CONFIG_NET_DSA)
|
||||
+ struct dsa_port *dp;
|
||||
+
|
||||
+ dp = dsa_port_from_netdev(*dev);
|
||||
+ if (IS_ERR(dp))
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ *dev = dp->cpu_dp->master;
|
||||
+
|
||||
+ return dp->index;
|
||||
+#else
|
||||
+ return -ENODEV;
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
|
||||
+ struct net_device *dev)
|
||||
+{
|
||||
+ int pse_port, dsa_port;
|
||||
+
|
||||
+ dsa_port = mtk_flow_get_dsa_port(&dev);
|
||||
+ if (dsa_port >= 0)
|
||||
+ mtk_foe_entry_set_dsa(foe, dsa_port);
|
||||
+
|
||||
+ if (dev == eth->netdev[0])
|
||||
+ pse_port = 1;
|
||||
+ else if (dev == eth->netdev[1])
|
||||
+ pse_port = 2;
|
||||
+ else
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ mtk_foe_entry_set_pse_port(foe, pse_port);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
|
||||
+{
|
||||
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
|
||||
+ struct flow_action_entry *act;
|
||||
+ struct mtk_flow_data data = {};
|
||||
+ struct mtk_foe_entry foe;
|
||||
+ struct net_device *odev = NULL;
|
||||
+ struct mtk_flow_entry *entry;
|
||||
+ int offload_type = 0;
|
||||
+ u16 addr_type = 0;
|
||||
+ u32 timestamp;
|
||||
+ u8 l4proto = 0;
|
||||
+ int err = 0;
|
||||
+ int hash;
|
||||
+ int i;
|
||||
+
|
||||
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
|
||||
+ struct flow_match_meta match;
|
||||
+
|
||||
+ flow_rule_match_meta(rule, &match);
|
||||
+ } else {
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
|
||||
+ struct flow_match_control match;
|
||||
+
|
||||
+ flow_rule_match_control(rule, &match);
|
||||
+ addr_type = match.key->addr_type;
|
||||
+ } else {
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
|
||||
+ struct flow_match_basic match;
|
||||
+
|
||||
+ flow_rule_match_basic(rule, &match);
|
||||
+ l4proto = match.key->ip_proto;
|
||||
+ } else {
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ flow_action_for_each(i, act, &rule->action) {
|
||||
+ switch (act->id) {
|
||||
+ case FLOW_ACTION_MANGLE:
|
||||
+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
|
||||
+ mtk_flow_offload_mangle_eth(act, &data.eth);
|
||||
+ break;
|
||||
+ case FLOW_ACTION_REDIRECT:
|
||||
+ odev = act->dev;
|
||||
+ break;
|
||||
+ case FLOW_ACTION_CSUM:
|
||||
+ break;
|
||||
+ case FLOW_ACTION_VLAN_PUSH:
|
||||
+ if (data.vlan.num == 1 ||
|
||||
+ act->vlan.proto != htons(ETH_P_8021Q))
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ data.vlan.id = act->vlan.vid;
|
||||
+ data.vlan.proto = act->vlan.proto;
|
||||
+ data.vlan.num++;
|
||||
+ break;
|
||||
+ case FLOW_ACTION_PPPOE_PUSH:
|
||||
+ if (data.pppoe.num == 1)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ data.pppoe.sid = act->pppoe.sid;
|
||||
+ data.pppoe.num++;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ switch (addr_type) {
|
||||
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
|
||||
+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (!is_valid_ether_addr(data.eth.h_source) ||
|
||||
+ !is_valid_ether_addr(data.eth.h_dest))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
|
||||
+ data.eth.h_source,
|
||||
+ data.eth.h_dest);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
|
||||
+ struct flow_match_ports ports;
|
||||
+
|
||||
+ flow_rule_match_ports(rule, &ports);
|
||||
+ data.src_port = ports.key->src;
|
||||
+ data.dst_port = ports.key->dst;
|
||||
+ } else {
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
|
||||
+ struct flow_match_ipv4_addrs addrs;
|
||||
+
|
||||
+ flow_rule_match_ipv4_addrs(rule, &addrs);
|
||||
+
|
||||
+ data.v4.src_addr = addrs.key->src;
|
||||
+ data.v4.dst_addr = addrs.key->dst;
|
||||
+
|
||||
+ mtk_flow_set_ipv4_addr(&foe, &data, false);
|
||||
+ }
|
||||
+
|
||||
+ flow_action_for_each(i, act, &rule->action) {
|
||||
+ if (act->id != FLOW_ACTION_MANGLE)
|
||||
+ continue;
|
||||
+
|
||||
+ switch (act->mangle.htype) {
|
||||
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
|
||||
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
|
||||
+ err = mtk_flow_mangle_ports(act, &data);
|
||||
+ break;
|
||||
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
|
||||
+ err = mtk_flow_mangle_ipv4(act, &data);
|
||||
+ break;
|
||||
+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
|
||||
+ /* handled earlier */
|
||||
+ break;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
|
||||
+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
+ if (data.vlan.num == 1) {
|
||||
+ if (data.vlan.proto != htons(ETH_P_8021Q))
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
|
||||
+ }
|
||||
+ if (data.pppoe.num == 1)
|
||||
+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
|
||||
+
|
||||
+ err = mtk_flow_set_output_device(eth, &foe, odev);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
|
||||
+ if (!entry)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ entry->cookie = f->cookie;
|
||||
+ timestamp = mtk_eth_timestamp(eth);
|
||||
+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
|
||||
+ if (hash < 0) {
|
||||
+ err = hash;
|
||||
+ goto free;
|
||||
+ }
|
||||
+
|
||||
+ entry->hash = hash;
|
||||
+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
|
||||
+ mtk_flow_ht_params);
|
||||
+ if (err < 0)
|
||||
+ goto clear_flow;
|
||||
+
|
||||
+ return 0;
|
||||
+clear_flow:
|
||||
+ mtk_foe_entry_clear(&eth->ppe, hash);
|
||||
+free:
|
||||
+ kfree(entry);
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
|
||||
+{
|
||||
+ struct mtk_flow_entry *entry;
|
||||
+
|
||||
+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
|
||||
+ mtk_flow_ht_params);
|
||||
+ if (!entry)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
|
||||
+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
|
||||
+ mtk_flow_ht_params);
|
||||
+ kfree(entry);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
|
||||
+{
|
||||
+ struct mtk_flow_entry *entry;
|
||||
+ int timestamp;
|
||||
+ u32 idle;
|
||||
+
|
||||
+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
|
||||
+ mtk_flow_ht_params);
|
||||
+ if (!entry)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
|
||||
+ if (timestamp < 0)
|
||||
+ return -ETIMEDOUT;
|
||||
+
|
||||
+ idle = mtk_eth_timestamp(eth) - timestamp;
|
||||
+ f->stats.lastused = jiffies - idle * HZ;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
|
||||
+{
|
||||
+ struct flow_cls_offload *cls = type_data;
|
||||
+ struct net_device *dev = cb_priv;
|
||||
+ struct mtk_mac *mac = netdev_priv(dev);
|
||||
+ struct mtk_eth *eth = mac->hw;
|
||||
+
|
||||
+ if (!tc_can_offload(dev))
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if (type != TC_SETUP_CLSFLOWER)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ switch (cls->command) {
|
||||
+ case FLOW_CLS_REPLACE:
|
||||
+ return mtk_flow_offload_replace(eth, cls);
|
||||
+ case FLOW_CLS_DESTROY:
|
||||
+ return mtk_flow_offload_destroy(eth, cls);
|
||||
+ case FLOW_CLS_STATS:
|
||||
+ return mtk_flow_offload_stats(eth, cls);
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
|
||||
+{
|
||||
+ struct mtk_mac *mac = netdev_priv(dev);
|
||||
+ struct mtk_eth *eth = mac->hw;
|
||||
+ static LIST_HEAD(block_cb_list);
|
||||
+ struct flow_block_cb *block_cb;
|
||||
+ flow_setup_cb_t *cb;
|
||||
+
|
||||
+ if (!eth->ppe.foe_table)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ cb = mtk_eth_setup_tc_block_cb;
|
||||
+ f->driver_block_list = &block_cb_list;
|
||||
+
|
||||
+ switch (f->command) {
|
||||
+ case FLOW_BLOCK_BIND:
|
||||
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
|
||||
+ if (block_cb) {
|
||||
+ flow_block_cb_incref(block_cb);
|
||||
+ return 0;
|
||||
+ }
|
||||
+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
|
||||
+ if (IS_ERR(block_cb))
|
||||
+ return PTR_ERR(block_cb);
|
||||
+
|
||||
+ flow_block_cb_add(block_cb, f);
|
||||
+ list_add_tail(&block_cb->driver_list, &block_cb_list);
|
||||
+ return 0;
|
||||
+ case FLOW_BLOCK_UNBIND:
|
||||
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
|
||||
+ if (!block_cb)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ if (flow_block_cb_decref(block_cb)) {
|
||||
+ flow_block_cb_remove(block_cb, f);
|
||||
+ list_del(&block_cb->driver_list);
|
||||
+ }
|
||||
+ return 0;
|
||||
+ default:
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
|
||||
+ void *type_data)
|
||||
+{
|
||||
+ if (type == TC_SETUP_FT)
|
||||
+ return mtk_eth_setup_tc_block(dev, type_data);
|
||||
+
|
||||
+ return -EOPNOTSUPP;
|
||||
+}
|
||||
+
|
||||
+int mtk_eth_offload_init(struct mtk_eth *eth)
|
||||
+{
|
||||
+ if (!eth->ppe.foe_table)
|
||||
+ return 0;
|
||||
+
|
||||
+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
|
||||
+}
|
@ -1,236 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Wed, 24 Mar 2021 02:30:55 +0100
|
||||
Subject: [PATCH] docs: nf_flowtable: update documentation with
|
||||
enhancements
|
||||
|
||||
This patch updates the flowtable documentation to describe recent
|
||||
enhancements:
|
||||
|
||||
- Offload action is available after the first packets go through the
|
||||
classic forwarding path.
|
||||
- IPv4 and IPv6 are supported. Only TCP and UDP layer 4 are supported at
|
||||
this stage.
|
||||
- Tuple has been augmented to track VLAN id and PPPoE session id.
|
||||
- Bridge and IP forwarding integration, including bridge VLAN filtering
|
||||
support.
|
||||
- Hardware offload support.
|
||||
- Describe the [OFFLOAD] and [HW_OFFLOAD] tags in the conntrack table
|
||||
listing.
|
||||
- Replace 'flow offload' by 'flow add' in example rulesets (preferred
|
||||
syntax).
|
||||
- Describe existing cache limitations.
|
||||
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/Documentation/networking/nf_flowtable.rst
|
||||
+++ b/Documentation/networking/nf_flowtable.rst
|
||||
@@ -4,35 +4,38 @@
|
||||
Netfilter's flowtable infrastructure
|
||||
====================================
|
||||
|
||||
-This documentation describes the software flowtable infrastructure available in
|
||||
-Netfilter since Linux kernel 4.16.
|
||||
+This documentation describes the Netfilter flowtable infrastructure which allows
|
||||
+you to define a fastpath through the flowtable datapath. This infrastructure
|
||||
+also provides hardware offload support. The flowtable supports the layer 3
|
||||
+IPv4 and IPv6 and the layer 4 TCP and UDP protocols.
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
-Initial packets follow the classic forwarding path, once the flow enters the
|
||||
-established state according to the conntrack semantics (ie. we have seen traffic
|
||||
-in both directions), then you can decide to offload the flow to the flowtable
|
||||
-from the forward chain via the 'flow offload' action available in nftables.
|
||||
-
|
||||
-Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the
|
||||
-output netdevice via neigh_xmit(), hence, they bypass the classic forwarding
|
||||
-path (the visible effect is that you do not see these packets from any of the
|
||||
-netfilter hooks coming after the ingress). In case of flowtable miss, the packet
|
||||
-follows the classic forward path.
|
||||
-
|
||||
-The flowtable uses a resizable hashtable, lookups are based on the following
|
||||
-7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source
|
||||
-and destination ports and the input interface (useful in case there are several
|
||||
-conntrack zones in place).
|
||||
-
|
||||
-Flowtables are populated via the 'flow offload' nftables action, so the user can
|
||||
-selectively specify what flows are placed into the flow table. Hence, packets
|
||||
-follow the classic forwarding path unless the user explicitly instruct packets
|
||||
-to use this new alternative forwarding path via nftables policy.
|
||||
+Once the first packet of the flow successfully goes through the IP forwarding
|
||||
+path, from the second packet on, you might decide to offload the flow to the
|
||||
+flowtable through your ruleset. The flowtable infrastructure provides a rule
|
||||
+action that allows you to specify when to add a flow to the flowtable.
|
||||
+
|
||||
+A packet that finds a matching entry in the flowtable (ie. flowtable hit) is
|
||||
+transmitted to the output netdevice via neigh_xmit(), hence, packets bypass the
|
||||
+classic IP forwarding path (the visible effect is that you do not see these
|
||||
+packets from any of the Netfilter hooks coming after ingress). In case that
|
||||
+there is no matching entry in the flowtable (ie. flowtable miss), the packet
|
||||
+follows the classic IP forwarding path.
|
||||
+
|
||||
+The flowtable uses a resizable hashtable. Lookups are based on the following
|
||||
+n-tuple selectors: layer 2 protocol encapsulation (VLAN and PPPoE), layer 3
|
||||
+source and destination, layer 4 source and destination ports and the input
|
||||
+interface (useful in case there are several conntrack zones in place).
|
||||
+
|
||||
+The 'flow add' action allows you to populate the flowtable, the user selectively
|
||||
+specifies what flows are placed into the flowtable. Hence, packets follow the
|
||||
+classic IP forwarding path unless the user explicitly instructs flows to use this
|
||||
+new alternative forwarding path via policy.
|
||||
|
||||
-This is represented in Fig.1, which describes the classic forwarding path
|
||||
-including the Netfilter hooks and the flowtable fastpath bypass.
|
||||
+The flowtable datapath is represented in Fig.1, which describes the classic IP
|
||||
+forwarding path including the Netfilter hooks and the flowtable fastpath bypass.
|
||||
|
||||
::
|
||||
|
||||
@@ -67,11 +70,13 @@ including the Netfilter hooks and the fl
|
||||
Fig.1 Netfilter hooks and flowtable interactions
|
||||
|
||||
The flowtable entry also stores the NAT configuration, so all packets are
|
||||
-mangled according to the NAT policy that matches the initial packets that went
|
||||
-through the classic forwarding path. The TTL is decremented before calling
|
||||
-neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding
|
||||
-path given that the transport selectors are missing, therefore flowtable lookup
|
||||
-is not possible.
|
||||
+mangled according to the NAT policy that is specified from the classic IP
|
||||
+forwarding path. The TTL is decremented before calling neigh_xmit(). Fragmented
|
||||
+traffic is passed up to follow the classic IP forwarding path given that the
|
||||
+transport header is missing, in this case, flowtable lookups are not possible.
|
||||
+TCP RST and FIN packets are also passed up to the classic IP forwarding path to
|
||||
+release the flow gracefully. Packets that exceed the MTU are also passed up to
|
||||
+the classic forwarding path to report packet-too-big ICMP errors to the sender.
|
||||
|
||||
Example configuration
|
||||
---------------------
|
||||
@@ -85,7 +90,7 @@ flowtable and add one rule to your forwa
|
||||
}
|
||||
chain y {
|
||||
type filter hook forward priority 0; policy accept;
|
||||
- ip protocol tcp flow offload @f
|
||||
+ ip protocol tcp flow add @f
|
||||
counter packets 0 bytes 0
|
||||
}
|
||||
}
|
||||
@@ -103,6 +108,117 @@ flow is offloaded, you will observe that
|
||||
does not get updated for the packets that are being forwarded through the
|
||||
forwarding bypass.
|
||||
|
||||
+You can identify offloaded flows through the [OFFLOAD] tag when listing your
|
||||
+connection tracking table.
|
||||
+
|
||||
+::
|
||||
+ # conntrack -L
|
||||
+ tcp 6 src=10.141.10.2 dst=192.168.10.2 sport=52728 dport=5201 src=192.168.10.2 dst=192.168.10.1 sport=5201 dport=52728 [OFFLOAD] mark=0 use=2
|
||||
+
|
||||
+
|
||||
+Layer 2 encapsulation
|
||||
+---------------------
|
||||
+
|
||||
+Since Linux kernel 5.13, the flowtable infrastructure discovers the real
|
||||
+netdevice behind VLAN and PPPoE netdevices. The flowtable software datapath
|
||||
+parses the VLAN and PPPoE layer 2 headers to extract the ethertype and the
|
||||
+VLAN ID / PPPoE session ID which are used for the flowtable lookups. The
|
||||
+flowtable datapath also deals with layer 2 decapsulation.
|
||||
+
|
||||
+You do not need to add the PPPoE and the VLAN devices to your flowtable,
|
||||
+instead the real device is sufficient for the flowtable to track your flows.
|
||||
+
|
||||
+Bridge and IP forwarding
|
||||
+------------------------
|
||||
+
|
||||
+Since Linux kernel 5.13, you can add bridge ports to the flowtable. The
|
||||
+flowtable infrastructure discovers the topology behind the bridge device. This
|
||||
+allows the flowtable to define a fastpath bypass between the bridge ports
|
||||
+(represented as eth1 and eth2 in the example figure below) and the gateway
|
||||
+device (represented as eth0) in your switch/router.
|
||||
+
|
||||
+::
|
||||
+ fastpath bypass
|
||||
+ .-------------------------.
|
||||
+ / \
|
||||
+ | IP forwarding |
|
||||
+ | / \ \/
|
||||
+ | br0 eth0 ..... eth0
|
||||
+ . / \ *host B*
|
||||
+ -> eth1 eth2
|
||||
+ . *switch/router*
|
||||
+ .
|
||||
+ .
|
||||
+ eth0
|
||||
+ *host A*
|
||||
+
|
||||
+The flowtable infrastructure also supports bridge VLAN filtering actions
|
||||
+such as PVID and untagged. You can also stack a classic VLAN device on top of
|
||||
+your bridge port.
|
||||
+
|
||||
+If you would like your flowtable to define a fastpath between your bridge
|
||||
+ports and your IP forwarding path, you have to add your bridge ports (as
|
||||
+represented by the real netdevice) to your flowtable definition.
|
||||
+
|
||||
+Counters
|
||||
+--------
|
||||
+
|
||||
+The flowtable can synchronize packet and byte counters with the existing
|
||||
+connection tracking entry by specifying the counter statement in your flowtable
|
||||
+definition, e.g.
|
||||
+
|
||||
+::
|
||||
+ table inet x {
|
||||
+ flowtable f {
|
||||
+ hook ingress priority 0; devices = { eth0, eth1 };
|
||||
+ counter
|
||||
+ }
|
||||
+ ...
|
||||
+ }
|
||||
+
|
||||
+Counter support is available since Linux kernel 5.7.
|
||||
+
|
||||
+Hardware offload
|
||||
+----------------
|
||||
+
|
||||
+If your network device provides hardware offload support, you can turn it on by
|
||||
+means of the 'offload' flag in your flowtable definition, e.g.
|
||||
+
|
||||
+::
|
||||
+ table inet x {
|
||||
+ flowtable f {
|
||||
+ hook ingress priority 0; devices = { eth0, eth1 };
|
||||
+ flags offload;
|
||||
+ }
|
||||
+ ...
|
||||
+ }
|
||||
+
|
||||
+There is a workqueue that adds the flows to the hardware. Note that a few
|
||||
+packets might still run over the flowtable software path until the workqueue has
|
||||
+a chance to offload the flow to the network device.
|
||||
+
|
||||
+You can identify hardware offloaded flows through the [HW_OFFLOAD] tag when
|
||||
+listing your connection tracking table. Please, note that the [OFFLOAD] tag
|
||||
+refers to the software offload mode, so there is a distinction between [OFFLOAD]
|
||||
+which refers to the software flowtable fastpath and [HW_OFFLOAD] which refers
|
||||
+to the hardware offload datapath being used by the flow.
|
||||
+
|
||||
+The flowtable hardware offload infrastructure also supports the DSA
|
||||
+(Distributed Switch Architecture).
|
||||
+
|
||||
+Limitations
|
||||
+-----------
|
||||
+
|
||||
+The flowtable behaves like a cache. The flowtable entries might get stale if
|
||||
+either the destination MAC address or the egress netdevice that is used for
|
||||
+transmission changes.
|
||||
+
|
||||
+This might be a problem if:
|
||||
+
|
||||
+- You run the flowtable in software mode and you combine bridge and IP
|
||||
+ forwarding in your setup.
|
||||
+- Hardware offload is enabled.
|
||||
+
|
||||
More reading
|
||||
------------
|
||||
|
@ -1,72 +0,0 @@
|
||||
From c5d66587b8900201e1530b7c18d41e87bd5812f4 Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Date: Thu, 15 Apr 2021 17:37:48 -0700
|
||||
Subject: [PATCH] net: ethernet: mediatek: ppe: fix busy wait loop
|
||||
|
||||
The intention is for the loop to timeout if the body does not succeed.
|
||||
The current logic calls time_is_before_jiffies(timeout) which is false
|
||||
until after the timeout, so the loop body never executes.
|
||||
|
||||
Fix by using readl_poll_timeout as a more standard and less error-prone
|
||||
solution.
|
||||
|
||||
Fixes: ba37b7caf1ed ("net: ethernet: mtk_eth_soc: add support for initializing the PPE")
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Cc: Felix Fietkau <nbd@nbd.name>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_ppe.c | 20 +++++++++-----------
|
||||
drivers/net/ethernet/mediatek/mtk_ppe.h | 1 +
|
||||
2 files changed, 10 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
|
||||
@@ -2,9 +2,8 @@
|
||||
/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
|
||||
|
||||
#include <linux/kernel.h>
|
||||
-#include <linux/jiffies.h>
|
||||
-#include <linux/delay.h>
|
||||
#include <linux/io.h>
|
||||
+#include <linux/iopoll.h>
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include "mtk_ppe.h"
|
||||
@@ -44,18 +43,17 @@ static u32 ppe_clear(struct mtk_ppe *ppe
|
||||
|
||||
static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
|
||||
{
|
||||
- unsigned long timeout = jiffies + HZ;
|
||||
-
|
||||
- while (time_is_before_jiffies(timeout)) {
|
||||
- if (!(ppe_r32(ppe, MTK_PPE_GLO_CFG) & MTK_PPE_GLO_CFG_BUSY))
|
||||
- return 0;
|
||||
+ int ret;
|
||||
+ u32 val;
|
||||
|
||||
- usleep_range(10, 20);
|
||||
- }
|
||||
+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
|
||||
+ !(val & MTK_PPE_GLO_CFG_BUSY),
|
||||
+ 20, MTK_PPE_WAIT_TIMEOUT_US);
|
||||
|
||||
- dev_err(ppe->dev, "PPE table busy");
|
||||
+ if (ret)
|
||||
+ dev_err(ppe->dev, "PPE table busy");
|
||||
|
||||
- return -ETIMEDOUT;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
|
||||
@@ -12,6 +12,7 @@
|
||||
#define MTK_PPE_ENTRIES_SHIFT 3
|
||||
#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
|
||||
#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
|
||||
+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
|
||||
|
||||
#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
|
||||
#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
|
@ -1,29 +0,0 @@
|
||||
From 6ecaf81d4ac6365f9284f9d68d74f7c209e74f98 Mon Sep 17 00:00:00 2001
|
||||
From: DENG Qingfang <dqfext@gmail.com>
|
||||
Date: Sat, 17 Apr 2021 15:29:04 +0800
|
||||
Subject: [PATCH] net: ethernet: mediatek: fix a typo bug in flow offloading
|
||||
|
||||
Issue was traffic problems after a while with increased ping times if
|
||||
flow offload is active. It turns out that key_offset with cookie is
|
||||
needed in rhashtable_params but was re-assigned to head_offset.
|
||||
Fix the assignment.
|
||||
|
||||
Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
|
||||
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
|
||||
Tested-by: Frank Wunderlich <frank-w@public-files.de>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
@@ -44,7 +44,7 @@ struct mtk_flow_entry {
|
||||
|
||||
static const struct rhashtable_params mtk_flow_ht_params = {
|
||||
.head_offset = offsetof(struct mtk_flow_entry, node),
|
||||
- .head_offset = offsetof(struct mtk_flow_entry, cookie),
|
||||
+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
|
||||
.key_len = sizeof(unsigned long),
|
||||
.automatic_shrinking = true,
|
||||
};
|
@ -1,38 +0,0 @@
|
||||
From 5196c417854942e218a59ec87bf7d414b3bd581e Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:20:55 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: unmap RX data before calling
|
||||
build_skb
|
||||
|
||||
Since build_skb accesses the data area (for initializing shinfo), dma unmap
|
||||
needs to happen before that call
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
[Ilya: split build_skb cleanup fix into a separate commit]
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1319,6 +1319,9 @@ static int mtk_poll_rx(struct napi_struc
|
||||
goto release_desc;
|
||||
}
|
||||
|
||||
+ dma_unmap_single(eth->dev, trxd.rxd1,
|
||||
+ ring->buf_size, DMA_FROM_DEVICE);
|
||||
+
|
||||
/* receive data */
|
||||
skb = build_skb(data, ring->frag_size);
|
||||
if (unlikely(!skb)) {
|
||||
@@ -1328,8 +1331,6 @@ static int mtk_poll_rx(struct napi_struc
|
||||
}
|
||||
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
|
||||
|
||||
- dma_unmap_single(eth->dev, trxd.rxd1,
|
||||
- ring->buf_size, DMA_FROM_DEVICE);
|
||||
pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
|
||||
skb->dev = netdev;
|
||||
skb_put(skb, pktlen);
|
@ -1,38 +0,0 @@
|
||||
From 787082ab9f7be4711e52f67c388535eda74a1269 Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Date: Thu, 22 Apr 2021 22:20:56 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: fix build_skb cleanup
|
||||
|
||||
In case build_skb fails, call skb_free_frag on the correct pointer. Also
|
||||
update the DMA structures with the new mapping before exiting, because
|
||||
the mapping was successful
|
||||
|
||||
Suggested-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1325,9 +1325,9 @@ static int mtk_poll_rx(struct napi_struc
|
||||
/* receive data */
|
||||
skb = build_skb(data, ring->frag_size);
|
||||
if (unlikely(!skb)) {
|
||||
- skb_free_frag(new_data);
|
||||
+ skb_free_frag(data);
|
||||
netdev->stats.rx_dropped++;
|
||||
- goto release_desc;
|
||||
+ goto skip_rx;
|
||||
}
|
||||
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
|
||||
|
||||
@@ -1347,6 +1347,7 @@ static int mtk_poll_rx(struct napi_struc
|
||||
skb_record_rx_queue(skb, 0);
|
||||
napi_gro_receive(napi, skb);
|
||||
|
||||
+skip_rx:
|
||||
ring->data[idx] = new_data;
|
||||
rxd->rxd1 = (unsigned int)dma_addr;
|
||||
|
@ -1,77 +0,0 @@
|
||||
From c30c4a82739090a2de4a4e3f245355ea4fb3ec14 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:20:57 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: use napi_consume_skb
|
||||
|
||||
Should improve performance, since it can use bulk free
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 19 ++++++++++++-------
|
||||
1 file changed, 12 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -879,7 +879,8 @@ static int txd_to_idx(struct mtk_tx_ring
|
||||
return ((void *)dma - (void *)ring->dma) / sizeof(*dma);
|
||||
}
|
||||
|
||||
-static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
|
||||
+static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf,
|
||||
+ bool napi)
|
||||
{
|
||||
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
|
||||
if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
|
||||
@@ -911,8 +912,12 @@ static void mtk_tx_unmap(struct mtk_eth
|
||||
|
||||
tx_buf->flags = 0;
|
||||
if (tx_buf->skb &&
|
||||
- (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC))
|
||||
- dev_kfree_skb_any(tx_buf->skb);
|
||||
+ (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC)) {
|
||||
+ if (napi)
|
||||
+ napi_consume_skb(tx_buf->skb, napi);
|
||||
+ else
|
||||
+ dev_kfree_skb_any(tx_buf->skb);
|
||||
+ }
|
||||
tx_buf->skb = NULL;
|
||||
}
|
||||
|
||||
@@ -1090,7 +1095,7 @@ err_dma:
|
||||
tx_buf = mtk_desc_to_tx_buf(ring, itxd);
|
||||
|
||||
/* unmap dma */
|
||||
- mtk_tx_unmap(eth, tx_buf);
|
||||
+ mtk_tx_unmap(eth, tx_buf, false);
|
||||
|
||||
itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
|
||||
if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
|
||||
@@ -1409,7 +1414,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
|
||||
done[mac]++;
|
||||
budget--;
|
||||
}
|
||||
- mtk_tx_unmap(eth, tx_buf);
|
||||
+ mtk_tx_unmap(eth, tx_buf, true);
|
||||
|
||||
ring->last_free = desc;
|
||||
atomic_inc(&ring->free_count);
|
||||
@@ -1446,7 +1451,7 @@ static int mtk_poll_tx_pdma(struct mtk_e
|
||||
budget--;
|
||||
}
|
||||
|
||||
- mtk_tx_unmap(eth, tx_buf);
|
||||
+ mtk_tx_unmap(eth, tx_buf, true);
|
||||
|
||||
desc = &ring->dma[cpu];
|
||||
ring->last_free = desc;
|
||||
@@ -1648,7 +1653,7 @@ static void mtk_tx_clean(struct mtk_eth
|
||||
|
||||
if (ring->buf) {
|
||||
for (i = 0; i < MTK_DMA_SIZE; i++)
|
||||
- mtk_tx_unmap(eth, &ring->buf[i]);
|
||||
+ mtk_tx_unmap(eth, &ring->buf[i], false);
|
||||
kfree(ring->buf);
|
||||
ring->buf = NULL;
|
||||
}
|
@ -1,30 +0,0 @@
|
||||
From 3630d519d7c3eab92567658690e44ffe0517d109 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:20:58 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: reduce MDIO bus access latency
|
||||
|
||||
usleep_range often ends up sleeping much longer than the 10-20us provided
|
||||
as a range here. This causes significant latency in mdio bus accesses,
|
||||
which easily adds multiple seconds to the boot time on MT7621 when polling
|
||||
DSA slave ports.
|
||||
Use cond_resched instead of usleep_range, since the MDIO access does not
|
||||
take much time
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -86,7 +86,7 @@ static int mtk_mdio_busy_wait(struct mtk
|
||||
return 0;
|
||||
if (time_after(jiffies, t_start + PHY_IAC_TIMEOUT))
|
||||
break;
|
||||
- usleep_range(10, 20);
|
||||
+ cond_resched();
|
||||
}
|
||||
|
||||
dev_err(eth->dev, "mdio: MDIO timeout\n");
|
@ -1,54 +0,0 @@
|
||||
From 16ef670789b252b221700adc413497ed2f941d8a Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:20:59 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: remove unnecessary TX queue stops
|
||||
|
||||
When running short on descriptors, only stop the queue for the netdev that
|
||||
tx was attempted for. By the time something tries to send on the other
|
||||
netdev, the ring might have some more room already.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 15 ++-------------
|
||||
1 file changed, 2 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1152,17 +1152,6 @@ static void mtk_wake_queue(struct mtk_et
|
||||
}
|
||||
}
|
||||
|
||||
-static void mtk_stop_queue(struct mtk_eth *eth)
|
||||
-{
|
||||
- int i;
|
||||
-
|
||||
- for (i = 0; i < MTK_MAC_COUNT; i++) {
|
||||
- if (!eth->netdev[i])
|
||||
- continue;
|
||||
- netif_stop_queue(eth->netdev[i]);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
struct mtk_mac *mac = netdev_priv(dev);
|
||||
@@ -1183,7 +1172,7 @@ static netdev_tx_t mtk_start_xmit(struct
|
||||
|
||||
tx_num = mtk_cal_txd_req(skb);
|
||||
if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
|
||||
- mtk_stop_queue(eth);
|
||||
+ netif_stop_queue(dev);
|
||||
netif_err(eth, tx_queued, dev,
|
||||
"Tx Ring full when queue awake!\n");
|
||||
spin_unlock(&eth->page_lock);
|
||||
@@ -1209,7 +1198,7 @@ static netdev_tx_t mtk_start_xmit(struct
|
||||
goto drop;
|
||||
|
||||
if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
|
||||
- mtk_stop_queue(eth);
|
||||
+ netif_stop_queue(dev);
|
||||
|
||||
spin_unlock(&eth->page_lock);
|
||||
|
@ -1,37 +0,0 @@
|
||||
From 59555a8d0dd39bf60b7ca1ba5e7393d293f7398d Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:00 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: use larger burst size for QDMA TX
|
||||
|
||||
Improves tx performance
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -2214,7 +2214,7 @@ static int mtk_start_dma(struct mtk_eth
|
||||
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
|
||||
mtk_w32(eth,
|
||||
MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
|
||||
- MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
|
||||
+ MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO |
|
||||
MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
|
||||
MTK_RX_BT_32DWORDS,
|
||||
MTK_QDMA_GLO_CFG);
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -202,7 +202,7 @@
|
||||
#define MTK_RX_BT_32DWORDS (3 << 11)
|
||||
#define MTK_NDP_CO_PRO BIT(10)
|
||||
#define MTK_TX_WB_DDONE BIT(6)
|
||||
-#define MTK_DMA_SIZE_16DWORDS (2 << 4)
|
||||
+#define MTK_TX_BT_32DWORDS (3 << 4)
|
||||
#define MTK_RX_DMA_BUSY BIT(3)
|
||||
#define MTK_TX_DMA_BUSY BIT(1)
|
||||
#define MTK_RX_DMA_EN BIT(2)
|
@ -1,26 +0,0 @@
|
||||
From 6b4423b258b91032c50a5efca15d3d9bb194ea1d Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:01 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: increase DMA ring sizes
|
||||
|
||||
256 descriptors is not enough for multi-gigabit traffic under load on
|
||||
MT7622. Bump it to 512 to improve performance.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -21,7 +21,7 @@
|
||||
#define MTK_QDMA_PAGE_SIZE 2048
|
||||
#define MTK_MAX_RX_LENGTH 1536
|
||||
#define MTK_TX_DMA_BUF_LEN 0x3fff
|
||||
-#define MTK_DMA_SIZE 256
|
||||
+#define MTK_DMA_SIZE 512
|
||||
#define MTK_NAPI_WEIGHT 64
|
||||
#define MTK_MAC_COUNT 2
|
||||
#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
|
@ -1,313 +0,0 @@
|
||||
From e9229ffd550b2d8c4997c67a501dbc3919fd4e26 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:02 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: implement dynamic interrupt
|
||||
moderation
|
||||
|
||||
Reduces the number of interrupts under load
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
[Ilya: add documentation for new struct fields]
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/Kconfig | 1 +
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 96 +++++++++++++++++++--
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 41 +++++++--
|
||||
3 files changed, 124 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/Kconfig
|
||||
+++ b/drivers/net/ethernet/mediatek/Kconfig
|
||||
@@ -10,6 +10,7 @@ if NET_VENDOR_MEDIATEK
|
||||
config NET_MEDIATEK_SOC
|
||||
tristate "MediaTek SoC Gigabit Ethernet support"
|
||||
select PHYLINK
|
||||
+ select DIMLIB
|
||||
help
|
||||
This driver supports the gigabit ethernet MACs in the
|
||||
MediaTek SoC family.
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1254,12 +1254,13 @@ static void mtk_update_rx_cpu_idx(struct
|
||||
static int mtk_poll_rx(struct napi_struct *napi, int budget,
|
||||
struct mtk_eth *eth)
|
||||
{
|
||||
+ struct dim_sample dim_sample = {};
|
||||
struct mtk_rx_ring *ring;
|
||||
int idx;
|
||||
struct sk_buff *skb;
|
||||
u8 *data, *new_data;
|
||||
struct mtk_rx_dma *rxd, trxd;
|
||||
- int done = 0;
|
||||
+ int done = 0, bytes = 0;
|
||||
|
||||
while (done < budget) {
|
||||
struct net_device *netdev;
|
||||
@@ -1333,6 +1334,7 @@ static int mtk_poll_rx(struct napi_struc
|
||||
else
|
||||
skb_checksum_none_assert(skb);
|
||||
skb->protocol = eth_type_trans(skb, netdev);
|
||||
+ bytes += pktlen;
|
||||
|
||||
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
|
||||
(trxd.rxd2 & RX_DMA_VTAG))
|
||||
@@ -1365,6 +1367,12 @@ rx_done:
|
||||
mtk_update_rx_cpu_idx(eth);
|
||||
}
|
||||
|
||||
+ eth->rx_packets += done;
|
||||
+ eth->rx_bytes += bytes;
|
||||
+ dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes,
|
||||
+ &dim_sample);
|
||||
+ net_dim(&eth->rx_dim, dim_sample);
|
||||
+
|
||||
return done;
|
||||
}
|
||||
|
||||
@@ -1457,6 +1465,7 @@ static int mtk_poll_tx_pdma(struct mtk_e
|
||||
static int mtk_poll_tx(struct mtk_eth *eth, int budget)
|
||||
{
|
||||
struct mtk_tx_ring *ring = &eth->tx_ring;
|
||||
+ struct dim_sample dim_sample = {};
|
||||
unsigned int done[MTK_MAX_DEVS];
|
||||
unsigned int bytes[MTK_MAX_DEVS];
|
||||
int total = 0, i;
|
||||
@@ -1474,8 +1483,14 @@ static int mtk_poll_tx(struct mtk_eth *e
|
||||
continue;
|
||||
netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
|
||||
total += done[i];
|
||||
+ eth->tx_packets += done[i];
|
||||
+ eth->tx_bytes += bytes[i];
|
||||
}
|
||||
|
||||
+ dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes,
|
||||
+ &dim_sample);
|
||||
+ net_dim(&eth->tx_dim, dim_sample);
|
||||
+
|
||||
if (mtk_queue_stopped(eth) &&
|
||||
(atomic_read(&ring->free_count) > ring->thresh))
|
||||
mtk_wake_queue(eth);
|
||||
@@ -2150,6 +2165,7 @@ static irqreturn_t mtk_handle_irq_rx(int
|
||||
{
|
||||
struct mtk_eth *eth = _eth;
|
||||
|
||||
+ eth->rx_events++;
|
||||
if (likely(napi_schedule_prep(&eth->rx_napi))) {
|
||||
__napi_schedule(&eth->rx_napi);
|
||||
mtk_rx_irq_disable(eth, MTK_RX_DONE_INT);
|
||||
@@ -2162,6 +2178,7 @@ static irqreturn_t mtk_handle_irq_tx(int
|
||||
{
|
||||
struct mtk_eth *eth = _eth;
|
||||
|
||||
+ eth->tx_events++;
|
||||
if (likely(napi_schedule_prep(&eth->tx_napi))) {
|
||||
__napi_schedule(&eth->tx_napi);
|
||||
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
|
||||
@@ -2346,6 +2363,9 @@ static int mtk_stop(struct net_device *d
|
||||
napi_disable(&eth->tx_napi);
|
||||
napi_disable(&eth->rx_napi);
|
||||
|
||||
+ cancel_work_sync(&eth->rx_dim.work);
|
||||
+ cancel_work_sync(&eth->tx_dim.work);
|
||||
+
|
||||
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
|
||||
mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
|
||||
mtk_stop_dma(eth, MTK_PDMA_GLO_CFG);
|
||||
@@ -2398,6 +2418,64 @@ err_disable_clks:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static void mtk_dim_rx(struct work_struct *work)
|
||||
+{
|
||||
+ struct dim *dim = container_of(work, struct dim, work);
|
||||
+ struct mtk_eth *eth = container_of(dim, struct mtk_eth, rx_dim);
|
||||
+ struct dim_cq_moder cur_profile;
|
||||
+ u32 val, cur;
|
||||
+
|
||||
+ cur_profile = net_dim_get_rx_moderation(eth->rx_dim.mode,
|
||||
+ dim->profile_ix);
|
||||
+ spin_lock_bh(&eth->dim_lock);
|
||||
+
|
||||
+ val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
|
||||
+ val &= MTK_PDMA_DELAY_TX_MASK;
|
||||
+ val |= MTK_PDMA_DELAY_RX_EN;
|
||||
+
|
||||
+ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
|
||||
+ val |= cur << MTK_PDMA_DELAY_RX_PTIME_SHIFT;
|
||||
+
|
||||
+ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
|
||||
+ val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT;
|
||||
+
|
||||
+ mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
|
||||
+ mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
|
||||
+
|
||||
+ spin_unlock_bh(&eth->dim_lock);
|
||||
+
|
||||
+ dim->state = DIM_START_MEASURE;
|
||||
+}
|
||||
+
|
||||
+static void mtk_dim_tx(struct work_struct *work)
|
||||
+{
|
||||
+ struct dim *dim = container_of(work, struct dim, work);
|
||||
+ struct mtk_eth *eth = container_of(dim, struct mtk_eth, tx_dim);
|
||||
+ struct dim_cq_moder cur_profile;
|
||||
+ u32 val, cur;
|
||||
+
|
||||
+ cur_profile = net_dim_get_tx_moderation(eth->tx_dim.mode,
|
||||
+ dim->profile_ix);
|
||||
+ spin_lock_bh(&eth->dim_lock);
|
||||
+
|
||||
+ val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
|
||||
+ val &= MTK_PDMA_DELAY_RX_MASK;
|
||||
+ val |= MTK_PDMA_DELAY_TX_EN;
|
||||
+
|
||||
+ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
|
||||
+ val |= cur << MTK_PDMA_DELAY_TX_PTIME_SHIFT;
|
||||
+
|
||||
+ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
|
||||
+ val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT;
|
||||
+
|
||||
+ mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
|
||||
+ mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
|
||||
+
|
||||
+ spin_unlock_bh(&eth->dim_lock);
|
||||
+
|
||||
+ dim->state = DIM_START_MEASURE;
|
||||
+}
|
||||
+
|
||||
static int mtk_hw_init(struct mtk_eth *eth)
|
||||
{
|
||||
int i, val, ret;
|
||||
@@ -2419,9 +2497,6 @@ static int mtk_hw_init(struct mtk_eth *e
|
||||
goto err_disable_pm;
|
||||
}
|
||||
|
||||
- /* enable interrupt delay for RX */
|
||||
- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
|
||||
-
|
||||
/* disable delay and normal interrupt */
|
||||
mtk_tx_irq_disable(eth, ~0);
|
||||
mtk_rx_irq_disable(eth, ~0);
|
||||
@@ -2460,11 +2535,11 @@ static int mtk_hw_init(struct mtk_eth *e
|
||||
/* Enable RX VLan Offloading */
|
||||
mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
|
||||
|
||||
- /* enable interrupt delay for RX */
|
||||
- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
|
||||
+ /* set interrupt delays based on current Net DIM sample */
|
||||
+ mtk_dim_rx(&eth->rx_dim.work);
|
||||
+ mtk_dim_tx(&eth->tx_dim.work);
|
||||
|
||||
/* disable delay and normal interrupt */
|
||||
- mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
|
||||
mtk_tx_irq_disable(eth, ~0);
|
||||
mtk_rx_irq_disable(eth, ~0);
|
||||
|
||||
@@ -2969,6 +3044,13 @@ static int mtk_probe(struct platform_dev
|
||||
spin_lock_init(&eth->page_lock);
|
||||
spin_lock_init(&eth->tx_irq_lock);
|
||||
spin_lock_init(&eth->rx_irq_lock);
|
||||
+ spin_lock_init(&eth->dim_lock);
|
||||
+
|
||||
+ eth->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
|
||||
+ INIT_WORK(&eth->rx_dim.work, mtk_dim_rx);
|
||||
+
|
||||
+ eth->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
|
||||
+ INIT_WORK(&eth->tx_dim.work, mtk_dim_tx);
|
||||
|
||||
if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
|
||||
eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/phylink.h>
|
||||
#include <linux/rhashtable.h>
|
||||
+#include <linux/dim.h>
|
||||
#include "mtk_ppe.h"
|
||||
|
||||
#define MTK_QDMA_PAGE_SIZE 2048
|
||||
@@ -136,13 +137,18 @@
|
||||
|
||||
/* PDMA Delay Interrupt Register */
|
||||
#define MTK_PDMA_DELAY_INT 0xa0c
|
||||
+#define MTK_PDMA_DELAY_RX_MASK GENMASK(15, 0)
|
||||
#define MTK_PDMA_DELAY_RX_EN BIT(15)
|
||||
-#define MTK_PDMA_DELAY_RX_PINT 4
|
||||
#define MTK_PDMA_DELAY_RX_PINT_SHIFT 8
|
||||
-#define MTK_PDMA_DELAY_RX_PTIME 4
|
||||
-#define MTK_PDMA_DELAY_RX_DELAY \
|
||||
- (MTK_PDMA_DELAY_RX_EN | MTK_PDMA_DELAY_RX_PTIME | \
|
||||
- (MTK_PDMA_DELAY_RX_PINT << MTK_PDMA_DELAY_RX_PINT_SHIFT))
|
||||
+#define MTK_PDMA_DELAY_RX_PTIME_SHIFT 0
|
||||
+
|
||||
+#define MTK_PDMA_DELAY_TX_MASK GENMASK(31, 16)
|
||||
+#define MTK_PDMA_DELAY_TX_EN BIT(31)
|
||||
+#define MTK_PDMA_DELAY_TX_PINT_SHIFT 24
|
||||
+#define MTK_PDMA_DELAY_TX_PTIME_SHIFT 16
|
||||
+
|
||||
+#define MTK_PDMA_DELAY_PINT_MASK 0x7f
|
||||
+#define MTK_PDMA_DELAY_PTIME_MASK 0xff
|
||||
|
||||
/* PDMA Interrupt Status Register */
|
||||
#define MTK_PDMA_INT_STATUS 0xa20
|
||||
@@ -224,6 +230,7 @@
|
||||
/* QDMA Interrupt Status Register */
|
||||
#define MTK_QDMA_INT_STATUS 0x1A18
|
||||
#define MTK_RX_DONE_DLY BIT(30)
|
||||
+#define MTK_TX_DONE_DLY BIT(28)
|
||||
#define MTK_RX_DONE_INT3 BIT(19)
|
||||
#define MTK_RX_DONE_INT2 BIT(18)
|
||||
#define MTK_RX_DONE_INT1 BIT(17)
|
||||
@@ -233,8 +240,7 @@
|
||||
#define MTK_TX_DONE_INT1 BIT(1)
|
||||
#define MTK_TX_DONE_INT0 BIT(0)
|
||||
#define MTK_RX_DONE_INT MTK_RX_DONE_DLY
|
||||
-#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
|
||||
- MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
|
||||
+#define MTK_TX_DONE_INT MTK_TX_DONE_DLY
|
||||
|
||||
/* QDMA Interrupt grouping registers */
|
||||
#define MTK_QDMA_INT_GRP1 0x1a20
|
||||
@@ -863,6 +869,7 @@ struct mtk_sgmii {
|
||||
* @page_lock: Make sure that register operations are atomic
|
||||
* @tx_irq__lock: Make sure that IRQ register operations are atomic
|
||||
* @rx_irq__lock: Make sure that IRQ register operations are atomic
|
||||
+ * @dim_lock: Make sure that Net DIM operations are atomic
|
||||
* @dummy_dev: we run 2 netdevs on 1 physical DMA ring and need a
|
||||
* dummy for NAPI to work
|
||||
* @netdev: The netdev instances
|
||||
@@ -881,6 +888,14 @@ struct mtk_sgmii {
|
||||
* @rx_ring_qdma: Pointer to the memory holding info about the QDMA RX ring
|
||||
* @tx_napi: The TX NAPI struct
|
||||
* @rx_napi: The RX NAPI struct
|
||||
+ * @rx_events: Net DIM RX event counter
|
||||
+ * @rx_packets: Net DIM RX packet counter
|
||||
+ * @rx_bytes: Net DIM RX byte counter
|
||||
+ * @rx_dim: Net DIM RX context
|
||||
+ * @tx_events: Net DIM TX event counter
|
||||
+ * @tx_packets: Net DIM TX packet counter
|
||||
+ * @tx_bytes: Net DIM TX byte counter
|
||||
+ * @tx_dim: Net DIM TX context
|
||||
* @scratch_ring: Newer SoCs need memory for a second HW managed TX ring
|
||||
* @phy_scratch_ring: physical address of scratch_ring
|
||||
* @scratch_head: The scratch memory that scratch_ring points to.
|
||||
@@ -925,6 +940,18 @@ struct mtk_eth {
|
||||
|
||||
const struct mtk_soc_data *soc;
|
||||
|
||||
+ spinlock_t dim_lock;
|
||||
+
|
||||
+ u32 rx_events;
|
||||
+ u32 rx_packets;
|
||||
+ u32 rx_bytes;
|
||||
+ struct dim rx_dim;
|
||||
+
|
||||
+ u32 tx_events;
|
||||
+ u32 tx_packets;
|
||||
+ u32 tx_bytes;
|
||||
+ struct dim tx_dim;
|
||||
+
|
||||
u32 tx_int_mask_reg;
|
||||
u32 tx_int_status_reg;
|
||||
u32 rx_dma_l4_valid;
|
@ -1,73 +0,0 @@
|
||||
From 4e6bf609569c59b6bd6acf4a607c096cbd820d79 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:03 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: cache HW pointer of last freed TX
|
||||
descriptor
|
||||
|
||||
The value is only updated by the CPU, so it is cheaper to access from the
|
||||
ring data structure than from a hardware register.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++----
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 ++
|
||||
2 files changed, 6 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1385,7 +1385,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
|
||||
struct mtk_tx_buf *tx_buf;
|
||||
u32 cpu, dma;
|
||||
|
||||
- cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
|
||||
+ cpu = ring->last_free_ptr;
|
||||
dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
|
||||
|
||||
desc = mtk_qdma_phys_to_virt(ring, cpu);
|
||||
@@ -1419,6 +1419,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
|
||||
cpu = next_cpu;
|
||||
}
|
||||
|
||||
+ ring->last_free_ptr = cpu;
|
||||
mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
|
||||
|
||||
return budget;
|
||||
@@ -1619,6 +1620,7 @@ static int mtk_tx_alloc(struct mtk_eth *
|
||||
atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
|
||||
ring->next_free = &ring->dma[0];
|
||||
ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
|
||||
+ ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz));
|
||||
ring->thresh = MAX_SKB_FRAGS;
|
||||
|
||||
/* make sure that all changes to the dma ring are flushed before we
|
||||
@@ -1632,9 +1634,7 @@ static int mtk_tx_alloc(struct mtk_eth *
|
||||
mtk_w32(eth,
|
||||
ring->phys + ((MTK_DMA_SIZE - 1) * sz),
|
||||
MTK_QTX_CRX_PTR);
|
||||
- mtk_w32(eth,
|
||||
- ring->phys + ((MTK_DMA_SIZE - 1) * sz),
|
||||
- MTK_QTX_DRX_PTR);
|
||||
+ mtk_w32(eth, ring->last_free_ptr, MTK_QTX_DRX_PTR);
|
||||
mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
|
||||
MTK_QTX_CFG(0));
|
||||
} else {
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -656,6 +656,7 @@ struct mtk_tx_buf {
|
||||
* @phys: The physical addr of tx_buf
|
||||
* @next_free: Pointer to the next free descriptor
|
||||
* @last_free: Pointer to the last free descriptor
|
||||
+ * @last_free_ptr: Hardware pointer value of the last free descriptor
|
||||
* @thresh: The threshold of minimum amount of free descriptors
|
||||
* @free_count: QDMA uses a linked list. Track how many free descriptors
|
||||
* are present
|
||||
@@ -666,6 +667,7 @@ struct mtk_tx_ring {
|
||||
dma_addr_t phys;
|
||||
struct mtk_tx_dma *next_free;
|
||||
struct mtk_tx_dma *last_free;
|
||||
+ u32 last_free_ptr;
|
||||
u16 thresh;
|
||||
atomic_t free_count;
|
||||
int dma_size;
|
@ -1,49 +0,0 @@
|
||||
From 816ac3e6e67bdd78d86226c6eb53619780750e92 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:04 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: only read the full RX descriptor
|
||||
if DMA is done
|
||||
|
||||
Uncached memory access is expensive, and there is no need to access all
|
||||
descriptor words if we can't process them anyway
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 12 ++++++++----
|
||||
1 file changed, 8 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -798,13 +798,18 @@ static inline int mtk_max_buf_size(int f
|
||||
return buf_size;
|
||||
}
|
||||
|
||||
-static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
|
||||
+static inline bool mtk_rx_get_desc(struct mtk_rx_dma *rxd,
|
||||
struct mtk_rx_dma *dma_rxd)
|
||||
{
|
||||
- rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
|
||||
rxd->rxd2 = READ_ONCE(dma_rxd->rxd2);
|
||||
+ if (!(rxd->rxd2 & RX_DMA_DONE))
|
||||
+ return false;
|
||||
+
|
||||
+ rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
|
||||
rxd->rxd3 = READ_ONCE(dma_rxd->rxd3);
|
||||
rxd->rxd4 = READ_ONCE(dma_rxd->rxd4);
|
||||
+
|
||||
+ return true;
|
||||
}
|
||||
|
||||
/* the qdma core needs scratch memory to be setup */
|
||||
@@ -1276,8 +1281,7 @@ static int mtk_poll_rx(struct napi_struc
|
||||
rxd = &ring->dma[idx];
|
||||
data = ring->data[idx];
|
||||
|
||||
- mtk_rx_get_desc(&trxd, rxd);
|
||||
- if (!(trxd.rxd2 & RX_DMA_DONE))
|
||||
+ if (!mtk_rx_get_desc(&trxd, rxd))
|
||||
break;
|
||||
|
||||
/* find out which mac the packet come from. values start at 1 */
|
@ -1,39 +0,0 @@
|
||||
From 16769a8923fad5a5377253bcd76b0e0d64976c73 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:05 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: reduce unnecessary interrupts
|
||||
|
||||
Avoid rearming interrupt if napi_complete returns false
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 9 +++++----
|
||||
1 file changed, 5 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1540,8 +1540,8 @@ static int mtk_napi_tx(struct napi_struc
|
||||
if (status & MTK_TX_DONE_INT)
|
||||
return budget;
|
||||
|
||||
- napi_complete(napi);
|
||||
- mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
|
||||
+ if (napi_complete(napi))
|
||||
+ mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
|
||||
|
||||
return tx_done;
|
||||
}
|
||||
@@ -1574,8 +1574,9 @@ poll_again:
|
||||
remain_budget -= rx_done;
|
||||
goto poll_again;
|
||||
}
|
||||
- napi_complete(napi);
|
||||
- mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
|
||||
+
|
||||
+ if (napi_complete(napi))
|
||||
+ mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
|
||||
|
||||
return rx_done + budget - remain_budget;
|
||||
}
|
@ -1,110 +0,0 @@
|
||||
From db2c7b353db3b3f71b55f9ff4627d8a786446fbe Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Date: Thu, 22 Apr 2021 22:21:06 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: rework NAPI callbacks
|
||||
|
||||
Use napi_complete_done to communicate total TX and RX work done to NAPI.
|
||||
Count total RX work up instead of remaining work down for clarity.
|
||||
Remove unneeded local variables for clarity. Use do {} while instead of
|
||||
goto for clarity.
|
||||
|
||||
Suggested-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 54 +++++++++------------
|
||||
1 file changed, 24 insertions(+), 30 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -1517,7 +1517,6 @@ static void mtk_handle_status_irq(struct
|
||||
static int mtk_napi_tx(struct napi_struct *napi, int budget)
|
||||
{
|
||||
struct mtk_eth *eth = container_of(napi, struct mtk_eth, tx_napi);
|
||||
- u32 status, mask;
|
||||
int tx_done = 0;
|
||||
|
||||
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
|
||||
@@ -1526,21 +1525,19 @@ static int mtk_napi_tx(struct napi_struc
|
||||
tx_done = mtk_poll_tx(eth, budget);
|
||||
|
||||
if (unlikely(netif_msg_intr(eth))) {
|
||||
- status = mtk_r32(eth, eth->tx_int_status_reg);
|
||||
- mask = mtk_r32(eth, eth->tx_int_mask_reg);
|
||||
dev_info(eth->dev,
|
||||
- "done tx %d, intr 0x%08x/0x%x\n",
|
||||
- tx_done, status, mask);
|
||||
+ "done tx %d, intr 0x%08x/0x%x\n", tx_done,
|
||||
+ mtk_r32(eth, eth->tx_int_status_reg),
|
||||
+ mtk_r32(eth, eth->tx_int_mask_reg));
|
||||
}
|
||||
|
||||
if (tx_done == budget)
|
||||
return budget;
|
||||
|
||||
- status = mtk_r32(eth, eth->tx_int_status_reg);
|
||||
- if (status & MTK_TX_DONE_INT)
|
||||
+ if (mtk_r32(eth, eth->tx_int_status_reg) & MTK_TX_DONE_INT)
|
||||
return budget;
|
||||
|
||||
- if (napi_complete(napi))
|
||||
+ if (napi_complete_done(napi, tx_done))
|
||||
mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
|
||||
|
||||
return tx_done;
|
||||
@@ -1549,36 +1546,33 @@ static int mtk_napi_tx(struct napi_struc
|
||||
static int mtk_napi_rx(struct napi_struct *napi, int budget)
|
||||
{
|
||||
struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
|
||||
- u32 status, mask;
|
||||
- int rx_done = 0;
|
||||
- int remain_budget = budget;
|
||||
+ int rx_done_total = 0;
|
||||
|
||||
mtk_handle_status_irq(eth);
|
||||
|
||||
-poll_again:
|
||||
- mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
|
||||
- rx_done = mtk_poll_rx(napi, remain_budget, eth);
|
||||
+ do {
|
||||
+ int rx_done;
|
||||
|
||||
- if (unlikely(netif_msg_intr(eth))) {
|
||||
- status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
|
||||
- mask = mtk_r32(eth, MTK_PDMA_INT_MASK);
|
||||
- dev_info(eth->dev,
|
||||
- "done rx %d, intr 0x%08x/0x%x\n",
|
||||
- rx_done, status, mask);
|
||||
- }
|
||||
- if (rx_done == remain_budget)
|
||||
- return budget;
|
||||
+ mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
|
||||
+ rx_done = mtk_poll_rx(napi, budget - rx_done_total, eth);
|
||||
+ rx_done_total += rx_done;
|
||||
+
|
||||
+ if (unlikely(netif_msg_intr(eth))) {
|
||||
+ dev_info(eth->dev,
|
||||
+ "done rx %d, intr 0x%08x/0x%x\n", rx_done,
|
||||
+ mtk_r32(eth, MTK_PDMA_INT_STATUS),
|
||||
+ mtk_r32(eth, MTK_PDMA_INT_MASK));
|
||||
+ }
|
||||
|
||||
- status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
|
||||
- if (status & MTK_RX_DONE_INT) {
|
||||
- remain_budget -= rx_done;
|
||||
- goto poll_again;
|
||||
- }
|
||||
+ if (rx_done_total == budget)
|
||||
+ return budget;
|
||||
+
|
||||
+ } while (mtk_r32(eth, MTK_PDMA_INT_STATUS) & MTK_RX_DONE_INT);
|
||||
|
||||
- if (napi_complete(napi))
|
||||
+ if (napi_complete_done(napi, rx_done_total))
|
||||
mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
|
||||
|
||||
- return rx_done + budget - remain_budget;
|
||||
+ return rx_done_total;
|
||||
}
|
||||
|
||||
static int mtk_tx_alloc(struct mtk_eth *eth)
|
@ -1,47 +0,0 @@
|
||||
From fa817272c37ef78e25dc14e4760ac78a7043a18a Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Apr 2021 22:21:07 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: set PPE flow hash as skb hash if
|
||||
present
|
||||
|
||||
This improves GRO performance
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
[Ilya: Use MTK_RXD4_FOE_ENTRY instead of GENMASK(13, 0)]
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/pinctrl/devinfo.h>
|
||||
#include <linux/phylink.h>
|
||||
+#include <linux/jhash.h>
|
||||
#include <net/dsa.h>
|
||||
|
||||
#include "mtk_eth_soc.h"
|
||||
@@ -1271,6 +1272,7 @@ static int mtk_poll_rx(struct napi_struc
|
||||
struct net_device *netdev;
|
||||
unsigned int pktlen;
|
||||
dma_addr_t dma_addr;
|
||||
+ u32 hash;
|
||||
int mac;
|
||||
|
||||
ring = mtk_get_rx_ring(eth);
|
||||
@@ -1340,6 +1342,12 @@ static int mtk_poll_rx(struct napi_struc
|
||||
skb->protocol = eth_type_trans(skb, netdev);
|
||||
bytes += pktlen;
|
||||
|
||||
+ hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
|
||||
+ if (hash != MTK_RXD4_FOE_ENTRY) {
|
||||
+ hash = jhash_1word(hash, 0);
|
||||
+ skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
|
||||
+ }
|
||||
+
|
||||
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
|
||||
(trxd.rxd2 & RX_DMA_VTAG))
|
||||
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
|
@ -1,71 +0,0 @@
|
||||
From 3bc8e0aff23be0526af0dbc7973a8866a08d73f1 Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Date: Thu, 22 Apr 2021 22:21:08 -0700
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: use iopoll.h macro for DMA init
|
||||
|
||||
Replace a tight busy-wait loop without a pause with a standard
|
||||
readx_poll_timeout_atomic routine with a 5 us poll period.
|
||||
|
||||
Tested by booting a MT7621 device to ensure the driver initializes
|
||||
properly.
|
||||
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 29 +++++++++------------
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
|
||||
2 files changed, 14 insertions(+), 17 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -2054,25 +2054,22 @@ static int mtk_set_features(struct net_d
|
||||
/* wait for DMA to finish whatever it is doing before we start using it again */
|
||||
static int mtk_dma_busy_wait(struct mtk_eth *eth)
|
||||
{
|
||||
- unsigned long t_start = jiffies;
|
||||
+ unsigned int reg;
|
||||
+ int ret;
|
||||
+ u32 val;
|
||||
|
||||
- while (1) {
|
||||
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
|
||||
- if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) &
|
||||
- (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
|
||||
- return 0;
|
||||
- } else {
|
||||
- if (!(mtk_r32(eth, MTK_PDMA_GLO_CFG) &
|
||||
- (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
|
||||
- return 0;
|
||||
- }
|
||||
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
|
||||
+ reg = MTK_QDMA_GLO_CFG;
|
||||
+ else
|
||||
+ reg = MTK_PDMA_GLO_CFG;
|
||||
|
||||
- if (time_after(jiffies, t_start + MTK_DMA_BUSY_TIMEOUT))
|
||||
- break;
|
||||
- }
|
||||
+ ret = readx_poll_timeout_atomic(__raw_readl, eth->base + reg, val,
|
||||
+ !(val & (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)),
|
||||
+ 5, MTK_DMA_BUSY_TIMEOUT_US);
|
||||
+ if (ret)
|
||||
+ dev_err(eth->dev, "DMA init timeout\n");
|
||||
|
||||
- dev_err(eth->dev, "DMA init timeout\n");
|
||||
- return -1;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static int mtk_dma_init(struct mtk_eth *eth)
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -213,7 +213,7 @@
|
||||
#define MTK_TX_DMA_BUSY BIT(1)
|
||||
#define MTK_RX_DMA_EN BIT(2)
|
||||
#define MTK_TX_DMA_EN BIT(0)
|
||||
-#define MTK_DMA_BUSY_TIMEOUT HZ
|
||||
+#define MTK_DMA_BUSY_TIMEOUT_US 1000000
|
||||
|
||||
/* QDMA Reset Index Register */
|
||||
#define MTK_QDMA_RST_IDX 0x1A08
|
@ -1,63 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Sun, 18 Apr 2021 23:11:44 +0200
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: missing mutex
|
||||
|
||||
Patch 2ed37183abb7 ("netfilter: flowtable: separate replace, destroy and
|
||||
stats to different workqueues") splits the workqueue per event type. Add
|
||||
a mutex to serialize updates.
|
||||
|
||||
Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
|
||||
Reported-by: Frank Wunderlich <frank-w@public-files.de>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
@@ -392,6 +392,8 @@ mtk_flow_offload_stats(struct mtk_eth *e
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static DEFINE_MUTEX(mtk_flow_offload_mutex);
|
||||
+
|
||||
static int
|
||||
mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
|
||||
{
|
||||
@@ -399,6 +401,7 @@ mtk_eth_setup_tc_block_cb(enum tc_setup_
|
||||
struct net_device *dev = cb_priv;
|
||||
struct mtk_mac *mac = netdev_priv(dev);
|
||||
struct mtk_eth *eth = mac->hw;
|
||||
+ int err;
|
||||
|
||||
if (!tc_can_offload(dev))
|
||||
return -EOPNOTSUPP;
|
||||
@@ -406,18 +409,24 @@ mtk_eth_setup_tc_block_cb(enum tc_setup_
|
||||
if (type != TC_SETUP_CLSFLOWER)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
+ mutex_lock(&mtk_flow_offload_mutex);
|
||||
switch (cls->command) {
|
||||
case FLOW_CLS_REPLACE:
|
||||
- return mtk_flow_offload_replace(eth, cls);
|
||||
+ err = mtk_flow_offload_replace(eth, cls);
|
||||
+ break;
|
||||
case FLOW_CLS_DESTROY:
|
||||
- return mtk_flow_offload_destroy(eth, cls);
|
||||
+ err = mtk_flow_offload_destroy(eth, cls);
|
||||
+ break;
|
||||
case FLOW_CLS_STATS:
|
||||
- return mtk_flow_offload_stats(eth, cls);
|
||||
+ err = mtk_flow_offload_stats(eth, cls);
|
||||
+ break;
|
||||
default:
|
||||
- return -EOPNOTSUPP;
|
||||
+ err = -EOPNOTSUPP;
|
||||
+ break;
|
||||
}
|
||||
+ mutex_unlock(&mtk_flow_offload_mutex);
|
||||
|
||||
- return 0;
|
||||
+ return err;
|
||||
}
|
||||
|
||||
static int
|
@ -1,22 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Sun, 18 Apr 2021 23:11:45 +0200
|
||||
Subject: [PATCH] net: ethernet: mtk_eth_soc: handle VLAN pop action
|
||||
|
||||
Do not hit EOPNOTSUPP when flowtable offload provides a VLAN pop action.
|
||||
|
||||
Fixes: efce49dfe6a8 ("netfilter: flowtable: add vlan pop action offload support")
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
|
||||
@@ -233,6 +233,8 @@ mtk_flow_offload_replace(struct mtk_eth
|
||||
data.vlan.proto = act->vlan.proto;
|
||||
data.vlan.num++;
|
||||
break;
|
||||
+ case FLOW_ACTION_VLAN_POP:
|
||||
+ break;
|
||||
case FLOW_ACTION_PPPOE_PUSH:
|
||||
if (data.pppoe.num == 1)
|
||||
return -EOPNOTSUPP;
|
@ -1,159 +0,0 @@
|
||||
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Sun, 28 Mar 2021 23:08:55 +0200
|
||||
Subject: [PATCH] netfilter: flowtable: dst_check() from garbage collector path
|
||||
|
||||
Move dst_check() to the garbage collector path. Stale routes trigger the
|
||||
flow entry teardown state which makes affected flows go back to the
|
||||
classic forwarding path to re-evaluate flow offloading.
|
||||
|
||||
IPv6 requires the dst cookie to work, store it in the flow_tuple,
|
||||
otherwise dst_check() always fails.
|
||||
|
||||
Fixes: e5075c0badaa ("netfilter: flowtable: call dst_check() to fall back to classic forwarding")
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -129,7 +129,10 @@ struct flow_offload_tuple {
|
||||
in_vlan_ingress:2;
|
||||
u16 mtu;
|
||||
union {
|
||||
- struct dst_entry *dst_cache;
|
||||
+ struct {
|
||||
+ struct dst_entry *dst_cache;
|
||||
+ u32 dst_cookie;
|
||||
+ };
|
||||
struct {
|
||||
u32 ifidx;
|
||||
u32 hw_ifidx;
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -74,6 +74,18 @@ err_ct_refcnt:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flow_offload_alloc);
|
||||
|
||||
+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
|
||||
+{
|
||||
+ const struct rt6_info *rt;
|
||||
+
|
||||
+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
|
||||
+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
|
||||
+ return rt6_get_cookie(rt);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int flow_offload_fill_route(struct flow_offload *flow,
|
||||
const struct nf_flow_route *route,
|
||||
enum flow_offload_tuple_dir dir)
|
||||
@@ -116,6 +128,7 @@ static int flow_offload_fill_route(struc
|
||||
return -1;
|
||||
|
||||
flow_tuple->dst_cache = dst;
|
||||
+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
|
||||
break;
|
||||
}
|
||||
flow_tuple->xmit_type = route->tuple[dir].xmit_type;
|
||||
@@ -389,11 +402,33 @@ nf_flow_table_iterate(struct nf_flowtabl
|
||||
return err;
|
||||
}
|
||||
|
||||
+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
|
||||
+{
|
||||
+ struct dst_entry *dst;
|
||||
+
|
||||
+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
+ dst = tuple->dst_cache;
|
||||
+ if (!dst_check(dst, tuple->dst_cookie))
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
|
||||
+{
|
||||
+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
|
||||
+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
|
||||
+}
|
||||
+
|
||||
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
|
||||
{
|
||||
struct nf_flowtable *flow_table = data;
|
||||
|
||||
- if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct))
|
||||
+ if (nf_flow_has_expired(flow) ||
|
||||
+ nf_ct_is_dying(flow->ct) ||
|
||||
+ nf_flow_has_stale_dst(flow))
|
||||
set_bit(NF_FLOW_TEARDOWN, &flow->flags);
|
||||
|
||||
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
|
||||
--- a/net/netfilter/nf_flow_table_ip.c
|
||||
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||
@@ -364,15 +364,6 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
- tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
- if (!dst_check(&rt->dst, 0)) {
|
||||
- flow_offload_teardown(flow);
|
||||
- return NF_ACCEPT;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
@@ -391,6 +382,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
|
||||
IPCB(skb)->iif = skb->dev->ifindex;
|
||||
IPCB(skb)->flags = IPSKB_FORWARDED;
|
||||
@@ -399,6 +391,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||
|
||||
switch (tuplehash->tuple.xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
|
||||
outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||
@@ -607,15 +600,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
|
||||
return NF_ACCEPT;
|
||||
|
||||
- if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
|
||||
- tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
|
||||
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
- if (!dst_check(&rt->dst, 0)) {
|
||||
- flow_offload_teardown(flow);
|
||||
- return NF_ACCEPT;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
if (skb_try_make_writable(skb, thoff + hdrsize))
|
||||
return NF_DROP;
|
||||
|
||||
@@ -633,6 +617,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
|
||||
|
||||
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
|
||||
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
|
||||
IP6CB(skb)->iif = skb->dev->ifindex;
|
||||
IP6CB(skb)->flags = IP6SKB_FORWARDED;
|
||||
@@ -641,6 +626,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||
|
||||
switch (tuplehash->tuple.xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
|
||||
outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
@ -1,94 +0,0 @@
|
||||
From: Oz Shlomo <ozsh@nvidia.com>
|
||||
Date: Thu, 3 Jun 2021 15:12:33 +0300
|
||||
Subject: [PATCH] netfilter: conntrack: Introduce tcp offload timeout
|
||||
configuration
|
||||
|
||||
TCP connections may be offloaded from nf conntrack to nf flow table.
|
||||
Offloaded connections are aged after 30 seconds of inactivity.
|
||||
Once aged, ownership is returned to conntrack with a hard coded pickup
|
||||
time of 120 seconds, after which the connection may be deleted.
|
||||
The current aging intervals may be too aggressive for some users.
|
||||
|
||||
Provide users with the ability to control the nf flow table offload
|
||||
aging and pickup time intervals via sysctl parameter as a pre-step for
|
||||
configuring the nf flow table GC timeout intervals.
|
||||
|
||||
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
|
||||
Reviewed-by: Paul Blakey <paulb@nvidia.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netns/conntrack.h
|
||||
+++ b/include/net/netns/conntrack.h
|
||||
@@ -27,6 +27,10 @@ struct nf_tcp_net {
|
||||
int tcp_loose;
|
||||
int tcp_be_liberal;
|
||||
int tcp_max_retrans;
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ unsigned int offload_timeout;
|
||||
+ unsigned int offload_pickup;
|
||||
+#endif
|
||||
};
|
||||
|
||||
enum udp_conntrack {
|
||||
--- a/net/netfilter/nf_conntrack_proto_tcp.c
|
||||
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
|
||||
@@ -1438,6 +1438,11 @@ void nf_conntrack_tcp_init_net(struct ne
|
||||
tn->tcp_loose = nf_ct_tcp_loose;
|
||||
tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
|
||||
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
|
||||
+
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ tn->offload_timeout = 30 * HZ;
|
||||
+ tn->offload_pickup = 120 * HZ;
|
||||
+#endif
|
||||
}
|
||||
|
||||
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
|
||||
--- a/net/netfilter/nf_conntrack_standalone.c
|
||||
+++ b/net/netfilter/nf_conntrack_standalone.c
|
||||
@@ -567,6 +567,10 @@ enum nf_ct_sysctl_index {
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
|
||||
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
|
||||
+#endif
|
||||
NF_SYSCTL_CT_PROTO_TCP_LOOSE,
|
||||
NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
|
||||
NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
|
||||
@@ -758,6 +762,20 @@ static struct ctl_table nf_ct_sysctl_tab
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_jiffies,
|
||||
},
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
|
||||
+ .procname = "nf_flowtable_tcp_timeout",
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec_jiffies,
|
||||
+ },
|
||||
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
|
||||
+ .procname = "nf_flowtable_tcp_pickup",
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec_jiffies,
|
||||
+ },
|
||||
+#endif
|
||||
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
|
||||
.procname = "nf_conntrack_tcp_loose",
|
||||
.maxlen = sizeof(int),
|
||||
@@ -967,6 +985,12 @@ static void nf_conntrack_standalone_init
|
||||
XASSIGN(LIBERAL, &tn->tcp_be_liberal);
|
||||
XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
|
||||
#undef XASSIGN
|
||||
+
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
|
||||
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
|
||||
+#endif
|
||||
+
|
||||
}
|
||||
|
||||
static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
|
@ -1,92 +0,0 @@
|
||||
From: Oz Shlomo <ozsh@nvidia.com>
|
||||
Date: Thu, 3 Jun 2021 15:12:34 +0300
|
||||
Subject: [PATCH] netfilter: conntrack: Introduce udp offload timeout
|
||||
configuration
|
||||
|
||||
UDP connections may be offloaded from nf conntrack to nf flow table.
|
||||
Offloaded connections are aged after 30 seconds of inactivity.
|
||||
Once aged, ownership is returned to conntrack with a hard coded pickup
|
||||
time of 30 seconds, after which the connection may be deleted.
|
||||
The current aging intervals may be too aggressive for some users.
|
||||
|
||||
Provide users with the ability to control the nf flow table offload
|
||||
aging and pickup time intervals via sysctl parameter as a pre-step for
|
||||
configuring the nf flow table GC timeout intervals.
|
||||
|
||||
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
|
||||
Reviewed-by: Paul Blakey <paulb@nvidia.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netns/conntrack.h
|
||||
+++ b/include/net/netns/conntrack.h
|
||||
@@ -41,6 +41,10 @@ enum udp_conntrack {
|
||||
|
||||
struct nf_udp_net {
|
||||
unsigned int timeouts[UDP_CT_MAX];
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ unsigned int offload_timeout;
|
||||
+ unsigned int offload_pickup;
|
||||
+#endif
|
||||
};
|
||||
|
||||
struct nf_icmp_net {
|
||||
--- a/net/netfilter/nf_conntrack_proto_udp.c
|
||||
+++ b/net/netfilter/nf_conntrack_proto_udp.c
|
||||
@@ -273,6 +273,11 @@ void nf_conntrack_udp_init_net(struct ne
|
||||
|
||||
for (i = 0; i < UDP_CT_MAX; i++)
|
||||
un->timeouts[i] = udp_timeouts[i];
|
||||
+
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ un->offload_timeout = 30 * HZ;
|
||||
+ un->offload_pickup = 30 * HZ;
|
||||
+#endif
|
||||
}
|
||||
|
||||
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
|
||||
--- a/net/netfilter/nf_conntrack_standalone.c
|
||||
+++ b/net/netfilter/nf_conntrack_standalone.c
|
||||
@@ -576,6 +576,10 @@ enum nf_ct_sysctl_index {
|
||||
NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
|
||||
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
|
||||
+#endif
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
|
||||
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
|
||||
#ifdef CONFIG_NF_CT_PROTO_SCTP
|
||||
@@ -810,6 +814,20 @@ static struct ctl_table nf_ct_sysctl_tab
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_jiffies,
|
||||
},
|
||||
+#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD)
|
||||
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
|
||||
+ .procname = "nf_flowtable_udp_timeout",
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec_jiffies,
|
||||
+ },
|
||||
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
|
||||
+ .procname = "nf_flowtable_udp_pickup",
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec_jiffies,
|
||||
+ },
|
||||
+#endif
|
||||
[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
|
||||
.procname = "nf_conntrack_icmp_timeout",
|
||||
.maxlen = sizeof(unsigned int),
|
||||
@@ -1078,6 +1096,10 @@ static int nf_conntrack_standalone_init_
|
||||
table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
|
||||
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
|
||||
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
|
||||
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
|
||||
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
|
||||
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
|
||||
+#endif
|
||||
|
||||
nf_conntrack_standalone_init_tcp_sysctl(net, table);
|
||||
nf_conntrack_standalone_init_sctp_sysctl(net, table);
|
@ -1,134 +0,0 @@
|
||||
From: Oz Shlomo <ozsh@nvidia.com>
|
||||
Date: Thu, 3 Jun 2021 15:12:35 +0300
|
||||
Subject: [PATCH] netfilter: flowtable: Set offload timeouts according to proto
|
||||
values
|
||||
|
||||
Currently the aging period for tcp/udp connections is hard coded to
|
||||
30 seconds. Aged tcp/udp connections configure a hard coded 120/30
|
||||
seconds pickup timeout for conntrack.
|
||||
This configuration may be too aggressive or permissive for some users.
|
||||
|
||||
Dynamically configure the nf flow table GC timeout intervals according
|
||||
to the user defined values.
|
||||
|
||||
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
|
||||
Reviewed-by: Paul Blakey <paulb@nvidia.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
|
||||
--- a/include/net/netfilter/nf_flow_table.h
|
||||
+++ b/include/net/netfilter/nf_flow_table.h
|
||||
@@ -174,6 +174,8 @@ struct flow_offload {
|
||||
#define NF_FLOW_TIMEOUT (30 * HZ)
|
||||
#define nf_flowtable_time_stamp (u32)jiffies
|
||||
|
||||
+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
|
||||
+
|
||||
static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
|
||||
{
|
||||
return (__s32)(timeout - nf_flowtable_time_stamp);
|
||||
--- a/net/netfilter/nf_flow_table_core.c
|
||||
+++ b/net/netfilter/nf_flow_table_core.c
|
||||
@@ -175,12 +175,10 @@ static void flow_offload_fixup_tcp(struc
|
||||
tcp->seen[1].td_maxwin = 0;
|
||||
}
|
||||
|
||||
-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
|
||||
-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
|
||||
-
|
||||
static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
|
||||
{
|
||||
const struct nf_conntrack_l4proto *l4proto;
|
||||
+ struct net *net = nf_ct_net(ct);
|
||||
int l4num = nf_ct_protonum(ct);
|
||||
unsigned int timeout;
|
||||
|
||||
@@ -188,12 +186,17 @@ static void flow_offload_fixup_ct_timeou
|
||||
if (!l4proto)
|
||||
return;
|
||||
|
||||
- if (l4num == IPPROTO_TCP)
|
||||
- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
|
||||
- else if (l4num == IPPROTO_UDP)
|
||||
- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
|
||||
- else
|
||||
+ if (l4num == IPPROTO_TCP) {
|
||||
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
|
||||
+
|
||||
+ timeout = tn->offload_pickup;
|
||||
+ } else if (l4num == IPPROTO_UDP) {
|
||||
+ struct nf_udp_net *tn = nf_udp_pernet(net);
|
||||
+
|
||||
+ timeout = tn->offload_pickup;
|
||||
+ } else {
|
||||
return;
|
||||
+ }
|
||||
|
||||
if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
|
||||
WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
|
||||
@@ -265,11 +268,35 @@ static const struct rhashtable_params nf
|
||||
.automatic_shrinking = true,
|
||||
};
|
||||
|
||||
+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
|
||||
+{
|
||||
+ const struct nf_conntrack_l4proto *l4proto;
|
||||
+ unsigned long timeout = NF_FLOW_TIMEOUT;
|
||||
+ struct net *net = nf_ct_net(flow->ct);
|
||||
+ int l4num = nf_ct_protonum(flow->ct);
|
||||
+
|
||||
+ l4proto = nf_ct_l4proto_find(l4num);
|
||||
+ if (!l4proto)
|
||||
+ return timeout;
|
||||
+
|
||||
+ if (l4num == IPPROTO_TCP) {
|
||||
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
|
||||
+
|
||||
+ timeout = tn->offload_timeout;
|
||||
+ } else if (l4num == IPPROTO_UDP) {
|
||||
+ struct nf_udp_net *tn = nf_udp_pernet(net);
|
||||
+
|
||||
+ timeout = tn->offload_timeout;
|
||||
+ }
|
||||
+
|
||||
+ return timeout;
|
||||
+}
|
||||
+
|
||||
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
|
||||
{
|
||||
int err;
|
||||
|
||||
- flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
|
||||
+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
|
||||
|
||||
err = rhashtable_insert_fast(&flow_table->rhashtable,
|
||||
&flow->tuplehash[0].node,
|
||||
@@ -301,7 +328,7 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
|
||||
void flow_offload_refresh(struct nf_flowtable *flow_table,
|
||||
struct flow_offload *flow)
|
||||
{
|
||||
- flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
|
||||
+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
|
||||
|
||||
if (likely(!nf_flowtable_hw_offload(flow_table)))
|
||||
return;
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -885,7 +885,7 @@ static void flow_offload_work_stats(stru
|
||||
|
||||
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
|
||||
offload->flow->timeout = max_t(u64, offload->flow->timeout,
|
||||
- lastused + NF_FLOW_TIMEOUT);
|
||||
+ lastused + flow_offload_get_timeout(offload->flow));
|
||||
|
||||
if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
|
||||
if (stats[0].pkts)
|
||||
@@ -989,7 +989,7 @@ void nf_flow_offload_stats(struct nf_flo
|
||||
__s32 delta;
|
||||
|
||||
delta = nf_flow_timeout_delta(flow->timeout);
|
||||
- if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10))
|
||||
+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
|
||||
return;
|
||||
|
||||
offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
|
@ -1,138 +0,0 @@
|
||||
From 4fd59792097a6b2fb949d41264386a7ecade469e Mon Sep 17 00:00:00 2001
|
||||
From: DENG Qingfang <dqfext@gmail.com>
|
||||
Date: Mon, 25 Jan 2021 12:20:46 +0800
|
||||
Subject: [PATCH] net: ethernet: mediatek: support setting MTU
|
||||
|
||||
MT762x HW, except for MT7628, supports frame length up to 2048
|
||||
(maximum length on GDM), so allow setting MTU up to 2030.
|
||||
|
||||
Also set the default frame length to the hardware default 1518.
|
||||
|
||||
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Link: https://lore.kernel.org/r/20210125042046.5599-1-dqfext@gmail.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 43 ++++++++++++++++++---
|
||||
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 12 ++++--
|
||||
2 files changed, 47 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
|
||||
@@ -355,7 +355,7 @@ static void mtk_mac_config(struct phylin
|
||||
/* Setup gmac */
|
||||
mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
|
||||
mcr_new = mcr_cur;
|
||||
- mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
|
||||
+ mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
|
||||
MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK;
|
||||
|
||||
/* Only update control register when needed! */
|
||||
@@ -782,8 +782,8 @@ static void mtk_get_stats64(struct net_d
|
||||
static inline int mtk_max_frag_size(int mtu)
|
||||
{
|
||||
/* make sure buf_size will be at least MTK_MAX_RX_LENGTH */
|
||||
- if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH)
|
||||
- mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
|
||||
+ if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH_2K)
|
||||
+ mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN;
|
||||
|
||||
return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) +
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
@@ -794,7 +794,7 @@ static inline int mtk_max_buf_size(int f
|
||||
int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN -
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
|
||||
- WARN_ON(buf_size < MTK_MAX_RX_LENGTH);
|
||||
+ WARN_ON(buf_size < MTK_MAX_RX_LENGTH_2K);
|
||||
|
||||
return buf_size;
|
||||
}
|
||||
@@ -2606,6 +2606,35 @@ static void mtk_uninit(struct net_device
|
||||
mtk_rx_irq_disable(eth, ~0);
|
||||
}
|
||||
|
||||
+static int mtk_change_mtu(struct net_device *dev, int new_mtu)
|
||||
+{
|
||||
+ int length = new_mtu + MTK_RX_ETH_HLEN;
|
||||
+ struct mtk_mac *mac = netdev_priv(dev);
|
||||
+ struct mtk_eth *eth = mac->hw;
|
||||
+ u32 mcr_cur, mcr_new;
|
||||
+
|
||||
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
|
||||
+ mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
|
||||
+ mcr_new = mcr_cur & ~MAC_MCR_MAX_RX_MASK;
|
||||
+
|
||||
+ if (length <= 1518)
|
||||
+ mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1518);
|
||||
+ else if (length <= 1536)
|
||||
+ mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1536);
|
||||
+ else if (length <= 1552)
|
||||
+ mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1552);
|
||||
+ else
|
||||
+ mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_2048);
|
||||
+
|
||||
+ if (mcr_new != mcr_cur)
|
||||
+ mtk_w32(mac->hw, mcr_new, MTK_MAC_MCR(mac->id));
|
||||
+ }
|
||||
+
|
||||
+ dev->mtu = new_mtu;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
|
||||
{
|
||||
struct mtk_mac *mac = netdev_priv(dev);
|
||||
@@ -2902,6 +2931,7 @@ static const struct net_device_ops mtk_n
|
||||
.ndo_set_mac_address = mtk_set_mac_address,
|
||||
.ndo_validate_addr = eth_validate_addr,
|
||||
.ndo_do_ioctl = mtk_do_ioctl,
|
||||
+ .ndo_change_mtu = mtk_change_mtu,
|
||||
.ndo_tx_timeout = mtk_tx_timeout,
|
||||
.ndo_get_stats64 = mtk_get_stats64,
|
||||
.ndo_fix_features = mtk_fix_features,
|
||||
@@ -3004,7 +3034,10 @@ static int mtk_add_mac(struct mtk_eth *e
|
||||
eth->netdev[id]->irq = eth->irq[0];
|
||||
eth->netdev[id]->dev.of_node = np;
|
||||
|
||||
- eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
|
||||
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
|
||||
+ eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
|
||||
+ else
|
||||
+ eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN;
|
||||
|
||||
return 0;
|
||||
|
||||
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
|
||||
@@ -20,12 +20,13 @@
|
||||
#include "mtk_ppe.h"
|
||||
|
||||
#define MTK_QDMA_PAGE_SIZE 2048
|
||||
-#define MTK_MAX_RX_LENGTH 1536
|
||||
+#define MTK_MAX_RX_LENGTH 1536
|
||||
+#define MTK_MAX_RX_LENGTH_2K 2048
|
||||
#define MTK_TX_DMA_BUF_LEN 0x3fff
|
||||
#define MTK_DMA_SIZE 512
|
||||
#define MTK_NAPI_WEIGHT 64
|
||||
#define MTK_MAC_COUNT 2
|
||||
-#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
|
||||
+#define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN)
|
||||
#define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
|
||||
#define MTK_DMA_DUMMY_DESC 0xffffffff
|
||||
#define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \
|
||||
@@ -352,7 +353,12 @@
|
||||
|
||||
/* Mac control registers */
|
||||
#define MTK_MAC_MCR(x) (0x10100 + (x * 0x100))
|
||||
-#define MAC_MCR_MAX_RX_1536 BIT(24)
|
||||
+#define MAC_MCR_MAX_RX_MASK GENMASK(25, 24)
|
||||
+#define MAC_MCR_MAX_RX(_x) (MAC_MCR_MAX_RX_MASK & ((_x) << 24))
|
||||
+#define MAC_MCR_MAX_RX_1518 0x0
|
||||
+#define MAC_MCR_MAX_RX_1536 0x1
|
||||
+#define MAC_MCR_MAX_RX_1552 0x2
|
||||
+#define MAC_MCR_MAX_RX_2048 0x3
|
||||
#define MAC_MCR_IPG_CFG (BIT(18) | BIT(16))
|
||||
#define MAC_MCR_FORCE_MODE BIT(15)
|
||||
#define MAC_MCR_TX_EN BIT(14)
|
@ -1,108 +0,0 @@
|
||||
From c329e5afb42ff0a88285eb4d8a391a18793e4777 Mon Sep 17 00:00:00 2001
|
||||
From: David Bauer <mail@david-bauer.net>
|
||||
Date: Thu, 15 Apr 2021 03:26:50 +0200
|
||||
Subject: [PATCH] net: phy: at803x: select correct page on config init
|
||||
|
||||
The Atheros AR8031 and AR8033 expose different registers for SGMII/Fiber
|
||||
as well as the copper side of the PHY depending on the BT_BX_REG_SEL bit
|
||||
in the chip configure register.
|
||||
|
||||
The driver assumes the copper side is selected on probe, but this might
|
||||
not be the case depending on which page was last selected by the
|
||||
bootloader. Notably, Ubiquiti UniFi bootloaders show this behavior.
|
||||
|
||||
Select the copper page when probing to circumvent this.
|
||||
|
||||
Signed-off-by: David Bauer <mail@david-bauer.net>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/phy/at803x.c | 50 +++++++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 49 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/phy/at803x.c
|
||||
+++ b/drivers/net/phy/at803x.c
|
||||
@@ -139,6 +139,9 @@
|
||||
#define ATH8035_PHY_ID 0x004dd072
|
||||
#define AT8030_PHY_ID_MASK 0xffffffef
|
||||
|
||||
+#define AT803X_PAGE_FIBER 0
|
||||
+#define AT803X_PAGE_COPPER 1
|
||||
+
|
||||
MODULE_DESCRIPTION("Qualcomm Atheros AR803x PHY driver");
|
||||
MODULE_AUTHOR("Matus Ujhelyi");
|
||||
MODULE_LICENSE("GPL");
|
||||
@@ -190,6 +193,35 @@ static int at803x_debug_reg_mask(struct
|
||||
return phy_write(phydev, AT803X_DEBUG_DATA, val);
|
||||
}
|
||||
|
||||
+static int at803x_write_page(struct phy_device *phydev, int page)
|
||||
+{
|
||||
+ int mask;
|
||||
+ int set;
|
||||
+
|
||||
+ if (page == AT803X_PAGE_COPPER) {
|
||||
+ set = AT803X_BT_BX_REG_SEL;
|
||||
+ mask = 0;
|
||||
+ } else {
|
||||
+ set = 0;
|
||||
+ mask = AT803X_BT_BX_REG_SEL;
|
||||
+ }
|
||||
+
|
||||
+ return __phy_modify(phydev, AT803X_REG_CHIP_CONFIG, mask, set);
|
||||
+}
|
||||
+
|
||||
+static int at803x_read_page(struct phy_device *phydev)
|
||||
+{
|
||||
+ int ccr = __phy_read(phydev, AT803X_REG_CHIP_CONFIG);
|
||||
+
|
||||
+ if (ccr < 0)
|
||||
+ return ccr;
|
||||
+
|
||||
+ if (ccr & AT803X_BT_BX_REG_SEL)
|
||||
+ return AT803X_PAGE_COPPER;
|
||||
+
|
||||
+ return AT803X_PAGE_FIBER;
|
||||
+}
|
||||
+
|
||||
static int at803x_enable_rx_delay(struct phy_device *phydev)
|
||||
{
|
||||
return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, 0,
|
||||
@@ -508,6 +540,7 @@ static int at803x_probe(struct phy_devic
|
||||
{
|
||||
struct device *dev = &phydev->mdio.dev;
|
||||
struct at803x_priv *priv;
|
||||
+ int ret;
|
||||
|
||||
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
|
||||
if (!priv)
|
||||
@@ -515,7 +548,20 @@ static int at803x_probe(struct phy_devic
|
||||
|
||||
phydev->priv = priv;
|
||||
|
||||
- return at803x_parse_dt(phydev);
|
||||
+ ret = at803x_parse_dt(phydev);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ /* Some bootloaders leave the fiber page selected.
|
||||
+ * Switch to the copper page, as otherwise we read
|
||||
+ * the PHY capabilities from the fiber side.
|
||||
+ */
|
||||
+ if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
|
||||
+ ret = phy_select_page(phydev, AT803X_PAGE_COPPER);
|
||||
+ ret = phy_restore_page(phydev, AT803X_PAGE_COPPER, ret);
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static void at803x_remove(struct phy_device *phydev)
|
||||
@@ -1097,6 +1143,8 @@ static struct phy_driver at803x_driver[]
|
||||
.get_wol = at803x_get_wol,
|
||||
.suspend = at803x_suspend,
|
||||
.resume = at803x_resume,
|
||||
+ .read_page = at803x_read_page,
|
||||
+ .write_page = at803x_write_page,
|
||||
/* PHY_GBIT_FEATURES */
|
||||
.read_status = at803x_read_status,
|
||||
.aneg_done = at803x_aneg_done,
|
@ -1,73 +0,0 @@
|
||||
From 8f7e876273e294b732b42af2e5e6bba91d798954 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Walle <michael@walle.cc>
|
||||
Date: Tue, 20 Apr 2021 12:29:29 +0200
|
||||
Subject: [PATCH] net: phy: at803x: fix probe error if copper page is selected
|
||||
|
||||
The commit c329e5afb42f ("net: phy: at803x: select correct page on
|
||||
config init") selects the copper page during probe. This fails if the
|
||||
copper page was already selected. In this case, the value of the copper
|
||||
page (which is 1) is propagated through phy_restore_page() and is
|
||||
finally returned for at803x_probe(). Fix it, by just using the
|
||||
at803x_write_page() directly.
|
||||
|
||||
Also in case of an error, the regulator is not disabled and leads to a
|
||||
WARN_ON() when the probe fails. This couldn't happen before, because
|
||||
at803x_parse_dt() was the last call in at803x_probe(). It is hard to
|
||||
see, that the parse_dt() actually enables the regulator. Thus move the
|
||||
regulator_enable() to the probe function and undo it in case of an
|
||||
error.
|
||||
|
||||
Fixes: c329e5afb42f ("net: phy: at803x: select correct page on config init")
|
||||
Signed-off-by: Michael Walle <michael@walle.cc>
|
||||
Reviewed-by: David Bauer <mail@david-bauer.net>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/phy/at803x.c | 23 +++++++++++++++++------
|
||||
1 file changed, 17 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/net/phy/at803x.c
|
||||
+++ b/drivers/net/phy/at803x.c
|
||||
@@ -527,10 +527,6 @@ static int at803x_parse_dt(struct phy_de
|
||||
phydev_err(phydev, "failed to get VDDIO regulator\n");
|
||||
return PTR_ERR(priv->vddio);
|
||||
}
|
||||
-
|
||||
- ret = regulator_enable(priv->vddio);
|
||||
- if (ret < 0)
|
||||
- return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -552,15 +548,30 @@ static int at803x_probe(struct phy_devic
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
+ if (priv->vddio) {
|
||||
+ ret = regulator_enable(priv->vddio);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
/* Some bootloaders leave the fiber page selected.
|
||||
* Switch to the copper page, as otherwise we read
|
||||
* the PHY capabilities from the fiber side.
|
||||
*/
|
||||
if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
|
||||
- ret = phy_select_page(phydev, AT803X_PAGE_COPPER);
|
||||
- ret = phy_restore_page(phydev, AT803X_PAGE_COPPER, ret);
|
||||
+ phy_lock_mdio_bus(phydev);
|
||||
+ ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
|
||||
+ phy_unlock_mdio_bus(phydev);
|
||||
+ if (ret)
|
||||
+ goto err;
|
||||
}
|
||||
|
||||
+ return 0;
|
||||
+
|
||||
+err:
|
||||
+ if (priv->vddio)
|
||||
+ regulator_disable(priv->vddio);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
@ -1,56 +0,0 @@
|
||||
From b1ae3587d16a8c8fc9453e147c8708d6f006ffbb Mon Sep 17 00:00:00 2001
|
||||
From: Bjarni Jonasson <bjarni.jonasson@microchip.com>
|
||||
Date: Wed, 13 Jan 2021 12:56:25 +0100
|
||||
Subject: [PATCH] net: phy: Add 100 base-x mode
|
||||
|
||||
Sparx-5 supports this mode and it is missing in the PHY core.
|
||||
|
||||
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
|
||||
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
Documentation/networking/phy.rst | 5 +++++
|
||||
include/linux/phy.h | 4 ++++
|
||||
2 files changed, 9 insertions(+)
|
||||
|
||||
--- a/Documentation/networking/phy.rst
|
||||
+++ b/Documentation/networking/phy.rst
|
||||
@@ -286,6 +286,11 @@ Some of the interface modes are describe
|
||||
Note: due to legacy usage, some 10GBASE-R usage incorrectly makes
|
||||
use of this definition.
|
||||
|
||||
+``PHY_INTERFACE_MODE_100BASEX``
|
||||
+ This defines IEEE 802.3 Clause 24. The link operates at a fixed data
|
||||
+ rate of 125Mbps using a 4B/5B encoding scheme, resulting in an underlying
|
||||
+ data rate of 100Mbps.
|
||||
+
|
||||
Pause frames / flow control
|
||||
===========================
|
||||
|
||||
--- a/include/linux/phy.h
|
||||
+++ b/include/linux/phy.h
|
||||
@@ -104,6 +104,7 @@ extern const int phy_10gbit_features_arr
|
||||
* @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax
|
||||
* @PHY_INTERFACE_MODE_QSGMII: Quad SGMII
|
||||
* @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII
|
||||
+ * @PHY_INTERFACE_MODE_100BASEX: 100 BaseX
|
||||
* @PHY_INTERFACE_MODE_1000BASEX: 1000 BaseX
|
||||
* @PHY_INTERFACE_MODE_2500BASEX: 2500 BaseX
|
||||
* @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI
|
||||
@@ -135,6 +136,7 @@ typedef enum {
|
||||
PHY_INTERFACE_MODE_MOCA,
|
||||
PHY_INTERFACE_MODE_QSGMII,
|
||||
PHY_INTERFACE_MODE_TRGMII,
|
||||
+ PHY_INTERFACE_MODE_100BASEX,
|
||||
PHY_INTERFACE_MODE_1000BASEX,
|
||||
PHY_INTERFACE_MODE_2500BASEX,
|
||||
PHY_INTERFACE_MODE_RXAUI,
|
||||
@@ -217,6 +219,8 @@ static inline const char *phy_modes(phy_
|
||||
return "usxgmii";
|
||||
case PHY_INTERFACE_MODE_10GKR:
|
||||
return "10gbase-kr";
|
||||
+ case PHY_INTERFACE_MODE_100BASEX:
|
||||
+ return "100base-x";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
From 6e12f35cef6b8a458d7ecf507ae330e0bffaad8c Mon Sep 17 00:00:00 2001
|
||||
From: Bjarni Jonasson <bjarni.jonasson@microchip.com>
|
||||
Date: Wed, 13 Jan 2021 12:56:26 +0100
|
||||
Subject: [PATCH] sfp: add support for 100 base-x SFPs
|
||||
|
||||
Add support for 100Base-FX, 100Base-LX, 100Base-PX and 100Base-BX10 modules
|
||||
This is needed for Sparx-5 switch.
|
||||
|
||||
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
|
||||
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
drivers/net/phy/sfp-bus.c | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
--- a/drivers/net/phy/sfp-bus.c
|
||||
+++ b/drivers/net/phy/sfp-bus.c
|
||||
@@ -280,6 +280,12 @@ void sfp_parse_support(struct sfp_bus *b
|
||||
br_min <= 1300 && br_max >= 1200)
|
||||
phylink_set(modes, 1000baseX_Full);
|
||||
|
||||
+ /* 100Base-FX, 100Base-LX, 100Base-PX, 100Base-BX10 */
|
||||
+ if (id->base.e100_base_fx || id->base.e100_base_lx)
|
||||
+ phylink_set(modes, 100baseFX_Full);
|
||||
+ if ((id->base.e_base_px || id->base.e_base_bx10) && br_nom == 100)
|
||||
+ phylink_set(modes, 100baseFX_Full);
|
||||
+
|
||||
/* For active or passive cables, select the link modes
|
||||
* based on the bit rates and the cable compliance bytes.
|
||||
*/
|
||||
@@ -399,6 +405,9 @@ phy_interface_t sfp_select_interface(str
|
||||
if (phylink_test(link_modes, 1000baseX_Full))
|
||||
return PHY_INTERFACE_MODE_1000BASEX;
|
||||
|
||||
+ if (phylink_test(link_modes, 100baseFX_Full))
|
||||
+ return PHY_INTERFACE_MODE_100BASEX;
|
||||
+
|
||||
dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
|
||||
|
||||
return PHY_INTERFACE_MODE_NA;
|
@ -1,549 +0,0 @@
|
||||
From 41d26bf4aba070dfd2ab48866cc27a48ee6228c7 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Marek=20Beh=C3=BAn?= <kabel@kernel.org>
|
||||
Date: Tue, 20 Apr 2021 09:53:59 +0200
|
||||
Subject: [PATCH] net: phy: marvell: refactor HWMON OOP style
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Use a structure of Marvell PHY specific HWMON methods to reduce code
|
||||
duplication. Store a pointer to this structure into the PHY driver's
|
||||
driver_data member.
|
||||
|
||||
Signed-off-by: Marek Behún <kabel@kernel.org>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/phy/marvell.c | 369 +++++++++++++-------------------------
|
||||
1 file changed, 125 insertions(+), 244 deletions(-)
|
||||
|
||||
--- a/drivers/net/phy/marvell.c
|
||||
+++ b/drivers/net/phy/marvell.c
|
||||
@@ -2141,6 +2141,19 @@ static int marvell_vct7_cable_test_get_s
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HWMON
|
||||
+struct marvell_hwmon_ops {
|
||||
+ int (*get_temp)(struct phy_device *phydev, long *temp);
|
||||
+ int (*get_temp_critical)(struct phy_device *phydev, long *temp);
|
||||
+ int (*set_temp_critical)(struct phy_device *phydev, long temp);
|
||||
+ int (*get_temp_alarm)(struct phy_device *phydev, long *alarm);
|
||||
+};
|
||||
+
|
||||
+static const struct marvell_hwmon_ops *
|
||||
+to_marvell_hwmon_ops(const struct phy_device *phydev)
|
||||
+{
|
||||
+ return phydev->drv->driver_data;
|
||||
+}
|
||||
+
|
||||
static int m88e1121_get_temp(struct phy_device *phydev, long *temp)
|
||||
{
|
||||
int oldpage;
|
||||
@@ -2184,75 +2197,6 @@ error:
|
||||
return phy_restore_page(phydev, oldpage, ret);
|
||||
}
|
||||
|
||||
-static int m88e1121_hwmon_read(struct device *dev,
|
||||
- enum hwmon_sensor_types type,
|
||||
- u32 attr, int channel, long *temp)
|
||||
-{
|
||||
- struct phy_device *phydev = dev_get_drvdata(dev);
|
||||
- int err;
|
||||
-
|
||||
- switch (attr) {
|
||||
- case hwmon_temp_input:
|
||||
- err = m88e1121_get_temp(phydev, temp);
|
||||
- break;
|
||||
- default:
|
||||
- return -EOPNOTSUPP;
|
||||
- }
|
||||
-
|
||||
- return err;
|
||||
-}
|
||||
-
|
||||
-static umode_t m88e1121_hwmon_is_visible(const void *data,
|
||||
- enum hwmon_sensor_types type,
|
||||
- u32 attr, int channel)
|
||||
-{
|
||||
- if (type != hwmon_temp)
|
||||
- return 0;
|
||||
-
|
||||
- switch (attr) {
|
||||
- case hwmon_temp_input:
|
||||
- return 0444;
|
||||
- default:
|
||||
- return 0;
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-static u32 m88e1121_hwmon_chip_config[] = {
|
||||
- HWMON_C_REGISTER_TZ,
|
||||
- 0
|
||||
-};
|
||||
-
|
||||
-static const struct hwmon_channel_info m88e1121_hwmon_chip = {
|
||||
- .type = hwmon_chip,
|
||||
- .config = m88e1121_hwmon_chip_config,
|
||||
-};
|
||||
-
|
||||
-static u32 m88e1121_hwmon_temp_config[] = {
|
||||
- HWMON_T_INPUT,
|
||||
- 0
|
||||
-};
|
||||
-
|
||||
-static const struct hwmon_channel_info m88e1121_hwmon_temp = {
|
||||
- .type = hwmon_temp,
|
||||
- .config = m88e1121_hwmon_temp_config,
|
||||
-};
|
||||
-
|
||||
-static const struct hwmon_channel_info *m88e1121_hwmon_info[] = {
|
||||
- &m88e1121_hwmon_chip,
|
||||
- &m88e1121_hwmon_temp,
|
||||
- NULL
|
||||
-};
|
||||
-
|
||||
-static const struct hwmon_ops m88e1121_hwmon_hwmon_ops = {
|
||||
- .is_visible = m88e1121_hwmon_is_visible,
|
||||
- .read = m88e1121_hwmon_read,
|
||||
-};
|
||||
-
|
||||
-static const struct hwmon_chip_info m88e1121_hwmon_chip_info = {
|
||||
- .ops = &m88e1121_hwmon_hwmon_ops,
|
||||
- .info = m88e1121_hwmon_info,
|
||||
-};
|
||||
-
|
||||
static int m88e1510_get_temp(struct phy_device *phydev, long *temp)
|
||||
{
|
||||
int ret;
|
||||
@@ -2315,92 +2259,6 @@ static int m88e1510_get_temp_alarm(struc
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int m88e1510_hwmon_read(struct device *dev,
|
||||
- enum hwmon_sensor_types type,
|
||||
- u32 attr, int channel, long *temp)
|
||||
-{
|
||||
- struct phy_device *phydev = dev_get_drvdata(dev);
|
||||
- int err;
|
||||
-
|
||||
- switch (attr) {
|
||||
- case hwmon_temp_input:
|
||||
- err = m88e1510_get_temp(phydev, temp);
|
||||
- break;
|
||||
- case hwmon_temp_crit:
|
||||
- err = m88e1510_get_temp_critical(phydev, temp);
|
||||
- break;
|
||||
- case hwmon_temp_max_alarm:
|
||||
- err = m88e1510_get_temp_alarm(phydev, temp);
|
||||
- break;
|
||||
- default:
|
||||
- return -EOPNOTSUPP;
|
||||
- }
|
||||
-
|
||||
- return err;
|
||||
-}
|
||||
-
|
||||
-static int m88e1510_hwmon_write(struct device *dev,
|
||||
- enum hwmon_sensor_types type,
|
||||
- u32 attr, int channel, long temp)
|
||||
-{
|
||||
- struct phy_device *phydev = dev_get_drvdata(dev);
|
||||
- int err;
|
||||
-
|
||||
- switch (attr) {
|
||||
- case hwmon_temp_crit:
|
||||
- err = m88e1510_set_temp_critical(phydev, temp);
|
||||
- break;
|
||||
- default:
|
||||
- return -EOPNOTSUPP;
|
||||
- }
|
||||
- return err;
|
||||
-}
|
||||
-
|
||||
-static umode_t m88e1510_hwmon_is_visible(const void *data,
|
||||
- enum hwmon_sensor_types type,
|
||||
- u32 attr, int channel)
|
||||
-{
|
||||
- if (type != hwmon_temp)
|
||||
- return 0;
|
||||
-
|
||||
- switch (attr) {
|
||||
- case hwmon_temp_input:
|
||||
- case hwmon_temp_max_alarm:
|
||||
- return 0444;
|
||||
- case hwmon_temp_crit:
|
||||
- return 0644;
|
||||
- default:
|
||||
- return 0;
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-static u32 m88e1510_hwmon_temp_config[] = {
|
||||
- HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM,
|
||||
- 0
|
||||
-};
|
||||
-
|
||||
-static const struct hwmon_channel_info m88e1510_hwmon_temp = {
|
||||
- .type = hwmon_temp,
|
||||
- .config = m88e1510_hwmon_temp_config,
|
||||
-};
|
||||
-
|
||||
-static const struct hwmon_channel_info *m88e1510_hwmon_info[] = {
|
||||
- &m88e1121_hwmon_chip,
|
||||
- &m88e1510_hwmon_temp,
|
||||
- NULL
|
||||
-};
|
||||
-
|
||||
-static const struct hwmon_ops m88e1510_hwmon_hwmon_ops = {
|
||||
- .is_visible = m88e1510_hwmon_is_visible,
|
||||
- .read = m88e1510_hwmon_read,
|
||||
- .write = m88e1510_hwmon_write,
|
||||
-};
|
||||
-
|
||||
-static const struct hwmon_chip_info m88e1510_hwmon_chip_info = {
|
||||
- .ops = &m88e1510_hwmon_hwmon_ops,
|
||||
- .info = m88e1510_hwmon_info,
|
||||
-};
|
||||
-
|
||||
static int m88e6390_get_temp(struct phy_device *phydev, long *temp)
|
||||
{
|
||||
int sum = 0;
|
||||
@@ -2459,63 +2317,112 @@ error:
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static int m88e6390_hwmon_read(struct device *dev,
|
||||
- enum hwmon_sensor_types type,
|
||||
- u32 attr, int channel, long *temp)
|
||||
+static int marvell_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
|
||||
+ u32 attr, int channel, long *temp)
|
||||
{
|
||||
struct phy_device *phydev = dev_get_drvdata(dev);
|
||||
- int err;
|
||||
+ const struct marvell_hwmon_ops *ops = to_marvell_hwmon_ops(phydev);
|
||||
+ int err = -EOPNOTSUPP;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_temp_input:
|
||||
- err = m88e6390_get_temp(phydev, temp);
|
||||
+ if (ops->get_temp)
|
||||
+ err = ops->get_temp(phydev, temp);
|
||||
+ break;
|
||||
+ case hwmon_temp_crit:
|
||||
+ if (ops->get_temp_critical)
|
||||
+ err = ops->get_temp_critical(phydev, temp);
|
||||
+ break;
|
||||
+ case hwmon_temp_max_alarm:
|
||||
+ if (ops->get_temp_alarm)
|
||||
+ err = ops->get_temp_alarm(phydev, temp);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static int marvell_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
|
||||
+ u32 attr, int channel, long temp)
|
||||
+{
|
||||
+ struct phy_device *phydev = dev_get_drvdata(dev);
|
||||
+ const struct marvell_hwmon_ops *ops = to_marvell_hwmon_ops(phydev);
|
||||
+ int err = -EOPNOTSUPP;
|
||||
+
|
||||
+ switch (attr) {
|
||||
+ case hwmon_temp_crit:
|
||||
+ if (ops->set_temp_critical)
|
||||
+ err = ops->set_temp_critical(phydev, temp);
|
||||
break;
|
||||
default:
|
||||
- return -EOPNOTSUPP;
|
||||
+ fallthrough;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
-static umode_t m88e6390_hwmon_is_visible(const void *data,
|
||||
- enum hwmon_sensor_types type,
|
||||
- u32 attr, int channel)
|
||||
+static umode_t marvell_hwmon_is_visible(const void *data,
|
||||
+ enum hwmon_sensor_types type,
|
||||
+ u32 attr, int channel)
|
||||
{
|
||||
+ const struct phy_device *phydev = data;
|
||||
+ const struct marvell_hwmon_ops *ops = to_marvell_hwmon_ops(phydev);
|
||||
+
|
||||
if (type != hwmon_temp)
|
||||
return 0;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_temp_input:
|
||||
- return 0444;
|
||||
+ return ops->get_temp ? 0444 : 0;
|
||||
+ case hwmon_temp_max_alarm:
|
||||
+ return ops->get_temp_alarm ? 0444 : 0;
|
||||
+ case hwmon_temp_crit:
|
||||
+ return (ops->get_temp_critical ? 0444 : 0) |
|
||||
+ (ops->set_temp_critical ? 0200 : 0);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
-static u32 m88e6390_hwmon_temp_config[] = {
|
||||
- HWMON_T_INPUT,
|
||||
+static u32 marvell_hwmon_chip_config[] = {
|
||||
+ HWMON_C_REGISTER_TZ,
|
||||
0
|
||||
};
|
||||
|
||||
-static const struct hwmon_channel_info m88e6390_hwmon_temp = {
|
||||
+static const struct hwmon_channel_info marvell_hwmon_chip = {
|
||||
+ .type = hwmon_chip,
|
||||
+ .config = marvell_hwmon_chip_config,
|
||||
+};
|
||||
+
|
||||
+/* we can define HWMON_T_CRIT and HWMON_T_MAX_ALARM even though these are not
|
||||
+ * defined for all PHYs, because the hwmon code checks whether the attributes
|
||||
+ * exist via the .is_visible method
|
||||
+ */
|
||||
+static u32 marvell_hwmon_temp_config[] = {
|
||||
+ HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM,
|
||||
+ 0
|
||||
+};
|
||||
+
|
||||
+static const struct hwmon_channel_info marvell_hwmon_temp = {
|
||||
.type = hwmon_temp,
|
||||
- .config = m88e6390_hwmon_temp_config,
|
||||
+ .config = marvell_hwmon_temp_config,
|
||||
};
|
||||
|
||||
-static const struct hwmon_channel_info *m88e6390_hwmon_info[] = {
|
||||
- &m88e1121_hwmon_chip,
|
||||
- &m88e6390_hwmon_temp,
|
||||
+static const struct hwmon_channel_info *marvell_hwmon_info[] = {
|
||||
+ &marvell_hwmon_chip,
|
||||
+ &marvell_hwmon_temp,
|
||||
NULL
|
||||
};
|
||||
|
||||
-static const struct hwmon_ops m88e6390_hwmon_hwmon_ops = {
|
||||
- .is_visible = m88e6390_hwmon_is_visible,
|
||||
- .read = m88e6390_hwmon_read,
|
||||
+static const struct hwmon_ops marvell_hwmon_hwmon_ops = {
|
||||
+ .is_visible = marvell_hwmon_is_visible,
|
||||
+ .read = marvell_hwmon_read,
|
||||
+ .write = marvell_hwmon_write,
|
||||
};
|
||||
|
||||
-static const struct hwmon_chip_info m88e6390_hwmon_chip_info = {
|
||||
- .ops = &m88e6390_hwmon_hwmon_ops,
|
||||
- .info = m88e6390_hwmon_info,
|
||||
+static const struct hwmon_chip_info marvell_hwmon_chip_info = {
|
||||
+ .ops = &marvell_hwmon_hwmon_ops,
|
||||
+ .info = marvell_hwmon_info,
|
||||
};
|
||||
|
||||
static int marvell_hwmon_name(struct phy_device *phydev)
|
||||
@@ -2538,49 +2445,48 @@ static int marvell_hwmon_name(struct phy
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int marvell_hwmon_probe(struct phy_device *phydev,
|
||||
- const struct hwmon_chip_info *chip)
|
||||
+static int marvell_hwmon_probe(struct phy_device *phydev)
|
||||
{
|
||||
+ const struct marvell_hwmon_ops *ops = to_marvell_hwmon_ops(phydev);
|
||||
struct marvell_priv *priv = phydev->priv;
|
||||
struct device *dev = &phydev->mdio.dev;
|
||||
int err;
|
||||
|
||||
+ if (!ops)
|
||||
+ return 0;
|
||||
+
|
||||
err = marvell_hwmon_name(phydev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
priv->hwmon_dev = devm_hwmon_device_register_with_info(
|
||||
- dev, priv->hwmon_name, phydev, chip, NULL);
|
||||
+ dev, priv->hwmon_name, phydev, &marvell_hwmon_chip_info, NULL);
|
||||
|
||||
return PTR_ERR_OR_ZERO(priv->hwmon_dev);
|
||||
}
|
||||
|
||||
-static int m88e1121_hwmon_probe(struct phy_device *phydev)
|
||||
-{
|
||||
- return marvell_hwmon_probe(phydev, &m88e1121_hwmon_chip_info);
|
||||
-}
|
||||
+static const struct marvell_hwmon_ops m88e1121_hwmon_ops = {
|
||||
+ .get_temp = m88e1121_get_temp,
|
||||
+};
|
||||
|
||||
-static int m88e1510_hwmon_probe(struct phy_device *phydev)
|
||||
-{
|
||||
- return marvell_hwmon_probe(phydev, &m88e1510_hwmon_chip_info);
|
||||
-}
|
||||
+static const struct marvell_hwmon_ops m88e1510_hwmon_ops = {
|
||||
+ .get_temp = m88e1510_get_temp,
|
||||
+ .get_temp_critical = m88e1510_get_temp_critical,
|
||||
+ .set_temp_critical = m88e1510_set_temp_critical,
|
||||
+ .get_temp_alarm = m88e1510_get_temp_alarm,
|
||||
+};
|
||||
+
|
||||
+static const struct marvell_hwmon_ops m88e6390_hwmon_ops = {
|
||||
+ .get_temp = m88e6390_get_temp,
|
||||
+};
|
||||
+
|
||||
+#define DEF_MARVELL_HWMON_OPS(s) (&(s))
|
||||
|
||||
-static int m88e6390_hwmon_probe(struct phy_device *phydev)
|
||||
-{
|
||||
- return marvell_hwmon_probe(phydev, &m88e6390_hwmon_chip_info);
|
||||
-}
|
||||
#else
|
||||
-static int m88e1121_hwmon_probe(struct phy_device *phydev)
|
||||
-{
|
||||
- return 0;
|
||||
-}
|
||||
|
||||
-static int m88e1510_hwmon_probe(struct phy_device *phydev)
|
||||
-{
|
||||
- return 0;
|
||||
-}
|
||||
+#define DEF_MARVELL_HWMON_OPS(s) NULL
|
||||
|
||||
-static int m88e6390_hwmon_probe(struct phy_device *phydev)
|
||||
+static int marvell_hwmon_probe(struct phy_device *phydev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -2596,40 +2502,7 @@ static int marvell_probe(struct phy_devi
|
||||
|
||||
phydev->priv = priv;
|
||||
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static int m88e1121_probe(struct phy_device *phydev)
|
||||
-{
|
||||
- int err;
|
||||
-
|
||||
- err = marvell_probe(phydev);
|
||||
- if (err)
|
||||
- return err;
|
||||
-
|
||||
- return m88e1121_hwmon_probe(phydev);
|
||||
-}
|
||||
-
|
||||
-static int m88e1510_probe(struct phy_device *phydev)
|
||||
-{
|
||||
- int err;
|
||||
-
|
||||
- err = marvell_probe(phydev);
|
||||
- if (err)
|
||||
- return err;
|
||||
-
|
||||
- return m88e1510_hwmon_probe(phydev);
|
||||
-}
|
||||
-
|
||||
-static int m88e6390_probe(struct phy_device *phydev)
|
||||
-{
|
||||
- int err;
|
||||
-
|
||||
- err = marvell_probe(phydev);
|
||||
- if (err)
|
||||
- return err;
|
||||
-
|
||||
- return m88e6390_hwmon_probe(phydev);
|
||||
+ return marvell_hwmon_probe(phydev);
|
||||
}
|
||||
|
||||
static struct phy_driver marvell_drivers[] = {
|
||||
@@ -2714,8 +2587,9 @@ static struct phy_driver marvell_drivers
|
||||
.phy_id = MARVELL_PHY_ID_88E1121R,
|
||||
.phy_id_mask = MARVELL_PHY_ID_MASK,
|
||||
.name = "Marvell 88E1121R",
|
||||
+ .driver_data = DEF_MARVELL_HWMON_OPS(m88e1121_hwmon_ops),
|
||||
/* PHY_GBIT_FEATURES */
|
||||
- .probe = m88e1121_probe,
|
||||
+ .probe = marvell_probe,
|
||||
.config_init = marvell_config_init,
|
||||
.config_aneg = m88e1121_config_aneg,
|
||||
.read_status = marvell_read_status,
|
||||
@@ -2834,9 +2708,10 @@ static struct phy_driver marvell_drivers
|
||||
.phy_id = MARVELL_PHY_ID_88E1510,
|
||||
.phy_id_mask = MARVELL_PHY_ID_MASK,
|
||||
.name = "Marvell 88E1510",
|
||||
+ .driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
|
||||
.features = PHY_GBIT_FIBRE_FEATURES,
|
||||
.flags = PHY_POLL_CABLE_TEST,
|
||||
- .probe = m88e1510_probe,
|
||||
+ .probe = marvell_probe,
|
||||
.config_init = m88e1510_config_init,
|
||||
.config_aneg = m88e1510_config_aneg,
|
||||
.read_status = marvell_read_status,
|
||||
@@ -2863,9 +2738,10 @@ static struct phy_driver marvell_drivers
|
||||
.phy_id = MARVELL_PHY_ID_88E1540,
|
||||
.phy_id_mask = MARVELL_PHY_ID_MASK,
|
||||
.name = "Marvell 88E1540",
|
||||
+ .driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
|
||||
/* PHY_GBIT_FEATURES */
|
||||
.flags = PHY_POLL_CABLE_TEST,
|
||||
- .probe = m88e1510_probe,
|
||||
+ .probe = marvell_probe,
|
||||
.config_init = marvell_config_init,
|
||||
.config_aneg = m88e1510_config_aneg,
|
||||
.read_status = marvell_read_status,
|
||||
@@ -2889,7 +2765,8 @@ static struct phy_driver marvell_drivers
|
||||
.phy_id = MARVELL_PHY_ID_88E1545,
|
||||
.phy_id_mask = MARVELL_PHY_ID_MASK,
|
||||
.name = "Marvell 88E1545",
|
||||
- .probe = m88e1510_probe,
|
||||
+ .driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
|
||||
+ .probe = marvell_probe,
|
||||
/* PHY_GBIT_FEATURES */
|
||||
.flags = PHY_POLL_CABLE_TEST,
|
||||
.config_init = marvell_config_init,
|
||||
@@ -2935,9 +2812,10 @@ static struct phy_driver marvell_drivers
|
||||
.phy_id = MARVELL_PHY_ID_88E6341_FAMILY,
|
||||
.phy_id_mask = MARVELL_PHY_ID_MASK,
|
||||
.name = "Marvell 88E6341 Family",
|
||||
+ .driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
|
||||
/* PHY_GBIT_FEATURES */
|
||||
.flags = PHY_POLL_CABLE_TEST,
|
||||
- .probe = m88e1510_probe,
|
||||
+ .probe = marvell_probe,
|
||||
.config_init = marvell_config_init,
|
||||
.config_aneg = m88e6390_config_aneg,
|
||||
.read_status = marvell_read_status,
|
||||
@@ -2961,9 +2839,10 @@ static struct phy_driver marvell_drivers
|
||||
.phy_id = MARVELL_PHY_ID_88E6390_FAMILY,
|
||||
.phy_id_mask = MARVELL_PHY_ID_MASK,
|
||||
.name = "Marvell 88E6390 Family",
|
||||
+ .driver_data = DEF_MARVELL_HWMON_OPS(m88e6390_hwmon_ops),
|
||||
/* PHY_GBIT_FEATURES */
|
||||
.flags = PHY_POLL_CABLE_TEST,
|
||||
- .probe = m88e6390_probe,
|
||||
+ .probe = marvell_probe,
|
||||
.config_init = marvell_config_init,
|
||||
.config_aneg = m88e6390_config_aneg,
|
||||
.read_status = marvell_read_status,
|
||||
@@ -2987,7 +2866,8 @@ static struct phy_driver marvell_drivers
|
||||
.phy_id = MARVELL_PHY_ID_88E1340S,
|
||||
.phy_id_mask = MARVELL_PHY_ID_MASK,
|
||||
.name = "Marvell 88E1340S",
|
||||
- .probe = m88e1510_probe,
|
||||
+ .driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
|
||||
+ .probe = marvell_probe,
|
||||
/* PHY_GBIT_FEATURES */
|
||||
.config_init = marvell_config_init,
|
||||
.config_aneg = m88e1510_config_aneg,
|
||||
@@ -3009,7 +2889,8 @@ static struct phy_driver marvell_drivers
|
||||
.phy_id = MARVELL_PHY_ID_88E1548P,
|
||||
.phy_id_mask = MARVELL_PHY_ID_MASK,
|
||||
.name = "Marvell 88E1548P",
|
||||
- .probe = m88e1510_probe,
|
||||
+ .driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
|
||||
+ .probe = marvell_probe,
|
||||
.features = PHY_GBIT_FIBRE_FEATURES,
|
||||
.config_init = marvell_config_init,
|
||||
.config_aneg = m88e1510_config_aneg,
|
@ -1,161 +0,0 @@
|
||||
From b697d9d38a5a5ab405d7cc4743d39fe2c5d7517c Mon Sep 17 00:00:00 2001
|
||||
From: Ivan Bornyakov <i.bornyakov@metrotek.ru>
|
||||
Date: Thu, 12 Aug 2021 16:42:56 +0300
|
||||
Subject: [PATCH] net: phy: marvell: add SFP support for 88E1510
|
||||
|
||||
Add support for SFP cages connected to the Marvell 88E1512 transceiver.
|
||||
88E1512 supports SGMII/1000Base-X/100Base-FX media types with RGMII
|
||||
on system interface. Configure PHY to appropriate mode depending on the
|
||||
type of SFP inserted. On SFP removal configure PHY to the RGMII-copper
|
||||
mode so RJ-45 port can still work.
|
||||
|
||||
Signed-off-by: Ivan Bornyakov <i.bornyakov@metrotek.ru>
|
||||
Link: https://lore.kernel.org/r/20210812134256.2436-1-i.bornyakov@metrotek.ru
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
drivers/net/phy/marvell.c | 105 +++++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 104 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/phy/marvell.c
|
||||
+++ b/drivers/net/phy/marvell.c
|
||||
@@ -32,6 +32,7 @@
|
||||
#include <linux/marvell_phy.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/of.h>
|
||||
+#include <linux/sfp.h>
|
||||
|
||||
#include <linux/io.h>
|
||||
#include <asm/irq.h>
|
||||
@@ -46,6 +47,7 @@
|
||||
#define MII_MARVELL_MISC_TEST_PAGE 0x06
|
||||
#define MII_MARVELL_VCT7_PAGE 0x07
|
||||
#define MII_MARVELL_WOL_PAGE 0x11
|
||||
+#define MII_MARVELL_MODE_PAGE 0x12
|
||||
|
||||
#define MII_M1011_IEVENT 0x13
|
||||
#define MII_M1011_IEVENT_CLEAR 0x0000
|
||||
@@ -162,7 +164,14 @@
|
||||
|
||||
#define MII_88E1510_GEN_CTRL_REG_1 0x14
|
||||
#define MII_88E1510_GEN_CTRL_REG_1_MODE_MASK 0x7
|
||||
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII 0x0 /* RGMII to copper */
|
||||
#define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */
|
||||
+/* RGMII to 1000BASE-X */
|
||||
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_1000X 0x2
|
||||
+/* RGMII to 100BASE-FX */
|
||||
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_100FX 0x3
|
||||
+/* RGMII to SGMII */
|
||||
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII 0x4
|
||||
#define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */
|
||||
|
||||
#define MII_VCT5_TX_RX_MDI0_COUPLING 0x10
|
||||
@@ -2505,6 +2514,100 @@ static int marvell_probe(struct phy_devi
|
||||
return marvell_hwmon_probe(phydev);
|
||||
}
|
||||
|
||||
+static int m88e1510_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
|
||||
+{
|
||||
+ struct phy_device *phydev = upstream;
|
||||
+ phy_interface_t interface;
|
||||
+ struct device *dev;
|
||||
+ int oldpage;
|
||||
+ int ret = 0;
|
||||
+ u16 mode;
|
||||
+
|
||||
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
|
||||
+
|
||||
+ dev = &phydev->mdio.dev;
|
||||
+
|
||||
+ sfp_parse_support(phydev->sfp_bus, id, supported);
|
||||
+ interface = sfp_select_interface(phydev->sfp_bus, supported);
|
||||
+
|
||||
+ dev_info(dev, "%s SFP module inserted\n", phy_modes(interface));
|
||||
+
|
||||
+ switch (interface) {
|
||||
+ case PHY_INTERFACE_MODE_1000BASEX:
|
||||
+ mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_1000X;
|
||||
+
|
||||
+ break;
|
||||
+ case PHY_INTERFACE_MODE_100BASEX:
|
||||
+ mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_100FX;
|
||||
+
|
||||
+ break;
|
||||
+ case PHY_INTERFACE_MODE_SGMII:
|
||||
+ mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII;
|
||||
+
|
||||
+ break;
|
||||
+ default:
|
||||
+ dev_err(dev, "Incompatible SFP module inserted\n");
|
||||
+
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ oldpage = phy_select_page(phydev, MII_MARVELL_MODE_PAGE);
|
||||
+ if (oldpage < 0)
|
||||
+ goto error;
|
||||
+
|
||||
+ ret = __phy_modify(phydev, MII_88E1510_GEN_CTRL_REG_1,
|
||||
+ MII_88E1510_GEN_CTRL_REG_1_MODE_MASK, mode);
|
||||
+ if (ret < 0)
|
||||
+ goto error;
|
||||
+
|
||||
+ ret = __phy_set_bits(phydev, MII_88E1510_GEN_CTRL_REG_1,
|
||||
+ MII_88E1510_GEN_CTRL_REG_1_RESET);
|
||||
+
|
||||
+error:
|
||||
+ return phy_restore_page(phydev, oldpage, ret);
|
||||
+}
|
||||
+
|
||||
+static void m88e1510_sfp_remove(void *upstream)
|
||||
+{
|
||||
+ struct phy_device *phydev = upstream;
|
||||
+ int oldpage;
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ oldpage = phy_select_page(phydev, MII_MARVELL_MODE_PAGE);
|
||||
+ if (oldpage < 0)
|
||||
+ goto error;
|
||||
+
|
||||
+ ret = __phy_modify(phydev, MII_88E1510_GEN_CTRL_REG_1,
|
||||
+ MII_88E1510_GEN_CTRL_REG_1_MODE_MASK,
|
||||
+ MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII);
|
||||
+ if (ret < 0)
|
||||
+ goto error;
|
||||
+
|
||||
+ ret = __phy_set_bits(phydev, MII_88E1510_GEN_CTRL_REG_1,
|
||||
+ MII_88E1510_GEN_CTRL_REG_1_RESET);
|
||||
+
|
||||
+error:
|
||||
+ phy_restore_page(phydev, oldpage, ret);
|
||||
+}
|
||||
+
|
||||
+static const struct sfp_upstream_ops m88e1510_sfp_ops = {
|
||||
+ .module_insert = m88e1510_sfp_insert,
|
||||
+ .module_remove = m88e1510_sfp_remove,
|
||||
+ .attach = phy_sfp_attach,
|
||||
+ .detach = phy_sfp_detach,
|
||||
+};
|
||||
+
|
||||
+static int m88e1510_probe(struct phy_device *phydev)
|
||||
+{
|
||||
+ int err;
|
||||
+
|
||||
+ err = marvell_probe(phydev);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ return phy_sfp_probe(phydev, &m88e1510_sfp_ops);
|
||||
+}
|
||||
+
|
||||
static struct phy_driver marvell_drivers[] = {
|
||||
{
|
||||
.phy_id = MARVELL_PHY_ID_88E1101,
|
||||
@@ -2711,7 +2814,7 @@ static struct phy_driver marvell_drivers
|
||||
.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
|
||||
.features = PHY_GBIT_FIBRE_FEATURES,
|
||||
.flags = PHY_POLL_CABLE_TEST,
|
||||
- .probe = marvell_probe,
|
||||
+ .probe = m88e1510_probe,
|
||||
.config_init = m88e1510_config_init,
|
||||
.config_aneg = m88e1510_config_aneg,
|
||||
.read_status = marvell_read_status,
|
@ -1,85 +0,0 @@
|
||||
From 9d5ef190e5615a7b63af89f88c4106a5bc127974 Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Date: Fri, 5 Feb 2021 15:37:10 +0200
|
||||
Subject: [PATCH] net: dsa: automatically bring up DSA master when opening user
|
||||
port
|
||||
|
||||
DSA wants the master interface to be open before the user port is due to
|
||||
historical reasons. The promiscuity of interfaces that are down used to
|
||||
have issues, as referenced by Lennert Buytenhek in commit df02c6ff2e39
|
||||
("dsa: fix master interface allmulti/promisc handling").
|
||||
|
||||
The bugfix mentioned there, commit b6c40d68ff64 ("net: only invoke
|
||||
dev->change_rx_flags when device is UP"), was basically a "don't do
|
||||
that" approach to working around the promiscuity while down issue.
|
||||
|
||||
Further work done by Vlad Yasevich in commit d2615bf45069 ("net: core:
|
||||
Always propagate flag changes to interfaces") has resolved the
|
||||
underlying issue, and it is strictly up to the DSA and 8021q drivers
|
||||
now, it is no longer mandated by the networking core that the master
|
||||
interface must be up when changing its promiscuity.
|
||||
|
||||
From DSA's point of view, deciding to error out in dsa_slave_open
|
||||
because the master isn't up is
|
||||
(a) a bad user experience and
|
||||
(b) knocking at an open door.
|
||||
Even if there still was an issue with promiscuity while down, DSA could
|
||||
still just open the master and avoid it.
|
||||
|
||||
Doing it this way has the additional benefit that user space can now
|
||||
remove DSA-specific workarounds, like systemd-networkd with BindCarrier:
|
||||
https://github.com/systemd/systemd/issues/7478
|
||||
|
||||
And we can finally remove one of the 2 bullets in the "Common pitfalls
|
||||
using DSA setups" chapter.
|
||||
|
||||
Tested with two cascaded DSA switches:
|
||||
|
||||
$ ip link set sw0p2 up
|
||||
fsl_enetc 0000:00:00.2 eno2: configuring for fixed/internal link mode
|
||||
fsl_enetc 0000:00:00.2 eno2: Link is Up - 1Gbps/Full - flow control rx/tx
|
||||
mscc_felix 0000:00:00.5 swp0: configuring for fixed/sgmii link mode
|
||||
mscc_felix 0000:00:00.5 swp0: Link is Up - 1Gbps/Full - flow control off
|
||||
8021q: adding VLAN 0 to HW filter on device swp0
|
||||
sja1105 spi2.0 sw0p2: configuring for phy/rgmii-id link mode
|
||||
IPv6: ADDRCONF(NETDEV_CHANGE): eno2: link becomes ready
|
||||
IPv6: ADDRCONF(NETDEV_CHANGE): swp0: link becomes ready
|
||||
|
||||
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
Documentation/networking/dsa/dsa.rst | 4 ----
|
||||
net/dsa/slave.c | 7 +++++--
|
||||
2 files changed, 5 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/Documentation/networking/dsa/dsa.rst
|
||||
+++ b/Documentation/networking/dsa/dsa.rst
|
||||
@@ -273,10 +273,6 @@ will not make us go through the switch t
|
||||
the Ethernet switch on the other end, expecting a tag will typically drop this
|
||||
frame.
|
||||
|
||||
-Slave network devices check that the master network device is UP before allowing
|
||||
-you to administratively bring UP these slave network devices. A common
|
||||
-configuration mistake is forgetting to bring UP the master network device first.
|
||||
-
|
||||
Interactions with other subsystems
|
||||
==================================
|
||||
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -68,8 +68,11 @@ static int dsa_slave_open(struct net_dev
|
||||
struct dsa_port *dp = dsa_slave_to_port(dev);
|
||||
int err;
|
||||
|
||||
- if (!(master->flags & IFF_UP))
|
||||
- return -ENETDOWN;
|
||||
+ err = dev_open(master, NULL);
|
||||
+ if (err < 0) {
|
||||
+ netdev_err(dev, "failed to open master %s\n", master->name);
|
||||
+ goto out;
|
||||
+ }
|
||||
|
||||
if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) {
|
||||
err = dev_uc_add(master, dev->dev_addr);
|
@ -1,126 +0,0 @@
|
||||
From 90dc8fd36078a536671adae884d0b929cce6480a Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Date: Wed, 6 Jan 2021 11:51:30 +0200
|
||||
Subject: [PATCH] net: bridge: notify switchdev of disappearance of old FDB
|
||||
entry upon migration
|
||||
|
||||
Currently the bridge emits atomic switchdev notifications for
|
||||
dynamically learnt FDB entries. Monitoring these notifications works
|
||||
wonders for switchdev drivers that want to keep their hardware FDB in
|
||||
sync with the bridge's FDB.
|
||||
|
||||
For example station A wants to talk to station B in the diagram below,
|
||||
and we are concerned with the behavior of the bridge on the DUT device:
|
||||
|
||||
DUT
|
||||
+-------------------------------------+
|
||||
| br0 |
|
||||
| +------+ +------+ +------+ +------+ |
|
||||
| | | | | | | | | |
|
||||
| | swp0 | | swp1 | | swp2 | | eth0 | |
|
||||
+-------------------------------------+
|
||||
| | |
|
||||
Station A | |
|
||||
| |
|
||||
+--+------+--+ +--+------+--+
|
||||
| | | | | | | |
|
||||
| | swp0 | | | | swp0 | |
|
||||
Another | +------+ | | +------+ | Another
|
||||
switch | br0 | | br0 | switch
|
||||
| +------+ | | +------+ |
|
||||
| | | | | | | |
|
||||
| | swp1 | | | | swp1 | |
|
||||
+--+------+--+ +--+------+--+
|
||||
|
|
||||
Station B
|
||||
|
||||
Interfaces swp0, swp1, swp2 are handled by a switchdev driver that has
|
||||
the following property: frames injected from its control interface bypass
|
||||
the internal address analyzer logic, and therefore, this hardware does
|
||||
not learn from the source address of packets transmitted by the network
|
||||
stack through it. So, since bridging between eth0 (where Station B is
|
||||
attached) and swp0 (where Station A is attached) is done in software,
|
||||
the switchdev hardware will never learn the source address of Station B.
|
||||
So the traffic towards that destination will be treated as unknown, i.e.
|
||||
flooded.
|
||||
|
||||
This is where the bridge notifications come in handy. When br0 on the
|
||||
DUT sees frames with Station B's MAC address on eth0, the switchdev
|
||||
driver gets these notifications and can install a rule to send frames
|
||||
towards Station B's address that are incoming from swp0, swp1, swp2,
|
||||
only towards the control interface. This is all switchdev driver private
|
||||
business, which the notification makes possible.
|
||||
|
||||
All is fine until someone unplugs Station B's cable and moves it to the
|
||||
other switch:
|
||||
|
||||
DUT
|
||||
+-------------------------------------+
|
||||
| br0 |
|
||||
| +------+ +------+ +------+ +------+ |
|
||||
| | | | | | | | | |
|
||||
| | swp0 | | swp1 | | swp2 | | eth0 | |
|
||||
+-------------------------------------+
|
||||
| | |
|
||||
Station A | |
|
||||
| |
|
||||
+--+------+--+ +--+------+--+
|
||||
| | | | | | | |
|
||||
| | swp0 | | | | swp0 | |
|
||||
Another | +------+ | | +------+ | Another
|
||||
switch | br0 | | br0 | switch
|
||||
| +------+ | | +------+ |
|
||||
| | | | | | | |
|
||||
| | swp1 | | | | swp1 | |
|
||||
+--+------+--+ +--+------+--+
|
||||
|
|
||||
Station B
|
||||
|
||||
Luckily for the use cases we care about, Station B is noisy enough that
|
||||
the DUT hears it (on swp1 this time). swp1 receives the frames and
|
||||
delivers them to the bridge, who enters the unlikely path in br_fdb_update
|
||||
of updating an existing entry. It moves the entry in the software bridge
|
||||
to swp1 and emits an addition notification towards that.
|
||||
|
||||
As far as the switchdev driver is concerned, all that it needs to ensure
|
||||
is that traffic between Station A and Station B is not forever broken.
|
||||
If it does nothing, then the stale rule to send frames for Station B
|
||||
towards the control interface remains in place. But Station B is no
|
||||
longer reachable via the control interface, but via a port that can
|
||||
offload the bridge port learning attribute. It's just that the port is
|
||||
prevented from learning this address, since the rule overrides FDB
|
||||
updates. So the rule needs to go. The question is via what mechanism.
|
||||
|
||||
It sure would be possible for this switchdev driver to keep track of all
|
||||
addresses which are sent to the control interface, and then also listen
|
||||
for bridge notifier events on its own ports, searching for the ones that
|
||||
have a MAC address which was previously sent to the control interface.
|
||||
But this is cumbersome and inefficient. Instead, with one small change,
|
||||
the bridge could notify of the address deletion from the old port, in a
|
||||
symmetrical manner with how it did for the insertion. Then the switchdev
|
||||
driver would not be required to monitor learn/forget events for its own
|
||||
ports. It could just delete the rule towards the control interface upon
|
||||
bridge entry migration. This would make hardware address learning be
|
||||
possible again. Then it would take a few more packets until the hardware
|
||||
and software FDB would be in sync again.
|
||||
|
||||
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
|
||||
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
net/bridge/br_fdb.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/net/bridge/br_fdb.c
|
||||
+++ b/net/bridge/br_fdb.c
|
||||
@@ -602,6 +602,7 @@ void br_fdb_update(struct net_bridge *br
|
||||
/* fastpath: update of existing entry */
|
||||
if (unlikely(source != fdb->dst &&
|
||||
!test_bit(BR_FDB_STICKY, &fdb->flags))) {
|
||||
+ br_switchdev_fdb_notify(fdb, RTM_DELNEIGH);
|
||||
fdb->dst = source;
|
||||
fdb_modified = true;
|
||||
/* Take over HW learned entry */
|
@ -1,52 +0,0 @@
|
||||
From 2fd186501b1cff155cc4a755c210793cfc0dffb5 Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Date: Wed, 6 Jan 2021 11:51:31 +0200
|
||||
Subject: [PATCH] net: dsa: be louder when a non-legacy FDB operation fails
|
||||
|
||||
The dev_close() call was added in commit c9eb3e0f8701 ("net: dsa: Add
|
||||
support for learning FDB through notification") "to indicate inconsistent
|
||||
situation" when we could not delete an FDB entry from the port.
|
||||
|
||||
bridge fdb del d8:58:d7:00:ca:6d dev swp0 self master
|
||||
|
||||
It is a bit drastic and at the same time not helpful if the above fails
|
||||
to only print with netdev_dbg log level, but on the other hand to bring
|
||||
the interface down.
|
||||
|
||||
So increase the verbosity of the error message, and drop dev_close().
|
||||
|
||||
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
net/dsa/slave.c | 10 +++++++---
|
||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -2112,7 +2112,9 @@ static void dsa_slave_switchdev_event_wo
|
||||
|
||||
err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
|
||||
if (err) {
|
||||
- netdev_dbg(dev, "fdb add failed err=%d\n", err);
|
||||
+ netdev_err(dev,
|
||||
+ "failed to add %pM vid %d to fdb: %d\n",
|
||||
+ fdb_info->addr, fdb_info->vid, err);
|
||||
break;
|
||||
}
|
||||
fdb_info->offloaded = true;
|
||||
@@ -2127,9 +2129,11 @@ static void dsa_slave_switchdev_event_wo
|
||||
|
||||
err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
|
||||
if (err) {
|
||||
- netdev_dbg(dev, "fdb del failed err=%d\n", err);
|
||||
- dev_close(dev);
|
||||
+ netdev_err(dev,
|
||||
+ "failed to delete %pM vid %d from fdb: %d\n",
|
||||
+ fdb_info->addr, fdb_info->vid, err);
|
||||
}
|
||||
+
|
||||
break;
|
||||
}
|
||||
rtnl_unlock();
|
@ -1,226 +0,0 @@
|
||||
From c4bb76a9a0ef87c4cc1f636defed5f12deb9f5a7 Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Date: Wed, 6 Jan 2021 11:51:32 +0200
|
||||
Subject: [PATCH] net: dsa: don't use switchdev_notifier_fdb_info in
|
||||
dsa_switchdev_event_work
|
||||
|
||||
Currently DSA doesn't add FDB entries on the CPU port, because it only
|
||||
does so through switchdev, which is associated with a net_device, and
|
||||
there are none of those for the CPU port.
|
||||
|
||||
But actually FDB addresses on the CPU port have some use cases of their
|
||||
own, if the switchdev operations are initiated from within the DSA
|
||||
layer. There is just one problem with the existing code: it passes a
|
||||
structure in dsa_switchdev_event_work which was retrieved directly from
|
||||
switchdev, so it contains a net_device. We need to generalize the
|
||||
contents to something that covers the CPU port as well: the "ds, port"
|
||||
tuple is fine for that.
|
||||
|
||||
Note that the new procedure for notifying the successful FDB offload is
|
||||
inspired by the rocker model.
|
||||
|
||||
Also, nothing was being done if added_by_user was false. Let's check for
|
||||
that a lot earlier, and don't actually bother to schedule the worker
|
||||
for nothing.
|
||||
|
||||
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
net/dsa/dsa_priv.h | 12 +++++
|
||||
net/dsa/slave.c | 106 ++++++++++++++++++++++-----------------------
|
||||
2 files changed, 65 insertions(+), 53 deletions(-)
|
||||
|
||||
--- a/net/dsa/dsa_priv.h
|
||||
+++ b/net/dsa/dsa_priv.h
|
||||
@@ -73,6 +73,18 @@ struct dsa_notifier_mtu_info {
|
||||
int mtu;
|
||||
};
|
||||
|
||||
+struct dsa_switchdev_event_work {
|
||||
+ struct dsa_switch *ds;
|
||||
+ int port;
|
||||
+ struct work_struct work;
|
||||
+ unsigned long event;
|
||||
+ /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and
|
||||
+ * SWITCHDEV_FDB_DEL_TO_DEVICE
|
||||
+ */
|
||||
+ unsigned char addr[ETH_ALEN];
|
||||
+ u16 vid;
|
||||
+};
|
||||
+
|
||||
struct dsa_slave_priv {
|
||||
/* Copy of CPU port xmit for faster access in slave transmit hot path */
|
||||
struct sk_buff * (*xmit)(struct sk_buff *skb,
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -2087,76 +2087,66 @@ static int dsa_slave_netdevice_event(str
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
-struct dsa_switchdev_event_work {
|
||||
- struct work_struct work;
|
||||
- struct switchdev_notifier_fdb_info fdb_info;
|
||||
- struct net_device *dev;
|
||||
- unsigned long event;
|
||||
-};
|
||||
+static void
|
||||
+dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work)
|
||||
+{
|
||||
+ struct dsa_switch *ds = switchdev_work->ds;
|
||||
+ struct switchdev_notifier_fdb_info info;
|
||||
+ struct dsa_port *dp;
|
||||
+
|
||||
+ if (!dsa_is_user_port(ds, switchdev_work->port))
|
||||
+ return;
|
||||
+
|
||||
+ info.addr = switchdev_work->addr;
|
||||
+ info.vid = switchdev_work->vid;
|
||||
+ info.offloaded = true;
|
||||
+ dp = dsa_to_port(ds, switchdev_work->port);
|
||||
+ call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
|
||||
+ dp->slave, &info.info, NULL);
|
||||
+}
|
||||
|
||||
static void dsa_slave_switchdev_event_work(struct work_struct *work)
|
||||
{
|
||||
struct dsa_switchdev_event_work *switchdev_work =
|
||||
container_of(work, struct dsa_switchdev_event_work, work);
|
||||
- struct net_device *dev = switchdev_work->dev;
|
||||
- struct switchdev_notifier_fdb_info *fdb_info;
|
||||
- struct dsa_port *dp = dsa_slave_to_port(dev);
|
||||
+ struct dsa_switch *ds = switchdev_work->ds;
|
||||
+ struct dsa_port *dp;
|
||||
int err;
|
||||
|
||||
+ dp = dsa_to_port(ds, switchdev_work->port);
|
||||
+
|
||||
rtnl_lock();
|
||||
switch (switchdev_work->event) {
|
||||
case SWITCHDEV_FDB_ADD_TO_DEVICE:
|
||||
- fdb_info = &switchdev_work->fdb_info;
|
||||
- if (!fdb_info->added_by_user)
|
||||
- break;
|
||||
-
|
||||
- err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
|
||||
+ err = dsa_port_fdb_add(dp, switchdev_work->addr,
|
||||
+ switchdev_work->vid);
|
||||
if (err) {
|
||||
- netdev_err(dev,
|
||||
- "failed to add %pM vid %d to fdb: %d\n",
|
||||
- fdb_info->addr, fdb_info->vid, err);
|
||||
+ dev_err(ds->dev,
|
||||
+ "port %d failed to add %pM vid %d to fdb: %d\n",
|
||||
+ dp->index, switchdev_work->addr,
|
||||
+ switchdev_work->vid, err);
|
||||
break;
|
||||
}
|
||||
- fdb_info->offloaded = true;
|
||||
- call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
|
||||
- &fdb_info->info, NULL);
|
||||
+ dsa_fdb_offload_notify(switchdev_work);
|
||||
break;
|
||||
|
||||
case SWITCHDEV_FDB_DEL_TO_DEVICE:
|
||||
- fdb_info = &switchdev_work->fdb_info;
|
||||
- if (!fdb_info->added_by_user)
|
||||
- break;
|
||||
-
|
||||
- err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
|
||||
+ err = dsa_port_fdb_del(dp, switchdev_work->addr,
|
||||
+ switchdev_work->vid);
|
||||
if (err) {
|
||||
- netdev_err(dev,
|
||||
- "failed to delete %pM vid %d from fdb: %d\n",
|
||||
- fdb_info->addr, fdb_info->vid, err);
|
||||
+ dev_err(ds->dev,
|
||||
+ "port %d failed to delete %pM vid %d from fdb: %d\n",
|
||||
+ dp->index, switchdev_work->addr,
|
||||
+ switchdev_work->vid, err);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
rtnl_unlock();
|
||||
|
||||
- kfree(switchdev_work->fdb_info.addr);
|
||||
kfree(switchdev_work);
|
||||
- dev_put(dev);
|
||||
-}
|
||||
-
|
||||
-static int
|
||||
-dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work *
|
||||
- switchdev_work,
|
||||
- const struct switchdev_notifier_fdb_info *
|
||||
- fdb_info)
|
||||
-{
|
||||
- memcpy(&switchdev_work->fdb_info, fdb_info,
|
||||
- sizeof(switchdev_work->fdb_info));
|
||||
- switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
|
||||
- if (!switchdev_work->fdb_info.addr)
|
||||
- return -ENOMEM;
|
||||
- ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
|
||||
- fdb_info->addr);
|
||||
- return 0;
|
||||
+ if (dsa_is_user_port(ds, dp->index))
|
||||
+ dev_put(dp->slave);
|
||||
}
|
||||
|
||||
/* Called under rcu_read_lock() */
|
||||
@@ -2164,7 +2154,9 @@ static int dsa_slave_switchdev_event(str
|
||||
unsigned long event, void *ptr)
|
||||
{
|
||||
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
|
||||
+ const struct switchdev_notifier_fdb_info *fdb_info;
|
||||
struct dsa_switchdev_event_work *switchdev_work;
|
||||
+ struct dsa_port *dp;
|
||||
int err;
|
||||
|
||||
if (event == SWITCHDEV_PORT_ATTR_SET) {
|
||||
@@ -2177,20 +2169,32 @@ static int dsa_slave_switchdev_event(str
|
||||
if (!dsa_slave_dev_check(dev))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
+ dp = dsa_slave_to_port(dev);
|
||||
+
|
||||
switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
|
||||
if (!switchdev_work)
|
||||
return NOTIFY_BAD;
|
||||
|
||||
INIT_WORK(&switchdev_work->work,
|
||||
dsa_slave_switchdev_event_work);
|
||||
- switchdev_work->dev = dev;
|
||||
+ switchdev_work->ds = dp->ds;
|
||||
+ switchdev_work->port = dp->index;
|
||||
switchdev_work->event = event;
|
||||
|
||||
switch (event) {
|
||||
case SWITCHDEV_FDB_ADD_TO_DEVICE:
|
||||
case SWITCHDEV_FDB_DEL_TO_DEVICE:
|
||||
- if (dsa_slave_switchdev_fdb_work_init(switchdev_work, ptr))
|
||||
- goto err_fdb_work_init;
|
||||
+ fdb_info = ptr;
|
||||
+
|
||||
+ if (!fdb_info->added_by_user) {
|
||||
+ kfree(switchdev_work);
|
||||
+ return NOTIFY_OK;
|
||||
+ }
|
||||
+
|
||||
+ ether_addr_copy(switchdev_work->addr,
|
||||
+ fdb_info->addr);
|
||||
+ switchdev_work->vid = fdb_info->vid;
|
||||
+
|
||||
dev_hold(dev);
|
||||
break;
|
||||
default:
|
||||
@@ -2200,10 +2204,6 @@ static int dsa_slave_switchdev_event(str
|
||||
|
||||
dsa_schedule_work(&switchdev_work->work);
|
||||
return NOTIFY_OK;
|
||||
-
|
||||
-err_fdb_work_init:
|
||||
- kfree(switchdev_work);
|
||||
- return NOTIFY_BAD;
|
||||
}
|
||||
|
||||
static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
|
@ -1,85 +0,0 @@
|
||||
From 447d290a58bd335d68f665713842365d3d6447df Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Date: Wed, 6 Jan 2021 11:51:33 +0200
|
||||
Subject: [PATCH] net: dsa: move switchdev event implementation under the same
|
||||
switch/case statement
|
||||
|
||||
We'll need to start listening to SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE
|
||||
events even for interfaces where dsa_slave_dev_check returns false, so
|
||||
we need that check inside the switch-case statement for SWITCHDEV_FDB_*.
|
||||
|
||||
This movement also avoids a useless allocation / free of switchdev_work
|
||||
on the untreated "default event" case.
|
||||
|
||||
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
net/dsa/slave.c | 35 ++++++++++++++++-------------------
|
||||
1 file changed, 16 insertions(+), 19 deletions(-)
|
||||
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -2159,31 +2159,29 @@ static int dsa_slave_switchdev_event(str
|
||||
struct dsa_port *dp;
|
||||
int err;
|
||||
|
||||
- if (event == SWITCHDEV_PORT_ATTR_SET) {
|
||||
+ switch (event) {
|
||||
+ case SWITCHDEV_PORT_ATTR_SET:
|
||||
err = switchdev_handle_port_attr_set(dev, ptr,
|
||||
dsa_slave_dev_check,
|
||||
dsa_slave_port_attr_set);
|
||||
return notifier_from_errno(err);
|
||||
- }
|
||||
-
|
||||
- if (!dsa_slave_dev_check(dev))
|
||||
- return NOTIFY_DONE;
|
||||
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
|
||||
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
|
||||
+ if (!dsa_slave_dev_check(dev))
|
||||
+ return NOTIFY_DONE;
|
||||
|
||||
- dp = dsa_slave_to_port(dev);
|
||||
+ dp = dsa_slave_to_port(dev);
|
||||
|
||||
- switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
|
||||
- if (!switchdev_work)
|
||||
- return NOTIFY_BAD;
|
||||
-
|
||||
- INIT_WORK(&switchdev_work->work,
|
||||
- dsa_slave_switchdev_event_work);
|
||||
- switchdev_work->ds = dp->ds;
|
||||
- switchdev_work->port = dp->index;
|
||||
- switchdev_work->event = event;
|
||||
+ switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
|
||||
+ if (!switchdev_work)
|
||||
+ return NOTIFY_BAD;
|
||||
+
|
||||
+ INIT_WORK(&switchdev_work->work,
|
||||
+ dsa_slave_switchdev_event_work);
|
||||
+ switchdev_work->ds = dp->ds;
|
||||
+ switchdev_work->port = dp->index;
|
||||
+ switchdev_work->event = event;
|
||||
|
||||
- switch (event) {
|
||||
- case SWITCHDEV_FDB_ADD_TO_DEVICE:
|
||||
- case SWITCHDEV_FDB_DEL_TO_DEVICE:
|
||||
fdb_info = ptr;
|
||||
|
||||
if (!fdb_info->added_by_user) {
|
||||
@@ -2196,13 +2194,12 @@ static int dsa_slave_switchdev_event(str
|
||||
switchdev_work->vid = fdb_info->vid;
|
||||
|
||||
dev_hold(dev);
|
||||
+ dsa_schedule_work(&switchdev_work->work);
|
||||
break;
|
||||
default:
|
||||
- kfree(switchdev_work);
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
- dsa_schedule_work(&switchdev_work->work);
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
@ -1,42 +0,0 @@
|
||||
From 5fb4a451a87d8ed3363d28b63a3295399373d6c4 Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Date: Wed, 6 Jan 2021 11:51:34 +0200
|
||||
Subject: [PATCH] net: dsa: exit early in dsa_slave_switchdev_event if we can't
|
||||
program the FDB
|
||||
|
||||
Right now, the following would happen for a switch driver that does not
|
||||
implement .port_fdb_add or .port_fdb_del.
|
||||
|
||||
dsa_slave_switchdev_event returns NOTIFY_OK and schedules:
|
||||
-> dsa_slave_switchdev_event_work
|
||||
-> dsa_port_fdb_add
|
||||
-> dsa_port_notify(DSA_NOTIFIER_FDB_ADD)
|
||||
-> dsa_switch_fdb_add
|
||||
-> if (!ds->ops->port_fdb_add) return -EOPNOTSUPP;
|
||||
-> an error is printed with dev_dbg, and
|
||||
dsa_fdb_offload_notify(switchdev_work) is not called.
|
||||
|
||||
We can avoid scheduling the worker for nothing and say NOTIFY_DONE.
|
||||
Because we don't call dsa_fdb_offload_notify, the static FDB entry will
|
||||
remain just in the software bridge.
|
||||
|
||||
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
net/dsa/slave.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -2172,6 +2172,9 @@ static int dsa_slave_switchdev_event(str
|
||||
|
||||
dp = dsa_slave_to_port(dev);
|
||||
|
||||
+ if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del)
|
||||
+ return NOTIFY_DONE;
|
||||
+
|
||||
switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
|
||||
if (!switchdev_work)
|
||||
return NOTIFY_BAD;
|
@ -1,264 +0,0 @@
|
||||
From d5f19486cee79d04c054427577ac96ed123706db Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Date: Wed, 6 Jan 2021 11:51:35 +0200
|
||||
Subject: [PATCH] net: dsa: listen for SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE on
|
||||
foreign bridge neighbors
|
||||
|
||||
Some DSA switches (and not only those) cannot learn source MAC addresses from
|
||||
packets injected from the CPU. They only perform hardware address
|
||||
learning from inbound traffic.
|
||||
|
||||
This can be problematic when we have a bridge spanning some DSA switch
|
||||
ports and some non-DSA ports (which we'll call "foreign interfaces" from
|
||||
DSA's perspective).
|
||||
|
||||
There are 2 classes of problems created by the lack of learning on
|
||||
CPU-injected traffic:
|
||||
- excessive flooding, due to the fact that DSA treats those addresses as
|
||||
unknown
|
||||
- the risk of stale routes, which can lead to temporary packet loss
|
||||
|
||||
To illustrate the second class, consider the following situation, which
|
||||
is common in production equipment (wireless access points, where there
|
||||
is a WLAN interface and an Ethernet switch, and these form a single
|
||||
bridging domain).
|
||||
|
||||
AP 1:
|
||||
+------------------------------------------------------------------------+
|
||||
| br0 |
|
||||
+------------------------------------------------------------------------+
|
||||
+------------+ +------------+ +------------+ +------------+ +------------+
|
||||
| swp0 | | swp1 | | swp2 | | swp3 | | wlan0 |
|
||||
+------------+ +------------+ +------------+ +------------+ +------------+
|
||||
| ^ ^
|
||||
| | |
|
||||
| | |
|
||||
| Client A Client B
|
||||
|
|
||||
|
|
||||
|
|
||||
+------------+ +------------+ +------------+ +------------+ +------------+
|
||||
| swp0 | | swp1 | | swp2 | | swp3 | | wlan0 |
|
||||
+------------+ +------------+ +------------+ +------------+ +------------+
|
||||
+------------------------------------------------------------------------+
|
||||
| br0 |
|
||||
+------------------------------------------------------------------------+
|
||||
AP 2
|
||||
|
||||
- br0 of AP 1 will know that Clients A and B are reachable via wlan0
|
||||
- the hardware fdb of a DSA switch driver today is not kept in sync with
|
||||
the software entries on other bridge ports, so it will not know that
|
||||
clients A and B are reachable via the CPU port UNLESS the hardware
|
||||
switch itself performs SA learning from traffic injected from the CPU.
|
||||
Nonetheless, a substantial number of switches don't.
|
||||
- the hardware fdb of the DSA switch on AP 2 may autonomously learn that
|
||||
Client A and B are reachable through swp0. Therefore, the software br0
|
||||
of AP 2 also may or may not learn this. In the example we're
|
||||
illustrating, some Ethernet traffic has been going on, and br0 from AP
|
||||
2 has indeed learnt that it can reach Client B through swp0.
|
||||
|
||||
One of the wireless clients, say Client B, disconnects from AP 1 and
|
||||
roams to AP 2. The topology now looks like this:
|
||||
|
||||
AP 1:
|
||||
+------------------------------------------------------------------------+
|
||||
| br0 |
|
||||
+------------------------------------------------------------------------+
|
||||
+------------+ +------------+ +------------+ +------------+ +------------+
|
||||
| swp0 | | swp1 | | swp2 | | swp3 | | wlan0 |
|
||||
+------------+ +------------+ +------------+ +------------+ +------------+
|
||||
| ^
|
||||
| |
|
||||
| Client A
|
||||
|
|
||||
|
|
||||
| Client B
|
||||
| |
|
||||
| v
|
||||
+------------+ +------------+ +------------+ +------------+ +------------+
|
||||
| swp0 | | swp1 | | swp2 | | swp3 | | wlan0 |
|
||||
+------------+ +------------+ +------------+ +------------+ +------------+
|
||||
+------------------------------------------------------------------------+
|
||||
| br0 |
|
||||
+------------------------------------------------------------------------+
|
||||
AP 2
|
||||
|
||||
- br0 of AP 1 still knows that Client A is reachable via wlan0 (no change)
|
||||
- br0 of AP 1 will (possibly) know that Client B has left wlan0. There
|
||||
are cases where it might never find out though. Either way, DSA today
|
||||
does not process that notification in any way.
|
||||
- the hardware FDB of the DSA switch on AP 1 may learn autonomously that
|
||||
Client B can be reached via swp0, if it receives any packet with
|
||||
Client B's source MAC address over Ethernet.
|
||||
- the hardware FDB of the DSA switch on AP 2 still thinks that Client B
|
||||
can be reached via swp0. It does not know that it has roamed to wlan0,
|
||||
because it doesn't perform SA learning from the CPU port.
|
||||
|
||||
Now Client A contacts Client B.
|
||||
AP 1 routes the packet fine towards swp0 and delivers it on the Ethernet
|
||||
segment.
|
||||
AP 2 sees a frame on swp0 and its fdb says that the destination is swp0.
|
||||
Hairpinning is disabled => drop.
|
||||
|
||||
This problem comes from the fact that these switches have a 'blind spot'
|
||||
for addresses coming from software bridging. The generic solution is not
|
||||
to assume that hardware learning can be enabled somehow, but to listen
|
||||
to more bridge learning events. It turns out that the bridge driver does
|
||||
learn in software from all inbound frames, in __br_handle_local_finish.
|
||||
A proper SWITCHDEV_FDB_ADD_TO_DEVICE notification is emitted for the
|
||||
addresses serviced by the bridge on 'foreign' interfaces. The software
|
||||
bridge also does the right thing on migration, by notifying that the old
|
||||
entry is deleted, so that does not need to be special-cased in DSA. When
|
||||
it is deleted, we just need to delete our static FDB entry towards the
|
||||
CPU too, and wait.
|
||||
|
||||
The problem is that DSA currently only cares about SWITCHDEV_FDB_ADD_TO_DEVICE
|
||||
events received on its own interfaces, such as static FDB entries.
|
||||
|
||||
Luckily we can change that, and DSA can listen to all switchdev FDB
|
||||
add/del events in the system and figure out if those events were emitted
|
||||
by a bridge that spans at least one of DSA's own ports. In case that is
|
||||
true, DSA will also offload that address towards its own CPU port, in
|
||||
the eventuality that there might be bridge clients attached to the DSA
|
||||
switch who want to talk to the station connected to the foreign
|
||||
interface.
|
||||
|
||||
In terms of implementation, we need to keep the fdb_info->added_by_user
|
||||
check for the case where the switchdev event was targeted directly at a
|
||||
DSA switch port. But we don't need to look at that flag for snooped
|
||||
events. So the check is currently too late, we need to move it earlier.
|
||||
This also simplifies the code a bit, since we avoid uselessly allocating
|
||||
and freeing switchdev_work.
|
||||
|
||||
We could probably do some improvements in the future. For example,
|
||||
multi-bridge support is rudimentary at the moment. If there are two
|
||||
bridges spanning a DSA switch's ports, and both of them need to service
|
||||
the same MAC address, then what will happen is that the migration of one
|
||||
of those stations will trigger the deletion of the FDB entry from the
|
||||
CPU port while it is still used by the other bridge. That could be improved
|
||||
with reference counting but is left for another time.
|
||||
|
||||
This behavior needs to be enabled at driver level by setting
|
||||
ds->assisted_learning_on_cpu_port = true. This is because we don't want
|
||||
to inflict a potential performance penalty (accesses through
|
||||
MDIO/I2C/SPI are expensive) to hardware that really doesn't need it
|
||||
because address learning on the CPU port works there.
|
||||
|
||||
Reported-by: DENG Qingfang <dqfext@gmail.com>
|
||||
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
include/net/dsa.h | 5 +++++
|
||||
net/dsa/slave.c | 66 +++++++++++++++++++++++++++++++++++++++++++++----------
|
||||
2 files changed, 60 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/include/net/dsa.h
|
||||
+++ b/include/net/dsa.h
|
||||
@@ -317,6 +317,11 @@ struct dsa_switch {
|
||||
*/
|
||||
bool untag_bridge_pvid;
|
||||
|
||||
+ /* Let DSA manage the FDB entries towards the CPU, based on the
|
||||
+ * software bridge database.
|
||||
+ */
|
||||
+ bool assisted_learning_on_cpu_port;
|
||||
+
|
||||
/* In case vlan_filtering_is_global is set, the VLAN awareness state
|
||||
* should be retrieved from here and not from the per-port settings.
|
||||
*/
|
||||
--- a/net/dsa/slave.c
|
||||
+++ b/net/dsa/slave.c
|
||||
@@ -2149,6 +2149,28 @@ static void dsa_slave_switchdev_event_wo
|
||||
dev_put(dp->slave);
|
||||
}
|
||||
|
||||
+static int dsa_lower_dev_walk(struct net_device *lower_dev,
|
||||
+ struct netdev_nested_priv *priv)
|
||||
+{
|
||||
+ if (dsa_slave_dev_check(lower_dev)) {
|
||||
+ priv->data = (void *)netdev_priv(lower_dev);
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev)
|
||||
+{
|
||||
+ struct netdev_nested_priv priv = {
|
||||
+ .data = NULL,
|
||||
+ };
|
||||
+
|
||||
+ netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv);
|
||||
+
|
||||
+ return (struct dsa_slave_priv *)priv.data;
|
||||
+}
|
||||
+
|
||||
/* Called under rcu_read_lock() */
|
||||
static int dsa_slave_switchdev_event(struct notifier_block *unused,
|
||||
unsigned long event, void *ptr)
|
||||
@@ -2167,10 +2189,37 @@ static int dsa_slave_switchdev_event(str
|
||||
return notifier_from_errno(err);
|
||||
case SWITCHDEV_FDB_ADD_TO_DEVICE:
|
||||
case SWITCHDEV_FDB_DEL_TO_DEVICE:
|
||||
- if (!dsa_slave_dev_check(dev))
|
||||
- return NOTIFY_DONE;
|
||||
+ fdb_info = ptr;
|
||||
+
|
||||
+ if (dsa_slave_dev_check(dev)) {
|
||||
+ if (!fdb_info->added_by_user)
|
||||
+ return NOTIFY_OK;
|
||||
+
|
||||
+ dp = dsa_slave_to_port(dev);
|
||||
+ } else {
|
||||
+ /* Snoop addresses learnt on foreign interfaces
|
||||
+ * bridged with us, for switches that don't
|
||||
+ * automatically learn SA from CPU-injected traffic
|
||||
+ */
|
||||
+ struct net_device *br_dev;
|
||||
+ struct dsa_slave_priv *p;
|
||||
+
|
||||
+ br_dev = netdev_master_upper_dev_get_rcu(dev);
|
||||
+ if (!br_dev)
|
||||
+ return NOTIFY_DONE;
|
||||
+
|
||||
+ if (!netif_is_bridge_master(br_dev))
|
||||
+ return NOTIFY_DONE;
|
||||
+
|
||||
+ p = dsa_slave_dev_lower_find(br_dev);
|
||||
+ if (!p)
|
||||
+ return NOTIFY_DONE;
|
||||
|
||||
- dp = dsa_slave_to_port(dev);
|
||||
+ dp = p->dp->cpu_dp;
|
||||
+
|
||||
+ if (!dp->ds->assisted_learning_on_cpu_port)
|
||||
+ return NOTIFY_DONE;
|
||||
+ }
|
||||
|
||||
if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del)
|
||||
return NOTIFY_DONE;
|
||||
@@ -2185,18 +2234,13 @@ static int dsa_slave_switchdev_event(str
|
||||
switchdev_work->port = dp->index;
|
||||
switchdev_work->event = event;
|
||||
|
||||
- fdb_info = ptr;
|
||||
-
|
||||
- if (!fdb_info->added_by_user) {
|
||||
- kfree(switchdev_work);
|
||||
- return NOTIFY_OK;
|
||||
- }
|
||||
-
|
||||
ether_addr_copy(switchdev_work->addr,
|
||||
fdb_info->addr);
|
||||
switchdev_work->vid = fdb_info->vid;
|
||||
|
||||
- dev_hold(dev);
|
||||
+ /* Hold a reference on the slave for dsa_fdb_offload_notify */
|
||||
+ if (dsa_is_user_port(dp->ds, dp->index))
|
||||
+ dev_hold(dev);
|
||||
dsa_schedule_work(&switchdev_work->work);
|
||||
break;
|
||||
default:
|
@ -1,84 +0,0 @@
|
||||
From c3b8e07909dbe67b0d580416c1a5257643a73be7 Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Date: Fri, 12 Mar 2021 00:07:03 -0800
|
||||
Subject: [PATCH] net: dsa: mt7530: setup core clock even in TRGMII mode
|
||||
|
||||
A recent change to MIPS ralink reset logic made it so mt7530 actually
|
||||
resets the switch on platforms such as mt7621 (where bit 2 is the reset
|
||||
line for the switch). That exposed an issue where the switch would not
|
||||
function properly in TRGMII mode after a reset.
|
||||
|
||||
Reconfigure core clock in TRGMII mode to fix the issue.
|
||||
|
||||
Tested on Ubiquiti ER-X (MT7621) with TRGMII mode enabled.
|
||||
|
||||
Fixes: 3f9ef7785a9c ("MIPS: ralink: manage low reset lines")
|
||||
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/dsa/mt7530.c | 52 +++++++++++++++++++---------------------
|
||||
1 file changed, 25 insertions(+), 27 deletions(-)
|
||||
|
||||
--- a/drivers/net/dsa/mt7530.c
|
||||
+++ b/drivers/net/dsa/mt7530.c
|
||||
@@ -436,34 +436,32 @@ mt7530_pad_clk_setup(struct dsa_switch *
|
||||
TD_DM_DRVP(8) | TD_DM_DRVN(8));
|
||||
|
||||
/* Setup core clock for MT7530 */
|
||||
- if (!trgint) {
|
||||
- /* Disable MT7530 core clock */
|
||||
- core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
|
||||
-
|
||||
- /* Disable PLL, since phy_device has not yet been created
|
||||
- * provided for phy_[read,write]_mmd_indirect is called, we
|
||||
- * provide our own core_write_mmd_indirect to complete this
|
||||
- * function.
|
||||
- */
|
||||
- core_write_mmd_indirect(priv,
|
||||
- CORE_GSWPLL_GRP1,
|
||||
- MDIO_MMD_VEND2,
|
||||
- 0);
|
||||
-
|
||||
- /* Set core clock into 500Mhz */
|
||||
- core_write(priv, CORE_GSWPLL_GRP2,
|
||||
- RG_GSWPLL_POSDIV_500M(1) |
|
||||
- RG_GSWPLL_FBKDIV_500M(25));
|
||||
-
|
||||
- /* Enable PLL */
|
||||
- core_write(priv, CORE_GSWPLL_GRP1,
|
||||
- RG_GSWPLL_EN_PRE |
|
||||
- RG_GSWPLL_POSDIV_200M(2) |
|
||||
- RG_GSWPLL_FBKDIV_200M(32));
|
||||
-
|
||||
- /* Enable MT7530 core clock */
|
||||
- core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
|
||||
- }
|
||||
+ /* Disable MT7530 core clock */
|
||||
+ core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
|
||||
+
|
||||
+ /* Disable PLL, since phy_device has not yet been created
|
||||
+ * provided for phy_[read,write]_mmd_indirect is called, we
|
||||
+ * provide our own core_write_mmd_indirect to complete this
|
||||
+ * function.
|
||||
+ */
|
||||
+ core_write_mmd_indirect(priv,
|
||||
+ CORE_GSWPLL_GRP1,
|
||||
+ MDIO_MMD_VEND2,
|
||||
+ 0);
|
||||
+
|
||||
+ /* Set core clock into 500Mhz */
|
||||
+ core_write(priv, CORE_GSWPLL_GRP2,
|
||||
+ RG_GSWPLL_POSDIV_500M(1) |
|
||||
+ RG_GSWPLL_FBKDIV_500M(25));
|
||||
+
|
||||
+ /* Enable PLL */
|
||||
+ core_write(priv, CORE_GSWPLL_GRP1,
|
||||
+ RG_GSWPLL_EN_PRE |
|
||||
+ RG_GSWPLL_POSDIV_200M(2) |
|
||||
+ RG_GSWPLL_FBKDIV_200M(32));
|
||||
+
|
||||
+ /* Enable MT7530 core clock */
|
||||
+ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
|
||||
|
||||
/* Setup the MT7530 TRGMII Tx Clock */
|
||||
core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
|
@ -1,181 +0,0 @@
|
||||
From 429a0edeefd88cbfca5c417dfb8561047bb50769 Mon Sep 17 00:00:00 2001
|
||||
From: DENG Qingfang <dqfext@gmail.com>
|
||||
Date: Mon, 25 Jan 2021 12:43:22 +0800
|
||||
Subject: [PATCH] net: dsa: mt7530: MT7530 optional GPIO support
|
||||
|
||||
MT7530's LED controller can drive up to 15 LED/GPIOs.
|
||||
|
||||
Add support for GPIO control and allow users to use its GPIOs by
|
||||
setting gpio-controller property in device tree.
|
||||
|
||||
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
|
||||
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
|
||||
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
---
|
||||
drivers/net/dsa/mt7530.c | 110 +++++++++++++++++++++++++++++++++++++++
|
||||
drivers/net/dsa/mt7530.h | 20 +++++++
|
||||
2 files changed, 130 insertions(+)
|
||||
|
||||
--- a/drivers/net/dsa/mt7530.c
|
||||
+++ b/drivers/net/dsa/mt7530.c
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <linux/regulator/consumer.h>
|
||||
#include <linux/reset.h>
|
||||
#include <linux/gpio/consumer.h>
|
||||
+#include <linux/gpio/driver.h>
|
||||
#include <net/dsa.h>
|
||||
|
||||
#include "mt7530.h"
|
||||
@@ -1534,6 +1535,109 @@ mtk_get_tag_protocol(struct dsa_switch *
|
||||
}
|
||||
}
|
||||
|
||||
+static inline u32
|
||||
+mt7530_gpio_to_bit(unsigned int offset)
|
||||
+{
|
||||
+ /* Map GPIO offset to register bit
|
||||
+ * [ 2: 0] port 0 LED 0..2 as GPIO 0..2
|
||||
+ * [ 6: 4] port 1 LED 0..2 as GPIO 3..5
|
||||
+ * [10: 8] port 2 LED 0..2 as GPIO 6..8
|
||||
+ * [14:12] port 3 LED 0..2 as GPIO 9..11
|
||||
+ * [18:16] port 4 LED 0..2 as GPIO 12..14
|
||||
+ */
|
||||
+ return BIT(offset + offset / 3);
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mt7530_gpio_get(struct gpio_chip *gc, unsigned int offset)
|
||||
+{
|
||||
+ struct mt7530_priv *priv = gpiochip_get_data(gc);
|
||||
+ u32 bit = mt7530_gpio_to_bit(offset);
|
||||
+
|
||||
+ return !!(mt7530_read(priv, MT7530_LED_GPIO_DATA) & bit);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+mt7530_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
|
||||
+{
|
||||
+ struct mt7530_priv *priv = gpiochip_get_data(gc);
|
||||
+ u32 bit = mt7530_gpio_to_bit(offset);
|
||||
+
|
||||
+ if (value)
|
||||
+ mt7530_set(priv, MT7530_LED_GPIO_DATA, bit);
|
||||
+ else
|
||||
+ mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit);
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mt7530_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
|
||||
+{
|
||||
+ struct mt7530_priv *priv = gpiochip_get_data(gc);
|
||||
+ u32 bit = mt7530_gpio_to_bit(offset);
|
||||
+
|
||||
+ return (mt7530_read(priv, MT7530_LED_GPIO_DIR) & bit) ?
|
||||
+ GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mt7530_gpio_direction_input(struct gpio_chip *gc, unsigned int offset)
|
||||
+{
|
||||
+ struct mt7530_priv *priv = gpiochip_get_data(gc);
|
||||
+ u32 bit = mt7530_gpio_to_bit(offset);
|
||||
+
|
||||
+ mt7530_clear(priv, MT7530_LED_GPIO_OE, bit);
|
||||
+ mt7530_clear(priv, MT7530_LED_GPIO_DIR, bit);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mt7530_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, int value)
|
||||
+{
|
||||
+ struct mt7530_priv *priv = gpiochip_get_data(gc);
|
||||
+ u32 bit = mt7530_gpio_to_bit(offset);
|
||||
+
|
||||
+ mt7530_set(priv, MT7530_LED_GPIO_DIR, bit);
|
||||
+
|
||||
+ if (value)
|
||||
+ mt7530_set(priv, MT7530_LED_GPIO_DATA, bit);
|
||||
+ else
|
||||
+ mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit);
|
||||
+
|
||||
+ mt7530_set(priv, MT7530_LED_GPIO_OE, bit);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+mt7530_setup_gpio(struct mt7530_priv *priv)
|
||||
+{
|
||||
+ struct device *dev = priv->dev;
|
||||
+ struct gpio_chip *gc;
|
||||
+
|
||||
+ gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL);
|
||||
+ if (!gc)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ mt7530_write(priv, MT7530_LED_GPIO_OE, 0);
|
||||
+ mt7530_write(priv, MT7530_LED_GPIO_DIR, 0);
|
||||
+ mt7530_write(priv, MT7530_LED_IO_MODE, 0);
|
||||
+
|
||||
+ gc->label = "mt7530";
|
||||
+ gc->parent = dev;
|
||||
+ gc->owner = THIS_MODULE;
|
||||
+ gc->get_direction = mt7530_gpio_get_direction;
|
||||
+ gc->direction_input = mt7530_gpio_direction_input;
|
||||
+ gc->direction_output = mt7530_gpio_direction_output;
|
||||
+ gc->get = mt7530_gpio_get;
|
||||
+ gc->set = mt7530_gpio_set;
|
||||
+ gc->base = -1;
|
||||
+ gc->ngpio = 15;
|
||||
+ gc->can_sleep = true;
|
||||
+
|
||||
+ return devm_gpiochip_add_data(dev, gc, priv);
|
||||
+}
|
||||
+
|
||||
static int
|
||||
mt7530_setup(struct dsa_switch *ds)
|
||||
{
|
||||
@@ -1675,6 +1779,12 @@ mt7530_setup(struct dsa_switch *ds)
|
||||
}
|
||||
}
|
||||
|
||||
+ if (of_property_read_bool(priv->dev->of_node, "gpio-controller")) {
|
||||
+ ret = mt7530_setup_gpio(priv);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
mt7530_setup_port5(ds, interface);
|
||||
|
||||
/* Flush the FDB table */
|
||||
--- a/drivers/net/dsa/mt7530.h
|
||||
+++ b/drivers/net/dsa/mt7530.h
|
||||
@@ -529,6 +529,26 @@ enum mt7531_clk_skew {
|
||||
#define MT7531_GPIO12_RG_RXD3_MASK GENMASK(19, 16)
|
||||
#define MT7531_EXT_P_MDIO_12 (2 << 16)
|
||||
|
||||
+/* Registers for LED GPIO control (MT7530 only)
|
||||
+ * All registers follow this pattern:
|
||||
+ * [ 2: 0] port 0
|
||||
+ * [ 6: 4] port 1
|
||||
+ * [10: 8] port 2
|
||||
+ * [14:12] port 3
|
||||
+ * [18:16] port 4
|
||||
+ */
|
||||
+
|
||||
+/* LED enable, 0: Disable, 1: Enable (Default) */
|
||||
+#define MT7530_LED_EN 0x7d00
|
||||
+/* LED mode, 0: GPIO mode, 1: PHY mode (Default) */
|
||||
+#define MT7530_LED_IO_MODE 0x7d04
|
||||
+/* GPIO direction, 0: Input, 1: Output */
|
||||
+#define MT7530_LED_GPIO_DIR 0x7d10
|
||||
+/* GPIO output enable, 0: Disable, 1: Enable */
|
||||
+#define MT7530_LED_GPIO_OE 0x7d14
|
||||
+/* GPIO value, 0: Low, 1: High */
|
||||
+#define MT7530_LED_GPIO_DATA 0x7d18
|
||||
+
|
||||
#define MT7530_CREV 0x7ffc
|
||||
#define CHIP_NAME_SHIFT 16
|
||||
#define MT7530_ID 0x7530
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user