mirror of
https://github.com/openwrt/openwrt.git
synced 2024-12-21 14:37:57 +00:00
kernel: backport upstream GSO improvements
Fixes some corner cases regarding segmenting packets that were assembled by GRO. Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
parent
e497c975d3
commit
79fce42404
@ -0,0 +1,94 @@
|
|||||||
|
From: Jakub Sitnicki <jakub@cloudflare.com>
|
||||||
|
Date: Wed, 26 Jun 2024 19:51:26 +0200
|
||||||
|
Subject: [PATCH] udp: Allow GSO transmit from devices with no checksum offload
|
||||||
|
|
||||||
|
Today sending a UDP GSO packet from a TUN device results in an EIO error:
|
||||||
|
|
||||||
|
import fcntl, os, struct
|
||||||
|
from socket import *
|
||||||
|
|
||||||
|
TUNSETIFF = 0x400454CA
|
||||||
|
IFF_TUN = 0x0001
|
||||||
|
IFF_NO_PI = 0x1000
|
||||||
|
UDP_SEGMENT = 103
|
||||||
|
|
||||||
|
tun_fd = os.open("/dev/net/tun", os.O_RDWR)
|
||||||
|
ifr = struct.pack("16sH", b"tun0", IFF_TUN | IFF_NO_PI)
|
||||||
|
fcntl.ioctl(tun_fd, TUNSETIFF, ifr)
|
||||||
|
|
||||||
|
os.system("ip addr add 192.0.2.1/24 dev tun0")
|
||||||
|
os.system("ip link set dev tun0 up")
|
||||||
|
|
||||||
|
s = socket(AF_INET, SOCK_DGRAM)
|
||||||
|
s.setsockopt(SOL_UDP, UDP_SEGMENT, 1200)
|
||||||
|
s.sendto(b"x" * 3000, ("192.0.2.2", 9)) # EIO
|
||||||
|
|
||||||
|
This is due to a check in the udp stack if the egress device offers
|
||||||
|
checksum offload. While TUN/TAP devices, by default, don't advertise this
|
||||||
|
capability because it requires support from the TUN/TAP reader.
|
||||||
|
|
||||||
|
However, the GSO stack has a software fallback for checksum calculation,
|
||||||
|
which we can use. This way we don't force UDP_SEGMENT users to handle the
|
||||||
|
EIO error and implement a segmentation fallback.
|
||||||
|
|
||||||
|
Lift the restriction so that UDP_SEGMENT can be used with any egress
|
||||||
|
device. We also need to adjust the UDP GSO code to match the GSO stack
|
||||||
|
expectation about ip_summed field, as set in commit 8d63bee643f1 ("net:
|
||||||
|
avoid skb_warn_bad_offload false positives on UFO"). Otherwise we will hit
|
||||||
|
the bad offload check.
|
||||||
|
|
||||||
|
Users should, however, expect a potential performance impact when
|
||||||
|
batch-sending packets with UDP_SEGMENT without checksum offload on the
|
||||||
|
egress device. In such case the packet payload is read twice: first during
|
||||||
|
the sendmsg syscall when copying data from user memory, and then in the GSO
|
||||||
|
stack for checksum computation. This double memory read can be less
|
||||||
|
efficient than a regular sendmsg where the checksum is calculated during
|
||||||
|
the initial data copy from user memory.
|
||||||
|
|
||||||
|
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
|
||||||
|
Reviewed-by: Willem de Bruijn <willemb@google.com>
|
||||||
|
Link: https://patch.msgid.link/20240626-linux-udpgso-v2-1-422dfcbd6b48@cloudflare.com
|
||||||
|
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/ipv4/udp.c
|
||||||
|
+++ b/net/ipv4/udp.c
|
||||||
|
@@ -942,8 +942,7 @@ static int udp_send_skb(struct sk_buff *
|
||||||
|
kfree_skb(skb);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
|
||||||
|
- dst_xfrm(skb_dst(skb))) {
|
||||||
|
+ if (is_udplite || dst_xfrm(skb_dst(skb))) {
|
||||||
|
kfree_skb(skb);
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
--- a/net/ipv4/udp_offload.c
|
||||||
|
+++ b/net/ipv4/udp_offload.c
|
||||||
|
@@ -357,6 +357,14 @@ struct sk_buff *__udp_gso_segment(struct
|
||||||
|
else
|
||||||
|
uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;
|
||||||
|
|
||||||
|
+ /* On the TX path, CHECKSUM_NONE and CHECKSUM_UNNECESSARY have the same
|
||||||
|
+ * meaning. However, check for bad offloads in the GSO stack expects the
|
||||||
|
+ * latter, if the checksum was calculated in software. To vouch for the
|
||||||
|
+ * segment skbs we actually need to set it on the gso_skb.
|
||||||
|
+ */
|
||||||
|
+ if (gso_skb->ip_summed == CHECKSUM_NONE)
|
||||||
|
+ gso_skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||||
|
+
|
||||||
|
/* update refcount for the packet */
|
||||||
|
if (copy_dtor) {
|
||||||
|
int delta = sum_truesize - gso_skb->truesize;
|
||||||
|
--- a/net/ipv6/udp.c
|
||||||
|
+++ b/net/ipv6/udp.c
|
||||||
|
@@ -1261,8 +1261,7 @@ static int udp_v6_send_skb(struct sk_buf
|
||||||
|
kfree_skb(skb);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
|
||||||
|
- dst_xfrm(skb_dst(skb))) {
|
||||||
|
+ if (is_udplite || dst_xfrm(skb_dst(skb))) {
|
||||||
|
kfree_skb(skb);
|
||||||
|
return -EIO;
|
||||||
|
}
|
@ -0,0 +1,69 @@
|
|||||||
|
From: Jakub Sitnicki <jakub@cloudflare.com>
|
||||||
|
Date: Thu, 8 Aug 2024 11:56:21 +0200
|
||||||
|
Subject: [PATCH] net: Make USO depend on CSUM offload
|
||||||
|
|
||||||
|
UDP segmentation offload inherently depends on checksum offload. It should
|
||||||
|
not be possible to disable checksum offload while leaving USO enabled.
|
||||||
|
Enforce this dependency in code.
|
||||||
|
|
||||||
|
There is a single tx-udp-segmentation feature flag to indicate support for
|
||||||
|
both IPv4/6, hence the devices wishing to support USO must offer checksum
|
||||||
|
offload for both IP versions.
|
||||||
|
|
||||||
|
Fixes: 10154dbded6d ("udp: Allow GSO transmit from devices with no checksum offload")
|
||||||
|
Suggested-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
|
||||||
|
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
|
||||||
|
Reviewed-by: Willem de Bruijn <willemb@google.com>
|
||||||
|
Link: https://patch.msgid.link/20240808-udp-gso-egress-from-tunnel-v4-1-f5c5b4149ab9@cloudflare.com
|
||||||
|
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/core/dev.c
|
||||||
|
+++ b/net/core/dev.c
|
||||||
|
@@ -9751,6 +9751,15 @@ static void netdev_sync_lower_features(s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static bool netdev_has_ip_or_hw_csum(netdev_features_t features)
|
||||||
|
+{
|
||||||
|
+ netdev_features_t ip_csum_mask = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
|
||||||
|
+ bool ip_csum = (features & ip_csum_mask) == ip_csum_mask;
|
||||||
|
+ bool hw_csum = features & NETIF_F_HW_CSUM;
|
||||||
|
+
|
||||||
|
+ return ip_csum || hw_csum;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static netdev_features_t netdev_fix_features(struct net_device *dev,
|
||||||
|
netdev_features_t features)
|
||||||
|
{
|
||||||
|
@@ -9832,15 +9841,9 @@ static netdev_features_t netdev_fix_feat
|
||||||
|
features &= ~NETIF_F_LRO;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (features & NETIF_F_HW_TLS_TX) {
|
||||||
|
- bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
|
||||||
|
- (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
|
||||||
|
- bool hw_csum = features & NETIF_F_HW_CSUM;
|
||||||
|
-
|
||||||
|
- if (!ip_csum && !hw_csum) {
|
||||||
|
- netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
|
||||||
|
- features &= ~NETIF_F_HW_TLS_TX;
|
||||||
|
- }
|
||||||
|
+ if ((features & NETIF_F_HW_TLS_TX) && !netdev_has_ip_or_hw_csum(features)) {
|
||||||
|
+ netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
|
||||||
|
+ features &= ~NETIF_F_HW_TLS_TX;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
|
||||||
|
@@ -9848,6 +9851,11 @@ static netdev_features_t netdev_fix_feat
|
||||||
|
features &= ~NETIF_F_HW_TLS_RX;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if ((features & NETIF_F_GSO_UDP_L4) && !netdev_has_ip_or_hw_csum(features)) {
|
||||||
|
+ netdev_dbg(dev, "Dropping USO feature since no CSUM feature.\n");
|
||||||
|
+ features &= ~NETIF_F_GSO_UDP_L4;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return features;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,86 @@
|
|||||||
|
From: Jakub Sitnicki <jakub@cloudflare.com>
|
||||||
|
Date: Thu, 8 Aug 2024 11:56:22 +0200
|
||||||
|
Subject: [PATCH] udp: Fall back to software USO if IPv6 extension headers are
|
||||||
|
present
|
||||||
|
|
||||||
|
In commit 10154dbded6d ("udp: Allow GSO transmit from devices with no
|
||||||
|
checksum offload") we have intentionally allowed UDP GSO packets marked
|
||||||
|
CHECKSUM_NONE to pass to the GSO stack, so that they can be segmented and
|
||||||
|
checksummed by a software fallback when the egress device lacks these
|
||||||
|
features.
|
||||||
|
|
||||||
|
What was not taken into consideration is that a CHECKSUM_NONE skb can be
|
||||||
|
handed over to the GSO stack also when the egress device advertises the
|
||||||
|
tx-udp-segmentation / NETIF_F_GSO_UDP_L4 feature.
|
||||||
|
|
||||||
|
This will happen when there are IPv6 extension headers present, which we
|
||||||
|
check for in __ip6_append_data(). Syzbot has discovered this scenario,
|
||||||
|
producing a warning as below:
|
||||||
|
|
||||||
|
ip6tnl0: caps=(0x00000006401d7869, 0x00000006401d7869)
|
||||||
|
WARNING: CPU: 0 PID: 5112 at net/core/dev.c:3293 skb_warn_bad_offload+0x166/0x1a0 net/core/dev.c:3291
|
||||||
|
Modules linked in:
|
||||||
|
CPU: 0 PID: 5112 Comm: syz-executor391 Not tainted 6.10.0-rc7-syzkaller-01603-g80ab5445da62 #0
|
||||||
|
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024
|
||||||
|
RIP: 0010:skb_warn_bad_offload+0x166/0x1a0 net/core/dev.c:3291
|
||||||
|
[...]
|
||||||
|
Call Trace:
|
||||||
|
<TASK>
|
||||||
|
__skb_gso_segment+0x3be/0x4c0 net/core/gso.c:127
|
||||||
|
skb_gso_segment include/net/gso.h:83 [inline]
|
||||||
|
validate_xmit_skb+0x585/0x1120 net/core/dev.c:3661
|
||||||
|
__dev_queue_xmit+0x17a4/0x3e90 net/core/dev.c:4415
|
||||||
|
neigh_output include/net/neighbour.h:542 [inline]
|
||||||
|
ip6_finish_output2+0xffa/0x1680 net/ipv6/ip6_output.c:137
|
||||||
|
ip6_finish_output+0x41e/0x810 net/ipv6/ip6_output.c:222
|
||||||
|
ip6_send_skb+0x112/0x230 net/ipv6/ip6_output.c:1958
|
||||||
|
udp_v6_send_skb+0xbf5/0x1870 net/ipv6/udp.c:1292
|
||||||
|
udpv6_sendmsg+0x23b3/0x3270 net/ipv6/udp.c:1588
|
||||||
|
sock_sendmsg_nosec net/socket.c:730 [inline]
|
||||||
|
__sock_sendmsg+0xef/0x270 net/socket.c:745
|
||||||
|
____sys_sendmsg+0x525/0x7d0 net/socket.c:2585
|
||||||
|
___sys_sendmsg net/socket.c:2639 [inline]
|
||||||
|
__sys_sendmmsg+0x3b2/0x740 net/socket.c:2725
|
||||||
|
__do_sys_sendmmsg net/socket.c:2754 [inline]
|
||||||
|
__se_sys_sendmmsg net/socket.c:2751 [inline]
|
||||||
|
__x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2751
|
||||||
|
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
|
||||||
|
do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
|
||||||
|
entry_SYSCALL_64_after_hwframe+0x77/0x7f
|
||||||
|
[...]
|
||||||
|
</TASK>
|
||||||
|
|
||||||
|
We are hitting the bad offload warning because when an egress device is
|
||||||
|
capable of handling segmentation offload requested by
|
||||||
|
skb_shinfo(skb)->gso_type, the chain of gso_segment callbacks won't produce
|
||||||
|
any segment skbs and return NULL. See the skb_gso_ok() branch in
|
||||||
|
{__udp,tcp,sctp}_gso_segment helpers.
|
||||||
|
|
||||||
|
To fix it, force a fallback to software USO when processing a packet with
|
||||||
|
IPv6 extension headers, since we don't know if these can checksummed by
|
||||||
|
all devices which offer USO.
|
||||||
|
|
||||||
|
Fixes: 10154dbded6d ("udp: Allow GSO transmit from devices with no checksum offload")
|
||||||
|
Reported-by: syzbot+e15b7e15b8a751a91d9a@syzkaller.appspotmail.com
|
||||||
|
Closes: https://lore.kernel.org/all/000000000000e1609a061d5330ce@google.com/
|
||||||
|
Reviewed-by: Willem de Bruijn <willemb@google.com>
|
||||||
|
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
|
||||||
|
Link: https://patch.msgid.link/20240808-udp-gso-egress-from-tunnel-v4-2-f5c5b4149ab9@cloudflare.com
|
||||||
|
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/ipv4/udp_offload.c
|
||||||
|
+++ b/net/ipv4/udp_offload.c
|
||||||
|
@@ -278,6 +278,12 @@ struct sk_buff *__udp_gso_segment(struct
|
||||||
|
if (gso_skb->len <= sizeof(*uh) + mss)
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
||||||
|
+ /* We don't know if egress device can segment and checksum the packet
|
||||||
|
+ * when IPv6 extension headers are present. Fall back to software GSO.
|
||||||
|
+ */
|
||||||
|
+ if (gso_skb->ip_summed != CHECKSUM_PARTIAL)
|
||||||
|
+ features &= ~(NETIF_F_GSO_UDP_L4 | NETIF_F_CSUM_MASK);
|
||||||
|
+
|
||||||
|
if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
|
||||||
|
/* Packet is from an untrusted source, reset gso_segs. */
|
||||||
|
skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
|
@ -379,7 +379,7 @@ Signe-off-by: Felix Fietkau <nbd@nbd.name>
|
|||||||
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
|
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
|
||||||
--- a/net/ipv4/udp_offload.c
|
--- a/net/ipv4/udp_offload.c
|
||||||
+++ b/net/ipv4/udp_offload.c
|
+++ b/net/ipv4/udp_offload.c
|
||||||
@@ -433,33 +433,6 @@ out:
|
@@ -447,33 +447,6 @@ out:
|
||||||
return segs;
|
return segs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user