mirror of
https://github.com/openwrt/openwrt.git
synced 2025-01-07 14:28:50 +00:00
253 lines
8.3 KiB
Diff
253 lines
8.3 KiB
Diff
|
From e89db675171a7a12f19b6ec0089a9cc62807cdf1 Mon Sep 17 00:00:00 2001
|
||
|
From: Camelia Groza <camelia.groza@nxp.com>
|
||
|
Date: Tue, 29 Oct 2019 16:34:08 +0200
|
||
|
Subject: [PATCH] sdk_dpaa: ls1043a errata: update and optimize the
|
||
|
restrictions
|
||
|
|
||
|
An skb is in no danger of triggering the errata under the following
|
||
|
conditions:
|
||
|
- the paged data doesn't cross a 4K page boundary OR the linear data
|
||
|
is aligned to 256 bytes when crossing a 4K page boundary
|
||
|
- the linear and the paged data are 16 byte aligned
|
||
|
- the paged data is a multiple of 16 bytes in size
|
||
|
|
||
|
Optimize the detection for each skb that might trigger the errata. Parse
|
||
|
the skb twice, at most, and realign it only once.
|
||
|
|
||
|
Signed-off-by: Camelia Groza <camelia.groza@nxp.com>
|
||
|
---
|
||
|
drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h | 2 +-
|
||
|
.../net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c | 147 +++++++++++++++------
|
||
|
2 files changed, 111 insertions(+), 38 deletions(-)
|
||
|
|
||
|
--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
|
||
|
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
|
||
|
@@ -662,7 +662,7 @@ static inline void _dpa_bp_free_pf(void
|
||
|
#ifndef CONFIG_PPC
|
||
|
extern bool dpaa_errata_a010022; /* SoC affected by A010022 errata */
|
||
|
#define NONREC_MARK 0x01
|
||
|
-#define HAS_DMA_ISSUE(start, size) \
|
||
|
+#define CROSS_4K(start, size) \
|
||
|
(((uintptr_t)(start) + (size)) > \
|
||
|
(((uintptr_t)(start) + 0x1000) & ~0xFFF))
|
||
|
/* The headroom needs to accommodate our private data (64 bytes) but
|
||
|
--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c
|
||
|
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c
|
||
|
@@ -771,32 +771,73 @@ int __hot skb_to_contig_fd(struct dpa_pr
|
||
|
EXPORT_SYMBOL(skb_to_contig_fd);
|
||
|
|
||
|
#ifndef CONFIG_PPC
|
||
|
-/* Verify the conditions that trigger the A010022 errata: data unaligned to
|
||
|
- * 16 bytes, 4K memory address crossings and S/G fragments.
|
||
|
+/* Verify the conditions that trigger the A010022 errata:
|
||
|
+ * - 4K memory address boundary crossings when the data/SG fragments aren't
|
||
|
+ * aligned to 256 bytes
|
||
|
+ * - data and SG fragments that aren't aligned to 16 bytes
|
||
|
+ * - SG fragments that aren't mod 16 bytes in size (except for the last
|
||
|
+ * fragment)
|
||
|
*/
|
||
|
static bool a010022_check_skb(struct sk_buff *skb, struct dpa_priv_s *priv)
|
||
|
{
|
||
|
- /* Check if the headroom is aligned */
|
||
|
- if (((uintptr_t)skb->data - priv->tx_headroom) %
|
||
|
- priv->buf_layout[TX].data_align != 0)
|
||
|
- return true;
|
||
|
+ skb_frag_t *frag;
|
||
|
+ int i, nr_frags;
|
||
|
|
||
|
- /* Check for paged data in the skb. We do not support S/G fragments */
|
||
|
- if (skb_is_nonlinear(skb))
|
||
|
+ nr_frags = skb_shinfo(skb)->nr_frags;
|
||
|
+
|
||
|
+ /* Check if the linear data is 16 byte aligned */
|
||
|
+ if ((uintptr_t)skb->data % 16)
|
||
|
return true;
|
||
|
|
||
|
- /* Check if the headroom crosses a boundary */
|
||
|
- if (HAS_DMA_ISSUE(skb->head, skb_headroom(skb)))
|
||
|
+ /* Check if the needed headroom crosses a 4K address boundary without
|
||
|
+ * being 256 byte aligned
|
||
|
+ */
|
||
|
+ if (CROSS_4K(skb->data - priv->tx_headroom, priv->tx_headroom) &&
|
||
|
+ (((uintptr_t)skb->data - priv->tx_headroom) % 256))
|
||
|
return true;
|
||
|
|
||
|
- /* Check if the non-paged data crosses a boundary */
|
||
|
- if (HAS_DMA_ISSUE(skb->data, skb_headlen(skb)))
|
||
|
+ /* Check if the linear data crosses a 4K address boundary without
|
||
|
+ * being 256 byte aligned
|
||
|
+ */
|
||
|
+ if (CROSS_4K(skb->data, skb_headlen(skb)) &&
|
||
|
+ ((uintptr_t)skb->data % 256))
|
||
|
return true;
|
||
|
|
||
|
- /* Check if the entire linear skb crosses a boundary */
|
||
|
- if (HAS_DMA_ISSUE(skb->head, skb_end_offset(skb)))
|
||
|
+ /* When using Scatter/Gather, the linear data becomes the first
|
||
|
+ * fragment in the list and must follow the same restrictions as the
|
||
|
+ * other fragments.
|
||
|
+ *
|
||
|
+ * Check if the linear data is mod 16 bytes in size.
|
||
|
+ */
|
||
|
+ if (nr_frags && (skb_headlen(skb) % 16))
|
||
|
return true;
|
||
|
|
||
|
+ /* Check the SG fragments. They must follow the same rules as the
|
||
|
+ * linear data with an additional restriction: they must be a multiple
|
||
|
+ * of 16 bytes in size to account for the hardware carryover effect.
|
||
|
+ */
|
||
|
+ for (i = 0; i < nr_frags; i++) {
|
||
|
+ frag = &skb_shinfo(skb)->frags[i];
|
||
|
+
|
||
|
+ /* Check if the fragment is a multiple of 16 bytes in size.
|
||
|
+ * The last fragment is exempt from this restriction.
|
||
|
+ */
|
||
|
+ if ((i != (nr_frags - 1)) && (skb_frag_size(frag) % 16))
|
||
|
+ return true;
|
||
|
+
|
||
|
+ /* Check if the fragment is 16 byte aligned */
|
||
|
+ if (skb_frag_off(frag) % 16)
|
||
|
+ return true;
|
||
|
+
|
||
|
+ /* Check if the fragment crosses a 4K address boundary. Since
|
||
|
+ * the alignment of previous fragments can influence the
|
||
|
+ * current fragment, checking for the 256 byte alignment
|
||
|
+ * isn't relevant.
|
||
|
+ */
|
||
|
+ if (CROSS_4K(skb_frag_off(frag), skb_frag_size(frag)))
|
||
|
+ return true;
|
||
|
+ }
|
||
|
+
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
@@ -1062,10 +1103,24 @@ int __hot dpa_tx_extended(struct sk_buff
|
||
|
struct dpa_percpu_priv_s *percpu_priv;
|
||
|
struct rtnl_link_stats64 *percpu_stats;
|
||
|
int err = 0;
|
||
|
- bool nonlinear;
|
||
|
+ bool nonlinear, skb_changed, skb_need_wa;
|
||
|
int *countptr, offset = 0;
|
||
|
struct sk_buff *nskb;
|
||
|
|
||
|
+ /* Flags to help optimize the A010022 errata restriction checks.
|
||
|
+ *
|
||
|
+ * First flag marks if the skb changed between the first A010022 check
|
||
|
+ * and the moment it's converted to an FD.
|
||
|
+ *
|
||
|
+ * The second flag marks if the skb needs to be realigned in order to
|
||
|
+ * avoid the errata.
|
||
|
+ *
|
||
|
+ * The flags should have minimal impact on platforms not impacted by
|
||
|
+ * the errata.
|
||
|
+ */
|
||
|
+ skb_changed = false;
|
||
|
+ skb_need_wa = false;
|
||
|
+
|
||
|
priv = netdev_priv(net_dev);
|
||
|
/* Non-migratable context, safe to use raw_cpu_ptr */
|
||
|
percpu_priv = raw_cpu_ptr(priv->percpu_priv);
|
||
|
@@ -1075,13 +1130,8 @@ int __hot dpa_tx_extended(struct sk_buff
|
||
|
clear_fd(&fd);
|
||
|
|
||
|
#ifndef CONFIG_PPC
|
||
|
- if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv)) {
|
||
|
- nskb = a010022_realign_skb(skb, priv);
|
||
|
- if (!nskb)
|
||
|
- goto skb_to_fd_failed;
|
||
|
- dev_kfree_skb(skb);
|
||
|
- skb = nskb;
|
||
|
- }
|
||
|
+ if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv))
|
||
|
+ skb_need_wa = true;
|
||
|
#endif
|
||
|
|
||
|
nonlinear = skb_is_nonlinear(skb);
|
||
|
@@ -1102,8 +1152,8 @@ int __hot dpa_tx_extended(struct sk_buff
|
||
|
* Btw, we're using the first sgt entry to store the linear part of
|
||
|
* the skb, so we're one extra frag short.
|
||
|
*/
|
||
|
- if (nonlinear &&
|
||
|
- likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {
|
||
|
+ if (nonlinear && !skb_need_wa &&
|
||
|
+ likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {
|
||
|
/* Just create a S/G fd based on the skb */
|
||
|
err = skb_to_sg_fd(priv, skb, &fd);
|
||
|
percpu_priv->tx_frag_skbuffs++;
|
||
|
@@ -1128,39 +1178,62 @@ int __hot dpa_tx_extended(struct sk_buff
|
||
|
|
||
|
dev_kfree_skb(skb);
|
||
|
skb = skb_new;
|
||
|
+ skb_changed = true;
|
||
|
}
|
||
|
|
||
|
/* We're going to store the skb backpointer at the beginning
|
||
|
* of the data buffer, so we need a privately owned skb
|
||
|
+ *
|
||
|
+ * Under the A010022 errata, we are going to have a privately
|
||
|
+ * owned skb after realigning the current one, so no point in
|
||
|
+ * copying it here in that case.
|
||
|
*/
|
||
|
|
||
|
/* Code borrowed from skb_unshare(). */
|
||
|
- if (skb_cloned(skb)) {
|
||
|
+ if (skb_cloned(skb) && !skb_need_wa) {
|
||
|
nskb = skb_copy(skb, GFP_ATOMIC);
|
||
|
kfree_skb(skb);
|
||
|
skb = nskb;
|
||
|
-#ifndef CONFIG_PPC
|
||
|
- if (unlikely(dpaa_errata_a010022) &&
|
||
|
- a010022_check_skb(skb, priv)) {
|
||
|
- nskb = a010022_realign_skb(skb, priv);
|
||
|
- if (!nskb)
|
||
|
- goto skb_to_fd_failed;
|
||
|
- dev_kfree_skb(skb);
|
||
|
- skb = nskb;
|
||
|
- }
|
||
|
-#endif
|
||
|
+ skb_changed = true;
|
||
|
+
|
||
|
/* skb_copy() has now linearized the skbuff. */
|
||
|
- } else if (unlikely(nonlinear)) {
|
||
|
+ } else if (unlikely(nonlinear) && !skb_need_wa) {
|
||
|
/* We are here because the egress skb contains
|
||
|
* more fragments than we support. In this case,
|
||
|
* we have no choice but to linearize it ourselves.
|
||
|
*/
|
||
|
- err = __skb_linearize(skb);
|
||
|
+#ifndef CONFIG_PPC
|
||
|
+ /* No point in linearizing the skb now if we are going
|
||
|
+ * to realign and linearize it again further down due
|
||
|
+ * to the A010022 errata
|
||
|
+ */
|
||
|
+ if (unlikely(dpaa_errata_a010022))
|
||
|
+ skb_need_wa = true;
|
||
|
+ else
|
||
|
+#endif
|
||
|
+ err = __skb_linearize(skb);
|
||
|
}
|
||
|
if (unlikely(!skb || err < 0))
|
||
|
/* Common out-of-memory error path */
|
||
|
goto enomem;
|
||
|
|
||
|
+#ifndef CONFIG_PPC
|
||
|
+ /* Verify the skb a second time if it has been updated since
|
||
|
+ * the previous check
|
||
|
+ */
|
||
|
+ if (unlikely(dpaa_errata_a010022) && skb_changed &&
|
||
|
+ a010022_check_skb(skb, priv))
|
||
|
+ skb_need_wa = true;
|
||
|
+
|
||
|
+ if (unlikely(dpaa_errata_a010022) && skb_need_wa) {
|
||
|
+ nskb = a010022_realign_skb(skb, priv);
|
||
|
+ if (!nskb)
|
||
|
+ goto skb_to_fd_failed;
|
||
|
+ dev_kfree_skb(skb);
|
||
|
+ skb = nskb;
|
||
|
+ }
|
||
|
+#endif
|
||
|
+
|
||
|
err = skb_to_contig_fd(priv, skb, &fd, countptr, &offset);
|
||
|
}
|
||
|
if (unlikely(err < 0))
|