openwrt/target/linux/layerscape/patches-5.4/701-net-0358-sdk_dpaa-ls1043a-errata-update-and-optimize-the-rest.patch

From e89db675171a7a12f19b6ec0089a9cc62807cdf1 Mon Sep 17 00:00:00 2001
From: Camelia Groza <camelia.groza@nxp.com>
Date: Tue, 29 Oct 2019 16:34:08 +0200
Subject: [PATCH] sdk_dpaa: ls1043a errata: update and optimize the
 restrictions

An skb is in no danger of triggering the errata under the following
conditions:
- the paged data doesn't cross a 4K page boundary OR the linear data
is aligned to 256 bytes when crossing a 4K page boundary
- the linear and the paged data are 16 byte aligned
- the paged data is a multiple of 16 bytes in size

Optimize the detection for each skb that might trigger the errata. Parse
the skb twice, at most, and realign it only once.

Signed-off-by: Camelia Groza <camelia.groza@nxp.com>
---
 drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h |   2 +-
 .../net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c  | 147 +++++++++++++++------
 2 files changed, 111 insertions(+), 38 deletions(-)

--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
@@ -662,7 +662,7 @@ static inline void _dpa_bp_free_pf(void
 #ifndef CONFIG_PPC
 extern bool dpaa_errata_a010022; /* SoC affected by A010022 errata */
 #define NONREC_MARK	0x01
-#define HAS_DMA_ISSUE(start, size) \
+#define CROSS_4K(start, size) \
 	(((uintptr_t)(start) + (size)) > \
 	 (((uintptr_t)(start) + 0x1000) & ~0xFFF))
 /* The headroom needs to accommodate our private data (64 bytes) but
--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c
@@ -771,32 +771,73 @@ int __hot skb_to_contig_fd(struct dpa_pr
 EXPORT_SYMBOL(skb_to_contig_fd);
 
 #ifndef CONFIG_PPC
-/* Verify the conditions that trigger the A010022 errata: data unaligned to
- * 16 bytes, 4K memory address crossings and S/G fragments.
+/* Verify the conditions that trigger the A010022 errata:
+ * - 4K memory address boundary crossings when the data/SG fragments aren't
+ *   aligned to 256 bytes
+ * - data and SG fragments that aren't aligned to 16 bytes
+ * - SG fragments that aren't mod 16 bytes in size (except for the last
+ *   fragment)
  */
 static bool a010022_check_skb(struct sk_buff *skb, struct dpa_priv_s *priv)
 {
-	/* Check if the headroom is aligned */
-	if (((uintptr_t)skb->data - priv->tx_headroom) %
-	    priv->buf_layout[TX].data_align != 0)
-		return true;
+	skb_frag_t *frag;
+	int i, nr_frags;
 
-	/* Check for paged data in the skb. We do not support S/G fragments */
-	if (skb_is_nonlinear(skb))
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
+	/* Check if the linear data is 16 byte aligned */
+	if ((uintptr_t)skb->data % 16)
 		return true;
 
-	/* Check if the headroom crosses a boundary */
-	if (HAS_DMA_ISSUE(skb->head, skb_headroom(skb)))
+	/* Check if the needed headroom crosses a 4K address boundary without
+	 * being 256 byte aligned
+	 */
+	if (CROSS_4K(skb->data - priv->tx_headroom, priv->tx_headroom) &&
+	    (((uintptr_t)skb->data - priv->tx_headroom) % 256))
 		return true;
 
-	/* Check if the non-paged data crosses a boundary */
-	if (HAS_DMA_ISSUE(skb->data, skb_headlen(skb)))
+	/* Check if the linear data crosses a 4K address boundary without
+	 * being 256 byte aligned
+	 */
+	if (CROSS_4K(skb->data, skb_headlen(skb)) &&
+	    ((uintptr_t)skb->data % 256))
 		return true;
 
-	/* Check if the entire linear skb crosses a boundary */
-	if (HAS_DMA_ISSUE(skb->head, skb_end_offset(skb)))
+	/* When using Scatter/Gather, the linear data becomes the first
+	 * fragment in the list and must follow the same restrictions as the
+	 * other fragments.
+	 *
+	 * Check if the linear data is mod 16 bytes in size.
+	 */
+	if (nr_frags && (skb_headlen(skb) % 16))
 		return true;
 
+	/* Check the SG fragments. They must follow the same rules as the
+	 * linear data with an additional restriction: they must be multiple
+	 * of 16 bytes in size to account for the hardware carryover effect.
+	 */
+	for (i = 0; i < nr_frags; i++) {
+		frag = &skb_shinfo(skb)->frags[i];
+
+		/* Check if the fragment is a multiple of 16 bytes in size.
+		 * The last fragment is exempt from this restriction.
+		 */
+		if ((i != (nr_frags - 1)) && (skb_frag_size(frag) % 16))
+			return true;
+
+		/* Check if the fragment is 16 byte aligned */
+		if (skb_frag_off(frag) % 16)
+			return true;
+
+		/* Check if the fragment crosses a 4K address boundary. Since
+		 * the alignment of previous fragments can influence the
+		 * current fragment, checking for the 256 byte alignment
+		 * isn't relevant.
+		 */
+		if (CROSS_4K(skb_frag_off(frag), skb_frag_size(frag)))
+			return true;
+	}
+
 	return false;
 }
 
@@ -1062,10 +1103,24 @@ int __hot dpa_tx_extended(struct sk_buff
 	struct dpa_percpu_priv_s *percpu_priv;
 	struct rtnl_link_stats64 *percpu_stats;
 	int err = 0;
-	bool nonlinear;
+	bool nonlinear, skb_changed, skb_need_wa;
 	int *countptr, offset = 0;
 	struct sk_buff *nskb;
 
+	/* Flags to help optimize the A010022 errata restriction checks.
+	 *
+	 * First flag marks if the skb changed between the first A010022 check
+	 * and the moment it's converted to an FD.
+	 *
+	 * The second flag marks if the skb needs to be realigned in order to
+	 * avoid the errata.
+	 *
+	 * The flags should have minimal impact on platforms not impacted by
+	 * the errata.
+	 */
+	skb_changed = false;
+	skb_need_wa = false;
+
 	priv = netdev_priv(net_dev);
 	/* Non-migratable context, safe to use raw_cpu_ptr */
 	percpu_priv = raw_cpu_ptr(priv->percpu_priv);
@@ -1075,13 +1130,8 @@ int __hot dpa_tx_extended(struct sk_buff
 	clear_fd(&fd);
 
 #ifndef CONFIG_PPC
-	if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv)) {
-		nskb = a010022_realign_skb(skb, priv);
-		if (!nskb)
-			goto skb_to_fd_failed;
-		dev_kfree_skb(skb);
-		skb = nskb;
-	}
+	if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv))
+		skb_need_wa = true;
 #endif
 
 	nonlinear = skb_is_nonlinear(skb);
@@ -1102,8 +1152,8 @@ int __hot dpa_tx_extended(struct sk_buff
 	 * Btw, we're using the first sgt entry to store the linear part of
 	 * the skb, so we're one extra frag short.
 	 */
-	if (nonlinear &&
-		likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {
+	if (nonlinear && !skb_need_wa &&
+	    likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {
 		/* Just create a S/G fd based on the skb */
 		err = skb_to_sg_fd(priv, skb, &fd);
 		percpu_priv->tx_frag_skbuffs++;
@@ -1128,39 +1178,62 @@ int __hot dpa_tx_extended(struct sk_buff
 
 			dev_kfree_skb(skb);
 			skb = skb_new;
+			skb_changed = true;
 		}
 
 		/* We're going to store the skb backpointer at the beginning
 		 * of the data buffer, so we need a privately owned skb
+		 *
+		 * Under the A010022 errata, we are going to have a privately
+		 * owned skb after realigning the current one, so no point in
+		 * copying it here in that case.
 		 */
 
 		/* Code borrowed from skb_unshare(). */
-		if (skb_cloned(skb)) {
+		if (skb_cloned(skb) && !skb_need_wa) {
 			nskb = skb_copy(skb, GFP_ATOMIC);
 			kfree_skb(skb);
 			skb = nskb;
-#ifndef CONFIG_PPC
-			if (unlikely(dpaa_errata_a010022) &&
-			    a010022_check_skb(skb, priv)) {
-				nskb = a010022_realign_skb(skb, priv);
-				if (!nskb)
-					goto skb_to_fd_failed;
-				dev_kfree_skb(skb);
-				skb = nskb;
-			}
-#endif
+			skb_changed = true;
+
 			/* skb_copy() has now linearized the skbuff. */
-		} else if (unlikely(nonlinear)) {
+		} else if (unlikely(nonlinear) && !skb_need_wa) {
 			/* We are here because the egress skb contains
 			 * more fragments than we support. In this case,
 			 * we have no choice but to linearize it ourselves.
 			 */
-			err = __skb_linearize(skb);
+#ifndef CONFIG_PPC
+			/* No point in linearizing the skb now if we are going
+			 * to realign and linearize it again further down due
+			 * to the A010022 errata
+			 */
+			if (unlikely(dpaa_errata_a010022))
+				skb_need_wa = true;
+			else
+#endif
+				err = __skb_linearize(skb);
 		}
 		if (unlikely(!skb || err < 0))
 			/* Common out-of-memory error path */
 			goto enomem;
 
+#ifndef CONFIG_PPC
+		/* Verify the skb a second time if it has been updated since
+		 * the previous check
+		 */
+		if (unlikely(dpaa_errata_a010022) && skb_changed &&
+		    a010022_check_skb(skb, priv))
+			skb_need_wa = true;
+
+		if (unlikely(dpaa_errata_a010022) && skb_need_wa) {
+			nskb = a010022_realign_skb(skb, priv);
+			if (!nskb)
+				goto skb_to_fd_failed;
+			dev_kfree_skb(skb);
+			skb = nskb;
+		}
+#endif
+
 		err = skb_to_contig_fd(priv, skb, &fd, countptr, &offset);
 	}
 	if (unlikely(err < 0))
layerscape: add patches-5.4 Add patches for linux-5.4. The patches are from NXP LSDK-20.04 release which was tagged LSDK-20.04-V5.4. https://source.codeaurora.org/external/qoriq/qoriq-components/linux/ For boards LS1021A-IOT, and Traverse-LS1043 which are not involved in LSDK, port the dts patches from 4.14. The patches are sorted into the following categories: 301-arch-xxxx 302-dts-xxxx 303-core-xxxx 701-net-xxxx 801-audio-xxxx 802-can-xxxx 803-clock-xxxx 804-crypto-xxxx 805-display-xxxx 806-dma-xxxx 807-gpio-xxxx 808-i2c-xxxx 809-jailhouse-xxxx 810-keys-xxxx 811-kvm-xxxx 812-pcie-xxxx 813-pm-xxxx 814-qe-xxxx 815-sata-xxxx 816-sdhc-xxxx 817-spi-xxxx 818-thermal-xxxx 819-uart-xxxx 820-usb-xxxx 821-vfio-xxxx Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com> 2020-04-10 02:47:05 +00:00			`From e89db675171a7a12f19b6ec0089a9cc62807cdf1 Mon Sep 17 00:00:00 2001`
			`From: Camelia Groza <camelia.groza@nxp.com>`
			`Date: Tue, 29 Oct 2019 16:34:08 +0200`
			`Subject: [PATCH] sdk_dpaa: ls1043a errata: update and optimize the`
			`restrictions`

			`An skb is in no danger of triggering the errata under the following`
			`conditions:`
			`- the paged data doesn't cross a 4K page boundary OR the linear data`
			`is aligned to 256 bytes when crossing a 4K page boundary`
			`- the linear and the paged data are 16 byte aligned`
			`- the paged data is a multiple of 16 bytes in size`

			`Optimize the detection for each skb that might trigger the errata. Parse`
			`the skb twice, at most, and realign it only once.`

			`Signed-off-by: Camelia Groza <camelia.groza@nxp.com>`
			`---`
			`drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h | 2 +-`
			`.../net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c | 147 +++++++++++++++------`
			`2 files changed, 111 insertions(+), 38 deletions(-)`

			`--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h`
			`+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h`
			`@@ -662,7 +662,7 @@ static inline void _dpa_bp_free_pf(void`
			`#ifndef CONFIG_PPC`
			`extern bool dpaa_errata_a010022; /* SoC affected by A010022 errata */`
			`#define NONREC_MARK 0x01`
			`-#define HAS_DMA_ISSUE(start, size) \`
			`+#define CROSS_4K(start, size) \`
			`(((uintptr_t)(start) + (size)) > \`
			`(((uintptr_t)(start) + 0x1000) & ~0xFFF))`
			`/* The headroom needs to accommodate our private data (64 bytes) but`
			`--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c`
			`+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c`
			`@@ -771,32 +771,73 @@ int __hot skb_to_contig_fd(struct dpa_pr`
			`EXPORT_SYMBOL(skb_to_contig_fd);`

			`#ifndef CONFIG_PPC`
			`-/* Verify the conditions that trigger the A010022 errata: data unaligned to`
			`- * 16 bytes, 4K memory address crossings and S/G fragments.`
			`+/* Verify the conditions that trigger the A010022 errata:`
			`+ * - 4K memory address boundary crossings when the data/SG fragments aren't`
			`+ * aligned to 256 bytes`
			`+ * - data and SG fragments that aren't aligned to 16 bytes`
			`+ * - SG fragments that aren't mod 16 bytes in size (except for the last`
			`+ * fragment)`
			`*/`
			`static bool a010022_check_skb(struct sk_buff *skb, struct dpa_priv_s *priv)`
			`{`
			`- /* Check if the headroom is aligned */`
			`- if (((uintptr_t)skb->data - priv->tx_headroom) %`
			`- priv->buf_layout[TX].data_align != 0)`
			`- return true;`
			`+ skb_frag_t *frag;`
			`+ int i, nr_frags;`

			`- /* Check for paged data in the skb. We do not support S/G fragments */`
			`- if (skb_is_nonlinear(skb))`
			`+ nr_frags = skb_shinfo(skb)->nr_frags;`
			`+`
			`+ /* Check if the linear data is 16 byte aligned */`
			`+ if ((uintptr_t)skb->data % 16)`
			`return true;`

			`- /* Check if the headroom crosses a boundary */`
			`- if (HAS_DMA_ISSUE(skb->head, skb_headroom(skb)))`
			`+ /* Check if the needed headroom crosses a 4K address boundary without`
			`+ * being 256 byte aligned`
			`+ */`
			`+ if (CROSS_4K(skb->data - priv->tx_headroom, priv->tx_headroom) &&`
			`+ (((uintptr_t)skb->data - priv->tx_headroom) % 256))`
			`return true;`

			`- /* Check if the non-paged data crosses a boundary */`
			`- if (HAS_DMA_ISSUE(skb->data, skb_headlen(skb)))`
			`+ /* Check if the linear data crosses a 4K address boundary without`
			`+ * being 256 byte aligned`
			`+ */`
			`+ if (CROSS_4K(skb->data, skb_headlen(skb)) &&`
			`+ ((uintptr_t)skb->data % 256))`
			`return true;`

			`- /* Check if the entire linear skb crosses a boundary */`
			`- if (HAS_DMA_ISSUE(skb->head, skb_end_offset(skb)))`
			`+ /* When using Scatter/Gather, the linear data becomes the first`
			`+ * fragment in the list and must follow the same restrictions as the`
			`+ * other fragments.`
			`+ *`
			`+ * Check if the linear data is mod 16 bytes in size.`
			`+ */`
			`+ if (nr_frags && (skb_headlen(skb) % 16))`
			`return true;`

			`+ /* Check the SG fragments. They must follow the same rules as the`
			`+ * linear data with an additional restriction: they must be multiple`
			`+ * of 16 bytes in size to account for the hardware carryover effect.`
			`+ */`
			`+ for (i = 0; i < nr_frags; i++) {`
			`+ frag = &skb_shinfo(skb)->frags[i];`
			`+`
			`+ /* Check if the fragment is a multiple of 16 bytes in size.`
			`+ * The last fragment is exempt from this restriction.`
			`+ */`
			`+ if ((i != (nr_frags - 1)) && (skb_frag_size(frag) % 16))`
			`+ return true;`
			`+`
			`+ /* Check if the fragment is 16 byte aligned */`
			`+ if (skb_frag_off(frag) % 16)`
			`+ return true;`
			`+`
			`+ /* Check if the fragment crosses a 4K address boundary. Since`
			`+ * the alignment of previous fragments can influence the`
			`+ * current fragment, checking for the 256 byte alignment`
			`+ * isn't relevant.`
			`+ */`
			`+ if (CROSS_4K(skb_frag_off(frag), skb_frag_size(frag)))`
			`+ return true;`
			`+ }`
			`+`
			`return false;`
			`}`

			`@@ -1062,10 +1103,24 @@ int __hot dpa_tx_extended(struct sk_buff`
			`struct dpa_percpu_priv_s *percpu_priv;`
			`struct rtnl_link_stats64 *percpu_stats;`
			`int err = 0;`
			`- bool nonlinear;`
			`+ bool nonlinear, skb_changed, skb_need_wa;`
			`int *countptr, offset = 0;`
			`struct sk_buff *nskb;`

			`+ /* Flags to help optimize the A010022 errata restriction checks.`
			`+ *`
			`+ * First flag marks if the skb changed between the first A010022 check`
			`+ * and the moment it's converted to an FD.`
			`+ *`
			`+ * The second flag marks if the skb needs to be realigned in order to`
			`+ * avoid the errata.`
			`+ *`
			`+ * The flags should have minimal impact on platforms not impacted by`
			`+ * the errata.`
			`+ */`
			`+ skb_changed = false;`
			`+ skb_need_wa = false;`
			`+`
			`priv = netdev_priv(net_dev);`
			`/* Non-migratable context, safe to use raw_cpu_ptr */`
			`percpu_priv = raw_cpu_ptr(priv->percpu_priv);`
			`@@ -1075,13 +1130,8 @@ int __hot dpa_tx_extended(struct sk_buff`
			`clear_fd(&fd);`

			`#ifndef CONFIG_PPC`
			`- if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv)) {`
			`- nskb = a010022_realign_skb(skb, priv);`
			`- if (!nskb)`
			`- goto skb_to_fd_failed;`
			`- dev_kfree_skb(skb);`
			`- skb = nskb;`
			`- }`
			`+ if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv))`
			`+ skb_need_wa = true;`
			`#endif`

			`nonlinear = skb_is_nonlinear(skb);`
			`@@ -1102,8 +1152,8 @@ int __hot dpa_tx_extended(struct sk_buff`
			`* Btw, we're using the first sgt entry to store the linear part of`
			`* the skb, so we're one extra frag short.`
			`*/`
			`- if (nonlinear &&`
			`- likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {`
			`+ if (nonlinear && !skb_need_wa &&`
			`+ likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {`
			`/* Just create a S/G fd based on the skb */`
			`err = skb_to_sg_fd(priv, skb, &fd);`
			`percpu_priv->tx_frag_skbuffs++;`
			`@@ -1128,39 +1178,62 @@ int __hot dpa_tx_extended(struct sk_buff`

			`dev_kfree_skb(skb);`
			`skb = skb_new;`
			`+ skb_changed = true;`
			`}`

			`/* We're going to store the skb backpointer at the beginning`
			`* of the data buffer, so we need a privately owned skb`
			`+ *`
			`+ * Under the A010022 errata, we are going to have a privately`
			`+ * owned skb after realigning the current one, so no point in`
			`+ * copying it here in that case.`
			`*/`

			`/* Code borrowed from skb_unshare(). */`
			`- if (skb_cloned(skb)) {`
			`+ if (skb_cloned(skb) && !skb_need_wa) {`
			`nskb = skb_copy(skb, GFP_ATOMIC);`
			`kfree_skb(skb);`
			`skb = nskb;`
			`-#ifndef CONFIG_PPC`
			`- if (unlikely(dpaa_errata_a010022) &&`
			`- a010022_check_skb(skb, priv)) {`
			`- nskb = a010022_realign_skb(skb, priv);`
			`- if (!nskb)`
			`- goto skb_to_fd_failed;`
			`- dev_kfree_skb(skb);`
			`- skb = nskb;`
			`- }`
			`-#endif`
			`+ skb_changed = true;`
			`+`
			`/* skb_copy() has now linearized the skbuff. */`
			`- } else if (unlikely(nonlinear)) {`
			`+ } else if (unlikely(nonlinear) && !skb_need_wa) {`
			`/* We are here because the egress skb contains`
			`* more fragments than we support. In this case,`
			`* we have no choice but to linearize it ourselves.`
			`*/`
			`- err = __skb_linearize(skb);`
			`+#ifndef CONFIG_PPC`
			`+ /* No point in linearizing the skb now if we are going`
			`+ * to realign and linearize it again further down due`
			`+ * to the A010022 errata`
			`+ */`
			`+ if (unlikely(dpaa_errata_a010022))`
			`+ skb_need_wa = true;`
			`+ else`
			`+#endif`
			`+ err = __skb_linearize(skb);`
			`}`
			`if (unlikely(!skb || err < 0))`
			`/* Common out-of-memory error path */`
			`goto enomem;`

			`+#ifndef CONFIG_PPC`
			`+ /* Verify the skb a second time if it has been updated since`
			`+ * the previous check`
			`+ */`
			`+ if (unlikely(dpaa_errata_a010022) && skb_changed &&`
			`+ a010022_check_skb(skb, priv))`
			`+ skb_need_wa = true;`
			`+`
			`+ if (unlikely(dpaa_errata_a010022) && skb_need_wa) {`
			`+ nskb = a010022_realign_skb(skb, priv);`
			`+ if (!nskb)`
			`+ goto skb_to_fd_failed;`
			`+ dev_kfree_skb(skb);`
			`+ skb = nskb;`
			`+ }`
			`+#endif`
			`+`
			`err = skb_to_contig_fd(priv, skb, &fd, countptr, &offset);`
			`}`
			`if (unlikely(err < 0))`