From ba09ce751542f6f80019433364d46a14ade59dd0 Mon Sep 17 00:00:00 2001
From: Jonathan Bell <jonathan@raspberrypi.com>
Date: Wed, 16 Feb 2022 14:31:02 +0000
Subject: [PATCH] usb: xhci: add a quirk for Superspeed bulk OUT
 transfers on VL805

The VL805 has a bug in its internal FIFO space accounting that results
in bulk OUT babble if a TRB in a large multi-element TD has a data
buffer size that is larger than and not a multiple of wMaxPacketSize for
the endpoint. If the downstream USB3.0 link is congested, or latency is
increased through an intermediate hub, then the VL805 enters a suspected
FIFO overflow condition and transmits repeated, garbled data to the
endpoint.

TDs with TRBs of exact multiples of wMaxPacketSize and smaller than
wMaxPacketSize appear to be handled correctly, so split buffers at a
1024-byte length boundary and put the remainder in a separate smaller
TRB.

Signed-off-by: Jonathan Bell <jonathan@raspberrypi.com>
---
 drivers/usb/host/xhci-pci.c  |  1 +
 drivers/usb/host/xhci-ring.c | 21 +++++++++++++++++++--
 drivers/usb/host/xhci.h      |  1 +
 3 files changed, 21 insertions(+), 2 deletions(-)

--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -297,6 +297,7 @@ static void xhci_pci_quirks(struct devic
 		xhci->quirks |= XHCI_LPM_SUPPORT;
 		xhci->quirks |= XHCI_AVOID_DQ_ON_LINK;
 		xhci->quirks |= XHCI_VLI_TRB_CACHE_BUG;
+		xhci->quirks |= XHCI_VLI_SS_BULK_OUT_BUG;
 	}
 
 	if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3555,14 +3555,15 @@ int xhci_queue_bulk_tx(struct xhci_hcd *
 	unsigned int num_trbs;
 	unsigned int start_cycle, num_sgs = 0;
 	unsigned int enqd_len, block_len, trb_buff_len, full_len;
-	int sent_len, ret;
-	u32 field, length_field, remainder;
+	int sent_len, ret, vli_quirk = 0;
+	u32 field, length_field, remainder, maxpacket;
 	u64 addr, send_addr;
 
 	ring = xhci_urb_to_transfer_ring(xhci, urb);
 	if (!ring)
 		return -EINVAL;
 
+	maxpacket = usb_endpoint_maxp(&urb->ep->desc);
 	full_len = urb->transfer_buffer_length;
 	/* If we have scatter/gather list, we use it. */
 	if (urb->num_sgs && !(urb->transfer_flags & URB_DMA_MAP_SINGLE)) {
@@ -3599,6 +3600,17 @@ int xhci_queue_bulk_tx(struct xhci_hcd *
 	start_cycle = ring->cycle_state;
 	send_addr = addr;
 
+	if (xhci->quirks & XHCI_VLI_SS_BULK_OUT_BUG &&
+	    !usb_urb_dir_in(urb) && urb->dev->speed >= USB_SPEED_SUPER) {
+		/*
+		 * VL805 - superspeed bulk OUT traffic can cause
+		 * an internal fifo overflow if the TRB buffer is larger
+		 * than wMaxPacket and the length is not an integer
+		 * multiple of wMaxPacket.
+		 */
+		vli_quirk = 1;
+	}
+
 	/* Queue the TRBs, even if they are zero-length */
 	for (enqd_len = 0; first_trb || enqd_len < full_len;
 			enqd_len += trb_buff_len) {
@@ -3611,6 +3623,11 @@ int xhci_queue_bulk_tx(struct xhci_hcd *
 		if (enqd_len + trb_buff_len > full_len)
 			trb_buff_len = full_len - enqd_len;
 
+		if (vli_quirk && trb_buff_len > maxpacket) {
+			/* SS bulk wMaxPacket is 1024B */
+			remainder = trb_buff_len & (maxpacket - 1);
+			trb_buff_len -= remainder;
+		}
 		/* Don't change the cycle bit of the first TRB until later */
 		if (first_trb) {
 			first_trb = false;
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1904,6 +1904,7 @@ struct xhci_hcd {
 #define XHCI_ZHAOXIN_HOST	BIT_ULL(46)
 #define XHCI_AVOID_DQ_ON_LINK  BIT_ULL(47)
 #define XHCI_VLI_TRB_CACHE_BUG BIT_ULL(48)
+#define XHCI_VLI_SS_BULK_OUT_BUG       BIT_ULL(49)
 
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;