diff --git a/package/kernel/mac80211/patches/ath11k/001-wifi-ath11k-Fix-DMA-buffer-allocation-to-resolve-SWIOTLB-issues.patch b/package/kernel/mac80211/patches/ath11k/001-wifi-ath11k-Fix-DMA-buffer-allocation-to-resolve-SWIOTLB-issues.patch new file mode 100644 index 00000000000..d5d80a395d7 --- /dev/null +++ b/package/kernel/mac80211/patches/ath11k/001-wifi-ath11k-Fix-DMA-buffer-allocation-to-resolve-SWIOTLB-issues.patch @@ -0,0 +1,91 @@ +wifi: ath11k: Fix DMA buffer allocation to resolve SWIOTLB issues +Currently, the driver allocates cacheable DMA buffers for rings like +HAL_REO_DST and HAL_WBM2SW_RELEASE. The buffers for HAL_WBM2SW_RELEASE +are large (1024 KiB), exceeding the SWIOTLB slot size of 256 KiB. This +leads to "swiotlb buffer is full" error messages on systems without an +IOMMU that use SWIOTLB, causing driver initialization failures. The driver +calls dma_map_single() with these large buffers obtained from kzalloc(), +resulting in ring initialization errors on systems without an IOMMU that +use SWIOTLB. + +To address these issues, replace the flawed buffer allocation mechanism +with the appropriate DMA API. Specifically, use dma_alloc_noncoherent() +for cacheable DMA buffers, ensuring proper freeing of buffers with +dma_free_noncoherent(). + +Error log: +[ 10.194343] ath11k_pci 0000:04:00.0: swiotlb buffer is full (sz:1048583 bytes), total 32768 (slots), used 2529 (slots) +[ 10.194406] ath11k_pci 0000:04:00.0: failed to set up tcl_comp ring (0) :-12 +[ 10.194781] ath11k_pci 0000:04:00.0: failed to init DP: -12 + +Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 +Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3 + +Reported-by: Tim Harvey +Closes: https://lore.kernel.org/all/20241210041133.GA17116@lst.de/ +Signed-off-by: P Praneesh +Tested-by: Tim Harvey +Link: https://patch.msgid.link/20250119164219.647059-2-quic_ppranees@quicinc.com +Signed-off-by: Jeff Johnson +--- a/drivers/net/wireless/ath/ath11k/dp.c ++++ b/drivers/net/wireless/ath/ath11k/dp.c +@@ -1,7 +1,7 @@ + // SPDX-License-Identifier: BSD-3-Clause-Clear + /* + * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. +- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. ++ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + */ + + #include +@@ -104,14 +104,12 @@ void ath11k_dp_srng_cleanup(struct ath11 + if (!ring->vaddr_unaligned) + return; + +- if (ring->cached) { +- dma_unmap_single(ab->dev, ring->paddr_unaligned, ring->size, +- DMA_FROM_DEVICE); +- kfree(ring->vaddr_unaligned); +- } else { ++ if (ring->cached) ++ dma_free_noncoherent(ab->dev, ring->size, ring->vaddr_unaligned, ++ ring->paddr_unaligned, DMA_FROM_DEVICE); ++ else + dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned, + ring->paddr_unaligned); +- } + + ring->vaddr_unaligned = NULL; + } +@@ -249,25 +247,14 @@ int ath11k_dp_srng_setup(struct ath11k_b + default: + cached = false; + } +- +- if (cached) { +- ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL); +- if (!ring->vaddr_unaligned) +- return -ENOMEM; +- +- ring->paddr_unaligned = dma_map_single(ab->dev, +- ring->vaddr_unaligned, +- ring->size, +- DMA_FROM_DEVICE); +- if (dma_mapping_error(ab->dev, ring->paddr_unaligned)) { +- kfree(ring->vaddr_unaligned); +- ring->vaddr_unaligned = NULL; +- return -ENOMEM; +- } +- } + } + +- if (!cached) ++ if (cached) ++ ring->vaddr_unaligned = dma_alloc_noncoherent(ab->dev, ring->size, ++ &ring->paddr_unaligned, ++ DMA_FROM_DEVICE, ++ GFP_KERNEL); ++ else + ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size, + &ring->paddr_unaligned, + GFP_KERNEL); diff --git a/package/kernel/mac80211/patches/ath11k/002-wifi-ath11k-use-dma-alloc-noncoherent-for-rx-tid-buffer-allocation.patch b/package/kernel/mac80211/patches/ath11k/002-wifi-ath11k-use-dma-alloc-noncoherent-for-rx-tid-buffer-allocation.patch new file mode 100644 index 00000000000..2257b069107 --- /dev/null +++ b/package/kernel/mac80211/patches/ath11k/002-wifi-ath11k-use-dma-alloc-noncoherent-for-rx-tid-buffer-allocation.patch @@ -0,0 +1,255 @@ +wifi: ath11k: Use dma_alloc_noncoherent for rx_tid buffer allocation + +Currently, the driver allocates cacheable DMA buffers for the rx_tid +structure using kzalloc() and dma_map_single(). These buffers are +long-lived and can persist for the lifetime of the peer, which is not +advisable. Instead of using kzalloc() and dma_map_single() for allocating +cacheable DMA buffers, utilize the dma_alloc_noncoherent() helper for the +allocation of long-lived cacheable DMA buffers, such as the peer's rx_tid. +Since dma_alloc_noncoherent() returns unaligned physical and virtual +addresses, align them internally before use within the driver. This +ensures proper allocation of non-coherent memory through the kernel +helper. + +Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 +Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3 + +Signed-off-by: P Praneesh +--- a/drivers/net/wireless/ath/ath11k/dp.h ++++ b/drivers/net/wireless/ath/ath11k/dp.h +@@ -1,7 +1,7 @@ + /* SPDX-License-Identifier: BSD-3-Clause-Clear */ + /* + * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. +- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. ++ * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved. + */ + + #ifndef ATH11K_DP_H +@@ -20,7 +20,6 @@ struct ath11k_ext_irq_grp; + + struct dp_rx_tid { + u8 tid; +- u32 *vaddr; + dma_addr_t paddr; + u32 size; + u32 ba_win_sz; +@@ -37,6 +36,9 @@ struct dp_rx_tid { + /* Timer info related to fragments */ + struct timer_list frag_timer; + struct ath11k_base *ab; ++ u32 *vaddr_unaligned; ++ dma_addr_t paddr_unaligned; ++ u32 unaligned_size; + }; + + #define DP_REO_DESC_FREE_THRESHOLD 64 +--- a/drivers/net/wireless/ath/ath11k/dp_rx.c ++++ b/drivers/net/wireless/ath/ath11k/dp_rx.c +@@ -1,7 +1,7 @@ + // SPDX-License-Identifier: BSD-3-Clause-Clear + /* + * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. +- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. ++ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + */ + + #include +@@ -675,11 +675,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru + list_for_each_entry_safe(cmd, tmp, &dp->reo_cmd_list, list) { + list_del(&cmd->list); + rx_tid = &cmd->data; +- if (rx_tid->vaddr) { +- dma_unmap_single(ab->dev, rx_tid->paddr, +- rx_tid->size, DMA_BIDIRECTIONAL); +- kfree(rx_tid->vaddr); +- rx_tid->vaddr = NULL; ++ if (rx_tid->vaddr_unaligned) { ++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, ++ rx_tid->vaddr_unaligned, ++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL); ++ rx_tid->vaddr_unaligned = NULL; + } + kfree(cmd); + } +@@ -689,11 +689,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru + list_del(&cmd_cache->list); + dp->reo_cmd_cache_flush_count--; + rx_tid = &cmd_cache->data; +- if (rx_tid->vaddr) { +- dma_unmap_single(ab->dev, rx_tid->paddr, +- rx_tid->size, DMA_BIDIRECTIONAL); +- kfree(rx_tid->vaddr); +- rx_tid->vaddr = NULL; ++ if (rx_tid->vaddr_unaligned) { ++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, ++ rx_tid->vaddr_unaligned, ++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL); ++ rx_tid->vaddr_unaligned = NULL; + } + kfree(cmd_cache); + } +@@ -708,11 +708,11 @@ static void ath11k_dp_reo_cmd_free(struc + if (status != HAL_REO_CMD_SUCCESS) + ath11k_warn(dp->ab, "failed to flush rx tid hw desc, tid %d status %d\n", + rx_tid->tid, status); +- if (rx_tid->vaddr) { +- dma_unmap_single(dp->ab->dev, rx_tid->paddr, rx_tid->size, +- DMA_BIDIRECTIONAL); +- kfree(rx_tid->vaddr); +- rx_tid->vaddr = NULL; ++ if (rx_tid->vaddr_unaligned) { ++ dma_free_noncoherent(dp->ab->dev, rx_tid->unaligned_size, ++ rx_tid->vaddr_unaligned, ++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL); ++ rx_tid->vaddr_unaligned = NULL; + } + } + +@@ -749,10 +749,10 @@ static void ath11k_dp_reo_cache_flush(st + if (ret) { + ath11k_err(ab, "failed to send HAL_REO_CMD_FLUSH_CACHE cmd, tid %d (%d)\n", + rx_tid->tid, ret); +- dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size, +- DMA_BIDIRECTIONAL); +- kfree(rx_tid->vaddr); +- rx_tid->vaddr = NULL; ++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, ++ rx_tid->vaddr_unaligned, ++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL); ++ rx_tid->vaddr_unaligned = NULL; + } + } + +@@ -802,10 +802,10 @@ static void ath11k_dp_rx_tid_del_func(st + + return; + free_desc: +- dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size, +- DMA_BIDIRECTIONAL); +- kfree(rx_tid->vaddr); +- rx_tid->vaddr = NULL; ++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, ++ rx_tid->vaddr_unaligned, ++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL); ++ rx_tid->vaddr_unaligned = NULL; + } + + void ath11k_peer_rx_tid_delete(struct ath11k *ar, +@@ -831,14 +831,16 @@ void ath11k_peer_rx_tid_delete(struct at + if (ret != -ESHUTDOWN) + ath11k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n", + tid, ret); +- dma_unmap_single(ar->ab->dev, rx_tid->paddr, rx_tid->size, +- DMA_BIDIRECTIONAL); +- kfree(rx_tid->vaddr); +- rx_tid->vaddr = NULL; ++ dma_free_noncoherent(ar->ab->dev, rx_tid->unaligned_size, ++ rx_tid->vaddr_unaligned, ++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL); ++ rx_tid->vaddr_unaligned = NULL; + } + + rx_tid->paddr = 0; ++ rx_tid->paddr_unaligned = 0; + rx_tid->size = 0; ++ rx_tid->unaligned_size = 0; + } + + static int ath11k_dp_rx_link_desc_return(struct ath11k_base *ab, +@@ -982,10 +984,9 @@ static void ath11k_dp_rx_tid_mem_free(st + if (!rx_tid->active) + goto unlock_exit; + +- dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size, +- DMA_BIDIRECTIONAL); +- kfree(rx_tid->vaddr); +- rx_tid->vaddr = NULL; ++ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, rx_tid->vaddr_unaligned, ++ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL); ++ rx_tid->vaddr_unaligned = NULL; + + rx_tid->active = false; + +@@ -1000,9 +1001,8 @@ int ath11k_peer_rx_tid_setup(struct ath1 + struct ath11k_base *ab = ar->ab; + struct ath11k_peer *peer; + struct dp_rx_tid *rx_tid; +- u32 hw_desc_sz; +- u32 *addr_aligned; +- void *vaddr; ++ u32 hw_desc_sz, *vaddr; ++ void *vaddr_unaligned; + dma_addr_t paddr; + int ret; + +@@ -1050,37 +1050,34 @@ int ath11k_peer_rx_tid_setup(struct ath1 + else + hw_desc_sz = ath11k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid); + +- vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC); +- if (!vaddr) { ++ rx_tid->unaligned_size = hw_desc_sz + HAL_LINK_DESC_ALIGN - 1; ++ vaddr_unaligned = dma_alloc_noncoherent(ab->dev, rx_tid->unaligned_size, &paddr, ++ DMA_BIDIRECTIONAL, GFP_ATOMIC); ++ if (!vaddr_unaligned) { + spin_unlock_bh(&ab->base_lock); + return -ENOMEM; + } + +- addr_aligned = PTR_ALIGN(vaddr, HAL_LINK_DESC_ALIGN); +- +- ath11k_hal_reo_qdesc_setup(addr_aligned, tid, ba_win_sz, +- ssn, pn_type); +- +- paddr = dma_map_single(ab->dev, addr_aligned, hw_desc_sz, +- DMA_BIDIRECTIONAL); +- +- ret = dma_mapping_error(ab->dev, paddr); +- if (ret) { +- spin_unlock_bh(&ab->base_lock); +- ath11k_warn(ab, "failed to setup dma map for peer %pM rx tid %d: %d\n", +- peer_mac, tid, ret); +- goto err_mem_free; +- } +- +- rx_tid->vaddr = vaddr; +- rx_tid->paddr = paddr; ++ rx_tid->vaddr_unaligned = vaddr_unaligned; ++ vaddr = PTR_ALIGN(vaddr_unaligned, HAL_LINK_DESC_ALIGN); ++ rx_tid->paddr_unaligned = paddr; ++ rx_tid->paddr = rx_tid->paddr_unaligned + ((unsigned long)vaddr - ++ (unsigned long)rx_tid->vaddr_unaligned); ++ ath11k_hal_reo_qdesc_setup(vaddr, tid, ba_win_sz, ssn, pn_type); + rx_tid->size = hw_desc_sz; + rx_tid->active = true; + ++ /* After dma_alloc_noncoherent, vaddr is being modified for reo qdesc setup. ++ * Since these changes are not reflected in the device, driver now needs to ++ * explicitly call dma_sync_single_for_device. ++ */ ++ dma_sync_single_for_device(ab->dev, rx_tid->paddr, ++ rx_tid->size, ++ DMA_TO_DEVICE); + spin_unlock_bh(&ab->base_lock); + +- ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, +- paddr, tid, 1, ba_win_sz); ++ ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, rx_tid->paddr, ++ tid, 1, ba_win_sz); + if (ret) { + ath11k_warn(ar->ab, "failed to setup rx reorder queue for peer %pM tid %d: %d\n", + peer_mac, tid, ret); +@@ -1088,12 +1085,6 @@ int ath11k_peer_rx_tid_setup(struct ath1 + } + + return ret; +- +-err_mem_free: +- kfree(rx_tid->vaddr); +- rx_tid->vaddr = NULL; +- +- return ret; + } + + int ath11k_dp_rx_ampdu_start(struct ath11k *ar,