mac80211: ath11k: Fix DMA buffer allocation to resolve SWIOTLB issues
Some checks are pending
Build Kernel / Build all affected Kernels (push) Waiting to run
Build all core packages / Build all core packages for selected target (push) Waiting to run

Backport two commits to resolve issues with ath1kk causing it to fail
driver registration on iommuless systems with DRAM outside of 32bit
addressing such as a 4GiB imx8mm:

commit 1bcd20981834 ("wifi: ath11k: Fix DMA buffer allocation
to resolve SWIOTLB issues")
commit eeadc6baf8b3 ("wifi: ath11k: Use dma_alloc_noncoherent
for rx_tid buffer allocation")

Signed-off-by: Tim Harvey <tharvey@gateworks.com>
Link: https://github.com/openwrt/openwrt/pull/17751
Signed-off-by: Robert Marko <robimarko@gmail.com>
This commit is contained in:
Tim Harvey 2025-01-27 14:44:27 -08:00 committed by Robert Marko
parent f169c57242
commit fa50e53aa9
2 changed files with 346 additions and 0 deletions

View File

@ -0,0 +1,91 @@
wifi: ath11k: Fix DMA buffer allocation to resolve SWIOTLB issues
Currently, the driver allocates cacheable DMA buffers for rings like
HAL_REO_DST and HAL_WBM2SW_RELEASE. The buffers for HAL_WBM2SW_RELEASE
are large (1024 KiB), exceeding the SWIOTLB slot size of 256 KiB. This
leads to "swiotlb buffer is full" error messages on systems without an
IOMMU that use SWIOTLB, causing driver initialization failures. The driver
calls dma_map_single() with these large buffers obtained from kzalloc(),
resulting in ring initialization errors on systems without an IOMMU that
use SWIOTLB.
To address these issues, replace the flawed buffer allocation mechanism
with the appropriate DMA API. Specifically, use dma_alloc_noncoherent()
for cacheable DMA buffers, ensuring proper freeing of buffers with
dma_free_noncoherent().
Error log:
[ 10.194343] ath11k_pci 0000:04:00.0: swiotlb buffer is full (sz:1048583 bytes), total 32768 (slots), used 2529 (slots)
[ 10.194406] ath11k_pci 0000:04:00.0: failed to set up tcl_comp ring (0) :-12
[ 10.194781] ath11k_pci 0000:04:00.0: failed to init DP: -12
Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
Reported-by: Tim Harvey <tharvey@gateworks.com>
Closes: https://lore.kernel.org/all/20241210041133.GA17116@lst.de/
Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
Tested-by: Tim Harvey <tharvey@gateworks.com>
Link: https://patch.msgid.link/20250119164219.647059-2-quic_ppranees@quicinc.com
Signed-off-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <crypto/hash.h>
@@ -104,14 +104,12 @@ void ath11k_dp_srng_cleanup(struct ath11
if (!ring->vaddr_unaligned)
return;
- if (ring->cached) {
- dma_unmap_single(ab->dev, ring->paddr_unaligned, ring->size,
- DMA_FROM_DEVICE);
- kfree(ring->vaddr_unaligned);
- } else {
+ if (ring->cached)
+ dma_free_noncoherent(ab->dev, ring->size, ring->vaddr_unaligned,
+ ring->paddr_unaligned, DMA_FROM_DEVICE);
+ else
dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
ring->paddr_unaligned);
- }
ring->vaddr_unaligned = NULL;
}
@@ -249,25 +247,14 @@ int ath11k_dp_srng_setup(struct ath11k_b
default:
cached = false;
}
-
- if (cached) {
- ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
- if (!ring->vaddr_unaligned)
- return -ENOMEM;
-
- ring->paddr_unaligned = dma_map_single(ab->dev,
- ring->vaddr_unaligned,
- ring->size,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(ab->dev, ring->paddr_unaligned)) {
- kfree(ring->vaddr_unaligned);
- ring->vaddr_unaligned = NULL;
- return -ENOMEM;
- }
- }
}
- if (!cached)
+ if (cached)
+ ring->vaddr_unaligned = dma_alloc_noncoherent(ab->dev, ring->size,
+ &ring->paddr_unaligned,
+ DMA_FROM_DEVICE,
+ GFP_KERNEL);
+ else
ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
&ring->paddr_unaligned,
GFP_KERNEL);

View File

@ -0,0 +1,255 @@
wifi: ath11k: Use dma_alloc_noncoherent for rx_tid buffer allocation
Currently, the driver allocates cacheable DMA buffers for the rx_tid
structure using kzalloc() and dma_map_single(). These buffers are
long-lived and can persist for the lifetime of the peer, which is not
advisable. Instead of using kzalloc() and dma_map_single() for allocating
cacheable DMA buffers, utilize the dma_alloc_noncoherent() helper for the
allocation of long-lived cacheable DMA buffers, such as the peer's rx_tid.
Since dma_alloc_noncoherent() returns unaligned physical and virtual
addresses, align them internally before use within the driver. This
ensures proper allocation of non-coherent memory through the kernel
helper.
Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH11K_DP_H
@@ -20,7 +20,6 @@ struct ath11k_ext_irq_grp;
struct dp_rx_tid {
u8 tid;
- u32 *vaddr;
dma_addr_t paddr;
u32 size;
u32 ba_win_sz;
@@ -37,6 +36,9 @@ struct dp_rx_tid {
/* Timer info related to fragments */
struct timer_list frag_timer;
struct ath11k_base *ab;
+ u32 *vaddr_unaligned;
+ dma_addr_t paddr_unaligned;
+ u32 unaligned_size;
};
#define DP_REO_DESC_FREE_THRESHOLD 64
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/ieee80211.h>
@@ -675,11 +675,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru
list_for_each_entry_safe(cmd, tmp, &dp->reo_cmd_list, list) {
list_del(&cmd->list);
rx_tid = &cmd->data;
- if (rx_tid->vaddr) {
- dma_unmap_single(ab->dev, rx_tid->paddr,
- rx_tid->size, DMA_BIDIRECTIONAL);
- kfree(rx_tid->vaddr);
- rx_tid->vaddr = NULL;
+ if (rx_tid->vaddr_unaligned) {
+ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
+ rx_tid->vaddr_unaligned,
+ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+ rx_tid->vaddr_unaligned = NULL;
}
kfree(cmd);
}
@@ -689,11 +689,11 @@ void ath11k_dp_reo_cmd_list_cleanup(stru
list_del(&cmd_cache->list);
dp->reo_cmd_cache_flush_count--;
rx_tid = &cmd_cache->data;
- if (rx_tid->vaddr) {
- dma_unmap_single(ab->dev, rx_tid->paddr,
- rx_tid->size, DMA_BIDIRECTIONAL);
- kfree(rx_tid->vaddr);
- rx_tid->vaddr = NULL;
+ if (rx_tid->vaddr_unaligned) {
+ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
+ rx_tid->vaddr_unaligned,
+ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+ rx_tid->vaddr_unaligned = NULL;
}
kfree(cmd_cache);
}
@@ -708,11 +708,11 @@ static void ath11k_dp_reo_cmd_free(struc
if (status != HAL_REO_CMD_SUCCESS)
ath11k_warn(dp->ab, "failed to flush rx tid hw desc, tid %d status %d\n",
rx_tid->tid, status);
- if (rx_tid->vaddr) {
- dma_unmap_single(dp->ab->dev, rx_tid->paddr, rx_tid->size,
- DMA_BIDIRECTIONAL);
- kfree(rx_tid->vaddr);
- rx_tid->vaddr = NULL;
+ if (rx_tid->vaddr_unaligned) {
+ dma_free_noncoherent(dp->ab->dev, rx_tid->unaligned_size,
+ rx_tid->vaddr_unaligned,
+ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+ rx_tid->vaddr_unaligned = NULL;
}
}
@@ -749,10 +749,10 @@ static void ath11k_dp_reo_cache_flush(st
if (ret) {
ath11k_err(ab, "failed to send HAL_REO_CMD_FLUSH_CACHE cmd, tid %d (%d)\n",
rx_tid->tid, ret);
- dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
- DMA_BIDIRECTIONAL);
- kfree(rx_tid->vaddr);
- rx_tid->vaddr = NULL;
+ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
+ rx_tid->vaddr_unaligned,
+ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+ rx_tid->vaddr_unaligned = NULL;
}
}
@@ -802,10 +802,10 @@ static void ath11k_dp_rx_tid_del_func(st
return;
free_desc:
- dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
- DMA_BIDIRECTIONAL);
- kfree(rx_tid->vaddr);
- rx_tid->vaddr = NULL;
+ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
+ rx_tid->vaddr_unaligned,
+ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+ rx_tid->vaddr_unaligned = NULL;
}
void ath11k_peer_rx_tid_delete(struct ath11k *ar,
@@ -831,14 +831,16 @@ void ath11k_peer_rx_tid_delete(struct at
if (ret != -ESHUTDOWN)
ath11k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n",
tid, ret);
- dma_unmap_single(ar->ab->dev, rx_tid->paddr, rx_tid->size,
- DMA_BIDIRECTIONAL);
- kfree(rx_tid->vaddr);
- rx_tid->vaddr = NULL;
+ dma_free_noncoherent(ar->ab->dev, rx_tid->unaligned_size,
+ rx_tid->vaddr_unaligned,
+ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+ rx_tid->vaddr_unaligned = NULL;
}
rx_tid->paddr = 0;
+ rx_tid->paddr_unaligned = 0;
rx_tid->size = 0;
+ rx_tid->unaligned_size = 0;
}
static int ath11k_dp_rx_link_desc_return(struct ath11k_base *ab,
@@ -982,10 +984,9 @@ static void ath11k_dp_rx_tid_mem_free(st
if (!rx_tid->active)
goto unlock_exit;
- dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
- DMA_BIDIRECTIONAL);
- kfree(rx_tid->vaddr);
- rx_tid->vaddr = NULL;
+ dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, rx_tid->vaddr_unaligned,
+ rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+ rx_tid->vaddr_unaligned = NULL;
rx_tid->active = false;
@@ -1000,9 +1001,8 @@ int ath11k_peer_rx_tid_setup(struct ath1
struct ath11k_base *ab = ar->ab;
struct ath11k_peer *peer;
struct dp_rx_tid *rx_tid;
- u32 hw_desc_sz;
- u32 *addr_aligned;
- void *vaddr;
+ u32 hw_desc_sz, *vaddr;
+ void *vaddr_unaligned;
dma_addr_t paddr;
int ret;
@@ -1050,37 +1050,34 @@ int ath11k_peer_rx_tid_setup(struct ath1
else
hw_desc_sz = ath11k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid);
- vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC);
- if (!vaddr) {
+ rx_tid->unaligned_size = hw_desc_sz + HAL_LINK_DESC_ALIGN - 1;
+ vaddr_unaligned = dma_alloc_noncoherent(ab->dev, rx_tid->unaligned_size, &paddr,
+ DMA_BIDIRECTIONAL, GFP_ATOMIC);
+ if (!vaddr_unaligned) {
spin_unlock_bh(&ab->base_lock);
return -ENOMEM;
}
- addr_aligned = PTR_ALIGN(vaddr, HAL_LINK_DESC_ALIGN);
-
- ath11k_hal_reo_qdesc_setup(addr_aligned, tid, ba_win_sz,
- ssn, pn_type);
-
- paddr = dma_map_single(ab->dev, addr_aligned, hw_desc_sz,
- DMA_BIDIRECTIONAL);
-
- ret = dma_mapping_error(ab->dev, paddr);
- if (ret) {
- spin_unlock_bh(&ab->base_lock);
- ath11k_warn(ab, "failed to setup dma map for peer %pM rx tid %d: %d\n",
- peer_mac, tid, ret);
- goto err_mem_free;
- }
-
- rx_tid->vaddr = vaddr;
- rx_tid->paddr = paddr;
+ rx_tid->vaddr_unaligned = vaddr_unaligned;
+ vaddr = PTR_ALIGN(vaddr_unaligned, HAL_LINK_DESC_ALIGN);
+ rx_tid->paddr_unaligned = paddr;
+ rx_tid->paddr = rx_tid->paddr_unaligned + ((unsigned long)vaddr -
+ (unsigned long)rx_tid->vaddr_unaligned);
+ ath11k_hal_reo_qdesc_setup(vaddr, tid, ba_win_sz, ssn, pn_type);
rx_tid->size = hw_desc_sz;
rx_tid->active = true;
+ /* After dma_alloc_noncoherent, vaddr is being modified for reo qdesc setup.
+ * Since these changes are not reflected in the device, driver now needs to
+ * explicitly call dma_sync_single_for_device.
+ */
+ dma_sync_single_for_device(ab->dev, rx_tid->paddr,
+ rx_tid->size,
+ DMA_TO_DEVICE);
spin_unlock_bh(&ab->base_lock);
- ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac,
- paddr, tid, 1, ba_win_sz);
+ ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, rx_tid->paddr,
+ tid, 1, ba_win_sz);
if (ret) {
ath11k_warn(ar->ab, "failed to setup rx reorder queue for peer %pM tid %d: %d\n",
peer_mac, tid, ret);
@@ -1088,12 +1085,6 @@ int ath11k_peer_rx_tid_setup(struct ath1
}
return ret;
-
-err_mem_free:
- kfree(rx_tid->vaddr);
- rx_tid->vaddr = NULL;
-
- return ret;
}
int ath11k_dp_rx_ampdu_start(struct ath11k *ar,