mac80211: merge performance improvement patches

Fix fq_codel performance issues
Add a new rx function for batch processing

Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
Felix Fietkau 2020-07-26 15:12:32 +02:00
parent 431b177afa
commit 3d731fc903
3 changed files with 260 additions and 0 deletions

View File

@ -0,0 +1,186 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 25 Jul 2020 20:53:23 +0200
Subject: [PATCH] mac80211: add a function for running rx without passing skbs
to the stack
This can be used to run mac80211 rx processing on a batch of frames in NAPI
poll before passing them to the network stack in a large batch.
This can improve icache footprint, or it can be used to pass frames via
netif_receive_skb_list.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4358,6 +4358,31 @@ void ieee80211_free_hw(struct ieee80211_
void ieee80211_restart_hw(struct ieee80211_hw *hw);
/**
+ * ieee80211_rx_list - receive frame and store processed skbs in a list
+ *
+ * Use this function to hand received frames to mac80211. The receive
+ * buffer in @skb must start with an IEEE 802.11 header. In case of a
+ * paged @skb is used, the driver is recommended to put the ieee80211
+ * header of the frame on the linear part of the @skb to avoid memory
+ * allocation and/or memcpy by the stack.
+ *
+ * This function may not be called in IRQ context. Calls to this function
+ * for a single hardware must be synchronized against each other. Calls to
+ * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
+ * mixed for a single hardware. Must not run concurrently with
+ * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ *
+ * This function must be called with BHs disabled and RCU read lock
+ *
+ * @hw: the hardware this frame came in on
+ * @sta: the station the frame was received from, or %NULL
+ * @skb: the buffer to receive, owned by mac80211 after this call
+ * @list: the destination list
+ */
+void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
+ struct sk_buff *skb, struct list_head *list);
+
+/**
* ieee80211_rx_napi - receive frame from NAPI context
*
* Use this function to hand received frames to mac80211. The receive
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -218,7 +218,7 @@ enum ieee80211_rx_flags {
};
struct ieee80211_rx_data {
- struct napi_struct *napi;
+ struct list_head *list;
struct sk_buff *skb;
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2552,8 +2552,8 @@ static void ieee80211_deliver_skb_to_loc
memset(skb->cb, 0, sizeof(skb->cb));
/* deliver to local stack */
- if (rx->napi)
- napi_gro_receive(rx->napi, skb);
+ if (rx->list)
+ list_add_tail(&skb->list, rx->list);
else
netif_receive_skb(skb);
}
@@ -3843,7 +3843,6 @@ void ieee80211_release_reorder_timeout(s
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
- .napi = NULL, /* must be NULL to not have races */
};
struct tid_ampdu_rx *tid_agg_rx;
@@ -4453,8 +4452,8 @@ static bool ieee80211_invoke_fast_rx(str
/* deliver to local stack */
skb->protocol = eth_type_trans(skb, fast_rx->dev);
memset(skb->cb, 0, sizeof(skb->cb));
- if (rx->napi)
- napi_gro_receive(rx->napi, skb);
+ if (rx->list)
+ list_add_tail(&skb->list, rx->list);
else
netif_receive_skb(skb);
@@ -4521,7 +4520,7 @@ static bool ieee80211_prepare_and_rx_han
static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
struct ieee80211_sta *pubsta,
struct sk_buff *skb,
- struct napi_struct *napi)
+ struct list_head *list)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_sub_if_data *sdata;
@@ -4536,7 +4535,7 @@ static void __ieee80211_rx_handle_packet
memset(&rx, 0, sizeof(rx));
rx.skb = skb;
rx.local = local;
- rx.napi = napi;
+ rx.list = list;
if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
@@ -4644,8 +4643,8 @@ static void __ieee80211_rx_handle_packet
* This is the receive path handler. It is called by a low level driver when an
* 802.11 MPDU is received from the hardware.
*/
-void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
- struct sk_buff *skb, struct napi_struct *napi)
+void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+ struct sk_buff *skb, struct list_head *list)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_rate *rate = NULL;
@@ -4737,36 +4736,53 @@ void ieee80211_rx_napi(struct ieee80211_
status->rx_flags = 0;
/*
- * key references and virtual interfaces are protected using RCU
- * and this requires that we are in a read-side RCU section during
- * receive processing
- */
- rcu_read_lock();
-
- /*
* Frames with failed FCS/PLCP checksum are not returned,
* all other frames are returned without radiotap header
* if it was previously present.
* Also, frames with less than 16 bytes are dropped.
*/
skb = ieee80211_rx_monitor(local, skb, rate);
- if (!skb) {
- rcu_read_unlock();
+ if (!skb)
return;
- }
ieee80211_tpt_led_trig_rx(local,
((struct ieee80211_hdr *)skb->data)->frame_control,
skb->len);
- __ieee80211_rx_handle_packet(hw, pubsta, skb, napi);
-
- rcu_read_unlock();
+ __ieee80211_rx_handle_packet(hw, pubsta, skb, list);
return;
drop:
kfree_skb(skb);
}
+EXPORT_SYMBOL(ieee80211_rx_list);
+
+void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+ struct sk_buff *skb, struct napi_struct *napi)
+{
+ struct sk_buff *tmp;
+ LIST_HEAD(list);
+
+
+ /*
+ * key references and virtual interfaces are protected using RCU
+ * and this requires that we are in a read-side RCU section during
+ * receive processing
+ */
+ rcu_read_lock();
+ ieee80211_rx_list(hw, pubsta, skb, &list);
+ rcu_read_unlock();
+
+ if (!napi) {
+ netif_receive_skb_list(&list);
+ return;
+ }
+
+ list_for_each_entry_safe(skb, tmp, &list, list) {
+ skb_list_del_init(skb);
+ napi_gro_receive(napi, skb);
+ }
+}
EXPORT_SYMBOL(ieee80211_rx_napi);
/* This is a version of the rx handler that can be called from hard irq

View File

@ -0,0 +1,55 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 26 Jul 2020 14:37:02 +0200
Subject: [PATCH] net/fq_impl: use skb_get_hash instead of
skb_get_hash_perturb
This avoids unnecessary regenerating of the skb flow hash
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -69,15 +69,6 @@ struct fq {
struct list_head backlogs;
spinlock_t lock;
u32 flows_cnt;
-#if LINUX_VERSION_IS_GEQ(5,3,10) || \
- LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \
- LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \
- LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \
- LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0)
- siphash_key_t perturbation;
-#else
- u32 perturbation;
-#endif
u32 limit;
u32 memory_limit;
u32 memory_usage;
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -108,15 +108,7 @@ begin:
static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
{
-#if LINUX_VERSION_IS_GEQ(5,3,10) || \
- LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \
- LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \
- LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \
- LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0)
- u32 hash = skb_get_hash_perturb(skb, &fq->perturbation);
-#else
- u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
-#endif
+ u32 hash = skb_get_hash(skb);
return reciprocal_scale(hash, fq->flows_cnt);
}
@@ -316,7 +308,6 @@ static int fq_init(struct fq *fq, int fl
INIT_LIST_HEAD(&fq->backlogs);
spin_lock_init(&fq->lock);
fq->flows_cnt = max_t(u32, flows_cnt, 1);
- get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
fq->quantum = 300;
fq->limit = 8192;
fq->memory_limit = 16 << 20; /* 16 MBytes */

View File

@ -0,0 +1,19 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 26 Jul 2020 14:42:58 +0200
Subject: [PATCH] mac80211: calculcate skb hash early when using itxq
This avoids flow separation issues when using software encryption
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3937,6 +3937,7 @@ void __ieee80211_subif_start_xmit(struct
if (local->ops->wake_tx_queue) {
u16 queue = __ieee80211_select_queue(sdata, sta, skb);
skb_set_queue_mapping(skb, queue);
+ skb_get_hash(skb);
}
if (sta) {