nic/ipxe: batch TX requests

Currently, a batch of transmit requests received by the Uplink server
(nic_router) is added to the ring buffer one by one, and the hardware is
notified separately for every single request.

Instead, add as many transmit requests as possible to the ring buffer of
the hardware and, once done, trigger the hardware to process the ring.

Additionally, don't receive a "processed" TX IRQ for each element in the
ring, as the per-element IRQs cause high CPU load.

With this commit, the number of TX IRQs in the ipxe driver for a

iperf -c X.X.X.X -t 60

from within a VM to the outside iperf server is reduced from
~2'600'000 IRQs to ~200'000. The overall CPU load of the driver
(when executed alone on CPU 0) is reduced from ~85 percent to ~45
percent.

Issue #5149
This commit is contained in:
Alexander Boettcher 2024-02-07 09:09:38 +01:00 committed by Christian Helmuth
parent feba5a138e
commit 31e8b50b7c
6 changed files with 132 additions and 5 deletions

View File

@ -68,6 +68,8 @@ extern void dde_ipxe_nic_unregister_callbacks();
*/ */
extern int dde_ipxe_nic_tx(unsigned if_index, const char *packet, unsigned packet_len); extern int dde_ipxe_nic_tx(unsigned if_index, const char *packet, unsigned packet_len);
extern void dde_ipxe_nic_tx_done();
/** /**
* Get MAC address of device * Get MAC address of device
* *

View File

@ -0,0 +1,111 @@
--- a/src/drivers/net/intel.c
+++ b/src/drivers/net/intel.c
@@ -717,6 +717,23 @@
intel_reset ( intel );
}
+static int intel_transmit_done ( struct net_device *netdev)
+{
+ struct intel_nic *intel = netdev->priv;
+
+ unsigned int tx_tail;
+
+ tx_tail = ( intel->tx.prod % INTEL_NUM_TX_DESC ); /* producer counter reduced to a ring index */
+
+ wmb(); /* NOTE(review): presumably orders the descriptor writes before the tail register write - confirm */
+
+ /* Notify card that there are packets ready to transmit: publish the current producer index via the ring's INTEL_xDT register, covering every descriptor queued so far */
+ writel ( tx_tail, intel->regs + intel->tx.reg + INTEL_xDT );
+
+ return 0; /* unconditionally reports success */
+
+}
+
/**
* Transmit packet
*
@@ -745,10 +762,11 @@
address = virt_to_bus ( iobuf->data );
len = iob_len ( iobuf );
intel->tx.describe ( tx, address, len );
- wmb();
- /* Notify card that there are packets ready to transmit */
- writel ( tx_tail, intel->regs + intel->tx.reg + INTEL_xDT );
+ /* ptr check just in case we make transmit_done configurable - set/unset */
+ if ((intel->tx.cons == ((tx_tail + 1) % INTEL_NUM_TX_DESC)) || !netdev->op->transmit_done) {
+ intel_transmit_done(netdev);
+ }
DBGC2 ( intel, "INTEL %p TX %d is [%llx,%llx)\n", intel, tx_idx,
( ( unsigned long long ) address ),
@@ -845,7 +863,7 @@
return;
/* Poll for TX completions, if applicable */
- if ( icr & INTEL_IRQ_TXDW )
+ if ( icr & (INTEL_IRQ_TXDW | INTEL_IRQ_TXQE))
intel_poll_tx ( netdev );
/* Poll for RX completions, if applicable */
@@ -882,7 +900,7 @@
struct intel_nic *intel = netdev->priv;
uint32_t mask;
- mask = ( INTEL_IRQ_TXDW | INTEL_IRQ_LSC | INTEL_IRQ_RXT0 );
+ mask = ( INTEL_IRQ_TXQE | INTEL_IRQ_LSC | INTEL_IRQ_RXT0 );
if ( enable ) {
writel ( mask, intel->regs + INTEL_IMS );
} else {
@@ -897,6 +915,7 @@
.transmit = intel_transmit,
.poll = intel_poll,
.irq = intel_irq,
+ .transmit_done = intel_transmit_done,
};
/******************************************************************************
--- a/src/include/ipxe/netdevice.h
+++ b/src/include/ipxe/netdevice.h
@@ -260,6 +260,8 @@
* supported.
*/
void ( * irq ) ( struct net_device *netdev, int enable );
+
+ int ( * transmit_done ) ( struct net_device *netdev);
};
/** Network device error */
@@ -575,6 +577,7 @@
extern void netdev_link_err ( struct net_device *netdev, int rc );
extern void netdev_link_down ( struct net_device *netdev );
extern int netdev_tx ( struct net_device *netdev, struct io_buffer *iobuf );
+extern int netdev_tx_done ( struct net_device *netdev );
extern void netdev_tx_defer ( struct net_device *netdev,
struct io_buffer *iobuf );
extern void netdev_tx_err ( struct net_device *netdev,
--- a/src/net/netdevice.c
+++ b/src/net/netdevice.c
@@ -213,6 +213,22 @@
return rc;
}
+int netdev_tx_done ( struct net_device *netdev ) {
+
+ int rc;
+
+ /* Avoid calling transmit_done() on unopened network devices */
+ if ( ! netdev_is_open ( netdev ) ) {
+ rc = -ENETUNREACH;
+ return rc;
+ }
+
+ if ( !netdev->op || !netdev->op->transmit_done ) /* hook is optional: driver may not provide it */
+ return -ENETUNREACH;
+
+ return netdev->op->transmit_done ( netdev ); /* hand the driver's status back to the caller */
+}
+
/**
* Defer transmitted packet
*

View File

@ -1 +1 @@
79cd5d4ab2b1451966107c40fb0890a4a206279d c20a39b17bca4d9cefcee43f366f5c23296396e0

View File

@ -10,7 +10,8 @@ PATCHES := patches/dde_ipxe.patch \
patches/intel.patch \ patches/intel.patch \
patches/intel_update.patch \ patches/intel_update.patch \
patches/tg3.patch \ patches/tg3.patch \
patches/realtek.patch patches/realtek.patch \
patches/intel_tx_batch.patch
PATCH_OPT := -p1 -d ${DIR(ipxe)} PATCH_OPT := -p1 -d ${DIR(ipxe)}

View File

@ -94,6 +94,11 @@ class Uplink_client : public Uplink_client_base
return Transmit_result::REJECTED; return Transmit_result::REJECTED;
} }
/*
 * Override of the Uplink_client_base hook: forwards to
 * dde_ipxe_nic_tx_done() and does nothing else.
 *
 * NOTE(review): presumably invoked by the base class once a batch of
 * transmit requests has been handed to the driver - confirm against
 * Uplink_client_base.
 */
void _drv_finish_transmitted_pkts() override
{
dde_ipxe_nic_tx_done();
}
public: public:
Uplink_client(Env &env, Uplink_client(Env &env,

View File

@ -239,6 +239,14 @@ int dde_ipxe_nic_link_state(unsigned if_index)
} }
/**
 * Finish a series of transmit requests
 *
 * Calls netdev_tx_done() for 'net_dev' between dde_lock_enter() and
 * dde_lock_leave(). The status returned by netdev_tx_done() is
 * discarded, so callers get no indication of failure.
 */
void dde_ipxe_nic_tx_done()
{
dde_lock_enter();
netdev_tx_done(net_dev);
dde_lock_leave();
}
int dde_ipxe_nic_tx(unsigned if_index, const char *packet, unsigned packet_len) int dde_ipxe_nic_tx(unsigned if_index, const char *packet, unsigned packet_len)
{ {
if (if_index != 1) if (if_index != 1)
@ -248,13 +256,13 @@ int dde_ipxe_nic_tx(unsigned if_index, const char *packet, unsigned packet_len)
struct io_buffer *iobuf = alloc_iob(packet_len); struct io_buffer *iobuf = alloc_iob(packet_len);
if (!iobuf) {
dde_lock_leave(); dde_lock_leave();
if (!iobuf)
return -1; return -1;
}
memcpy(iob_put(iobuf, packet_len), packet, packet_len); memcpy(iob_put(iobuf, packet_len), packet, packet_len);
dde_lock_enter();
netdev_poll(net_dev); netdev_poll(net_dev);
netdev_tx(net_dev, iob_disown(iobuf)); netdev_tx(net_dev, iob_disown(iobuf));