From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 12 Apr 2015 10:13:28 +0200
Subject: [PATCH] bgmac: set received skb headroom to NET_SKB_PAD

A packet buffer offset of 30 bytes is inefficient, because the first 2
bytes end up in a different cacheline.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---

--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -346,13 +346,13 @@ static int bgmac_dma_rx_skb_for_slot(str
 		return -ENOMEM;
 
 	/* Poison - if everything goes fine, hardware will overwrite it */
-	rx = buf;
+	rx = buf + BGMAC_RX_BUF_OFFSET;
 	rx->len = cpu_to_le16(0xdead);
 	rx->flags = cpu_to_le16(0xbeef);
 
 	/* Map skb for the DMA */
-	dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE,
-				  DMA_FROM_DEVICE);
+	dma_addr = dma_map_single(dma_dev, buf + BGMAC_RX_BUF_OFFSET,
+				  BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
 	if (dma_mapping_error(dma_dev, dma_addr)) {
 		bgmac_err(bgmac, "DMA mapping error\n");
 		put_page(virt_to_head_page(buf));
@@ -403,7 +403,7 @@ static int bgmac_dma_rx_read(struct bgma
 	while (ring->start != ring->end) {
 		struct device *dma_dev = bgmac->core->dma_dev;
 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
-		struct bgmac_rx_header *rx = slot->buf;
+		struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
 		struct sk_buff *skb;
 		void *buf = slot->buf;
 		u16 len, flags;
@@ -454,8 +454,10 @@ static int bgmac_dma_rx_read(struct bgma
 					 BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
 
 			skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
-			skb_put(skb, BGMAC_RX_FRAME_OFFSET + len);
-			skb_pull(skb, BGMAC_RX_FRAME_OFFSET);
+			skb_put(skb, BGMAC_RX_FRAME_OFFSET +
+				BGMAC_RX_BUF_OFFSET + len);
+			skb_pull(skb, BGMAC_RX_FRAME_OFFSET +
+				 BGMAC_RX_BUF_OFFSET);
 
 			skb_checksum_none_assert(skb);
 			skb->protocol = eth_type_trans(skb, bgmac->net_dev);
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -360,9 +360,11 @@
 
 #define BGMAC_RX_HEADER_LEN			28		/* Last 24 bytes are unused. Well... */
 #define BGMAC_RX_FRAME_OFFSET			30		/* There are 2 unused bytes between header and real data */
+#define BGMAC_RX_BUF_OFFSET			(NET_SKB_PAD + NET_IP_ALIGN - \
+						 BGMAC_RX_FRAME_OFFSET)
 #define BGMAC_RX_MAX_FRAME_SIZE			1536		/* Copied from b44/tg3 */
 #define BGMAC_RX_BUF_SIZE			(BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
-#define BGMAC_RX_ALLOC_SIZE			(SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE) + \
+#define BGMAC_RX_ALLOC_SIZE			(SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE + BGMAC_RX_BUF_OFFSET) + \
 						 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
 #define BGMAC_BFL_ENETROBO			0x0010		/* has ephy roboswitch spi */