From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Date: Fri, 7 Jun 2013 18:35:22 -0500
Subject: MIPS: r4k_cache: use more efficient cache blast

Optimize the compiler output for larger cache blast cases that are
common for DMA-based networking.

Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -286,14 +286,46 @@ static inline void prot##extra##blast_##
 						    unsigned long end)	\
 {									\
 	unsigned long lsize = cpu_##desc##_line_size();			\
+	unsigned long lsize_2 = lsize * 2;				\
+	unsigned long lsize_3 = lsize * 3;				\
+	unsigned long lsize_4 = lsize * 4;				\
+	unsigned long lsize_5 = lsize * 5;				\
+	unsigned long lsize_6 = lsize * 6;				\
+	unsigned long lsize_7 = lsize * 7;				\
+	unsigned long lsize_8 = lsize * 8;				\
 	unsigned long addr = start & ~(lsize - 1);			\
-	unsigned long aend = (end - 1) & ~(lsize - 1);			\
+	unsigned long aend = (end + lsize - 1) & ~(lsize - 1);		\
+	int lines = (aend - addr) / lsize;				\
 									\
-	while (1) {							\
+	while (lines >= 8) {						\
+		prot##cache_op(hitop, addr);				\
+		prot##cache_op(hitop, addr + lsize);			\
+		prot##cache_op(hitop, addr + lsize_2);			\
+		prot##cache_op(hitop, addr + lsize_3);			\
+		prot##cache_op(hitop, addr + lsize_4);			\
+		prot##cache_op(hitop, addr + lsize_5);			\
+		prot##cache_op(hitop, addr + lsize_6);			\
+		prot##cache_op(hitop, addr + lsize_7);			\
+		addr += lsize_8;					\
+		lines -= 8;						\
+	}								\
+									\
+	if (lines & 0x4) {						\
+		prot##cache_op(hitop, addr);				\
+		prot##cache_op(hitop, addr + lsize);			\
+		prot##cache_op(hitop, addr + lsize_2);			\
+		prot##cache_op(hitop, addr + lsize_3);			\
+		addr += lsize_4;					\
+	}								\
+									\
+	if (lines & 0x2) {						\
+		prot##cache_op(hitop, addr);				\
+		prot##cache_op(hitop, addr + lsize);			\
+		addr += lsize_2;					\
+	}								\
+									\
+	if (lines & 0x1) {						\
 		prot##cache_op(hitop, addr);				\
-		if (addr == aend)					\
-			break;						\
-		addr += lsize;						\
 	}								\
 }