From: Ben Menchaca <ben.menchaca@qca.qualcomm.com> Date: Fri, 7 Jun 2013 18:35:22 -0500 Subject: MIPS: r4k_cache: use more efficient cache blast Optimize the compiler output for larger cache blast cases that are common for DMA-based networking. Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com> Signed-off-by: Felix Fietkau <nbd@nbd.name> --- --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -286,14 +286,46 @@ static inline void prot##extra##blast_## unsigned long end) \ { \ unsigned long lsize = cpu_##desc##_line_size(); \ + unsigned long lsize_2 = lsize * 2; \ + unsigned long lsize_3 = lsize * 3; \ + unsigned long lsize_4 = lsize * 4; \ + unsigned long lsize_5 = lsize * 5; \ + unsigned long lsize_6 = lsize * 6; \ + unsigned long lsize_7 = lsize * 7; \ + unsigned long lsize_8 = lsize * 8; \ unsigned long addr = start & ~(lsize - 1); \ - unsigned long aend = (end - 1) & ~(lsize - 1); \ + unsigned long aend = (end + lsize - 1) & ~(lsize - 1); \ + int lines = (aend - addr) / lsize; \ \ - while (1) { \ + while (lines >= 8) { \ + prot##cache_op(hitop, addr); \ + prot##cache_op(hitop, addr + lsize); \ + prot##cache_op(hitop, addr + lsize_2); \ + prot##cache_op(hitop, addr + lsize_3); \ + prot##cache_op(hitop, addr + lsize_4); \ + prot##cache_op(hitop, addr + lsize_5); \ + prot##cache_op(hitop, addr + lsize_6); \ + prot##cache_op(hitop, addr + lsize_7); \ + addr += lsize_8; \ + lines -= 8; \ + } \ + \ + if (lines & 0x4) { \ + prot##cache_op(hitop, addr); \ + prot##cache_op(hitop, addr + lsize); \ + prot##cache_op(hitop, addr + lsize_2); \ + prot##cache_op(hitop, addr + lsize_3); \ + addr += lsize_4; \ + } \ + \ + if (lines & 0x2) { \ + prot##cache_op(hitop, addr); \ + prot##cache_op(hitop, addr + lsize); \ + addr += lsize_2; \ + } \ + \ + if (lines & 0x1) { \ prot##cache_op(hitop, addr); \ - if (addr == aend) \ - break; \ - addr += lsize; \ } \ }