From 43e769a2c40287d36c54438898a8b7d9358adba9 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.com>
Date: Tue, 11 Jan 2022 10:48:30 +0000
Subject: [PATCH] arm: Fix custom rpi __memset32 and __memset64

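The custom Raspberry Pi memset implementation exported __memset32 and
__memset64 at the same address as memset. Both therefore ran memset's
prologue, which ORs the fill value with shifted copies of itself (to
replicate a byte across the word) and then copies the result over the
second data word. That corrupts a 32-bit fill pattern and discards the
second word of a 64-bit one.

Move the __memset32 entry point to after the byte replication and the
__memset64 entry point to after the copy into the second data word, and
initialise DAT3 from DAT1 instead of DAT0 so that the 16-byte stores
repeat the DAT0/DAT1 pair.

See: https://github.com/raspberrypi/linux/issues/4798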

Signed-off-by: Phil Elwell <phil@raspberrypi.com>
---
 arch/arm/lib/memset_rpi.S | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

--- a/arch/arm/lib/memset_rpi.S
+++ b/arch/arm/lib/memset_rpi.S
@@ -53,8 +53,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBI
 ENTRY(mmioset)
 ENTRY(memset)
 ENTRY(__memset)
-ENTRY(__memset32)
-ENTRY(__memset64)
 
         S       .req    a1
         DAT0    .req    a2
@@ -64,10 +62,14 @@ ENTRY(__memset64)
         DAT3    .req    lr
 
         orr     DAT0, DAT0, DAT0, lsl #8
-        push    {S, lr}
         orr     DAT0, DAT0, DAT0, lsl #16
+
+ENTRY(__memset32)
         mov     DAT1, DAT0
 
+ENTRY(__memset64)
+        push    {S, lr}
+
         /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
         cmp     N, #31
         blo     170f
@@ -89,7 +91,7 @@ ENTRY(__memset64)
         stmcsia S!, {DAT0, DAT1}
 164:    /* Delayed set up of DAT2 and DAT3 so we could use them as scratch registers above */
         mov     DAT2, DAT0
-        mov     DAT3, DAT0
+        mov     DAT3, DAT1
         /* Now the inner loop of 16-byte stores */
 165:    stmia   S!, {DAT0, DAT1, DAT2, DAT3}
         subs    N, N, #16
@@ -105,7 +107,7 @@ ENTRY(__memset64)
 
 170:    /* Short case */
         mov     DAT2, DAT0
-        mov     DAT3, DAT0
+        mov     DAT3, DAT1
         tst     S, #3
         beq     174f
 172:    subs    N, N, #1