mirror of
https://github.com/openwrt/openwrt.git
synced 2025-01-01 11:36:49 +00:00
e4e278d82a
SVN-Revision: 23072
356 lines
8.2 KiB
Diff
356 lines
8.2 KiB
Diff
---
|
|
arch/arm/boot/compressed/lib1funcs.S | 348 +++++++++++++++++++++++++++++++++++
|
|
1 file changed, 348 insertions(+)
|
|
|
|
--- /dev/null
|
|
+++ linux-2.6.35/arch/arm/boot/compressed/lib1funcs.S
|
|
@@ -0,0 +1,348 @@
|
|
+/*
|
|
+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
|
|
+ *
|
|
+ * Author: Nicolas Pitre <nico@fluxnic.net>
|
|
+ * - contributed to gcc-3.4 on Sep 30, 2003
|
|
+ * - adapted for the Linux kernel on Oct 2, 2003
|
|
+ */
|
|
+
|
|
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
|
|
+
|
|
+This file is free software; you can redistribute it and/or modify it
|
|
+under the terms of the GNU General Public License as published by the
|
|
+Free Software Foundation; either version 2, or (at your option) any
|
|
+later version.
|
|
+
|
|
+In addition to the permissions in the GNU General Public License, the
|
|
+Free Software Foundation gives you unlimited permission to link the
|
|
+compiled version of this file into combinations with other programs,
|
|
+and to distribute those combinations without any restriction coming
|
|
+from the use of this file. (The General Public License restrictions
|
|
+do apply in other respects; for example, they cover modification of
|
|
+the file, and distribution when not linked into a combine
|
|
+executable.)
|
|
+
|
|
+This file is distributed in the hope that it will be useful, but
|
|
+WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+General Public License for more details.
|
|
+
|
|
+You should have received a copy of the GNU General Public License
|
|
+along with this program; see the file COPYING. If not, write to
|
|
+the Free Software Foundation, 59 Temple Place - Suite 330,
|
|
+Boston, MA 02111-1307, USA. */
|
|
+
|
|
+
|
|
+#include <linux/linkage.h>
|
|
+#include <asm/assembler.h>
|
|
+
|
|
+
|
|
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
|
|
+ @ Unsigned shift-and-subtract divide: the quotient is built in result, while dividend, divisor and curbit are overwritten.
|
|
+#if __LINUX_ARM_ARCH__ >= 5
|
|
+ @ Both users in this file branch away first unless dividend > divisor, so the shift computed below is not negative.
|
|
+ clz \curbit, \divisor @ leading zero count of divisor
|
|
+ clz \result, \dividend @ leading zero count of dividend
|
|
+ sub \result, \curbit, \result @ shift that lines up the top set bits
|
|
+ mov \curbit, #1
|
|
+ mov \divisor, \divisor, lsl \result @ align divisor with dividend
|
|
+ mov \curbit, \curbit, lsl \result @ quotient bit that matches this alignment
|
|
+ mov \result, #0 @ quotient accumulator starts at zero
|
|
+
|
|
+#else
|
|
+
|
|
+ @ Initially shift the divisor left 3 bits if possible,
|
|
+ @ set curbit accordingly. This allows for curbit to be located
|
|
+ @ at the left end of each 4-bit nibble in the division loop
|
|
+ @ to save one loop in most cases.
|
|
+ tst \divisor, #0xe0000000 @ are the top three bits clear?
|
|
+ moveq \divisor, \divisor, lsl #3
|
|
+ moveq \curbit, #8 @ bit 3, matching the 3-bit pre-shift
|
|
+ movne \curbit, #1 @ no pre-shift possible
|
|
+
|
|
+ @ Unless the divisor is very big, shift it up in multiples of
|
|
+ @ four bits, since this is the amount of unwinding in the main
|
|
+ @ division loop. Continue shifting until the divisor is
|
|
+ @ larger than the dividend.
|
|
+1: cmp \divisor, #0x10000000 @ top nibble still empty?
|
|
+ cmplo \divisor, \dividend @ ... and divisor still below dividend?
|
|
+ movlo \divisor, \divisor, lsl #4
|
|
+ movlo \curbit, \curbit, lsl #4 @ keep curbit in step with divisor
|
|
+ blo 1b
|
|
+
|
|
+ @ For very big divisors, we must shift it a bit at a time, or
|
|
+ @ we will be in danger of overflowing.
|
|
+1: cmp \divisor, #0x80000000 @ top bit still clear?
|
|
+ cmplo \divisor, \dividend
|
|
+ movlo \divisor, \divisor, lsl #1
|
|
+ movlo \curbit, \curbit, lsl #1
|
|
+ blo 1b
|
|
+
|
|
+ mov \result, #0 @ quotient accumulator starts at zero
|
|
+
|
|
+#endif
|
|
+
|
|
+ @ Division loop
|
|
+1: cmp \dividend, \divisor @ step 1 of 4: try divisor itself
|
|
+ subhs \dividend, \dividend, \divisor
|
|
+ orrhs \result, \result, \curbit @ record the matching quotient bit
|
|
+ cmp \dividend, \divisor, lsr #1 @ step 2: try divisor / 2
|
|
+ subhs \dividend, \dividend, \divisor, lsr #1
|
|
+ orrhs \result, \result, \curbit, lsr #1
|
|
+ cmp \dividend, \divisor, lsr #2 @ step 3: try divisor / 4
|
|
+ subhs \dividend, \dividend, \divisor, lsr #2
|
|
+ orrhs \result, \result, \curbit, lsr #2
|
|
+ cmp \dividend, \divisor, lsr #3 @ step 4: try divisor / 8
|
|
+ subhs \dividend, \dividend, \divisor, lsr #3
|
|
+ orrhs \result, \result, \curbit, lsr #3
|
|
+ cmp \dividend, #0 @ Early termination?
|
|
+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
|
|
+ movne \divisor, \divisor, lsr #4 @ move divisor down one nibble as well
|
|
+ bne 1b @ loop until dividend or curbit reaches zero
|
|
+
|
|
+.endm
|
|
+
|
|
+
|
|
+.macro ARM_DIV2_ORDER divisor, order
|
|
+ @ Sets order to the bit index of the set bit in a power-of-two divisor, which is how the users in this file invoke it.
|
|
+#if __LINUX_ARM_ARCH__ >= 5
|
|
+
|
|
+ clz \order, \divisor @ leading zero count of divisor
|
|
+ rsb \order, \order, #31 @ order = 31 - count = index of the top set bit
|
|
+
|
|
+#else
|
|
+
|
|
+ cmp \divisor, #(1 << 16) @ set bit in the upper half-word?
|
|
+ movhs \divisor, \divisor, lsr #16 @ this path overwrites divisor
|
|
+ movhs \order, #16
|
|
+ movlo \order, #0
|
|
+
|
|
+ cmp \divisor, #(1 << 8) @ narrow down to a byte
|
|
+ movhs \divisor, \divisor, lsr #8
|
|
+ addhs \order, \order, #8
|
|
+
|
|
+ cmp \divisor, #(1 << 4) @ narrow down to a nibble
|
|
+ movhs \divisor, \divisor, lsr #4
|
|
+ addhs \order, \order, #4
|
|
+
|
|
+ cmp \divisor, #(1 << 2) @ remaining value is 1, 2, 4 or 8 for a power of two
|
|
+ addhi \order, \order, #3 @ above 4, i.e. 8: add 3
|
|
+ addls \order, \order, \divisor, lsr #1 @ 1, 2, 4 add 0, 1, 2
|
|
+
|
|
+#endif
|
|
+
|
|
+.endm
|
|
+
|
|
+
|
|
+.macro ARM_MOD_BODY dividend, divisor, order, spare
|
|
+ @ Unsigned remainder by shift-and-subtract: dividend is reduced in place, while divisor, order and spare are overwritten.
|
|
+#if __LINUX_ARM_ARCH__ >= 5
|
|
+
|
|
+ clz \order, \divisor @ leading zero count of divisor
|
|
+ clz \spare, \dividend @ leading zero count of dividend
|
|
+ sub \order, \order, \spare @ order = shift that lines up the top set bits
|
|
+ mov \divisor, \divisor, lsl \order @ align divisor with dividend
|
|
+
|
|
+#else
|
|
+
|
|
+ mov \order, #0 @ order counts how far divisor gets shifted
|
|
+
|
|
+ @ Unless the divisor is very big, shift it up in multiples of
|
|
+ @ four bits, since this is the amount of unwinding in the main
|
|
+ @ division loop. Continue shifting until the divisor is
|
|
+ @ larger than the dividend.
|
|
+1: cmp \divisor, #0x10000000 @ top nibble still empty?
|
|
+ cmplo \divisor, \dividend @ ... and divisor still below dividend?
|
|
+ movlo \divisor, \divisor, lsl #4
|
|
+ addlo \order, \order, #4
|
|
+ blo 1b
|
|
+
|
|
+ @ For very big divisors, we must shift it a bit at a time, or
|
|
+ @ we will be in danger of overflowing.
|
|
+1: cmp \divisor, #0x80000000 @ top bit still clear?
|
|
+ cmplo \divisor, \dividend
|
|
+ movlo \divisor, \divisor, lsl #1
|
|
+ addlo \order, \order, #1
|
|
+ blo 1b
|
|
+
|
|
+#endif
|
|
+
|
|
+ @ Perform all needed subtractions to keep only the remainder.
|
|
+ @ Do comparisons in batch of 4 first.
|
|
+ subs \order, \order, #3 @ yes, 3 is intended here
|
|
+ blt 2f @ order was below 3: at most 3 steps, skip the batch loop
|
|
+
|
|
+1: cmp \dividend, \divisor @ step 1 of 4: try divisor itself
|
|
+ subhs \dividend, \dividend, \divisor
|
|
+ cmp \dividend, \divisor, lsr #1 @ step 2: try divisor / 2
|
|
+ subhs \dividend, \dividend, \divisor, lsr #1
|
|
+ cmp \dividend, \divisor, lsr #2 @ step 3: try divisor / 4
|
|
+ subhs \dividend, \dividend, \divisor, lsr #2
|
|
+ cmp \dividend, \divisor, lsr #3 @ step 4: try divisor / 8
|
|
+ subhs \dividend, \dividend, \divisor, lsr #3
|
|
+ cmp \dividend, #1 @ signed test: is dividend still at least 1?
|
|
+ mov \divisor, \divisor, lsr #4 @ no S suffix, so the flags above survive
|
|
+ subges \order, \order, #4 @ if so, account for the batch and refresh the flags
|
|
+ bge 1b @ repeat while dividend >= 1 and order stays >= 0
|
|
+
|
|
+ tst \order, #3 @ low two bits clear: no leftover steps
|
|
+ teqne \dividend, #0 @ otherwise, is dividend already zero?
|
|
+ beq 5f @ finished in either case
|
|
+
|
|
+ @ Either 1, 2 or 3 comparison/subtractions are left.
|
|
+2: cmn \order, #2 @ compares order against -2
|
|
+ blt 4f @ below -2: one step left
|
|
+ beq 3f @ equal to -2: two steps left
|
|
+ cmp \dividend, \divisor @ otherwise three steps left
|
|
+ subhs \dividend, \dividend, \divisor
|
|
+ mov \divisor, \divisor, lsr #1
|
|
+3: cmp \dividend, \divisor
|
|
+ subhs \dividend, \dividend, \divisor
|
|
+ mov \divisor, \divisor, lsr #1
|
|
+4: cmp \dividend, \divisor
|
|
+ subhs \dividend, \dividend, \divisor
|
|
+5:
|
|
+.endm
|
|
+
|
|
+
|
|
+ENTRY(__udivsi3)
|
|
+ENTRY(__aeabi_uidiv)
|
|
+ @ Unsigned 32-bit divide: r0 = r0 / r1. May overwrite r1, r2, r3 and the flags.
|
|
+ subs r2, r1, #1 @ r2 = divisor - 1, Z if divisor is 1, C clear if divisor is 0
|
|
+ moveq pc, lr @ dividing by 1: r0 is already the answer
|
|
+ bcc Ldiv0 @ dividing by 0
|
|
+ cmp r0, r1
|
|
+ bls 11f @ dividend <= divisor: quotient is 1 or 0
|
|
+ tst r1, r2 @ divisor & (divisor - 1) is zero for a power of 2
|
|
+ beq 12f
|
|
+
|
|
+ ARM_DIV_BODY r0, r1, r2, r3
|
|
+
|
|
+ mov r0, r2 @ quotient was accumulated in r2
|
|
+ mov pc, lr
|
|
+
|
|
+11: moveq r0, #1 @ dividend equals divisor
|
|
+ movne r0, #0 @ dividend below divisor
|
|
+ mov pc, lr
|
|
+
|
|
+12: ARM_DIV2_ORDER r1, r2
|
|
+
|
|
+ mov r0, r0, lsr r2 @ power-of-2 divisor: shift right by its bit index
|
|
+ mov pc, lr
|
|
+
|
|
+ENDPROC(__udivsi3)
|
|
+ENDPROC(__aeabi_uidiv)
|
|
+
|
|
+ENTRY(__umodsi3)
|
|
+ @ Unsigned 32-bit remainder: r0 = r0 % r1. May overwrite r1, r2, r3 and the flags.
|
|
+ subs r2, r1, #1 @ compare divisor with 1
|
|
+ bcc Ldiv0 @ borrow means divisor is 0
|
|
+ cmpne r0, r1 @ compare dividend with divisor
|
|
+ moveq r0, #0 @ divisor is 1 or equals dividend: remainder 0
|
|
+ tsthi r1, r2 @ see if divisor is power of 2
|
|
+ andeq r0, r0, r2 @ power of 2: mask with divisor - 1
|
|
+ movls pc, lr @ return for every case settled above
|
|
+
|
|
+ ARM_MOD_BODY r0, r1, r2, r3
|
|
+
|
|
+ mov pc, lr @ remainder was left in r0 by the macro
|
|
+
|
|
+ENDPROC(__umodsi3)
|
|
+
|
|
+ENTRY(__divsi3)
|
|
+ENTRY(__aeabi_idiv)
|
|
+ @ Signed 32-bit divide: r0 = r0 / r1. Works on magnitudes and fixes the sign at the end. May overwrite r1, r2, r3, ip and the flags.
|
|
+ cmp r1, #0
|
|
+ eor ip, r0, r1 @ save the sign of the result.
|
|
+ beq Ldiv0 @ flags still come from the cmp above
|
|
+ rsbmi r1, r1, #0 @ loops below use unsigned.
|
|
+ subs r2, r1, #1 @ division by 1 or -1 ?
|
|
+ beq 10f
|
|
+ movs r3, r0
|
|
+ rsbmi r3, r0, #0 @ positive dividend value
|
|
+ cmp r3, r1
|
|
+ bls 11f @ magnitude of dividend <= magnitude of divisor
|
|
+ tst r1, r2 @ divisor is power of 2 ?
|
|
+ beq 12f
|
|
+
|
|
+ ARM_DIV_BODY r3, r1, r0, r2
|
|
+
|
|
+ cmp ip, #0 @ sign bit of ip set when the operand signs differ
|
|
+ rsbmi r0, r0, #0 @ negate the quotient in that case
|
|
+ mov pc, lr
|
|
+
|
|
+10: teq ip, r0 @ same sign ? (ip ^ r0 carries the sign bit of the original divisor)
|
|
+ rsbmi r0, r0, #0 @ divisor was -1: negate the dividend
|
|
+ mov pc, lr
|
|
+
|
|
+11: movlo r0, #0 @ smaller magnitude: quotient 0
|
|
+ moveq r0, ip, asr #31 @ equal magnitudes: 0 or -1 from the result sign
|
|
+ orreq r0, r0, #1 @ ... turned into 1 or -1
|
|
+ mov pc, lr
|
|
+
|
|
+12: ARM_DIV2_ORDER r1, r2
|
|
+
|
|
+ cmp ip, #0 @ result sign, tested before r0 is written
|
|
+ mov r0, r3, lsr r2 @ shift the dividend magnitude by the divisor bit index
|
|
+ rsbmi r0, r0, #0
|
|
+ mov pc, lr
|
|
+
|
|
+ENDPROC(__divsi3)
|
|
+ENDPROC(__aeabi_idiv)
|
|
+
|
|
+ENTRY(__modsi3)
|
|
+ @ Signed 32-bit remainder: r0 = r0 % r1, with the result taking the sign of the dividend. May overwrite r1, r2, r3, ip and the flags.
|
|
+ cmp r1, #0
|
|
+ beq Ldiv0
|
|
+ rsbmi r1, r1, #0 @ loops below use unsigned.
|
|
+ movs ip, r0 @ preserve sign of dividend
|
|
+ rsbmi r0, r0, #0 @ if negative make positive
|
|
+ subs r2, r1, #1 @ compare divisor with 1
|
|
+ cmpne r0, r1 @ compare dividend with divisor
|
|
+ moveq r0, #0 @ divisor magnitude is 1 or equals the dividend magnitude
|
|
+ tsthi r1, r2 @ see if divisor is power of 2
|
|
+ andeq r0, r0, r2 @ power of 2: mask with divisor - 1
|
|
+ bls 10f @ every case settled above skips the loop
|
|
+
|
|
+ ARM_MOD_BODY r0, r1, r2, r3
|
|
+
|
|
+10: cmp ip, #0 @ was the original dividend negative?
|
|
+ rsbmi r0, r0, #0 @ then negate the remainder
|
|
+ mov pc, lr
|
|
+
|
|
+ENDPROC(__modsi3)
|
|
+
|
|
+#ifdef CONFIG_AEABI
|
|
+
|
|
+ENTRY(__aeabi_uidivmod)
|
|
+ @ Unsigned divide with remainder: returns the quotient in r0 and the remainder in r1. Overwrites r2 and r3.
|
|
+ stmfd sp!, {r0, r1, ip, lr} @ keep dividend, divisor, ip and lr across the call
|
|
+ bl __aeabi_uidiv @ r0 = quotient
|
|
+ ldmfd sp!, {r1, r2, ip, lr} @ r1 = saved dividend, r2 = saved divisor
|
|
+ mul r3, r0, r2 @ r3 = quotient * divisor
|
|
+ sub r1, r1, r3 @ r1 = dividend - quotient * divisor
|
|
+ mov pc, lr
|
|
+
|
|
+ENDPROC(__aeabi_uidivmod)
|
|
+
|
|
+ENTRY(__aeabi_idivmod)
|
|
+ @ Signed divide with remainder: returns the quotient in r0 and the remainder in r1. Overwrites r2 and r3.
|
|
+ stmfd sp!, {r0, r1, ip, lr} @ keep dividend, divisor, ip and lr across the call
|
|
+ bl __aeabi_idiv @ r0 = quotient
|
|
+ ldmfd sp!, {r1, r2, ip, lr} @ r1 = saved dividend, r2 = saved divisor
|
|
+ mul r3, r0, r2 @ r3 = quotient * divisor
|
|
+ sub r1, r1, r3 @ r1 = dividend - quotient * divisor
|
|
+ mov pc, lr
|
|
+
|
|
+ENDPROC(__aeabi_idivmod)
|
|
+
|
|
+#endif
|
|
+
|
|
+Ldiv0:
|
|
+ @ Divide-by-zero path shared by the routines above: calls __div0, then returns 0 in r0 to the caller of the divide routine.
|
|
+ str lr, [sp, #-8]! @ save lr on the stack, moving sp down by 8
|
|
+ bl __div0 @ handler is not defined in this file
|
|
+ mov r0, #0 @ About as wrong as it could be.
|
|
+ ldr pc, [sp], #8 @ load the saved lr into pc and put sp back
|
|
+
|
|
+
|