corda/src/x86.S

562 lines
10 KiB
ArmAsm
Raw Normal View History

2009-03-15 12:02:36 -06:00
/* Copyright (c) 2008-2009, Avian Contributors
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
There is NO WARRANTY for this software. See license.txt for
details. */
2007-06-28 20:58:48 -06:00
#include "types.h"
// LOCAL(x): name for a file-local label; pastes the ".L" prefix onto x
#define LOCAL(x) .L##x
// GLOBAL(x): name for an exported symbol; a leading underscore is pasted
// onto x for Apple targets and for 32-bit MinGW/Cygwin targets, and x is
// used unchanged everywhere else
#if defined __APPLE__ \
|| ((defined __MINGW32__ || defined __CYGWIN32__) && ! defined __x86_64__)
# define GLOBAL(x) _##x
#else
# define GLOBAL(x) x
#endif
2007-06-28 20:58:48 -06:00
.text
2009-06-11 09:42:07 -06:00
2007-10-03 18:41:54 -06:00
#ifdef __x86_64__
2009-06-11 09:42:07 -06:00
rework VM exception handling; throw OOMEs when appropriate This rather large commit modifies the VM to use non-local returns to throw exceptions instead of simply setting Thread::exception and returning frame-by-frame as it used to. This has several benefits: * Functions no longer need to check Thread::exception after each call which might throw an exception (which would be especially tedious and error-prone now that any function which allocates objects directly or indirectly might throw an OutOfMemoryError) * There's no need to audit the code for calls to functions which previously did not throw exceptions but later do * Performance should be improved slightly due to both the reduced need for conditionals and because unwinding now occurs in a single jump instead of a series of returns The main disadvantages are: * Slightly higher overhead for entering and leaving the VM via the JNI and JDK methods * Non-local returns can make the code harder to read * We must be careful to register destructors for stack-allocated resources with the Thread so they can be called prior to a non-local return The non-local return implementation is similar to setjmp/longjmp, except it uses continuation-passing style to avoid the need for cooperation from the C/C++ compiler. Native C++ exceptions would have also been an option, but that would introduce a dependence on libstdc++, which we're trying to avoid for portability reasons. Finally, this commit ensures that the VM throws an OutOfMemoryError instead of aborting when it reaches its memory ceiling. Currently, we treat the ceiling as a soft limit and temporarily exceed it as necessary to allow garbage collection and certain internal allocations to succeed, but refuse to allocate any Java objects until the heap size drops back below the ceiling.
2010-12-27 15:55:23 -07:00
// byte displacements applied to the checkpoint pointer by vmRun below
// (presumably these mirror the layout of a structure declared elsewhere
// in the project -- verify against that declaration before changing)
#define CHECKPOINT_THREAD 8
#define CHECKPOINT_STACK 48
#define CHECKPOINT_BASE 56
#ifdef __MINGW32__
.globl GLOBAL(detectFeature)
GLOBAL(detectFeature):
   // Win64 entry:
   //   %ecx: mask tested against the %ecx result of CPUID leaf 1
   //   %edx: mask tested against the %edx result of CPUID leaf 1
   // returns %eax = 1 if any masked bit is set, 0 otherwise
   pushq  %rbp
   movq   %rsp,%rbp

   // cpuid overwrites %eax, %ebx, %ecx and %edx, and the masks are
   // parked in %rsi/%rdi, so the incoming value of each goes on the stack
   pushq  %rdx
   pushq  %rcx
   pushq  %rbx
   pushq  %rsi
   pushq  %rdi

   movl   %edx,%esi
   movl   %ecx,%edi

   movl   $1,%eax
   cpuid

   andl   %edi,%ecx
   andl   %esi,%edx
   orl    %edx,%ecx          // ZF set exactly when no masked bit survived

   movl   $0,%eax            // mov keeps the flags produced by orl
   setne  %al

   popq   %rdi
   popq   %rsi
   popq   %rbx
   popq   %rcx
   popq   %rdx
   leave
   ret
2009-06-11 09:42:07 -06:00
.globl GLOBAL(vmNativeCall)
GLOBAL(vmNativeCall):
   // Win64 entry:
   //   %rcx: function
   //   %rdx: arguments (array of 8-byte slots)
   //   %r8:  arguments count
   //   %r9:  return type
   pushq  %rbp

   // nonvolatile registers that serve as scratch below
   pushq  %r12
   pushq  %r13
   pushq  %r14
   pushq  %r15
   movq   %rsp,%rbp          // the epilogue unwinds to this point

   // move the inputs out of the argument registers:
   //   %r10: function
   //   %r11: arguments
   //   %r12: arguments still to be placed
   //   %r13: return type
   movq   %r9,%r13
   movq   %r8,%r12
   movq   %rdx,%r11
   movq   %rcx,%r10

   // initial 32 bytes; slots 0-3 of the outgoing stack area stay unused
   // because those arguments travel in registers
   subq   $32,%rsp

   // each of the first four arguments is loaded into both the integer
   // and the SSE register of its position
   testq  %r12,%r12
   jz     LOCAL(call)
   movq   0(%r11),%rcx
   movq   0(%r11),%xmm0

   decq   %r12
   jz     LOCAL(call)
   movq   8(%r11),%rdx
   movq   8(%r11),%xmm1

   decq   %r12
   jz     LOCAL(call)
   movq   16(%r11),%r8
   movq   16(%r11),%xmm2

   decq   %r12
   jz     LOCAL(call)
   movq   24(%r11),%r9
   movq   24(%r11),%xmm3
   decq   %r12

   // reserve 8 + 8 * (remaining arguments) bytes, rounded down to a
   // multiple of 16 so %rsp keeps its alignment
   leaq   8(,%r12,8),%r15
   andq   $-16,%r15
   subq   %r15,%rsp

   // %r12 becomes the index of the last argument (count - 1)
   addq   $3,%r12

LOCAL(loop):
   // copy slots from the last one down to index 4; index 3 and below
   // were handled above
   cmpq   $3,%r12
   je     LOCAL(call)
   movq   (%r11,%r12,8),%r14
   movq   %r14,(%rsp,%r12,8)
   decq   %r12
   jmp    LOCAL(loop)

LOCAL(call):
   call   *%r10

   // float and double results are copied from %xmm0 into %rax;
   // every other return type leaves %rax as the callee set it
   cmpq   $VOID_TYPE,%r13
   je     LOCAL(exit)
   cmpq   $FLOAT_TYPE,%r13
   je     LOCAL(copy)
   cmpq   $DOUBLE_TYPE,%r13
   jne    LOCAL(exit)

LOCAL(copy):
   movq   %xmm0,%rax

LOCAL(exit):
   movq   %rbp,%rsp

   // return nonvolatile registers to their former state
   popq   %r15
   popq   %r14
   popq   %r13
   popq   %r12
   popq   %rbp
   ret
.globl GLOBAL(vmJump)
GLOBAL(vmJump):
   // Win64 entry:
   //   %rcx:      address to jump to
   //   %rdx:      value for %rbp
   //   %r8:       value for %rsp
   //   %r9:       value for %rbx
   //   40(%rsp):  value for %rax
   //   48(%rsp):  value for %rdx
   // the stack-passed values are fetched before %rsp is replaced, and
   // %rdx is copied to %rbp before it is overwritten
   movq   40(%rsp),%rax
   movq   %rdx,%rbp
   movq   48(%rsp),%rdx
   movq   %r9,%rbx
   movq   %r8,%rsp
   jmp    *%rcx
rework VM exception handling; throw OOMEs when appropriate This rather large commit modifies the VM to use non-local returns to throw exceptions instead of simply setting Thread::exception and returning frame-by-frame as it used to. This has several benefits: * Functions no longer need to check Thread::exception after each call which might throw an exception (which would be especially tedious and error-prone now that any function which allocates objects directly or indirectly might throw an OutOfMemoryError) * There's no need to audit the code for calls to functions which previously did not throw exceptions but later do * Performance should be improved slightly due to both the reduced need for conditionals and because unwinding now occurs in a single jump instead of a series of returns The main disadvantages are: * Slightly higher overhead for entering and leaving the VM via the JNI and JDK methods * Non-local returns can make the code harder to read * We must be careful to register destructors for stack-allocated resources with the Thread so they can be called prior to a non-local return The non-local return implementation is similar to setjmp/longjmp, except it uses continuation-passing style to avoid the need for cooperation from the C/C++ compiler. Native C++ exceptions would have also been an option, but that would introduce a dependence on libstdc++, which we're trying to avoid for portability reasons. Finally, this commit ensures that the VM throws an OutOfMemoryError instead of aborting when it reaches its memory ceiling. Currently, we treat the ceiling as a soft limit and temporarily exceed it as necessary to allow garbage collection and certain internal allocations to succeed, but refuse to allocate any Java objects until the heap size drops back below the ceiling.
2010-12-27 15:55:23 -07:00
#define VMRUN_FRAME_SIZE 80

.globl GLOBAL(vmRun)
GLOBAL(vmRun):
   // Win64 entry:
   //   %rcx: function
   //   %rdx: arguments
   //   %r8 : checkpoint
   // Records this frame in the checkpoint, then calls
   // function(<word at CHECKPOINT_THREAD in the checkpoint>, arguments).
   pushq  %rbp
   movq   %rsp,%rbp
   subq   $VMRUN_FRAME_SIZE,%rsp

   // registers the Win64 convention requires us to preserve; they sit
   // at fixed offsets from %rsp so that the code at vmRun_returnAddress
   // can reload them from the stack pointer alone
   // NOTE(review): the slots at 16(%rsp) and 24(%rsp) lie inside the
   // 32 bytes directly above the return address that a Win64 callee may
   // use as home space -- confirm the called function never spills
   // there, or move the save area above 32(%rsp)
   movq   %rbx,16(%rsp)
   movq   %r12,24(%rsp)
   movq   %r13,32(%rsp)
   movq   %r14,40(%rsp)
   movq   %r15,48(%rsp)
   movq   %rsi,56(%rsp)
   movq   %rdi,64(%rsp)

   // the checkpoint is the third argument (%r8); %rcx is the function
   // pointer and %rdx the arguments pointer, so neither may be used as
   // the base for these accesses
   movq   %rsp,CHECKPOINT_STACK(%r8)
   movq   %rbp,CHECKPOINT_BASE(%r8)

   // first argument: thread taken from the checkpoint;
   // second argument: %rdx still holds the arguments pointer
   movq   %rcx,%r11
   movq   CHECKPOINT_THREAD(%r8),%rcx
   call   *%r11

.globl GLOBAL(vmRun_returnAddress)
GLOBAL(vmRun_returnAddress):
   // reached by the normal return of the call above; the label is
   // exported, presumably so other code can resume here with the
   // recorded %rsp/%rbp (verify against callers)
   movq   16(%rsp),%rbx
   movq   24(%rsp),%r12
   movq   32(%rsp),%r13
   movq   40(%rsp),%r14
   movq   48(%rsp),%r15
   movq   56(%rsp),%rsi
   movq   64(%rsp),%rdi
   addq   $VMRUN_FRAME_SIZE,%rsp
   popq   %rbp
   ret
2009-06-11 09:42:07 -06:00
#else // not __MINGW32__
.globl GLOBAL(detectFeature)
GLOBAL(detectFeature):
   // System V entry:
   //   %edi: mask tested against the %ecx result of CPUID leaf 1
   //   %esi: mask tested against the %edx result of CPUID leaf 1
   // returns %eax = 1 if any masked bit is set, 0 otherwise
   pushq  %rbp
   movq   %rsp,%rbp

   // cpuid overwrites %eax, %ebx, %ecx and %edx; the incoming values
   // of the last three are put back before returning
   pushq  %rdx
   pushq  %rcx
   pushq  %rbx

   movl   $1,%eax
   cpuid

   andl   %edi,%ecx
   andl   %esi,%edx
   orl    %edx,%ecx          // ZF set exactly when no masked bit survived

   movl   $0,%eax            // mov keeps the flags produced by orl
   setne  %al

   popq   %rbx
   popq   %rcx
   popq   %rdx
   leave
   ret
2009-06-11 09:42:07 -06:00
.globl GLOBAL(vmNativeCall)
GLOBAL(vmNativeCall):
   // Calls `function` after copying `stackSize` bytes from `stack` onto
   // the machine stack and, when the table pointers are non-null,
   // loading the six integer argument registers from gprTable and the
   // eight SSE argument registers from sseTable.  For FLOAT_TYPE and
   // DOUBLE_TYPE the result is moved from %xmm0 to %rax.
   pushq  %rbp
   movq   %rsp,%rbp

   // %rdi aka -48(%rbp): function
   // %rsi aka -40(%rbp): stack
   // %rdx aka -32(%rbp): stackSize
   // %rcx aka -24(%rbp): gprTable
   // %r8  aka -16(%rbp): sseTable
   // %r9  aka  -8(%rbp): returnType

   // save our argument registers so we can clobber them
   pushq  %r9
   pushq  %r8
   pushq  %rcx
   pushq  %rdx
   pushq  %rsi
   pushq  %rdi

   // reserve space for arguments passed via memory
   subq   %rdx,%rsp

   // align to a 16 byte boundary
   andq   $0xFFFFFFFFFFFFFFF0,%rsp

   // copy memory arguments into place, 8 bytes per iteration;
   // %rcx is the byte offset into both the source and the new stack area
   movq   $0,%rcx
   jmp    LOCAL(test)

LOCAL(loop):
   movq   %rcx,%rax
   movq   %rcx,%rdx
   addq   %rsp,%rdx          // %rdx = destination slot
   addq   -40(%rbp),%rax     // %rax = source slot
   movq   (%rax),%rax
   movq   %rax,(%rdx)
   addq   $8,%rcx

LOCAL(test):
   cmpq   -32(%rbp),%rcx
   jb     LOCAL(loop)

   // do we need to load the general-purpose registers?
   cmpq   $0,-24(%rbp)
   je     LOCAL(sse)

   // yes, we do: six consecutive 8-byte slots
   movq   -24(%rbp),%rax
   movq   0(%rax),%rdi
   movq   8(%rax),%rsi
   movq   16(%rax),%rdx
   movq   24(%rax),%rcx
   movq   32(%rax),%r8
   movq   40(%rax),%r9

LOCAL(sse):
   // do we need to load the SSE registers?
   cmpq   $0,-16(%rbp)
   je     LOCAL(call)

   // yes, we do: eight consecutive 8-byte slots, so the last one is at
   // offset 7 * 8 = 56 (offset 64 would be a ninth slot)
   movq   -16(%rbp),%rax
   movq   0(%rax),%xmm0
   movq   8(%rax),%xmm1
   movq   16(%rax),%xmm2
   movq   24(%rax),%xmm3
   movq   32(%rax),%xmm4
   movq   40(%rax),%xmm5
   movq   48(%rax),%xmm6
   movq   56(%rax),%xmm7

LOCAL(call):
   call   *-48(%rbp)

   // handle return value based on expected type
   movq   -8(%rbp),%rcx

LOCAL(void):
   cmpq   $VOID_TYPE,%rcx
   jne    LOCAL(float)
   jmp    LOCAL(exit)

LOCAL(float):
   cmpq   $FLOAT_TYPE,%rcx
   je     LOCAL(copy)
   cmpq   $DOUBLE_TYPE,%rcx
   jne    LOCAL(exit)

LOCAL(copy):
#ifdef __APPLE__
   // as of OS X 10.6, the Apple assembler still rejects movq SSE,GPR,
   // but movd does the same thing, despite the name
   movd   %xmm0,%rax
#else
   movq   %xmm0,%rax
#endif

LOCAL(exit):
   movq   %rbp,%rsp
   popq   %rbp
   ret
2007-10-03 18:41:54 -06:00
.globl GLOBAL(vmJump)
GLOBAL(vmJump):
   // System V entry:
   //   %rdi: address to jump to
   //   %rsi: value for %rbp
   //   %rdx: value for %rsp
   //   %rcx: value for %rbx
   //   %r8:  value for %rax
   //   %r9:  value for %rdx
   movq   %r8,%rax
   movq   %rcx,%rbx
   movq   %rsi,%rbp
   movq   %rdx,%rsp          // consume %rdx before it is overwritten
   movq   %r9,%rdx
   jmp    *%rdi
rework VM exception handling; throw OOMEs when appropriate This rather large commit modifies the VM to use non-local returns to throw exceptions instead of simply setting Thread::exception and returning frame-by-frame as it used to. This has several benefits: * Functions no longer need to check Thread::exception after each call which might throw an exception (which would be especially tedious and error-prone now that any function which allocates objects directly or indirectly might throw an OutOfMemoryError) * There's no need to audit the code for calls to functions which previously did not throw exceptions but later do * Performance should be improved slightly due to both the reduced need for conditionals and because unwinding now occurs in a single jump instead of a series of returns The main disadvantages are: * Slightly higher overhead for entering and leaving the VM via the JNI and JDK methods * Non-local returns can make the code harder to read * We must be careful to register destructors for stack-allocated resources with the Thread so they can be called prior to a non-local return The non-local return implementation is similar to setjmp/longjmp, except it uses continuation-passing style to avoid the need for cooperation from the C/C++ compiler. Native C++ exceptions would have also been an option, but that would introduce a dependence on libstdc++, which we're trying to avoid for portability reasons. Finally, this commit ensures that the VM throws an OutOfMemoryError instead of aborting when it reaches its memory ceiling. Currently, we treat the ceiling as a soft limit and temporarily exceed it as necessary to allow garbage collection and certain internal allocations to succeed, but refuse to allocate any Java objects until the heap size drops back below the ceiling.
2010-12-27 15:55:23 -07:00
#define VMRUN_FRAME_SIZE 64

.globl GLOBAL(vmRun)
GLOBAL(vmRun):
   // System V entry:
   //   %rdi: function
   //   %rsi: arguments
   //   %rdx: checkpoint
   // Records this frame in the checkpoint, then calls
   // function(<word at CHECKPOINT_THREAD in the checkpoint>, arguments).
   pushq  %rbp
   movq   %rsp,%rbp
   subq   $VMRUN_FRAME_SIZE,%rsp

   // record the frame in the checkpoint
   movq   %rbp,CHECKPOINT_BASE(%rdx)
   movq   %rsp,CHECKPOINT_STACK(%rdx)

   // callee-saved registers go to fixed offsets from %rsp so that the
   // code at vmRun_returnAddress can reload them from the stack pointer
   movq   %r15,48(%rsp)
   movq   %r14,40(%rsp)
   movq   %r13,32(%rsp)
   movq   %r12,24(%rsp)
   movq   %rbx,16(%rsp)

   // first argument: thread taken from the checkpoint;
   // second argument: %rsi still holds the arguments pointer
   movq   %rdi,%r11
   movq   CHECKPOINT_THREAD(%rdx),%rdi
   call   *%r11

.globl GLOBAL(vmRun_returnAddress)
GLOBAL(vmRun_returnAddress):
   // reload what was saved above and tear the frame down
   movq   48(%rsp),%r15
   movq   40(%rsp),%r14
   movq   32(%rsp),%r13
   movq   24(%rsp),%r12
   movq   16(%rsp),%rbx
   addq   $VMRUN_FRAME_SIZE,%rsp
   popq   %rbp
   ret
#endif // not __MINGW32__
2007-10-03 18:41:54 -06:00
#elif defined __i386__
2007-10-24 11:24:19 -06:00
rework VM exception handling; throw OOMEs when appropriate This rather large commit modifies the VM to use non-local returns to throw exceptions instead of simply setting Thread::exception and returning frame-by-frame as it used to. This has several benefits: * Functions no longer need to check Thread::exception after each call which might throw an exception (which would be especially tedious and error-prone now that any function which allocates objects directly or indirectly might throw an OutOfMemoryError) * There's no need to audit the code for calls to functions which previously did not throw exceptions but later do * Performance should be improved slightly due to both the reduced need for conditionals and because unwinding now occurs in a single jump instead of a series of returns The main disadvantages are: * Slightly higher overhead for entering and leaving the VM via the JNI and JDK methods * Non-local returns can make the code harder to read * We must be careful to register destructors for stack-allocated resources with the Thread so they can be called prior to a non-local return The non-local return implementation is similar to setjmp/longjmp, except it uses continuation-passing style to avoid the need for cooperation from the C/C++ compiler. Native C++ exceptions would have also been an option, but that would introduce a dependence on libstdc++, which we're trying to avoid for portability reasons. Finally, this commit ensures that the VM throws an OutOfMemoryError instead of aborting when it reaches its memory ceiling. Currently, we treat the ceiling as a soft limit and temporarily exceed it as necessary to allow garbage collection and certain internal allocations to succeed, but refuse to allocate any Java objects until the heap size drops back below the ceiling.
2010-12-27 15:55:23 -07:00
// byte displacements applied to the checkpoint pointer by the i386 vmRun
// (presumably these mirror the 32-bit layout of a structure declared
// elsewhere in the project -- verify against that declaration)
#define CHECKPOINT_THREAD 4
#define CHECKPOINT_STACK 24
#define CHECKPOINT_BASE 28
.globl GLOBAL(detectFeature)
GLOBAL(detectFeature):
   // i386 entry (arguments on the stack):
   //    8(%ebp): mask tested against the %ecx result of CPUID leaf 1
   //   12(%ebp): mask tested against the %edx result of CPUID leaf 1
   // returns %eax = 1 if any masked bit is set, 0 otherwise
   pushl  %ebp
   movl   %esp,%ebp

   // cpuid overwrites %eax, %ebx, %ecx and %edx, and the masks are
   // held in %esi/%edi, so the incoming value of each goes on the stack
   pushl  %edx
   pushl  %ecx
   pushl  %ebx
   pushl  %esi
   pushl  %edi

   movl   8(%ebp),%edi
   movl   12(%ebp),%esi

   movl   $1,%eax
   cpuid

   andl   %edi,%ecx
   andl   %esi,%edx
   orl    %edx,%ecx          // ZF set exactly when no masked bit survived

   movl   $0,%eax            // mov keeps the flags produced by orl
   setne  %al

   popl   %edi
   popl   %esi
   popl   %ebx
   popl   %ecx
   popl   %edx
   leave
   ret
.globl GLOBAL(vmNativeCall)
GLOBAL(vmNativeCall):
   pushl  %ebp
   movl   %esp,%ebp

   //  8(%ebp): function
   // 12(%ebp): stack
   // 16(%ebp): stackSize
   // 20(%ebp): returnType

   // make room for stackSize bytes of outgoing arguments
   subl   16(%ebp),%esp

# ifdef __APPLE__
   // Darwin wants the stack on a 16 byte boundary
   andl   $0xFFFFFFF0,%esp
# endif

   // copy the arguments 4 bytes at a time;
   // %ecx = byte offset, %edx = source base
   xorl   %ecx,%ecx
   movl   12(%ebp),%edx
   jmp    LOCAL(test)

LOCAL(loop):
   movl   (%edx,%ecx),%eax
   movl   %eax,(%esp,%ecx)
   addl   $4,%ecx

LOCAL(test):
   cmpl   16(%ebp),%ecx
   jb     LOCAL(loop)

   call   *8(%ebp)

   // pick up the result according to returnType
   movl   20(%ebp),%ecx

   // void and 64-bit integer results need no further work
   cmpl   $VOID_TYPE,%ecx
   je     LOCAL(exit)
   cmpl   $INT64_TYPE,%ecx
   je     LOCAL(exit)

   cmpl   $FLOAT_TYPE,%ecx
   jne    LOCAL(double)

   // float: pop the x87 result into the (now unused) function slot and
   // return its bits in %eax
   fstps  8(%ebp)
   movl   8(%ebp),%eax
   jmp    LOCAL(exit)

LOCAL(double):
   cmpl   $DOUBLE_TYPE,%ecx
   jne    LOCAL(exit)

   // double: pop the x87 result over the function and stack slots and
   // return its bits in %edx:%eax
   fstpl  8(%ebp)
   movl   8(%ebp),%eax
   movl   12(%ebp),%edx

LOCAL(exit):
   leave
   ret
.globl GLOBAL(vmJump)
GLOBAL(vmJump):
   // i386 entry (arguments on the stack):
   //    4(%esp): address to jump to
   //    8(%esp): value for %ebp
   //   12(%esp): value for %esp
   //   16(%esp): value for %ebx
   //   20(%esp): value for %eax
   //   24(%esp): value for %edx
   // every argument is read through %esp, so %esp is replaced last
   movl   24(%esp),%edx
   movl   20(%esp),%eax
   movl   16(%esp),%ebx
   movl   8(%esp),%ebp
   movl   4(%esp),%esi
   movl   12(%esp),%esp
   jmp    *%esi
2007-10-03 18:41:54 -06:00
rework VM exception handling; throw OOMEs when appropriate This rather large commit modifies the VM to use non-local returns to throw exceptions instead of simply setting Thread::exception and returning frame-by-frame as it used to. This has several benefits: * Functions no longer need to check Thread::exception after each call which might throw an exception (which would be especially tedious and error-prone now that any function which allocates objects directly or indirectly might throw an OutOfMemoryError) * There's no need to audit the code for calls to functions which previously did not throw exceptions but later do * Performance should be improved slightly due to both the reduced need for conditionals and because unwinding now occurs in a single jump instead of a series of returns The main disadvantages are: * Slightly higher overhead for entering and leaving the VM via the JNI and JDK methods * Non-local returns can make the code harder to read * We must be careful to register destructors for stack-allocated resources with the Thread so they can be called prior to a non-local return The non-local return implementation is similar to setjmp/longjmp, except it uses continuation-passing style to avoid the need for cooperation from the C/C++ compiler. Native C++ exceptions would have also been an option, but that would introduce a dependence on libstdc++, which we're trying to avoid for portability reasons. Finally, this commit ensures that the VM throws an OutOfMemoryError instead of aborting when it reaches its memory ceiling. Currently, we treat the ceiling as a soft limit and temporarily exceed it as necessary to allow garbage collection and certain internal allocations to succeed, but refuse to allocate any Java objects until the heap size drops back below the ceiling.
2010-12-27 15:55:23 -07:00
// 24 rather than 32: when the caller had %esp on a 16-byte boundary at its
// call instruction, %esp is 12 (mod 16) on entry and 8 (mod 16) after
// pushl %ebp; subtracting 24 puts it back on a 16-byte boundary for the
// call below.  The frame only uses offsets 0 through 19.
#define VMRUN_FRAME_SIZE 24

.globl GLOBAL(vmRun)
GLOBAL(vmRun):
   //  8(%ebp): function
   // 12(%ebp): arguments
   // 16(%ebp): checkpoint
   // Records this frame in the checkpoint, then calls
   // function(<word at CHECKPOINT_THREAD in the checkpoint>, arguments).
   pushl  %ebp
   movl   %esp,%ebp
   subl   $VMRUN_FRAME_SIZE,%esp

   // callee-saved registers go to fixed offsets from %esp so that the
   // code at vmRun_returnAddress can reload them from the stack pointer
   movl   %ebx,8(%esp)
   movl   %esi,12(%esp)
   movl   %edi,16(%esp)

   // outgoing second argument: the arguments pointer
   movl   12(%ebp),%eax
   movl   %eax,4(%esp)

   // outgoing first argument: thread taken from the checkpoint
   movl   16(%ebp),%ecx
   movl   CHECKPOINT_THREAD(%ecx),%eax
   movl   %eax,0(%esp)

   // record the frame in the checkpoint
   movl   %esp,CHECKPOINT_STACK(%ecx)
   movl   %ebp,CHECKPOINT_BASE(%ecx)

   call   *8(%ebp)

.globl GLOBAL(vmRun_returnAddress)
GLOBAL(vmRun_returnAddress):
   // reload what was saved above and tear the frame down
   movl   8(%esp),%ebx
   movl   12(%esp),%esi
   movl   16(%esp),%edi
   addl   $VMRUN_FRAME_SIZE,%esp
   popl   %ebp
   ret
#endif // __i386__