use inline asm for sse detection

This commit is contained in:
Joshua Warner 2014-07-29 13:36:45 -06:00
parent 51b510cbea
commit ef3f77695c
3 changed files with 34 additions and 125 deletions

View File

@ -13,11 +13,39 @@
#include "context.h"
#ifndef _MSC_VER
#include <cpuid.h>
#else
// MSVC implementation:
static int __get_cpuid(unsigned int __level,
unsigned int* __eax,
unsigned int* __ebx,
unsigned int* __ecx,
unsigned int* __edx)
{
_asm
{
mov eax, __level;
cpuid;
mov[__eax], eax;
mov[__ebx], ebx;
mov[__ecx], ecx;
mov[__edx], edx;
}
return 1;
}
#define bit_SSE (1 << 25)
#define bit_SSE2 (1 << 26)
#endif
namespace avian {
namespace codegen {
namespace x86 {
extern "C" bool detectFeature(unsigned ecx, unsigned edx);
// TODO: this should be moved such that it's called by the client (e.g. whatever
// allocates the Architecture). That way, we can link the x86 code generator on
// another architecture (e.g. arm).
bool useSSE(ArchitectureContext* c)
{
@ -27,8 +55,11 @@ bool useSSE(ArchitectureContext* c)
} else if (c->useNativeFeatures) {
static int supported = -1;
if (supported == -1) {
supported = detectFeature(0, 0x2000000) // SSE 1
and detectFeature(0, 0x4000000); // SSE 2
unsigned eax;
unsigned ebx;
unsigned ecx;
unsigned edx;
supported = __get_cpuid(1, &eax, &ebx, &ecx, &edx) && (edx & bit_SSE) && (edx & bit_SSE2);
}
return supported;
} else {

View File

@ -27,37 +27,6 @@
#define CHECKPOINT_STACK 48
#ifdef __MINGW32__
// detectFeature -- variant assembled when __MINGW32__ is defined.
// Executes cpuid with %eax = 1 and returns 1 in %eax if any bit of the
// incoming %ecx value is set in cpuid's %ecx result, or any bit of the
// incoming %edx value is set in cpuid's %edx result; returns 0 otherwise.
.globl GLOBAL(detectFeature)
GLOBAL(detectFeature):
pushq %rbp
movq %rsp, %rbp
// Save every register modified below other than %rax (the return value).
pushq %rdx
pushq %rcx
pushq %rbx
pushq %rsi
pushq %rdi
// Copy the two masks aside: cpuid overwrites %ecx and %edx.
movl %ecx, %edi
movl %edx, %esi
// Query cpuid leaf 1.
movl $1, %eax
cpuid
// Keep only the requested bits of each result register and combine them.
andl %esi, %edx
andl %edi, %ecx
orl %edx, %ecx
// Zero means none of the requested bits were reported.
test %ecx, %ecx
je LOCAL(NOSSE)
movl $1, %eax
jmp LOCAL(SSEEND)
LOCAL(NOSSE):
movl $0, %eax
LOCAL(SSEEND):
// Restore the saved registers in reverse order of the pushes.
popq %rdi
popq %rsi
popq %rbx
popq %rcx
popq %rdx
movq %rbp,%rsp
popq %rbp
ret
.globl GLOBAL(vmNativeCall)
GLOBAL(vmNativeCall):
@ -219,31 +188,6 @@ GLOBAL(vmRun_returnAddress):
ret
#else // not __MINGW32__
// detectFeature -- variant assembled when __MINGW32__ is not defined.
// Executes cpuid with %eax = 1 and returns 1 in %eax if any bit of the
// incoming %edi value is set in cpuid's %ecx result, or any bit of the
// incoming %esi value is set in cpuid's %edx result; returns 0 otherwise.
.globl GLOBAL(detectFeature)
GLOBAL(detectFeature):
pushq %rbp
movq %rsp, %rbp
// Save the registers modified below other than %rax (the return value).
pushq %rdx
pushq %rcx
pushq %rbx
// Query cpuid leaf 1. The masks in %edi/%esi are not touched by cpuid, so
// they are used in place.
movl $1, %eax
cpuid
// Keep only the requested bits of each result register and combine them.
andl %esi, %edx
andl %edi, %ecx
orl %edx, %ecx
// Zero means none of the requested bits were reported.
test %ecx, %ecx
je LOCAL(NOSSE)
movl $1, %eax
jmp LOCAL(SSEEND)
LOCAL(NOSSE):
movl $0, %eax
LOCAL(SSEEND):
// Restore the saved registers in reverse order of the pushes.
popq %rbx
popq %rcx
popq %rdx
movq %rbp,%rsp
popq %rbp
ret
.globl GLOBAL(vmNativeCall)
GLOBAL(vmNativeCall):
@ -403,38 +347,6 @@ GLOBAL(vmRun_returnAddress):
#define CHECKPOINT_STACK 24
#define CHECKPOINT_BASE 28
// detectFeature -- 32-bit variant; both masks are read from the stack.
// Executes cpuid with %eax = 1 and returns 1 in %eax if any bit of the first
// stack argument (8(%ebp)) is set in cpuid's %ecx result, or any bit of the
// second stack argument (12(%ebp)) is set in cpuid's %edx result; returns 0
// otherwise.
.globl GLOBAL(detectFeature)
GLOBAL(detectFeature):
pushl %ebp
movl %esp, %ebp
// Save every register modified below other than %eax (the return value).
pushl %edx
pushl %ecx
pushl %ebx
pushl %esi
pushl %edi
// Load the masks: %esi is applied to cpuid's %edx, %edi to cpuid's %ecx.
movl 12(%ebp), %esi
movl 8(%ebp), %edi
// Query cpuid leaf 1.
movl $1, %eax
cpuid
// Keep only the requested bits of each result register and combine them.
andl %esi, %edx
andl %edi, %ecx
orl %edx, %ecx
// Zero means none of the requested bits were reported.
test %ecx, %ecx
je LOCAL(NOSSE)
movl $1, %eax
jmp LOCAL(SSEEND)
LOCAL(NOSSE):
movl $0, %eax
LOCAL(SSEEND):
// Restore the saved registers in reverse order of the pushes.
popl %edi
popl %esi
popl %ebx
popl %ecx
popl %edx
movl %ebp,%esp
popl %ebp
ret
.globl GLOBAL(vmNativeCall)
GLOBAL(vmNativeCall):
pushl %ebp

View File

@ -30,40 +30,6 @@ CHECKPOINT_BASE equ 28
_TEXT SEGMENT
; detectFeature -- 32-bit MASM variant; both masks are read from the stack.
; Executes cpuid with eax = 1 and returns 1 in eax if any bit of the first
; stack argument ([8+ebp]) is set in cpuid's ecx result, or any bit of the
; second stack argument ([12+ebp]) is set in cpuid's edx result; returns 0
; otherwise.
public C detectFeature
detectFeature:
push ebp
mov ebp,esp
; Save every register modified below other than eax (the return value).
push edx
push ecx
push ebx
push esi
push edi
; Load the masks: esi is applied to cpuid's edx, edi to cpuid's ecx.
mov esi,ds:dword ptr[12+ebp]
mov edi,ds:dword ptr[8+ebp]
; Query cpuid leaf 1.
mov eax,1
cpuid
; Keep only the requested bits of each result register and combine them.
and edx,esi
and ecx,edi
or ecx,edx
; Zero means none of the requested bits were reported.
test ecx,ecx
je LNOSSE
mov eax,1
jmp LSSEEND
LNOSSE:
mov eax,0
LSSEEND:
; Restore the saved registers in reverse order of the pushes.
pop edi
pop esi
pop ebx
pop ecx
pop edx
mov esp,ebp
pop ebp
ret
public C vmNativeCall
vmNativeCall:
push ebp