diff --git a/CMakeLists.txt b/CMakeLists.txt
index 71cade07..b34b3768 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -514,12 +514,12 @@ else()
             endif()
             if(NOT WHISPER_NO_AVX512)
                 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw")
-            endif()
-            if(NOT WHISPER_NO_AVX512_VBMI)
-                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vbmi")
-            endif()
-            if(NOT WHISPER_NO_AVX512_VNNI)
-                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vnni")
+                if(NOT WHISPER_NO_AVX512_VBMI)
+                    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vbmi")
+                endif()
+                if(NOT WHISPER_NO_AVX512_VNNI)
+                    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vnni")
+                endif()
             endif()
             if(NOT WHISPER_NO_FMA)
                 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
diff --git a/Makefile b/Makefile
index b7e5a0e9..901fe216 100644
--- a/Makefile
+++ b/Makefile
@@ -142,60 +142,69 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
 		CPUINFO_CMD := sysinfo -cpu
 	endif
 
+	# x86 ISA extensions (chronological order)
 	ifdef CPUINFO_CMD
-		AVX_M := $(shell $(CPUINFO_CMD) | grep -iwE 'AVX|AVX1.0')
-		ifneq (,$(AVX_M))
-			CFLAGS += -mavx
-			CXXFLAGS += -mavx
-		endif
-
-		AVX2_M := $(shell $(CPUINFO_CMD) | grep -iw 'AVX2')
-		ifneq (,$(AVX2_M))
-			CFLAGS += -mavx2
-			CXXFLAGS += -mavx2
-		endif
-
-		AVX512F_M := $(shell $(CPUINFO_CMD) | grep -iw 'AVX512F')
-		ifneq (,$(AVX512F_M))
-			CFLAGS += -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw
-			CXXFLAGS += -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw
-		endif
-
-		AVX512VNNI_M := $(shell $(CPUINFO_CMD) | grep -iwE 'AVX512_VNNI|AVX512VNNI')
-		ifneq (,$(AVX512VNNI_M))
-			CFLAGS += -mavx512vnni
-			CXXFLAGS += -mavx512vnni
-		endif
-
-		AVX512VBMI_M := $(shell $(CPUINFO_CMD) | grep -iw 'AVX512VBMI')
-		ifneq (,$(AVX512VBMI_M))
-			CFLAGS += -mavx512vbmi
-			CXXFLAGS += -mavx512vbmi
-		endif
-
-		FMA_M := $(shell $(CPUINFO_CMD) | grep -iw 'FMA')
-		ifneq (,$(FMA_M))
-			CFLAGS += -mfma
-			CXXFLAGS += -mfma
-		endif
-
-		F16C_M := $(shell $(CPUINFO_CMD) | grep -iw 'F16C')
-		ifneq (,$(F16C_M))
-			CFLAGS += -mf16c
-			CXXFLAGS += -mf16c
-		endif
-
 		SSE3_M := $(shell $(CPUINFO_CMD) | grep -iwE 'PNI|SSE3')
+		SSSE3_M := $(shell $(CPUINFO_CMD) | grep -iw 'SSSE3')
+		AVX_M := $(shell $(CPUINFO_CMD) | grep -iwE 'AVX|AVX1.0')
+		F16C_M := $(shell $(CPUINFO_CMD) | grep -iw 'F16C')
+		FMA_M := $(shell $(CPUINFO_CMD) | grep -iw 'FMA')
+		AVX2_M := $(shell $(CPUINFO_CMD) | grep -iw 'AVX2')
+		AVX512F_M := $(shell $(CPUINFO_CMD) | grep -iw 'AVX512F')
+		AVX512VBMI_M := $(shell $(CPUINFO_CMD) | grep -iw 'AVX512VBMI')
+		AVX512VNNI_M := $(shell $(CPUINFO_CMD) | grep -iwE 'AVX512_VNNI|AVX512VNNI')
+
+		# AVX-512 has many subsets, so let's make it easy to disable them all
+		ifneq ($(filter-out 0,$(WHISPER_NO_AVX512)),)
+			AVX512F_M :=
+			AVX512VBMI_M :=
+			AVX512VNNI_M :=
+		endif
+
 		ifneq (,$(SSE3_M))
 			CFLAGS += -msse3
 			CXXFLAGS += -msse3
 		endif
 
-		SSSE3_M := $(shell $(CPUINFO_CMD) | grep -iw 'SSSE3')
 		ifneq (,$(SSSE3_M))
 			CFLAGS += -mssse3
 			CXXFLAGS += -mssse3
 		endif
+
+		ifneq (,$(AVX_M))
+			CFLAGS += -mavx
+			CXXFLAGS += -mavx
+		endif
+
+		ifneq (,$(F16C_M))
+			CFLAGS += -mf16c
+			CXXFLAGS += -mf16c
+		endif
+
+		ifneq (,$(FMA_M))
+			CFLAGS += -mfma
+			CXXFLAGS += -mfma
+		endif
+
+		ifneq (,$(AVX2_M))
+			CFLAGS += -mavx2
+			CXXFLAGS += -mavx2
+		endif
+
+		ifneq (,$(AVX512F_M))
+			CFLAGS += -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw
+			CXXFLAGS += -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw
+		endif
+
+		ifneq (,$(AVX512VBMI_M))
+			CFLAGS += -mavx512vbmi
+			CXXFLAGS += -mavx512vbmi
+		endif
+
+		ifneq (,$(AVX512VNNI_M))
+			CFLAGS += -mavx512vnni
+			CXXFLAGS += -mavx512vnni
+		endif
 	endif
 endif
 