mirror of https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-20 05:07:52 +00:00

build : add WHISPER_COREML_ALLOW_FALLBACK to make / CMake (#812)

This commit is contained in:
parent 94a7cd2a07
commit 3efb81dec6
CMakeLists.txt

@@ -39,32 +39,33 @@ endif()
 
 # options
 
 option(BUILD_SHARED_LIBS              "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
 
 option(WHISPER_ALL_WARNINGS           "whisper: enable all compiler warnings" ON)
 option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
 
 option(WHISPER_SANITIZE_THREAD        "whisper: enable thread sanitizer" OFF)
 option(WHISPER_SANITIZE_ADDRESS       "whisper: enable address sanitizer" OFF)
 option(WHISPER_SANITIZE_UNDEFINED     "whisper: enable undefined sanitizer" OFF)
 
 option(WHISPER_BUILD_TESTS            "whisper: build tests" ${WHISPER_STANDALONE})
 option(WHISPER_BUILD_EXAMPLES         "whisper: build examples" ${WHISPER_STANDALONE})
 
 option(WHISPER_SUPPORT_SDL2           "whisper: support for libSDL2" OFF)
 
 if (APPLE)
     option(WHISPER_NO_ACCELERATE         "whisper: disable Accelerate framework" OFF)
     option(WHISPER_NO_AVX                "whisper: disable AVX" OFF)
     option(WHISPER_NO_AVX2               "whisper: disable AVX2" OFF)
     option(WHISPER_NO_FMA                "whisper: disable FMA" OFF)
 
     option(WHISPER_COREML                "whisper: enable Core ML framework" OFF)
+    option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
 else()
     option(WHISPER_SUPPORT_OPENBLAS      "whisper: support for OpenBLAS" OFF)
 endif()
 
 option(WHISPER_PERF "whisper: enable perf timings" OFF)
 
 # sanitizers
 

@@ -119,6 +120,10 @@ if (APPLE)
         else()
             message(WARNING "CoreML framework not found")
         endif()
+
+        if (WHISPER_COREML_ALLOW_FALLBACK)
+            set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML_ALLOW_FALLBACK)
+        endif()
     endif()
 endif()
 
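The CMake side is a standard `option()`, so the fallback build is configured with something like `cmake -DWHISPER_COREML=ON -DWHISPER_COREML_ALLOW_FALLBACK=ON ..`. Note that this path appends `-DWHISPER_USE_COREML_ALLOW_FALLBACK` to `WHISPER_EXTRA_FLAGS`, while the Makefile below defines `-DWHISPER_COREML_ALLOW_FALLBACK`, so the two build systems hand the compiler different macro names. Below is a minimal sketch of the kind of load-time gate such a define feeds; it is illustrative only, not this commit's code, and the two helper functions are hypothetical stand-ins.

```cpp
// Illustrative sketch (not the commit's code) of a build-time fallback gate.
// load_coreml_model() and load_ggml_model() are hypothetical stand-ins.
#include <cstdio>

static bool load_coreml_model() { return false; } // stub: simulate a failed Core ML load
static bool load_ggml_model()   { return true;  } // stub: the GGML weights always load

bool init_model() {
#ifdef WHISPER_USE_COREML
    if (!load_coreml_model()) {
        std::fprintf(stderr, "failed to load Core ML model\n");
#ifndef WHISPER_USE_COREML_ALLOW_FALLBACK
        return false; // strict build: Core ML was requested, so fail hard
#endif
        // fallback build: keep going with the regular encoder
    }
#endif
    return load_ggml_model(); // the GGML weights are needed in either case
}
```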
Makefile: 13 changed lines

@@ -123,6 +123,7 @@ endif
 ifeq ($(UNAME_M),amd64)
     CFLAGS += -mavx -mavx2 -mfma -mf16c
 endif
+
 ifneq ($(filter ppc64%,$(UNAME_M)),)
     POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
     ifneq (,$(findstring POWER9,$(POWER9_M)))

@@ -133,6 +134,7 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
         CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
     endif
 endif
+
 ifndef WHISPER_NO_ACCELERATE
     # Mac M1 - include Accelerate framework
     ifeq ($(UNAME_S),Darwin)

@@ -140,26 +142,36 @@ ifndef WHISPER_NO_ACCELERATE
         LDFLAGS += -framework Accelerate
     endif
 endif
+
 ifdef WHISPER_COREML
     CXXFLAGS += -DWHISPER_USE_COREML
     LDFLAGS += -framework Foundation -framework CoreML
+
+    ifdef WHISPER_COREML_ALLOW_FALLBACK
+        CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
+    endif
 endif
+
 ifdef WHISPER_OPENBLAS
     CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
     LDFLAGS += -lopenblas
 endif
+
 ifdef WHISPER_GPROF
     CFLAGS += -pg
     CXXFLAGS += -pg
 endif
+
 ifneq ($(filter aarch64%,$(UNAME_M)),)
     CFLAGS += -mcpu=native
     CXXFLAGS += -mcpu=native
 endif
+
 ifneq ($(filter armv6%,$(UNAME_M)),)
     # 32-bit Raspberry Pi 1, 2, 3
     CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access
 endif
+
 ifneq ($(filter armv7%,$(UNAME_M)),)
     # 32-bit ARM, for example on Armbian or possibly raspbian
     CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations

@@ -167,6 +179,7 @@ ifneq ($(filter armv7%,$(UNAME_M)),)
     # 64-bit ARM, use these (TODO: auto-detect 64-bit)
     # CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
 endif
+
 ifneq ($(filter armv8%,$(UNAME_M)),)
     # Raspberry Pi 4
     CFLAGS += -mfp16-format=ieee -mno-unaligned-access
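On the Makefile side both switches are plain `ifdef` toggles, so defining them with any non-empty value enables them; a Core ML build that is allowed to fall back to the regular encoder would be invoked roughly as `make WHISPER_COREML=1 WHISPER_COREML_ALLOW_FALLBACK=1` (an illustrative invocation, not taken from the commit). Here the compiler macro is `WHISPER_COREML_ALLOW_FALLBACK`, without the `USE_` that the CMake path inserts, so code guarded by one spelling will not see the other.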
whisper.cpp: 115 changed lines

@@ -1393,18 +1393,17 @@ static bool whisper_encode_internal(
     const bool use_coreml = wstate.ctx_coreml != nullptr;
 #endif
 
-    if (!use_coreml)
-    {
+    if (!use_coreml) {
         // convolution + gelu
         {
             wstate.use_buf(ctx0, 1);
 
             cur = ggml_conv_1d_1s(ctx0, model.e_conv_1_w, mel);
             cur = ggml_add(ctx0,
                     ggml_repeat(ctx0,
                         model.e_conv_1_b,
                         cur),
                     cur);
 
             cur = ggml_gelu(ctx0, cur);
 

@@ -1412,10 +1411,10 @@ static bool whisper_encode_internal(
 
             cur = ggml_conv_1d_2s(ctx0, model.e_conv_2_w, cur);
             cur = ggml_add(ctx0,
                     ggml_repeat(ctx0,
                         model.e_conv_2_b,
                         cur),
                     cur);
 
             cur = ggml_gelu(ctx0, cur);
         }

@@ -1461,10 +1460,10 @@ static bool whisper_encode_internal(
 
             // cur = ln_0_w*cur + ln_0_b
             cur = ggml_add(ctx0,
                     ggml_mul(ctx0,
                         ggml_repeat(ctx0, layer.attn_ln_0_w, cur),
                         cur),
                     ggml_repeat(ctx0, layer.attn_ln_0_b, cur));
         }
 
         // self-attention

@@ -1472,39 +1471,39 @@ static bool whisper_encode_internal(
             wstate.use_buf(ctx0, 1);
 
             struct ggml_tensor * Qcur = ggml_mul_mat(ctx0,
                     layer.attn_q_w,
                     cur);
 
             Qcur = ggml_add(ctx0,
                     ggml_repeat(ctx0,
                         layer.attn_q_b,
                         Qcur),
                     Qcur);
 
             //Qcur = ggml_scale(ctx0, Qcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
 
             // note: no bias for Key
             struct ggml_tensor * Kcur = ggml_mul_mat(ctx0,
                     layer.attn_k_w,
                     cur);
 
             //Kcur = ggml_scale(ctx0, Kcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
 
             struct ggml_tensor * Vcur = ggml_mul_mat(ctx0,
                     layer.attn_v_w,
                     cur);
 
             Vcur = ggml_add(ctx0,
                     ggml_repeat(ctx0,
                         layer.attn_v_b,
                         Vcur),
                     Vcur);
 
             // ------
 
             wstate.use_buf(ctx0, 0);
 
 #ifdef WHISPER_USE_FLASH_ATTN
             struct ggml_tensor * Q =
                 ggml_permute(ctx0,
                         ggml_cpy(ctx0,

@@ -1529,7 +1528,7 @@ static bool whisper_encode_internal(
                             ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head));
 
             struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false);
 #else
             struct ggml_tensor * Q =
                 ggml_permute(ctx0,
                         ggml_cpy(ctx0,

@@ -1575,14 +1574,14 @@ static bool whisper_encode_internal(
                     );
 
             struct ggml_tensor * KQV = ggml_mul_mat(ctx0, ggml_transpose(ctx0, V), KQ_soft_max);
 #endif
             struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
 
             wstate.use_buf(ctx0, 1);
 
             cur = ggml_cpy(ctx0,
                     KQV_merged,
                     ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx));
         }
 
         // projection

@@ -1590,14 +1589,14 @@ static bool whisper_encode_internal(
             wstate.use_buf(ctx0, 0);
 
             cur = ggml_mul_mat(ctx0,
                     layer.attn_ln_1_w,
                     cur);
 
             wstate.use_buf(ctx0, 1);
 
             cur = ggml_add(ctx0,
                     ggml_repeat(ctx0, layer.attn_ln_1_b, cur),
                     cur);
         }
 
         wstate.use_buf(ctx0, 2);

@@ -1619,31 +1618,31 @@ static bool whisper_encode_internal(
 
             // cur = mlp_ln_w*cur + mlp_ln_b
             cur = ggml_add(ctx0,
                     ggml_mul(ctx0,
                         ggml_repeat(ctx0, layer.mlp_ln_w, cur),
                         cur),
                     ggml_repeat(ctx0, layer.mlp_ln_b, cur));
         }
 
 #ifdef WHISPER_USE_FLASH_FF
             wstate.use_buf(ctx0, 0);
 
             cur = ggml_flash_ff(ctx0,
                     ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wstate.wtype, n_state, n_ctx)),
                     layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
 #else
             wstate.use_buf(ctx0, 0);
 
             // fully connected
             cur = ggml_mul_mat(ctx0,
                     layer.mlp_0_w,
                     cur);
 
             wstate.use_buf(ctx0, 1);
 
             cur = ggml_add(ctx0,
                     ggml_repeat(ctx0, layer.mlp_0_b, cur),
                     cur);
 
             wstate.use_buf(ctx0, 0);
 

@@ -1654,15 +1653,15 @@ static bool whisper_encode_internal(
 
             // projection
             cur = ggml_mul_mat(ctx0,
                     layer.mlp_1_w,
                     cur);
 
             wstate.use_buf(ctx0, 0);
 
             cur = ggml_add(ctx0,
                     ggml_repeat(ctx0, layer.mlp_1_b, cur),
                     cur);
 #endif
         }
 
         wstate.use_buf(ctx0, 3);

@@ -1682,10 +1681,10 @@ static bool whisper_encode_internal(
 
         // cur = ln_f_g*cur + ln_f_b
         cur = ggml_add(ctx0,
                 ggml_mul(ctx0,
                     ggml_repeat(ctx0, model.e_ln_w, cur),
                     cur),
                 ggml_repeat(ctx0, model.e_ln_b, cur));
     }
 
     wstate.use_buf(ctx0, -1);
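Apart from the brace-style change in the first hunk, the remaining whisper.cpp hunks appear to adjust only indentation around the `use_coreml` branch: every line shown is unchanged up to whitespace. The behavioral core is the runtime dispatch on `wstate.ctx_coreml`, sketched below in minimal form; `encode_with_ggml` and `encode_with_coreml` are hypothetical placeholders for the graph-building code in the hunks and for the Core ML call path, which this excerpt does not show.

```cpp
// Minimal sketch of the runtime dispatch in whisper_encode_internal.
// The two encode helpers are hypothetical placeholders, not whisper.cpp APIs.
struct whisper_state { void * ctx_coreml = nullptr; };

static void encode_with_ggml  (whisper_state &) { /* conv + gelu + attention graph */ }
static void encode_with_coreml(whisper_state &) { /* hand the mel off to Core ML */ }

void encode(whisper_state & wstate) {
#ifdef WHISPER_USE_COREML
    // whether a Core ML context was created at init time decides the path
    const bool use_coreml = wstate.ctx_coreml != nullptr;
#else
    const bool use_coreml = false;
#endif
    if (!use_coreml) {
        encode_with_ggml(wstate);   // regular GGML path, as in the hunks above
    } else {
        encode_with_coreml(wstate); // Core ML encoder
    }
}
```

Combined with the new ALLOW_FALLBACK define, the intent is that a Core ML model which fails to load leaves `ctx_coreml` null, and the encoder then takes the `!use_coreml` path at runtime instead of aborting.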