From 46d07b9c8506246bea4f2939470acfbbc63ce675 Mon Sep 17 00:00:00 2001 From: midnight Date: Wed, 5 Feb 2025 04:41:10 -0800 Subject: [PATCH] cmake : fix compile assumptions for power9/etc (#2777) * Add small comment re: VSX to readme Co-authored-by: midnight --- README.md | 17 ++++++++++++++++- ggml/src/ggml-cpu/CMakeLists.txt | 18 +++++++----------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 14609866..9748969c 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp - Plain C/C++ implementation without dependencies - Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support) - AVX intrinsics support for x86 architectures -- VSX intrinsics support for POWER architectures +- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics) - Mixed F16 / F32 precision - [Integer quantization support](#quantization) - Zero memory allocations at runtime @@ -139,6 +139,21 @@ make -j large-v3-turbo | medium | 1.5 GiB | ~2.1 GB | | large | 2.9 GiB | ~3.9 GB | +## POWER VSX Intrinsics + +`whisper.cpp` supports POWER architectures and includes code which +significantly speeds up operation on Linux running on POWER9/10, making it +capable of faster-than-realtime transcription on an underclocked Raptor +Talos II. Ensure you have a BLAS package installed, and replace the +standard cmake setup with: + +```bash +# build with GGML_BLAS defined +cmake -B build -DGGML_BLAS=1 +cmake --build build --config Release +./build/bin/whisper-cli [ .. etc .. ] +``` + ## Quantization `whisper.cpp` supports integer quantization of the Whisper `ggml` models. 
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
index 6b3641c4..26533e51 100644
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -279,19 +279,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
         endif()
     elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
         message(STATUS "PowerPC detected")
-        execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M)
-        string(FIND "${POWER10_M}" "POWER10" substring_index)
-        if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "")
-            set(substring_index -1)
-        endif()
-
-        if (${substring_index} GREATER_EQUAL 0)
-            list(APPEND ARCH_FLAGS -mcpu=power10)
+        execute_process(COMMAND bash -c "grep POWER /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER_M) # first cpuinfo line mentioning POWER; empty when there is none
+        if ("${POWER_M}" MATCHES "POWER10") # quoted so an empty POWER_M stays an argument instead of breaking if()
+            list(APPEND ARCH_FLAGS -mcpu=power10)
+        elseif ("${POWER_M}" MATCHES "POWER9") # quoted for the same reason as the POWER10 test
+            list(APPEND ARCH_FLAGS -mcpu=power9)
         elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
-            list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
+            list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native) # no POWER9/10 match on a little-endian host: generic -mcpu, tuned for the build machine
         else()
-            list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
-            # TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
+            list(APPEND ARCH_FLAGS -mcpu=powerpc64 -mtune=native) # remaining ppc64 hosts: generic -mcpu, tuned for the build machine
         endif()
     elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
         message(STATUS "loongarch64 detected")