mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-22 08:30:07 +00:00
Implement GGML_CPU_ALL_VARIANTS for PowerPC (llama/14286)
* Add PowerPC feature detection and scoring * ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for PowerPC * ggml-cpu: Delay some initializations until function is called When using GGML_BACKEND_DL=ON, these initializations might use instructions that are not supported by the current CPU. --------- Co-authored-by: Diego Devesa <slarengh@gmail.com>
This commit is contained in:
committed by
Georgi Gerganov
parent
33d1f0a3e0
commit
af7168174c
@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
|
||||
foreach (feat ${ARGN})
|
||||
set(GGML_INTERNAL_${feat} ON)
|
||||
endforeach()
|
||||
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
||||
foreach (feat ${ARGN})
|
||||
set(GGML_INTERNAL_${feat} ON)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
ggml_add_cpu_backend_variant_impl(${tag_name})
|
||||
@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
|
||||
else()
|
||||
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
|
||||
endif()
|
||||
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
ggml_add_cpu_backend_variant(power0)
|
||||
ggml_add_cpu_backend_variant(power7_1 POWER7)
|
||||
ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
|
||||
ggml_add_cpu_backend_variant(power8_1 POWER8)
|
||||
ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
|
||||
ggml_add_cpu_backend_variant(power9 POWER9 VSX)
|
||||
ggml_add_cpu_backend_variant(power10 POWER10 VSX)
|
||||
ggml_add_cpu_backend_variant(power11 POWER11 VSX)
|
||||
else()
|
||||
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
|
||||
endif()
|
||||
else()
|
||||
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
|
||||
endif()
|
||||
|
@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
||||
else()
|
||||
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
|
||||
endif()
|
||||
elseif(GGML_CPU_ALL_VARIANTS)
|
||||
# Begin with the lowest baseline
|
||||
set(ARCH_DEFINITIONS "")
|
||||
|
||||
# When a feature is selected, bump the MCPU to the first
|
||||
# version that supported it
|
||||
foreach(PVER RANGE 7 11)
|
||||
if(DEFINED GGML_INTERNAL_POWER${PVER})
|
||||
set(POWERPC_MCPU "power${PVER}")
|
||||
list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
|
||||
endif()
|
||||
endforeach()
|
||||
if (GGML_INTERNAL_VSX)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
|
||||
list(APPEND ARCH_FLAGS -mvsx)
|
||||
endif()
|
||||
|
||||
if (DEFINED POWERPC_MCPU)
|
||||
list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
|
||||
endif()
|
||||
ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
|
||||
else()
|
||||
if (GGML_CPU_POWERPC_CPUTYPE)
|
||||
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
|
||||
|
82
ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
Normal file
82
ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
Normal file
@ -0,0 +1,82 @@
|
||||
# include "ggml-backend-impl.h"
|
||||
|
||||
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
|
||||
|
||||
#if defined(__linux__)
|
||||
#include <sys/auxv.h>
|
||||
#endif
|
||||
|
||||
#include <string>
|
||||
|
||||
// Runtime description of the PowerPC CPU the process is running on.
//
// On Linux the constructor reads the AT_PLATFORM string from the auxiliary
// vector and derives the POWER generation from it. On other systems, or when
// the string is missing or not of the form "power<N>...", the defaults below
// are kept (unknown generation, no VSX).
struct powerpc_features {
    std::string platform = "";   // raw AT_PLATFORM string, empty when unavailable
    int power_version    = -1;   // POWER generation number, -1 when unknown

    bool has_vsx         = false;

    // Extracts the POWER generation from a platform name.
    //
    // The name must start with "power"; the decimal digits that immediately
    // follow the prefix form the generation. Anything after those digits is
    // ignored, so names carrying a revision suffix ("power7+", "power6x") still
    // yield their generation instead of being treated as unknown.
    //
    // Returns -1 when the prefix does not match or no digit follows it.
    // Never throws.
    static int parse_power_version(const std::string & name) {
        const std::string::size_type prefix_len = 5; // strlen("power")

        // TBD: Do systems exist that return this in uppercase?
        if (name.compare(0, prefix_len, "power") != 0) {
            return -1;
        }

        int version = -1;
        for (std::string::size_type i = prefix_len; i < name.size(); ++i) {
            const char c = name[i];
            // Plain range check: locale independent and well defined for any
            // char value, unlike std::isdigit on a possibly negative char.
            if (c < '0' || c > '9') {
                break;
            }
            version = (version < 0 ? 0 : version) * 10 + (c - '0');
            if (version >= 1000000) {
                // Absurdly long digit run; stop before the int can overflow.
                break;
            }
        }
        return version;
    }

    powerpc_features() {
#if defined(__linux__)
        const unsigned long auxval = getauxval(AT_PLATFORM);
        if (auxval) {
            platform      = reinterpret_cast<const char *>(auxval);
            power_version = parse_power_version(platform);
        }
#endif
        // NOTE(review): VSX is reported only from generation 9 upwards, so any
        // build variant that requires VSX is rejected on older CPUs. Presumably
        // intentional - confirm, or consider querying the AT_HWCAP bits instead.
        has_vsx = power_version >= 9;
    }
};
|
||||
|
||||
static int ggml_backend_cpu_powerpc_score() {
|
||||
int score = 1;
|
||||
powerpc_features pf;
|
||||
|
||||
// Platform scores
|
||||
#if defined(GGML_USE_POWER7)
|
||||
if (pf.power_version < 7) { return 0; }
|
||||
score += 1<<1;
|
||||
#endif
|
||||
#if defined(GGML_USE_POWER8)
|
||||
if (pf.power_version < 8) { return 0; }
|
||||
score += 1<<2;
|
||||
#endif
|
||||
#if defined(GGML_USE_POWER9)
|
||||
if (pf.power_version < 9) { return 0; }
|
||||
score += 1<<3;
|
||||
#endif
|
||||
#if defined(GGML_USE_POWER10)
|
||||
if (pf.power_version < 10) { return 0; }
|
||||
score += 1<<4;
|
||||
#endif
|
||||
#if defined(GGML_USE_POWER11)
|
||||
if (pf.power_version < 11) { return 0; }
|
||||
score += 1<<5;
|
||||
#endif
|
||||
|
||||
// Feature scores
|
||||
#if defined(GGML_USE_VSX)
|
||||
if (!pf.has_vsx) { return 0; }
|
||||
score += 1<<6;
|
||||
#endif
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score)
|
||||
|
||||
#endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
|
@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
|
||||
}
|
||||
};
|
||||
|
||||
// instance for Q4
|
||||
static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
|
||||
static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
|
||||
static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
|
||||
static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
|
||||
|
||||
// instance for IQ4
|
||||
static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
|
||||
|
||||
} // namespace ggml::cpu::repack
|
||||
|
||||
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
|
||||
|
||||
// instance for Q4
|
||||
static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
|
||||
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
|
||||
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
|
||||
static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
|
||||
|
||||
// instance for IQ4
|
||||
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
|
||||
|
||||
if (cur->type == GGML_TYPE_Q4_0) {
|
||||
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
|
||||
if (cur->ne[1] % 8 == 0) {
|
||||
return &ggml::cpu::repack::q4_0_8x8_q8_0;
|
||||
return &q4_0_8x8_q8_0;
|
||||
}
|
||||
}
|
||||
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
|
||||
if (cur->ne[1] % 4 == 0) {
|
||||
return &ggml::cpu::repack::q4_0_4x8_q8_0;
|
||||
return &q4_0_4x8_q8_0;
|
||||
}
|
||||
}
|
||||
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
||||
if (cur->ne[1] % 4 == 0) {
|
||||
return &ggml::cpu::repack::q4_0_4x4_q8_0;
|
||||
return &q4_0_4x4_q8_0;
|
||||
}
|
||||
}
|
||||
} else if (cur->type == GGML_TYPE_Q4_K) {
|
||||
if (ggml_cpu_has_avx2()) {
|
||||
if (cur->ne[1] % 8 == 0) {
|
||||
return &ggml::cpu::repack::q4_K_8x8_q8_K;
|
||||
return &q4_K_8x8_q8_K;
|
||||
}
|
||||
}
|
||||
} else if (cur->type == GGML_TYPE_IQ4_NL) {
|
||||
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
||||
if (cur->ne[1] % 4 == 0) {
|
||||
return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
|
||||
return &iq4_nl_4x4_q8_0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user