mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-22 16:38:58 +00:00
Implement GGML_CPU_ALL_VARIANTS for PowerPC (llama/14286)
* Add PowerPC feature detection and scoring * ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for PowerPC * ggml-cpu: Delay some initializations until function is called When using GGML_BACKEND_DL=ON, these initializations might use instructions that are not supported by the current CPU. --------- Co-authored-by: Diego Devesa <slarengh@gmail.com>
This commit is contained in:
committed by
Georgi Gerganov
parent
33d1f0a3e0
commit
af7168174c
@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|||||||
foreach (feat ${ARGN})
|
foreach (feat ${ARGN})
|
||||||
set(GGML_INTERNAL_${feat} ON)
|
set(GGML_INTERNAL_${feat} ON)
|
||||||
endforeach()
|
endforeach()
|
||||||
|
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
||||||
|
foreach (feat ${ARGN})
|
||||||
|
set(GGML_INTERNAL_${feat} ON)
|
||||||
|
endforeach()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
ggml_add_cpu_backend_variant_impl(${tag_name})
|
ggml_add_cpu_backend_variant_impl(${tag_name})
|
||||||
@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
|
|||||||
else()
|
else()
|
||||||
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
|
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
|
||||||
endif()
|
endif()
|
||||||
|
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
||||||
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||||
|
ggml_add_cpu_backend_variant(power0)
|
||||||
|
ggml_add_cpu_backend_variant(power7_1 POWER7)
|
||||||
|
ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
|
||||||
|
ggml_add_cpu_backend_variant(power8_1 POWER8)
|
||||||
|
ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
|
||||||
|
ggml_add_cpu_backend_variant(power9 POWER9 VSX)
|
||||||
|
ggml_add_cpu_backend_variant(power10 POWER10 VSX)
|
||||||
|
ggml_add_cpu_backend_variant(power11 POWER11 VSX)
|
||||||
|
else()
|
||||||
|
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
|
||||||
|
endif()
|
||||||
else()
|
else()
|
||||||
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
|
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
|
||||||
endif()
|
endif()
|
||||||
|
@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|||||||
else()
|
else()
|
||||||
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
|
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
|
||||||
endif()
|
endif()
|
||||||
|
elseif(GGML_CPU_ALL_VARIANTS)
|
||||||
|
# Begin with the lowest baseline
|
||||||
|
set(ARCH_DEFINITIONS "")
|
||||||
|
|
||||||
|
# When a feature is selected, bump the MCPU to the first
|
||||||
|
# version that supported it
|
||||||
|
foreach(PVER RANGE 7 11)
|
||||||
|
if(DEFINED GGML_INTERNAL_POWER${PVER})
|
||||||
|
set(POWERPC_MCPU "power${PVER}")
|
||||||
|
list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
if (GGML_INTERNAL_VSX)
|
||||||
|
list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
|
||||||
|
list(APPEND ARCH_FLAGS -mvsx)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (DEFINED POWERPC_MCPU)
|
||||||
|
list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
|
||||||
|
endif()
|
||||||
|
ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
|
||||||
else()
|
else()
|
||||||
if (GGML_CPU_POWERPC_CPUTYPE)
|
if (GGML_CPU_POWERPC_CPUTYPE)
|
||||||
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
|
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
|
||||||
|
82
ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
Normal file
82
ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
# include "ggml-backend-impl.h"
|
||||||
|
|
||||||
|
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
|
||||||
|
|
||||||
|
#if defined(__linux__)
|
||||||
|
#include <sys/auxv.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Runtime description of the PowerPC CPU the current process is running on.
//
// On Linux the platform name is read from the AT_PLATFORM entry of the
// auxiliary vector. On other systems nothing is queried and every member
// keeps its default ("unknown") value.
struct powerpc_features {
    std::string platform      = "";    // AT_PLATFORM string; empty when it could not be read
    int         power_version = -1;    // N parsed from "powerN"; -1 when unknown

    bool        has_vsx       = false; // set only when power_version >= 9

    // Extracts the POWER generation from a platform name.
    //
    // The name must start with the lowercase prefix "power", immediately
    // followed by at least one decimal digit. Parsing stops at the first
    // non-digit, so a name that carries a trailing marker (such as "power7+")
    // still yields its generation instead of being rejected.
    //
    // Returns the parsed generation, or -1 when the name does not match or the
    // number is implausibly large. Never throws: this code is reached from the
    // backend score function, so a std::stoi-style exception must not escape.
    static int parse_power_version(const std::string & name) {
        static const char prefix[] = "power";
        const std::string::size_type prefix_len = sizeof(prefix) - 1;

        // TBD: Do systems exist that return this in uppercase?
        if (name.compare(0, prefix_len, prefix) != 0) {
            return -1;
        }

        int version = -1;
        for (std::string::size_type i = prefix_len; i < name.size(); ++i) {
            const char c = name[i];
            // Plain range check: avoids <cctype> and the undefined behaviour of
            // passing a negative char value to std::isdigit.
            if (c < '0' || c > '9') {
                break;
            }
            if (version < 0) {
                version = 0;
            }
            if (version > 9999) {
                // Far beyond any real generation; bail out before the
                // multiplication below could overflow.
                return -1;
            }
            version = version * 10 + (c - '0');
        }
        return version;
    }

    powerpc_features() {
#if defined(__linux__)
        const unsigned long auxval = getauxval(AT_PLATFORM);
        if (auxval) {
            // For AT_PLATFORM the returned value is the address of a C string.
            platform      = reinterpret_cast<const char *>(auxval);
            power_version = parse_power_version(platform);
        }
#endif
        if (power_version >= 9) {
            has_vsx = true;
        }
    }
};
|
||||||
|
|
||||||
|
static int ggml_backend_cpu_powerpc_score() {
|
||||||
|
int score = 1;
|
||||||
|
powerpc_features pf;
|
||||||
|
|
||||||
|
// Platform scores
|
||||||
|
#if defined(GGML_USE_POWER7)
|
||||||
|
if (pf.power_version < 7) { return 0; }
|
||||||
|
score += 1<<1;
|
||||||
|
#endif
|
||||||
|
#if defined(GGML_USE_POWER8)
|
||||||
|
if (pf.power_version < 8) { return 0; }
|
||||||
|
score += 1<<2;
|
||||||
|
#endif
|
||||||
|
#if defined(GGML_USE_POWER9)
|
||||||
|
if (pf.power_version < 9) { return 0; }
|
||||||
|
score += 1<<3;
|
||||||
|
#endif
|
||||||
|
#if defined(GGML_USE_POWER10)
|
||||||
|
if (pf.power_version < 10) { return 0; }
|
||||||
|
score += 1<<4;
|
||||||
|
#endif
|
||||||
|
#if defined(GGML_USE_POWER11)
|
||||||
|
if (pf.power_version < 11) { return 0; }
|
||||||
|
score += 1<<5;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Feature scores
|
||||||
|
#if defined(GGML_USE_VSX)
|
||||||
|
if (!pf.has_vsx) { return 0; }
|
||||||
|
score += 1<<6;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hands the scoring function above to the GGML_BACKEND_DL_SCORE_IMPL macro.
// NOTE(review): the macro is defined outside this file (presumably in
// ggml-backend-impl.h) and presumably exports the score so a loader can pick
// the best variant when GGML_BACKEND_DL is enabled; confirm against its
// definition.
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score)
|
||||||
|
|
||||||
|
#endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
|
@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// instance for Q4
|
|
||||||
static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
|
|
||||||
static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
|
|
||||||
static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
|
|
||||||
static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
|
|
||||||
|
|
||||||
// instance for IQ4
|
|
||||||
static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
|
|
||||||
|
|
||||||
} // namespace ggml::cpu::repack
|
} // namespace ggml::cpu::repack
|
||||||
|
|
||||||
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
|
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
|
||||||
|
|
||||||
|
// instance for Q4
|
||||||
|
static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
|
||||||
|
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
|
||||||
|
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
|
||||||
|
static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
|
||||||
|
|
||||||
|
// instance for IQ4
|
||||||
|
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
|
||||||
|
|
||||||
if (cur->type == GGML_TYPE_Q4_0) {
|
if (cur->type == GGML_TYPE_Q4_0) {
|
||||||
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
|
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
|
||||||
if (cur->ne[1] % 8 == 0) {
|
if (cur->ne[1] % 8 == 0) {
|
||||||
return &ggml::cpu::repack::q4_0_8x8_q8_0;
|
return &q4_0_8x8_q8_0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
|
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
|
||||||
if (cur->ne[1] % 4 == 0) {
|
if (cur->ne[1] % 4 == 0) {
|
||||||
return &ggml::cpu::repack::q4_0_4x8_q8_0;
|
return &q4_0_4x8_q8_0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
||||||
if (cur->ne[1] % 4 == 0) {
|
if (cur->ne[1] % 4 == 0) {
|
||||||
return &ggml::cpu::repack::q4_0_4x4_q8_0;
|
return &q4_0_4x4_q8_0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (cur->type == GGML_TYPE_Q4_K) {
|
} else if (cur->type == GGML_TYPE_Q4_K) {
|
||||||
if (ggml_cpu_has_avx2()) {
|
if (ggml_cpu_has_avx2()) {
|
||||||
if (cur->ne[1] % 8 == 0) {
|
if (cur->ne[1] % 8 == 0) {
|
||||||
return &ggml::cpu::repack::q4_K_8x8_q8_K;
|
return &q4_K_8x8_q8_K;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (cur->type == GGML_TYPE_IQ4_NL) {
|
} else if (cur->type == GGML_TYPE_IQ4_NL) {
|
||||||
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
||||||
if (cur->ne[1] % 4 == 0) {
|
if (cur->ne[1] % 4 == 0) {
|
||||||
return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
|
return &iq4_nl_4x4_q8_0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user