# whisper.cpp/ggml/CMakeLists.txt

cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
project("ggml" C CXX)
include(CheckIncludeFileCXX)

# ask the generator to write compile_commands.json
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Default to a Release build when no build type was selected.
#
# CMAKE_BUILD_TYPE only applies to single-config generators, so the generator
# itself is queried instead of inferring this from the XCODE / MSVC variables:
# MSVC describes the compiler rather than the generator, and other multi-config
# generators (e.g. "Ninja Multi-Config") set neither of the two.
get_property(GGML_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)

if (NOT GGML_IS_MULTI_CONFIG AND NOT CMAKE_BUILD_TYPE)
    # FORCE replaces an existing empty cache entry; the NOT CMAKE_BUILD_TYPE
    # guard above ensures a user-selected value is never overwritten.
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    # choices offered by cmake-gui / ccmake
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

# GGML_STANDALONE is ON only when this directory is the root of the whole build,
# i.e. ggml was not added by a parent project.
set(GGML_STANDALONE OFF)

if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(GGML_STANDALONE ON)

    # standalone builds place runtime artifacts in <build>/bin
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

    # TODO: configure project version
endif()

# Shared libraries are the default, except under Emscripten and MinGW.
if (EMSCRIPTEN OR MINGW)
    set(BUILD_SHARED_LIBS_DEFAULT OFF)
else()
    set(BUILD_SHARED_LIBS_DEFAULT ON)
endif()

# This option is only declared for Emscripten builds.
if (EMSCRIPTEN)
    option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON)
endif()

option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})

#
# option list
#

# TODO: mark all options as advanced when not GGML_STANDALONE

# Platform-dependent defaults for the Metal / BLAS options declared below:
# disabled with a "Generic" BLAS vendor unless building for an Apple platform.
set(GGML_METAL_DEFAULT       OFF)
set(GGML_BLAS_DEFAULT        OFF)
set(GGML_BLAS_VENDOR_DEFAULT "Generic")

if (APPLE)
    set(GGML_METAL_DEFAULT       ON)
    set(GGML_BLAS_DEFAULT        ON)
    set(GGML_BLAS_VENDOR_DEFAULT "Apple")
endif()

# general build behaviour
option(GGML_STATIC "ggml: static link libraries" OFF)
option(GGML_NATIVE "ggml: enable -march=native flag" ON)
option(GGML_LTO "ggml: enable link time optimization" OFF)
option(GGML_CCACHE "ggml: use ccache if available" ON)

# diagnostics and profiling
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
option(GGML_GPROF "ggml: enable gprof" OFF)

# treat warnings as errors
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)

# sanitizers (all off by default)
option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF)
option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF)
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)

# instruction set specific
#
# INS_ENB is the default for the AVX / AVX2 / FMA / F16C options below:
# it is the inverse of GGML_NATIVE.
set(INS_ENB ON)
if (GGML_NATIVE)
    set(INS_ENB OFF)
endif()

option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)

option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
option(GGML_AVX512 "ggml: enable AVX512" OFF)
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
option(GGML_FMA "ggml: enable FMA" ${INS_ENB})

# in MSVC F16C is implied with AVX2/AVX512, so the option is not declared there
if (NOT MSVC)
    option(GGML_F16C "ggml: enable F16C" ${INS_ENB})
endif()

option(GGML_LASX "ggml: enable lasx" ON)
option(GGML_LSX "ggml: enable lsx" ON)
option(GGML_SVE "ggml: enable SVE" OFF)

# only declared on Windows
if (WIN32)
    set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version")
endif()

# ggml core
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")

# 3rd party libs / backends
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)

# BLAS: both the switch and the vendor take their defaults from the
# GGML_BLAS_DEFAULT / GGML_BLAS_VENDOR_DEFAULT variables
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING "ggml: BLAS library vendor")

option(GGML_LLAMAFILE "ggml: use ggml SGEMM" OFF)

# CUDA backend: on/off switches
option(GGML_CUDA "ggml: use CUDA" OFF)
option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF)
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)

# CUDA backend: numeric tunables (stored as cache strings)
set(GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
set(GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels")
set(GGML_CUDA_KQUANTS_ITER "2" CACHE STRING "ggml: iters./thread per block for Q2_K/Q6_K")
set(GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING "ggml: max. batch size for using peer access")

option(GGML_CURL "ggml: use libcurl to download model from an URL" OFF)

# HIP
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)

# Vulkan
option(GGML_VULKAN "ggml: use Vulkan" OFF)
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)

# Kompute
option(GGML_KOMPUTE "ggml: use Kompute" OFF)

# Metal (GGML_METAL_EMBED_LIBRARY defaults to the value of GGML_METAL, so
# GGML_METAL must be declared first)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL})
set(GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING "ggml: metal minimum macOS version")
set(GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")

# OpenMP / RPC
option(GGML_OPENMP "ggml: use OpenMP" ON)
option(GGML_RPC "ggml: use RPC" OFF)

# SYCL
option(GGML_SYCL "ggml: use SYCL" OFF)
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
set(GGML_SYCL_TARGET "INTEL" CACHE STRING "ggml: sycl target device")

# extra artifacts: tests and examples default to ON only for standalone builds
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})

#
# dependencies
#

# C: require C11
set(CMAKE_C_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)

# C++: require C++11, raised to C++17 when the SYCL backend is enabled
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_CXX_STANDARD 11)
if (GGML_SYCL)
    set(CMAKE_CXX_STANDARD 17)
endif()

# threading support is mandatory; prefer the -pthread flag where available
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

#
# build the library
#

add_subdirectory(src)

#
# tests and examples (each gated by its own GGML_BUILD_* option)
#

if (GGML_BUILD_TESTS)
    # enable_testing() must run before the tests directory is processed
    enable_testing()
    add_subdirectory(tests)
endif()

if (GGML_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif()

#
# install
#

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# Headers installed together with the ggml target. The GGML_HEADERS_* variables
# are expanded as they are visible in this scope and may be empty.
set(GGML_PUBLIC_HEADERS
    include/ggml.h include/ggml-alloc.h include/ggml-backend.h
    "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")

set_property(TARGET ggml PROPERTY PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")

# NOTE: the Metal shader source is not attached to the target as a RESOURCE;
#       it is installed as a plain file instead.
install(TARGETS ggml PUBLIC_HEADER)

if (BUILD_SHARED_LIBS)
    install(TARGETS ggml LIBRARY)
endif()

if (GGML_METAL)
    # install the Metal shader source into the binary directory (rw-r--r--)
    install(
        FILES src/ggml-metal.metal
        DESTINATION ${CMAKE_INSTALL_BINDIR}
        PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ)

    # default.metallib is installed as a separate file only when the Metal
    # library is not embedded
    if (NOT GGML_METAL_EMBED_LIBRARY)
        install(FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
                DESTINATION ${CMAKE_INSTALL_BINDIR})
    endif()
endif()

# pkg-config support (standalone builds only): generate ggml.pc from the
# ggml.pc.in template and install it.
if (GGML_STANDALONE)
    # @ONLY limits substitution to @VAR@ references in the template
    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in
        ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        @ONLY)

    # CMAKE_INSTALL_DATADIR (from GNUInstallDirs) defaults to "share", so the
    # default location stays share/pkgconfig while honoring user overrides of
    # the install layout.
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        DESTINATION ${CMAKE_INSTALL_DATADIR}/pkgconfig)
endif()
whisper : reorganize source code + improve CMake (#2256) * scripts : update sync [no ci] * files : reorganize [no ci] * sync : llama.cpp * cmake : link math library * cmake : build normal ggml library * files : move headers to include * objc : fix path to ggml-metal.h * ci : fix WHISPER_CUDA -> GGML_CUDA * scripts : sync LICENSE [no ci] 2024-06-26 16:34:09 +00:00			`cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.`
			`project("ggml" C CXX)`
			`include(CheckIncludeFileCXX)`

			`set(CMAKE_EXPORT_COMPILE_COMMANDS ON)`

			`if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)`
			`set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)`
			`set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")`
			`endif()`

			`if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)`
			`set(GGML_STANDALONE ON)`

			`set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)`

			`# configure project version`
			`# TODO`
			`else()`
			`set(GGML_STANDALONE OFF)`
			`endif()`

			`if (EMSCRIPTEN)`
			`set(BUILD_SHARED_LIBS_DEFAULT OFF)`

			`option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON)`
			`else()`
			`if (MINGW)`
			`set(BUILD_SHARED_LIBS_DEFAULT OFF)`
			`else()`
			`set(BUILD_SHARED_LIBS_DEFAULT ON)`
			`endif()`
			`endif()`

			`option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})`

			`#`
			`# option list`
			`#`

			`# TODO: mark all options as advanced when not GGML_STANDALONE`

			`if (APPLE)`
			`set(GGML_METAL_DEFAULT ON)`
			`set(GGML_BLAS_DEFAULT ON)`
			`set(GGML_BLAS_VENDOR_DEFAULT "Apple")`
			`else()`
			`set(GGML_METAL_DEFAULT OFF)`
			`set(GGML_BLAS_DEFAULT OFF)`
			`set(GGML_BLAS_VENDOR_DEFAULT "Generic")`
			`endif()`

			`# general`
			`option(GGML_STATIC "ggml: static link libraries" OFF)`
			`option(GGML_NATIVE "ggml: enable -march=native flag" ON)`
			`option(GGML_LTO "ggml: enable link time optimization" OFF)`
			`option(GGML_CCACHE "ggml: use ccache if available" ON)`

			`# debug`
			`option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)`
			`option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)`
			`option(GGML_GPROF "ggml: enable gprof" OFF)`

			`# build`
			`option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)`

			`# sanitizers`
			`option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF)`
			`option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF)`
			`option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)`

			`# instruction set specific`
			`if (GGML_NATIVE)`
			`set(INS_ENB OFF)`
			`else()`
			`set(INS_ENB ON)`
			`endif()`

			`option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)`

			`option(GGML_AVX "ggml: enable AVX" ${INS_ENB})`
			`option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})`
			`option(GGML_AVX512 "ggml: enable AVX512" OFF)`
			`option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)`
			`option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)`
			`option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)`
			`option(GGML_FMA "ggml: enable FMA" ${INS_ENB})`
			`if (NOT MSVC)`
			`option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512`
			`endif()`
			`option(GGML_LASX "ggml: enable lasx" ON)`
			`option(GGML_LSX "ggml: enable lsx" ON)`
			`option(GGML_SVE "ggml: enable SVE" OFF)`

			`if (WIN32)`
			`set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version")`
			`endif()`

			`# ggml core`
			`set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")`

			`# 3rd party libs / backends`
			`option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)`
			`option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})`
			`set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING`
			`"ggml: BLAS library vendor")`
			`option(GGML_LLAMAFILE "ggml: use ggml SGEMM" OFF)`

			`option(GGML_CUDA "ggml: use CUDA" OFF)`
			`option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF)`
			`option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)`
ggml : add GGML_CUDA_USE_GRAPHS option, restore GGML_CUDA_FORCE_CUBLAS (cmake) (llama/8140) 2024-06-26 19:34:14 +00:00			`option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)`
whisper : reorganize source code + improve CMake (#2256) * scripts : update sync [no ci] * files : reorganize [no ci] * sync : llama.cpp * cmake : link math library * cmake : build normal ggml library * files : move headers to include * objc : fix path to ggml-metal.h * ci : fix WHISPER_CUDA -> GGML_CUDA * scripts : sync LICENSE [no ci] 2024-06-26 16:34:09 +00:00			`set (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")`
			`set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels")`
			`option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)`
			`set (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING`
			`"ggml: iters./thread per block for Q2_K/Q6_K")`
			`set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING`
			`"ggml: max. batch size for using peer access")`
			`option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)`
			`option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)`
			`option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)`
ggml : add GGML_CUDA_USE_GRAPHS option, restore GGML_CUDA_FORCE_CUBLAS (cmake) (llama/8140) 2024-06-26 19:34:14 +00:00			`option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)`
whisper : reorganize source code + improve CMake (#2256) * scripts : update sync [no ci] * files : reorganize [no ci] * sync : llama.cpp * cmake : link math library * cmake : build normal ggml library * files : move headers to include * objc : fix path to ggml-metal.h * ci : fix WHISPER_CUDA -> GGML_CUDA * scripts : sync LICENSE [no ci] 2024-06-26 16:34:09 +00:00
			`option(GGML_CURL "ggml: use libcurl to download model from an URL" OFF)`
			`option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)`
			`option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)`
			`option(GGML_VULKAN "ggml: use Vulkan" OFF)`
			`option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)`
			`option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)`
			`option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)`
			`option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)`
			`option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)`
			`option(GGML_KOMPUTE "ggml: use Kompute" OFF)`
			`option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})`
			`option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)`
			`option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)`
			`option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL})`
			`set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING`
			`"ggml: metal minimum macOS version")`
			`set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")`
			`option(GGML_OPENMP "ggml: use OpenMP" ON)`
			`option(GGML_RPC "ggml: use RPC" OFF)`
			`option(GGML_SYCL "ggml: use SYCL" OFF)`
			`option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)`
			`set (GGML_SYCL_TARGET "INTEL" CACHE STRING`
			`"ggml: sycl target device")`

			`# extra artifacts`
			`option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})`
			`option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})`

			`#`
			`# dependencies`
			`#`

			`set(CMAKE_C_STANDARD 11)`
			`set(CMAKE_C_STANDARD_REQUIRED true)`

			`if (GGML_SYCL)`
			`set(CMAKE_CXX_STANDARD 17)`
			`else()`
			`set(CMAKE_CXX_STANDARD 11)`
			`endif()`
			`set(CMAKE_CXX_STANDARD_REQUIRED true)`

			`set(THREADS_PREFER_PTHREAD_FLAG ON)`

			`find_package(Threads REQUIRED)`

			`#`
			`# build the library`
			`#`

			`add_subdirectory(src)`

			`#`
			`# tests and examples`
			`#`

			`if (GGML_BUILD_TESTS)`
			`enable_testing()`
			`add_subdirectory(tests)`
			`endif ()`

			`if (GGML_BUILD_EXAMPLES)`
			`add_subdirectory(examples)`
			`endif ()`

			`#`
			`# install`
			`#`

			`include(GNUInstallDirs)`
			`include(CMakePackageConfigHelpers)`

			`set(GGML_PUBLIC_HEADERS`
			`include/ggml.h`
			`include/ggml-alloc.h`
			`include/ggml-backend.h`
			`"${GGML_HEADERS_CUDA}"`
			`"${GGML_HEADERS_METAL}"`
			`"${GGML_HEADERS_EXTRA}")`

			`set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")`
			`#if (GGML_METAL)`
			`# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")`
			`#endif()`
			`install(TARGETS ggml PUBLIC_HEADER)`

			`if (BUILD_SHARED_LIBS)`
			`install(TARGETS ggml LIBRARY)`
			`endif()`

			`if (GGML_METAL)`
			`install(`
			`FILES src/ggml-metal.metal`
			`PERMISSIONS`
			`OWNER_READ`
			`OWNER_WRITE`
			`GROUP_READ`
			`WORLD_READ`
			`DESTINATION ${CMAKE_INSTALL_BINDIR})`

			`if (NOT GGML_METAL_EMBED_LIBRARY)`
			`install(`
			`FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib`
			`DESTINATION ${CMAKE_INSTALL_BINDIR}`
			`)`
			`endif()`
			`endif()`

			`if (GGML_STANDALONE)`
			`configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in`
			`${CMAKE_CURRENT_BINARY_DIR}/ggml.pc`
			`@ONLY)`

			`install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc`
			`DESTINATION share/pkgconfig)`
			`endif()`