Mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-02-22 09:40:52 +00:00)
Add an option to build without CUDA VMM (llama/7067)
Add an option to build ggml cuda without CUDA VMM.

Resolves https://github.com/ggerganov/llama.cpp/issues/6889
See also https://forums.developer.nvidia.com/t/potential-nvshmem-allocated-memory-performance-issue/275416/4
This commit is contained in:
parent 9b84195225
commit b5521fea19
@@ -113,7 +113,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
         CUdevice device;
         CU_CHECK(cuDeviceGet(&device, id));
         CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
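For context, the attribute this hunk reads is a standard CUDA driver API query. Below is a minimal standalone sketch of the same per-device probe, assuming only libcuda; the CHECK macro is a hypothetical stand-in for ggml's CU_CHECK, not ggml code. Build with e.g. g++ probe.cpp -lcuda.

// sketch: probe each device for CUDA VMM support via the driver API
// (illustration only; CHECK is a hypothetical error helper)
#include <cuda.h>
#include <cstdio>
#include <cstdlib>

#define CHECK(call)                                         \
    do {                                                    \
        CUresult err_ = (call);                             \
        if (err_ != CUDA_SUCCESS) {                         \
            fprintf(stderr, "CUDA error %d at %s:%d\n",     \
                    (int) err_, __FILE__, __LINE__);        \
            exit(1);                                        \
        }                                                   \
    } while (0)

int main() {
    CHECK(cuInit(0));
    int device_count = 0;
    CHECK(cuDeviceGetCount(&device_count));
    for (int id = 0; id < device_count; ++id) {
        CUdevice device;
        int device_vmm = 0;
        CHECK(cuDeviceGet(&device, id));
        CHECK(cuDeviceGetAttribute(&device_vmm,
            CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
        printf("device %d: VMM %s\n", id, device_vmm ? "supported" : "not supported");
    }
    return 0;
}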
@@ -259,7 +259,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
 };
 
 // pool with virtual memory
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
 struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
 
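The pool guarded here builds on the driver's virtual memory management calls: reserve a large virtual address range once (hence the 1ull << 35 constant), then map physical chunks into it as the pool grows. A minimal sketch of that pattern follows, assuming cuInit() has run and a context is current on the device; this illustrates the API, it is not ggml's pool implementation (CHECK as in the previous sketch).

// sketch: the driver-API VMM pattern a pool like ggml_cuda_pool_vmm builds on
#include <cuda.h>

static const size_t POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB of virtual address space

void vmm_demo(int device_id) {
    CUmemAllocationProp prop = {};
    prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
    prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    prop.location.id   = device_id;

    // physical allocations must be a multiple of the granularity
    size_t granularity = 0;
    CHECK(cuMemGetAllocationGranularity(&granularity, &prop,
        CU_MEM_ALLOC_GRANULARITY_MINIMUM));

    // reserve virtual address space only -- no physical memory yet
    CUdeviceptr base = 0;
    CHECK(cuMemAddressReserve(&base, POOL_VMM_MAX_SIZE, 0, 0, 0));

    // back the first chunk with physical memory and map it into the range
    CUmemGenericAllocationHandle handle;
    CHECK(cuMemCreate(&handle, granularity, &prop, 0));
    CHECK(cuMemMap(base, granularity, 0, handle, 0));

    // enable read/write access from the device
    CUmemAccessDesc access = {};
    access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    access.location.id   = device_id;
    access.flags         = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    CHECK(cuMemSetAccess(base, granularity, &access, 1));

    // ... use the memory, then tear everything down ...
    CHECK(cuMemUnmap(base, granularity));
    CHECK(cuMemRelease(handle));
    CHECK(cuMemAddressFree(base, POOL_VMM_MAX_SIZE));
}

Growing the pool is then just more cuMemCreate/cuMemMap calls at increasing offsets into the reserved range, which is why pointers handed out by a VMM pool stay stable as it grows.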
@@ -356,7 +356,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 #endif // !defined(GGML_USE_HIPBLAS)
 
 std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
     if (ggml_cuda_info().devices[device].vmm) {
         return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
     }
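The net effect: defining GGML_CUDA_NO_VMM at compile time (e.g. passing -DGGML_CUDA_NO_VMM to the compiler) removes the VMM branch entirely, so every device falls back to the legacy pool. A reduced sketch of that dispatch, reusing the names from the hunks above; the final fallback return is an assumption based on the surrounding code, since this hunk cuts off before it:

// sketch of the compile-time fallback this change enables
#include <memory>

std::unique_ptr<ggml_cuda_pool> new_pool_for_device(int device) {
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
    // VMM pool: maps physical chunks into one large reserved virtual range
    if (ggml_cuda_info().devices[device].vmm) {
        return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
    }
#endif
    // legacy pool: the only path when GGML_CUDA_NO_VMM (or HIPBLAS) is defined,
    // or when the device lacks VMM support (assumed fallback, not shown in the hunk)
    return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
}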