mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-02-15 14:52:07 +00:00
CUDA/HIP: add warp_size to cuda_device_info
This commit is contained in:
parent
80fa576254
commit
f41fdad200
@ -520,6 +520,7 @@ struct ggml_cuda_device_info {
|
|||||||
bool vmm; // virtual memory support
|
bool vmm; // virtual memory support
|
||||||
size_t vmm_granularity; // granularity of virtual memory
|
size_t vmm_granularity; // granularity of virtual memory
|
||||||
size_t total_vram;
|
size_t total_vram;
|
||||||
|
int warp_size; // number of threads per warp (called a wavefront on AMD/HIP), taken from the device's warpSize property
|
||||||
};
|
};
|
||||||
|
|
||||||
cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};
|
cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};
|
||||||
|
@ -242,6 +242,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||||||
|
|
||||||
info.devices[id].nsm = prop.multiProcessorCount;
|
info.devices[id].nsm = prop.multiProcessorCount;
|
||||||
info.devices[id].smpb = prop.sharedMemPerBlock;
|
info.devices[id].smpb = prop.sharedMemPerBlock;
|
||||||
|
info.devices[id].warp_size = prop.warpSize;
|
||||||
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
||||||
info.devices[id].smpbo = prop.sharedMemPerBlock;
|
info.devices[id].smpbo = prop.sharedMemPerBlock;
|
||||||
|
|
||||||
@ -256,8 +257,9 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||||||
info.devices[id].cc += prop.minor * 0x10;
|
info.devices[id].cc += prop.minor * 0x10;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s\n",
|
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
|
||||||
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, device_vmm ? "yes" : "no");
|
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
|
||||||
|
device_vmm ? "yes" : "no", prop.warpSize);
|
||||||
#else
|
#else
|
||||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||||
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user