CUDA/HIP: add warp_size to cuda_device_info

This commit is contained in:
uvos 2025-01-29 17:46:23 +01:00 committed by Georgi Gerganov
parent 80fa576254
commit f41fdad200
2 changed files with 5 additions and 2 deletions

View File

@ -520,6 +520,7 @@ struct ggml_cuda_device_info {
bool vmm; // virtual memory support bool vmm; // virtual memory support
size_t vmm_granularity; // granularity of virtual memory size_t vmm_granularity; // granularity of virtual memory
size_t total_vram; size_t total_vram;
int warp_size; // number of threads in a warp (reported as "Wave Size" on HIP/AMD)
}; };
cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {}; cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};

View File

@ -242,6 +242,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
info.devices[id].nsm = prop.multiProcessorCount; info.devices[id].nsm = prop.multiProcessorCount;
info.devices[id].smpb = prop.sharedMemPerBlock; info.devices[id].smpb = prop.sharedMemPerBlock;
info.devices[id].warp_size = prop.warpSize;
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
info.devices[id].smpbo = prop.sharedMemPerBlock; info.devices[id].smpbo = prop.sharedMemPerBlock;
@ -256,8 +257,9 @@ static ggml_cuda_device_info ggml_cuda_init() {
info.devices[id].cc += prop.minor * 0x10; info.devices[id].cc += prop.minor * 0x10;
} }
} }
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s\n", GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, device_vmm ? "yes" : "no"); id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
device_vmm ? "yes" : "no", prop.warpSize);
#else #else
info.devices[id].smpbo = prop.sharedMemPerBlockOptin; info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
info.devices[id].cc = 100*prop.major + 10*prop.minor; info.devices[id].cc = 100*prop.major + 10*prop.minor;