mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-05-02 08:43:02 +00:00
vulkan: matmul gcn tuning (llama/13016)
* tune matmul for gcn
* this one is more power efficient
* Update ggml/src/ggml-vulkan/ggml-vulkan.cpp

Co-authored-by: 0cc4m <picard12@live.de>

* disable this tune for the proprietary driver

---------

Co-authored-by: 0cc4m <picard12@live.de>
This commit is contained in:
parent
3d54b68ea7
commit
cf3eb291ab
@ -246,6 +246,7 @@ struct vk_device_struct {
|
|||||||
bool pipeline_robustness;
|
bool pipeline_robustness;
|
||||||
vk::Device device;
|
vk::Device device;
|
||||||
uint32_t vendor_id;
|
uint32_t vendor_id;
|
||||||
|
vk::DriverId driver_id;
|
||||||
vk_device_architecture architecture;
|
vk_device_architecture architecture;
|
||||||
vk_queue compute_queue;
|
vk_queue compute_queue;
|
||||||
vk_queue transfer_queue;
|
vk_queue transfer_queue;
|
||||||
@ -1740,6 +1741,11 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|||||||
m_warptile_mmq_int = { 128, 64, 64, 32, subgroup_size_8, 32, 2, 2, 2, 1, subgroup_size_8 };
|
m_warptile_mmq_int = { 128, 64, 64, 32, subgroup_size_8, 32, 2, 2, 2, 1, subgroup_size_8 };
|
||||||
s_warptile_mmq_int = { subgroup_size_32, 32, 32, 32, 32, 32, 2, 2, 1, 1, subgroup_size_8 };
|
s_warptile_mmq_int = { subgroup_size_32, 32, 32, 32, 32, 32, 2, 2, 1, 1, subgroup_size_8 };
|
||||||
|
|
||||||
|
// chip specific tuning
|
||||||
|
if ((device->architecture == AMD_GCN) && (device->driver_id != vk::DriverId::eAmdProprietary)) {
|
||||||
|
m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 };
|
||||||
|
}
|
||||||
|
|
||||||
l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
|
l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
|
||||||
m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 };
|
m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 };
|
||||||
s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 };
|
s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 };
|
||||||
@ -2658,6 +2664,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||||||
device->physical_device.getProperties2(&props2);
|
device->physical_device.getProperties2(&props2);
|
||||||
device->properties = props2.properties;
|
device->properties = props2.properties;
|
||||||
device->vendor_id = device->properties.vendorID;
|
device->vendor_id = device->properties.vendorID;
|
||||||
|
device->driver_id = driver_props.driverID;
|
||||||
|
|
||||||
const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");
|
const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user