diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
index 3b831376..d5412069 100644
--- a/ggml/src/ggml-opencl/ggml-opencl.cpp
+++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include

 #undef MIN
 #undef MAX
@@ -230,13 +231,25 @@ static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *drive
     return { type, major, minor, patch };
 }

+struct ggml_backend_opencl_context;
+
 // backend device context
 struct ggml_backend_opencl_device_context {
     cl_platform_id platform;
     std::string platform_name;
-    cl_device_id device;
-    std::string device_name;
+    cl_device_id device;
+    std::string device_name;
+    cl_device_type device_type;
+    std::string device_version;
+
+    // Initialized by ggml_cl2_init().
+    ggml_backend_opencl_context * backend_ctx = nullptr;
+
+    // Initialized by ggml_backend_opencl_device_get_buffer_type()
+    ggml_backend_buffer_type buffer_type;
+
+    cl_context context = nullptr;
 };

 // backend context
@@ -355,15 +368,8 @@ struct ggml_backend_opencl_context {
 #endif // GGML_OPENCL_USE_ADRENO_KERNELS
 };

-static ggml_backend_device g_ggml_backend_opencl_device;
-static ggml_backend_opencl_device_context g_ggml_ctx_dev_main {
-    /*.platform =*/ nullptr,
-    /*.platform_nane =*/ "",
-    /*.device =*/ nullptr,
-    /*.device_name =*/ "",
-};
-
-static int ggml_backend_opencl_n_devices = 0;
+// All registered devices with a default device in the front.
+static std::vector<ggml_backend_device> g_ggml_backend_opencl_devices;

 // Profiling
 #ifdef GGML_OPENCL_PROFILING
@@ -1118,25 +1124,19 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
     GGML_LOG_CONT("\n");
 }

-static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
-    static bool initialized = false;
-    static ggml_backend_opencl_context *backend_ctx = nullptr;
+// XXX static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
+// XXX     static bool initialized = false;
+// XXX     static ggml_backend_opencl_context *backend_ctx = nullptr;

-    if (initialized) {
-        return backend_ctx;
-    }
+static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev);

-    ggml_backend_opencl_device_context *dev_ctx = (ggml_backend_opencl_device_context *)dev->context;
-    GGML_ASSERT(dev_ctx);
-    GGML_ASSERT(dev_ctx->platform == nullptr);
-    GGML_ASSERT(dev_ctx->device == nullptr);
-    GGML_ASSERT(backend_ctx == nullptr);
+namespace /* anonymous */ {
+extern struct ggml_backend_device_i ggml_backend_opencl_device_i;
+}

-    initialized = true;
-    backend_ctx = new ggml_backend_opencl_context();
-    backend_ctx->gpu_family = GPU_FAMILY::UNKNOWN;
-
-    cl_int err;
+// Look for available and suitable devices.
+static std::vector<ggml_backend_device> ggml_opencl_probe_devices(ggml_backend_reg * reg) {
+    std::vector<ggml_backend_device> found_devices;

 #ifdef GGML_OPENCL_PROFILING
     GGML_LOG_INFO("ggml_opencl: OpenCL profiling enabled\n");
@@ -1169,11 +1169,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     struct cl_device devices[NDEV];
     unsigned n_devices = 0;
     struct cl_device * default_device = NULL;
+    unsigned default_platform_number = 0;

     cl_platform_id platform_ids[NPLAT];
     if (clGetPlatformIDs(NPLAT, platform_ids, &n_platforms) != CL_SUCCESS) {
         GGML_LOG_ERROR("ggml_opencl: plaform IDs not available.\n");
-        return backend_ctx;
+        return found_devices;
     }

     for (unsigned i = 0; i < n_platforms; i++) {
@@ -1208,19 +1209,22 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
         }

         if (default_device == NULL && p->default_device != NULL) {
-            default_device = p->default_device;
+            default_device = p->default_device;
+            default_platform_number = i;
         }
     }

     if (n_devices == 0) {
         GGML_LOG_ERROR("ggml_opencl: could find any OpenCL devices.\n");
-        return backend_ctx;
+        return found_devices;
     }

-    char * user_platform_string = getenv("GGML_OPENCL_PLATFORM");
-    char * user_device_string = getenv("GGML_OPENCL_DEVICE");
-    int user_platform_number = -1;
-    int user_device_number = -1;
+    char * user_platform_string = getenv("GGML_OPENCL_PLATFORM");
+    char * user_device_string = getenv("GGML_OPENCL_DEVICE");
+    int user_platform_number = -1;
+    int user_device_number = -1;
+    cl_device * candidate_devices = nullptr;
+    unsigned n_candidate_devices = 0;

     unsigned n;
     if (user_platform_string != NULL && sscanf(user_platform_string, " %u", &n) == 1 && n < n_platforms) {
@@ -1235,12 +1239,11 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
             GGML_LOG_ERROR("ggml_opencl: invalid device number %d\n", user_device_number);
             exit(1);
         }
-        default_device = &platform->devices[user_device_number];
+        default_device = &platform->devices[user_device_number];
+        candidate_devices = platform->devices;
+        n_candidate_devices = platform->n_devices;
     } else {
-
-        struct cl_device * selected_devices = devices;
-        unsigned n_selected_devices = n_devices;
-
+        // Choose a platform by matching a substring.
         if (user_platform_number == -1 && user_platform_string != NULL && user_platform_string[0] != 0) {
             for (unsigned i = 0; i < n_platforms; i++) {
                 struct cl_platform * p = &platforms[i];
@@ -1255,20 +1258,20 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
                 exit(1);
             }
         }
-        if (user_platform_number != -1) {
-            struct cl_platform * p = &platforms[user_platform_number];
-            selected_devices = p->devices;
-            n_selected_devices = p->n_devices;
-            default_device = p->default_device;
-            if (n_selected_devices == 0) {
-                GGML_LOG_ERROR("ggml_opencl: selected platform '%s' does not have any devices.\n", p->name);
-                exit(1);
-            }
+
+        int platform_idx = user_platform_number != -1 ? user_platform_number : default_platform_number;
+        struct cl_platform * p = &platforms[platform_idx];
+        candidate_devices = p->devices;
+        n_candidate_devices = p->n_devices;
+        default_device = p->default_device;
+        if (n_candidate_devices == 0) {
+            GGML_LOG_ERROR("ggml_opencl: selected platform '%s' does not have any devices.\n", p->name);
+            exit(1);
         }

         if (user_device_number == -1 && user_device_string != NULL && user_device_string[0] != 0) {
-            for (unsigned i = 0; i < n_selected_devices; i++) {
-                struct cl_device * d = &selected_devices[i];
+            for (unsigned i = 0; i < n_candidate_devices; i++) {
+                struct cl_device * d = &candidate_devices[i];
                 if (strstr(d->name, user_device_string) != NULL) {
                     user_device_number = d->number;
                     break;
@@ -1280,71 +1283,145 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
             }
         }
         if (user_device_number != -1) {
-            selected_devices = &devices[user_device_number];
-            n_selected_devices = 1;
-            default_device = &selected_devices[0];
+            candidate_devices = &devices[user_device_number];
+            n_candidate_devices = 1;
+            default_device = &candidate_devices[0];
         }

-        GGML_ASSERT(n_selected_devices > 0);
+        GGML_ASSERT(n_candidate_devices > 0);

         if (default_device == NULL) {
-            default_device = &selected_devices[0];
+            default_device = &candidate_devices[0];
         }
     }

-    GGML_LOG_INFO("ggml_opencl: selecting platform: '%s'\n", default_device->platform->name);
-    GGML_LOG_INFO("ggml_opencl: selecting device: '%s (%s)'\n", default_device->name, default_device->version);
-    if (default_device->type != CL_DEVICE_TYPE_GPU) {
-        GGML_LOG_WARN("ggml_opencl: warning, not a GPU: '%s'.\n", default_device->name);
+    GGML_ASSERT(n_candidate_devices != 0 && candidate_devices);
+
+    // Put the default device in front.
+    for (unsigned i = 1; i < n_candidate_devices; i++) {
+        if (&candidate_devices[i] == default_device) {
+            std::swap(candidate_devices[0], candidate_devices[i]);
+            default_device = &candidate_devices[0];
+            break;
+        }
     }

-    dev_ctx->platform = default_device->platform->id;
-    dev_ctx->device = default_device->id;
-    backend_ctx->device = default_device->id;
+    GGML_LOG_INFO("ggml_opencl: selected platform: '%s'\n", default_device->platform->name);

-    if (strstr(default_device->name, "Adreno") ||
-        strstr(default_device->name, "Qualcomm") ||
-        strstr(default_device->version, "Adreno")) {
+    std::vector<cl_device_id> device_ids;
+    for (auto dev = candidate_devices, dev_end = candidate_devices + n_candidate_devices; dev != dev_end; dev++) {
+        device_ids.push_back(dev->id);
+    }
+
+    cl_int err;
+    cl_context shared_context;
+    cl_context_properties properties[] = { (intptr_t) CL_CONTEXT_PLATFORM, (intptr_t) default_device->platform->id, 0 };
+
+    CL_CHECK(
+        (shared_context = clCreateContext(properties, device_ids.size(), device_ids.data(), NULL, NULL, &err), err));
+
+    for (auto dev = candidate_devices, dev_end = candidate_devices + n_candidate_devices; dev != dev_end; dev++) {
+        GGML_LOG_INFO("\nggml_opencl: device: '%s (%s)'\n", dev->name, dev->version);
+
+        auto dev_ctx = std::unique_ptr<ggml_backend_opencl_device_context>(new ggml_backend_opencl_device_context{
+            /*.platform =*/dev->platform->id,
+            /*.platform_name =*/dev->platform->name,
+            /*.device =*/dev->id,
+            /*.device_name =*/dev->name,
+            /*.device_type =*/dev->type,
+            /*.device_version =*/dev->version,
+            /*.backend_ctx =*/nullptr,
+            /*.buffer_type =*/{},
+            /*.context =*/shared_context,
+        });
+
+        found_devices.push_back(ggml_backend_device{
+            /* .iface   = */ ggml_backend_opencl_device_i,
+            /* .reg     = */ reg,
+            /* .context = */ dev_ctx.get(),
+        });
+
+        if (!ggml_cl2_init(&found_devices.back())) {
+            found_devices.pop_back();
+            GGML_LOG_INFO("ggml_opencl: drop unsupported device.\n");
+            continue;
+        }
+
+        dev_ctx.release();
+    }
+
+    if (found_devices.size()) {
+        auto * dev_ctx = static_cast<ggml_backend_opencl_device_context *>(found_devices.front().context);
+        GGML_LOG_INFO("ggml_opencl: default device: '%s (%s)'\n", dev_ctx->device_name.c_str(),
+                      dev_ctx->device_version.c_str());
+
+        if (dev_ctx->device_type != CL_DEVICE_TYPE_GPU) {
+            GGML_LOG_WARN("ggml_opencl: warning, the default device is not a GPU: '%s'.\n",
+                          dev_ctx->device_name.c_str());
+        }
+    }
+
+    return found_devices;
+}
+
+// Initialize device if it is supported (returns nullptr if it is not).
+static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
+    GGML_ASSERT(dev);
+    GGML_ASSERT(dev->context);
+
+    ggml_backend_opencl_device_context * dev_ctx = (ggml_backend_opencl_device_context *) dev->context;
+    GGML_ASSERT(dev_ctx->platform);
+    GGML_ASSERT(dev_ctx->device);
+
+    if (dev_ctx->backend_ctx) {
+        return dev_ctx->backend_ctx;
+    }
+
+    auto backend_ctx = std::make_unique<ggml_backend_opencl_context>();
+    backend_ctx->device = dev_ctx->device;
+    backend_ctx->gpu_family = GPU_FAMILY::UNKNOWN;
+
+    if (strstr(dev_ctx->device_name.c_str(), "Adreno") ||
+        strstr(dev_ctx->device_name.c_str(), "Qualcomm") ||
+        strstr(dev_ctx->device_version.c_str(), "Adreno")) {
         backend_ctx->gpu_family = GPU_FAMILY::ADRENO;
         // Usually device version contains the detailed device name
-        backend_ctx->adreno_gen = get_adreno_gpu_gen(default_device->version);
+        backend_ctx->adreno_gen = get_adreno_gpu_gen(dev_ctx->device_version.c_str());
         if (backend_ctx->adreno_gen == ADRENO_GPU_GEN::ADRENO_UNKNOWN) {
-            backend_ctx->adreno_gen = get_adreno_gpu_gen(default_device->name);
+            backend_ctx->adreno_gen = get_adreno_gpu_gen(dev_ctx->device_name.c_str());
        }

         // Use wave size of 64 for all Adreno GPUs.
         backend_ctx->adreno_wave_size = 64;
-    } else if (strstr(default_device->name, "Intel")) {
+    } else if (strstr(dev_ctx->device_name.c_str(), "Intel")) {
         backend_ctx->gpu_family = GPU_FAMILY::INTEL;
     } else {
-        GGML_LOG_ERROR("Unsupported GPU: %s\n", default_device->name);
+        GGML_LOG_ERROR("Unsupported GPU: %s\n", dev_ctx->device_name.c_str());
         backend_ctx->gpu_family = GPU_FAMILY::UNKNOWN;
-        return backend_ctx;
+        return nullptr;
     }

 #ifdef GGML_OPENCL_USE_ADRENO_KERNELS
     if (backend_ctx->gpu_family != GPU_FAMILY::ADRENO) {
         GGML_LOG_ERROR("ggml_opencl: Adreno-specific kernels should not be enabled for non-Adreno GPUs; "
             "run on an Adreno GPU or recompile with CMake option `-DGGML_OPENCL_USE_ADRENO_KERNELS=OFF`\n");
-        return backend_ctx;
+        return nullptr;
     }
 #endif

     // Populate backend device name
-    dev_ctx->platform_name = default_device->platform->name;
-    dev_ctx->device_name = default_device->name;
-    backend_ctx->device_name = default_device->name;
+    backend_ctx->device_name = dev_ctx->device_name;

     // A local ref of cl_device_id for convenience
     cl_device_id device = backend_ctx->device;

-    ggml_cl_version platform_version = get_opencl_platform_version(default_device->platform->id);
+    ggml_cl_version platform_version = get_opencl_platform_version(dev_ctx->platform);

     // Check device OpenCL version, OpenCL 2.0 or above is required
     ggml_cl_version opencl_c_version = get_opencl_c_version(platform_version, device);
     if (opencl_c_version.major < 2) {
         GGML_LOG_ERROR("ggml_opencl: OpenCL 2.0 or above is required\n");
-        return backend_ctx;
+        return nullptr;
     }

     // Check driver version
@@ -1375,7 +1452,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     // fp16 is required
     if (!backend_ctx->fp16_support) {
         GGML_LOG_ERROR("ggml_opencl: device does not support FP16\n");
-        return backend_ctx;
+        return nullptr;
     }

     // If OpenCL 3.0 is supported, then check for cl_khr_subgroups, which becomes
@@ -1384,7 +1461,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
         strstr(ext_buffer, "cl_intel_subgroups") == NULL) {
         GGML_LOG_ERROR("ggml_opencl: device does not support subgroups (cl_khr_subgroups or cl_intel_subgroups) "
             "(note that subgroups is an optional feature in OpenCL 3.0)\n");
-        return backend_ctx;
+        return nullptr;
     }

     cl_uint base_align_in_bits;
@@ -1426,14 +1503,10 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
 #endif // GGML_OPENCL_USE_ADRENO_KERNELS

-    cl_context_properties properties[] = {
-        (intptr_t)CL_CONTEXT_PLATFORM, (intptr_t)dev_ctx->platform, 0
-    };
-
-    CL_CHECK((backend_ctx->context = clCreateContext(properties, 1, &device, NULL, NULL, &err), err));
+    cl_int err;

     // A local ref of cl_context for convenience
-    cl_context context = backend_ctx->context;
+    cl_context context = backend_ctx->context = dev_ctx->context;

     //CL_CHECK((queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err),
     //    (err != CL_INVALID_QUEUE_PROPERTIES && err != CL_INVALID_VALUE ? err :
@@ -1446,7 +1519,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     CL_CHECK((backend_ctx->queue = clCreateCommandQueue(context, device, command_queue_props, &err), err));

     // Load kernels
-    load_cl_kernels(backend_ctx, opencl_c_version);
+    load_cl_kernels(backend_ctx.get(), opencl_c_version);

 #ifdef GGML_OPENCL_USE_ADRENO_KERNELS
     // Allocate intermediate buffers and images
@@ -1476,10 +1549,8 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     CL_CHECK((backend_ctx->B_d_max = clCreateBuffer(context, 0, max_B_d_bytes, NULL, &err), err));
 #endif // GGML_OPENCL_USE_ADRENO_KERNELS

-    // For now we support a single devices
-    ggml_backend_opencl_n_devices = 1;
-
-    return backend_ctx;
+    dev_ctx->backend_ctx = backend_ctx.release();
+    return dev_ctx->backend_ctx;
 }

 static void ggml_cl2_free(void) {
@@ -1684,10 +1755,46 @@ static void ggml_backend_opencl_synchronize(ggml_backend_t backend) {
     GGML_UNUSED(backend);
 }

+// Synchronizes the device of 'backend_ctx' with the other devices so that
+// commands enqueued on it won't start until the commands already enqueued
+// on the other devices have completed.
+static void sync_with_other_backends(ggml_backend_opencl_context * backend_ctx) {
+    if (g_ggml_backend_opencl_devices.size() < 2)
+        return; // No other devices to synchronize with.
+
+    std::vector<cl_event> events;
+    events.reserve(g_ggml_backend_opencl_devices.size());
+
+    for (ggml_backend_device & backend_dev : g_ggml_backend_opencl_devices) {
+        auto * other_backend_ctx = ggml_cl2_init(&backend_dev);
+        if (backend_ctx != other_backend_ctx) {
+            cl_event ev;
+            CL_CHECK(clEnqueueMarkerWithWaitList(other_backend_ctx->queue, 0, nullptr, &ev));
+            CL_CHECK(clFlush(other_backend_ctx->queue));
+            events.push_back(ev);
+        }
+    }
+
+    CL_CHECK(clEnqueueBarrierWithWaitList(backend_ctx->queue, events.size(), events.data(), nullptr));
+    for (auto ev : events) {
+        CL_CHECK(clReleaseEvent(ev));
+    }
+}
+
+static void sync_with_other_backends(ggml_backend_t backend) {
+    auto * backend_ctx = static_cast<ggml_backend_opencl_context *>(backend->context);
+    sync_with_other_backends(backend_ctx);
+}
+
 static ggml_status ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     for (int i = 0; i < cgraph->n_nodes; i++) {
         ggml_tensor * node = cgraph->nodes[i];

+        // NOTE: this may oversynchronize by synchronizing with
+        // backends/devices which don't compute 'cgraph's
+        // dependencies.
+        sync_with_other_backends(backend);
+
         if (node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
             continue;
         }
@@ -2292,8 +2399,8 @@ static void ggml_backend_opencl_buffer_get_tensor(ggml_backend_buffer_t buffer,
     cl_context context = backend_ctx->context;
     cl_command_queue queue = backend_ctx->queue;

-    // Make sure all previously submitted commands are finished.
-    CL_CHECK(clFinish(queue));
+    // Make sure all previously submitted commands in other devices are finished.
+    sync_with_other_backends(backend_ctx);

 #ifdef GGML_OPENCL_SOA_Q
     // In end-to-end runs, get_tensor is usually used to get back the logits,
@@ -2397,13 +2504,8 @@ static ggml_backend_buffer_t ggml_backend_opencl_buffer_type_alloc_buffer(ggml_b
 }

 static size_t ggml_backend_opencl_buffer_type_get_alignment(ggml_backend_buffer_type_t buffer_type) {
-    // FIXME: not thread safe, device may not be initialized yet
-    static cl_uint alignment = -1;
-    if (alignment == (cl_uint)-1) {
-        ggml_backend_opencl_context * backend_ctx = ggml_cl2_init(buffer_type->device);
-        alignment = backend_ctx->alignment;
-    }
-    return alignment;
+    ggml_backend_opencl_context * backend_ctx = ggml_cl2_init(buffer_type->device);
+    return backend_ctx->alignment;
 }

 static size_t ggml_backend_opencl_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) {
@@ -2430,16 +2532,6 @@ static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
     /* .is_host          = */ NULL,
 };

-ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type() {
-    static ggml_backend_buffer_type buffer_type = {
-        /* .iface = */ ggml_backend_opencl_buffer_type_interface,
-        /* .device = */ &g_ggml_backend_opencl_device,
-        /* .context = */ nullptr,
-    };
-
-    return &buffer_type;
-}
-
 //
 // backend device
 //
@@ -2497,9 +2589,15 @@ static ggml_backend_t ggml_backend_opencl_device_init(ggml_backend_dev_t dev, co
 }

 static ggml_backend_buffer_type_t ggml_backend_opencl_device_get_buffer_type(ggml_backend_dev_t dev) {
-    return ggml_backend_opencl_buffer_type();
+    auto * dev_ctx = static_cast<ggml_backend_opencl_device_context *>(dev->context);

-    GGML_UNUSED(dev);
+    dev_ctx->buffer_type = ggml_backend_buffer_type{
+        /* .iface   = */ ggml_backend_opencl_buffer_type_interface,
+        /* .device  = */ dev,
+        /* .context = */ nullptr,
+    };
+
+    return &dev_ctx->buffer_type;
 }

 static ggml_backend_buffer_t ggml_backend_opencl_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
@@ -2515,12 +2613,21 @@ static bool ggml_backend_opencl_device_supports_op(ggml_backend_dev_t dev, const
 }

 static bool ggml_backend_opencl_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
-    return buft->iface.get_name == ggml_backend_opencl_buffer_type_get_name;
+    // Check that 'dev' and 'buft' are objects belonging to this backend.
+    if (dev->iface.get_name != ggml_backend_opencl_device_get_name ||
+        buft->iface.get_name != ggml_backend_opencl_buffer_type_get_name) {
+        return false;
+    }

-    GGML_UNUSED(dev);
+    // Check cl_context is the same. clEnqueue* commands may not use
+    // buffers from another cl_context.
+    ggml_backend_opencl_context * backend_ctx0 = ggml_cl2_init(dev);
+    ggml_backend_opencl_context * backend_ctx1 = ggml_cl2_init(buft->device);
+    return backend_ctx0->context == backend_ctx1->context;
 }

-static struct ggml_backend_device_i ggml_backend_opencl_device_i = {
+namespace /* anonymous */ {
+struct ggml_backend_device_i ggml_backend_opencl_device_i = {
     /* .get_name = */ ggml_backend_opencl_device_get_name,
     /* .get_description = */ ggml_backend_opencl_device_get_description,
     /* .get_memory = */ ggml_backend_opencl_device_get_memory,
@@ -2537,6 +2644,7 @@ static struct ggml_backend_device_i ggml_backend_opencl_device_i = {
     /* .event_free = */ NULL,
     /* .event_synchronize = */ NULL,
 };
+}

 // Backend registry

@@ -2547,15 +2655,15 @@ static const char * ggml_backend_opencl_reg_get_name(ggml_backend_reg_t reg) {
 }

 static size_t ggml_backend_opencl_reg_device_count(ggml_backend_reg_t reg) {
-    return ggml_backend_opencl_n_devices;
+    return g_ggml_backend_opencl_devices.size();

     GGML_UNUSED(reg);
 }

 static ggml_backend_dev_t ggml_backend_opencl_reg_device_get(ggml_backend_reg_t reg, size_t index) {
-    GGML_ASSERT(index == 0);
+    GGML_ASSERT(index < ggml_backend_opencl_reg_device_count(reg));

-    return &g_ggml_backend_opencl_device;
+    return &g_ggml_backend_opencl_devices[index];

     GGML_UNUSED(reg);
     GGML_UNUSED(index);
@@ -2569,27 +2677,23 @@ static struct ggml_backend_reg_i ggml_backend_opencl_reg_i = {
 };

 ggml_backend_reg_t ggml_backend_opencl_reg(void) {
-    // TODO: make this thread-safe somehow?
+    static std::mutex mutex;
     static ggml_backend_reg reg;
     static bool initialized = false;
+    std::lock_guard<std::mutex> lock(mutex);

-    if (!initialized) {
-        reg = ggml_backend_reg {
-            /* .api_version = */ GGML_BACKEND_API_VERSION,
-            /* .iface = */ ggml_backend_opencl_reg_i,
-            /* .context = */ NULL,
-        };
-
-        g_ggml_backend_opencl_device = ggml_backend_device {
-            /* .iface = */ ggml_backend_opencl_device_i,
-            /* .reg = */ &reg,
-            /* .context = */ &g_ggml_ctx_dev_main,
-        };
-
-        ggml_cl2_init(&g_ggml_backend_opencl_device);
-
-        initialized = true;
+    if (initialized) {
+        return &reg;
     }
+    initialized = true;
+
+    g_ggml_backend_opencl_devices = ggml_opencl_probe_devices(&reg);
+
+    reg = ggml_backend_reg{
+        /* .api_version = */ GGML_BACKEND_API_VERSION,
+        /* .iface       = */ ggml_backend_opencl_reg_i,
+        /* .context     = */ NULL,
+    };

     return &reg;
 }