CANN: Simplify the environment variable setting (#13104)

* Simplify the environment variable setting to specify the memory pool type.

* Adjust the GGML_CANN_ASYNC_MODE setting to accept yes, enable, 1, or on (case-insensitive) as valid options.

* update

* fix CI

* update

* delete whitespace

* fix according to review

* update CANN.md

* update CANN.md
This commit is contained in:
Xinpeng Dou
2025-06-09 19:47:39 +08:00
committed by Georgi Gerganov
parent 4737a8c780
commit 26282282fa
2 changed files with 39 additions and 10 deletions

View File

@ -37,6 +37,7 @@
#include <thread> #include <thread>
#include <unistd.h> #include <unistd.h>
#include <functional> #include <functional>
#include <optional>
#include "../include/ggml-cann.h" #include "../include/ggml-cann.h"
#include "../include/ggml.h" #include "../include/ggml.h"
@ -103,6 +104,9 @@ const ggml_cann_device_info& ggml_cann_info();
void ggml_cann_set_device(int32_t device); void ggml_cann_set_device(int32_t device);
int32_t ggml_cann_get_device(); int32_t ggml_cann_get_device();
std::optional<std::string> get_env(const std::string& name);
bool parse_bool(const std::string& value);
/** /**
* @brief Abstract base class for memory pools used by CANN. * @brief Abstract base class for memory pools used by CANN.
*/ */
@ -354,7 +358,8 @@ struct ggml_backend_cann_context {
: device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) { : device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
ggml_cann_set_device(device); ggml_cann_set_device(device);
description = aclrtGetSocName(); description = aclrtGetSocName();
async_mode = (getenv("GGML_CANN_ASYNC_MODE") != nullptr);
bool async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
device, async_mode ? "ON" : "OFF"); device, async_mode ? "ON" : "OFF");
} }

View File

@ -31,6 +31,8 @@
#include <mutex> #include <mutex>
#include <queue> #include <queue>
#include <chrono> #include <chrono>
#include <unordered_set>
#include <optional>
#include "ggml-impl.h" #include "ggml-impl.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
@ -93,6 +95,26 @@ int32_t ggml_cann_get_device() {
return id; return id;
} }
/**
 * @brief Read an environment variable and return its value in lower case.
 *
 * @param name Name of the environment variable to look up.
 * @return std::nullopt when the variable is not set; otherwise its value with
 *         every character passed through tolower(). A variable that is set to
 *         the empty string yields an engaged, empty string.
 */
std::optional<std::string> get_env(const std::string& name) {
    const char* val = std::getenv(name.c_str());
    if (!val) return std::nullopt;
    std::string res = std::string(val);
    // tolower() has undefined behavior for negative arguments other than EOF,
    // so every char is converted to unsigned char before the call.
    std::transform(res.begin(), res.end(), res.begin(),
                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
    return res;
}
/**
 * @brief Tell whether a string is one of the accepted "enabled" spellings.
 *
 * The comparison is exact (case-sensitive). Accepted spellings are
 * "on", "1", "yes", "y", "enable" and "true".
 *
 * @param value String to test.
 * @return true if value equals one of the accepted spellings, false otherwise.
 */
bool parse_bool(const std::string& value) {
    static const char* const accepted[] = {"on", "1", "yes", "y", "enable", "true"};
    for (const char* candidate : accepted) {
        if (value == candidate) {
            return true;
        }
    }
    return false;
}
/** /**
* @brief Initialize the CANN device information. * @brief Initialize the CANN device information.
* *
@ -214,7 +236,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
* @param device The device ID to associate with this buffer pool. * @param device The device ID to associate with this buffer pool.
*/ */
explicit ggml_cann_pool_buf_prio(int device) : device(device) { explicit ggml_cann_pool_buf_prio(int device) : device(device) {
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
} }
/** /**
@ -410,7 +432,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
* @param device The device ID to associate with this buffer pool. * @param device The device ID to associate with this buffer pool.
*/ */
explicit ggml_cann_pool_buf(int device) : device(device) { explicit ggml_cann_pool_buf(int device) : device(device) {
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
} }
/** /**
@ -731,16 +753,18 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
*/ */
std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device( std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
int device) { int device) {
bool disable_vmm = (getenv("GGML_CANN_DISABLE_VMM_POOL") != nullptr); std::string mem_pool_type = get_env("GGML_CANN_MEM_POOL").value_or("");
if (!disable_vmm && ggml_cann_info().devices[device].vmm) {
GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device); if (mem_pool_type == "prio") {
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
}
bool enable_buf_prio = (getenv("GGML_CANN_ENABLE_BUF_PRIO_POOL") != nullptr);
if (enable_buf_prio) {
GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device); GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device)); return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device));
} }
if (ggml_cann_info().devices[device].vmm && mem_pool_type != "leg") {
GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
}
GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device); GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device);
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device)); return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device));
} }