CANN: Add x86 build ci (llama/12950)

* CANN: Add x86 build ci

* CANN: fix code format
This commit is contained in:
hipudding 2025-04-15 19:08:55 +08:00 committed by Georgi Gerganov
parent 43e3d25d93
commit 483eecae62

View File

@ -156,195 +156,196 @@ const ggml_cann_device_info& ggml_cann_info() {
 * This class manages a pool of CANN buffers for a specific device.
 */
struct ggml_cann_pool_buf_prio : public ggml_cann_pool { struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
/** /**
* @brief The maximum reuse margin for a buffer. * @brief The maximum reuse margin for a buffer.
*/ */
static const size_t max_reuse_margin = 1ull << 22; // 4MB static const size_t max_reuse_margin = 1ull << 22; // 4MB
/** /**
* @brief The minimum free margin for a buffer. * @brief The minimum free margin for a buffer.
*/ */
static const size_t min_free_margin = 1ull << 20; // 1MB static const size_t min_free_margin = 1ull << 20; // 1MB
/** /**
* @brief The alignment for buffer allocation. * @brief The alignment for buffer allocation.
*/ */
static const size_t alignment = 128; static const size_t alignment = 128;
/** /**
* @brief The device ID associated with this buffer pool. * @brief The device ID associated with this buffer pool.
*/ */
int device; int device;
/** /**
* @brief Whether to disable clean during buffer allocation. * @brief Whether to disable clean during buffer allocation.
*/ */
bool disable_clean = false; bool disable_clean = false;
/** /**
* @brief Structure representing a CANN buffer. * @brief Structure representing a CANN buffer.
*/ */
struct ggml_cann_buffer { struct ggml_cann_buffer {
void* ptr = nullptr; ///< Pointer to the buffer. void* ptr = nullptr; ///< Pointer to the buffer.
size_t size = 0; ///< Size of the buffer. size_t size = 0; ///< Size of the buffer.
std::chrono::steady_clock::time_point last_used; ///< Last used time. std::chrono::steady_clock::time_point last_used; ///< Last used time.
bool operator>(const ggml_cann_buffer& other) const { bool operator>(const ggml_cann_buffer& other) const {
return size > other.size; return size > other.size;
} }
}; };
/** /**
* @brief Array of CANN buffers in the pool. * @brief Array of CANN buffers in the pool.
*/ */
std::unordered_map<void*, size_t> buffer_pool; std::unordered_map<void*, size_t> buffer_pool;
std::priority_queue<ggml_cann_buffer, std::priority_queue<ggml_cann_buffer,
std::vector<ggml_cann_buffer>, std::vector<ggml_cann_buffer>,
std::greater<>> free_buffers ; std::greater<>> free_buffers ;
/** /**
* @brief Total size of all buffers in the pool. * @brief Total size of all buffers in the pool.
*/ */
size_t pool_size = 0; size_t pool_size = 0;
/** /**
* @brief Constructor to initialize the buffer pool for a specific device. * @brief Constructor to initialize the buffer pool for a specific device.
* *
* @param device The device ID to associate with this buffer pool. * @param device The device ID to associate with this buffer pool.
*/ */
explicit ggml_cann_pool_buf_prio(int device) : device(device) { explicit ggml_cann_pool_buf_prio(int device) : device(device) {
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
}
/**
* @brief Destructor to free all buffers in the pool.
*/
~ggml_cann_pool_buf_prio() {
ggml_cann_set_device(device);
for (auto& [b_ptr, b_size] : buffer_pool) {
aclrtFree(b_ptr);
pool_size -= b_size;
}
buffer_pool.clear();
GGML_ASSERT(pool_size == 0);
}
/**
* @brief Allocate a buffer of the given size.
*
* @param size The size of the buffer to allocate.
* @param actual_size A pointer to a variable to receive the actual size of
* the allocated buffer.
* @return A pointer to the allocated buffer.
*/
void* alloc(size_t size, size_t* actual_size) override {
size = GGML_PAD(size, alignment);
if (size == 0) {
size = alignment;
} }
/** void* ptr = nullptr;
* @brief Destructor to free all buffers in the pool. auto now = std::chrono::steady_clock::now();
*/
~ggml_cann_pool_buf_prio() {
ggml_cann_set_device(device);
for (auto& [b_ptr, b_size] : buffer_pool) {
aclrtFree(b_ptr);
pool_size -= b_size;
}
buffer_pool.clear();
GGML_ASSERT(pool_size == 0);
}
/** std::vector<ggml_cann_buffer> free_buffers_rest;
* @brief Allocate a buffer of the given size. free_buffers_rest.reserve(free_buffers.size());
* while (!free_buffers.empty()) {
* @param size The size of the buffer to allocate. auto b = free_buffers.top();
* @param actual_size A pointer to a variable to receive the actual size of free_buffers.pop();
* the allocated buffer.
* @return A pointer to the allocated buffer.
*/
void* alloc(size_t size, size_t* actual_size) override {
size = GGML_PAD(size, alignment);
if (size == 0) {
size = alignment;
}
void* ptr = nullptr; if (b.size >= size) {
auto now = std::chrono::steady_clock::now(); // reuse the buffer if the size is enough
const size_t margin = b.size - size;
std::vector<ggml_cann_buffer> free_buffers_rest; if (margin <= max_reuse_margin) {
free_buffers_rest.reserve(free_buffers.size()); *actual_size = b.size;
while (!free_buffers.empty()) { ptr = b.ptr;
auto b = free_buffers.top(); #ifdef DEBUG_CANN_MALLOC
free_buffers.pop();
if (b.size >= size) {
// reuse the buffer if the size is enough
const size_t margin = b.size - size;
if (margin <= max_reuse_margin) {
*actual_size = b.size;
ptr = b.ptr;
#ifdef DEBUG_CANN_MALLOC
GGML_LOG_INFO(
"cann pool[%d]: reused %p, "
"pool_size = %5u MB, "
"size = %5u MB, "
"margin = %5u MB\n",
device, b.ptr,
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
(uint32_t)(GGML_PAD(size, 1048576) / 1048576),
(uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
#endif
break;
}
}
bool should_clean = !disable_clean &&
b.size > min_free_margin &&
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
if (should_clean) {
// free the buffer if the size is needed to be freed
ACL_CHECK(aclrtFree(b.ptr));
pool_size -= b.size;
buffer_pool.erase(b.ptr);
#ifdef DEBUG_CANN_MALLOC
GGML_LOG_INFO( GGML_LOG_INFO(
"cann pool[%d]: clean %p, " "cann pool[%d]: reused %p, "
"pool_size = %5u MB, " "pool_size = %5u MB, "
"size = %5u MB\n", "size = %5u MB, "
"margin = %5u MB\n",
device, b.ptr, device, b.ptr,
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576)); (uint32_t)(GGML_PAD(size, 1048576) / 1048576),
#endif (uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
continue; #endif
break;
} }
free_buffers_rest.push_back(b);
}
for (ggml_cann_buffer &b : free_buffers_rest) {
free_buffers.push(std::move(b));
} }
#ifdef DEBUG_CANN_MALLOC bool should_clean = !disable_clean &&
GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); b.size > min_free_margin &&
#endif std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
if (ptr != nullptr) { if (should_clean) {
return ptr; // free the buffer if the size is needed to be freed
ACL_CHECK(aclrtFree(b.ptr));
pool_size -= b.size;
buffer_pool.erase(b.ptr);
#ifdef DEBUG_CANN_MALLOC
GGML_LOG_INFO(
"cann pool[%d]: clean %p, "
"pool_size = %5u MB, "
"size = %5u MB\n",
device, b.ptr,
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
#endif
continue;
} }
free_buffers_rest.push_back(b);
}
for (ggml_cann_buffer &b : free_buffers_rest) {
free_buffers.push(std::move(b));
}
// allocate a new buffer if no buffer can be reused #ifdef DEBUG_CANN_MALLOC
ggml_cann_set_device(device); GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST)); #endif
*actual_size = size; if (ptr != nullptr) {
pool_size += size;
#ifdef DEBUG_CANN_MALLOC
GGML_LOG_INFO(
"cann pool[%d]: allocate %p, "
"pool_size = %5u MB, "
"size = %5u MB\n",
device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
(uint32_t)(GGML_PAD(size, 1048576) / 1048576));
#endif
buffer_pool.emplace(ptr, size);
return ptr; return ptr;
} }
/** // allocate a new buffer if no buffer can be reused
* @brief Free a buffer and return it to the pool. ggml_cann_set_device(device);
* ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
* @param ptr Pointer to the buffer to free. *actual_size = size;
* @param size Size of the buffer to free. pool_size += size;
*/ #ifdef DEBUG_CANN_MALLOC
void free(void* ptr, size_t size) override { GGML_LOG_INFO(
auto it = buffer_pool.find(ptr); "cann pool[%d]: allocate %p, "
if (it == buffer_pool.end()) { "pool_size = %5u MB, "
GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr); "size = %5u MB\n",
} device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
(uint32_t)(GGML_PAD(size, 1048576) / 1048576));
#endif
buffer_pool.emplace(ptr, size);
return ptr;
}
auto now = std::chrono::steady_clock::now(); /**
free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now}); * @brief Free a buffer and return it to the pool.
#ifdef DEBUG_CANN_MALLOC *
GGML_LOG_INFO( * @param ptr Pointer to the buffer to free.
"cann pool[%d]: return %p, " * @param size Size of the buffer to free.
"pool_size = %5u MB\n", */
device, ptr, void free(void* ptr, size_t size) override {
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); GGML_UNUSED(size);
#endif auto it = buffer_pool.find(ptr);
if (it == buffer_pool.end()) {
GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
} }
};
auto now = std::chrono::steady_clock::now();
free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
#ifdef DEBUG_CANN_MALLOC
GGML_LOG_INFO(
"cann pool[%d]: return %p, "
"pool_size = %5u MB\n",
device, ptr,
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
#endif
}
};
/**
 * @brief A pool of CANN buffers(segment buffer).
@ -531,6 +532,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
* @param size Size of the buffer to free. * @param size Size of the buffer to free.
*/ */
void free(void* ptr, size_t size) override { void free(void* ptr, size_t size) override {
GGML_UNUSED(size);
for (int i = 0; i < MAX_BUFFERS; ++i) { for (int i = 0; i < MAX_BUFFERS; ++i) {
ggml_cann_buffer& b = buffer_pool[i]; ggml_cann_buffer& b = buffer_pool[i];
if (b.ptr != ptr) { if (b.ptr != ptr) {