mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-04-28 23:09:47 +00:00
CANN: Add x86 build ci (llama/12950)
* CANN: Add x86 build ci * CANN: fix code format
This commit is contained in:
parent
43e3d25d93
commit
483eecae62
@ -156,195 +156,196 @@ const ggml_cann_device_info& ggml_cann_info() {
|
|||||||
* This class manages a pool of CANN buffers for a specific device.
|
* This class manages a pool of CANN buffers for a specific device.
|
||||||
*/
|
*/
|
||||||
struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
|
struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
|
||||||
/**
|
/**
|
||||||
* @brief The maximum reuse margin for a buffer.
|
* @brief The maximum reuse margin for a buffer.
|
||||||
*/
|
*/
|
||||||
static const size_t max_reuse_margin = 1ull << 22; // 4MB
|
static const size_t max_reuse_margin = 1ull << 22; // 4MB
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief The minimum free margin for a buffer.
|
* @brief The minimum free margin for a buffer.
|
||||||
*/
|
*/
|
||||||
static const size_t min_free_margin = 1ull << 20; // 1MB
|
static const size_t min_free_margin = 1ull << 20; // 1MB
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief The alignment for buffer allocation.
|
* @brief The alignment for buffer allocation.
|
||||||
*/
|
*/
|
||||||
static const size_t alignment = 128;
|
static const size_t alignment = 128;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief The device ID associated with this buffer pool.
|
* @brief The device ID associated with this buffer pool.
|
||||||
*/
|
*/
|
||||||
int device;
|
int device;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Whether to disable clean during buffer allocation.
|
* @brief Whether to disable clean during buffer allocation.
|
||||||
*/
|
*/
|
||||||
bool disable_clean = false;
|
bool disable_clean = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Structure representing a CANN buffer.
|
* @brief Structure representing a CANN buffer.
|
||||||
*/
|
*/
|
||||||
struct ggml_cann_buffer {
|
struct ggml_cann_buffer {
|
||||||
void* ptr = nullptr; ///< Pointer to the buffer.
|
void* ptr = nullptr; ///< Pointer to the buffer.
|
||||||
size_t size = 0; ///< Size of the buffer.
|
size_t size = 0; ///< Size of the buffer.
|
||||||
std::chrono::steady_clock::time_point last_used; ///< Last used time.
|
std::chrono::steady_clock::time_point last_used; ///< Last used time.
|
||||||
|
|
||||||
bool operator>(const ggml_cann_buffer& other) const {
|
bool operator>(const ggml_cann_buffer& other) const {
|
||||||
return size > other.size;
|
return size > other.size;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Array of CANN buffers in the pool.
|
* @brief Array of CANN buffers in the pool.
|
||||||
*/
|
*/
|
||||||
std::unordered_map<void*, size_t> buffer_pool;
|
std::unordered_map<void*, size_t> buffer_pool;
|
||||||
std::priority_queue<ggml_cann_buffer,
|
std::priority_queue<ggml_cann_buffer,
|
||||||
std::vector<ggml_cann_buffer>,
|
std::vector<ggml_cann_buffer>,
|
||||||
std::greater<>> free_buffers ;
|
std::greater<>> free_buffers ;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Total size of all buffers in the pool.
|
* @brief Total size of all buffers in the pool.
|
||||||
*/
|
*/
|
||||||
size_t pool_size = 0;
|
size_t pool_size = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Constructor to initialize the buffer pool for a specific device.
|
* @brief Constructor to initialize the buffer pool for a specific device.
|
||||||
*
|
*
|
||||||
* @param device The device ID to associate with this buffer pool.
|
* @param device The device ID to associate with this buffer pool.
|
||||||
*/
|
*/
|
||||||
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
|
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
|
||||||
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
|
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destructor to free all buffers in the pool.
|
||||||
|
*/
|
||||||
|
~ggml_cann_pool_buf_prio() {
|
||||||
|
ggml_cann_set_device(device);
|
||||||
|
for (auto& [b_ptr, b_size] : buffer_pool) {
|
||||||
|
aclrtFree(b_ptr);
|
||||||
|
pool_size -= b_size;
|
||||||
|
}
|
||||||
|
buffer_pool.clear();
|
||||||
|
GGML_ASSERT(pool_size == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Allocate a buffer of the given size.
|
||||||
|
*
|
||||||
|
* @param size The size of the buffer to allocate.
|
||||||
|
* @param actual_size A pointer to a variable to receive the actual size of
|
||||||
|
* the allocated buffer.
|
||||||
|
* @return A pointer to the allocated buffer.
|
||||||
|
*/
|
||||||
|
void* alloc(size_t size, size_t* actual_size) override {
|
||||||
|
size = GGML_PAD(size, alignment);
|
||||||
|
if (size == 0) {
|
||||||
|
size = alignment;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
void* ptr = nullptr;
|
||||||
* @brief Destructor to free all buffers in the pool.
|
auto now = std::chrono::steady_clock::now();
|
||||||
*/
|
|
||||||
~ggml_cann_pool_buf_prio() {
|
|
||||||
ggml_cann_set_device(device);
|
|
||||||
for (auto& [b_ptr, b_size] : buffer_pool) {
|
|
||||||
aclrtFree(b_ptr);
|
|
||||||
pool_size -= b_size;
|
|
||||||
}
|
|
||||||
buffer_pool.clear();
|
|
||||||
GGML_ASSERT(pool_size == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
std::vector<ggml_cann_buffer> free_buffers_rest;
|
||||||
* @brief Allocate a buffer of the given size.
|
free_buffers_rest.reserve(free_buffers.size());
|
||||||
*
|
while (!free_buffers.empty()) {
|
||||||
* @param size The size of the buffer to allocate.
|
auto b = free_buffers.top();
|
||||||
* @param actual_size A pointer to a variable to receive the actual size of
|
free_buffers.pop();
|
||||||
* the allocated buffer.
|
|
||||||
* @return A pointer to the allocated buffer.
|
|
||||||
*/
|
|
||||||
void* alloc(size_t size, size_t* actual_size) override {
|
|
||||||
size = GGML_PAD(size, alignment);
|
|
||||||
if (size == 0) {
|
|
||||||
size = alignment;
|
|
||||||
}
|
|
||||||
|
|
||||||
void* ptr = nullptr;
|
if (b.size >= size) {
|
||||||
auto now = std::chrono::steady_clock::now();
|
// reuse the buffer if the size is enough
|
||||||
|
const size_t margin = b.size - size;
|
||||||
std::vector<ggml_cann_buffer> free_buffers_rest;
|
if (margin <= max_reuse_margin) {
|
||||||
free_buffers_rest.reserve(free_buffers.size());
|
*actual_size = b.size;
|
||||||
while (!free_buffers.empty()) {
|
ptr = b.ptr;
|
||||||
auto b = free_buffers.top();
|
#ifdef DEBUG_CANN_MALLOC
|
||||||
free_buffers.pop();
|
|
||||||
|
|
||||||
if (b.size >= size) {
|
|
||||||
// reuse the buffer if the size is enough
|
|
||||||
const size_t margin = b.size - size;
|
|
||||||
if (margin <= max_reuse_margin) {
|
|
||||||
*actual_size = b.size;
|
|
||||||
ptr = b.ptr;
|
|
||||||
#ifdef DEBUG_CANN_MALLOC
|
|
||||||
GGML_LOG_INFO(
|
|
||||||
"cann pool[%d]: reused %p, "
|
|
||||||
"pool_size = %5u MB, "
|
|
||||||
"size = %5u MB, "
|
|
||||||
"margin = %5u MB\n",
|
|
||||||
device, b.ptr,
|
|
||||||
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
|
||||||
(uint32_t)(GGML_PAD(size, 1048576) / 1048576),
|
|
||||||
(uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool should_clean = !disable_clean &&
|
|
||||||
b.size > min_free_margin &&
|
|
||||||
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
|
|
||||||
if (should_clean) {
|
|
||||||
// free the buffer if the size is needed to be freed
|
|
||||||
ACL_CHECK(aclrtFree(b.ptr));
|
|
||||||
pool_size -= b.size;
|
|
||||||
buffer_pool.erase(b.ptr);
|
|
||||||
#ifdef DEBUG_CANN_MALLOC
|
|
||||||
GGML_LOG_INFO(
|
GGML_LOG_INFO(
|
||||||
"cann pool[%d]: clean %p, "
|
"cann pool[%d]: reused %p, "
|
||||||
"pool_size = %5u MB, "
|
"pool_size = %5u MB, "
|
||||||
"size = %5u MB\n",
|
"size = %5u MB, "
|
||||||
|
"margin = %5u MB\n",
|
||||||
device, b.ptr,
|
device, b.ptr,
|
||||||
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
||||||
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
|
(uint32_t)(GGML_PAD(size, 1048576) / 1048576),
|
||||||
#endif
|
(uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
|
||||||
continue;
|
#endif
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
free_buffers_rest.push_back(b);
|
|
||||||
}
|
|
||||||
for (ggml_cann_buffer &b : free_buffers_rest) {
|
|
||||||
free_buffers.push(std::move(b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG_CANN_MALLOC
|
bool should_clean = !disable_clean &&
|
||||||
GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
b.size > min_free_margin &&
|
||||||
#endif
|
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
|
||||||
if (ptr != nullptr) {
|
if (should_clean) {
|
||||||
return ptr;
|
// free the buffer if the size is needed to be freed
|
||||||
|
ACL_CHECK(aclrtFree(b.ptr));
|
||||||
|
pool_size -= b.size;
|
||||||
|
buffer_pool.erase(b.ptr);
|
||||||
|
#ifdef DEBUG_CANN_MALLOC
|
||||||
|
GGML_LOG_INFO(
|
||||||
|
"cann pool[%d]: clean %p, "
|
||||||
|
"pool_size = %5u MB, "
|
||||||
|
"size = %5u MB\n",
|
||||||
|
device, b.ptr,
|
||||||
|
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
||||||
|
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
|
||||||
|
#endif
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
free_buffers_rest.push_back(b);
|
||||||
|
}
|
||||||
|
for (ggml_cann_buffer &b : free_buffers_rest) {
|
||||||
|
free_buffers.push(std::move(b));
|
||||||
|
}
|
||||||
|
|
||||||
// allocate a new buffer if no buffer can be reused
|
#ifdef DEBUG_CANN_MALLOC
|
||||||
ggml_cann_set_device(device);
|
GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
||||||
ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
|
#endif
|
||||||
*actual_size = size;
|
if (ptr != nullptr) {
|
||||||
pool_size += size;
|
|
||||||
#ifdef DEBUG_CANN_MALLOC
|
|
||||||
GGML_LOG_INFO(
|
|
||||||
"cann pool[%d]: allocate %p, "
|
|
||||||
"pool_size = %5u MB, "
|
|
||||||
"size = %5u MB\n",
|
|
||||||
device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
|
||||||
(uint32_t)(GGML_PAD(size, 1048576) / 1048576));
|
|
||||||
#endif
|
|
||||||
buffer_pool.emplace(ptr, size);
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// allocate a new buffer if no buffer can be reused
|
||||||
* @brief Free a buffer and return it to the pool.
|
ggml_cann_set_device(device);
|
||||||
*
|
ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
|
||||||
* @param ptr Pointer to the buffer to free.
|
*actual_size = size;
|
||||||
* @param size Size of the buffer to free.
|
pool_size += size;
|
||||||
*/
|
#ifdef DEBUG_CANN_MALLOC
|
||||||
void free(void* ptr, size_t size) override {
|
GGML_LOG_INFO(
|
||||||
auto it = buffer_pool.find(ptr);
|
"cann pool[%d]: allocate %p, "
|
||||||
if (it == buffer_pool.end()) {
|
"pool_size = %5u MB, "
|
||||||
GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
|
"size = %5u MB\n",
|
||||||
}
|
device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
||||||
|
(uint32_t)(GGML_PAD(size, 1048576) / 1048576));
|
||||||
|
#endif
|
||||||
|
buffer_pool.emplace(ptr, size);
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
auto now = std::chrono::steady_clock::now();
|
/**
|
||||||
free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
|
* @brief Free a buffer and return it to the pool.
|
||||||
#ifdef DEBUG_CANN_MALLOC
|
*
|
||||||
GGML_LOG_INFO(
|
* @param ptr Pointer to the buffer to free.
|
||||||
"cann pool[%d]: return %p, "
|
* @param size Size of the buffer to free.
|
||||||
"pool_size = %5u MB\n",
|
*/
|
||||||
device, ptr,
|
void free(void* ptr, size_t size) override {
|
||||||
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
GGML_UNUSED(size);
|
||||||
#endif
|
auto it = buffer_pool.find(ptr);
|
||||||
|
if (it == buffer_pool.end()) {
|
||||||
|
GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
auto now = std::chrono::steady_clock::now();
|
||||||
|
free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
|
||||||
|
#ifdef DEBUG_CANN_MALLOC
|
||||||
|
GGML_LOG_INFO(
|
||||||
|
"cann pool[%d]: return %p, "
|
||||||
|
"pool_size = %5u MB\n",
|
||||||
|
device, ptr,
|
||||||
|
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief A pool of CANN buffers(segment buffer).
|
* @brief A pool of CANN buffers(segment buffer).
|
||||||
@ -531,6 +532,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
|
|||||||
* @param size Size of the buffer to free.
|
* @param size Size of the buffer to free.
|
||||||
*/
|
*/
|
||||||
void free(void* ptr, size_t size) override {
|
void free(void* ptr, size_t size) override {
|
||||||
|
GGML_UNUSED(size);
|
||||||
for (int i = 0; i < MAX_BUFFERS; ++i) {
|
for (int i = 0; i < MAX_BUFFERS; ++i) {
|
||||||
ggml_cann_buffer& b = buffer_pool[i];
|
ggml_cann_buffer& b = buffer_pool[i];
|
||||||
if (b.ptr != ptr) {
|
if (b.ptr != ptr) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user