mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-18 14:58:09 +00:00
@ -14,6 +14,7 @@
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stdexcept>
|
||||
|
||||
#ifdef __has_include
|
||||
#if __has_include(<unistd.h>)
|
||||
@ -74,7 +75,7 @@ struct llama_file {
|
||||
llama_file(const char * fname, const char * mode) {
|
||||
fp = std::fopen(fname, mode);
|
||||
if (fp == NULL) {
|
||||
throw format("failed to open %s: %s", fname, std::strerror(errno));
|
||||
throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
|
||||
}
|
||||
seek(0, SEEK_END);
|
||||
size = tell();
|
||||
@ -100,17 +101,17 @@ struct llama_file {
|
||||
LLAMA_ASSERT(ret == 0); // same
|
||||
}
|
||||
|
||||
void read_raw(void * ptr, size_t size) {
|
||||
if (size == 0) {
|
||||
void read_raw(void * ptr, size_t len) const {
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
errno = 0;
|
||||
std::size_t ret = std::fread(ptr, size, 1, fp);
|
||||
std::size_t ret = std::fread(ptr, len, 1, fp);
|
||||
if (ferror(fp)) {
|
||||
throw format("read error: %s", strerror(errno));
|
||||
throw std::runtime_error(format("read error: %s", strerror(errno)));
|
||||
}
|
||||
if (ret != 1) {
|
||||
throw std::string("unexpectedly reached end of file");
|
||||
throw std::runtime_error(std::string("unexpectedly reached end of file"));
|
||||
}
|
||||
}
|
||||
|
||||
@ -126,14 +127,14 @@ struct llama_file {
|
||||
return std::string(chars.data(), len);
|
||||
}
|
||||
|
||||
void write_raw(const void * ptr, size_t size) {
|
||||
if (size == 0) {
|
||||
void write_raw(const void * ptr, size_t len) const {
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
errno = 0;
|
||||
size_t ret = std::fwrite(ptr, size, 1, fp);
|
||||
size_t ret = std::fwrite(ptr, len, 1, fp);
|
||||
if (ret != 1) {
|
||||
throw format("write error: %s", strerror(errno));
|
||||
throw std::runtime_error(format("write error: %s", strerror(errno)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -171,7 +172,7 @@ struct llama_mmap {
|
||||
#ifdef _POSIX_MAPPED_FILES
|
||||
static constexpr bool SUPPORTED = true;
|
||||
|
||||
llama_mmap(struct llama_file * file, bool prefetch = true) {
|
||||
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
|
||||
size = file->size;
|
||||
int fd = fileno(file->fp);
|
||||
int flags = MAP_SHARED;
|
||||
@ -180,12 +181,12 @@ struct llama_mmap {
|
||||
#endif
|
||||
addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
|
||||
if (addr == MAP_FAILED) {
|
||||
throw format("mmap failed: %s", strerror(errno));
|
||||
throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
|
||||
}
|
||||
|
||||
if (prefetch) {
|
||||
if (prefetch > 0) {
|
||||
// Advise the kernel to preload the mapped memory
|
||||
if (madvise(addr, file->size, MADV_WILLNEED)) {
|
||||
if (madvise(addr, std::min(file->size, prefetch), MADV_WILLNEED)) {
|
||||
fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
@ -207,7 +208,7 @@ struct llama_mmap {
|
||||
DWORD error = GetLastError();
|
||||
|
||||
if (hMapping == NULL) {
|
||||
throw format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str());
|
||||
throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
|
||||
}
|
||||
|
||||
addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
|
||||
@ -215,7 +216,7 @@ struct llama_mmap {
|
||||
CloseHandle(hMapping);
|
||||
|
||||
if (addr == NULL) {
|
||||
throw format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str());
|
||||
throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
|
||||
}
|
||||
|
||||
#if _WIN32_WINNT >= _WIN32_WINNT_WIN8
|
||||
@ -243,8 +244,9 @@ struct llama_mmap {
|
||||
#else
|
||||
static constexpr bool SUPPORTED = false;
|
||||
|
||||
llama_mmap(struct llama_file *) {
|
||||
throw std::string("mmap not supported");
|
||||
llama_mmap(struct llama_file *, bool prefetch = true) {
|
||||
(void)prefetch;
|
||||
throw std::runtime_error(std::string("mmap not supported"));
|
||||
}
|
||||
#endif
|
||||
};
|
||||
@ -265,9 +267,9 @@ struct llama_mlock {
|
||||
}
|
||||
}
|
||||
|
||||
void init(void * addr) {
|
||||
LLAMA_ASSERT(this->addr == NULL && this->size == 0);
|
||||
this->addr = addr;
|
||||
void init(void * ptr) {
|
||||
LLAMA_ASSERT(addr == NULL && size == 0);
|
||||
addr = ptr;
|
||||
}
|
||||
|
||||
void grow_to(size_t target_size) {
|
||||
@ -338,14 +340,14 @@ struct llama_mlock {
|
||||
return (size_t) si.dwPageSize;
|
||||
}
|
||||
|
||||
bool raw_lock(void * addr, size_t size) {
|
||||
bool raw_lock(void * ptr, size_t len) {
|
||||
for (int tries = 1; ; tries++) {
|
||||
if (VirtualLock(addr, size)) {
|
||||
if (VirtualLock(ptr, len)) {
|
||||
return true;
|
||||
}
|
||||
if (tries == 2) {
|
||||
fprintf(stderr, "warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
|
||||
size, this->size, llama_format_win_err(GetLastError()).c_str());
|
||||
len, size, llama_format_win_err(GetLastError()).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -361,7 +363,7 @@ struct llama_mlock {
|
||||
// is equal to the number of pages in its minimum working set minus
|
||||
// a small overhead."
|
||||
// Hopefully a megabyte is enough overhead:
|
||||
size_t increment = size + 1048576;
|
||||
size_t increment = len + 1048576;
|
||||
// The minimum must be <= the maximum, so we need to increase both:
|
||||
min_ws_size += increment;
|
||||
max_ws_size += increment;
|
||||
@ -373,8 +375,8 @@ struct llama_mlock {
|
||||
}
|
||||
}
|
||||
|
||||
void raw_unlock(void * addr, size_t size) {
|
||||
if (!VirtualUnlock(addr, size)) {
|
||||
void raw_unlock(void * ptr, size_t len) {
|
||||
if (!VirtualUnlock(ptr, len)) {
|
||||
fprintf(stderr, "warning: failed to VirtualUnlock buffer: %s\n",
|
||||
llama_format_win_err(GetLastError()).c_str());
|
||||
}
|
||||
@ -382,11 +384,16 @@ struct llama_mlock {
|
||||
#else
|
||||
static constexpr bool SUPPORTED = false;
|
||||
|
||||
void raw_lock(const void * addr, size_t size) {
|
||||
fprintf(stderr, "warning: mlock not supported on this system\n");
|
||||
size_t lock_granularity() {
|
||||
return (size_t) 65536;
|
||||
}
|
||||
|
||||
void raw_unlock(const void * addr, size_t size) {}
|
||||
bool raw_lock(const void * addr, size_t len) {
|
||||
fprintf(stderr, "warning: mlock not supported on this system\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
void raw_unlock(const void * addr, size_t len) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -395,36 +402,70 @@ struct llama_buffer {
|
||||
uint8_t * addr = NULL;
|
||||
size_t size = 0;
|
||||
|
||||
void resize(size_t size) {
|
||||
llama_buffer() = default;
|
||||
|
||||
void resize(size_t len) {
|
||||
delete[] addr;
|
||||
addr = new uint8_t[size];
|
||||
this->size = size;
|
||||
addr = new uint8_t[len];
|
||||
size = len;
|
||||
}
|
||||
|
||||
~llama_buffer() {
|
||||
delete[] addr;
|
||||
}
|
||||
|
||||
// disable copy and move
|
||||
llama_buffer(const llama_buffer&) = delete;
|
||||
llama_buffer(llama_buffer&&) = delete;
|
||||
llama_buffer& operator=(const llama_buffer&) = delete;
|
||||
llama_buffer& operator=(llama_buffer&&) = delete;
|
||||
};
|
||||
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
#include "ggml-cuda.h"
|
||||
struct llama_ctx_buffer {
|
||||
uint8_t * addr = NULL;
|
||||
bool is_cuda;
|
||||
size_t size = 0;
|
||||
|
||||
llama_ctx_buffer() = default;
|
||||
|
||||
void resize(size_t size) {
|
||||
if (addr) {
|
||||
ggml_cuda_host_free(addr);
|
||||
}
|
||||
free();
|
||||
|
||||
addr = (uint8_t *) ggml_cuda_host_malloc(size);
|
||||
if (addr) {
|
||||
is_cuda = true;
|
||||
}
|
||||
else {
|
||||
// fall back to pageable memory
|
||||
addr = new uint8_t[size];
|
||||
is_cuda = false;
|
||||
}
|
||||
this->size = size;
|
||||
}
|
||||
|
||||
~llama_ctx_buffer() {
|
||||
void free() {
|
||||
if (addr) {
|
||||
ggml_cuda_host_free(addr);
|
||||
if (is_cuda) {
|
||||
ggml_cuda_host_free(addr);
|
||||
}
|
||||
else {
|
||||
delete[] addr;
|
||||
}
|
||||
}
|
||||
addr = NULL;
|
||||
}
|
||||
|
||||
~llama_ctx_buffer() {
|
||||
free();
|
||||
}
|
||||
|
||||
// disable copy and move
|
||||
llama_ctx_buffer(const llama_ctx_buffer&) = delete;
|
||||
llama_ctx_buffer(llama_ctx_buffer&&) = delete;
|
||||
llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete;
|
||||
llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete;
|
||||
};
|
||||
#else
|
||||
typedef llama_buffer llama_ctx_buffer;
|
||||
|
Reference in New Issue
Block a user