diff --git a/backends/llama_cpp_official/__init__.py b/backends/llama_cpp_official/__init__.py
index 3e868e8a..70e74610 100644
--- a/backends/llama_cpp_official/__init__.py
+++ b/backends/llama_cpp_official/__init__.py
@@ -38,7 +38,9 @@ class LLAMACPP(GPTBackend):
         if seed <=0:
             seed = random.randint(1, 2**31)
-        self.model = Llama(model_path=f"./models/llama_cpp_official/{self.config['model']}", n_ctx=self.config["ctx_size"], n_gpu_layers=40, seed=seed)
+        if "n_gpu_layers" not in self.config:
+            self.config["n_gpu_layers"] = 40
+        self.model = Llama(model_path=f"./models/llama_cpp_official/{self.config['model']}", n_ctx=self.config["ctx_size"], n_gpu_layers=self.config["n_gpu_layers"], seed=seed)
 
     def tokenize(self, prompt):
diff --git a/configs/default.yaml b/configs/default.yaml
index eb656a3f..b1db8119 100644
--- a/configs/default.yaml
+++ b/configs/default.yaml
@@ -1,6 +1,7 @@
 version: 4
 config: default
 ctx_size: 2048
+n_gpu_layers: 40
 db_path: databases/database.db
 debug: false
 n_threads: 8
diff --git a/installations/install_backend_gpu.bat b/installations/install_backend_gpu.bat
new file mode 100644
index 00000000..4e3b66f6
--- /dev/null
+++ b/installations/install_backend_gpu.bat
@@ -0,0 +1,7 @@
+echo This will recompile llama-cpp-python to use your hardware with GPU enabled.
+pip uninstall llama-cpp-python -y
+rem First we need to purge any old installation
+pip cache purge
+set CMAKE_ARGS=-DLLAMA_CUBLAS=on
+set FORCE_CMAKE=1
+pip install llama-cpp-python --upgrade
\ No newline at end of file
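
For illustration, here is a minimal standalone sketch of the pattern this patch introduces: read `n_gpu_layers` from the YAML config and fall back to 40 when the key is absent. The config-file loading, the hypothetical `seed` key, and the paths shown are assumptions for the example, not code from this repository; only the `Llama(...)` call mirrors the patched backend.

```python
# Minimal sketch of the configurable-n_gpu_layers pattern from this patch.
# Assumptions: configs/default.yaml exists, contains "model" and "ctx_size",
# and may optionally contain "n_gpu_layers" and "seed".
import random
import yaml
from llama_cpp import Llama

with open("configs/default.yaml") as f:
    config = yaml.safe_load(f)

seed = config.get("seed", -1)
if seed <= 0:
    # Mirror the backend: non-positive seeds are replaced with a random one.
    seed = random.randint(1, 2**31)

model = Llama(
    model_path=f"./models/llama_cpp_official/{config['model']}",
    n_ctx=config["ctx_size"],
    # dict.get gives the same "use 40 unless configured" behavior as the patch,
    # without mutating the loaded config in place.
    n_gpu_layers=config.get("n_gpu_layers", 40),
    seed=seed,
)
```

Using `dict.get` with a default is an alternative to the patch's in-place mutation of `self.config`; the patch's approach has the side benefit that the resolved value is visible to any later code that reads the config.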