diff --git a/backends/llama_cpp_official/__init__.py b/backends/llama_cpp_official/__init__.py
index 3e868e8a..70e74610 100644
--- a/backends/llama_cpp_official/__init__.py
+++ b/backends/llama_cpp_official/__init__.py
@@ -38,7 +38,9 @@ class LLAMACPP(GPTBackend):
         if seed <=0:
             seed = random.randint(1, 2**31)
-        self.model = Llama(model_path=f"./models/llama_cpp_official/{self.config['model']}", n_ctx=self.config["ctx_size"], n_gpu_layers=40, seed=seed)
+        if "n_gpu_layers" not in self.config:
+            self.config["n_gpu_layers"] = 40
+        self.model = Llama(model_path=f"./models/llama_cpp_official/{self.config['model']}", n_ctx=self.config["ctx_size"], n_gpu_layers=self.config["n_gpu_layers"], seed=seed)
 
     def tokenize(self, prompt):
diff --git a/configs/default.yaml b/configs/default.yaml
index eb656a3f..b1db8119 100644
--- a/configs/default.yaml
+++ b/configs/default.yaml
@@ -1,6 +1,7 @@
 version: 4
 config: default
 ctx_size: 2048
+n_gpu_layers: 40
 db_path: databases/database.db
 debug: false
 n_threads: 8
diff --git a/installations/install_backend_gpu.bat b/installations/install_backend_gpu.bat
new file mode 100644
index 00000000..4e3b66f6
--- /dev/null
+++ b/installations/install_backend_gpu.bat
@@ -0,0 +1,7 @@
+echo This will recompile llama-cpp-python to use your hardware with GPU enabled.
+pip uninstall llama-cpp-python -y
+rem First we need to purge any old installation
+pip cache purge
+set CMAKE_ARGS=-DLLAMA_CUBLAS=on
+set FORCE_CMAKE=1
+pip install llama-cpp-python --upgrade
\ No newline at end of file
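
For illustration, here is a minimal standalone sketch of the pattern this patch introduces: read `n_gpu_layers` from the YAML config and fall back to 40 when the key is absent. The config-file loading, the hypothetical `seed` key, and the paths shown are assumptions for the example, not code from this repository; only the `Llama(...)` call mirrors the patched backend.

```python
# Minimal sketch of the configurable-n_gpu_layers pattern from this patch.
# Assumptions: configs/default.yaml exists, contains "model" and "ctx_size",
# and may optionally contain "n_gpu_layers" and "seed".
import random
import yaml
from llama_cpp import Llama

with open("configs/default.yaml") as f:
    config = yaml.safe_load(f)

seed = config.get("seed", -1)
if seed <= 0:
    # Mirror the backend: non-positive seeds are replaced with a random one.
    seed = random.randint(1, 2**31)

model = Llama(
    model_path=f"./models/llama_cpp_official/{config['model']}",
    n_ctx=config["ctx_size"],
    # dict.get gives the same "use 40 unless configured" behavior as the patch,
    # without mutating the loaded config in place.
    n_gpu_layers=config.get("n_gpu_layers", 40),
    seed=seed,
)
```

Using `dict.get` with a default is an alternative to the patch's in-place mutation of `self.config`; the patch's approach has the side benefit that the resolved value is visible to any later code that reads the config.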