diff --git a/lollms/server/endpoints/lollms_generator.py b/lollms/server/endpoints/lollms_generator.py
index af27220..e684ef2 100644
--- a/lollms/server/endpoints/lollms_generator.py
+++ b/lollms/server/endpoints/lollms_generator.py
@@ -8,7 +8,7 @@ description:
 """
 
-from fastapi import APIRouter
+from fastapi import APIRouter, Request
 from lollms.server.elf_server import LOLLMSElfServer
 from pydantic import BaseModel
 from starlette.responses import StreamingResponse
@@ -16,21 +16,7 @@ from lollms.types import MSG_TYPE
 from lollms.utilities import detect_antiprompt, remove_text_from_string
 from ascii_colors import ASCIIColors
 class GenerateRequest(BaseModel):
-    """
-    Data model for the Generate Request.
-
-    Attributes:
-    - text: str representing the input text prompt for text generation.
-    - n_predict: int representing the number of predictions to generate.
-    - stream: bool indicating whether to stream the generated text or not.
-    - temperature: float representing the temperature parameter for text generation.
-    - top_k: int representing the top_k parameter for text generation.
-    - top_p: float representing the top_p parameter for text generation.
-    - repeat_penalty: float representing the repeat_penalty parameter for text generation.
-    - repeat_last_n: int representing the repeat_last_n parameter for text generation.
-    - seed: int representing the seed for text generation.
-    - n_threads: int representing the number of threads for text generation.
-    """
+
     text: str
     n_predict: int = 1024
     stream: bool = False
@@ -92,22 +78,35 @@ def get_generation_status():
 
 # ----------------------------------- Generation -----------------------------------------
 @router.post("/generate")
-def lollms_generate(request_data: GenerateRequest):
+def lollms_generate(request_data: Request):
     """
     Endpoint for generating text from prompts using the lollms fastapi server.
 
     Args:
-    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+    Data model for the Generate Request.
+    Attributes:
+    - text: str representing the input text prompt for text generation.
+    - n_predict: int representing the number of predictions to generate.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - top_k: int representing the top_k parameter for text generation.
+    - top_p: float representing the top_p parameter for text generation.
+    - repeat_penalty: float representing the repeat_penalty parameter for text generation.
+    - repeat_last_n: int representing the repeat_last_n parameter for text generation.
+    - seed: int representing the seed for text generation.
+    - n_threads: int representing the number of threads for text generation.
+
+    Returns:
     - If the elf_server binding is not None:
     - If stream is True, returns a StreamingResponse of generated text chunks.
     - If stream is False, returns the generated text as a string.
     - If the elf_server binding is None, returns None.
- """ - text = request_data.text - n_predict = request_data.n_predict - stream = request_data.stream + """ + text = request_data["text"] + n_predict = request_data.get("n_predict", 1024) + stream = request_data.get("stream", False) if elf_server.binding is not None: if stream: diff --git a/lollms/services/ollama/.gitignore b/lollms/services/ollama/.gitignore new file mode 100644 index 0000000..ad347f9 --- /dev/null +++ b/lollms/services/ollama/.gitignore @@ -0,0 +1 @@ +models.txt diff --git a/lollms/services/ollama/lollms_ollama.py b/lollms/services/ollama/lollms_ollama.py index a94dcd5..f9393e0 100644 --- a/lollms/services/ollama/lollms_ollama.py +++ b/lollms/services/ollama/lollms_ollama.py @@ -42,31 +42,21 @@ def verify_ollama(lollms_paths:LollmsPaths): sd_folder = shared_folder / "auto_sd" return sd_folder.exists() + def install_ollama(): if platform.system() == 'Windows': if os.path.exists('C:\\Windows\\System32\\wsl.exe'): - subprocess.run(['wsl', 'bash', str(Path(__file__).parent / 'install.sh')]) + subprocess.run(['wsl', 'bash', '-c', 'cp {} ~'.format(str(Path(__file__).parent / 'install.sh'))]) + subprocess.run(['wsl', 'bash', '-c', 'cp {} ~'.format(str(Path(__file__).parent / 'run_ollama.sh'))]) + subprocess.run(['wsl', 'bash', str(Path.home() / 'install.sh')]) else: subprocess.run(['wsl', '--install', 'Ubuntu']) - subprocess.run(['wsl', 'bash', str(Path(__file__).parent / 'install.sh')]) + subprocess.run(['wsl', 'bash', '-c', 'cp {} ~'.format(str(Path(__file__).parent / 'install.sh'))]) + subprocess.run(['wsl', 'bash', '-c', 'cp {} ~'.format(str(Path(__file__).parent / 'run_ollama.sh'))]) + subprocess.run(['wsl', 'bash', str(Path.home() / 'install.sh')]) else: subprocess.run(['bash', str(Path(__file__).parent / 'install.sh')]) -def get_sd(lollms_paths:LollmsPaths): - root_dir = lollms_paths.personal_path - shared_folder = root_dir/"shared" - sd_folder = shared_folder / "auto_sd" - sd_script_path = sd_folder / "lollms_sd.py" - git_pull(sd_folder) - - if sd_script_path.exists(): - ASCIIColors.success("lollms_sd found.") - ASCIIColors.success("Loading source file...",end="") - # use importlib to load the module from the file path - from lollms.services.sd.lollms_sd import LollmsSD - ASCIIColors.success("ok") - return LollmsSD - class Service: def __init__( self, @@ -74,7 +64,7 @@ class Service: base_url="http://127.0.0.1:11434", wait_max_retries = 5 ): - if base_url=="" or base_url=="http://127.0.0.1:7860": + if base_url=="" or base_url=="http://127.0.0.1:11434": base_url = None # Get the current directory lollms_paths = app.lollms_paths @@ -93,6 +83,14 @@ class Service: if not self.wait_for_service(1,False) and base_url is None: ASCIIColors.info("Loading ollama service") + # run ollama + if platform.system() == 'Windows': + if os.path.exists('C:\\Windows\\System32\\wsl.exe'): + subprocess.run(['wsl', 'bash', str(Path(__file__).parent / 'run_ollama.sh')]) + else: + subprocess.run(['bash', str(Path(__file__).parent / 'install.sh')]) + + # Wait until the service is available at http://127.0.0.1:7860/ self.wait_for_service(max_retries=wait_max_retries) diff --git a/lollms/services/ollama/run_ollama.sh b/lollms/services/ollama/run_ollama.sh index 14b84fb..23a8e32 100644 --- a/lollms/services/ollama/run_ollama.sh +++ b/lollms/services/ollama/run_ollama.sh @@ -1,2 +1,23 @@ -ollama serve& -ollama run mistral \ No newline at end of file +#!/bin/bash + +# Set the OLLAMA_HOST address +OLLAMA_HOST="0.0.0.0:11434" + +# Start the OLLAMA server +ollama serve & + +# Check if models.txt exists +if 
+if [ ! -f models.txt ]; then
+    # Create models.txt and add "mixtral" to it
+    echo "mixtral" > models.txt
+fi
+
+# Read the models from the file
+while IFS= read -r model
+do
+    # Run each model in the background
+    ollama run "$model" &
+done < models.txt
+
+# Wait for all background processes to finish
+wait
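
A quick way to exercise the reworked /generate endpoint from the first hunk is to POST the same JSON fields the handler reads (text, plus optional n_predict and stream). The snippet below is a minimal, illustrative sketch only: the host and port are placeholders not taken from this patch (adjust them to your lollms installation), and it uses the third-party requests package.

    # Minimal client sketch for the non-streaming /generate case.
    # NOTE: http://localhost:9600 is an assumed address, not a value from this patch.
    import requests

    payload = {
        "text": "Once upon a time",   # required prompt
        "n_predict": 128,             # optional; the handler falls back to 1024
        "stream": False,              # optional; the handler falls back to False
    }

    response = requests.post("http://localhost:9600/generate", json=payload, timeout=600)
    response.raise_for_status()
    print(response.text)

For the streaming path ("stream": true) the endpoint returns a StreamingResponse, so a client would pass stream=True to requests.post and iterate over response.iter_content() instead of reading the whole body at once.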