upgraded ollama

Saifeddine ALOUI 2024-01-11 00:51:22 +01:00
parent d440b3db8a
commit 21d481e56a
4 changed files with 61 additions and 42 deletions


@@ -8,7 +8,7 @@ description:
"""
from fastapi import APIRouter
from fastapi import APIRouter, Request
from lollms.server.elf_server import LOLLMSElfServer
from pydantic import BaseModel
from starlette.responses import StreamingResponse
@@ -16,21 +16,7 @@ from lollms.types import MSG_TYPE
from lollms.utilities import detect_antiprompt, remove_text_from_string
from ascii_colors import ASCIIColors
class GenerateRequest(BaseModel):
"""
Data model for the Generate Request.
Attributes:
- text: str representing the input text prompt for text generation.
- n_predict: int representing the number of predictions to generate.
- stream: bool indicating whether to stream the generated text or not.
- temperature: float representing the temperature parameter for text generation.
- top_k: int representing the top_k parameter for text generation.
- top_p: float representing the top_p parameter for text generation.
- repeat_penalty: float representing the repeat_penalty parameter for text generation.
- repeat_last_n: int representing the repeat_last_n parameter for text generation.
- seed: int representing the seed for text generation.
- n_threads: int representing the number of threads for text generation.
"""
text: str
n_predict: int = 1024
stream: bool = False
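
For context, these are the fields a client sends to the generation endpoint. A hedged example of a non-streaming request (the host, port and router prefix are assumptions; only the /generate route and the field names appear in this diff):

    import requests

    # Placeholder address: the lollms elf server host/port is not shown in this commit.
    payload = {
        "text": "Explain what a binding is in lollms.",
        "n_predict": 256,
        "stream": False,
        "temperature": 0.7,   # remaining knobs follow the docstring above
        "top_k": 50,
        "top_p": 0.95,
    }
    response = requests.post("http://localhost:9600/generate", json=payload)
    print(response.text)

With stream set to True the endpoint returns a StreamingResponse instead, which a client can consume chunk by chunk (for example with requests' iter_content).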
@@ -92,22 +78,35 @@ def get_generation_status():
# ----------------------------------- Generation -----------------------------------------
@router.post("/generate")
def lollms_generate(request_data: GenerateRequest):
def lollms_generate(request_data: Request):
"""
Endpoint for generating text from prompts using the lollms fastapi server.
Args:
- request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
Data model for the Generate Request.
Attributes:
- text: str representing the input text prompt for text generation.
- n_predict: int representing the number of predictions to generate.
- stream: bool indicating whether to stream the generated text or not.
- temperature: float representing the temperature parameter for text generation.
- top_k: int representing the top_k parameter for text generation.
- top_p: float representing the top_p parameter for text generation.
- repeat_penalty: float representing the repeat_penalty parameter for text generation.
- repeat_last_n: int representing the repeat_last_n parameter for text generation.
- seed: int representing the seed for text generation.
- n_threads: int representing the number of threads for text generation.
Returns:
- If the elf_server binding is not None:
- If stream is True, returns a StreamingResponse of generated text chunks.
- If stream is False, returns the generated text as a string.
- If the elf_server binding is None, returns None.
"""
text = request_data.text
n_predict = request_data.n_predict
stream = request_data.stream
"""
text = request_data["text"]
n_predict = request_data.get("n_predict", 1024)
stream = request_data.get("stream", False)
if elf_server.binding is not None:
if stream:
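
The handler now takes the raw fastapi Request instead of the validated GenerateRequest model and pulls the generation parameters out of the request body with dict-style .get() defaults. The exact body parsing is not visible in this hunk; a minimal FastAPI sketch of the same idea (an assumed helper, not the commit's code) looks like:

    from fastapi import APIRouter, Request

    router = APIRouter()

    @router.post("/generate")
    async def lollms_generate_sketch(request: Request):
        # Parse the raw JSON body, then fall back to the documented defaults.
        data = await request.json()
        text = data["text"]
        n_predict = data.get("n_predict", 1024)
        stream = data.get("stream", False)
        return {"text": text, "n_predict": n_predict, "stream": stream}

This keeps the endpoint permissive about extra or missing fields, at the cost of the automatic validation the Pydantic model used to provide.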

lollms/services/ollama/.gitignore (vendored, new file, 1 line added)

@@ -0,0 +1 @@
models.txt


@@ -42,31 +42,21 @@ def verify_ollama(lollms_paths:LollmsPaths):
    sd_folder = shared_folder / "auto_sd"
    return sd_folder.exists()
def install_ollama():
    if platform.system() == 'Windows':
        if os.path.exists('C:\\Windows\\System32\\wsl.exe'):
            subprocess.run(['wsl', 'bash', str(Path(__file__).parent / 'install.sh')])
            subprocess.run(['wsl', 'bash', '-c', 'cp {} ~'.format(str(Path(__file__).parent / 'install.sh'))])
            subprocess.run(['wsl', 'bash', '-c', 'cp {} ~'.format(str(Path(__file__).parent / 'run_ollama.sh'))])
            subprocess.run(['wsl', 'bash', str(Path.home() / 'install.sh')])
        else:
            subprocess.run(['wsl', '--install', 'Ubuntu'])
            subprocess.run(['wsl', 'bash', str(Path(__file__).parent / 'install.sh')])
            subprocess.run(['wsl', 'bash', '-c', 'cp {} ~'.format(str(Path(__file__).parent / 'install.sh'))])
            subprocess.run(['wsl', 'bash', '-c', 'cp {} ~'.format(str(Path(__file__).parent / 'run_ollama.sh'))])
            subprocess.run(['wsl', 'bash', str(Path.home() / 'install.sh')])
    else:
        subprocess.run(['bash', str(Path(__file__).parent / 'install.sh')])
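
On Windows the installer now stages install.sh and run_ollama.sh in the WSL home directory before executing install.sh there, and falls back to installing Ubuntu via wsl --install when WSL is absent. The availability check it hinges on can be read in isolation (a hypothetical helper, not part of the commit):

    import os
    import platform

    def wsl_available() -> bool:
        # Same test as install_ollama() above: a Windows host with wsl.exe on disk.
        return platform.system() == 'Windows' and os.path.exists('C:\\Windows\\System32\\wsl.exe')

On Linux and macOS the function simply runs install.sh with bash.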
def get_sd(lollms_paths:LollmsPaths):
    root_dir = lollms_paths.personal_path
    shared_folder = root_dir/"shared"
    sd_folder = shared_folder / "auto_sd"
    sd_script_path = sd_folder / "lollms_sd.py"
    git_pull(sd_folder)
    if sd_script_path.exists():
        ASCIIColors.success("lollms_sd found.")
        ASCIIColors.success("Loading source file...",end="")
        # use importlib to load the module from the file path
        from lollms.services.sd.lollms_sd import LollmsSD
        ASCIIColors.success("ok")
        return LollmsSD
class Service:
    def __init__(
        self,
@@ -74,7 +64,7 @@ class Service:
        base_url="http://127.0.0.1:11434",
        wait_max_retries = 5
    ):
        if base_url=="" or base_url=="http://127.0.0.1:7860":
        if base_url=="" or base_url=="http://127.0.0.1:11434":
            base_url = None
        # Get the current directory
        lollms_paths = app.lollms_paths
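
The sentinel check now matches ollama's default address (11434) rather than the 7860 port used before: an empty string or the stock http://127.0.0.1:11434 is treated as "no external server", so base_url is cleared and the Service launches ollama itself (see the next hunk). In isolation, the normalization amounts to this sketch (not the commit's code):

    DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434"

    def normalize_base_url(base_url: str):
        # "" or the stock local address means: manage a local ollama instance.
        return None if base_url in ("", DEFAULT_OLLAMA_URL) else base_url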
@@ -93,6 +83,14 @@ class Service:
        if not self.wait_for_service(1,False) and base_url is None:
            ASCIIColors.info("Loading ollama service")
            # run ollama
            if platform.system() == 'Windows':
                if os.path.exists('C:\\Windows\\System32\\wsl.exe'):
                    subprocess.run(['wsl', 'bash', str(Path(__file__).parent / 'run_ollama.sh')])
                else:
                    subprocess.run(['bash', str(Path(__file__).parent / 'install.sh')])
        # Wait until the service is available at http://127.0.0.1:7860/
        self.wait_for_service(max_retries=wait_max_retries)
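
wait_for_service() itself is not shown in this diff; conceptually it polls the service's URL until ollama answers or the retry budget is exhausted. A minimal sketch of such a readiness check (names, timing and endpoint are assumptions):

    import time
    import requests

    def wait_for_service_sketch(base_url: str = "http://127.0.0.1:11434", max_retries: int = 5) -> bool:
        # Poll the ollama HTTP endpoint until it responds or retries run out.
        for _ in range(max_retries):
            try:
                requests.get(base_url, timeout=2)
                return True
            except requests.exceptions.RequestException:
                time.sleep(1)
        return False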


@@ -1,2 +1,23 @@
ollama serve&
ollama run mistral
#!/bin/bash
# Set the OLLAMA_HOST address
OLLAMA_HOST="0.0.0.0:11434"
# Start the OLLAMA server
ollama serve &
# Check if models.txt exists
if [ ! -f models.txt ]; then
# Create models.txt and add "mixtral" to it
echo "mixtral" > models.txt
fi
# Read the models from the file
while IFS= read -r model
do
# Run each model in the background
ollama run "$model" &
done < models.txt
# Wait for all background processes to finish
wait
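
The rewritten script sets OLLAMA_HOST to 0.0.0.0:11434, starts the server, seeds models.txt with mixtral on first run, and then launches every model listed in that file (hence the new .gitignore entry for models.txt). For example, a models.txt such as the following (model names here are only illustrative):

    mixtral
    mistral

would start both models in the background after the server comes up, with the final wait keeping the script alive as long as any of them runs.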