enhanced generation

This commit is contained in:
Saifeddine ALOUI 2024-11-24 18:28:12 +01:00
parent 53a82b996f
commit bb53a72055
7 changed files with 17 additions and 14 deletions

View File

@@ -47,7 +47,7 @@ use_continue_message: true
 seed: -1
 ctx_size: 4084
-max_n_predict: 4096
+max_n_predict: None
 min_n_predict: 1024
 temperature: 0.9
 top_k: 50

View File

@@ -47,7 +47,7 @@ use_continue_message: true
 seed: -1
 ctx_size: 4084
-max_n_predict: 4096
+max_n_predict: None
 min_n_predict: 1024
 temperature: 0.9
 top_k: 50

View File

@@ -47,7 +47,7 @@ use_continue_message: true
 seed: -1
 ctx_size: 4084
-max_n_predict: 4096
+max_n_predict: None
 min_n_predict: 1024
 temperature: 0.9
 top_k: 50

View File

@@ -47,7 +47,7 @@ use_continue_message: true
 seed: -1
 ctx_size: 4084
-max_n_predict: 4096
+max_n_predict: None
 min_n_predict: 1024
 temperature: 0.9
 top_k: 50

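All four presets make the same change: the fixed 4096-token generation cap becomes None, meaning "no explicit cap"; the generation code then budgets against the remaining context window instead. A minimal normalization sketch, assuming the loaded value arrives as an optional integer (the helper name is illustrative, not part of the commit):

from typing import Optional

def normalize_max_n_predict(value: Optional[int]) -> Optional[int]:
    # None or a non-positive number both mean "no explicit cap";
    # positive values are kept as a hard limit on generated tokens.
    if value is None or value <= 0:
        return None
    return value
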
View File

@@ -315,7 +315,7 @@ class LollmsApplication(LoLLMsCom):
     def _generate_text(self, prompt):
-        max_tokens = min(self.config.ctx_size - self.model.get_nb_tokens(prompt),self.config.max_n_predict)
+        max_tokens = min(self.config.ctx_size - self.model.get_nb_tokens(prompt),self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size- self.model.get_nb_tokens(prompt))
         generated_text = self.model.generate(prompt, max_tokens)
         return generated_text.strip()

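_generate_text now uses the pattern the rest of this commit relies on: when max_n_predict is unset, the generation budget falls back to whatever room is left in the context window. A minimal sketch of that computation as a standalone helper (name and signature are illustrative, not part of the commit):

from typing import Optional

def effective_max_tokens(ctx_size: int, prompt_tokens: int,
                         max_n_predict: Optional[int]) -> int:
    # Room left in the context window once the prompt is counted.
    remaining = ctx_size - prompt_tokens
    # No cap configured: the whole remaining window is usable.
    if not max_n_predict:
        return remaining
    # Otherwise the smaller of the cap and the remaining window wins.
    return min(remaining, max_n_predict)

With the shipped ctx_size of 4084, a hypothetical 1000-token prompt and max_n_predict set to None, this yields 3084 tokens, the same value the inline ternary above computes.
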
View File

@@ -1046,7 +1046,7 @@ Use this structure:
         self.print_prompt("gen",prompt)
         if max_size is None:
-            max_size = min(self.config.max_n_predict, self.config.ctx_size-len(self.model.tokenize(prompt)))
+            max_size = min(self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size-len(self.model.tokenize(prompt)), self.config.ctx_size-len(self.model.tokenize(prompt)))
         self.model.generate_with_images(
             prompt,
@@ -1071,7 +1071,7 @@ Use this structure:
         self.model.generate(
             prompt,
-            max_size if max_size else min(self.config.ctx_size-ntokens,self.config.max_n_predict),
+            max_size if max_size else min(self.config.ctx_size-ntokens,self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size-ntokens),
             partial(self.process, callback=callback, show_progress=show_progress),
             temperature=self.model_temperature if temperature is None else temperature,
             top_k=self.model_top_k if top_k is None else top_k,
@@ -3575,7 +3575,7 @@ Use this structure:
         if self.config.debug:
             nb_prompt_tokens = len(self.personality.model.tokenize(prompt))
-            nb_tokens = min(self.config.ctx_size - nb_prompt_tokens, self.config.max_n_predict)
+            nb_tokens = min(self.config.ctx_size - nb_prompt_tokens, self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size-nb_prompt_tokens)
             ASCIIColors.info(f"Prompt size : {nb_prompt_tokens}")
             ASCIIColors.info(f"Requested generation max size : {nb_tokens}")
@@ -4740,7 +4740,7 @@ transition-all duration-300 ease-in-out">
         out = self.fast_gen(full_prompt)
         nb_tokens = len(self.personality.model.tokenize(out))
-        if nb_tokens >= self.config.max_n_predict-1:
+        if nb_tokens >= (self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size)-1:
             out = out+self.fast_gen(full_prompt+out, callback=callback)
         if context_details["is_continue"]:
             out = context_details["previous_chunk"] + out

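The last hunk in this file keeps the "continue if truncated" behavior working when no cap is configured: the output size is compared against the effective limit (configured cap, else the context size), and generation is resumed once if that limit was reached. A minimal sketch of that control flow, assuming hypothetical generate and count_tokens callables standing in for fast_gen and the model tokenizer:

from typing import Callable, Optional

def generate_with_continuation(prompt: str, ctx_size: int,
                               max_n_predict: Optional[int],
                               generate: Callable[[str], str],
                               count_tokens: Callable[[str], int]) -> str:
    # Effective cap: the configured value, else the full context size.
    limit = max_n_predict if max_n_predict else ctx_size
    out = generate(prompt)
    # If the first pass filled (almost) the whole budget, it was likely
    # cut short, so ask the model to continue from where it stopped.
    if count_tokens(out) >= limit - 1:
        out = out + generate(prompt + out)
    return out
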
View File

@@ -85,7 +85,7 @@ class LollmsGenerateRequest(BaseModel):
     prompt: str
     model_name: Optional[str] = None
     personality: Optional[int] = -1
-    n_predict: Optional[int] = 1024
+    n_predict: Optional[int] = None
     stream: bool = False
     temperature: float = 0.1
     top_k: Optional[int] = 50
@@ -131,7 +131,10 @@ async def lollms_generate(request: LollmsGenerateRequest):
         tokens = elf_server.model.tokenize(prompt)
         n_tokens = len(tokens)
         ASCIIColors.info(f"Prompt input size {n_tokens}")
-        n_predict = min(min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict), request.n_predict) if request.n_predict>0 else min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict)
+        if request.n_predict is None:
+            n_predict = min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size)
+        else:
+            n_predict = min(min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size), request.n_predict) if request.n_predict>0 else min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size)
         stream = request.stream
         if elf_server.binding is not None:
             if stream:
@@ -491,7 +494,7 @@ async def v1_chat_completions(request: ChatGenerationRequest):
     try:
         reception_manager=RECEPTION_MANAGER()
         messages = request.messages
-        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict
+        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size
         temperature = request.temperature if elf_server.config.temperature else elf_server.config.temperature
         prompt = ""
         roles= False
@@ -633,7 +636,7 @@ async def ollama_chat_completion(request: ChatGenerationRequest):
     try:
         reception_manager=RECEPTION_MANAGER()
         messages = request.messages
-        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict
+        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size
         temperature = request.temperature if elf_server.config.temperature else elf_server.config.temperature
         prompt = ""
         roles= False
@@ -986,7 +989,7 @@ async def v1_completion(request: CompletionGenerationRequest):
     """
     try:
         text = request.prompt
-        n_predict = request.max_tokens if request.max_tokens>=0 else elf_server.config.max_n_predict
+        n_predict = request.max_tokens if request.max_tokens>=0 else elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size
         temperature = request.temperature if request.temperature>=0 else elf_server.config.temperature
         # top_k = request.top_k if request.top_k>=0 else elf_server.config.top_k
         # top_p = request.top_p if request.top_p>=0 else elf_server.config.top_p
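
On the server side the same fallback is combined with the per-request n_predict: None means "let the server decide", a positive value is honored but clamped to what the server allows, and anything else falls back to the server-side budget. A compact sketch of the branching introduced in lollms_generate (the standalone function and argument names are illustrative, not the endpoint's actual signature):

from typing import Optional

def resolve_n_predict(requested: Optional[int], ctx_size: int,
                      n_prompt_tokens: int,
                      max_n_predict: Optional[int]) -> int:
    # Server-side budget: room left in the context (minus one safety token),
    # further limited by max_n_predict when that cap is configured.
    server_budget = min(ctx_size - n_prompt_tokens - 1,
                        max_n_predict if max_n_predict else ctx_size)
    if requested is None or requested <= 0:
        return server_budget
    # A positive request is honored, but never beyond the server budget.
    return min(server_budget, requested)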