Mirror of https://github.com/ParisNeo/lollms.git, synced 2024-12-18 20:27:58 +00:00
enhanced generation

commit: bb53a72055
parent: 53a82b996f
@@ -47,7 +47,7 @@ use_continue_message: true

 seed: -1
 ctx_size: 4084
-max_n_predict: 4096
+max_n_predict: None
 min_n_predict: 1024
 temperature: 0.9
 top_k: 50

@@ -47,7 +47,7 @@ use_continue_message: true

 seed: -1
 ctx_size: 4084
-max_n_predict: 4096
+max_n_predict: None
 min_n_predict: 1024
 temperature: 0.9
 top_k: 50

@@ -47,7 +47,7 @@ use_continue_message: true

 seed: -1
 ctx_size: 4084
-max_n_predict: 4096
+max_n_predict: None
 min_n_predict: 1024
 temperature: 0.9
 top_k: 50

@@ -47,7 +47,7 @@ use_continue_message: true

 seed: -1
 ctx_size: 4084
-max_n_predict: 4096
+max_n_predict: None
 min_n_predict: 1024
 temperature: 0.9
 top_k: 50
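Taken together, the four configuration hunks above change the default max_n_predict from a fixed 4096 tokens to None, meaning "no explicit generation cap". The Python hunks below then treat an unset cap as "use whatever context the prompt has not consumed". A minimal sketch of that rule, assuming a hypothetical helper name (effective_max_n_predict is not a function in lollms); note the committed code uses a truthiness test, so 0 behaves like None:

from typing import Optional

def effective_max_n_predict(ctx_size: int, prompt_tokens: int,
                            max_n_predict: Optional[int]) -> int:
    # Tokens still available in the context window after the prompt.
    remaining = ctx_size - prompt_tokens
    # A truthy cap still wins when it is smaller; None (or 0) means
    # "spend the rest of the context on generation".
    return min(remaining, max_n_predict) if max_n_predict else remaining

# With the shipped defaults (ctx_size: 4084) and a 1000-token prompt:
assert effective_max_n_predict(4084, 1000, None) == 3084
assert effective_max_n_predict(4084, 1000, 2048) == 2048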
@@ -315,7 +315,7 @@ class LollmsApplication(LoLLMsCom):


     def _generate_text(self, prompt):
-        max_tokens = min(self.config.ctx_size - self.model.get_nb_tokens(prompt),self.config.max_n_predict)
+        max_tokens = min(self.config.ctx_size - self.model.get_nb_tokens(prompt),self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size- self.model.get_nb_tokens(prompt))
         generated_text = self.model.generate(prompt, max_tokens)
         return generated_text.strip()

@@ -1046,7 +1046,7 @@ Use this structure:
         self.print_prompt("gen",prompt)

         if max_size is None:
-            max_size = min(self.config.max_n_predict, self.config.ctx_size-len(self.model.tokenize(prompt)))
+            max_size = min(self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size-len(self.model.tokenize(prompt)), self.config.ctx_size-len(self.model.tokenize(prompt)))

         self.model.generate_with_images(
             prompt,
@@ -1071,7 +1071,7 @@ Use this structure:

             self.model.generate(
                 prompt,
-                max_size if max_size else min(self.config.ctx_size-ntokens,self.config.max_n_predict),
+                max_size if max_size else min(self.config.ctx_size-ntokens,self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size-ntokens),
                 partial(self.process, callback=callback, show_progress=show_progress),
                 temperature=self.model_temperature if temperature is None else temperature,
                 top_k=self.model_top_k if top_k is None else top_k,
@@ -3575,7 +3575,7 @@ Use this structure:

         if self.config.debug:
             nb_prompt_tokens = len(self.personality.model.tokenize(prompt))
-            nb_tokens = min(self.config.ctx_size - nb_prompt_tokens, self.config.max_n_predict)
+            nb_tokens = min(self.config.ctx_size - nb_prompt_tokens, self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size-nb_prompt_tokens)
             ASCIIColors.info(f"Prompt size : {nb_prompt_tokens}")
             ASCIIColors.info(f"Requested generation max size : {nb_tokens}")

@@ -4740,7 +4740,7 @@ transition-all duration-300 ease-in-out">

         out = self.fast_gen(full_prompt)
         nb_tokens = len(self.personality.model.tokenize(out))
-        if nb_tokens >= self.config.max_n_predict-1:
+        if nb_tokens >= (self.config.max_n_predict if self.config.max_n_predict else self.config.ctx_size)-1:
             out = out+self.fast_gen(full_prompt+out, callback=callback)
         if context_details["is_continue"]:
             out = context_details["previous_chunk"] + out
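This hunk updates the "did generation hit the cap?" test that drives a single continuation pass: the comparison now uses the configured cap when it is set and ctx_size otherwise, and if the first fast_gen call used up the whole budget, the partial output is fed back in and generation continues. A self-contained sketch of that pattern with stand-in functions (fake_tokenize and fake_generate are illustrations, not lollms APIs):

from typing import Optional

def fake_tokenize(text: str) -> list:
    # Crude stand-in for model.tokenize: whitespace tokens.
    return text.split()

def fake_generate(prompt: str, budget: int) -> str:
    # Stand-in for fast_gen: pretend the model always fills its budget.
    return " ".join(["tok"] * budget)

def generate_with_one_continuation(prompt: str, ctx_size: int,
                                   max_n_predict: Optional[int]) -> str:
    # Effective cap, mirroring the committed condition.
    budget = max_n_predict if max_n_predict else ctx_size
    out = fake_generate(prompt, budget)
    if len(fake_tokenize(out)) >= budget - 1:
        # Output filled the budget: continue once with the partial output appended.
        out = out + " " + fake_generate(prompt + " " + out, budget)
    return out

print(len(fake_tokenize(generate_with_one_continuation("hello", 64, None))))  # 128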
@@ -85,7 +85,7 @@ class LollmsGenerateRequest(BaseModel):
     prompt: str
     model_name: Optional[str] = None
     personality: Optional[int] = -1
-    n_predict: Optional[int] = 1024
+    n_predict: Optional[int] = None
     stream: bool = False
     temperature: float = 0.1
     top_k: Optional[int] = 50
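Changing the request model's default from 1024 to None lets the server distinguish "client did not ask for a budget" from an explicit value, which is what the new `if request.n_predict is None:` branch in the next hunk relies on. A small check of that behaviour with Pydantic, with the model trimmed to the two fields that matter here:

from typing import Optional
from pydantic import BaseModel

class LollmsGenerateRequest(BaseModel):
    # Trimmed to the fields relevant to this change.
    prompt: str
    n_predict: Optional[int] = None

req = LollmsGenerateRequest(prompt="hello")
assert req.n_predict is None        # field omitted by the client
req = LollmsGenerateRequest(prompt="hello", n_predict=512)
assert req.n_predict == 512         # explicit client budget survives validation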
@@ -131,7 +131,10 @@ async def lollms_generate(request: LollmsGenerateRequest):
         tokens = elf_server.model.tokenize(prompt)
         n_tokens = len(tokens)
         ASCIIColors.info(f"Prompt input size {n_tokens}")
-        n_predict = min(min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict), request.n_predict) if request.n_predict>0 else min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict)
+        if request.n_predict is None:
+            n_predict = min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size)
+        else:
+            n_predict = min(min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size), request.n_predict) if request.n_predict>0 else min(elf_server.config.ctx_size-n_tokens-1,elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size)
         stream = request.stream
         if elf_server.binding is not None:
             if stream:
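The expanded branch above resolves the generation budget in three steps: start from the server cap (or the whole context when max_n_predict is unset), clamp it to the space left after the prompt minus one token, then clamp it again by a positive client request. A compact, testable restatement, assuming resolve_n_predict is an illustrative name rather than a function in the endpoint:

from typing import Optional

def resolve_n_predict(ctx_size: int, max_n_predict: Optional[int],
                      n_tokens: int, requested: Optional[int]) -> int:
    # Server-side cap; an unset cap means the whole context window.
    server_cap = max_n_predict if max_n_predict else ctx_size
    # Never promise more than the context left after the prompt.
    budget = min(ctx_size - n_tokens - 1, server_cap)
    if requested is None or requested <= 0:
        return budget
    # A positive client request can only shrink the budget further.
    return min(budget, requested)

assert resolve_n_predict(4084, None, 84, None) == 3999   # client left n_predict unset
assert resolve_n_predict(4084, None, 84, 512) == 512     # smaller explicit request wins
assert resolve_n_predict(4084, 2048, 84, None) == 2048   # configured cap still applies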
@@ -491,7 +494,7 @@ async def v1_chat_completions(request: ChatGenerationRequest):
     try:
         reception_manager=RECEPTION_MANAGER()
         messages = request.messages
-        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict
+        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size
         temperature = request.temperature if elf_server.config.temperature else elf_server.config.temperature
         prompt = ""
         roles= False
@@ -633,7 +636,7 @@ async def ollama_chat_completion(request: ChatGenerationRequest):
     try:
         reception_manager=RECEPTION_MANAGER()
         messages = request.messages
-        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict
+        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size
         temperature = request.temperature if elf_server.config.temperature else elf_server.config.temperature
         prompt = ""
         roles= False
@@ -986,7 +989,7 @@ async def v1_completion(request: CompletionGenerationRequest):
     """
     try:
         text = request.prompt
-        n_predict = request.max_tokens if request.max_tokens>=0 else elf_server.config.max_n_predict
+        n_predict = request.max_tokens if request.max_tokens>=0 else elf_server.config.max_n_predict if elf_server.config.max_n_predict else elf_server.config.ctx_size
        temperature = request.temperature if request.temperature>=0 else elf_server.config.temperature
         # top_k = request.top_k if request.top_k>=0 else elf_server.config.top_k
         # top_p = request.top_p if request.top_p>=0 else elf_server.config.top_p
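The one-liners in the three OpenAI-style endpoints above rely on chained conditional expressions, which associate to the right: the request value if its guard passes, otherwise the configured cap, otherwise the full context size. A small demonstration of that grouping (pick_max_tokens is a hypothetical helper, not part of the server):

from typing import Optional

def pick_max_tokens(requested: int, max_n_predict: Optional[int], ctx_size: int) -> int:
    # Parses as: requested if requested > 0 else (max_n_predict if max_n_predict else ctx_size)
    return requested if requested > 0 else max_n_predict if max_n_predict else ctx_size

assert pick_max_tokens(256, 4096, 4084) == 256   # explicit request
assert pick_max_tokens(0, 4096, 4084) == 4096    # fall back to the configured cap
assert pick_max_tokens(0, None, 4084) == 4084    # cap unset: whole context window

Note that the chat endpoints guard with request.max_tokens>0 while v1_completion uses >=0, so a request for exactly 0 tokens falls through to the cap in the former but is honoured as 0 in the latter.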