From 3003ba80f83145ee3512c507b0c9d9169207bf47 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Thu, 24 Aug 2023 01:34:18 +0200 Subject: [PATCH] added files to all personalities --- api/__init__.py | 17 +++++++++-------- app.py | 4 ++-- docs/youtube/lollms_lawyer | 13 +++++++++++++ 3 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 docs/youtube/lollms_lawyer diff --git a/api/__init__.py b/api/__init__.py index f4318614..894f18a2 100644 --- a/api/__init__.py +++ b/api/__init__.py @@ -507,6 +507,7 @@ class LoLLMsAPPI(LollmsApplication): path:Path = self.lollms_paths.personal_uploads_path / self.personality.personality_folder_name path.mkdir(parents=True, exist_ok=True) file_path = path / data["filename"] + File64BitsManager.b642file(data["fileData"],file_path) if self.personality.processor: self.personality.processor.add_file(file_path, partial(self.process_chunk, client_id=client_id)) else: @@ -972,20 +973,16 @@ class LoLLMsAPPI(LollmsApplication): composed_messages = self.model.detokenize(t[-nb_tk:]) ASCIIColors.warning(f"Cropping discussion to fit context [using {nb_tk} tokens/{self.config.ctx_size}]") discussion_messages = composed_messages - tokens = self.model.tokenize(discussion_messages) - if self.config["debug"]: - ASCIIColors.yellow(discussion_messages) - ASCIIColors.info(f"prompt size:{len(tokens)} tokens") + if len(self.personality.files)>0 and self.personality.vectorizer: - pr = PromptReshaper("!@>Documentation:{{doc}}\n{{conditionning}}{{content}}") + pr = PromptReshaper("!@>Document chunks:\n{{doc}}\n{{conditionning}}\n{{content}}") emb = self.personality.vectorizer.embed_query(message.content) - doc = self.personality.vectorizer.recover_text(emb, top_k=self.config.data_vectorization_nb_chunks) - # TODO, fix + docs, sorted_similarities = self.personality.vectorizer.recover_text(emb, top_k=self.config.data_vectorization_nb_chunks) discussion_messages = pr.build({ - "doc":doc, + "doc":"\n".join(docs), 
"conditionning":self.personality.personality_conditioning, "content":discussion_messages }, self.model.tokenize, self.model.detokenize, self.config.ctx_size, place_holders_to_sacrifice=["content"]) @@ -996,6 +993,10 @@ class LoLLMsAPPI(LollmsApplication): "content":discussion_messages }, self.model.tokenize, self.model.detokenize, self.config.ctx_size, place_holders_to_sacrifice=["content"]) + if self.config["debug"]: + tokens = self.model.tokenize(discussion_messages) + ASCIIColors.yellow(discussion_messages) + ASCIIColors.info(f"prompt size:{len(tokens)} tokens") return discussion_messages, message.content, tokens diff --git a/app.py b/app.py index 3a277795..9b75b389 100644 --- a/app.py +++ b/app.py @@ -1510,7 +1510,7 @@ class LoLLMsWebUI(LoLLMsAPPI): data = request.get_json() id = data['id'] print(f"- Selecting active personality {id} ...",end="") - if id= {len(self.config['personalities'])}") + ASCIIColors.error(f"nok : personality id out of bounds @ {id} >= {len(self.mounted_personalities)}") return jsonify({"status": False, "error":"Invalid ID"}) diff --git a/docs/youtube/lollms_lawyer b/docs/youtube/lollms_lawyer new file mode 100644 index 00000000..facf441c --- /dev/null +++ b/docs/youtube/lollms_lawyer @@ -0,0 +1,13 @@ +Disclaimer: +As with any AI model, the models used on lollms are sometimes prone to errors +and bias. This is not a replacement for lawyers but just a tool that may help +you if you didn't understand some terms in a document or if you have +a big document and need to look for a specific part of it. + +The quality of the answers will depend on the model you are using. +Larger models tend to be better, but there is no guarantee that the answer is correct. + + +Make sure you contact a legal professional for serious matters. +Thanks for watching +See ya \ No newline at end of file