mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-18 20:27:58 +00:00
Upgraded media
This commit is contained in:
parent
2815296592
commit
62a8af9c3b
@ -1,267 +0,0 @@
|
|||||||
"""Wrapper around llama.cpp."""
|
|
||||||
import logging
|
|
||||||
from typing import Any, Dict, Generator, List, Optional
|
|
||||||
|
|
||||||
from pydantic import Field, root_validator
|
|
||||||
|
|
||||||
from langchain.callbacks.manager import CallbackManagerForLLMRun
|
|
||||||
from langchain.llms.base import LLM
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class LLMModel(LLM):
    """LangChain ``LLM`` adapter around an already-built completion client.

    The wrapped ``client`` is invoked as ``client(prompt=..., **params)`` and
    must return (or, when ``stream=True`` is passed, yield) dictionaries whose
    generated text is stored at ``["choices"][0]["text"]``.

    This class never loads a model file itself: the root validator only binds
    the object supplied by the caller to ``client``. The load-time fields
    (``n_ctx``, ``n_parts``, ``seed``, ...) are kept as part of the public
    interface but are not forwarded anywhere by this class.

    Example:
        .. code-block:: python

            llm = LLMModel(client=my_client, model_path="/path/to/llama/model")
            llm("This is a prompt.")
    """

    client: Any  #: :meta private:
    model_path: str
    """The path to the Llama model file."""

    lora_base: Optional[str] = None
    """The path to the Llama LoRA base model."""

    lora_path: Optional[str] = None
    """The path to the Llama LoRA. If None, no LoRa is loaded."""

    n_ctx: int = Field(512, alias="n_ctx")
    """Token context window."""

    n_parts: int = Field(-1, alias="n_parts")
    """Number of parts to split the model into.
    If -1, the number of parts is automatically determined."""

    seed: int = Field(-1, alias="seed")
    """Seed. If -1, a random seed is used."""

    f16_kv: bool = Field(True, alias="f16_kv")
    """Use half-precision for key/value cache."""

    logits_all: bool = Field(False, alias="logits_all")
    """Return logits for all tokens, not just the last token."""

    vocab_only: bool = Field(False, alias="vocab_only")
    """Only load the vocabulary, no weights."""

    use_mlock: bool = Field(False, alias="use_mlock")
    """Force system to keep model in RAM."""

    n_threads: Optional[int] = Field(None, alias="n_threads")
    """Number of threads to use.
    If None, the number of threads is automatically determined."""

    n_batch: Optional[int] = Field(8, alias="n_batch")
    """Number of tokens to process in parallel.
    Should be a number between 1 and n_ctx."""

    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
    """Number of layers to be loaded into gpu memory. Default None."""

    suffix: Optional[str] = Field(None)
    """A suffix to append to the generated text. If None, no suffix is appended."""

    max_tokens: Optional[int] = 256
    """The maximum number of tokens to generate."""

    temperature: Optional[float] = 0.8
    """The temperature to use for sampling."""

    top_p: Optional[float] = 0.95
    """The top-p value to use for sampling."""

    logprobs: Optional[int] = Field(None)
    """The number of logprobs to return. If None, no logprobs are returned."""

    echo: Optional[bool] = False
    """Whether to echo the prompt."""

    stop: Optional[List[str]] = []
    """A list of strings to stop generation when encountered."""

    repeat_penalty: Optional[float] = 1.1
    """The penalty to apply to repeated tokens."""

    top_k: Optional[int] = 40
    """The top-k value to use for sampling."""

    last_n_tokens_size: Optional[int] = 64
    """The number of tokens to look back when applying the repeat_penalty."""

    use_mmap: Optional[bool] = True
    """Whether to keep the model loaded in RAM"""

    streaming: bool = True
    """Whether to stream the results, token by token."""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Bind the caller-supplied model object to ``client``.

        Args:
            values: The field values collected by pydantic.

        Returns:
            ``values`` with ``client`` set to the object to call for
            completions.

        Raises:
            ValueError: If neither a ``model`` value nor a ``client`` value
                is available.
        """
        # ``model`` is not a declared field of this class, so it may be absent
        # from ``values``. NOTE(review): whether pydantic lets an undeclared
        # ``model`` key through depends on the base class configuration, which
        # is not visible here. When the key is present it keeps precedence,
        # exactly as before.
        if "model" in values:
            values["client"] = values["model"]
            return values

        # Fall back to an explicitly supplied ``client`` instead of failing
        # with a bare KeyError on the missing ``model`` key.
        if values.get("client") is None:
            raise ValueError(
                "A model object must be supplied through `model` or `client`."
            )
        return values

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default sampling parameters as a fresh dictionary."""
        return {
            "suffix": self.suffix,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "logprobs": self.logprobs,
            "echo": self.echo,
            "stop_sequences": self.stop,  # key here is convention among LLM classes
            "repeat_penalty": self.repeat_penalty,
            "top_k": self.top_k,
        }

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Get the identifying parameters: ``model_path`` plus the defaults."""
        return {**{"model_path": self.model_path}, **self._default_params}

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "lollms_generic_llm"

    def _get_parameters(self, stop: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Performs sanity check, preparing parameters in the format the client expects.

        Args:
            stop (Optional[List[str]]): List of stop sequences for this call.

        Returns:
            Dictionary containing the combined parameters, with the stop
            sequences stored under the ``"stop"`` key.

        Raises:
            ValueError: If stop sequences are configured on the instance and
                ``stop`` is also passed.
        """
        # Raise error if stop sequences are in both input and default params
        if self.stop and stop is not None:
            raise ValueError("`stop` found in both the input and default params.")

        # ``_default_params`` builds a new dict on every access, so mutating
        # ``params`` below does not affect later calls.
        params = self._default_params

        # The client expects the "stop" key rather than "stop_sequences":
        params.pop("stop_sequences")

        # Use the configured value, else the per-call value, else an empty list:
        params["stop"] = self.stop or stop or []

        return params

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
    ) -> str:
        """Call the model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.
            run_manager: Optional callback manager, forwarded to ``stream``
                when streaming is enabled.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                llm = LLMModel(client=my_client, model_path="/path/to/model.bin")
                llm("This is a prompt.")
        """
        if self.streaming:
            # Consume the stream and concatenate the text of the first choice
            # of every chunk.
            return "".join(
                chunk["choices"][0]["text"]
                for chunk in self.stream(
                    prompt=prompt, stop=stop, run_manager=run_manager
                )
            )

        params = self._get_parameters(stop)
        result = self.client(prompt=prompt, **params)
        return result["choices"][0]["text"]

    def stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
    ) -> Generator[Dict, None, None]:
        """Yields results objects as they are generated in real time.

        BETA: this is a beta feature while we figure out the right abstraction:
        Once that happens, this interface could change.

        It also calls the callback manager's on_llm_new_token event for every
        chunk when a ``run_manager`` is given.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager notified of each new token.

        Yields:
            The chunks produced by the client, unchanged. Each chunk is
            expected to hold the token text at ``["choices"][0]["text"]``.

        Example:
            .. code-block:: python

                llm = LLMModel(
                    client=my_client,
                    model_path="/path/to/local/model.bin",
                    temperature=0.5,
                )
                for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
                                        stop=["'"]):
                    result = chunk["choices"][0]
                    print(result["text"], end='', flush=True)
        """
        params = self._get_parameters(stop)
        result = self.client(prompt=prompt, stream=True, **params)
        for chunk in result:
            first_choice = chunk["choices"][0]
            token = first_choice["text"]
            log_probs = first_choice.get("logprobs", None)
            if run_manager:
                run_manager.on_llm_new_token(
                    token=token, verbose=self.verbose, log_probs=log_probs
                )
            yield chunk
|
|
@ -130,6 +130,13 @@ class AudioRecorder:
|
|||||||
# Convert to float
|
# Convert to float
|
||||||
|
|
||||||
audio_data = self.audio_frames.astype(np.float32)
|
audio_data = self.audio_frames.astype(np.float32)
|
||||||
|
audio = wave.open(str(self.filename), 'wb')
|
||||||
|
audio.setnchannels(self.channels)
|
||||||
|
audio.setsampwidth(pyaudio.PyAudio().get_sample_size(self.audio_format))
|
||||||
|
audio.setframerate(self.sample_rate)
|
||||||
|
audio.writeframes(b''.join(self.audio_frames[non_silent_start:non_silent_end]))
|
||||||
|
audio.close()
|
||||||
|
|
||||||
|
|
||||||
# Transcribe the audio using the whisper model
|
# Transcribe the audio using the whisper model
|
||||||
text = self.whisper_model.transcribe(audio_data[non_silent_start:non_silent_end])
|
text = self.whisper_model.transcribe(audio_data[non_silent_start:non_silent_end])
|
||||||
@ -183,18 +190,20 @@ class AudioRecorder:
|
|||||||
|
|
||||||
def stop_recording(self):
    """Stop the audio stream and save the captured frames as a WAV file.

    Clears ``is_recording``. When an audio stream exists it is stopped and
    closed, ``self.audio_frames`` is written to ``self.filename`` using the
    configured channel count, sample format and sample rate, and the saved
    path is reported through ``self.lollmsCom.info``. When there is no
    stream, a warning is emitted instead and nothing is written.
    """
    self.is_recording = False
    if self.audio_stream:
        self.audio_stream.stop_stream()
        self.audio_stream.close()

        # The module-level helper avoids creating a PyAudio instance that is
        # never terminated just to look up the sample width.
        sample_width = pyaudio.get_sample_size(self.audio_format)

        # The context manager closes the file even if a write fails.
        with wave.open(str(self.filename), 'wb') as audio:
            audio.setnchannels(self.channels)
            audio.setsampwidth(sample_width)
            audio.setframerate(self.sample_rate)
            audio.writeframes(b''.join(self.audio_frames))

        self.lollmsCom.info(f"Recording saved to {self.filename}")
    else:
        # NOTE(review): every other message in this method goes through
        # ``self.lollmsCom``; confirm that this class defines ``warning``.
        self.warning("No recording available")
|
||||||
|
|
||||||
class WebcamImageSender:
|
class WebcamImageSender:
|
||||||
"""
|
"""
|
||||||
|
Loading…
Reference in New Issue
Block a user