"""
|
|
A list of all model APIs natively supported by ChainForge.
|
|
"""
|
|
from enum import Enum
|
|
|
|
class LLM(str, Enum):
|
|
""" OpenAI Chat """
|
|
OpenAI_ChatGPT = "gpt-3.5-turbo"
|
|
OpenAI_ChatGPT_16k = "gpt-3.5-turbo-16k"
|
|
OpenAI_ChatGPT_16k_0613 = "gpt-3.5-turbo-16k-0613"
|
|
OpenAI_ChatGPT_0301 = "gpt-3.5-turbo-0301"
|
|
OpenAI_ChatGPT_0613 = "gpt-3.5-turbo-0613"
|
|
OpenAI_GPT4 = "gpt-4"
|
|
OpenAI_GPT4_0314 = "gpt-4-0314"
|
|
OpenAI_GPT4_0613 = "gpt-4-0613"
|
|
OpenAI_GPT4_32k = "gpt-4-32k"
|
|
OpenAI_GPT4_32k_0314 = "gpt-4-32k-0314"
|
|
OpenAI_GPT4_32k_0613 = "gpt-4-32k-0613"
|
|
|
|
""" OpenAI Text Completions """
|
|
OpenAI_Davinci003 = "text-davinci-003"
|
|
OpenAI_Davinci002 = "text-davinci-002"
|
|
|
|
""" Azure OpenAI Endpoints """
|
|
Azure_OpenAI = "azure-openai"
|
|
|
|
""" Dalai-served models (Alpaca and Llama) """
|
|
Dalai_Alpaca_7B = "alpaca.7B"
|
|
Dalai_Alpaca_13B = "alpaca.13B"
|
|
Dalai_Llama_7B = "llama.7B"
|
|
Dalai_Llama_13B = "llama.13B"
|
|
Dalai_Llama_30B = "llama.30B"
|
|
Dalai_Llama_65B = "llama.65B"
|
|
|
|
""" Anthropic """
|
|
# Our largest model, ideal for a wide range of more complex tasks. Using this model name
|
|
# will automatically switch you to newer versions of claude-v1 as they are released.
|
|
Claude_v1 = "claude-v1"
|
|
|
|
# An earlier version of claude-v1
|
|
Claude_v1_0 = "claude-v1.0"
|
|
|
|
# An improved version of claude-v1. It is slightly improved at general helpfulness,
|
|
# instruction following, coding, and other tasks. It is also considerably better with
|
|
# non-English languages. This model also has the ability to role play (in harmless ways)
|
|
# more consistently, and it defaults to writing somewhat longer and more thorough responses.
|
|
Claude_v1_2 = "claude-v1.2"
|
|
|
|
    # A significantly improved version of claude-v1. Compared to claude-v1.2, it's more robust
    # against red-team inputs, better at precise instruction-following, better at code, and better
    # at non-English dialogue and writing.
    Claude_v1_3 = "claude-v1.3"

    # A smaller model with far lower latency, sampling at roughly 40 words/sec! Its output quality
    # is somewhat lower than claude-v1 models, particularly for complex tasks. However, it is much
    # less expensive and blazing fast. We believe that this model provides more than adequate performance
    # on a range of tasks including text classification, summarization, and lightweight chat applications,
    # as well as search result summarization. Using this model name will automatically switch you to newer
    # versions of claude-instant-v1 as they are released.
    Claude_v1_instant = "claude-instant-v1"

    """ Google models """
    PaLM2_Text_Bison = "text-bison-001"  # it's really models/text-bison-001, but that's confusing
    PaLM2_Chat_Bison = "chat-bison-001"
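
# Illustrative check (not in the original module): because LLM subclasses `str`,
# members compare equal to their raw model-name strings, and members can be
# looked up by value.
assert LLM.OpenAI_ChatGPT == "gpt-3.5-turbo"
assert LLM("gpt-4") is LLM.OpenAI_GPT4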

# LLM APIs often enforce rate limits, which cap the number of requests per unit time.
# E.g., OpenAI: https://platform.openai.com/account/rate-limits
# For a basic organization in OpenAI, GPT-3.5 is currently 3500 RPM (requests per minute) and GPT-4 is 200 RPM.
# For the Anthropic evaluation preview of Claude, we can only send 1 request at a time (synchronously).
# Our 'cheap' way of staying under rate limits is to wait a few seconds between batches of requests.
# If a model is missing from the table below, we must send and receive only 1 request at a time (synchronously).
# The following is only a guideline, and errs on the conservative side.
RATE_LIMITS = {
    LLM.OpenAI_ChatGPT: (30, 10),  # max 30 requests a batch; wait 10 seconds between batches
    LLM.OpenAI_ChatGPT_0301: (30, 10),
    LLM.OpenAI_GPT4: (4, 15),  # max 4 requests a batch; wait 15 seconds between batches
    LLM.OpenAI_GPT4_0314: (4, 15),
    LLM.OpenAI_GPT4_32k: (4, 15),
    LLM.OpenAI_GPT4_32k_0314: (4, 15),
    LLM.PaLM2_Text_Bison: (4, 10),  # max 30 requests per minute, so send 4 per batch with 10 seconds between (conservative)
    LLM.PaLM2_Chat_Bison: (4, 10),
}
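
# --- Usage sketch (illustrative; not part of the original module) ---
# One way the (batch_size, wait_secs) tuples above might be consumed when
# dispatching many prompts. `send_request` is a hypothetical stand-in for
# whatever coroutine actually performs a single API call.
import asyncio
from typing import List

async def send_request(model: LLM, prompt: str) -> str:
    """Hypothetical placeholder for a real API call."""
    await asyncio.sleep(0)  # simulate I/O
    return f"[{model.value}] response to: {prompt}"

async def call_in_batches(model: LLM, prompts: List[str]) -> List[str]:
    # Models absent from RATE_LIMITS are treated as synchronous: 1 request per batch, no wait.
    batch_size, wait_secs = RATE_LIMITS.get(model, (1, 0))
    responses: List[str] = []
    for i in range(0, len(prompts), batch_size):
        batch = prompts[i:i + batch_size]
        # Send the whole batch concurrently...
        responses += await asyncio.gather(*(send_request(model, p) for p in batch))
        # ...then pause before the next batch to stay under the rate limit.
        if i + batch_size < len(prompts):
            await asyncio.sleep(wait_secs)
    return responses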