"""
|
|
A list of all model APIs natively supported by ChainForge.
|
|
"""
|
|
from enum import Enum
|
|
|
|
class LLM(str, Enum):
|
|
""" OpenAI Chat """
|
|
OpenAI_ChatGPT = "gpt-3.5-turbo"
|
|
OpenAI_ChatGPT_16k = "gpt-3.5-turbo-16k"
|
|
OpenAI_ChatGPT_16k_0613 = "gpt-3.5-turbo-16k-0613"
|
|
OpenAI_ChatGPT_0301 = "gpt-3.5-turbo-0301"
|
|
OpenAI_ChatGPT_0613 = "gpt-3.5-turbo-0613"
|
|
OpenAI_GPT4 = "gpt-4"
|
|
OpenAI_GPT4_0314 = "gpt-4-0314"
|
|
OpenAI_GPT4_0613 = "gpt-4-0613"
|
|
OpenAI_GPT4_32k = "gpt-4-32k"
|
|
OpenAI_GPT4_32k_0314 = "gpt-4-32k-0314"
|
|
OpenAI_GPT4_32k_0613 = "gpt-4-32k-0613"
|
|
|
|
""" OpenAI Text Completions """
|
|
OpenAI_Davinci003 = "text-davinci-003"
|
|
OpenAI_Davinci002 = "text-davinci-002"
|
|
|
|
""" Azure OpenAI Endpoints """
|
|
Azure_OpenAI = "azure-openai"
|
|
|
|
""" Dalai-served models (Alpaca and Llama) """
|
|
Dalai_Alpaca_7B = "alpaca.7B"
|
|
Dalai_Alpaca_13B = "alpaca.13B"
|
|
Dalai_Llama_7B = "llama.7B"
|
|
Dalai_Llama_13B = "llama.13B"
|
|
Dalai_Llama_30B = "llama.30B"
|
|
Dalai_Llama_65B = "llama.65B"
|
|
|
|
""" Anthropic """
|
|
# Our largest model, ideal for a wide range of more complex tasks. Using this model name
|
|
# will automatically switch you to newer versions of claude-v1 as they are released.
|
|
Claude_v1 = "claude-v1"
|
|
|
|
# An earlier version of claude-v1
|
|
Claude_v1_0 = "claude-v1.0"
|
|
|
|
# An improved version of claude-v1. It is slightly improved at general helpfulness,
|
|
# instruction following, coding, and other tasks. It is also considerably better with
|
|
# non-English languages. This model also has the ability to role play (in harmless ways)
|
|
# more consistently, and it defaults to writing somewhat longer and more thorough responses.
|
|
Claude_v1_2 = "claude-v1.2"
|
|
|
|
    # A significantly improved version of claude-v1. Compared to claude-v1.2, it's more robust
    # against red-team inputs, better at precise instruction-following, better at code, and better
    # at non-English dialogue and writing.
    Claude_v1_3 = "claude-v1.3"

    # A smaller model with far lower latency, sampling at roughly 40 words/sec! Its output quality
    # is somewhat lower than claude-v1 models, particularly for complex tasks. However, it is much
    # less expensive and blazing fast. We believe that this model provides more than adequate performance
    # on a range of tasks including text classification, summarization, and lightweight chat applications,
    # as well as search result summarization. Using this model name will automatically switch you to newer
    # versions of claude-instant-v1 as they are released.
    Claude_v1_instant = "claude-instant-v1"

    """ Google models """
    PaLM2_Text_Bison = "text-bison-001"  # it's really models/text-bison-001, but that's confusing
    PaLM2_Chat_Bison = "chat-bison-001"
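
# Illustrative check (not in the original module): because LLM subclasses `str`,
# members compare equal to their raw model-name strings, and members can be
# looked up by value.
assert LLM.OpenAI_ChatGPT == "gpt-3.5-turbo"
assert LLM("gpt-4") is LLM.OpenAI_GPT4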

# LLM APIs often enforce rate limits, which cap the number of requests per unit time.
# E.g., OpenAI: https://platform.openai.com/account/rate-limits
# For a basic organization in OpenAI, GPT-3.5 is currently 3500 RPM (requests per minute) and GPT-4 is 200 RPM.
# For the Anthropic evaluation preview of Claude, we can only send 1 request at a time (synchronously).
# Our 'cheap' way of staying under rate limits is to wait a few seconds between batches of requests.
# If a model is missing from the table below, we must send and receive only 1 request at a time (synchronously).
# The following is only a guideline, and errs on the conservative side.
RATE_LIMITS = {
    LLM.OpenAI_ChatGPT: (30, 10),  # max 30 requests a batch; wait 10 seconds between batches
    LLM.OpenAI_ChatGPT_0301: (30, 10),
    LLM.OpenAI_GPT4: (4, 15),  # max 4 requests a batch; wait 15 seconds between batches
    LLM.OpenAI_GPT4_0314: (4, 15),
    LLM.OpenAI_GPT4_32k: (4, 15),
    LLM.OpenAI_GPT4_32k_0314: (4, 15),
    LLM.PaLM2_Text_Bison: (4, 10),  # max 30 requests per minute, so send 4 per batch with 10 seconds between (conservative)
    LLM.PaLM2_Chat_Bison: (4, 10),
}
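
# --- Usage sketch (illustrative; not part of the original module) ---
# One way the (batch_size, wait_secs) tuples above might be consumed when
# dispatching many prompts. `send_request` is a hypothetical stand-in for
# whatever coroutine actually performs a single API call.
import asyncio
from typing import List

async def send_request(model: LLM, prompt: str) -> str:
    """Hypothetical placeholder for a real API call."""
    await asyncio.sleep(0)  # simulate I/O
    return f"[{model.value}] response to: {prompt}"

async def call_in_batches(model: LLM, prompts: List[str]) -> List[str]:
    # Models absent from RATE_LIMITS are treated as synchronous: 1 request per batch, no wait.
    batch_size, wait_secs = RATE_LIMITS.get(model, (1, 0))
    responses: List[str] = []
    for i in range(0, len(prompts), batch_size):
        batch = prompts[i:i + batch_size]
        # Send the whole batch concurrently...
        responses += await asyncio.gather(*(send_request(model, p) for p in batch))
        # ...then pause before the next batch to stay under the rate limit.
        if i + batch_size < len(prompts):
            await asyncio.sleep(wait_secs)
    return responses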