Mirror of https://github.com/ianarawjo/ChainForge.git, synced 2025-03-14 00:06:33 +00:00
Add DeepSeek API (#322)

* Add DeepSeek Model API and settings
* Update Together model list

parent 0174b53aff
commit 1206d62b7b
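Background for reviewers: DeepSeek exposes an OpenAI-compatible chat-completions endpoint at https://api.deepseek.com, which is why this commit can reuse ChainForge's existing ChatGPT call path instead of adding a new client. A minimal standalone sketch of that compatibility, assuming the official openai npm package (this is illustration, not code from this commit):

    import OpenAI from "openai";

    // The stock OpenAI SDK works against DeepSeek once the base URL is overridden.
    const client = new OpenAI({
      baseURL: "https://api.deepseek.com",
      apiKey: process.env.DEEPSEEK_API_KEY,
    });

    const completion = await client.chat.completions.create({
      model: "deepseek-chat", // or "deepseek-reasoner", per the settings below
      messages: [
        { role: "system", content: "You are a helpful assistant." },
        { role: "user", content: "Hello!" },
      ],
    });
    console.log(completion.choices[0].message.content);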
@@ -472,6 +472,7 @@ def fetchEnvironAPIKeys():
         'AWS_REGION': 'AWS_Region',
         'AWS_SESSION_TOKEN': 'AWS_Session_Token',
         'TOGETHER_API_KEY': 'Together',
+        'DEEPSEEK_API_KEY': 'DeepSeek',
     }
     d = { alias: os.environ.get(key) for key, alias in keymap.items() }
     ret = jsonify(d)
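The keymap above translates environment-variable names into the provider aliases the frontend expects. A hedged sketch of the consuming side (the route path here is an assumption for illustration, not taken from this diff):

    // Fetch the alias -> key map produced by fetchEnvironAPIKeys().
    // NOTE: the exact route name is assumed, not shown in this commit.
    const resp = await fetch("/app/fetchEnvironAPIKeys");
    const keys: Record<string, string | undefined> = await resp.json();
    if (keys.DeepSeek) console.log("Found a DeepSeek key in the environment.");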
@@ -400,6 +400,12 @@ const GlobalSettingsModal = forwardRef<GlobalSettingsModalRef, object>(
             {...form.getInputProps("Google")}
           />
           <br />
+          <TextInput
+            label="DeepSeek API Key"
+            placeholder="Paste your DeepSeek API key here"
+            {...form.getInputProps("DeepSeek")}
+          />
+          <br />
           <TextInput
             label="Aleph Alpha API Key"
             placeholder="Paste your Aleph Alpha API key here"
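The {...form.getInputProps("DeepSeek")} spread is Mantine's standard form wiring: it injects value, onChange, and error props for the named field. A reduced sketch of the same pattern outside ChainForge (component scaffolding assumed):

    import { useForm } from "@mantine/form";
    import { TextInput } from "@mantine/core";

    function ApiKeyField() {
      // The field name must match the alias used by the backend key map ("DeepSeek").
      const form = useForm({ initialValues: { DeepSeek: "" } });
      return (
        <TextInput
          label="DeepSeek API Key"
          placeholder="Paste your DeepSeek API key here"
          {...form.getInputProps("DeepSeek")}
        />
      );
    }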
@@ -326,6 +326,120 @@ const GPT4Settings: ModelSettingsDict = {
   postprocessors: ChatGPTSettings.postprocessors,
 };
 
+const DeepSeekSettings: ModelSettingsDict = {
+  fullName: "DeepSeek",
+  schema: {
+    type: "object",
+    required: ["shortname"],
+    properties: {
+      shortname: {
+        type: "string",
+        title: "Nickname",
+        description:
+          "Unique identifier to appear in ChainForge. Keep it short.",
+        default: "DeepSeek",
+      },
+      model: {
+        type: "string",
+        title: "Model Version",
+        description:
+          "Select a DeepSeek model to query. For more details on the differences, see the DeepSeek API documentation.",
+        enum: ["deepseek-chat", "deepseek-reasoner"],
+        default: "deepseek-chat",
+      },
+      system_msg: {
+        type: "string",
+        title: "system_msg",
+        description:
+          "Many conversations begin with a system message to gently instruct the assistant. By default, ChainForge includes the suggested 'You are a helpful assistant.'",
+        default: "You are a helpful assistant.",
+        allow_empty_str: true,
+      },
+      temperature: {
+        type: "number",
+        title: "temperature",
+        description:
+          "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.",
+        default: 1,
+        minimum: 0,
+        maximum: 2,
+        multipleOf: 0.01,
+      },
+      response_format: {
+        type: "string",
+        title: "response_format",
+        description:
+          "An object specifying the format that the model must output. Can be 'text' or 'json_object', or (late 2024) a JSON schema specifying structured outputs. In ChainForge, you should only specify text, json_object, or the verbatim JSON schema; do not add a JSON object with a 'type' parameter surrounding these values. JSON mode only works with the newest models. IMPORTANT: when using JSON mode, you must also instruct the model to produce JSON yourself via a system or user message.",
+        default: "text",
+      },
+      tools: {
+        type: "string",
+        title: "tools",
+        description:
+          "A list of JSON schema objects, each with 'name', 'description', and 'parameters' keys, which describe functions the model may generate JSON inputs for. For more info, see https://github.com/openai/openai-cookbook/blob/main/examples/How_to_call_functions_with_chat_models.ipynb",
+        default: "",
+      },
+      tool_choice: {
+        type: "string",
+        title: "tool_choice",
+        description:
+          "Controls how the model responds to function calls. 'none' means the model does not call a function and responds to the end-user. 'auto' means the model can pick between responding to the end-user or calling a function. 'required' means the model must call one or more tools. Specifying a particular function name forces the model to call only that function. Leave blank for default behavior.",
+        default: "",
+      },
+      top_p: {
+        type: "number",
+        title: "top_p",
+        description:
+          "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
+        default: 1,
+        minimum: 0,
+        maximum: 1,
+        multipleOf: 0.005,
+      },
+      stop: {
+        type: "string",
+        title: "stop sequences",
+        description:
+          'Up to 4 sequences where the API will stop generating further tokens. Enclose stop sequences in double-quotes "" and use whitespace to separate them.',
+        default: "",
+      },
+      max_tokens: {
+        type: "integer",
+        title: "max_tokens",
+        description:
+          "The maximum number of tokens to generate in the chat completion. (The total length of input tokens and generated tokens is limited by the model's context length.)",
+      },
+      presence_penalty: {
+        type: "number",
+        title: "presence_penalty",
+        description:
+          "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
+        default: 0,
+        minimum: -2,
+        maximum: 2,
+        multipleOf: 0.005,
+      },
+      frequency_penalty: {
+        type: "number",
+        title: "frequency_penalty",
+        description:
+          "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
+        default: 0,
+        minimum: -2,
+        maximum: 2,
+        multipleOf: 0.005,
+      },
+    },
+  },
+  uiSchema: {
+    ...ChatGPTSettings.uiSchema,
+    model: {
+      "ui:help": "Defaults to deepseek-chat.",
+    },
+  },
+  postprocessors: ChatGPTSettings.postprocessors,
+};
+
 const DalleSettings: ModelSettingsDict = {
   fullName: "Dall-E Image Models (OpenAI)",
   schema: {
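ChainForge renders each ModelSettingsDict as an auto-generated settings form driven by its JSON schema, with the defaults above becoming the form's initial values. A hedged sketch of extracting those defaults (the helper name and structural type are illustrative, not part of the codebase):

    // Works over the JSON-schema shape shown above.
    type SettingsLike = { schema: { properties: Record<string, any> } };

    function collectDefaults(settings: SettingsLike): Record<string, unknown> {
      const defaults: Record<string, unknown> = {};
      for (const [name, prop] of Object.entries(settings.schema.properties)) {
        if (prop && typeof prop === "object" && "default" in prop)
          defaults[name] = prop.default;
      }
      return defaults;
    }
    // e.g., collectDefaults(DeepSeekSettings).model === "deepseek-chat"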
@@ -2190,9 +2304,13 @@ export const TogetherChatSettings: ModelSettingsDict = {
         "Austism/chronos-hermes-13b",
         "cognitivecomputations/dolphin-2.5-mixtral-8x7b",
         "databricks/dbrx-instruct",
+        "deepseek-ai/DeepSeek-V3",
+        "deepseek-ai/DeepSeek-R1",
         "deepseek-ai/deepseek-coder-33b-instruct",
         "deepseek-ai/deepseek-llm-67b-chat",
         "garage-bAInd/Platypus2-70B-instruct",
+        "google/gemma-2-27b-it",
+        "google/gemma-2-9b-it",
         "google/gemma-2b-it",
         "google/gemma-7b-it",
         "Gryphe/MythoMax-L2-13b",
@@ -2202,11 +2320,22 @@ export const TogetherChatSettings: ModelSettingsDict = {
         "codellama/CodeLlama-34b-Instruct-hf",
         "codellama/CodeLlama-70b-Instruct-hf",
         "codellama/CodeLlama-7b-Instruct-hf",
+        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+        "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+        "meta-llama/Meta-Llama-3-70B-Instruct-Turbo",
+        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
+        "meta-llama/Meta-Llama-3-8B-Instruct-Lite",
+        "meta-llama/Meta-Llama-3-70B-Instruct-Lite",
         "meta-llama/Llama-2-70b-chat-hf",
         "meta-llama/Llama-2-13b-chat-hf",
         "meta-llama/Llama-2-7b-chat-hf",
         "meta-llama/Llama-3-8b-chat-hf",
         "meta-llama/Llama-3-70b-chat-hf",
+        "microsoft/WizardLM-2-8x22B",
+        "mistralai/Mistral-7B-Instruct-v0.3",
         "mistralai/Mistral-7B-Instruct-v0.1",
         "mistralai/Mistral-7B-Instruct-v0.2",
         "mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -2218,8 +2347,15 @@ export const TogetherChatSettings: ModelSettingsDict = {
         "NousResearch/Nous-Hermes-llama-2-7b",
         "NousResearch/Nous-Hermes-Llama2-13b",
         "NousResearch/Nous-Hermes-2-Yi-34B",
+        "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
         "openchat/openchat-3.5-1210",
         "Open-Orca/Mistral-7B-OpenOrca",
+        "Qwen/Qwen2.5-7B-Instruct-Turbo",
+        "Qwen/Qwen2.5-72B-Instruct-Turbo",
+        "Qwen/Qwen2-72B-Instruct",
+        "Qwen/Qwen2-VL-72B-Instruct",
+        "Qwen/Qwen2.5-Coder-32B-Instruct",
+        "Qwen/QwQ-32B-Preview",
         "Qwen/Qwen1.5-0.5B-Chat",
         "Qwen/Qwen1.5-1.8B-Chat",
         "Qwen/Qwen1.5-4B-Chat",
@@ -2242,7 +2379,7 @@ export const TogetherChatSettings: ModelSettingsDict = {
         "WizardLM/WizardLM-13B-V1.2",
         "upstage/SOLAR-10.7B-Instruct-v1.0",
       ],
-      default: "meta-llama/Llama-2-7b-chat-hf",
+      default: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
     },
     temperature: {
       type: "number",
@@ -2364,6 +2501,7 @@ export const ModelSettings: Dict<ModelSettingsDict> = {
   "br.meta.llama2": BedrockLlama2ChatSettings,
   "br.meta.llama3": BedrockLlama3Settings,
   together: TogetherChatSettings,
+  deepseek: DeepSeekSettings,
 };
 
 export function getSettingsSchemaForLLM(
@@ -2383,6 +2521,7 @@ export function getSettingsSchemaForLLM(
     [LLMProvider.Aleph_Alpha]: AlephAlphaLuminousSettings,
     [LLMProvider.Ollama]: OllamaSettings,
     [LLMProvider.Together]: TogetherChatSettings,
+    [LLMProvider.DeepSeek]: DeepSeekSettings,
   };
 
   if (llm_provider === LLMProvider.Custom) return ModelSettings[llm_name];
@@ -80,6 +80,10 @@ export enum NativeLLM {
   GEMINI_v1_5_pro = "gemini-1.5-pro",
   GEMINI_v1_pro = "gemini-1.0-pro",
 
+  // DeepSeek
+  DeepSeek_Chat = "deepseek-chat",
+  DeepSeek_Reasoner = "deepseek-reasoner",
+
   // Aleph Alpha
   Aleph_Alpha_Luminous_Extended = "luminous-extended",
   Aleph_Alpha_Luminous_ExtendedControl = "luminous-extended-control",
@@ -151,6 +155,28 @@ export enum NativeLLM {
   Together_Meta_LLaMA2_Chat_7B = "together/meta-llama/Llama-2-7b-chat-hf",
   Together_Meta_LLaMA3_Chat_8B = "together/meta-llama/Llama-3-8b-chat-hf",
   Together_Meta_LLaMA3_Chat_70B = "together/meta-llama/Llama-3-70b-chat-hf",
+  Together_Meta_LLaMA3_3_70B = "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+  Together_Meta_LLaMA3_1_8B = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+  Together_Meta_LLaMA3_1_70B = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+  Together_Meta_LLaMA3_1_405B = "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+  Together_Meta_LLaMA3_8B = "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+  Together_Meta_LLaMA3_70B = "meta-llama/Meta-Llama-3-70B-Instruct-Turbo",
+  Together_Meta_LLaMA3_2_3B = "meta-llama/Llama-3.2-3B-Instruct-Turbo",
+  Together_Meta_LLaMA3_8B_Lite = "meta-llama/Meta-Llama-3-8B-Instruct-Lite",
+  Together_Meta_LLaMA3_70B_Lite = "meta-llama/Meta-Llama-3-70B-Instruct-Lite",
+  Together_Nvidia_LLaMA3_1_Nemotron_70B = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
+  Together_Qwen_Qwen2_5_Coder_32B = "Qwen/Qwen2.5-Coder-32B-Instruct",
+  Together_Qwen_QwQ_32B_Preview = "Qwen/QwQ-32B-Preview",
+  Together_Microsoft_WizardLM_2_8x22B = "microsoft/WizardLM-2-8x22B",
+  Together_Google_Gemma2_27B = "google/gemma-2-27b-it",
+  Together_Google_Gemma2_9B = "google/gemma-2-9b-it",
+  Together_DeepSeek_3 = "deepseek-ai/DeepSeek-V3",
+  Together_DeepSeek_R1 = "deepseek-ai/DeepSeek-R1",
+  Together_mistralai_Mistral_7B_Instruct_v0_3 = "mistralai/Mistral-7B-Instruct-v0.3",
+  Together_Qwen_Qwen2_5_7B_Turbo = "Qwen/Qwen2.5-7B-Instruct-Turbo",
+  Together_Qwen_Qwen2_5_72B_Turbo = "Qwen/Qwen2.5-72B-Instruct-Turbo",
+  Together_Qwen_Qwen2_5_72B = "Qwen/Qwen2-72B-Instruct",
+  Together_Qwen_Qwen2_VL_72B = "Qwen/Qwen2-VL-72B-Instruct",
   Together_mistralai_Mistral_7B_Instruct = "together/mistralai/Mistral-7B-Instruct-v0.1",
   Together_mistralai_Mistral_7B_Instruct_v0_2 = "together/mistralai/Mistral-7B-Instruct-v0.2",
   Together_mistralai_Mixtral8x7B_Instruct_46_7B = "together/mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -212,6 +239,7 @@ export enum LLMProvider {
   Ollama = "ollama",
   Bedrock = "bedrock",
   Together = "together",
+  DeepSeek = "deepseek",
   Custom = "__custom",
 }
 
@@ -233,6 +261,7 @@ export function getProvider(llm: LLM): LLMProvider | undefined {
   else if (llm_name?.startsWith("Ollama")) return LLMProvider.Ollama;
   else if (llm_name?.startsWith("Bedrock")) return LLMProvider.Bedrock;
   else if (llm_name?.startsWith("Together")) return LLMProvider.Together;
+  else if (llm_name?.startsWith("DeepSeek")) return LLMProvider.DeepSeek;
   else if (llm.toString().startsWith("__custom/")) return LLMProvider.Custom;
 
   return undefined;
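Provider resolution here is by enum-key prefix, so the two new NativeLLM members added above (DeepSeek_Chat, DeepSeek_Reasoner) both match the new branch. An illustrative usage, assuming llm_name is derived from the enum member's name:

    // Both DeepSeek enum members start with "DeepSeek", so:
    getProvider(NativeLLM.DeepSeek_Chat);     // => LLMProvider.DeepSeek
    getProvider(NativeLLM.DeepSeek_Reasoner); // => LLMProvider.DeepSeek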
@@ -289,6 +318,7 @@ export const RATE_LIMIT_BY_PROVIDER: { [key in LLMProvider]?: number } = {
   [LLMProvider.Anthropic]: 25, // Tier 1 pricing limit is 50 per minute, across all models; we halve this, to be safe.
   [LLMProvider.Together]: 30, // Paid tier limit is 60 per minute, across all models; we halve this, to be safe.
   [LLMProvider.Google]: 1000, // RPM for Google Gemini 1.5 models is quite generous; at base it is 1000 RPM. The free tier is 15 RPM, but we can expect most CF users to be on the paid tier (and you can always re-run the prompt node until satisfied).
+  [LLMProvider.DeepSeek]: 1000, // DeepSeek does not rate-limit users at the moment, but they might in the future. To be safe, we cap it at 1000 queries per minute.
 };
 
 // Max concurrent requests. Add to this to further constrain the rate limiter.
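These values are requests-per-minute budgets consumed by ChainForge's rate limiter. As a sketch of the general technique only (a minimal sliding-window limiter, not ChainForge's actual implementation):

    // Allow at most rpm calls per rolling 60-second window; otherwise wait.
    function makeRateLimiter(rpm: number) {
      const stamps: number[] = [];
      return async function acquire(): Promise<void> {
        const now = Date.now();
        while (stamps.length > 0 && now - stamps[0] > 60_000) stamps.shift();
        if (stamps.length >= rpm) {
          // Sleep until the oldest call leaves the window, then re-check.
          await new Promise((res) => setTimeout(res, 60_000 - (now - stamps[0])));
          return acquire();
        }
        stamps.push(Date.now());
      };
    }

    const acquire = makeRateLimiter(RATE_LIMIT_BY_PROVIDER[LLMProvider.DeepSeek] ?? 1000);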
@@ -156,6 +156,7 @@ let AWS_SECRET_ACCESS_KEY = get_environ("AWS_SECRET_ACCESS_KEY");
 let AWS_SESSION_TOKEN = get_environ("AWS_SESSION_TOKEN");
 let AWS_REGION = get_environ("AWS_REGION");
 let TOGETHER_API_KEY = get_environ("TOGETHER_API_KEY");
+let DEEPSEEK_API_KEY = get_environ("DEEPSEEK_API_KEY");
 
 /**
  * Sets the local API keys for the relevant LLM API(s).
@@ -188,6 +189,7 @@ export function set_api_keys(api_keys: Dict<string>): void {
     AWS_SESSION_TOKEN = api_keys.AWS_Session_Token;
   if (key_is_present("AWS_Region")) AWS_REGION = api_keys.AWS_Region;
   if (key_is_present("Together")) TOGETHER_API_KEY = api_keys.Together;
+  if (key_is_present("DeepSeek")) DEEPSEEK_API_KEY = api_keys.DeepSeek;
 }
 
 export function get_azure_openai_api_keys(): [
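Usage note: the alias keys passed to set_api_keys mirror the frontend key map and settings form fields, so wiring a DeepSeek key end-to-end looks like this (the key value is a placeholder):

    // "DeepSeek" matches both the settings-form field name and the env-key alias.
    set_api_keys({ DeepSeek: "<your-deepseek-api-key>" });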
@@ -234,6 +236,8 @@ export async function call_chatgpt(
   temperature = 1.0,
   params?: Dict,
   should_cancel?: () => boolean,
+  BASE_URL?: string,
+  API_KEY?: string,
 ): Promise<[Dict, Dict]> {
   if (!OPENAI_API_KEY)
     throw new Error(
@@ -241,8 +245,8 @@ export async function call_chatgpt(
     );
 
   const configuration = new OpenAIConfig({
-    apiKey: OPENAI_API_KEY,
-    basePath: OPENAI_BASE_URL ?? undefined,
+    apiKey: API_KEY ?? OPENAI_API_KEY,
+    basePath: BASE_URL ?? OPENAI_BASE_URL ?? undefined,
   });
 
   // Since we are running client-side, we need to remove the user-agent header:
@@ -284,7 +288,8 @@ export async function call_chatgpt(
   if (params?.tools === undefined && params?.parallel_tool_calls !== undefined)
     delete params?.parallel_tool_calls;
 
-  console.log(`Querying OpenAI model '${model}' with prompt '${prompt}'...`);
+  if (!BASE_URL)
+    console.log(`Querying OpenAI model '${model}' with prompt '${prompt}'...`);
 
   // Determine the system message and whether there's chat history to continue:
   const chat_history: ChatHistory | undefined = params?.chat_history;
@@ -339,6 +344,36 @@ export async function call_chatgpt(
   return [query, response];
 }
 
+/**
+ * Calls DeepSeek models via DeepSeek's API.
+ */
+export async function call_deepseek(
+  prompt: string,
+  model: LLM,
+  n = 1,
+  temperature = 1.0,
+  params?: Dict,
+  should_cancel?: () => boolean,
+): Promise<[Dict, Dict]> {
+  if (!DEEPSEEK_API_KEY)
+    throw new Error(
+      "Could not find a DeepSeek API key. Double-check that your API key is set in Settings or in your local environment.",
+    );
+
+  console.log(`Querying DeepSeek model '${model}' with prompt '${prompt}'...`);
+
+  return await call_chatgpt(
+    prompt,
+    model,
+    n,
+    temperature,
+    params,
+    should_cancel,
+    "https://api.deepseek.com",
+    DEEPSEEK_API_KEY,
+  );
+}
+
 /**
  * Calls OpenAI Image models via OpenAI's API.
  * @returns raw query and response JSON dicts.
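Since call_deepseek simply forwards to call_chatgpt with DeepSeek's base URL and API key (DeepSeek's endpoint is OpenAI-compatible), invoking it matches the other call_* adapters. An illustrative call (the prompt text is arbitrary):

    // Returns the raw [query, response] pair, like the other adapters.
    const [query, response] = await call_deepseek(
      "Summarize Moby-Dick in one sentence.",
      NativeLLM.DeepSeek_Chat,
      1,   // n: number of completions
      1.0, // temperature
    );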
@@ -1604,6 +1639,7 @@ export async function call_llm(
   else if (llm_provider === LLMProvider.Custom) call_api = call_custom_provider;
   else if (llm_provider === LLMProvider.Bedrock) call_api = call_bedrock;
   else if (llm_provider === LLMProvider.Together) call_api = call_together;
+  else if (llm_provider === LLMProvider.DeepSeek) call_api = call_deepseek;
   if (call_api === undefined)
     throw new Error(
       `Adapter for Language model ${llm} and ${llm_provider} not found`,
@@ -1823,6 +1859,8 @@ export function extract_responses(
       return response as Array<string>;
     case LLMProvider.Together:
       return _extract_openai_responses(response as Dict[]);
+    case LLMProvider.DeepSeek:
+      return _extract_openai_responses(response as Dict[]);
     default:
       if (
         Array.isArray(response) &&
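_extract_openai_responses is not shown in this diff; given the OpenAI-compatible response shape that call_deepseek returns, it plausibly flattens choices[].message.content. A hedged sketch under that assumption (not the verbatim ChainForge helper):

    // Assumed behavior: pull message text out of OpenAI-format chat completions.
    type Dict = Record<string, any>;

    function extractOpenAIResponses(responses: Dict[]): string[] {
      return responses.flatMap((resp) =>
        (resp.choices ?? []).map((c: Dict) => c.message?.content ?? ""),
      );
    }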
@@ -193,6 +193,26 @@ export const initLLMProviderMenu: (LLMSpec | LLMGroup)[] = [
       },
     ],
   },
+  {
+    group: "DeepSeek",
+    emoji: "🐋",
+    items: [
+      {
+        name: "DeepSeek Chat",
+        emoji: "🐋",
+        model: "deepseek-chat",
+        base_model: "deepseek",
+        temp: 1.0,
+      }, // The base_model designates what settings form will be used, and must be unique.
+      {
+        name: "DeepSeek Reasoner",
+        emoji: "🐳",
+        model: "deepseek-reasoner",
+        base_model: "deepseek",
+        temp: 1.0,
+      },
+    ],
+  },
   {
     group: "HuggingFace",
     emoji: "🤗",