Merge branch 'main' into pv

Priyan Vaithilingam 2023-05-02 18:43:13 -04:00
commit 4c59c8f257
8 changed files with 93 additions and 50 deletions

View File

@@ -51,11 +51,12 @@ We provide ongoing releases of this tool in the hopes that others find it useful
## Future Planned Features
- **Dark mode**: A dark mode theme
- **Collapse nodes**: Nodes should be collapsible to save screen space.
- **LMQL node**: Support for prompt pipelines that involve LMQL code, esp. inspecting masked response variables.
- **AI assistance for prompt engineering**: Spur creative ideas and quickly iterate on variations of prompts through interaction with GPT4.
- **Compare fine-tuned to base models**: Beyond comparing between different models like Alpaca and ChatGPT, we want to support comparison between versions of the same model (e.g., a base model and a fine-tuned one). Did your fine-tuning result in any 'breaking changes' elsewhere? We are building infrastructure to help you detect where.
- **Compare fine-tuned to base models**: Beyond comparing between different models like Alpaca and ChatGPT, support comparison between versions of the same model (e.g., a base model and a fine-tuned one). Help users detect where fine-tuning resulted in 'breaking changes' elsewhere.
- **Export prompt chains to well-known APIs**: In the future, export a chain (in part) to a programming API like LangChain.
- **Dark mode**: A dark mode theme
- **Compare across chains**: If a system prompt, or another shared prompt, is used *across* chains C1, C2, etc., how does changing it affect all downstream events?
## Inspiration and Links

View File

@@ -45,11 +45,15 @@ const InspectorNode = ({ data, id }) => {
// Bucket responses by LLM:
const responses_by_llm = bucketResponsesByLLM(json.responses);
// Get the var names across responses
// NOTE: This assumes only a single prompt node output as input
// (all response vars have the exact same keys).
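// e.g. (hypothetical shape) json.responses = [{vars: {topic: "cats"}, responses: ["..."]}, ...]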
let tempvars = {};
Object.keys(json.responses[0].vars).forEach(v => {tempvars[v] = new Set();});
// // Get the var names across all responses, as a set
// let tempvarnames = new Set();
// json.responses.forEach(r => {
// if (!r.vars) return;
// Object.keys(r.vars).forEach(tempvarnames.add);
// });
// // Create a dict version
// let tempvars = {};
const vars_to_str = (vars) => {
const pairs = Object.keys(vars).map(varname => {
@@ -67,7 +71,7 @@ const InspectorNode = ({ data, id }) => {
const ps = res_obj.responses.map((r, idx) =>
(<pre className="small-response" key={idx}>{r}</pre>)
);
Object.keys(res_obj.vars).forEach(v => {tempvars[v].add(res_obj.vars[v])});
// Object.keys(res_obj.vars).forEach(v => {tempvars[v].add(res_obj.vars[v])});
const vars = vars_to_str(res_obj.vars);
return (
<div key={"r"+res_idx} className="response-box" style={{ backgroundColor: colors[llm_idx % colors.length] }}>
@@ -84,19 +88,19 @@ const InspectorNode = ({ data, id }) => {
);
}));
setVarSelects(Object.keys(tempvars).map(v => {
const options = Array.from(tempvars[v]).map((val, idx) => (
<option value={val} key={idx}>{val}</option>
));
return (
<div key={v}>
<label htmlFor={v}>{v}: </label>
<select name={v} id={v} onChange={handleVarValueSelect}>
{options}
</select>
</div>
);
}));
// setVarSelects(Object.keys(tempvars).map(v => {
// const options = Array.from(tempvars[v]).map((val, idx) => (
// <option value={val} key={idx}>{val}</option>
// ));
// return (
// <div key={v}>
// <label htmlFor={v}>{v}: </label>
// <select name={v} id={v} onChange={handleVarValueSelect}>
// {options}
// </select>
// </div>
// );
// }));
}
});
}

View File

@@ -36,6 +36,7 @@ const PromptNode = ({ data, id }) => {
const edges = useStore((state) => state.edges);
const output = useStore((state) => state.output);
const setDataPropsForNode = useStore((state) => state.setDataPropsForNode);
const getNode = useStore((state) => state.getNode);
const [hovered, setHovered] = useState(false);
const [templateVars, setTemplateVars] = useState(data.vars || []);
@@ -105,24 +106,39 @@ const PromptNode = ({ data, id }) => {
// Pull data from each source:
const pulled_data = {};
templateVars.forEach(varname => {
// Find the relevant edge (breaking once we've found it):
for (let i = 0; i < edges.length; i++) {
const e = edges[i];
if (e.target == id && e.targetHandle == varname) {
// Get the data output for that handle on the source node:
let out = output(e.source, e.sourceHandle);
if (!Array.isArray(out)) out = [out];
if (varname in pulled_data)
pulled_data[varname] = pulled_data[varname].concat(out);
else
pulled_data[varname] = out;
}
}
});
const get_outputs = (varnames, nodeId) => {
console.log(varnames);
varnames.forEach(varname => {
// Find the relevant edge(s):
edges.forEach(e => {
if (e.target == nodeId && e.targetHandle == varname) {
// Get the immediate output:
let out = output(e.source, e.sourceHandle);
// Save the var data from the pulled output
if (varname in pulled_data)
pulled_data[varname] = pulled_data[varname].concat(out);
else
pulled_data[varname] = out;
// Get any vars that the output depends on, and recursively collect those outputs as well:
const n_vars = getNode(e.source).data.vars;
if (n_vars && Array.isArray(n_vars) && n_vars.length > 0)
get_outputs(n_vars, e.source);
}
});
});
};
get_outputs(templateVars, id);
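// Note: per the comment inside get_outputs, this also recursively pulls values for any vars an
// upstream output depends on, e.g. a hypothetical {text} input filled by a node whose own fields
// reference {story}.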
// Get Pythonic version of the prompt, by adding a $ before any template variables in braces:
const py_prompt_template = promptText.replace(/(?<!\\){(.*?)(?<!\\)}/g, "${$1}")
const to_py_template_format = (str) => str.replace(/(?<!\\){(.*?)(?<!\\)}/g, "${$1}")
const py_prompt_template = to_py_template_format(promptText);
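// e.g. "Tell me a {adjective} joke about {topic}" -> "Tell me a ${adjective} joke about ${topic}";
// escaped braces like \{literal\} are left untouched by the negative lookbehinds.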
// Do the same for the vars, since vars can themselves be prompt templates:
Object.keys(pulled_data).forEach(varname => {
pulled_data[varname] = pulled_data[varname].map(val => to_py_template_format(val));
});
// Run all prompt permutations through the LLM to generate + cache responses:
fetch('http://localhost:8000/queryllm', {
@@ -276,7 +292,9 @@ const PromptNode = ({ data, id }) => {
<div className="nodrag">
<input type="checkbox" id="gpt3.5" name="gpt3.5" value="gpt3.5" defaultChecked={true} onChange={handleLLMChecked} />
<label htmlFor="gpt3.5">GPT3.5 </label>
<input type="checkbox" id="alpaca.7B" name="alpaca.7B" value="alpaca.7B" onChange={handleLLMChecked} />
<input type="checkbox" id="gpt4" name="gpt4" value="gpt4" defaultChecked={false} onChange={handleLLMChecked} />
<label htmlFor="gpt4">GPT4 </label>
<input type="checkbox" id="alpaca.7B" name="alpaca.7B" value="alpaca.7B" defaultChecked={false} onChange={handleLLMChecked} />
<label htmlFor="alpaca.7B">Alpaca 7B</label>
</div>
<hr />

View File

@@ -11,6 +11,17 @@ const union = (setA, setB) => {
}
return _union;
}
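// Shallow equality check for two Sets. Needed because Sets are compared by reference,
// so a check like setA !== setB is true even when they contain exactly the same items.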
const setsAreEqual = (setA, setB) => {
if (setA.size !== setB.size) return false;
let equal = true;
for (const item of setA) {
if (!setB.has(item)) {
equal = false;
break;
}
}
return equal;
}
const TextFieldsNode = ({ data, id }) => {
@@ -22,7 +33,6 @@ const TextFieldsNode = ({ data, id }) => {
// Update the data for this text fields' id.
let new_data = { 'fields': {...data.fields} };
new_data.fields[event.target.id] = event.target.value;
setDataPropsForNode(id, new_data);
// TODO: Optimize this check.
let all_found_vars = new Set();
@@ -37,9 +47,14 @@ const TextFieldsNode = ({ data, id }) => {
// Update template var fields + handles, if there's a change in sets
const past_vars = new Set(templateVars);
if (all_found_vars !== past_vars) {
setTemplateVars(Array.from(all_found_vars));
if (!setsAreEqual(all_found_vars, past_vars)) {
console.log('set vars');
const new_vars_arr = Array.from(all_found_vars);
new_data.vars = new_vars_arr;
setTemplateVars(new_vars_arr);
}
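// Save the updated data (field values and any new template vars) for this node: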
setDataPropsForNode(id, new_data);
}, [data, id, setDataPropsForNode, templateVars]);
// Initialize fields (run once at init)
@@ -56,7 +71,7 @@ const TextFieldsNode = ({ data, id }) => {
const val = data.fields ? data.fields[i] : '';
return (
<div className="input-field" key={i}>
<textarea id={i} name={i} className="text-field-fixed nodrag" rows="3" cols="40" defaultValue={val} onChange={handleInputChange} />
<textarea id={i} name={i} className="text-field-fixed nodrag" rows="2" cols="40" defaultValue={val} onChange={handleInputChange} />
</div>
)}));
}, [data.fields, handleInputChange]);

View File

@@ -14,6 +14,7 @@ CORS(app)
LLM_NAME_MAP = {
'gpt3.5': LLM.ChatGPT,
'alpaca.7B': LLM.Alpaca7B,
'gpt4': LLM.GPT4,
}
LLM_NAME_MAP_INVERSE = {val.name: key for key, val in LLM_NAME_MAP.items()}

View File

@@ -106,8 +106,8 @@ class PromptPipeline:
self._cache_responses({})
def _prompt_llm(self, llm: LLM, prompt: str, n: int = 1, temperature: float = 1.0) -> Tuple[Dict, Dict]:
if llm is LLM.ChatGPT:
return call_chatgpt(prompt, n=n, temperature=temperature)
if llm is LLM.ChatGPT or llm is LLM.GPT4:
return call_chatgpt(prompt, model=llm, n=n, temperature=temperature)
elif llm is LLM.Alpaca7B:
return call_dalai(llm_name='alpaca.7B', port=4000, prompt=prompt, n=n, temperature=temperature)
else:

View File

@@ -109,8 +109,7 @@ class PromptPermutationGenerator:
break
if param is None:
print("Did not find any more params left to fill in current template. Returning empty list...")
return []
return [template]
# Generate new prompts by filling in its value(s) into the PromptTemplate
val = paramDict[param]
@@ -136,9 +135,9 @@ class PromptPermutationGenerator:
return
for p in self._gen_perm(self.template, list(paramDict.keys()), paramDict):
print(p)
yield p
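# e.g. (assuming cross-product semantics) a template "Tell me a {adjective} joke about {topic}"
# with paramDict {'adjective': ['funny', 'dark'], 'topic': ['cats']} would yield two filled prompts.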
# Test cases
if __name__ == '__main__':
# Single template

View File

@@ -14,15 +14,20 @@ openai.api_key = os.environ.get("OPENAI_API_KEY")
class LLM(Enum):
ChatGPT = 0
Alpaca7B = 1
GPT4 = 2
def call_chatgpt(prompt: str, n: int = 1, temperature: float = 1.0, system_msg: Union[str, None]=None) -> Tuple[Dict, Dict]:
def call_chatgpt(prompt: str, model: LLM, n: int = 1, temperature: float = 1.0, system_msg: Union[str, None]=None) -> Tuple[Dict, Dict]:
"""
Calls GPT3.5 or GPT4 via OpenAI's chat API.
Returns raw query and response JSON dicts.
"""
model_map = { LLM.ChatGPT: 'gpt-3.5-turbo', LLM.GPT4: 'gpt-4' }
if model not in model_map:
raise Exception(f"Could not find OpenAI chat model {model}")
model = model_map[model]
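# `model` now holds the OpenAI model id string, e.g. LLM.GPT4 -> 'gpt-4'.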
system_msg = "You are a helpful assistant." if system_msg is None else system_msg
query = {
"model": "gpt-3.5-turbo",
"model": model,
"messages": [
{"role": "system", "content": system_msg},
{"role": "user", "content": prompt},
@@ -133,7 +138,7 @@ def extract_responses(response: Union[list, dict], llm: LLM) -> List[dict]:
Given a LLM and a response object from its API, extract the
text response(s) part of the response object.
"""
if llm is LLM.ChatGPT or llm == LLM.ChatGPT.name:
if llm is LLM.ChatGPT or llm == LLM.ChatGPT.name or llm is LLM.GPT4 or llm == LLM.GPT4.name:
return _extract_chatgpt_responses(response)
elif llm is LLM.Alpaca7B or llm == LLM.Alpaca7B.name:
return response["response"]