mirror of https://github.com/ianarawjo/ChainForge.git (synced 2025-03-14 08:16:37 +00:00)

Add Code Processor nodes (#180)

* Add Code Processor nodes.
* Renamed EvaluatorNode to CodeEvaluatorNode.
* Changed the way evaluator nodes pull responses (from grabResponses to pullInputData).
* Fixed SimpleEvalNode to be consistent with CodeEvaluatorNode.
* Fixed Vis Node for the case where no eval responses are connected, but responses are connected.
* Rebuilt react and updated the package version.

This commit is contained in:
parent a861695c87
commit 7223735b7f
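The central plumbing change is how code nodes hand responses to the backend. A minimal TypeScript sketch of the request body before and after this commit (field names are taken from the diffs below; the ids, code strings, and response values are purely illustrative):

// Sketch only: what a code node sends to the executejs/executepy routes.
const oldPayload = {
  id: 'evaluator-node-1',
  code: "function evaluate(response) { return response.text.length; }",
  responses: ['prompt-node-1'],   // before: cache IDs of upstream nodes, re-loaded via grabResponses
  scope: 'response',
};
const newPayload = {
  id: 'processor-node-1',
  code: "function process(response) { return response.text.slice(0, 12); }",
  responses: [{                   // after: full standardized response objects pulled via pullInputData
    prompt: 'What is the capital of France?',
    vars: {}, metavars: {}, llm: 'GPT3.5',
    responses: ['The capital of France is Paris.'],
    tokens: {},
  }],
  scope: 'response',
  process_type: 'processor',      // new field: 'evaluator' scores responses, 'processor' rewrites their text
};
console.log(oldPayload, newPayload);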
@@ -158,12 +158,12 @@ def check_typeof_vals(arr: list) -> MetricType:
else:
return val_type

def run_over_responses(eval_func, responses: list, scope: str) -> list:
def run_over_responses(process_func, responses: list, scope: str, process_type: str) -> list:
for resp_obj in responses:
res = resp_obj['responses']
if scope == 'response':
# Run evaluator func over every individual response text
evals = [eval_func(
# Run process func over every individual response text
proc = [process_func(
ResponseInfo(
text=r,
prompt=resp_obj['prompt'],
@@ -172,51 +172,62 @@ def run_over_responses(eval_func, responses: list, scope: str) -> list:
llm=resp_obj['llm'])
) for r in res]

# Check the type of evaluation results
# NOTE: We assume this is consistent across all evaluations, but it may not be.
eval_res_type = check_typeof_vals(evals)
if process_type == 'processor':
# Response text was just transformed, not evaluated
resp_obj['responses'] = proc
else:
# Responses were evaluated/scored
# Check the type of evaluation results
# NOTE: We assume this is consistent across all evaluations, but it may not be.
eval_res_type = check_typeof_vals(proc)

if eval_res_type == MetricType.Numeric:
# Store items with summary of mean, median, etc
resp_obj['eval_res'] = {
'mean': mean(evals),
'median': median(evals),
'stdev': stdev(evals) if len(evals) > 1 else 0,
'range': (min(evals), max(evals)),
'items': evals,
'dtype': eval_res_type.name,
}
elif eval_res_type in (MetricType.Unknown, MetricType.Empty):
raise Exception('Unsupported types found in evaluation results. Only supported types for metrics are: int, float, bool, str.')
else:
# Categorical, KeyValue, etc, we just store the items:
resp_obj['eval_res'] = {
'items': evals,
'dtype': eval_res_type.name,
}
if eval_res_type == MetricType.Numeric:
# Store items with summary of mean, median, etc
resp_obj['eval_res'] = {
'mean': mean(proc),
'median': median(proc),
'stdev': stdev(proc) if len(proc) > 1 else 0,
'range': (min(proc), max(proc)),
'items': proc,
'dtype': eval_res_type.name,
}
elif eval_res_type in (MetricType.Unknown, MetricType.Empty):
raise Exception('Unsupported types found in evaluation results. Only supported types for metrics are: int, float, bool, str.')
else:
# Categorical, KeyValue, etc, we just store the items:
resp_obj['eval_res'] = {
'items': proc,
'dtype': eval_res_type.name,
}
else:
# Run evaluator func over the entire response batch
ev = eval_func([
# Run process func over the entire response batch
proc = process_func([
ResponseInfo(text=r,
prompt=resp_obj['prompt'],
var=resp_obj['vars'],
llm=resp_obj['llm'])
for r in res])
ev_type = check_typeof_vals([ev])
if ev_type == MetricType.Numeric:
resp_obj['eval_res'] = {
'mean': ev,
'median': ev,
'stdev': 0,
'range': (ev, ev),
'items': [ev],
'type': ev_type.name,
}
else:
resp_obj['eval_res'] = {
'items': [ev],
'type': ev_type.name,
}
for r in res])

if process_type == 'processor':
# Response text was just transformed, not evaluated
resp_obj['responses'] = proc
else:
# Responses were evaluated/scored
ev_type = check_typeof_vals([proc])
if ev_type == MetricType.Numeric:
resp_obj['eval_res'] = {
'mean': proc,
'median': proc,
'stdev': 0,
'range': (proc, proc),
'items': [proc],
'type': ev_type.name,
}
else:
resp_obj['eval_res'] = {
'items': [proc],
'type': ev_type.name,
}
return responses
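To make the evaluator/processor branches above concrete, here is a small sketch of what one response object looks like after run_over_responses in each mode (shapes follow the code above; the values are invented):

// Illustrative only: a single response object before and after processing.
const before = {
  prompt: 'What is 2+2? End with ANSWER:<answer>',
  vars: {}, llm: 'GPT3.5',
  responses: ['Let me think... ANSWER:4'],
};
// process_type === 'evaluator': responses are left alone and a score summary is attached under eval_res
const evaluated = {
  ...before,
  eval_res: { mean: 24, median: 24, stdev: 0, range: [24, 24], items: [24], dtype: 'Numeric' },
};
// process_type === 'processor': the response texts themselves are replaced by the function's return values
const processed = { ...before, responses: ['4'] };
console.log(evaluated, processed);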
async def make_sync_call_async(sync_method, *args, **params):
@@ -266,6 +277,7 @@ def executepy():
'responses': List[StandardizedLLMResponse] # the responses to run on.
'scope': 'response' | 'batch' # the scope of responses to run on --a single response, or all across each batch.
# If batch, evaluator has access to 'responses'. Only matters if n > 1 for each prompt.
'process_type': 'evaluator' | 'processor' # the type of processing to perform. Evaluators only 'score'/annotate responses. Processors change responses (e.g. text).
'script_paths': unspecified | List[str] # the paths to scripts to be added to the path before the lambda function is evaluated
}
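For reference, the two kinds of user functions that 'process_type' selects between look roughly like this (TypeScript sketch; the ResponseInfo interface below is a pared-down stand-in for the real class documented in the node's info modal):

interface ResponseInfo { text: string; prompt: string; var: Record<string, string>; llm: string; }  // simplified stub

function evaluate(response: ResponseInfo): number {
  return response.text.length;        // evaluators return a score, which ends up under eval_res
}
function process(response: ResponseInfo): string {
  return response.text.slice(0, 12);  // processors return replacement text for the response
}

const r: ResponseInfo = { text: 'Paris is the capital of France.', prompt: 'What is the capital of France?', var: {}, llm: 'GPT3.5' };
console.log(evaluate(r), process(r));  // 31 'Paris is the'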
@@ -304,6 +316,9 @@ def executepy():
except Exception as e:
return jsonify({'error': f'Could not add script path to sys.path. Error message:\n{str(e)}'})

# Get processor type, if any
process_type = data['process_type'] if 'process_type' in data else 'evaluator'

# Create the evaluator function
# DANGER DANGER!
try:
@@ -311,7 +326,10 @@ def executepy():

# Double-check that there is an 'evaluate' method in our namespace.
# This will throw a NameError if not:
evaluate # noqa
if process_type == 'evaluator':
evaluate # noqa
else:
process # noqa
except Exception as e:
return jsonify({'error': f'Could not compile evaluator code. Error message:\n{str(e)}'})

@@ -319,7 +337,7 @@ def executepy():
logs = []
try:
HIJACK_PYTHON_PRINT()
evald_responses = run_over_responses(evaluate, responses, scope=data['scope']) # noqa
evald_responses = run_over_responses(evaluate if process_type == 'evaluator' else process, responses, scope=data['scope'], process_type=process_type) # noqa
logs = REVERT_PYTHON_PRINT()
except Exception as e:
logs = REVERT_PYTHON_PRINT()
@@ -1,15 +1,15 @@
{
"files": {
"main.css": "/static/css/main.8665fcca.css",
"main.js": "/static/js/main.72fd301c.js",
"main.js": "/static/js/main.77a6d92d.js",
"static/js/787.4c72bb55.chunk.js": "/static/js/787.4c72bb55.chunk.js",
"index.html": "/index.html",
"main.8665fcca.css.map": "/static/css/main.8665fcca.css.map",
"main.72fd301c.js.map": "/static/js/main.72fd301c.js.map",
"main.77a6d92d.js.map": "/static/js/main.77a6d92d.js.map",
"787.4c72bb55.chunk.js.map": "/static/js/787.4c72bb55.chunk.js.map"
},
"entrypoints": [
"static/css/main.8665fcca.css",
"static/js/main.72fd301c.js"
"static/js/main.77a6d92d.js"
]
}
@@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><script async src="https://www.googletagmanager.com/gtag/js?id=G-RN3FDBLMCR"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-RN3FDBLMCR")</script><link rel="icon" href="/favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="A visual programming environment for prompt engineering"/><link rel="apple-touch-icon" href="/logo192.png"/><link rel="manifest" href="/manifest.json"/><title>ChainForge</title><script defer="defer" src="/static/js/main.72fd301c.js"></script><link href="/static/css/main.8665fcca.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><script async src="https://www.googletagmanager.com/gtag/js?id=G-RN3FDBLMCR"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-RN3FDBLMCR")</script><link rel="icon" href="/favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="A visual programming environment for prompt engineering"/><link rel="apple-touch-icon" href="/logo192.png"/><link rel="manifest" href="/manifest.json"/><title>ChainForge</title><script defer="defer" src="/static/js/main.77a6d92d.js"></script><link href="/static/css/main.8665fcca.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long

chainforge/react-server/src/App.js (vendored, 28 lines changed)
@@ -12,7 +12,7 @@ import { IconSettings, IconTextPlus, IconTerminal, IconCsv, IconSettingsAutomati
import RemoveEdge from './RemoveEdge';
import TextFieldsNode from './TextFieldsNode'; // Import a custom node
import PromptNode from './PromptNode';
import EvaluatorNode from './EvaluatorNode';
import CodeEvaluatorNode from './CodeEvaluatorNode';
import VisNode from './VisNode';
import InspectNode from './InspectorNode';
import ScriptNode from './ScriptNode';
@@ -81,7 +81,7 @@ const nodeTypes = {
prompt: PromptNode,
chat: PromptNode,
simpleval: SimpleEvalNode,
evaluator: EvaluatorNode,
evaluator: CodeEvaluatorNode,
llmeval: LLMEvaluatorNode,
vis: VisNode,
inspect: InspectNode,
@@ -91,6 +91,7 @@ const nodeTypes = {
comment: CommentNode,
join: JoinNode,
split: SplitNode,
processor: CodeEvaluatorNode,
};

const edgeTypes = {
@@ -238,6 +239,15 @@ const App = () => {
const { x, y } = getViewportCenter();
addNode({ id: 'split-'+Date.now(), type: 'split', data: {}, position: {x: x-200, y:y-100} });
};
const addProcessorNode = (progLang) => {
const { x, y } = getViewportCenter();
let code = "";
if (progLang === 'python')
code = "def process(response):\n  return response.text;";
else if (progLang === 'javascript')
code = "function process(response) {\n  return response.text;\n}";
addNode({ id: 'process-'+Date.now(), type: 'processor', data: { language: progLang, code: code }, position: {x: x-200, y:y-100} });
};

const onClickExamples = () => {
if (examplesModal && examplesModal.current)
@@ -762,16 +772,16 @@ const App = () => {
<Menu.Divider />
<Menu.Label>Evaluators</Menu.Label>
<MenuTooltip label="Evaluate responses with a simple check (no coding required).">
<Menu.Item onClick={addSimpleEvalNode} icon={<IconRuler2 size="16px" />}> Simple Evaluator Node </Menu.Item>
<Menu.Item onClick={addSimpleEvalNode} icon={<IconRuler2 size="16px" />}> Simple Evaluator </Menu.Item>
</MenuTooltip>
<MenuTooltip label="Evaluate responses by writing JavaScript code.">
<Menu.Item onClick={() => addEvalNode('javascript')} icon={<IconTerminal size="16px" />}> JavaScript Evaluator Node </Menu.Item>
<Menu.Item onClick={() => addEvalNode('javascript')} icon={<IconTerminal size="16px" />}> JavaScript Evaluator </Menu.Item>
</MenuTooltip>
{IS_RUNNING_LOCALLY ? (<MenuTooltip label="Evaluate responses by writing Python code.">
<Menu.Item onClick={() => addEvalNode('python')} icon={<IconTerminal size="16px" />}> Python Evaluator Node </Menu.Item>
<Menu.Item onClick={() => addEvalNode('python')} icon={<IconTerminal size="16px" />}> Python Evaluator </Menu.Item>
</MenuTooltip>): <></>}
<MenuTooltip label="Evaluate responses with an LLM like GPT-4.">
<Menu.Item onClick={addLLMEvalNode} icon={<IconRobot size="16px" />}> LLM Scorer Node</Menu.Item>
<Menu.Item onClick={addLLMEvalNode} icon={<IconRobot size="16px" />}> LLM Scorer </Menu.Item>
</MenuTooltip>
<Menu.Divider />
<Menu.Label>Visualizers</Menu.Label>
@@ -783,6 +793,12 @@ const App = () => {
</MenuTooltip>
<Menu.Divider />
<Menu.Label>Processors</Menu.Label>
<MenuTooltip label="Transform responses by mapping a JavaScript function over them.">
<Menu.Item onClick={() => addProcessorNode('javascript')} icon={<IconTerminal size='14pt' />}> JavaScript Processor </Menu.Item>
</MenuTooltip>
{IS_RUNNING_LOCALLY ? <MenuTooltip label="Transform responses by mapping a Python function over them.">
<Menu.Item onClick={() => addProcessorNode('python')} icon={<IconTerminal size='14pt' />}> Python Processor </Menu.Item>
</MenuTooltip>: <></>}
<MenuTooltip label="Concatenate responses or input data together before passing into later nodes, within or across variables and LLMs.">
<Menu.Item onClick={addJoinNode} icon={<IconArrowMerge size='14pt' />}> Join Node </Menu.Item>
</MenuTooltip>
@ -1,4 +1,4 @@
|
||||
import React, { useState, useRef, useCallback, useEffect } from 'react';
|
||||
import React, { useState, useRef, useCallback, useEffect, useMemo } from 'react';
|
||||
import { Handle } from 'reactflow';
|
||||
import { Button, Code, Modal, Tooltip, Box, Text } from '@mantine/core';
|
||||
import { Prism } from '@mantine/prism';
|
||||
@ -16,8 +16,9 @@ import "ace-builds/src-noconflict/mode-javascript";
|
||||
import "ace-builds/src-noconflict/theme-xcode";
|
||||
import "ace-builds/src-noconflict/ext-language_tools";
|
||||
import fetch_from_backend from './fetch_from_backend';
|
||||
import { APP_IS_RUNNING_LOCALLY } from './backend/utils';
|
||||
import { APP_IS_RUNNING_LOCALLY, stripLLMDetailsFromResponses, toStandardResponseFormat } from './backend/utils';
|
||||
import InspectFooter from './InspectFooter';
|
||||
import { escapeBraces } from './backend/template';
|
||||
|
||||
// Whether we are running on localhost or not, and hence whether
|
||||
// we have access to the Flask backend for, e.g., Python code evaluation.
|
||||
@ -53,6 +54,7 @@ class ResponseInfo:
|
||||
...
|
||||
`;
|
||||
|
||||
// Code evaluator examples for info modal
|
||||
const _info_example_py = `
|
||||
def evaluate(response):
|
||||
# Return the length of the response (num of characters)
|
||||
@@ -78,9 +80,48 @@ function evaluate(response) {
return ... // for instance, true or false
}`;

const EvaluatorNode = ({ data, id }) => {
// Code processor examples for info modal
const _info_proc_example_py = `
def process(response):
# Return the first 12 characters
return response.text[:12]
`;
const _info_proc_example_js = `
function process(response) {
// Return the first 12 characters
return response.text.slice(0, 12);
}`;
const _info_proc_example_var_py = `
def process(response):
# Find the index of the substring "ANSWER:"
answer_index = response.text.find("ANSWER:")

# If "ANSWER:" is in the text, return everything after it
if answer_index != -1:
return response.text[answer_index + len("ANSWER:"):]
else: # return error message
return "NOT FOUND"
`;
const _info_proc_example_var_js = `
function process(response) {
// Find the index of the substring "ANSWER:"
const answerIndex = response.text.indexOf("ANSWER:");

// If "ANSWER:" is in the text, return everything after it
if (answerIndex !== -1)
return response.text.substring(answerIndex + "ANSWER:".length);
else // return error message
return "NOT FOUND";
}`;
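As a quick sanity check of the "first 12 characters" processor example above, here is the same logic in TypeScript applied to an arbitrary input:

function process(response: { text: string }): string {
  // Same logic as the example processor above: keep only the first 12 characters
  return response.text.slice(0, 12);
}
console.log(process({ text: 'The quick brown fox jumps over the lazy dog.' }));  // -> 'The quick br'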
/**
* The Code Evaluator class supports users in writing JavaScript and Python functions that map across LLM responses.
* It has two modes: evaluator and processor mode. Evaluators annotate responses with scores; processors transform response objects themselves.
*/
const CodeEvaluatorNode = ({ data, id, type: node_type }) => {
|
||||
const inputEdgesForNode = useStore((state) => state.inputEdgesForNode);
|
||||
const pullInputData = useStore((state) => state.pullInputData);
|
||||
const pingOutputNodes = useStore((state) => state.pingOutputNodes);
|
||||
const setDataPropsForNode = useStore((state) => state.setDataPropsForNode);
|
||||
const [status, setStatus] = useState('none');
|
||||
@ -129,7 +170,7 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
}).then(function(json) {
|
||||
if (json.responses && json.responses.length > 0) {
|
||||
// Store responses and set status to green checkmark
|
||||
setLastResponses(json.responses);
|
||||
setLastResponses(stripLLMDetailsFromResponses(json.responses));
|
||||
setStatus('ready');
|
||||
}
|
||||
});
|
||||
@ -164,18 +205,22 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
instead. If you'd like to run the Python evaluator, consider installing ChainForge locally.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Get the ids from the connected input nodes:
|
||||
const input_node_ids = inputEdgesForNode(id).map(e => e.source);
|
||||
if (input_node_ids.length === 0) {
|
||||
console.warn("No inputs for evaluator node.");
|
||||
return;
|
||||
|
||||
// Pull input data
|
||||
let pulled_inputs = pullInputData(["responseBatch"], id);
|
||||
if (!pulled_inputs || !pulled_inputs["responseBatch"]) {
|
||||
console.warn(`No inputs for code ${node_type} node.`);
|
||||
return;
|
||||
}
|
||||
// Convert to standard response format (StandardLLMResponseFormat)
|
||||
pulled_inputs = pulled_inputs["responseBatch"].map(toStandardResponseFormat);
|
||||
|
||||
// Double-check that the code includes an 'evaluate' function:
|
||||
const find_evalfunc_regex = progLang === 'python' ? /def\s+evaluate\s*(.*):/ : /function\s+evaluate\s*(.*)/;
|
||||
if (codeText.search(find_evalfunc_regex) === -1) {
|
||||
const err_msg = `Could not find required function 'evaluate'. Make sure you have defined an 'evaluate' function.`;
|
||||
const find_func_regex = node_type === 'evaluator' ? (progLang === 'python' ? /def\s+evaluate\s*(.*):/ : /function\s+evaluate\s*(.*)/)
|
||||
: (progLang === 'python' ? /def\s+process\s*(.*):/ : /function\s+process\s*(.*)/);
|
||||
if (codeText.search(find_func_regex) === -1) {
|
||||
const req_func_name = node_type === 'evaluator' ? 'evaluate' : 'process';
|
||||
const err_msg = `Could not find required function '${req_func_name}'. Make sure you have defined an '${req_func_name}' function.`;
|
||||
setStatus('error');
|
||||
alertModal.current.trigger(err_msg);
|
||||
return;
|
||||
@ -190,6 +235,8 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
alertModal.current.trigger(err_msg);
|
||||
};
|
||||
|
||||
// const _llmItemsCurrState = getLLMsInPulledInputData(pulled_data);
|
||||
|
||||
// Get all the Python script nodes, and get all the folder paths
|
||||
// NOTE: Python only!
|
||||
let script_paths = [];
|
||||
@ -204,8 +251,9 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
fetch_from_backend(execute_route, {
|
||||
id: id,
|
||||
code: codeTextOnRun,
|
||||
responses: input_node_ids,
|
||||
responses: pulled_inputs,
|
||||
scope: 'response',
|
||||
process_type: node_type,
|
||||
script_paths: script_paths,
|
||||
}).then(function(json) {
|
||||
// Store any Python print output
|
||||
@ -226,12 +274,27 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
|
||||
// Ping any vis + inspect nodes attached to this node to refresh their contents:
|
||||
pingOutputNodes(id);
|
||||
|
||||
console.log(json.responses);
|
||||
setLastResponses(json.responses);
|
||||
setLastResponses(stripLLMDetailsFromResponses(json.responses));
|
||||
setCodeTextOnLastRun(codeTextOnRun);
|
||||
setLastRunSuccess(true);
|
||||
|
||||
setDataPropsForNode(id, {fields: json.responses.map(
|
||||
resp_obj => resp_obj['responses'].map(r => {
|
||||
// Carry over the response text, prompt, prompt fill history (vars), and llm data
|
||||
let o = { text: escapeBraces(r),
|
||||
prompt: resp_obj['prompt'],
|
||||
fill_history: resp_obj['vars'],
|
||||
metavars: resp_obj['metavars'] || {},
|
||||
llm: resp_obj['llm'] };
|
||||
|
||||
// Carry over any chat history
|
||||
if (resp_obj['chat_history'])
|
||||
o.chat_history = resp_obj['chat_history'];
|
||||
|
||||
return o;
|
||||
})).flat()
|
||||
});
|
||||
|
||||
if (status !== 'ready')
|
||||
setUninspectedResponses(true);
|
||||
|
||||
@ -250,10 +313,67 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
}
|
||||
}, [inspectModal, lastResponses]);
|
||||
|
||||
const default_header = (progLang === 'python') ?
|
||||
'Python Evaluator Node'
|
||||
: 'JavaScript Evaluator Node';
|
||||
/* Memoized variables for displaying the UI that depend on the node type (evaluator or processor) and the programming language. */
|
||||
const default_header = useMemo(() => {
|
||||
const capitalized_type = node_type.charAt(0).toUpperCase() + node_type.slice(1);
|
||||
if (progLang === 'python')
|
||||
return `Python ${capitalized_type} Node`;
|
||||
else
|
||||
return `JavaScript ${capitalized_type} Node`;
|
||||
}, [progLang, node_type]);
|
||||
const node_header = data.title || default_header;
|
||||
const run_tooltip = useMemo(() => `Run ${node_type} over inputs`, [node_type]);
|
||||
const code_instruct_header = useMemo(() => {
|
||||
if (node_type === 'evaluator')
|
||||
return <div className="code-mirror-field-header">Define an <Code>evaluate</Code> func to map over each response:</div>;
|
||||
else
|
||||
return <div className="code-mirror-field-header">Define a <Code>process</Code> func to map over each response:</div>;
|
||||
}, [node_type]);
|
||||
const code_info_modal = useMemo(() => {
|
||||
if (node_type === 'evaluator')
|
||||
return <Box m='lg' mt='xl'>
|
||||
<Text mb='sm'>To use a {default_header}, write a function <Code>evaluate</Code> that takes a single argument of class <Code>ResponseInfo</Code>.
|
||||
The function should return a 'score' for that response, which usually is a number or a boolean value (strings as categoricals are supported, but experimental).</Text>
|
||||
<Text mt='sm' mb='sm'>
|
||||
For instance, here is an evaluator that returns the length of a response:</Text>
|
||||
<Prism language={progLang === 'python' ? 'py' : 'ts'}>
|
||||
{progLang === 'python' ? _info_example_py : _info_example_js}
|
||||
</Prism>
|
||||
<Text mt='md' mb='sm'>This function gets the text of the response via <Code>response.text</Code>, then calculates its length in characters. The full <Code>ResponseInfo</Code> class has the following properties and methods:</Text>
|
||||
<Prism language={progLang === 'python' ? 'py' : 'ts'}>
|
||||
{progLang === 'python' ? _info_codeblock_py : _info_codeblock_js}
|
||||
</Prism>
|
||||
<Text mt='md' mb='sm'>For instance, say you have a prompt template <Code>What is the capital of {country}?</Code> on a Prompt Node.
|
||||
You want to get the input variable 'country', which filled the prompt that led to the current response. You can use <Code>response.var</Code>:</Text>
|
||||
<Prism language={progLang === 'python' ? 'py' : 'ts'}>
|
||||
{progLang === 'python' ? _info_example_var_py : _info_example_var_js}
|
||||
</Prism>
|
||||
<Text mt='md'>Note that you are allowed to define variables outside of the function, or define more functions, as long as a function called <Code>evaluate</Code> is defined.
|
||||
For more information on what's possible, see the <a href="https://chainforge.ai/docs/" target='_blank'>documentation</a> or load some Example Flows.</Text>
|
||||
</Box>;
|
||||
else
|
||||
return <Box m='lg' mt='xl'>
|
||||
<Text mb='sm'>To use a {default_header}, write a function <Code>process</Code> that takes a single argument of class <Code>ResponseInfo</Code>.
|
||||
The function should return the <strong>transformed response text</strong>, as a string or number.</Text>
|
||||
<Text mt='sm' mb='sm'>
|
||||
For instance, here is a processor that simply returns the first 12 characters of the response:</Text>
|
||||
<Prism language={progLang === 'python' ? 'py' : 'ts'}>
|
||||
{progLang === 'python' ? _info_proc_example_py : _info_proc_example_js}
|
||||
</Prism>
|
||||
<Text mt='md' mb='sm'>This function gets the text of the response via <Code>response.text</Code>, then slices it until the 12th-indexed character. The full <Code>ResponseInfo</Code> class has the following properties and methods:</Text>
|
||||
<Prism language={progLang === 'python' ? 'py' : 'ts'}>
|
||||
{progLang === 'python' ? _info_codeblock_py : _info_codeblock_js}
|
||||
</Prism>
|
||||
<Text mt='md' mb='sm'>For another example, say you have a prompt that requests the LLM output in a consistent format, with "ANSWER:" at the end like Chain-of-Thought.
|
||||
You want to get just the part after 'ANSWER:' Here's how you can do this:</Text>
|
||||
<Prism language={progLang === 'python' ? 'py' : 'ts'}>
|
||||
{progLang === 'python' ? _info_proc_example_var_py : _info_proc_example_var_js}
|
||||
</Prism>
|
||||
<Text mt='md'>Note that you are allowed to define variables outside of the function, or define more functions, as long as a function called <Code>process</Code> is defined.
|
||||
For more information on what's possible, see the <a href="https://chainforge.ai/docs/" target='_blank'>documentation</a>. Finally, note that currently
|
||||
you cannot change the response metadata itself (i.e., var, meta dictionaries); if you have a use case for that feature, raise an Issue on our GitHub.</Text>
|
||||
</Box>;
|
||||
}, [progLang, node_type])
|
||||
|
||||
return (
|
||||
<BaseNode classNames="evaluator-node" nodeId={id}>
|
||||
@ -264,7 +384,7 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
status={status}
|
||||
alertModal={alertModal}
|
||||
handleRunClick={handleRunClick}
|
||||
runButtonTooltip="Run evaluator over inputs"
|
||||
runButtonTooltip={run_tooltip}
|
||||
customButtons={[
|
||||
<Tooltip label='Info' key="eval-info">
|
||||
<button onClick={openInfoModal} className='custom-button' style={{border:'none'}}>
|
||||
@ -274,26 +394,7 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
/>
|
||||
<LLMResponseInspectorModal ref={inspectModal} jsonResponses={lastResponses} />
|
||||
<Modal title={default_header} size='60%' opened={infoModalOpened} onClose={closeInfoModal} styles={{header: {backgroundColor: '#FFD700'}, root: {position: 'relative', left: '-5%'}}}>
|
||||
<Box m='lg' mt='xl'>
|
||||
<Text mb='sm'>To use a {default_header}, write a function <Code>evaluate</Code> that takes a single argument of class <Code>ResponseInfo</Code>.
|
||||
The function should return a 'score' for that response, which usually is a number or a boolean value (strings as categoricals are supported, but experimental).</Text>
|
||||
<Text mt='sm' mb='sm'>
|
||||
For instance, here is an evaluator that returns the length of a response:</Text>
|
||||
<Prism language={progLang === 'python' ? 'py' : 'ts'}>
|
||||
{progLang === 'python' ? _info_example_py : _info_example_js}
|
||||
</Prism>
|
||||
<Text mt='md' mb='sm'>This function gets the text of the response via <Code>response.text</Code>, then calculates its length in characters. The full <Code>ResponseInfo</Code> class has the following properties and methods:</Text>
|
||||
<Prism language={progLang === 'python' ? 'py' : 'ts'}>
|
||||
{progLang === 'python' ? _info_codeblock_py : _info_codeblock_js}
|
||||
</Prism>
|
||||
<Text mt='md' mb='sm'>For instance, say you have a prompt template <Code>What is the capital of {country}?</Code> on a Prompt Node.
|
||||
You want to get the input variable 'country', which filled the prompt that led to the current response. You can use<Code>response.var</Code>:</Text>
|
||||
<Prism language={progLang === 'python' ? 'py' : 'ts'}>
|
||||
{progLang === 'python' ? _info_example_var_py : _info_example_var_js}
|
||||
</Prism>
|
||||
<Text mt='md'>Note that you are allowed to define variables outside of the function, or define more functions, as long as a function called <Code>evaluate</Code> is defined.
|
||||
For more information on what's possible, see the <a href="https://github.com/ianarawjo/ChainForge/blob/main/GUIDE.md#python-evaluator-node" target='_blank'>documentation</a> or load some Example Flows.</Text>
|
||||
</Box>
|
||||
{code_info_modal}
|
||||
</Modal>
|
||||
<iframe style={{display: 'none'}} id={`${id}-iframe`}></iframe>
|
||||
<Handle
|
||||
@ -311,14 +412,7 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
style={{ top: '50%' }}
|
||||
/>
|
||||
<div className="core-mirror-field">
|
||||
<div className="code-mirror-field-header">Define an <Code>evaluate</Code> func to map over each response:
|
||||
{/* <select name="mapscope" id="mapscope" onChange={handleOnMapScopeSelect}>
|
||||
<option value="response">response</option>
|
||||
<option value="batch">batch of responses</option>
|
||||
</select> */}
|
||||
</div>
|
||||
|
||||
{/* <span className="code-style">response</span>: */}
|
||||
{code_instruct_header}
|
||||
<div className="ace-editor-container nodrag">
|
||||
<AceEditor
|
||||
mode={progLang}
|
||||
@ -358,4 +452,4 @@ const EvaluatorNode = ({ data, id }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export default EvaluatorNode;
|
||||
export default CodeEvaluatorNode;
|
chainforge/react-server/src/PromptNode.js (vendored, 13 lines changed)
@@ -13,7 +13,7 @@ import fetch_from_backend from './fetch_from_backend';
import { escapeBraces } from './backend/template';
import ChatHistoryView from './ChatHistoryView';
import InspectFooter from './InspectFooter';
import { countNumLLMs, setsAreEqual } from './backend/utils';
import { countNumLLMs, setsAreEqual, getLLMsInPulledInputData } from './backend/utils';

const getUniqueLLMMetavarKey = (responses) => {
const metakeys = new Set(responses.map(resp_obj => Object.keys(resp_obj.metavars)).flat());
@@ -32,17 +32,6 @@ const bucketChatHistoryInfosByLLM = (chat_hist_infos) => {
});
return chats_by_llm;
}
const getLLMsInPulledInputData = (pulled_data) => {
let found_llms = {};
Object.values(pulled_data).filter(_vs => {
let vs = Array.isArray(_vs) ? _vs : [_vs];
vs.forEach(v => {
if (v?.llm !== undefined && !(v.llm.key in found_llms))
found_llms[v.llm.key] = v.llm;
});
});
return Object.values(found_llms);
};

class PromptInfo {
prompt; // string
chainforge/react-server/src/SimpleEvalNode.js (vendored, 109 lines changed)
@ -8,6 +8,7 @@ import InspectFooter from "./InspectFooter";
|
||||
import LLMResponseInspectorModal from "./LLMResponseInspectorModal";
|
||||
import useStore from "./store";
|
||||
import fetch_from_backend from "./fetch_from_backend";
|
||||
import { stripLLMDetailsFromResponses, toStandardResponseFormat } from "./backend/utils";
|
||||
|
||||
const createJSEvalCodeFor = (responseFormat, operation, value, valueType) => {
|
||||
let responseObj = 'r.text'
|
||||
@ -52,7 +53,7 @@ const createJSEvalCodeFor = (responseFormat, operation, value, valueType) => {
|
||||
const SimpleEvalNode = ({data, id}) => {
|
||||
|
||||
const setDataPropsForNode = useStore((state) => state.setDataPropsForNode);
|
||||
const inputEdgesForNode = useStore((state) => state.inputEdgesForNode);
|
||||
const pullInputData = useStore((state) => state.pullInputData);
|
||||
const pingOutputNodes = useStore((state) => state.pingOutputNodes);
|
||||
const [pastInputs, setPastInputs] = useState([]);
|
||||
|
||||
@ -97,17 +98,25 @@ const SimpleEvalNode = ({data, id}) => {
|
||||
dirtyStatus();
|
||||
}, [lastTextValue, dirtyStatus]);
|
||||
|
||||
const handleRunClick = useCallback(() => {
|
||||
// Get the ids from the connected input nodes:
|
||||
const input_node_ids = inputEdgesForNode(id).map(e => e.source);
|
||||
if (input_node_ids.length === 0) {
|
||||
console.warn("No inputs for simple evaluator node.");
|
||||
return;
|
||||
const handlePullInputs = useCallback(() => {
|
||||
// Pull input data
|
||||
let pulled_inputs = pullInputData(["responseBatch"], id);
|
||||
if (!pulled_inputs || !pulled_inputs["responseBatch"]) {
|
||||
console.warn(`No inputs to the Simple Evaluator node.`);
|
||||
return [];
|
||||
}
|
||||
// Convert to standard response format (StandardLLMResponseFormat)
|
||||
return pulled_inputs["responseBatch"].map(toStandardResponseFormat);
|
||||
}, [pullInputData, id, toStandardResponseFormat]);
|
||||
|
||||
const handleRunClick = useCallback(() => {
|
||||
// Pull inputs to the node
|
||||
let pulled_inputs = handlePullInputs();
|
||||
|
||||
// Set status and created rejection callback
|
||||
setStatus('loading');
|
||||
setLastResponses([]);
|
||||
|
||||
const rejected = (err_msg) => {
|
||||
setStatus('error');
|
||||
alertModal.current.trigger(err_msg);
|
||||
@ -120,31 +129,32 @@ const SimpleEvalNode = ({data, id}) => {
|
||||
|
||||
// Run evaluator in backend
|
||||
fetch_from_backend('executejs', {
|
||||
id: id,
|
||||
code: code,
|
||||
responses: input_node_ids,
|
||||
scope: 'response',
|
||||
id: id,
|
||||
code: code,
|
||||
responses: pulled_inputs,
|
||||
scope: 'response',
|
||||
process_type: 'evaluator'
|
||||
}).then(function(json) {
|
||||
// Check if there's an error; if so, bubble it up to user and exit:
|
||||
if (!json || json.error) {
|
||||
setLastRunSuccess(false);
|
||||
rejected(json ? json.error : 'Unknown error encountered when requesting evaluations: empty response returned.');
|
||||
return;
|
||||
}
|
||||
|
||||
// Ping any vis + inspect nodes attached to this node to refresh their contents:
|
||||
pingOutputNodes(id);
|
||||
// Check if there's an error; if so, bubble it up to user and exit:
|
||||
if (!json || json.error) {
|
||||
setLastRunSuccess(false);
|
||||
rejected(json ? json.error : 'Unknown error encountered when requesting evaluations: empty response returned.');
|
||||
return;
|
||||
}
|
||||
|
||||
// Ping any vis + inspect nodes attached to this node to refresh their contents:
|
||||
pingOutputNodes(id);
|
||||
|
||||
console.log(json.responses);
|
||||
setLastResponses(json.responses);
|
||||
setLastRunSuccess(true);
|
||||
console.log(json.responses);
|
||||
setLastResponses(stripLLMDetailsFromResponses(json.responses));
|
||||
setLastRunSuccess(true);
|
||||
|
||||
if (status !== 'ready')
|
||||
setUninspectedResponses(true);
|
||||
|
||||
setStatus('ready');
|
||||
if (status !== 'ready')
|
||||
setUninspectedResponses(true);
|
||||
|
||||
setStatus('ready');
|
||||
}).catch((err) => rejected(err.message));
|
||||
}, [inputEdgesForNode, pingOutputNodes, setStatus, alertModal, status, varValue, varValueType, responseFormat, textValue, valueFieldDisabled]);
|
||||
}, [handlePullInputs, pingOutputNodes, setStatus, alertModal, status, varValue, varValueType, responseFormat, textValue, valueFieldDisabled]);
|
||||
|
||||
const showResponseInspector = useCallback(() => {
|
||||
if (inspectModal && inspectModal.current && lastResponses) {
|
||||
@ -154,31 +164,24 @@ const SimpleEvalNode = ({data, id}) => {
|
||||
}, [inspectModal, lastResponses]);
|
||||
|
||||
const handleOnConnect = useCallback(() => {
|
||||
// Get the ids from the connected input nodes:
|
||||
const input_node_ids = inputEdgesForNode(id).map(e => e.source);
|
||||
|
||||
// Fetch all input responses
|
||||
fetch_from_backend(
|
||||
'grabResponses',
|
||||
{responses: input_node_ids}
|
||||
).then(function(json) {
|
||||
if (json.responses && json.responses.length > 0) {
|
||||
// Find all vars and metavars in responses
|
||||
let varnames = new Set();
|
||||
let metavars = new Set();
|
||||
json.responses.forEach(resp_obj => {
|
||||
Object.keys(resp_obj.vars).forEach(v => varnames.add(v));
|
||||
if (resp_obj.metavars)
|
||||
Object.keys(resp_obj.metavars).forEach(v => metavars.add(v));
|
||||
});
|
||||
const avs = Array.from(varnames);
|
||||
const amvs = Array.from(metavars).filter(v => !(v.startsWith('LLM_')));
|
||||
setAvailableVars(avs);
|
||||
setAvailableMetavars(amvs);
|
||||
setDataPropsForNode(id, { availableVars: avs, availableMetavars: amvs });
|
||||
}
|
||||
});
|
||||
}, [data, id, inputEdgesForNode, setDataPropsForNode]);
|
||||
// Pull inputs to the node
|
||||
let pulled_inputs = handlePullInputs();
|
||||
if (pulled_inputs && pulled_inputs.length > 0) {
|
||||
// Find all vars and metavars in responses
|
||||
let varnames = new Set();
|
||||
let metavars = new Set();
|
||||
pulled_inputs.forEach(resp_obj => {
|
||||
Object.keys(resp_obj.vars).forEach(v => varnames.add(v));
|
||||
if (resp_obj.metavars)
|
||||
Object.keys(resp_obj.metavars).forEach(v => metavars.add(v));
|
||||
});
|
||||
const avs = Array.from(varnames);
|
||||
const amvs = Array.from(metavars).filter(v => !(v.startsWith('LLM_')));
|
||||
setAvailableVars(avs);
|
||||
setAvailableMetavars(amvs);
|
||||
setDataPropsForNode(id, { availableVars: avs, availableMetavars: amvs });
|
||||
}
|
||||
}, [data, id, handlePullInputs, setDataPropsForNode]);
|
||||
|
||||
if (data.input) {
|
||||
// If there's a change in inputs...
|
||||
|
chainforge/react-server/src/VisNode.js (vendored, 13 lines changed)
@@ -150,11 +150,19 @@ const VisNode = ({ data, id }) => {
useEffect(() => {
if (!responses || responses.length === 0 || !multiSelectValue) return;

// Check if there are evaluation results
if (responses.every(r => r?.eval_res === undefined)) {
setPlaceholderText(<p style={{maxWidth: '220px', backgroundColor: '#f0f0aa', padding: '10px', fontSize: '10pt'}}>
To plot evaluation results, you need to run LLM responses through an Evaluator Node or LLM Scorer Node first.
</p>);
return;
}

setStatus('none');

const get_llm = (resp_obj) => {
if (selectedLLMGroup === 'LLM')
return resp_obj.llm;
return typeof resp_obj.llm === "string" ? resp_obj.llm : resp_obj.llm?.name;
else
return resp_obj.metavars[selectedLLMGroup];
};
@@ -204,7 +212,7 @@ const VisNode = ({ data, id }) => {

// Get the type of evaluation results, if present
// (This is assumed to be consistent across response batches)
let typeof_eval_res = 'dtype' in responses[0].eval_res ? responses[0].eval_res['dtype'] : 'Numeric';
let typeof_eval_res = (responses[0].eval_res && 'dtype' in responses[0].eval_res) ? responses[0].eval_res['dtype'] : 'Numeric';

// If categorical type, check if all binary:
if (typeof_eval_res === 'Categorical') {
@@ -244,6 +252,7 @@ const VisNode = ({ data, id }) => {
};

const get_items = (eval_res_obj) => {
if (eval_res_obj === undefined) return [];
if (typeof_eval_res.includes('KeyValue'))
return eval_res_obj.items.map(item => item[metric_axes_labels[0]]);
return eval_res_obj.items;
@@ -3,7 +3,7 @@
*/
import { NativeLLM } from '../models';
import { expect, test } from '@jest/globals';
import { queryLLM, executejs, countQueries, ResponseInfo } from '../backend';
import { queryLLM, executejs, countQueries, ResponseInfo, grabResponses } from '../backend';
import { StandardizedLLMResponse, Dict } from '../typing';
import StorageCache from '../cache';

@@ -69,6 +69,8 @@ test('run evaluate func over responses', async () => {
return response.text.length;
};

const input_resps = await grabResponses(['dummy_response_id']) as StandardizedLLMResponse[];

// const code = `
// function evaluate(response) {
// console.log('hello there!');
@@ -77,7 +79,7 @@ test('run evaluate func over responses', async () => {
// `;

// Execute the code, and map the evaluate function over all responses
const {responses, logs, error} = await executejs('evalid', code, ['dummy_response_id'], 'response');
const {responses, logs, error} = await executejs('evalid', code, input_resps, 'response', 'evaluator');

// There should be no errors
if (error)
@@ -330,38 +330,47 @@ function check_typeof_vals(arr: Array<any>): MetricType {
return val_type;
}

function run_over_responses(eval_func: (resp: ResponseInfo) => any, responses: Array<StandardizedLLMResponse>): Array<StandardizedLLMResponse> {
function run_over_responses(process_func: (resp: ResponseInfo) => any,
responses: StandardizedLLMResponse[],
process_type: 'evaluator' | 'processor'): StandardizedLLMResponse[] {
return responses.map((_resp_obj: StandardizedLLMResponse) => {
// Deep clone the response object
const resp_obj = JSON.parse(JSON.stringify(_resp_obj));
let resp_obj = JSON.parse(JSON.stringify(_resp_obj));

// Map the evaluator func over every individual response text in each response object
// Map the processor func over every individual response text in each response object
const res = resp_obj.responses;
const evals = res.map(
(r: string) => eval_func(new ResponseInfo(r,
resp_obj.prompt,
resp_obj.vars,
resp_obj.metavars || {},
extract_llm_nickname(resp_obj.llm))
));
const processed = res.map((r: string) =>
process_func(new ResponseInfo(r,
resp_obj.prompt,
resp_obj.vars,
resp_obj.metavars || {},
extract_llm_nickname(resp_obj.llm)))
);

// Check the type of evaluation results
// NOTE: We assume this is consistent across all evaluations, but it may not be.
const eval_res_type = check_typeof_vals(evals);
// If type is just a processor
if (process_type === 'processor') {
// Replace response texts in resp_obj with the transformed ones:
resp_obj.responses = processed;

if (eval_res_type === MetricType.Numeric) {
// Store items with summary of mean, median, etc
resp_obj.eval_res = {
items: evals,
} else { // If type is an evaluator
// Check the type of evaluation results
// NOTE: We assume this is consistent across all evaluations, but it may not be.
const eval_res_type = check_typeof_vals(processed);

if (eval_res_type === MetricType.Numeric) {
// Store items with summary of mean, median, etc
resp_obj.eval_res = {
items: processed,
dtype: getEnumName(MetricType, eval_res_type),
};
} else if ([MetricType.Unknown, MetricType.Empty].includes(eval_res_type)) {
throw new Error('Unsupported types found in evaluation results. Only supported types for metrics are: int, float, bool, str.');
} else {
// Categorical, KeyValue, etc, we just store the items:
resp_obj.eval_res = {
items: processed,
dtype: getEnumName(MetricType, eval_res_type),
};
} else if ([MetricType.Unknown, MetricType.Empty].includes(eval_res_type)) {
throw new Error('Unsupported types found in evaluation results. Only supported types for metrics are: int, float, bool, str.');
} else {
// Categorical, KeyValue, etc, we just store the items:
resp_obj.eval_res = {
items: evals,
dtype: getEnumName(MetricType, eval_res_type),
}
}
}
@@ -773,93 +782,79 @@ export async function queryLLM(id: string,
*
* @param id a unique ID to refer to this information. Used when cache'ing evaluation results.
* @param code the code to evaluate. Must include an 'evaluate()' function that takes a 'response' of type ResponseInfo. Alternatively, can be the evaluate function itself.
* @param response_ids the cache'd response to run on, which must be a unique ID or list of unique IDs of cache'd data
* @param scope the scope of responses to run on --a single response, or all across each batch. (If batch, evaluate() func has access to 'responses'.)
* @param responses the list of StandardizedLLMResponse objects to run on (no longer a unique ID or list of unique IDs of cache'd data)
* @param scope the scope of responses to run on --a single response, or all across each batch. (If batch, evaluate() func has access to 'responses'.) NOTE: Currently this feature is disabled.
* @param process_type the type of processing to perform. Evaluators only 'score'/annotate responses with an 'eval_res' key. Processors change responses (e.g. text).
*/
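A greatly simplified, runnable sketch of the flow executejs implements below: evaluate the user's code, pick the required entry point based on process_type, and map it over the response texts. (There is no iframe sandbox or console hijacking here; new Function stands in for the iframe eval purely for illustration.)

function runUserCode(code: string, processType: 'evaluator' | 'processor', texts: string[]) {
  const reqFuncName = processType === 'evaluator' ? 'evaluate' : 'process';
  // The real implementation evals the code inside a hidden iframe; this only checks the entry point exists.
  const fn = new Function(`${code}; return typeof ${reqFuncName} === 'function' ? ${reqFuncName} : undefined;`)();
  if (fn === undefined) throw new Error(`${reqFuncName}() function is undefined.`);
  return texts.map(t => fn({ text: t }));
}
console.log(runUserCode("function process(response) { return response.text.slice(0, 12); }",
                        'processor', ['The quick brown fox jumps over the lazy dog.']));  // [ 'The quick br' ]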
export async function executejs(id: string,
code: string | ((rinfo: ResponseInfo) => any),
response_ids: string | string[],
scope: 'response' | 'batch'): Promise<Dict> {
// Check format of response_ids
if (!Array.isArray(response_ids))
response_ids = [ response_ids ];
response_ids = response_ids as Array<string>;
responses: StandardizedLLMResponse[],
scope: 'response' | 'batch',
process_type: 'evaluator' | 'processor'): Promise<Dict> {
const req_func_name = (!process_type || process_type === 'evaluator') ? 'evaluate' : 'process';
|
||||
// Instantiate the evaluator function by eval'ing the passed code
|
||||
// DANGER DANGER!!
|
||||
let iframe: HTMLElement | undefined;
|
||||
let process_func: any;
|
||||
if (typeof code === 'string') {
|
||||
try {
|
||||
/*
|
||||
To run Javascript code in a pseudo-'sandbox' environment, we
|
||||
can use an iframe and run eval() inside the iframe, instead of the current environment.
|
||||
This is slightly safer than using eval() directly, doesn't clog our namespace, and keeps
|
||||
multiple Evaluate node execution environments separate.
|
||||
|
||||
The Evaluate node in the front-end has a hidden iframe with the following id.
|
||||
We need to get this iframe element.
|
||||
*/
|
||||
iframe = document.getElementById(`${id}-iframe`);
|
||||
if (!iframe)
|
||||
throw new Error("Could not find iframe sandbox for evaluator node.");
|
||||
/*
|
||||
To run Javascript code in a pseudo-'sandbox' environment, we
|
||||
can use an iframe and run eval() inside the iframe, instead of the current environment.
|
||||
This is slightly safer than using eval() directly, doesn't clog our namespace, and keeps
|
||||
multiple Evaluate node execution environments separate.
|
||||
|
||||
The Evaluate node in the front-end has a hidden iframe with the following id.
|
||||
We need to get this iframe element.
|
||||
*/
|
||||
iframe = document.getElementById(`${id}-iframe`);
|
||||
if (!iframe)
|
||||
throw new Error("Could not find iframe sandbox for evaluator node.");
|
||||
|
||||
// Now run eval() on the 'window' of the iframe:
|
||||
// @ts-ignore
|
||||
iframe.contentWindow.eval(code);
|
||||
// Now run eval() on the 'window' of the iframe:
|
||||
// @ts-ignore
|
||||
iframe.contentWindow.eval(code);
|
||||
|
||||
// Now check that there is an 'evaluate' method in the iframe's scope.
|
||||
// NOTE: We need to tell Typescript to ignore this, since it's a dynamic type check.
|
||||
// @ts-ignore
|
||||
process_func = (!process_type || process_type === 'evaluator') ? iframe.contentWindow.evaluate : iframe.contentWindow.process;
|
||||
if (process_func === undefined)
|
||||
throw new Error(`${req_func_name}() function is undefined.`);
|
||||
|
||||
// Now check that there is an 'evaluate' method in the iframe's scope.
|
||||
// NOTE: We need to tell Typescript to ignore this, since it's a dynamic type check.
|
||||
// @ts-ignore
|
||||
if (iframe.contentWindow.evaluate === undefined) {
|
||||
throw new Error('evaluate() function is undefined.');
|
||||
}
|
||||
} catch (err) {
|
||||
return {'error': `Could not compile evaluator code. Error message:\n${err.message}`};
|
||||
return {'error': `Could not compile code. Error message:\n${err.message}`};
|
||||
}
|
||||
}
|
||||
|
||||
// Load all responses with the given ID:
|
||||
let all_evald_responses: StandardizedLLMResponse[] = [];
|
||||
let all_logs: string[] = [];
|
||||
for (let i = 0; i < response_ids.length; i++) {
|
||||
const cache_id = response_ids[i];
|
||||
const fname = `${cache_id}.json`;
|
||||
if (!StorageCache.has(fname))
|
||||
return {error: `Did not find cache file for id ${cache_id}`, logs: all_logs};
|
||||
let processed_resps: StandardizedLLMResponse[];
|
||||
try {
|
||||
// Intercept any calls to console.log, .warn, or .error, so we can store the calls
|
||||
// and print them in the 'output' footer of the Evaluator Node:
|
||||
// @ts-ignore
|
||||
HIJACK_CONSOLE_LOGGING(id, iframe.contentWindow);
|
||||
|
||||
// Load the raw responses from the cache
|
||||
const responses = load_cache_responses(fname);
|
||||
if (responses.length === 0)
|
||||
continue;
|
||||
// Run the user-defined 'evaluate' function over the responses:
|
||||
// NOTE: 'evaluate' here was defined dynamically from 'eval' above. We've already checked that it exists.
|
||||
// @ts-ignore
|
||||
processed_resps = run_over_responses((iframe ? process_func : code), responses, process_type);
|
||||
|
||||
let evald_responses: StandardizedLLMResponse[];
|
||||
try {
|
||||
// Intercept any calls to console.log, .warn, or .error, so we can store the calls
|
||||
// and print them in the 'output' footer of the Evaluator Node:
|
||||
// @ts-ignore
|
||||
HIJACK_CONSOLE_LOGGING(id, iframe.contentWindow);
|
||||
|
||||
// Run the user-defined 'evaluate' function over the responses:
|
||||
// NOTE: 'evaluate' here was defined dynamically from 'eval' above. We've already checked that it exists.
|
||||
// @ts-ignore
|
||||
evald_responses = run_over_responses((iframe ? iframe.contentWindow.evaluate : code), responses, scope);
|
||||
|
||||
// Revert the console.log, .warn, .error back to browser default:
|
||||
// @ts-ignore
|
||||
all_logs = all_logs.concat(REVERT_CONSOLE_LOGGING(id, iframe.contentWindow));
|
||||
} catch (err) {
|
||||
// @ts-ignore
|
||||
all_logs = all_logs.concat(REVERT_CONSOLE_LOGGING(id, iframe.contentWindow));
|
||||
return { error: `Error encountered while trying to run "evaluate" method:\n${err.message}`, logs: all_logs };
|
||||
}
|
||||
|
||||
all_evald_responses = all_evald_responses.concat(evald_responses);
|
||||
// Revert the console.log, .warn, .error back to browser default:
|
||||
// @ts-ignore
|
||||
all_logs = all_logs.concat(REVERT_CONSOLE_LOGGING(id, iframe.contentWindow));
|
||||
} catch (err) {
|
||||
// @ts-ignore
|
||||
all_logs = all_logs.concat(REVERT_CONSOLE_LOGGING(id, iframe.contentWindow));
|
||||
return { error: `Error encountered while trying to run "evaluate" method:\n${err.message}`, logs: all_logs };
|
||||
}
|
||||
|
||||
// Store the evaluated responses in a new cache json:
|
||||
StorageCache.store(`${id}.json`, all_evald_responses);
|
||||
StorageCache.store(`${id}.json`, processed_resps);
|
||||
|
||||
return {responses: all_evald_responses, logs: all_logs};
|
||||
return {responses: processed_resps, logs: all_logs};
|
||||
}
|
||||
|
||||
/**
|
||||
@ -872,28 +867,19 @@ export async function executejs(id: string,
|
||||
* @param id a unique ID to refer to this information. Used when cache'ing evaluation results.
|
||||
* @param code the code to evaluate. Must include an 'evaluate()' function that takes a 'response' of type ResponseInfo. Alternatively, can be the evaluate function itself.
|
||||
* @param response_ids the cache'd response to run on, which must be a unique ID or list of unique IDs of cache'd data
|
||||
* @param scope the scope of responses to run on --a single response, or all across each batch. (If batch, evaluate() func has access to 'responses'.)
|
||||
* @param scope the scope of responses to run on --a single response, or all across each batch. (If batch, evaluate() func has access to 'responses'.) NOTE: Currently disabled.
|
||||
* @param process_type the type of processing to perform. Evaluators only 'score'/annotate responses with an 'eval_res' key. Processors change responses (e.g. text).
|
||||
*/
|
||||
export async function executepy(id: string,
|
||||
code: string | ((rinfo: ResponseInfo) => any),
|
||||
response_ids: string | string[],
|
||||
responses: StandardizedLLMResponse[],
|
||||
scope: 'response' | 'batch',
|
||||
process_type: 'evaluator' | 'processor',
|
||||
script_paths?: string[]): Promise<Dict> {
|
||||
if (!APP_IS_RUNNING_LOCALLY()) {
|
||||
// We can't execute Python if we're not running the local Flask server. Error out:
|
||||
throw new Error("Cannot evaluate Python code: ChainForge does not appear to be running on localhost.")
|
||||
}
|
||||
|
||||
// Check format of response_ids
|
||||
if (!Array.isArray(response_ids))
|
||||
response_ids = [ response_ids ];
|
||||
response_ids = response_ids as Array<string>;
|
||||
|
||||
// Load cache'd responses for all response_ids:
|
||||
const {responses, error} = await grabResponses(response_ids);
|
||||
|
||||
if (error !== undefined)
|
||||
throw new Error(error);
|
||||
|
||||
// All responses loaded; call our Python server to execute the evaluation code across all responses:
|
||||
const flask_response = await call_flask_backend('executepy', {
|
||||
@ -901,6 +887,7 @@ export async function executepy(id: string,
|
||||
code: code,
|
||||
responses: responses,
|
||||
scope: scope,
|
||||
process_type: process_type,
|
||||
script_paths: script_paths,
|
||||
}).catch(err => {
|
||||
throw new Error(err.message);
|
||||
|
@@ -934,4 +934,34 @@ export const dict_excluding_key = (d, key) => {
const copy_d = {...d};
delete copy_d[key];
return copy_d;
};

export const getLLMsInPulledInputData = (pulled_data: Dict) => {
let found_llms = {};
Object.values(pulled_data).filter(_vs => {
let vs = Array.isArray(_vs) ? _vs : [_vs];
vs.forEach(v => {
if (v?.llm !== undefined && !(v.llm.key in found_llms))
found_llms[v.llm.key] = v.llm;
});
});
return Object.values(found_llms);
};

export const stripLLMDetailsFromResponses = (resps) => resps.map(r => ({...r, llm: (typeof r?.llm === 'string' ? r?.llm : (r?.llm?.name ?? 'undefined'))}));
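stripLLMDetailsFromResponses flattens the llm field to a plain display name before a node stores responses for inspection; a quick illustration (the helper is re-declared here only so the snippet runs standalone, and the llm values are invented):

const stripLLMDetailsFromResponses = (resps: any[]) =>
  resps.map(r => ({ ...r, llm: (typeof r?.llm === 'string' ? r?.llm : (r?.llm?.name ?? 'undefined')) }));

console.log(stripLLMDetailsFromResponses([
  { responses: ['Paris.'], llm: { key: 'a1b2', name: 'GPT3.5', temp: 1.0 } },  // object -> 'GPT3.5'
  { responses: ['Paris!'], llm: 'GPT4' },                                      // string is kept as-is
]));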
export const toStandardResponseFormat = (r) => {
let resp_obj: Dict = {
vars: r?.fill_history ?? {},
metavars: r?.metavars ?? {},
llm: r?.llm ?? undefined,
prompt: r?.prompt ?? "",
responses: [typeof r === 'string' ? r : r?.text],
tokens: r?.raw_response?.usage ?? {},
};
if ('eval_res' in r)
resp_obj.eval_res = r.eval_res;
if ('chat_history' in r)
resp_obj.chat_history = r.chat_history;
return resp_obj;
};
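And an example of the mapping toStandardResponseFormat performs on a single pulled input field (shapes follow the function above; the concrete strings are invented):

const pulledField = {      // as produced by an upstream node's output field
  text: 'The capital of France is Paris.',
  prompt: 'What is the capital of France?',
  fill_history: { country: 'France' },
  metavars: { LLM_0: 'GPT3.5' },
  llm: 'GPT3.5',
};
const standardized = {     // what toStandardResponseFormat(pulledField) returns
  vars: { country: 'France' },                       // fill_history becomes vars
  metavars: { LLM_0: 'GPT3.5' },
  llm: 'GPT3.5',
  prompt: 'What is the capital of France?',
  responses: ['The capital of France is Paris.'],    // text becomes a one-element responses array
  tokens: {},                                        // raw_response.usage, or {} when absent
};
console.log(pulledField, standardized);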
@@ -17,9 +17,9 @@ async function _route_to_js_backend(route, params) {
case 'queryllm':
return queryLLM(params.id, clone(params.llm), params.n, params.prompt, clone(params.vars), params.chat_histories, params.api_keys, params.no_cache, params.progress_listener, params.cont_only_w_prior_llms);
case 'executejs':
return executejs(params.id, params.code, params.responses, params.scope);
return executejs(params.id, params.code, params.responses, params.scope, params.process_type);
case 'executepy':
return executepy(params.id, params.code, params.responses, params.scope, params.script_paths);
return executepy(params.id, params.code, params.responses, params.scope, params.process_type, params.script_paths);
case 'evalWithLLM':
return evalWithLLM(params.id, params.llm, params.root_prompt, params.responses, params.api_keys, params.progress_listener);
case 'importCache':
chainforge/react-server/src/store.js (vendored, 2 lines changed)
@@ -26,7 +26,7 @@ export const colorPalettes = {
var: varColorPalette,
}

const refreshableOutputNodeTypes = new Set(['evaluator', 'prompt', 'inspect', 'vis', 'llmeval', 'textfields', 'chat', 'simpleval', 'join', 'split']);
const refreshableOutputNodeTypes = new Set(['evaluator', 'processor', 'prompt', 'inspect', 'vis', 'llmeval', 'textfields', 'chat', 'simpleval', 'join', 'split']);

export let initLLMProviders = [
{ name: "GPT3.5", emoji: "🤖", model: "gpt-3.5-turbo", base_model: "gpt-3.5-turbo", temp: 1.0 }, // The base_model designates what settings form will be used, and must be unique.