Add Simple Eval Node. Other minor UI improvements. (#121)

* Add red dot in Inspect Responses footer to indicate something changed

* Abstract out inspect footer button to component

* Add tooltips to AddNode menu items.

* Simple eval node (WIP)

* Add menu sections to Add Node. Minor tweaks to simple eval.

* Save state of simple eval when editing fields

* Add 'only show scores' toggle to response inspector

* Change 2 example flows to use simple evals. Fix background of toolbar buttons.

* Update version and rebuild React app
ianarawjo 2023-08-19 16:50:11 -04:00 committed by GitHub
parent 07fb40d1e1
commit f43861f075
25 changed files with 7309 additions and 6039 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large

View File: asset-manifest.json

@@ -1,15 +1,15 @@
{
"files": {
"main.css": "/static/css/main.a4e8271c.css",
"main.js": "/static/js/main.a3a00eec.js",
"main.css": "/static/css/main.37690c8a.css",
"main.js": "/static/js/main.c2ab48ed.js",
"static/js/787.4c72bb55.chunk.js": "/static/js/787.4c72bb55.chunk.js",
"index.html": "/index.html",
"main.a4e8271c.css.map": "/static/css/main.a4e8271c.css.map",
"main.a3a00eec.js.map": "/static/js/main.a3a00eec.js.map",
"main.37690c8a.css.map": "/static/css/main.37690c8a.css.map",
"main.c2ab48ed.js.map": "/static/js/main.c2ab48ed.js.map",
"787.4c72bb55.chunk.js.map": "/static/js/787.4c72bb55.chunk.js.map"
},
"entrypoints": [
"static/css/main.a4e8271c.css",
"static/js/main.a3a00eec.js"
"static/css/main.37690c8a.css",
"static/js/main.c2ab48ed.js"
]
}

View File: index.html

@@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><script async src="https://www.googletagmanager.com/gtag/js?id=G-RN3FDBLMCR"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-RN3FDBLMCR")</script><link rel="icon" href="/favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="A visual programming environment for prompt engineering"/><link rel="apple-touch-icon" href="/logo192.png"/><link rel="manifest" href="/manifest.json"/><title>ChainForge</title><script defer="defer" src="/static/js/main.a3a00eec.js"></script><link href="/static/css/main.a4e8271c.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><script async src="https://www.googletagmanager.com/gtag/js?id=G-RN3FDBLMCR"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-RN3FDBLMCR")</script><link rel="icon" href="/favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="A visual programming environment for prompt engineering"/><link rel="apple-touch-icon" href="/logo192.png"/><link rel="manifest" href="/manifest.json"/><title>ChainForge</title><script defer="defer" src="/static/js/main.c2ab48ed.js"></script><link href="/static/css/main.37690c8a.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File: App.js

@@ -8,7 +8,7 @@ import ReactFlow, {
} from 'react-flow-renderer';
import { Button, Menu, LoadingOverlay, Text, Box, List, Loader, Header, Chip, Badge, Card, Accordion, Tooltip } from '@mantine/core';
import { useClipboard } from '@mantine/hooks';
import { IconSettings, IconTextPlus, IconTerminal, IconCsv, IconSettingsAutomation, IconFileSymlink, IconRobot } from '@tabler/icons-react';
import { IconSettings, IconTextPlus, IconTerminal, IconCsv, IconSettingsAutomation, IconFileSymlink, IconRobot, IconRuler2 } from '@tabler/icons-react';
import TextFieldsNode from './TextFieldsNode'; // Import a custom node
import PromptNode from './PromptNode';
import EvaluatorNode from './EvaluatorNode';
@@ -38,6 +38,7 @@ import { APP_IS_RUNNING_LOCALLY } from './backend/utils';
// Device / Browser detection
import { isMobile, isChrome, isFirefox, isEdgeChromium, isChromium } from 'react-device-detect';
import SimpleEvalNode from './SimpleEvalNode';
const IS_ACCEPTED_BROWSER = (isChrome || isChromium || isEdgeChromium || isFirefox || navigator?.brave !== undefined) && !isMobile;
const selector = (state) => ({
@@ -75,6 +76,7 @@ const nodeTypes = {
textfields: TextFieldsNode, // Register the custom node
prompt: PromptNode,
chat: PromptNode,
simpleval: SimpleEvalNode,
evaluator: EvaluatorNode,
llmeval: LLMEvaluatorNode,
vis: VisNode,
@@ -110,9 +112,12 @@ const getSharedFlowURLParam = () => {
return undefined;
};
const MenuTooltip = ({ label, children }) => {
return (<Tooltip label={label} position="right" width={200} multiline withArrow arrowSize={10}>{children}</Tooltip>);
};
// const connectionLineStyle = { stroke: '#ddd' };
const snapGrid = [16, 16];
let saveIntervalInitialized = false;
const App = () => {
@@ -172,6 +177,10 @@ const App = () => {
const { x, y } = getViewportCenter();
addNode({ id: 'chatTurn-'+Date.now(), type: 'chat', data: { prompt: '' }, position: {x: x-200, y:y-100} });
};
const addSimpleEvalNode = () => {
const { x, y } = getViewportCenter();
addNode({ id: 'simpleEval-'+Date.now(), type: 'simpleval', data: {}, position: {x: x-200, y:y-100} });
};
const addEvalNode = (progLang) => {
const { x, y } = getViewportCenter();
let code = "";
@@ -704,31 +713,64 @@ const App = () => {
position="top-start"
width={220}
closeOnClickOutside={true}
closeOnEscape
>
<Menu.Target>
<Button size="sm" variant="gradient" compact mr='sm'>Add Node +</Button>
<Button size="sm" variant="gradient" compact mr='sm'>Add Node +</Button>
</Menu.Target>
<Menu.Dropdown>
<Menu.Item onClick={addTextFieldsNode} icon={<IconTextPlus size="16px" />}> TextFields </Menu.Item>
<Menu.Item onClick={addTabularDataNode} icon={'🗂️'}> Tabular Data Node </Menu.Item>
<Menu.Item onClick={addPromptNode} icon={'💬'}> Prompt Node </Menu.Item>
<Menu.Item onClick={addChatTurnNode} icon={'🗣'}> Chat Turn Node </Menu.Item>
<Menu.Item onClick={() => addEvalNode('javascript')} icon={<IconTerminal size="16px" />}> JavaScript Evaluator Node </Menu.Item>
{IS_RUNNING_LOCALLY ? (
<Menu.Item onClick={() => addEvalNode('python')} icon={<IconTerminal size="16px" />}> Python Evaluator Node </Menu.Item>
): <></>}
<Menu.Item onClick={addLLMEvalNode} icon={<IconRobot size="16px" />}> LLM Scorer Node</Menu.Item>
<Menu.Item onClick={addVisNode} icon={'📊'}> Vis Node </Menu.Item>
<Menu.Item onClick={addInspectNode} icon={'🔍'}> Inspect Node </Menu.Item>
<Menu.Label>Input Data</Menu.Label>
<MenuTooltip label="Specify input text to prompt or chat nodes. You can also declare variables in brackets {} to chain TextFields together." >
<Menu.Item onClick={addTextFieldsNode} icon={<IconTextPlus size="16px" />}> TextFields Node </Menu.Item>
</MenuTooltip>
<MenuTooltip label="Specify input text as comma-separated values. Good for specifying lots of short text values. An alternative to TextFields node.">
<Menu.Item onClick={addCsvNode} icon={<IconCsv size="16px" />}> CSV Node </Menu.Item>
</MenuTooltip>
<MenuTooltip label="Import or create a spreadhseet of data to use as input to prompt or chat nodes.">
<Menu.Item onClick={addTabularDataNode} icon={'🗂️'}> Tabular Data Node </Menu.Item>
</MenuTooltip>
<Menu.Divider />
<Menu.Label>Prompters</Menu.Label>
<MenuTooltip label="Prompt one or multiple LLMs. Specify prompt variables in brackets {}.">
<Menu.Item onClick={addPromptNode} icon={'💬'}> Prompt Node </Menu.Item>
</MenuTooltip>
<MenuTooltip label="Start or continue a conversation with chat models. Attach Prompt Node output as past context to continue chatting past the first turn.">
<Menu.Item onClick={addChatTurnNode} icon={'🗣'}> Chat Turn Node </Menu.Item>
</MenuTooltip>
<Menu.Divider />
<Menu.Label>Evaluators</Menu.Label>
<MenuTooltip label="Evaluate responses with a simple check (no coding required).">
<Menu.Item onClick={addSimpleEvalNode} icon={<IconRuler2 size="16px" />}> Simple Evaluator Node </Menu.Item>
</MenuTooltip>
<MenuTooltip label="Evaluate responses by writing JavaScript code.">
<Menu.Item onClick={() => addEvalNode('javascript')} icon={<IconTerminal size="16px" />}> JavaScript Evaluator Node </Menu.Item>
</MenuTooltip>
{IS_RUNNING_LOCALLY ? (<MenuTooltip label="Evaluate responses by writing Python code.">
<Menu.Item onClick={() => addEvalNode('python')} icon={<IconTerminal size="16px" />}> Python Evaluator Node </Menu.Item>
</MenuTooltip>): <></>}
<MenuTooltip label="Evaluate responses with an LLM like GPT-4.">
<Menu.Item onClick={addLLMEvalNode} icon={<IconRobot size="16px" />}> LLM Scorer Node</Menu.Item>
</MenuTooltip>
<Menu.Divider />
<Menu.Label>Visualizers</Menu.Label>
<MenuTooltip label="Plot evaluation results. (Attach an evaluator or scorer node as input.)">
<Menu.Item onClick={addVisNode} icon={'📊'}> Vis Node </Menu.Item>
</MenuTooltip>
<MenuTooltip label="Used to inspect responses from prompter or evaluation nodes, without opening up the pop-up view.">
<Menu.Item onClick={addInspectNode} icon={'🔍'}> Inspect Node </Menu.Item>
</MenuTooltip>
<Menu.Divider />
<Menu.Label>Misc</Menu.Label>
<MenuTooltip label="Make a comment about your flow.">
<Menu.Item onClick={addCommentNode} icon={'✏️'}> Comment Node </Menu.Item>
{IS_RUNNING_LOCALLY ? (
<Menu.Item onClick={addScriptNode} icon={<IconSettingsAutomation size="16px" />}> Global Python Scripts </Menu.Item>
): <></>}
</MenuTooltip>
{IS_RUNNING_LOCALLY ? (<MenuTooltip label="Specify directories to load as local packages, so they can be imported in your Python evaluator nodes (add to sys path).">
<Menu.Item onClick={addScriptNode} icon={<IconSettingsAutomation size="16px" />}> Global Python Scripts </Menu.Item>
</MenuTooltip>): <></>}
</Menu.Dropdown>
</Menu>
<Button onClick={exportFlow} size="sm" variant="outline" compact mr='xs'>Export</Button>
<Button onClick={importFlowFromFile} size="sm" variant="outline" compact>Import</Button>
<Button onClick={exportFlow} size="sm" variant="outline" bg="#eee" compact mr='xs'>Export</Button>
<Button onClick={importFlowFromFile} size="sm" variant="outline" bg="#eee" compact>Import</Button>
</div>
<div style={{position: 'fixed', right: '10px', top: '10px', zIndex: 8}}>
{IS_RUNNING_LOCALLY ? (<></>) : (
@@ -740,7 +782,7 @@ const App = () => {
{clipboard.copied ? 'Link copied!' : (waitingForShare ? 'Sharing...' : 'Share')}
</Button>
)}
<Button onClick={onClickNewFlow} size="sm" variant="outline" compact mr='xs' style={{float: 'left'}}> New Flow </Button>
<Button onClick={onClickNewFlow} size="sm" variant="outline" bg="#eee" compact mr='xs' style={{float: 'left'}}> New Flow </Button>
<Button onClick={onClickExamples} size="sm" variant="filled" compact mr='xs' style={{float: 'left'}}> Example Flows </Button>
<Button onClick={onClickSettings} size="sm" variant="gradient" compact><IconSettings size={"90%"} /></Button>
</div>

View File: AreYouSureModal.js

@@ -21,7 +21,7 @@ const AreYouSureModal = forwardRef(({title, message, onConfirm}, ref) => {
}
return (
<Modal opened={opened} onClose={close} title={title} styles={{header: {backgroundColor: 'red', color: 'white'}, root: {position: 'relative', left: '-5%'}}}>
<Modal opened={opened} onClose={close} title={title} styles={{header: {backgroundColor: 'orange', color: 'white'}, root: {position: 'relative', left: '-5%'}}}>
<Box maw={400} mx="auto" mt='md' mb='md'>
<Text>{description}</Text>
</Box>
@@ -32,7 +32,7 @@ const AreYouSureModal = forwardRef(({title, message, onConfirm}, ref) => {
direction="row"
wrap="wrap"
>
<Button variant='light' color='red' type="submit" w='40%' onClick={close}>Cancel</Button>
<Button variant='light' color='orange' type="submit" w='40%' onClick={close}>Cancel</Button>
<Button variant='filled' color='blue' type="submit" w='40%' onClick={confirmAndClose}>Confirm</Button>
</Flex>
</Modal>

View File: EvaluatorNode.js

@@ -16,6 +16,7 @@ import "ace-builds/src-noconflict/theme-xcode";
import "ace-builds/src-noconflict/ext-language_tools";
import fetch_from_backend from './fetch_from_backend';
import { APP_IS_RUNNING_LOCALLY } from './backend/utils';
import InspectFooter from './InspectFooter';
// Whether we are running on localhost or not, and hence whether
// we have access to the Flask backend for, e.g., Python code evaluation.
@@ -92,6 +93,7 @@ const EvaluatorNode = ({ data, id }) => {
// For a way to inspect responses without having to attach a dedicated node
const inspectModal = useRef(null);
const [uninspectedResponses, setUninspectedResponses] = useState(false);
// The programming language for the editor. Also determines what 'execute'
// function will ultimately be called.
@@ -104,7 +106,6 @@ const EvaluatorNode = ({ data, id }) => {
const [lastRunLogs, setLastRunLogs] = useState("");
const [lastResponses, setLastResponses] = useState([]);
const [lastRunSuccess, setLastRunSuccess] = useState(true);
const [mapScope, setMapScope] = useState('response');
// On initialization
useEffect(() => {
@@ -153,7 +154,7 @@ const EvaluatorNode = ({ data, id }) => {
setDataPropsForNode(id, {code: code});
};
const handleRunClick = (event) => {
const handleRunClick = () => {
// Disallow running a Python evaluator node when not on localhost:
if (!IS_RUNNING_LOCALLY && progLang === 'python') {
alertModal.current.trigger(
@@ -203,7 +204,7 @@ const EvaluatorNode = ({ data, id }) => {
id: id,
code: codeTextOnRun,
responses: input_node_ids,
scope: mapScope,
scope: 'response',
script_paths: script_paths,
}).then(function(json) {
// Store any Python print output
@@ -229,6 +230,10 @@ const EvaluatorNode = ({ data, id }) => {
setLastResponses(json.responses);
setCodeTextOnLastRun(codeTextOnRun);
setLastRunSuccess(true);
if (status !== 'ready')
setUninspectedResponses(true);
setStatus('ready');
}).catch((err) => rejected(err.message));
};
@@ -238,8 +243,10 @@ const EvaluatorNode = ({ data, id }) => {
};
const showResponseInspector = useCallback(() => {
if (inspectModal && inspectModal.current && lastResponses)
if (inspectModal && inspectModal.current && lastResponses) {
setUninspectedResponses(false);
inspectModal.current.trigger();
}
}, [inspectModal, lastResponses]);
const default_header = (progLang === 'python') ?
@@ -342,10 +349,10 @@ const EvaluatorNode = ({ data, id }) => {
}
{ lastRunSuccess && lastResponses && lastResponses.length > 0 ?
(<div className="eval-inspect-response-footer nodrag" onClick={showResponseInspector} style={{display: 'flex', justifyContent:'center'}}>
<Button color='blue' variant='subtle' w='100%' >Inspect results&nbsp;<IconSearch size='12pt'/></Button>
</div>) : <></>}
(<InspectFooter label={<>Inspect results&nbsp;<IconSearch size='12pt'/></>}
onClick={showResponseInspector}
showNotificationDot={uninspectedResponses} />
) : <></>}
</div>
);
};

View File: InspectFooter.js

@@ -0,0 +1,22 @@
import { useState } from "react";
import { Button } from "@mantine/core";
import { IconSearch } from "@tabler/icons-react";
/**
* The footer at the bottom of a node, allowing a user to click it
* to inspect responses.
*/
const InspectFooter = ({ label, onClick, showNotificationDot }) => {
const [text, setText] = useState(label || (<>Inspect responses&nbsp;<IconSearch size='12pt' /></>));
return (
<div className="eval-inspect-response-footer nodrag" onClick={onClick} style={{display: 'flex', justifyContent:'center'}}>
<Button color='blue' variant='subtle' w='100%' >
{text}
{ showNotificationDot ? <div className="something-changed-circle"></div> : <></>}
</Button>
</div>
);
};
export default InspectFooter;
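
A minimal usage sketch of the new component (prop names are from the component above; `openInspector` and `hasNewResults` are illustrative placeholders, not names from this commit):

import InspectFooter from './InspectFooter';

// Inside a node's JSX, typically after its <Handle> elements:
<InspectFooter onClick={openInspector} showNotificationDot={hasNewResults} />
// With no `label` prop, the button falls back to "Inspect responses" plus a search icon.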

View File: LLMEvaluatorNode.js

@@ -9,6 +9,7 @@ import fetch_from_backend from './fetch_from_backend';
import { AvailableLLMs, getDefaultModelSettings } from './ModelSettingSchemas';
import { LLMListContainer } from './LLMListComponent';
import LLMResponseInspectorModal from './LLMResponseInspectorModal';
import InspectFooter from './InspectFooter';
// The default prompt shown in gray highlights to give people a good example of an evaluation prompt.
const PLACEHOLDER_PROMPT = "Respond with 'true' if the text below has a positive sentiment, and 'false' if not. Do not reply with anything else.";
@@ -26,7 +27,9 @@ const LLMEvaluatorNode = ({ data, id }) => {
const [promptText, setPromptText] = useState(data.prompt || "");
const [status, setStatus] = useState('none');
const alertModal = useRef(null);
const inspectModal = useRef(null);
const [uninspectedResponses, setUninspectedResponses] = useState(false);
const setDataPropsForNode = useStore((state) => state.setDataPropsForNode);
const inputEdgesForNode = useStore((state) => state.inputEdgesForNode);
@@ -99,6 +102,7 @@ const LLMEvaluatorNode = ({ data, id }) => {
console.log(json.responses);
setLastResponses(json.responses);
setUninspectedResponses(true);
setStatus('ready');
setProgress(undefined);
}).catch(handleError);
@@ -120,8 +124,10 @@ const LLMEvaluatorNode = ({ data, id }) => {
}, []);
const showResponseInspector = useCallback(() => {
if (inspectModal && inspectModal.current && lastResponses)
if (inspectModal && inspectModal.current && lastResponses) {
setUninspectedResponses(false);
inspectModal.current.trigger();
}
}, [inspectModal, lastResponses]);
useEffect(() => {
@@ -189,9 +195,10 @@ const LLMEvaluatorNode = ({ data, id }) => {
/>
{ lastResponses && lastResponses.length > 0 ?
(<div className="eval-inspect-response-footer nodrag" onClick={showResponseInspector} style={{display: 'flex', justifyContent:'center'}}>
<Button color='blue' variant='subtle' w='100%' >Inspect scores&nbsp;<IconSearch size='12pt'/></Button>
</div>) : <></>}
(<InspectFooter label={<>Inspect scores&nbsp;<IconSearch size='12pt'/></>}
onClick={showResponseInspector}
showNotificationDot={uninspectedResponses}
/>) : <></>}
</div>
);
};

View File: LLMListItem.js

@@ -4,6 +4,7 @@ import styled from "styled-components";
import LLMItemButtonGroup from "./LLMItemButtonGroup"
import { IconTemperature } from '@tabler/icons-react';
import { getTemperatureSpecForModel } from "./ModelSettingSchemas";
import { Tooltip } from "@mantine/core";
// == The below function perc2color modified from: ==
// License: MIT - https://opensource.org/licenses/MIT
@@ -87,8 +88,10 @@ const LLMListItem = ({ item, provided, snapshot, removeCallback, onClickSettings
<div>
<CardHeader>{item.emoji}&nbsp;{item.name}{
temperature !== undefined ?
(<TemperatureStatus style={{color: tempColor}}>&nbsp;<IconTemperature size={14} stroke={2} style={{position: 'relative', top: '2px', marginRight: '-3px'}} />:{temperature !== undefined ? temperature : ""}</TemperatureStatus>)
: (<></>)}
(<Tooltip label={"temp = " + (temperature || "0")} withArrow>
<TemperatureStatus style={{color: tempColor}}>&nbsp;<IconTemperature size={14} stroke={2} style={{position: 'relative', top: '2px', marginRight: '-3px'}} />:{temperature !== undefined ? temperature : ""}</TemperatureStatus>
</Tooltip>
): (<></>)}
</CardHeader>
<LLMItemButtonGroup onClickTrash={() => removeCallback(item.key)} ringProgress={progress} onClickSettings={onClickSettings} />
</div>

View File: LLMResponseInspector.js

@@ -5,7 +5,7 @@
* be deployed in multiple locations.
*/
import React, { useState, useEffect, useRef } from 'react';
import { Collapse, Radio, MultiSelect, Group, Table, NativeSelect } from '@mantine/core';
import { Collapse, Radio, MultiSelect, Group, Table, NativeSelect, Checkbox, Flex } from '@mantine/core';
import { useDisclosure } from '@mantine/hooks';
import { IconTable, IconLayoutList } from '@tabler/icons-react';
import * as XLSX from 'xlsx';
@@ -147,6 +147,10 @@ const LLMResponseInspector = ({ jsonResponses, wideFormat }) => {
const [tableColVar, setTableColVar] = useState("LLM");
const [userSelectedTableCol, setUserSelectedTableCol] = useState(false);
// State of the 'only show scores' toggle when eval results are present
const [showEvalScoreOptions, setShowEvalScoreOptions] = useState(false);
const [onlyShowScores, setOnlyShowScores] = useState(false);
// Global lookup for what color to use per LLM
const getColorForLLMAndSetIfNotFound = useStore((state) => state.getColorForLLMAndSetIfNotFound);
@@ -167,6 +171,10 @@ const LLMResponseInspector = ({ jsonResponses, wideFormat }) => {
found_vars = Array.from(found_vars);
found_llms = Array.from(found_llms);
// Whether there's some evaluation scores in the responses
const contains_eval_res = jsonResponses.some(res_obj => res_obj.eval_res !== undefined);
setShowEvalScoreOptions(contains_eval_res);
// Set the variables accessible in the MultiSelect for 'group by'
let msvars = found_vars.map(name => (
// We add a $ prefix to mark this as a prompt parameter, and so
@@ -234,7 +242,8 @@ const LLMResponseInspector = ({ jsonResponses, wideFormat }) => {
{eval_res_items ? (
<p className="small-response-metrics">{getEvalResultStr(resp_str_to_eval_res[r])}</p>
) : <></>}
<pre className="small-response">{r}</pre>
{(contains_eval_res && onlyShowScores) ? <pre>{}</pre> :
<pre className="small-response">{r}</pre>}
</div>
));
@@ -406,7 +415,7 @@ const LLMResponseInspector = ({ jsonResponses, wideFormat }) => {
setResponses(divs);
}
}, [multiSelectValue, jsonResponses, wideFormat, viewFormat, tableColVar]);
}, [multiSelectValue, jsonResponses, wideFormat, viewFormat, tableColVar, onlyShowScores]);
// When the user clicks an item in the drop-down,
// we want to autoclose the multiselect drop-down:
@@ -434,32 +443,45 @@ const LLMResponseInspector = ({ jsonResponses, wideFormat }) => {
: <></>}
{viewFormat === "table" ?
<NativeSelect
value={tableColVar}
onChange={(event) => {
setTableColVar(event.currentTarget.value);
setUserSelectedTableCol(true);
}}
data={multiSelectVars}
label="Select the main variable to use for columns:"
mb="sm"
/>
<Flex gap='xl' align='end'>
<NativeSelect
value={tableColVar}
onChange={(event) => {
setTableColVar(event.currentTarget.value);
setUserSelectedTableCol(true);
}}
data={multiSelectVars}
label="Select the main variable to use for columns:"
mb="sm"
w="80%"
/>
<Checkbox checked={onlyShowScores}
label="Only show scores"
onChange={(e) => setOnlyShowScores(e.currentTarget.checked)}
mb='md'
display={showEvalScoreOptions ? 'inherit' : 'none'} />
</Flex>
: <></>}
{wideFormat === false || viewFormat === "hierarchy" ?
<div>
<Flex gap='xl' align='end'>
<MultiSelect ref={multiSelectRef}
onChange={handleMultiSelectValueChange}
className='nodrag nowheel inspect-multiselect'
label={<span style={{marginTop: '0px'}}>Group responses by (order matters):</span>}
label="Group responses by (order matters):"
data={multiSelectVars}
placeholder="Pick vars to group responses, in order of importance"
size={wideFormat ? 'sm' : 'xs'}
value={multiSelectValue}
clearSearchOnChange={true}
clearSearchOnBlur={true}
w='100%' />
</div>
w='80%' />
<Checkbox checked={onlyShowScores}
label="Only show scores"
onChange={(e) => setOnlyShowScores(e.currentTarget.checked)}
mb='xs'
display={showEvalScoreOptions ? 'inherit' : 'none'} />
</Flex>
: <></>}
<div className="nowheel nodrag">

View File: PromptNode.js

@@ -1,16 +1,17 @@
import React, { useEffect, useState, useRef, useCallback, useMemo } from 'react';
import { Handle } from 'react-flow-renderer';
import { Menu, Switch, Button, Progress, Textarea, Text, Popover, Center, Modal, Box, Tooltip } from '@mantine/core';
import { Switch, Progress, Textarea, Text, Popover, Center, Modal, Box, Tooltip } from '@mantine/core';
import { useDisclosure } from '@mantine/hooks';
import { IconSearch, IconList } from '@tabler/icons-react';
import { IconList } from '@tabler/icons-react';
import useStore from './store';
import NodeLabel from './NodeLabelComponent'
import TemplateHooks, { extractBracketedSubstrings } from './TemplateHooksComponent'
import { LLMListContainer } from './LLMListComponent'
import LLMResponseInspectorModal from './LLMResponseInspectorModal';
import fetch_from_backend from './fetch_from_backend';
import { PromptTemplate, escapeBraces } from './backend/template';
import { escapeBraces } from './backend/template';
import ChatHistoryView from './ChatHistoryView';
import InspectFooter from './InspectFooter';
const getUniqueLLMMetavarKey = (responses) => {
const metakeys = new Set(responses.map(resp_obj => Object.keys(resp_obj.metavars)).flat());
@@ -102,6 +103,8 @@ const PromptNode = ({ data, id, type: node_type }) => {
// For a way to inspect responses without having to attach a dedicated node
const inspectModal = useRef(null);
const [uninspectedResponses, setUninspectedResponses] = useState(false);
const [responsesWillChange, setResponsesWillChange] = useState(false);
// Chat node specific
const [contChatWithPriorLLMs, setContChatWithPriorLLMs] = useState(data.contChat !== undefined ? data.contChat : true);
@@ -123,8 +126,10 @@ const PromptNode = ({ data, id, type: node_type }) => {
}, [llmListContainer, alertModal]);
const showResponseInspector = useCallback(() => {
if (inspectModal && inspectModal.current && jsonResponses)
if (inspectModal && inspectModal.current && jsonResponses) {
inspectModal.current.trigger();
setUninspectedResponses(false);
}
}, [inspectModal, jsonResponses]);
// Signal that prompt node state is dirty; user should re-run:
@@ -357,9 +362,12 @@ const PromptNode = ({ data, id, type: node_type }) => {
const num_llms_missing = Object.keys(counts).length;
if (num_llms_missing === 0) {
setRunTooltip('Will load responses from cache');
setResponsesWillChange(false);
return;
}
setResponsesWillChange(true);
// Tally how many queries per LLM:
let queries_per_llm = {};
Object.keys(counts).forEach(llm_key => {
@@ -471,6 +479,8 @@ const PromptNode = ({ data, id, type: node_type }) => {
// Create a callback to listen for progress
let onProgressChange = () => {};
const open_progress_listener = ([response_counts, total_num_responses]) => {
setResponsesWillChange(!response_counts || Object.keys(response_counts).length === 0);
const max_responses = Object.keys(total_num_responses).reduce((acc, llm) => acc + total_num_responses[llm], 0);
onProgressChange = (progress_by_llm_key) => {
@@ -556,6 +566,10 @@ const PromptNode = ({ data, id, type: node_type }) => {
return;
}
if (responsesWillChange)
setUninspectedResponses(true);
setResponsesWillChange(false);
// All responses collected! Change status to 'ready':
setStatus('ready');
setContChatToggleDisabled(false);
@@ -749,9 +763,8 @@ const PromptNode = ({ data, id, type: node_type }) => {
: <></>}
{ jsonResponses && jsonResponses.length > 0 && status !== 'loading' ?
(<div className="eval-inspect-response-footer nodrag" onClick={showResponseInspector} style={{display: 'flex', justifyContent:'center'}}>
<Button color='blue' variant='subtle' w='100%' >Inspect responses&nbsp;<IconSearch size='12pt'/></Button>
</div>) : <></>
(<InspectFooter onClick={showResponseInspector} showNotificationDot={uninspectedResponses} />
) : <></>
}
</div>
</div>

View File: SimpleEvalNode.js

@@ -0,0 +1,302 @@
import { useState, useCallback, useEffect, useRef } from "react";
import { Handle } from "react-flow-renderer";
import { NativeSelect, TextInput, Flex, Text, Box, Select, ActionIcon, Menu, Tooltip } from "@mantine/core";
import { IconCaretDown, IconHash, IconRuler2, IconSearch, IconX } from "@tabler/icons-react";
import NodeLabel from "./NodeLabelComponent";
import InspectFooter from "./InspectFooter";
import LLMResponseInspectorModal from "./LLMResponseInspectorModal";
import useStore from "./store";
import fetch_from_backend from "./fetch_from_backend";
const createJSEvalCodeFor = (responseFormat, operation, value, valueType) => {
let responseObj = 'r.text'
if (responseFormat === 'response in lowercase')
responseObj = 'r.text.toLowerCase()';
else if (responseFormat === 'length of response')
responseObj = 'r.text.length';
let valueObj = `${JSON.stringify(value)}`;
if (valueType === 'var')
valueObj = `r.var['${value}']`;
else if (valueType === 'meta')
valueObj = `r.meta['${value}']`;
let returnBody;
switch (operation) { // 'contains', 'starts with', 'ends with', 'equals', 'appears in'
case 'contains':
returnBody = `${responseObj}.includes(${valueObj})`;
break;
case 'starts with':
returnBody = `${responseObj}.trim().startsWith(${valueObj})`;
break;
case 'ends with':
returnBody = `${responseObj}.trim().endsWith(${valueObj})`;
break;
case 'equals':
returnBody = `${responseObj} === ${valueObj}`;
break;
case 'appears in':
returnBody = `${valueObj}.includes(${responseObj})`;
break;
default:
console.error(`Could not create JS code for simple evaluator: Operation type '${operation}' does not exist.`)
break;
}
return `function evaluate(r) {\n return ${returnBody};\n}`;
};
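// For illustration (an example, not part of the commit): calling
// createJSEvalCodeFor('response in lowercase', 'contains', 'yes', 'string')
// returns the following evaluator source, which the backend runs once per response r:
//
//   function evaluate(r) {
//     return r.text.toLowerCase().includes("yes");
//   }
//
// With valueType 'meta', the comparison value is read off the response itself, e.g.
// createJSEvalCodeFor('response', 'equals', 'expected', 'meta') yields
// r.text === r.meta['expected'].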
/**
* A no-code evaluator node with basic options for scoring responses.
*/
const SimpleEvalNode = ({data, id}) => {
const setDataPropsForNode = useStore((state) => state.setDataPropsForNode);
const inputEdgesForNode = useStore((state) => state.inputEdgesForNode);
const pingOutputNodes = useStore((state) => state.pingOutputNodes);
const [pastInputs, setPastInputs] = useState([]);
const [status, setStatus] = useState('none');
const alertModal = useRef(null);
const inspectModal = useRef(null);
const [uninspectedResponses, setUninspectedResponses] = useState(false);
const [lastResponses, setLastResponses] = useState([]);
const [lastRunSuccess, setLastRunSuccess] = useState(true);
const [responseFormat, setResponseFormat] = useState(data.responseFormat || "response");
const [operation, setOperation] = useState(data.operation || "contains");
const [textValue, setTextValue] = useState(data.textValue || "");
const [varValue, setVarValue] = useState(data.varValue || "");
const [varValueType, setVarValueType] = useState(data.varValueType || "var");
const [valueFieldDisabled, setValueFieldDisabled] = useState(data.varSelected || false);
const [lastTextValue, setLastTextValue] = useState("");
const [availableVars, setAvailableVars] = useState(data.availableVars || []);
const [availableMetavars, setAvailableMetavars] = useState(data.availableMetavars || []);
const dirtyStatus = useCallback(() => {
if (status === 'ready')
setStatus('warning');
}, [status]);
const handleSetVarAsValue = useCallback((e, valueType) => {
const txt = `of ${e.target.innerText} (${valueType})`;
setLastTextValue(textValue);
setTextValue(txt);
setVarValue(e.target.innerText);
setVarValueType(valueType);
setValueFieldDisabled(true);
setDataPropsForNode(id, { varValue: e.target.innerText, varValueType: valueType, varSelected: true, textValue: txt });
dirtyStatus();
}, [textValue, dirtyStatus]);
const handleClearValueField = useCallback(() => {
setTextValue(lastTextValue);
setValueFieldDisabled(false);
setDataPropsForNode(id, { varSelected: false, textValue: lastTextValue });
dirtyStatus();
}, [lastTextValue, dirtyStatus]);
const handleRunClick = useCallback(() => {
// Get the ids from the connected input nodes:
const input_node_ids = inputEdgesForNode(id).map(e => e.source);
if (input_node_ids.length === 0) {
console.warn("No inputs for simple evaluator node.");
return;
}
// Set status and create a rejection callback
setStatus('loading');
setLastResponses([]);
const rejected = (err_msg) => {
setStatus('error');
alertModal.current.trigger(err_msg);
};
// Generate JS code for the user's spec
const code = valueFieldDisabled
? createJSEvalCodeFor(responseFormat, operation, varValue, varValueType)
: createJSEvalCodeFor(responseFormat, operation, textValue, 'string');
// Run evaluator in backend
fetch_from_backend('executejs', {
id: id,
code: code,
responses: input_node_ids,
scope: 'response',
}).then(function(json) {
// Check if there's an error; if so, bubble it up to user and exit:
if (!json || json.error) {
setLastRunSuccess(false);
rejected(json ? json.error : 'Unknown error encountered when requesting evaluations: empty response returned.');
return;
}
// Ping any vis + inspect nodes attached to this node to refresh their contents:
pingOutputNodes(id);
console.log(json.responses);
setLastResponses(json.responses);
setLastRunSuccess(true);
if (status !== 'ready')
setUninspectedResponses(true);
setStatus('ready');
}).catch((err) => rejected(err.message));
}, [inputEdgesForNode, pingOutputNodes, setStatus, alertModal, status, varValue, varValueType, responseFormat, textValue, valueFieldDisabled]);
const showResponseInspector = useCallback(() => {
if (inspectModal && inspectModal.current && lastResponses) {
setUninspectedResponses(false);
inspectModal.current.trigger();
}
}, [inspectModal, lastResponses]);
const handleOnConnect = useCallback(() => {
// Get the ids from the connected input nodes:
const input_node_ids = inputEdgesForNode(id).map(e => e.source);
// Fetch all input responses
fetch_from_backend(
'grabResponses',
{responses: input_node_ids}
).then(function(json) {
if (json.responses && json.responses.length > 0) {
// Find all vars and metavars in responses
let varnames = new Set();
let metavars = new Set();
json.responses.forEach(resp_obj => {
Object.keys(resp_obj.vars).forEach(v => varnames.add(v));
if (resp_obj.metavars)
Object.keys(resp_obj.metavars).forEach(v => metavars.add(v));
});
const avs = Array.from(varnames);
const amvs = Array.from(metavars).filter(v => !(v.startsWith('LLM_')));
setAvailableVars(avs);
setAvailableMetavars(amvs);
setDataPropsForNode(id, { availableVars: avs, availableMetavars: amvs });
}
});
}, [data, id, inputEdgesForNode, setDataPropsForNode]);
if (data.input) {
// If there's a change in inputs...
if (data.input != pastInputs) {
setPastInputs(data.input);
handleOnConnect();
}
}
useEffect(() => {
if (data.refresh && data.refresh === true) {
setDataPropsForNode(id, { refresh: false });
setStatus('warning');
handleOnConnect();
}
}, [data]);
return (
<div className="evaluator-node cfnode">
<NodeLabel title={data.title || 'Simple Evaluator'}
nodeId={id}
icon={<IconRuler2 size="16px" />}
status={status}
alertModal={alertModal}
handleRunClick={handleRunClick}
runButtonTooltip="Run evaluator over inputs" />
<LLMResponseInspectorModal ref={inspectModal} jsonResponses={lastResponses} />
<iframe style={{display: 'none'}} id={`${id}-iframe`}></iframe>
<Flex gap='xs'>
<Text mt='6px' fz='sm'>Return true if</Text>
<NativeSelect data={['response', 'response in lowercase']}
defaultValue={responseFormat}
onChange={(e) => {
setResponseFormat(e.target.value);
setDataPropsForNode(id, { responseFormat: e.target.value });
dirtyStatus();
}} />
</Flex>
<Flex gap='xs'>
<Box w='85px' />
<NativeSelect mt='sm'
data={['contains', 'starts with', 'ends with', 'equals', 'appears in']}
defaultValue={operation}
onChange={(e) => {
setOperation(e.target.value);
setDataPropsForNode(id, { operation: e.target.value });
dirtyStatus();
}} />
</Flex>
<Flex gap='xs' mt='sm'>
<Text mt='6px' fz='sm'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;the value</Text>
<TextInput value={textValue}
onChange={(e) => setTextValue(e.target.value)}
onBlur={(e) => setDataPropsForNode(id, {textValue: e.target.value})}
onKeyDown={dirtyStatus}
disabled={valueFieldDisabled}
className="nodrag" />
{ valueFieldDisabled ? (
<Tooltip label='Clear variable' withArrow position="right" withinPortal>
<ActionIcon variant="light" size='lg' onClick={handleClearValueField}>
<IconX size='20px' />
</ActionIcon>
</Tooltip>
): (
(availableVars.length > 0 || availableMetavars.length > 0) ?
<Menu shadow="md" width={200} withinPortal>
<Menu.Target>
<Tooltip label='Use a variable' withArrow position="right" withinPortal>
<ActionIcon variant="light" size='lg'>
<IconCaretDown size='20px' />
</ActionIcon>
</Tooltip>
</Menu.Target>
<Menu.Dropdown>
{ availableVars.length > 0 ? <>
<Menu.Label>Variables</Menu.Label>
{availableVars.map(v =>
<Menu.Item key={v} icon={<IconHash size={14} />} onClick={(e) => handleSetVarAsValue(e, 'var')}>{v}</Menu.Item>
)}
<Menu.Divider />
</> : <></>}
{ availableMetavars.length > 0 ? <>
<Menu.Label>Metavariables</Menu.Label>
{availableMetavars.map(v =>
<Menu.Item key={v} icon={<IconHash size={14} />} onClick={(e) => handleSetVarAsValue(e, 'meta')}>{v}</Menu.Item>
)}
</> : <></>}
</Menu.Dropdown>
</Menu>
: <></>)}
</Flex>
<Handle
type="target"
position="left"
id="responseBatch"
className="grouped-handle"
style={{ top: '50%' }}
onConnect={handleOnConnect} />
<Handle
type="source"
position="right"
id="output"
className="grouped-handle"
style={{ top: '50%' }} />
{ lastRunSuccess && lastResponses && lastResponses.length > 0 ?
(<InspectFooter label={<>Inspect scores&nbsp;<IconSearch size='12pt'/></>}
onClick={showResponseInspector}
showNotificationDot={uninspectedResponses}
/>) : <></>}
</div>
);
};
export default SimpleEvalNode;

View File: VisNode.js

@@ -68,7 +68,7 @@ const genUniqueShortnames = (names, max_chars_per_line=32) => {
// Generate unique 'shortnames' to refer to each name:
let past_shortnames_counts = {};
let shortnames = {};
const max_lines = 2;
const max_lines = 8;
for (const name of names) {
// Truncate string up to maximum num of chars
let sn = truncStr(name, max_chars_per_line * max_lines - 3);
@@ -656,7 +656,7 @@ const VisNode = ({ data, id }) => {
if (json.responses && json.responses.length > 0) {
// Store responses and extract + store vars
setResponses(json.responses);
setResponses(json.responses.toReversed());
// Find all vars in responses
let varnames = new Set();

View File: backend/backend.ts

@@ -157,11 +157,6 @@ async function setAPIKeys(api_keys: StringDict): Promise<void> {
set_api_keys(api_keys);
}
// def remove_cached_responses(cache_id: str):
// cache_files = get_cache_keys_related_to_id(cache_id)
// for filename in cache_files:
// os.remove(os.path.join(CACHE_DIR, filename))
/**
* Loads the cache JSON file at filepath.
* 'Soft fails' if the file does not exist (returns empty object).
@@ -270,6 +265,10 @@ function areSetsEqual(xs: Set<any>, ys: Set<any>): boolean {
return xs.size === ys.size && [...xs].every((x) => ys.has(x));
}
function allStringsAreNumeric(strs: Array<string>) {
return strs.every(s => !isNaN(parseFloat(s)));
}
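// e.g. allStringsAreNumeric(['0.5', '10']) === true; allStringsAreNumeric(['10', 'n/a']) === false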
function check_typeof_vals(arr: Array<any>): MetricType {
if (arr.length === 0) return MetricType.Empty;
@@ -948,30 +947,35 @@ export async function evalWithLLM(id: string,
all_evald_responses = all_evald_responses.concat(resp_objs);
}
// Do additional processing to check if all evaluations are boolean-ish (e.g., 'true' and 'false')
let all_eval_res = new Set();
// Do additional processing to check if all evaluations are
// boolean-ish (e.g., 'true' and 'false') or all numeric-ish (parseable as numbers)
let all_eval_res: Set<string> = new Set();
for (const resp_obj of all_evald_responses) {
if (!resp_obj.eval_res) continue;
for (const score of resp_obj.eval_res.items) {
if (score !== undefined)
all_eval_res.add(score.trim().toLowerCase());
}
if (all_eval_res.size > 2)
break; // it's categorical if size is over 2
}
if (all_eval_res.size === 2) {
// Check if the results are boolean-ish:
if ((all_eval_res.has('true') && all_eval_res.has('false')) ||
(all_eval_res.has('yes') && all_eval_res.has('no'))) {
// Convert all eval results to boolean datatypes:
all_evald_responses.forEach(resp_obj => {
resp_obj.eval_res.items = resp_obj.eval_res.items.map((i: string) => {
const li = i.toLowerCase();
return li === 'true' || li === 'yes';
});
resp_obj.eval_res.dtype = 'Categorical';
// Check if the results are boolean-ish:
if (all_eval_res.size === 2 && (all_eval_res.has('true') || all_eval_res.has('false') ||
all_eval_res.has('yes') || all_eval_res.has('no'))) {
// Convert all eval results to boolean datatypes:
all_evald_responses.forEach(resp_obj => {
resp_obj.eval_res.items = resp_obj.eval_res.items.map((i: string) => {
const li = i.toLowerCase();
return li === 'true' || li === 'yes';
});
}
resp_obj.eval_res.dtype = 'Categorical';
});
// Check if the results are all numeric-ish:
} else if (allStringsAreNumeric(Array.from(all_eval_res))) {
// Convert all eval results to numeric datatypes:
all_evald_responses.forEach(resp_obj => {
resp_obj.eval_res.items = resp_obj.eval_res.items.map((i: string) => parseFloat(i));
resp_obj.eval_res.dtype = 'Numeric';
});
}
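// To illustrate the coercion above (examples, not output of this code):
// items {'true','false'} or {'yes','no'} become booleans with dtype 'Categorical';
// items {'1','2','4.5'} become numbers via parseFloat with dtype 'Numeric';
// anything else (e.g. {'good','bad','ok'}) is left as strings.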
// Store the evaluated responses in a new cache json:

View File: backend/utils.ts

@@ -483,9 +483,9 @@ export async function call_google_palm(prompt: string, model: LLM, n: number = 1
// the current chat completions API provides users no control over the blocking.
// We need to detect this and fill the response with the safety reasoning:
if (completion.filters && completion.filters.length > 0) {
// Request was blocked. Output why in the response text, repairing the candidate dict to mock up 'n' responses
const block_error_msg = `[[BLOCKED_REQUEST]] Request was blocked because it triggered safety filters: ${JSON.stringify(completion.filters)}`
completion.candidates = new Array(n).fill({'author': '1', 'content':block_error_msg});
// Request was blocked. Output why in the response text, repairing the candidate dict to mock up 'n' responses
const block_error_msg = `[[BLOCKED_REQUEST]] Request was blocked because it triggered safety filters: ${JSON.stringify(completion.filters)}`
completion.candidates = new Array(n).fill({'author': '1', 'content':block_error_msg});
}
// Weirdly, google ignores candidate_count if temperature is 0.

View File: store.js

@@ -25,7 +25,7 @@ export const colorPalettes = {
var: varColorPalette,
}
const refreshableOutputNodeTypes = new Set(['evaluator', 'prompt', 'inspect', 'vis', 'llmeval', 'textfields', 'chat']);
const refreshableOutputNodeTypes = new Set(['evaluator', 'prompt', 'inspect', 'vis', 'llmeval', 'textfields', 'chat', 'simpleval']);
// A global store of variables, used for maintaining state
// across ChainForge and ReactFlow components.
@@ -234,7 +234,7 @@ const useStore = create((set, get) => ({
// Get the target node information
const target = get().getNode(connection.target);
if (target.type === 'vis' || target.type === 'inspect') {
if (target.type === 'vis' || target.type === 'inspect' || target.type === 'simpleval') {
get().setDataPropsForNode(target.id, { input: connection.source });
}
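// (Note, inferred from the SimpleEvalNode change above: for 'simpleval' targets,
// setting `input` here is what trips the `data.input != pastInputs` check in
// SimpleEvalNode, which calls handleOnConnect to refresh the available vars and metavars.)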

View File: text-fields-node.css

@@ -210,6 +210,14 @@
.eval-inspect-response-footer button {
cursor: zoom-in;
}
/* .blink-inspect-response-footer {
animation: blink-footer 0.7s ease-in 1;
}
@keyframes blink-footer {
50% {
background-color: #eff6fbb1;
}
} */
.ace-editor-container {
resize:vertical;
@@ -915,6 +923,17 @@
font-size: 14px;
}
.something-changed-circle {
width: 6px;
height: 6px;
border-radius: 50%;
border: 0px solid;
background-color: #ff0000;
opacity: 0.7;
margin-bottom: 10px;
margin-left: 2px;
}
/* Model settings form styling */
.rjsf fieldset {
border-style: none;

View File: setup.py

@@ -6,7 +6,7 @@ def readme():
setup(
name='chainforge',
version='0.2.5.3',
version='0.2.5.4',
packages=find_packages(),
author="Ian Arawjo",
description="A Visual Programming Environment for Prompt Engineering",