This commit is contained in:
Ian Arawjo 2024-03-24 00:58:19 -04:00
parent a77a67b22a
commit 589fb5342f
7 changed files with 65 additions and 90 deletions

View File

@ -23,12 +23,7 @@ import InspectFooter from "./InspectFooter";
import LLMResponseInspectorDrawer from "./LLMResponseInspectorDrawer";
import { stripLLMDetailsFromResponses } from "./backend/utils";
import { AlertModalContext } from "./AlertModal";
import {
Dict,
LLMResponse,
LLMSpec,
QueryProgress,
} from "./backend/typing";
import { Dict, LLMResponse, LLMSpec, QueryProgress } from "./backend/typing";
import { Status } from "./StatusIndicatorComponent";
import { evalWithLLM, grabResponses } from "./backend/backend";

View File

@ -9,7 +9,7 @@ import { exportToExcel } from "./LLMResponseInspector";
import { LLMResponse } from "./backend/typing";
// Lazy load the inspector view
const LLMResponseInspector = lazy(() => import("./LLMResponseInspector.js"));
const LLMResponseInspector = lazy(() => import("./LLMResponseInspector"));
export interface LLMResponseInspectorModalRef {
trigger: () => void;

View File

@ -5,7 +5,7 @@ import { truncStr } from "./backend/utils";
import { Dict, EvaluationScore, LLMResponse } from "./backend/typing";
// Lazy load the response toolbars
const ResponseRatingToolbar = lazy(() => import("./ResponseRatingToolbar.js"));
const ResponseRatingToolbar = lazy(() => import("./ResponseRatingToolbar"));
/* HELPER FUNCTIONS */
const SUCCESS_EVAL_SCORES = new Set(["true", "yes"]);
@ -183,7 +183,8 @@ export const genResponseTextsDisplay = (
const same_resp_text_counts = countResponsesBy(responses, (r) => r);
const same_resp_keys = Object.keys(same_resp_text_counts).sort(
(key1, key2) => same_resp_text_counts[key2].length - same_resp_text_counts[key1].length,
(key1, key2) =>
same_resp_text_counts[key2].length - same_resp_text_counts[key1].length,
);
return same_resp_keys.map((r, idx) => {

View File

@ -45,7 +45,7 @@ type ResponseFormat = "response" | "response in lowercase";
const RESPONSE_FORMATS: ResponseFormat[] = [
"response",
"response in lowercase",
] as const;
];
type Operator =
| "contains"
@ -59,7 +59,7 @@ const OPERATORS: Operator[] = [
"ends with",
"equals",
"appears in",
] as const;
];
const createJSEvalCodeFor = (
responseFormat: ResponseFormat,

View File

@ -861,68 +861,70 @@ const VisNode: React.FC<VisNodeProps> = ({ data, id }) => {
// Grab the input node ids
const input_node_ids = [data.input];
grabResponses(input_node_ids).then(function (resps) {
if (resps && resps.length > 0) {
// Store responses and extract + store vars
setResponses(resps.toReversed());
grabResponses(input_node_ids)
.then(function (resps) {
if (resps && resps.length > 0) {
// Store responses and extract + store vars
setResponses(resps.toReversed());
// Find all vars in responses
let varnames: string[] | Set<string> = new Set<string>();
let metavars: string[] | Set<string> = new Set<string>();
resps.forEach((resp_obj) => {
Object.keys(resp_obj.vars).forEach((v) =>
(varnames as Set<string>).add(v),
);
if (resp_obj.metavars)
Object.keys(resp_obj.metavars).forEach((v) =>
(metavars as Set<string>).add(v),
// Find all vars in responses
let varnames: string[] | Set<string> = new Set<string>();
let metavars: string[] | Set<string> = new Set<string>();
resps.forEach((resp_obj) => {
Object.keys(resp_obj.vars).forEach((v) =>
(varnames as Set<string>).add(v),
);
});
varnames = Array.from(varnames);
metavars = Array.from(metavars);
if (resp_obj.metavars)
Object.keys(resp_obj.metavars).forEach((v) =>
(metavars as Set<string>).add(v),
);
});
varnames = Array.from(varnames);
metavars = Array.from(metavars);
// Get all vars for the y-axis dropdown, merging metavars and vars into one list,
// and excluding any special 'LLM group' metavars:
const msvars = [{ value: "LLM (default)", label: "LLM (default)" }]
.concat(varnames.map((name) => ({ value: name, label: name })))
.concat(
// Get all vars for the y-axis dropdown, merging metavars and vars into one list,
// and excluding any special 'LLM group' metavars:
const msvars = [{ value: "LLM (default)", label: "LLM (default)" }]
.concat(varnames.map((name) => ({ value: name, label: name })))
.concat(
metavars.filter(cleanMetavarsFilterFunc).map((name) => ({
value: `__meta_${name}`,
label: `${name} (meta)`,
})),
);
// Find all the special 'LLM group' metavars and put them in the 'group by' dropdown:
const available_llm_groups = [{ value: "LLM", label: "LLM" }].concat(
metavars.filter(cleanMetavarsFilterFunc).map((name) => ({
value: `__meta_${name}`,
label: `${name} (meta)`,
value: name,
label: `LLMs #${parseInt(name.slice(4)) + 1}`,
})),
);
if (available_llm_groups.length > 1)
available_llm_groups[0] = { value: "LLM", label: "LLMs (last)" };
setAvailableLLMGroups(available_llm_groups);
// Find all the special 'LLM group' metavars and put them in the 'group by' dropdown:
const available_llm_groups = [{ value: "LLM", label: "LLM" }].concat(
metavars.filter(cleanMetavarsFilterFunc).map((name) => ({
value: name,
label: `LLMs #${parseInt(name.slice(4)) + 1}`,
})),
);
if (available_llm_groups.length > 1)
available_llm_groups[0] = { value: "LLM", label: "LLMs (last)" };
setAvailableLLMGroups(available_llm_groups);
// Check for a change in available parameters
if (
!multiSelectVars ||
!multiSelectValue ||
!areSetsEqual(
new Set(msvars.map((o) => o.value)),
new Set(multiSelectVars.map((o) => o.value)),
)
) {
setMultiSelectValue("LLM (default)");
setMultiSelectVars(msvars);
setDataPropsForNode(id, {
vars: msvars,
selected_vars: [],
llm_groups: available_llm_groups,
});
// Check for a change in available parameters
if (
!multiSelectVars ||
!multiSelectValue ||
!areSetsEqual(
new Set(msvars.map((o) => o.value)),
new Set(multiSelectVars.map((o) => o.value)),
)
) {
setMultiSelectValue("LLM (default)");
setMultiSelectVars(msvars);
setDataPropsForNode(id, {
vars: msvars,
selected_vars: [],
llm_groups: available_llm_groups,
});
}
// From here a React effect will detect the changes to these values and display a new plot
}
// From here a React effect will detect the changes to these values and display a new plot
}
}).catch(console.error);
})
.catch(console.error);
}, [data]);
if (data.input) {

View File

@ -23,13 +23,12 @@ test("count queries required", async () => {
// Double-check the queries required (not loading from cache)
const test_count_queries = async (llms: Array<string | Dict>, n: number) => {
const { counts, total_num_responses, error } = await countQueries(
const { counts, total_num_responses } = await countQueries(
prompt,
vars,
llms,
n,
);
expect(error).toBeUndefined();
Object.values(total_num_responses).forEach((v) => {
expect(v).toBe(n * 3 * 3);
@ -135,7 +134,7 @@ test("run evaluate func over responses", async () => {
expect(responses).toHaveLength(DUMMY_RESPONSE_CACHE.length);
// Expect all scores (evaluation results) to be present
responses.forEach((r) => {
responses?.forEach((r) => {
expect(r.eval_res?.items?.length).toBe(1);
});

View File

@ -1,22 +0,0 @@
/* eslint dot-notation: "off" */
import DefaultDict from "../defaultdict";
/**
 * Behavioural checks for DefaultDict when it is constructed with a factory
 * that always yields 0.
 */
describe("DefaultDict", () => {
  // Shared default-value factory used by every case below.
  const zero = () => 0;

  // Keys that were never assigned should read back as the factory's value.
  it("should return the default value when a key is not found", () => {
    const counts = new DefaultDict(zero);
    expect(counts["a"]).toBe(0);
    expect(counts["b"]).toBe(0);
  });

  // An explicit assignment should be returned instead of the default.
  it("should return the value set for a key", () => {
    const counts = new DefaultDict(zero);
    counts["a"] = 1;
    expect(counts["a"]).toBe(1);
  });

  // A key explicitly set to null is expected to read back as the default.
  it("should return the default value for a key set to null", () => {
    const counts = new DefaultDict(zero);
    counts["a"] = null;
    expect(counts["a"]).toBe(0);
  });
});