mirror of
https://github.com/ianarawjo/ChainForge.git
synced 2025-03-14 16:26:45 +00:00
7248 lines
245 KiB
Plaintext
7248 lines
245 KiB
Plaintext
{
|
|
"flow": {
|
|
"nodes": [
|
|
{
|
|
"width": 312,
|
|
"height": 464,
|
|
"data": {
|
|
"fields": [
|
|
{
|
|
"text": "4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "568b5331-f9e5-462f-8314-a0d48bdbbe34",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "4.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "568b5331-f9e5-462f-8314-a0d48bdbbe34",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "3e4d4071-7768-4ef1-aba4-04f97da8e3c8",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "3e4d4071-7768-4ef1-aba4-04f97da8e3c8",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "14721e37-f7d7-4c42-b350-cca63fa887a6",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "14721e37-f7d7-4c42-b350-cca63fa887a6",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "2 + 2 = 4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "ea431926-edea-4006-83fb-67dae766ba0e",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "2 + 2 = 4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "ea431926-edea-4006-83fb-67dae766ba0e",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "653a2dbd-6d2c-4322-aa93-0dc04210f244",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
},
|
|
{
|
|
"text": "4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"fill_history": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "653a2dbd-6d2c-4322-aa93-0dc04210f244",
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
},
|
|
{
|
|
"text": "3",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "49a4841f-98c5-46aa-92a7-416cfe8f041b",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "3",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "49a4841f-98c5-46aa-92a7-416cfe8f041b",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "3",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "8aa01880-0342-4e2a-bed5-c0e893b68f3f",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "3",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "8aa01880-0342-4e2a-bed5-c0e893b68f3f",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "3",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "d73153fb-5451-41e7-80ad-558a1b591c97",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "3",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "d73153fb-5451-41e7-80ad-558a1b591c97",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "The square root of 9 is 3.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "fc0cae18-4b1f-4c97-813c-c04bcb0cc7e7",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "The square root of 9 is 3.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "fc0cae18-4b1f-4c97-813c-c04bcb0cc7e7",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "3",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "409dbe3e-cc82-4037-a4f4-040aed417de6",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
},
|
|
{
|
|
"text": "3",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"fill_history": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "409dbe3e-cc82-4037-a4f4-040aed417de6",
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
},
|
|
{
|
|
"text": "1997",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "e11f0ed2-41ed-4d53-ad8a-510c7836fadc",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "1997.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "e11f0ed2-41ed-4d53-ad8a-510c7836fadc",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "1997",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "55136cbc-c5f1-4c5c-876e-d76a3efaff0f",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "1997",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "55136cbc-c5f1-4c5c-876e-d76a3efaff0f",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "1997",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "b988dd5c-8cc1-4395-9d03-d549fee7b627",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "2001",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "b988dd5c-8cc1-4395-9d03-d549fee7b627",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "The 50th anniversary of the invention of the transistor in 1962.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "61974590-9a02-43b0-9adb-3669c4b2809b",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "The 50th anniversary of the invention of the transistor in 1962.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "61974590-9a02-43b0-9adb-3669c4b2809b",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "2008",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "1e38094d-c615-4b1d-a2d7-87efec494a2f",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
},
|
|
{
|
|
"text": "2047",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"fill_history": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "1e38094d-c615-4b1d-a2d7-87efec494a2f",
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
},
|
|
{
|
|
"text": "125",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "9905585e-ac2c-4e70-b307-dcce305bc9ae",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "125",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "9905585e-ac2c-4e70-b307-dcce305bc9ae",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "125",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "7b46d7b2-3e20-4cfa-9e83-20bf184f05fa",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "125",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "7b46d7b2-3e20-4cfa-9e83-20bf184f05fa",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "125",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "994b02fe-50cc-448c-937e-afa05e5c9130",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "125",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "994b02fe-50cc-448c-937e-afa05e5c9130",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "5 to the power of 3 is 625.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "aaf7358a-1604-4a36-a33b-9872fe74549d",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "5 to the power of 3 is 625.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "aaf7358a-1604-4a36-a33b-9872fe74549d",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "125",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "3e14fc18-ace2-47c6-bb6f-6ba1b3dacf79",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
},
|
|
{
|
|
"text": "125",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"fill_history": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "3e14fc18-ace2-47c6-bb6f-6ba1b3dacf79",
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
},
|
|
{
|
|
"text": "18",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "33b1f438-e9d7-4627-be1f-2c6677412f58",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "54",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "33b1f438-e9d7-4627-be1f-2c6677412f58",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
}
|
|
},
|
|
{
|
|
"text": "36",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "4e23bba3-879b-4c9d-aa7f-25a5e9202a4d",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "12",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "4e23bba3-879b-4c9d-aa7f-25a5e9202a4d",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
}
|
|
},
|
|
{
|
|
"text": "18",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "7f746549-4e3f-4058-8498-80724188d53d",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "36",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "7f746549-4e3f-4058-8498-80724188d53d",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"text": "The answer is 2 x (√729) = 2 x 27 = 810.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "2de3a046-04f0-4316-aaad-d89ec08a34fe",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "The answer is 2 x (√729) = 2 x 27 = 810.",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "2de3a046-04f0-4316-aaad-d89ec08a34fe",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"text": "18",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "e0240f4c-ab55-4026-a38a-3da7def0dbb7",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
},
|
|
{
|
|
"text": "18",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"fill_history": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"uid": "e0240f4c-ab55-4026-a38a-3da7def0dbb7",
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
}
|
|
],
|
|
"llms": [
|
|
{
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1
|
|
},
|
|
{
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1
|
|
},
|
|
{
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
}
|
|
},
|
|
{
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
}
|
|
},
|
|
{
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
}
|
|
}
|
|
],
|
|
"n": 2,
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}"
|
|
},
|
|
"dragging": false,
|
|
"id": "prompt-1686496332028",
|
|
"position": {
|
|
"x": 450,
|
|
"y": 200
|
|
},
|
|
"positionAbsolute": {
|
|
"x": 450,
|
|
"y": 200
|
|
},
|
|
"selected": false,
|
|
"type": "prompt"
|
|
},
|
|
{
|
|
"width": 722,
|
|
"height": 557,
|
|
"data": {
|
|
"input": "simpleEval-1692475827469",
|
|
"llm_groups": [
|
|
{
|
|
"value": "LLM",
|
|
"label": "LLMs (last)"
|
|
},
|
|
{
|
|
"value": "Expected",
|
|
"label": "LLMs #NaN"
|
|
}
|
|
],
|
|
"refresh": false,
|
|
"selected_vars": [],
|
|
"title": "How correct each LLM was on the math questions",
|
|
"vars": [
|
|
{
|
|
"value": "LLM (default)",
|
|
"label": "LLM (default)"
|
|
},
|
|
{
|
|
"value": "question",
|
|
"label": "question"
|
|
},
|
|
{
|
|
"value": "__meta_Expected",
|
|
"label": "Expected (meta)"
|
|
}
|
|
]
|
|
},
|
|
"dragging": false,
|
|
"id": "vis-1686496332028",
|
|
"position": {
|
|
"x": 1200,
|
|
"y": 176
|
|
},
|
|
"positionAbsolute": {
|
|
"x": 1200,
|
|
"y": 176
|
|
},
|
|
"selected": false,
|
|
"type": "vis"
|
|
},
|
|
{
|
|
"width": 302,
|
|
"height": 319,
|
|
"data": {
|
|
"input": "prompt-1686496332028",
|
|
"refresh": false
|
|
},
|
|
"dragging": false,
|
|
"id": "inspect-1686496332028",
|
|
"position": {
|
|
"x": 816,
|
|
"y": 416
|
|
},
|
|
"positionAbsolute": {
|
|
"x": 816,
|
|
"y": 416
|
|
},
|
|
"selected": false,
|
|
"type": "inspect"
|
|
},
|
|
{
|
|
"width": 423,
|
|
"height": 420,
|
|
"data": {
|
|
"columns": [
|
|
{
|
|
"header": "Question",
|
|
"key": "question"
|
|
},
|
|
{
|
|
"header": "Expected",
|
|
"key": "answer"
|
|
}
|
|
],
|
|
"rows": [
|
|
{
|
|
"__uid": "15b96ab6-ac8f-4c66-8bb0-92384e55fb68",
|
|
"answer": "4",
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
{
|
|
"__uid": "fc207d76-1f61-47b2-b3fd-c059be2f69ff",
|
|
"answer": "3",
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
{
|
|
"__uid": "e3963edf-a0c4-406c-bcc3-f9cc092f8bca",
|
|
"answer": "1997",
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
{
|
|
"__uid": "fb9e2d1c-3023-451a-9570-964579c4390b",
|
|
"answer": "125",
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
{
|
|
"__uid": "8d6dbad2-d877-4f87-ba1a-5f0b815cc277",
|
|
"answer": "18",
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
}
|
|
],
|
|
"sel_rows": null
|
|
},
|
|
"dragging": false,
|
|
"id": "table-1686496353296",
|
|
"position": {
|
|
"x": -48,
|
|
"y": 176
|
|
},
|
|
"positionAbsolute": {
|
|
"x": -48,
|
|
"y": 176
|
|
},
|
|
"selected": false,
|
|
"type": "table"
|
|
},
|
|
{
|
|
"width": 343,
|
|
"height": 230,
|
|
"data": {
|
|
"availableMetavars": [
|
|
"Expected"
|
|
],
|
|
"availableVars": [
|
|
"question"
|
|
],
|
|
"input": "prompt-1686496332028",
|
|
"operation": "contains",
|
|
"refresh": false,
|
|
"textValue": "of Expected (meta)",
|
|
"varSelected": true,
|
|
"varValue": "Expected",
|
|
"varValueType": "meta"
|
|
},
|
|
"dragging": false,
|
|
"id": "simpleEval-1692475827469",
|
|
"position": {
|
|
"x": 816,
|
|
"y": 144
|
|
},
|
|
"positionAbsolute": {
|
|
"x": 816,
|
|
"y": 144
|
|
},
|
|
"selected": true,
|
|
"type": "simpleval"
|
|
}
|
|
],
|
|
"edges": [
|
|
{
|
|
"id": "reactflow__edge-table-1686496353296Question-prompt-1686496332028question",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "table-1686496353296",
|
|
"sourceHandle": "Question",
|
|
"target": "prompt-1686496332028",
|
|
"targetHandle": "question"
|
|
},
|
|
{
|
|
"id": "reactflow__edge-prompt-1686496332028prompt-inspect-1686496332028input",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "prompt-1686496332028",
|
|
"sourceHandle": "prompt",
|
|
"target": "inspect-1686496332028",
|
|
"targetHandle": "input"
|
|
},
|
|
{
|
|
"id": "reactflow__edge-prompt-1686496332028prompt-eval-1686496332028responseBatch",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "prompt-1686496332028",
|
|
"sourceHandle": "prompt",
|
|
"target": "eval-1686496332028",
|
|
"targetHandle": "responseBatch"
|
|
},
|
|
{
|
|
"id": "reactflow__edge-eval-1686496332028output-inspectNode-1686496774695input",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "eval-1686496332028",
|
|
"sourceHandle": "output",
|
|
"target": "inspectNode-1686496774695",
|
|
"targetHandle": "input"
|
|
},
|
|
{
|
|
"id": "reactflow__edge-eval-1686496332028output-inspectNode-1686496840189input",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "eval-1686496332028",
|
|
"sourceHandle": "output",
|
|
"target": "inspectNode-1686496840189",
|
|
"targetHandle": "input"
|
|
},
|
|
{
|
|
"id": "reactflow__edge-eval-1686496332028output-vis-1686496332028input",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "eval-1686496332028",
|
|
"sourceHandle": "output",
|
|
"target": "vis-1686496332028",
|
|
"targetHandle": "input"
|
|
},
|
|
{
|
|
"id": "reactflow__edge-prompt-1686496332028prompt-evalNode-1688056068815responseBatch",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "prompt-1686496332028",
|
|
"sourceHandle": "prompt",
|
|
"target": "evalNode-1688056068815",
|
|
"targetHandle": "responseBatch"
|
|
},
|
|
{
|
|
"id": "reactflow__edge-evalNode-1688056068815output-vis-1686496332028input",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "evalNode-1688056068815",
|
|
"sourceHandle": "output",
|
|
"target": "vis-1686496332028",
|
|
"targetHandle": "input"
|
|
},
|
|
{
|
|
"id": "reactflow__edge-simpleEval-1692475827469output-vis-1686496332028input",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "simpleEval-1692475827469",
|
|
"sourceHandle": "output",
|
|
"target": "vis-1686496332028",
|
|
"targetHandle": "input"
|
|
},
|
|
{
|
|
"id": "reactflow__edge-prompt-1686496332028prompt-simpleEval-1692475827469responseBatch",
|
|
"interactionWidth": 100,
|
|
"markerEnd": {
|
|
"height": "22px",
|
|
"type": "arrow",
|
|
"width": "22px"
|
|
},
|
|
"source": "prompt-1686496332028",
|
|
"sourceHandle": "prompt",
|
|
"target": "simpleEval-1692475827469",
|
|
"targetHandle": "responseBatch"
|
|
}
|
|
],
|
|
"viewport": {
|
|
"x": 101,
|
|
"y": 56.30621910095215,
|
|
"zoom": 1
|
|
}
|
|
},
|
|
"cache": {
|
|
"prompt-1686496332028_0.json": {
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?": [
|
|
{
|
|
"info": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"metavars": {
|
|
"Expected Answer": "4",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-3.5-turbo",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "4",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMic64icrpOahwiGjfwLI44XCGJM",
|
|
"model": "gpt-3.5-turbo-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 44,
|
|
"total_tokens": 45
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "4.",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496680,
|
|
"id": "chatcmpl-7QH7IhEZXfMOo6nrnF69msD7XGUFd",
|
|
"model": "gpt-3.5-turbo-0301",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 2,
|
|
"prompt_tokens": 46,
|
|
"total_tokens": 48
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"4",
|
|
"4."
|
|
],
|
|
"uid": "568b5331-f9e5-462f-8314-a0d48bdbbe34"
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?": [
|
|
{
|
|
"info": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"metavars": {
|
|
"Expected Answer": "18",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-3.5-turbo",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "18",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMicugQ1OKrWL05KI0wEGsXV0HdL",
|
|
"model": "gpt-3.5-turbo-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 49,
|
|
"total_tokens": 50
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "54",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496679,
|
|
"id": "chatcmpl-7QH7H4evsq3RIsvVkzMDZyFSTcRiR",
|
|
"model": "gpt-3.5-turbo-0301",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 51,
|
|
"total_tokens": 52
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"18",
|
|
"54"
|
|
],
|
|
"uid": "33b1f438-e9d7-4627-be1f-2c6677412f58"
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?": [
|
|
{
|
|
"info": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"metavars": {
|
|
"Expected Answer": "125",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-3.5-turbo",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "125",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMicDPnCusCEH8YoNgpFupv9V8Ig",
|
|
"model": "gpt-3.5-turbo-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 47,
|
|
"total_tokens": 48
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "125",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496680,
|
|
"id": "chatcmpl-7QH7IL3zyNy1qRJIZ7pHRT2itdFxU",
|
|
"model": "gpt-3.5-turbo-0301",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 49,
|
|
"total_tokens": 50
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"125",
|
|
"125"
|
|
],
|
|
"uid": "9905585e-ac2c-4e70-b307-dcce305bc9ae"
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?": [
|
|
{
|
|
"info": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"metavars": {
|
|
"Expected Answer": "3",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-3.5-turbo",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "3",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMic79SF6FjzKSq30zc8sYEKshGc",
|
|
"model": "gpt-3.5-turbo-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 45,
|
|
"total_tokens": 46
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "3",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496680,
|
|
"id": "chatcmpl-7QH7IQiQOuw3rgk8bCjadBFJmoHPI",
|
|
"model": "gpt-3.5-turbo-0301",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 47,
|
|
"total_tokens": 48
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"3",
|
|
"3"
|
|
],
|
|
"uid": "49a4841f-98c5-46aa-92a7-416cfe8f041b"
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?": [
|
|
{
|
|
"info": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"metavars": {
|
|
"Expected Answer": "1997",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-3.5-turbo",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "1997",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMicFvjhspBERpYtTZZEawO5wPHz",
|
|
"model": "gpt-3.5-turbo-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 2,
|
|
"prompt_tokens": 51,
|
|
"total_tokens": 53
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "1997.",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496679,
|
|
"id": "chatcmpl-7QH7HarcW5kMzCeR20W8almasmPT7",
|
|
"model": "gpt-3.5-turbo-0301",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 3,
|
|
"prompt_tokens": 53,
|
|
"total_tokens": 56
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"1997",
|
|
"1997."
|
|
],
|
|
"uid": "e11f0ed2-41ed-4d53-ad8a-510c7836fadc"
|
|
}
|
|
]
|
|
},
|
|
"prompt-1686496332028_2.json": {
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?": [
|
|
{
|
|
"info": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"llm": "GPT4",
|
|
"metavars": {
|
|
"Expected Answer": "4",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-4",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "4",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMicarZ9BCnqTXdBbngd4PXmVxpI",
|
|
"model": "gpt-4-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 44,
|
|
"total_tokens": 45
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "4",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496680,
|
|
"id": "chatcmpl-7QH7IfQMLwGU9ChcDnASzQEL7XfLb",
|
|
"model": "gpt-4-0314",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 44,
|
|
"total_tokens": 45
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"4",
|
|
"4"
|
|
],
|
|
"uid": "3e4d4071-7768-4ef1-aba4-04f97da8e3c8"
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?": [
|
|
{
|
|
"info": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"llm": "GPT4",
|
|
"metavars": {
|
|
"Expected Answer": "18",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-4",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "36",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMicODTp5ZLDynDOV2PIEyFoHs7V",
|
|
"model": "gpt-4-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 49,
|
|
"total_tokens": 50
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "12",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496694,
|
|
"id": "chatcmpl-7QH7Wa9V9PveSzmDXwYz2PL0BjFF4",
|
|
"model": "gpt-4-0314",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 49,
|
|
"total_tokens": 50
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"36",
|
|
"12"
|
|
],
|
|
"uid": "4e23bba3-879b-4c9d-aa7f-25a5e9202a4d"
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?": [
|
|
{
|
|
"info": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"llm": "GPT4",
|
|
"metavars": {
|
|
"Expected Answer": "125",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-4",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "125",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMiciwv6tJaUd2N7V32yY8LalsWe",
|
|
"model": "gpt-4-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 47,
|
|
"total_tokens": 48
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "125",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496679,
|
|
"id": "chatcmpl-7QH7HYxQLOayvVKFZwmJAMm6vv0EM",
|
|
"model": "gpt-4-0314",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 47,
|
|
"total_tokens": 48
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"125",
|
|
"125"
|
|
],
|
|
"uid": "7b46d7b2-3e20-4cfa-9e83-20bf184f05fa"
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?": [
|
|
{
|
|
"info": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"llm": "GPT4",
|
|
"metavars": {
|
|
"Expected Answer": "3",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-4",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "3",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMicCUStjUpd5ylAxDW11i96MsZL",
|
|
"model": "gpt-4-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 45,
|
|
"total_tokens": 46
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "3",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496680,
|
|
"id": "chatcmpl-7QH7IswMdgGBlgESAGyiih4KxT533",
|
|
"model": "gpt-4-0314",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 1,
|
|
"prompt_tokens": 45,
|
|
"total_tokens": 46
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"3",
|
|
"3"
|
|
],
|
|
"uid": "8aa01880-0342-4e2a-bed5-c0e893b68f3f"
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?": [
|
|
{
|
|
"info": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"llm": "GPT4",
|
|
"metavars": {
|
|
"Expected Answer": "1997",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"query": {
|
|
"frequency_penalty": 0,
|
|
"messages": [
|
|
{
|
|
"content": "You are a helpful assistant.",
|
|
"role": "system"
|
|
},
|
|
{
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"role": "user"
|
|
}
|
|
],
|
|
"model": "gpt-4",
|
|
"n": 1,
|
|
"presence_penalty": 0,
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"raw_response": [
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "1997",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1692476414,
|
|
"id": "chatcmpl-7pMic7SUi1qqV8yfHnMXDqOwQ0rRi",
|
|
"model": "gpt-4-0613",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 2,
|
|
"prompt_tokens": 51,
|
|
"total_tokens": 53
|
|
}
|
|
},
|
|
{
|
|
"choices": [
|
|
{
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"message": {
|
|
"content": "1997",
|
|
"role": "assistant"
|
|
}
|
|
}
|
|
],
|
|
"created": 1686496679,
|
|
"id": "chatcmpl-7QH7H1NNyFZM88oRILZZzvcCcsB5a",
|
|
"model": "gpt-4-0314",
|
|
"object": "chat.completion",
|
|
"usage": {
|
|
"completion_tokens": 2,
|
|
"prompt_tokens": 51,
|
|
"total_tokens": 53
|
|
}
|
|
}
|
|
],
|
|
"responses": [
|
|
"1997",
|
|
"1997"
|
|
],
|
|
"uid": "55136cbc-c5f1-4c5c-876e-d76a3efaff0f"
|
|
}
|
|
]
|
|
},
|
|
"prompt-1686496332028_3.json": {
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"query": {
|
|
"model": "models/gemini-pro",
|
|
"temperature": 0.5,
|
|
"stopSequences": [],
|
|
"candidateCount": 2,
|
|
"maxOutputTokens": 512,
|
|
"topP": -1,
|
|
"topK": -1
|
|
},
|
|
"uid": "d73153fb-5451-41e7-80ad-558a1b591c97",
|
|
"responses": [
|
|
"3",
|
|
"3"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"text": "3",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "3"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"text": "3",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "3"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"llm": "Gemini",
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"query": {
|
|
"model": "models/gemini-pro",
|
|
"temperature": 0.5,
|
|
"stopSequences": [],
|
|
"candidateCount": 2,
|
|
"maxOutputTokens": 512,
|
|
"topP": -1,
|
|
"topK": -1
|
|
},
|
|
"uid": "14721e37-f7d7-4c42-b350-cca63fa887a6",
|
|
"responses": [
|
|
"4",
|
|
"4"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"text": "4",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "4"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"text": "4",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "4"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"llm": "Gemini",
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"query": {
|
|
"model": "models/gemini-pro",
|
|
"temperature": 0.5,
|
|
"stopSequences": [],
|
|
"candidateCount": 2,
|
|
"maxOutputTokens": 512,
|
|
"topP": -1,
|
|
"topK": -1
|
|
},
|
|
"uid": "7f746549-4e3f-4058-8498-80724188d53d",
|
|
"responses": [
|
|
"18",
|
|
"36"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"text": "18",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "18"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"text": "36",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "36"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"llm": "Gemini",
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"query": {
|
|
"model": "models/gemini-pro",
|
|
"temperature": 0.5,
|
|
"stopSequences": [],
|
|
"candidateCount": 2,
|
|
"maxOutputTokens": 512,
|
|
"topP": -1,
|
|
"topK": -1
|
|
},
|
|
"uid": "994b02fe-50cc-448c-937e-afa05e5c9130",
|
|
"responses": [
|
|
"125",
|
|
"125"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"text": "125",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "125"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"text": "125",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "125"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"llm": "Gemini",
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"query": {
|
|
"model": "models/gemini-pro",
|
|
"temperature": 0.5,
|
|
"stopSequences": [],
|
|
"candidateCount": 2,
|
|
"maxOutputTokens": 512,
|
|
"topP": -1,
|
|
"topK": -1
|
|
},
|
|
"uid": "b988dd5c-8cc1-4395-9d03-d549fee7b627",
|
|
"responses": [
|
|
"1997",
|
|
"2001"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"text": "1997",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "1997"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"text": "2001",
|
|
"candidates": [
|
|
{
|
|
"content": {
|
|
"parts": [
|
|
{
|
|
"text": "2001"
|
|
}
|
|
],
|
|
"role": "model"
|
|
},
|
|
"finishReason": "STOP",
|
|
"index": 0,
|
|
"safetyRatings": [
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"probability": "NEGLIGIBLE"
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"probability": "NEGLIGIBLE"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"llm": "Gemini",
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
"prompt-1686496332028_4.json": {
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"query": {
|
|
"temperature": 1,
|
|
"max_new_tokens": 250,
|
|
"return_full_text": false
|
|
},
|
|
"uid": "ea431926-edea-4006-83fb-67dae766ba0e",
|
|
"responses": [
|
|
"2 + 2 = 4",
|
|
"2 + 2 = 4"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"generated_text": "\n2 + 2 = 4"
|
|
},
|
|
{
|
|
"generated_text": "\n2 + 2 = 4"
|
|
}
|
|
],
|
|
"llm": "Falcon.7B",
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"query": {
|
|
"temperature": 1,
|
|
"max_new_tokens": 250,
|
|
"return_full_text": false
|
|
},
|
|
"uid": "fc0cae18-4b1f-4c97-813c-c04bcb0cc7e7",
|
|
"responses": [
|
|
"The square root of 9 is 3.",
|
|
"The square root of 9 is 3."
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"generated_text": "\nThe square root of 9 is 3."
|
|
},
|
|
{
|
|
"generated_text": "\nThe square root of 9 is 3."
|
|
}
|
|
],
|
|
"llm": "Falcon.7B",
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"query": {
|
|
"temperature": 1,
|
|
"max_new_tokens": 250,
|
|
"return_full_text": false
|
|
},
|
|
"uid": "aaf7358a-1604-4a36-a33b-9872fe74549d",
|
|
"responses": [
|
|
"5 to the power of 3 is 625.",
|
|
"5 to the power of 3 is 625."
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"generated_text": "\n5 to the power of 3 is 625."
|
|
},
|
|
{
|
|
"generated_text": "\n5 to the power of 3 is 625."
|
|
}
|
|
],
|
|
"llm": "Falcon.7B",
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"query": {
|
|
"temperature": 1,
|
|
"max_new_tokens": 250,
|
|
"return_full_text": false
|
|
},
|
|
"uid": "61974590-9a02-43b0-9adb-3669c4b2809b",
|
|
"responses": [
|
|
"The 50th anniversary of the invention of the transistor in 1962.",
|
|
"The 50th anniversary of the invention of the transistor in 1962."
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"generated_text": "\nThe 50th anniversary of the invention of the transistor in 1962."
|
|
},
|
|
{
|
|
"generated_text": "\nThe 50th anniversary of the invention of the transistor in 1962."
|
|
}
|
|
],
|
|
"llm": "Falcon.7B",
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"query": {
|
|
"temperature": 1,
|
|
"max_new_tokens": 250,
|
|
"return_full_text": false
|
|
},
|
|
"uid": "2de3a046-04f0-4316-aaad-d89ec08a34fe",
|
|
"responses": [
|
|
"The answer is 2 x (√729) = 2 x 27 = 810.",
|
|
"The answer is 2 x (√729) = 2 x 27 = 810."
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"generated_text": "\nThe answer is 2 x (√729) = 2 x 27 = 810."
|
|
},
|
|
{
|
|
"generated_text": "\nThe answer is 2 x (√729) = 2 x 27 = 810."
|
|
}
|
|
],
|
|
"llm": "Falcon.7B",
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
"prompt-1686496332028_5.json": {
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"query": {
|
|
"model": "claude-3-5-haiku-latest",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"temperature": 1,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_tokens": 1024,
|
|
"messages": [
|
|
{
|
|
"role": "user",
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?"
|
|
}
|
|
]
|
|
},
|
|
"uid": "3e14fc18-ace2-47c6-bb6f-6ba1b3dacf79",
|
|
"responses": [
|
|
"125",
|
|
"125"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "125",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01PQUzDw5Hk5oE1726K9Ngqw",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 38,
|
|
"output_tokens": 5
|
|
}
|
|
},
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "125",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01WMRKEh13H5wy6cu4p76iMj",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 38,
|
|
"output_tokens": 5
|
|
}
|
|
}
|
|
],
|
|
"llm": "claude-3.5-haiku",
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"query": {
|
|
"model": "claude-3-5-haiku-latest",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"temperature": 1,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_tokens": 1024,
|
|
"messages": [
|
|
{
|
|
"role": "user",
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?"
|
|
}
|
|
]
|
|
},
|
|
"uid": "e0240f4c-ab55-4026-a38a-3da7def0dbb7",
|
|
"responses": [
|
|
"18",
|
|
"18"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "18",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01PbkiARdZztUpFHPszheVjV",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 41,
|
|
"output_tokens": 5
|
|
}
|
|
},
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "18",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01CFt4sS9CZ7QDnhVZjYW2cD",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 41,
|
|
"output_tokens": 5
|
|
}
|
|
}
|
|
],
|
|
"llm": "claude-3.5-haiku",
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"query": {
|
|
"model": "claude-3-5-haiku-latest",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"temperature": 1,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_tokens": 1024,
|
|
"messages": [
|
|
{
|
|
"role": "user",
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?"
|
|
}
|
|
]
|
|
},
|
|
"uid": "1e38094d-c615-4b1d-a2d7-87efec494a2f",
|
|
"responses": [
|
|
"2008",
|
|
"2047"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "2008",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01LYeoPwNVTRoqARnkmyT1Xf",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 42,
|
|
"output_tokens": 6
|
|
}
|
|
},
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "2047",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01R3dEKSVJa14RtwokEiHmwm",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 42,
|
|
"output_tokens": 6
|
|
}
|
|
}
|
|
],
|
|
"llm": "claude-3.5-haiku",
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"query": {
|
|
"model": "claude-3-5-haiku-latest",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"temperature": 1,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_tokens": 1024,
|
|
"messages": [
|
|
{
|
|
"role": "user",
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?"
|
|
}
|
|
]
|
|
},
|
|
"uid": "409dbe3e-cc82-4037-a4f4-040aed417de6",
|
|
"responses": [
|
|
"3",
|
|
"3"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "3",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01CqTybAnpN8rxHMbeTd4uLR",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 35,
|
|
"output_tokens": 5
|
|
}
|
|
},
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "3",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01GaQvUTx9jZhc84maMNRSv5",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 35,
|
|
"output_tokens": 5
|
|
}
|
|
}
|
|
],
|
|
"llm": "claude-3.5-haiku",
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
}
|
|
],
|
|
"Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?": [
|
|
{
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"query": {
|
|
"model": "claude-3-5-haiku-latest",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"temperature": 1,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_tokens": 1024,
|
|
"messages": [
|
|
{
|
|
"role": "user",
|
|
"content": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?"
|
|
}
|
|
]
|
|
},
|
|
"uid": "653a2dbd-6d2c-4322-aa93-0dc04210f244",
|
|
"responses": [
|
|
"4",
|
|
"4"
|
|
],
|
|
"raw_response": [
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "4",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01VhYD5rT6o1T7MonYpHkcY8",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 35,
|
|
"output_tokens": 5
|
|
}
|
|
},
|
|
{
|
|
"content": [
|
|
{
|
|
"text": "4",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"id": "msg_01WQfQzAPHAwcK8dyNoCrmUZ",
|
|
"model": "claude-3-5-haiku-20241022",
|
|
"role": "assistant",
|
|
"stop_reason": "end_turn",
|
|
"stop_sequence": null,
|
|
"type": "message",
|
|
"usage": {
|
|
"cache_creation_input_tokens": 0,
|
|
"cache_read_input_tokens": 0,
|
|
"input_tokens": 35,
|
|
"output_tokens": 5
|
|
}
|
|
}
|
|
],
|
|
"llm": "claude-3.5-haiku",
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
"prompt-1686496332028.json": {
|
|
"cache_files": {
|
|
"prompt-1686496332028_0.json": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 0,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt-1686496332028_2.json": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 0,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt-1686496332028_3.json": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 0,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt-1686496332028_4.json": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 0,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt-1686496332028_5.json": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 0,
|
|
"error": 0
|
|
}
|
|
}
|
|
},
|
|
"responses_last_run": [
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4",
|
|
"4."
|
|
],
|
|
"uid": "568b5331-f9e5-462f-8314-a0d48bdbbe34"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"llm": "GPT4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4",
|
|
"4"
|
|
],
|
|
"uid": "3e4d4071-7768-4ef1-aba4-04f97da8e3c8"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"llm": "Gemini",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4",
|
|
"4"
|
|
],
|
|
"uid": "14721e37-f7d7-4c42-b350-cca63fa887a6"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"llm": "Falcon.7B",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"2 + 2 = 4",
|
|
"2 + 2 = 4"
|
|
],
|
|
"uid": "ea431926-edea-4006-83fb-67dae766ba0e"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"llm": "claude-3.5-haiku",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4",
|
|
"4"
|
|
],
|
|
"uid": "653a2dbd-6d2c-4322-aa93-0dc04210f244"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3",
|
|
"3"
|
|
],
|
|
"uid": "49a4841f-98c5-46aa-92a7-416cfe8f041b"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"llm": "GPT4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3",
|
|
"3"
|
|
],
|
|
"uid": "8aa01880-0342-4e2a-bed5-c0e893b68f3f"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"llm": "Gemini",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3",
|
|
"3"
|
|
],
|
|
"uid": "d73153fb-5451-41e7-80ad-558a1b591c97"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"llm": "Falcon.7B",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"The square root of 9 is 3.",
|
|
"The square root of 9 is 3."
|
|
],
|
|
"uid": "fc0cae18-4b1f-4c97-813c-c04bcb0cc7e7"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"llm": "claude-3.5-haiku",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3",
|
|
"3"
|
|
],
|
|
"uid": "409dbe3e-cc82-4037-a4f4-040aed417de6"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"1997",
|
|
"1997."
|
|
],
|
|
"uid": "e11f0ed2-41ed-4d53-ad8a-510c7836fadc"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"llm": "GPT4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"1997",
|
|
"1997"
|
|
],
|
|
"uid": "55136cbc-c5f1-4c5c-876e-d76a3efaff0f"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"llm": "Gemini",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"1997",
|
|
"2001"
|
|
],
|
|
"uid": "b988dd5c-8cc1-4395-9d03-d549fee7b627"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"llm": "Falcon.7B",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"The 50th anniversary of the invention of the transistor in 1962.",
|
|
"The 50th anniversary of the invention of the transistor in 1962."
|
|
],
|
|
"uid": "61974590-9a02-43b0-9adb-3669c4b2809b"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"llm": "claude-3.5-haiku",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"2008",
|
|
"2047"
|
|
],
|
|
"uid": "1e38094d-c615-4b1d-a2d7-87efec494a2f"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125",
|
|
"125"
|
|
],
|
|
"uid": "9905585e-ac2c-4e70-b307-dcce305bc9ae"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"llm": "GPT4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125",
|
|
"125"
|
|
],
|
|
"uid": "7b46d7b2-3e20-4cfa-9e83-20bf184f05fa"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"llm": "Gemini",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125",
|
|
"125"
|
|
],
|
|
"uid": "994b02fe-50cc-448c-937e-afa05e5c9130"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"llm": "Falcon.7B",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"5 to the power of 3 is 625.",
|
|
"5 to the power of 3 is 625."
|
|
],
|
|
"uid": "aaf7358a-1604-4a36-a33b-9872fe74549d"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"llm": "claude-3.5-haiku",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125",
|
|
"125"
|
|
],
|
|
"uid": "3e14fc18-ace2-47c6-bb6f-6ba1b3dacf79"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"llm": "GPT3.5",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"18",
|
|
"54"
|
|
],
|
|
"uid": "33b1f438-e9d7-4627-be1f-2c6677412f58"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"llm": "GPT4",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"36",
|
|
"12"
|
|
],
|
|
"uid": "4e23bba3-879b-4c9d-aa7f-25a5e9202a4d"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"llm": "Gemini",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"18",
|
|
"36"
|
|
],
|
|
"uid": "7f746549-4e3f-4058-8498-80724188d53d"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"llm": "Falcon.7B",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"The answer is 2 x (√729) = 2 x 27 = 810.",
|
|
"The answer is 2 x (√729) = 2 x 27 = 810."
|
|
],
|
|
"uid": "2de3a046-04f0-4316-aaad-d89ec08a34fe"
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"llm": "claude-3.5-haiku",
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"18",
|
|
"18"
|
|
],
|
|
"uid": "e0240f4c-ab55-4026-a38a-3da7def0dbb7"
|
|
}
|
|
]
|
|
},
|
|
"vis-1686496332028.json": {},
|
|
"inspect-1686496332028.json": {},
|
|
"table-1686496353296.json": {},
|
|
"simpleEval-1692475827469.json": [
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "568b5331-f9e5-462f-8314-a0d48bdbbe34",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "568b5331-f9e5-462f-8314-a0d48bdbbe34",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "3e4d4071-7768-4ef1-aba4-04f97da8e3c8",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "3e4d4071-7768-4ef1-aba4-04f97da8e3c8",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "14721e37-f7d7-4c42-b350-cca63fa887a6",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "14721e37-f7d7-4c42-b350-cca63fa887a6",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "ea431926-edea-4006-83fb-67dae766ba0e",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"2 + 2 = 4"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "ea431926-edea-4006-83fb-67dae766ba0e",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"2 + 2 = 4"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "653a2dbd-6d2c-4322-aa93-0dc04210f244",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 + 2?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "4",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "653a2dbd-6d2c-4322-aa93-0dc04210f244",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 + 2?",
|
|
"responses": [
|
|
"4"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "49a4841f-98c5-46aa-92a7-416cfe8f041b",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "49a4841f-98c5-46aa-92a7-416cfe8f041b",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "8aa01880-0342-4e2a-bed5-c0e893b68f3f",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "8aa01880-0342-4e2a-bed5-c0e893b68f3f",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "d73153fb-5451-41e7-80ad-558a1b591c97",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "d73153fb-5451-41e7-80ad-558a1b591c97",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "fc0cae18-4b1f-4c97-813c-c04bcb0cc7e7",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"The square root of 9 is 3."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "fc0cae18-4b1f-4c97-813c-c04bcb0cc7e7",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"The square root of 9 is 3."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "409dbe3e-cc82-4037-a4f4-040aed417de6",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is the square root of 9?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "3",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "409dbe3e-cc82-4037-a4f4-040aed417de6",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is the square root of 9?",
|
|
"responses": [
|
|
"3"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "e11f0ed2-41ed-4d53-ad8a-510c7836fadc",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"1997"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "e11f0ed2-41ed-4d53-ad8a-510c7836fadc",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"1997."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "55136cbc-c5f1-4c5c-876e-d76a3efaff0f",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"1997"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "55136cbc-c5f1-4c5c-876e-d76a3efaff0f",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"1997"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "b988dd5c-8cc1-4395-9d03-d549fee7b627",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"1997"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "b988dd5c-8cc1-4395-9d03-d549fee7b627",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"2001"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "61974590-9a02-43b0-9adb-3669c4b2809b",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"The 50th anniversary of the invention of the transistor in 1962."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "61974590-9a02-43b0-9adb-3669c4b2809b",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"The 50th anniversary of the invention of the transistor in 1962."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "1e38094d-c615-4b1d-a2d7-87efec494a2f",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"2008"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What year was the 50th anniversary of the invention of the transistor?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "1997",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "1e38094d-c615-4b1d-a2d7-87efec494a2f",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What year was the 50th anniversary of the invention of the transistor?",
|
|
"responses": [
|
|
"2047"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "9905585e-ac2c-4e70-b307-dcce305bc9ae",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "9905585e-ac2c-4e70-b307-dcce305bc9ae",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "7b46d7b2-3e20-4cfa-9e83-20bf184f05fa",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "7b46d7b2-3e20-4cfa-9e83-20bf184f05fa",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "994b02fe-50cc-448c-937e-afa05e5c9130",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "994b02fe-50cc-448c-937e-afa05e5c9130",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "aaf7358a-1604-4a36-a33b-9872fe74549d",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"5 to the power of 3 is 625."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "aaf7358a-1604-4a36-a33b-9872fe74549d",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"5 to the power of 3 is 625."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "3e14fc18-ace2-47c6-bb6f-6ba1b3dacf79",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 5 to the power of 3?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "125",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "3e14fc18-ace2-47c6-bb6f-6ba1b3dacf79",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 5 to the power of 3?",
|
|
"responses": [
|
|
"125"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "33b1f438-e9d7-4627-be1f-2c6677412f58",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"18"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT3.5"
|
|
},
|
|
"uid": "33b1f438-e9d7-4627-be1f-2c6677412f58",
|
|
"llm": {
|
|
"base_model": "gpt-3.5-turbo",
|
|
"emoji": "🙂",
|
|
"key": "9fbdcc67-7c5f-4cd5-a2e4-6f2d4a190cd0",
|
|
"model": "gpt-3.5-turbo",
|
|
"name": "GPT3.5",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"54"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "4e23bba3-879b-4c9d-aa7f-25a5e9202a4d",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"36"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "GPT4"
|
|
},
|
|
"uid": "4e23bba3-879b-4c9d-aa7f-25a5e9202a4d",
|
|
"llm": {
|
|
"base_model": "gpt-4",
|
|
"emoji": "🥵",
|
|
"formData": {
|
|
"shortname": "GPT4"
|
|
},
|
|
"key": "667e0d14-527b-4fe7-afdc-219504729e6d",
|
|
"model": "gpt-4",
|
|
"name": "GPT4",
|
|
"settings": {
|
|
"frequency_penalty": 0,
|
|
"presence_penalty": 0,
|
|
"stop": [],
|
|
"system_msg": "You are a helpful assistant.",
|
|
"temperature": 1,
|
|
"top_p": 1
|
|
},
|
|
"temp": 1,
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"12"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "7f746549-4e3f-4058-8498-80724188d53d",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"18"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Gemini"
|
|
},
|
|
"uid": "7f746549-4e3f-4058-8498-80724188d53d",
|
|
"llm": {
|
|
"key": "0b3606a1-109d-448d-aa72-98f612750092",
|
|
"name": "Gemini",
|
|
"emoji": "♊",
|
|
"model": "gemini-pro",
|
|
"base_model": "palm2-bison",
|
|
"temp": 0.7,
|
|
"settings": {
|
|
"temperature": 0.5,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"max_output_tokens": 512,
|
|
"stop_sequences": []
|
|
},
|
|
"formData": {
|
|
"shortname": "Gemini"
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"36"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "2de3a046-04f0-4316-aaad-d89ec08a34fe",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"The answer is 2 x (√729) = 2 x 27 = 810."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "Falcon.7B"
|
|
},
|
|
"uid": "2de3a046-04f0-4316-aaad-d89ec08a34fe",
|
|
"llm": {
|
|
"key": "cb5ba0a7-2833-4924-9d43-d86fe19a3175",
|
|
"name": "Falcon.7B",
|
|
"emoji": "🤗",
|
|
"model": "tiiuae/falcon-7b-instruct",
|
|
"base_model": "hf",
|
|
"temp": 1,
|
|
"settings": {
|
|
"custom_model": "",
|
|
"model_type": "text",
|
|
"temperature": 1,
|
|
"num_continuations": 0,
|
|
"top_k": -1,
|
|
"top_p": -1,
|
|
"repetition_penalty": -1,
|
|
"max_new_tokens": 250,
|
|
"do_sample": true,
|
|
"use_cache": false
|
|
},
|
|
"formData": {
|
|
"shortname": "Falcon.7B"
|
|
},
|
|
"progress": {
|
|
"success": 80,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"The answer is 2 x (√729) = 2 x 27 = 810."
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
false
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "e0240f4c-ab55-4026-a38a-3da7def0dbb7",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"18"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
},
|
|
{
|
|
"vars": {
|
|
"question": "What is 2 multiplied by the cubic root of 729?"
|
|
},
|
|
"metavars": {
|
|
"Expected": "18",
|
|
"__pt": "Answer the following math question as brief as possible, and only include the answer in your response: {question}",
|
|
"LLM_0": "claude-3.5-haiku"
|
|
},
|
|
"uid": "e0240f4c-ab55-4026-a38a-3da7def0dbb7",
|
|
"llm": {
|
|
"key": "9d809e37-0f03-4217-9e78-86c5284f7407",
|
|
"name": "claude-3.5-haiku",
|
|
"emoji": "📚",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"base_model": "claude-v1",
|
|
"temp": 0.5,
|
|
"settings": {
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": [],
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": [
|
|
"\n\nHuman:"
|
|
],
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"formData": {
|
|
"shortname": "claude-3.5-haiku",
|
|
"model": "claude-3-5-haiku-latest",
|
|
"system_msg": "",
|
|
"temperature": 1,
|
|
"tools": "",
|
|
"tool_choice": "",
|
|
"parallel_tool_calls": true,
|
|
"max_tokens_to_sample": 1024,
|
|
"custom_prompt_wrapper": "",
|
|
"stop_sequences": "\"\n\nHuman:\"",
|
|
"top_k": -1,
|
|
"top_p": -1
|
|
},
|
|
"progress": {
|
|
"success": 100,
|
|
"error": 0
|
|
}
|
|
},
|
|
"prompt": "Answer the following math question as brief as possible, and only include the answer in your response: What is 2 multiplied by the cubic root of 729?",
|
|
"responses": [
|
|
"18"
|
|
],
|
|
"tokens": {},
|
|
"eval_res": {
|
|
"items": [
|
|
true
|
|
],
|
|
"dtype": "Categorical"
|
|
}
|
|
}
|
|
],
|
|
"r.568b5331-f9e5-462f-8314-a0d48bdbbe34.grade": {}
|
|
}
|
|
} |