lollms-webui/extensions/chatgpt2lollms/chatgpt2lollms.py

85 lines
2.6 KiB
Python
Raw Permalink Normal View History

2023-07-06 09:36:57 +00:00
import argparse
2024-12-19 12:48:57 +00:00
import json
2023-07-06 09:36:57 +00:00
from datetime import datetime
2024-12-19 12:48:57 +00:00
2023-07-06 09:36:57 +00:00
def convert_discussions(input_data, flatten=False):
    """Convert exported discussions into a list of flat discussion dicts.

    Args:
        input_data: Iterable of discussion dicts. Each one must provide the
            keys ``"id"``, ``"title"`` and ``"mapping"``. ``mapping`` maps a
            node id to a dict holding a ``"message"`` entry (may be falsy)
            and a ``"parent"`` entry.
        flatten: When true, every message except the first message-bearing
            node gets ``i - 1`` as its parent, where ``i`` is the node's
            position among the nodes that carry a message. Otherwise the
            parent recorded on the node is used (``-1`` when it is falsy).

    Returns:
        A list with one dict per input discussion, each containing ``"id"``,
        ``"title"`` and ``"messages"``. Messages whose first content part is
        missing or empty are left out.

    Raises:
        KeyError: If a discussion, node or message lacks a required key.
    """
    discussions = []
    for discussion in input_data:
        converted_discussion = {
            "id": discussion["id"],
            "messages": [],
            "title": discussion["title"],
        }
        mapping = discussion["mapping"]
        # Iterate over the nodes themselves so that each message stays paired
        # with its own "parent" entry. Indexing the unfiltered key list with
        # the position in the filtered message list picks the wrong node as
        # soon as one node has no message.
        nodes = [node for node in mapping.values() if node["message"]]
        for i, node in enumerate(nodes):
            message = node["message"]

            created_at = ""
            create_time = message.get("create_time")
            if create_time is not None:
                created_at = datetime.fromtimestamp(create_time).strftime(
                    "%Y-%m-%d %H:%M:%S"
                )

            # "parts" may be absent, None or an empty list; all of these are
            # treated as an empty message instead of raising on parts[0].
            parts = message["content"].get("parts") or [""]
            content = parts[0]
            if not content:
                continue

            if flatten and i > 0:
                parent = i - 1
            else:
                parent = node["parent"] or -1

            converted_message = {
                "binding": message["content"].get("binding", ""),
                "content": content,
                "created_at": created_at,
                "finished_generating_at": "",
                "model": "",
                "parent": parent,
                "personality": "",
                "rank": 0,
                "sender": message["author"]["role"],
                "type": 0,
            }
            converted_discussion["messages"].append(converted_message)
        discussions.append(converted_discussion)
    return discussions
2024-12-19 12:48:57 +00:00
2023-07-06 09:36:57 +00:00
def convert_json(input_file, output_file, flatten=False):
    """Convert the discussions stored in one JSON file and write the result.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write (overwritten if present).
        flatten: Forwarded unchanged to ``convert_discussions``.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If ``input_file`` does not contain valid JSON.
    """
    # Open with an explicit UTF-8 encoding: relying on the platform default
    # makes non-ASCII message text fail or decode incorrectly on systems
    # whose locale encoding is not UTF-8.
    with open(input_file, "r", encoding="utf-8") as file:
        input_data = json.load(file)
    converted_data = convert_discussions(input_data, flatten=flatten)
    with open(output_file, "w", encoding="utf-8") as file:
        json.dump(converted_data, file, indent=4)
2024-12-19 12:48:57 +00:00
if __name__ == "__main__":
    # Command-line entry point: two positional paths plus an optional
    # --flatten switch, all handed straight to convert_json.
    cli = argparse.ArgumentParser(
        description="Convert JSON files from the first format to the second format."
    )
    cli.add_argument("input_file", help="Input JSON file path")
    cli.add_argument("output_file", help="Output JSON file path")
    cli.add_argument(
        "--flatten",
        action="store_true",
        help="Flatten the discussion hierarchy",
    )
    options = cli.parse_args()
    convert_json(
        options.input_file,
        options.output_file,
        flatten=options.flatten,
    )