2023-07-06 09:36:57 +00:00
|
|
|
import argparse
|
2024-12-19 12:48:57 +00:00
|
|
|
import json
|
2023-07-06 09:36:57 +00:00
|
|
|
from datetime import datetime
|
|
|
|
|
2024-12-19 12:48:57 +00:00
|
|
|
|
2023-07-06 09:36:57 +00:00
|
|
|
def convert_discussions(input_data, flatten=False):
    """Convert exported conversations into a list of discussion dicts.

    Each item of ``input_data`` must provide ``id``, ``title`` and
    ``mapping``. ``mapping`` maps node ids to node dicts, each holding a
    ``message`` (which may be empty/``None``) and the id of its ``parent``
    node.

    Args:
        input_data: Iterable of conversation dicts as described above.
        flatten: When true, every message after the first non-empty node is
            given the index of the preceding node (``i - 1``) as its parent
            instead of the parent id recorded in ``mapping``.

    Returns:
        A list with one dict per conversation, containing ``id``,
        ``messages`` and ``title``. Messages whose first content part is
        empty are omitted.
    """
    discussions = []

    for discussion in input_data:
        converted_discussion = {
            "id": discussion["id"],
            "messages": [],
            "title": discussion["title"],
        }

        mapping = discussion["mapping"]

        # Keep every node id paired with its message. Nodes without a
        # message are dropped here, so indexing the unfiltered id list with
        # a position in the filtered message list would look up the parent
        # of the wrong node.
        nodes = [
            (node_id, node["message"])
            for node_id, node in mapping.items()
            if node["message"]
        ]

        for i, (node_id, message) in enumerate(nodes):
            created_at = ""
            create_time = message.get("create_time")
            if create_time is not None:
                # No tz is passed, so the timestamp is rendered in local time.
                created_at = datetime.fromtimestamp(create_time).strftime(
                    "%Y-%m-%d %H:%M:%S"
                )

            # A missing, None or empty "parts" list is treated as empty
            # content instead of raising on the [0] lookup.
            parts = message["content"].get("parts") or [""]
            content = parts[0]
            if not content:
                continue

            if flatten and i > 0:
                parent = i - 1
            else:
                # Root nodes have no parent; -1 marks "no parent".
                parent = mapping[node_id].get("parent") or -1

            converted_message = {
                "binding": message["content"].get("binding", ""),
                "content": content,
                "created_at": created_at,
                "finished_generating_at": "",
                "model": "",
                "parent": parent,
                "personality": "",
                "rank": 0,
                "sender": message["author"]["role"],
                "type": 0,
            }
            converted_discussion["messages"].append(converted_message)

        discussions.append(converted_discussion)

    return discussions
|
|
|
|
|
2024-12-19 12:48:57 +00:00
|
|
|
|
2023-07-06 09:36:57 +00:00
|
|
|
def convert_json(input_file, output_file, flatten=False):
    """Read a JSON export, convert it, and write the result as JSON.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write (overwritten if present).
        flatten: Forwarded unchanged to ``convert_discussions``.
    """
    # JSON text is UTF-8 by convention; stating the encoding explicitly keeps
    # the result independent of the platform's locale default.
    with open(input_file, "r", encoding="utf-8") as file:
        input_data = json.load(file)

    converted_data = convert_discussions(input_data, flatten=flatten)

    with open(output_file, "w", encoding="utf-8") as file:
        json.dump(converted_data, file, indent=4)
|
|
|
|
|
2024-12-19 12:48:57 +00:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: two positional paths plus an optional
    # --flatten switch, all handed straight to convert_json.
    cli = argparse.ArgumentParser(
        description="Convert JSON files from the first format to the second format."
    )
    for positional, description in (
        ("input_file", "Input JSON file path"),
        ("output_file", "Output JSON file path"),
    ):
        cli.add_argument(positional, help=description)
    cli.add_argument(
        "--flatten",
        action="store_true",
        help="Flatten the discussion hierarchy",
    )

    options = cli.parse_args()
    convert_json(
        options.input_file,
        options.output_file,
        flatten=options.flatten,
    )
|