Refactor model training
This commit is contained in:
parent
d26764f138
commit
aabc01d209
@@ -0,0 +1,48 @@
import json
import os
import random

# Directory for the split output
directory = "output/uie"

# Make sure the output directory exists
if not os.path.exists(directory):
    os.makedirs(directory)

# Read a JSON file
def load_json(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Randomly split a JSON file at a 7:3 ratio
def split_json_random(input_file, output_file1, output_file2):
    # Read the data
    data = load_json(input_file)

    # Shuffle the data randomly
    random.shuffle(data)

    # Compute the split point
    split_point = int(len(data) * 0.7)

    # Split the data by ratio
    data_part1 = data[:split_point]  # 70% training data
    data_part2 = data[split_point:]  # 30% validation data

    # Save the two parts to separate files
    with open(output_file1, 'w', encoding='utf-8') as f1:
        json.dump(data_part1, f1, ensure_ascii=False, indent=4)

    with open(output_file2, 'w', encoding='utf-8') as f2:
        json.dump(data_part2, f2, ensure_ascii=False, indent=4)

    print(f"Data randomly split at a 7:3 ratio and saved to {output_file1} and {output_file2}")

# Input JSON file path
input_file = 'output/merged_data.json'
# Output file paths
output_file1 = 'output/uie/train.json'
output_file2 = 'output/uie/val.json'

# Split at 7:3 randomly and save
split_json_random(input_file, output_file1, output_file2)
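For reference, a quick way to sanity-check the result is to reload the two output files and compare their sizes against the intended 7:3 ratio. This is a minimal sketch, not part of the commit; it only assumes the train.json and val.json paths written by the script above already exist.

import json

# Reload the two files written by split_json_random and report the actual split.
with open('output/uie/train.json', encoding='utf-8') as f:
    train = json.load(f)
with open('output/uie/val.json', encoding='utf-8') as f:
    val = json.load(f)

total = len(train) + len(val)
print(f"train: {len(train)} ({len(train) / total:.1%}), val: {len(val)} ({len(val) / total:.1%})")

Note that the committed script does not call random.seed before random.shuffle, so each run produces a different split; seeding would be needed if reproducible train/val partitions are required.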
135975
uie/data/data_part1.json
File diff suppressed because it is too large