Refactor model training
This commit is contained in:
parent
d26764f138
commit
aabc01d209
@@ -0,0 +1,48 @@
import json
import os
import random

# Directory for the split output
directory = "output/uie"

# Make sure the output directory exists
if not os.path.exists(directory):
    os.makedirs(directory)

# Read a JSON file
def load_json(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Randomly split a JSON file at a 7:3 ratio
def split_json_random(input_file, output_file1, output_file2):
    # Read the data
    data = load_json(input_file)

    # Shuffle the data randomly
    random.shuffle(data)

    # Compute the split point
    split_point = int(len(data) * 0.7)

    # Split the data by ratio
    data_part1 = data[:split_point]  # 70% training data
    data_part2 = data[split_point:]  # 30% validation data

    # Save the two parts to separate files
    with open(output_file1, 'w', encoding='utf-8') as f1:
        json.dump(data_part1, f1, ensure_ascii=False, indent=4)

    with open(output_file2, 'w', encoding='utf-8') as f2:
        json.dump(data_part2, f2, ensure_ascii=False, indent=4)

    print(f"Data randomly split at a 7:3 ratio and saved to {output_file1} and {output_file2}")

# Input JSON file path
input_file = 'output/merged_data.json'
# Output file paths
output_file1 = 'output/uie/train.json'
output_file2 = 'output/uie/val.json'

# Split at 7:3 randomly and save
split_json_random(input_file, output_file1, output_file2)
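For reference, a quick way to sanity-check the result is to reload the two output files and compare their sizes against the intended 7:3 ratio. This is a minimal sketch, not part of the commit; it only assumes the train.json and val.json paths written by the script above already exist.

import json

# Reload the two files written by split_json_random and report the actual split.
with open('output/uie/train.json', encoding='utf-8') as f:
    train = json.load(f)
with open('output/uie/val.json', encoding='utf-8') as f:
    val = json.load(f)

total = len(train) + len(val)
print(f"train: {len(train)} ({len(train) / total:.1%}), val: {len(val)} ({len(val) / total:.1%})")

Note that the committed script does not call random.seed before random.shuffle, so each run produces a different split; seeding would be needed if reproducible train/val partitions are required.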
135975
uie/data/data_part1.json
File diff suppressed because it is too large