# Python · 25 lines · 698 B
import json


def _parse_lines(lines):
    """Turn tab-separated ``text<TAB>label`` lines into a list of records.

    Each line is stripped of surrounding whitespace and split on Tab. Lines
    that yield exactly two fields become ``{"text": ..., "label": ...}``.
    Blank lines are skipped silently; any other line is reported on stdout
    and dropped.

    Args:
        lines: Iterable of strings (e.g. an open text file).

    Returns:
        List of ``{"text": str, "label": str}`` dicts in input order.
    """
    records = []
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # A blank line carries no data, so it is not a format error.
            continue
        parts = stripped.split("\t")  # split on Tab
        if len(parts) == 2:  # exactly one text field and one label field
            records.append({"text": parts[0], "label": parts[1]})
        else:
            # Print the offending line to make bad data easy to track down.
            print(f"跳过格式错误的行: {stripped}")
    return records


def main(src_path="data/train.txt", dst_path="data/train.json"):
    """Convert a tab-separated text file to a JSON file and print the JSON.

    Args:
        src_path: UTF-8 text file with one ``text<TAB>label`` record per line.
        dst_path: Destination path for the UTF-8 JSON output.

    Returns:
        The list of parsed records that was serialized.

    Raises:
        OSError: If ``src_path`` cannot be read or ``dst_path`` cannot be
            written.
    """
    # Read the text file, iterating the handle instead of materializing
    # every line up front with readlines().
    with open(src_path, "r", encoding="utf-8") as f:
        json_list = _parse_lines(f)

    # Serialize to JSON; ensure_ascii=False keeps non-ASCII text readable.
    json_output = json.dumps(json_list, ensure_ascii=False, indent=4)

    # Save to the JSON file.
    with open(dst_path, "w", encoding="utf-8") as f:
        f.write(json_output)

    # Echo the JSON result.
    print(json_output)
    return json_list


if __name__ == "__main__":
    main()