259 lines
19 KiB
Python
259 lines
19 KiB
Python
import json
|
||
from itertools import product
|
||
|
||
# Base vocabularies used to fill the query templates.
# Each list holds the candidate surface forms for one entity type.

# Implementing organizations.
implementation_organizations = [
    "送电一分公司",
    "送电二分公司",
    "变电分公司",
    "建筑分公司",
    "消防分公司",
    "检修试验分公司",
    "安徽宏源电力建设有限公司",
    "安徽顺安电网建设有限公司",
]

# Project categories.
project_types = [
    "基建",
    "技改大修",
    "用户工程",
    "小型基建",
]

# Project names.
project_names = [
    "国网北京检修公司2024年±500kV延庆换流站直流主设备年度检修维护",
    "合肥二电厂-彭郢π入长临河变电站220kV线路工程",
    "杨柳四铺π入况楼变110kV电缆线路工程",
    "安徽蚌埠濠州220kV变电站220千伏大唐凤阳红心镇光伏间隔扩建工程(电气安装)",
    "合肥轨道7号线10kV杆线迁改工程",
    "金上-湖北线路工程(川12标)",
    "六安汤池 110kV 变电站新建工程",
    "双港-独秀π入和平变电站220kV线路工程",
    "茗南-熙湖T接城南变电站110kV架空线路工程",
    "南屏-蓬莱路π入派河变电站110kV线路工程",
    "藕池-漆园π入杨柳变电站220kV线路工程",
    "芜湖站1000千伏1号主变A相局放配合项目",
    "埇桥-灵泗500kV线路工程",
    "月桥-火龙岗π入高村变电站220kV线路工程",
]

# Construction (owner) units.
construction_units = [
    "国网安徽省电力有限公司建设分公司",
    "国网安徽省电力有限公司马鞍山供电公司",
    "国网安徽省电力有限公司合肥供电公司",
    "国网安徽省电力有限公司阜阳供电公司",
    "国网安徽省电力有限公司滁州供电公司",
    "国网安徽省电力有限公司安庆供电公司",
    "国网安徽省电力有限公司黄山供电公司",
    "国网安徽省电力有限公司蚌埠供电公司",
    "国网安徽省电力有限公司池州供电公司",
    "国网安徽省电力有限公司六安供电公司",
    # NOTE(review): possibly a typo for "国家电网有限公司特高压建设分公司";
    # left unchanged, confirm against the source system before editing.
    "国家电有限公司特高压建设分公司",
    "国网安徽省电力有限公司淮南供电公司",
    "国网安徽省电力有限公司宣城供电公司",
    "国网北京市电力公司",
    "国网安徽省电力有限公司宿州供电公司",
    "国网安徽省电力有限公司营销服务中心",
    "中国葛洲坝集团电力有限责任公司",
    "银联黄山园区开发有限公司",
    "淮南交通控股(集团)有限公司",
    "国网安徽省电力有限公司舒城县供电公司",
    "国网安徽省电力有限公司颍上县供电公司",
    "中铁二局集团电务工程有限公司",
    "国网四川省电力公司建设分公司",
]

# Project management departments.
project_departments = [
    "第九项目管理部(马鞍山)",
    "第十一项目管理部(马鞍山)",
    "第八项目管理部(芜湖)",
    "第五项目管理部(阜阳)",
    "第六项目管理部(滁州)",
    "第十二项目管理部(陕皖)",
    "第十三项目管理部(黄山)",
    "第四项目管理部(安庆)",
]

# Project managers (each value already carries the "项目经理" title suffix).
project_managers = [
    "陈少平项目经理",
    "范文立项目经理",
    "何东洋项目经理",
    "胡彬项目经理",
    "黄东林项目经理",
    "姜松竺项目经理",
    "刘闩项目经理",
    "柳杰项目经理",
]

# Subcontractors.
subcontractors = [
    "安徽远宏电力工程有限公司",
    "安徽京硚建设有限公司",
    "武汉久林电力建设有限公司",
    "安徽省鸿钢建设发展有限公司",
    "安徽星联建筑安装有限公司",
    "福建文港建设工程有限公司",
    "芜湖冉电电力安装工程有限责任公司",
    "合肥市胜峰建筑安装有限公司",
    "安徽劦力建筑装饰有限责任公司",
    "安徽苏亚建设集团有限公司",
]

# Team leaders (each value already carries the "班组长" title suffix).
team_leaders = [
    "李元帅班组长",
    "刘雨豪班组长",
    "马新欣班组长",
    "任家泉班组长",
    "王海峰班组长",
    "王书民班组长",
]

# Risk levels.
risk_levels = [
    "1级",
    "2级",
    "3级",
    "4级",
    "5级",
]

# Intent labels; the generation loop only produces data for some of them.
labels = [
    "天气查询",
    "通用对话",
    "页面切换",
    "日计划数量查询",
    "周计划数量查询",
    "日计划作业内容",
    "周计划作业内容",
    "施工人数",
    "作业考勤人数",
    "知识问答",
]
|
||
|
||
import json
|
||
from itertools import product
|
||
|
||
|
||
def _render_with_spans(template, mapping):
    """Format *template* with *mapping* and record where each field lands.

    Returns a ``(text, spans)`` pair. ``text`` is the formatted string and
    ``spans`` is a list of ``(field_name, rendered_value, start, end)`` tuples
    in template order, with ``start``/``end`` being character offsets into
    ``text`` (``end`` exclusive).
    """
    # Local import so this block does not depend on the file's import section.
    from string import Formatter

    pieces = []
    spans = []
    cursor = 0
    for literal, field, spec, conversion in Formatter().parse(template):
        pieces.append(literal)
        cursor += len(literal)
        if field is None:
            # Trailing literal text with no replacement field after it.
            continue
        # Rebuild the single placeholder and let str.format render it, so
        # conversions and format specs behave exactly as in the full template.
        placeholder = "{" + field
        if conversion:
            placeholder += "!" + conversion
        if spec:
            placeholder += ":" + spec
        placeholder += "}"
        rendered = placeholder.format(**mapping)
        pieces.append(rendered)
        spans.append((field, rendered, cursor, cursor + len(rendered)))
        cursor += len(rendered)
    return "".join(pieces), spans


def generate_data(template_variables, variable_values, filename, label):
    """Expand templates into annotated samples and write them to a JSON file.

    Args:
        template_variables: Mapping of ``str.format`` template -> list of the
            variable names that the template uses.
        variable_values: Mapping of variable name -> list of candidate values.
        filename: Path of the JSON file to write (UTF-8, overwritten).
        label: Not used by this function; kept so existing callers that pass
            it keep working.

    Every template is expanded with the Cartesian product of its variables'
    values. Each sample is ``{"text": ..., "annotations": [...]}``, where every
    annotation holds the entity ``text``, its ``start``/``end`` character
    offsets in the sample text (``end`` exclusive) and the variable name as
    ``label``. A summary line is printed after the file is written.
    """
    samples = []

    for template, variables in template_variables.items():
        value_lists = [variable_values[var] for var in variables]
        for values in product(*value_lists):
            mapping = dict(zip(variables, values))
            text, spans = _render_with_spans(template, mapping)

            # Offsets come from the actual substitution positions. Searching
            # the finished text for the value would pick the first occurrence,
            # which is wrong when the value also appears earlier (for example
            # inside another substituted value).
            annotations = [
                {"text": rendered, "start": start, "end": end, "label": field}
                for var in mapping  # keep the order given in `variables`
                for field, rendered, start, end in spans
                if field == var
            ]

            samples.append({
                "text": text,
                "annotations": annotations,
            })

    # Save to a JSON file, keeping non-ASCII characters readable.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(samples, f, ensure_ascii=False, indent=2)

    print(f"共生成 {len(samples)} 条数据,并已保存为 {filename}")
|
||
|
||
|
||
# Date expressions for day-level and week-level queries.
# NOTE(review): "5月24日" appears twice in the daily list, so every daily
# template is emitted twice for that date. Kept as-is; confirm whether one
# entry was meant to be a different date format.
_DAILY_DATES = ["今天", "昨天", "2024年5月24日", "5月24日", "5月24日", "24日"]
_WEEKLY_DATES = ["本周", "上一周"]

# label -> (date expressions, question tail, question tail used by the
# construction-unit templates). Only the "作业内容" labels use a different
# tail for construction units (an extra leading "工程").
_QUERY_SPECS = {
    "日计划作业内容": (_DAILY_DATES, "作业内容是什么?", "工程作业内容是什么?"),
    "周计划作业内容": (_WEEKLY_DATES, "作业内容是什么?", "工程作业内容是什么?"),
    "日计划数量查询": (_DAILY_DATES, "有多少作业计划?", "有多少作业计划?"),
    "周计划数量查询": (_WEEKLY_DATES, "有多少作业计划?", "有多少作业计划?"),
    "施工人数": (_DAILY_DATES, "有多少施工人数?", "有多少施工人数?"),
    "作业考勤人数": (_DAILY_DATES, "有多少作业考勤人数?", "有多少作业考勤人数?"),
}

# Entity types whose templates are simply "{date}{entity}" / "{entity}{date}"
# followed by the question tail.
_PLAIN_ENTITIES = (
    "implementation_organization",
    "project_department",
    "project_manager",
    "subcontractor",
    "team_leader",
)


def _build_templates(question, unit_question):
    """Return the ordered template -> variable-names mapping for one label.

    ``question`` is appended to every template except the two
    construction-unit templates, which use ``unit_question`` instead.
    """
    templates = {
        "{date}{project_name}" + question: ["date", "project_name"],
        "{project_name}{date}" + question: ["project_name", "date"],
        "{date}工程性质为{project_type}的工程" + question: ["date", "project_type"],
        "工程性质为{project_type}的工程{date}" + question: ["project_type", "date"],
        "{date}{construction_unit}" + unit_question: ["date", "construction_unit"],
        "{construction_unit}{date}" + unit_question: ["construction_unit", "date"],
    }
    for entity in _PLAIN_ENTITIES:
        # The entity values already carry their title suffix (e.g. "班组长"),
        # so the template must not append the title a second time.
        slot = "{" + entity + "}"
        templates["{date}" + slot + question] = ["date", entity]
        templates[slot + "{date}" + question] = [entity, "date"]
    templates.update({
        "{date}风险等级为{risk_level}的工程" + question: ["date", "risk_level"],
        "风险等级为{risk_level}的工程{date}" + question: ["risk_level", "date"],
        "{date}{project_name}风险等级为{risk_level}的工程" + question:
            ["date", "project_name", "risk_level"],
        "{project_name}风险等级为{risk_level}的工程{date}" + question:
            ["project_name", "risk_level", "date"],
        "{date}工程性质为{project_type}风险等级为{risk_level}的工程" + question:
            ["date", "project_type", "risk_level"],
        "{project_type}工程风险等级为{risk_level}的工程{date}" + question:
            ["project_type", "risk_level", "date"],
    })
    return templates


# Candidate values shared by every label; only "date" varies per label.
_ENTITY_VALUES = {
    "project_name": project_names,
    "project_type": project_types,
    "construction_unit": construction_units,
    "implementation_organization": implementation_organizations,
    "project_department": project_departments,
    "project_manager": project_managers,
    "subcontractor": subcontractors,
    "team_leader": team_leaders,
    "risk_level": risk_levels,
}

# Write one "<label>.json" file per label that has templated queries,
# in the order the labels are listed.
for label in labels:
    spec = _QUERY_SPECS.get(label)
    if spec is None:
        # No templated queries are defined for this label.
        continue
    dates, question, unit_question = spec
    template_variables = _build_templates(question, unit_question)
    variable_values = {"date": dates, **_ENTITY_VALUES}
    generate_data(template_variables, variable_values, f"{label}.json", label)
|