# Intention/generated_data/generated.py
#
# Repository web-viewer residue: 832 lines, 55 KiB, Python (Raw / Blame / History).
#
# Viewer warning: this file contains ambiguous Unicode characters.
#
# This file contains Unicode characters that might be confused with other
# characters (for example full-width vs. ASCII parentheses and question marks).
# If this is intentional, the warning can be safely ignored.

import json
import os
from itertools import product
# Output directory for the generated JSON files (relative to the working directory).
directory = "data"
# Create it up front; exist_ok avoids the check-then-create race of a separate
# os.path.exists() test and is a no-op when the directory is already there.
os.makedirs(directory, exist_ok=True)
# Base vocabulary: for each entity type, the candidate surface strings that are
# substituted into the template placeholders.
# NOTE: every list item must be followed by a comma. Adjacent string literals
# are concatenated by Python, which silently merges two entries into one.
BASE_DATA = {
    # Implementing organizations
    "implementation_organizations": [
        "宏源电力建设公司(变电)",
        "送一分公司",
        "变电分公司",
        "消防分公司",
        "安徽宏源电力建设有限公司(线路)",
        "检修试验分公司",
    ],
    # Project nature / category
    "project_types": ["基建", "技改大修", "用户工程", "小型基建"],
    # Project names
    "project_names": [
        "中心变",
        "1号工程",
        "国网安徽马鞍山供电公司220kV恒兴变电站220kV配电装置改造(PROJ-2024-0271)",
        "国网供电公司220kV恒兴变电站220kV配电装置改造调试部分",
        "安徽蚌埠濠州220kV变电站220千伏大唐凤阳红心镇光伏间隔扩建工程(电气安装)",
        "滁州堰陈110千伏变电站新建工程",
        "安徽蚌埠濠州220kV变电站220千伏大唐凤阳红心镇光伏间隔扩建工程(电气安装)(PROJ-2024-0794)",
        "金牛500kV变电站新建工程建筑(PROJ-2023-0506)",
        "谷岭220kV变电站220kV蕲城电厂、埇南间隔扩建工程(PROJ-2023-0466)",
        "渝北±800千伏换流站工程电气安装A包(PROJ-2024-0057)",
        "检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修一期(PROJ-2023-0179)",
        "明生科创基地项目",
        "安徽明生有限公司科创基地项目",
        "无人机智能巡检技术实验室项目",
        "九号线路项目",
        "埇南间隔更换项目",
        "国网滁州供电公司电流互感器更换项目",
        "谷岭220kV变电站220kV蕲城电厂-埇南间隔扩建项目(PROJ-2023-0466)",
        "安徽明生电力投资集团有限公司科创基地项目(一期)(PROJ-2024-1035)",
        "九号工程",
        "合州变电站",
        "合州换流站",
    ],
    # Construction management units
    "construction_units": [
        "国网安徽省电力有限公司建设分公司",
        "国网安徽省电力有限公司马鞍山供电公司",
        "中铁二局集团电务工程有限公司",
    ],
    # Project department names
    "project_departments": [
        "调试一队",
        "第9项目管理部",
        "金上第十一项目部",
        "第八项目管理部(合肥)",
        "肥东9号项目部",
        "金上第一项目部管理部(池州黄山)",
        "第一项目部管理部(肥东)",
        "调试四队",
        "第一项目部",
    ],
    # Project managers
    "project_managers": ["陈少平项目经理", "范文立项目经理", "何东洋项目经理"],
    # Subcontractors
    "subcontractors": ["安徽劦力建筑装饰有限责任公司", "安徽苏亚建设集团有限公司"],
    # Team names
    "team_names": ["张朵班组", "刘梁玉班组", "魏玉龙班组"],
    # Team leaders
    "team_leaders": ["李元帅班组长", "刘雨豪班组长"],
    # Risk levels (mixed Arabic and Chinese numerals)
    "risk_levels": ["1级", "一级", "二级", "5级", "四级"],
    # "8+2" operating condition, in two spellings
    "operatings": ["8+2工况", "8加2工况"],
    # Page names used by the page-switching intent
    "pages": [
        "风险管控",
        "日计划",
        "周风险",
        "日计划统计报表",
        "日计划推送",
        "生产管控中心",
        "考勤统计详情",
        "今日作业计划",
        "周风险统计报表",
        "周风险推送",
    ],
}
# Natural-language template configuration.
#
# Maps an intent label (also used as the output file name and as the "prompt"
# field of every sample) to:
#   "date":      candidate date expressions for the {date} placeholder
#   "templates": list of (format string, placeholder names) pairs
#
# The placeholder names MUST be listed in the order in which they appear in
# the format string: the sample generator walks the rendered text left to
# right when it records annotation spans.
# Some templates appear more than once within an intent; each occurrence
# produces its own (duplicate) samples.
TEMPLATE_CONFIG = {
    "日计划数量查询": {
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天", "2025-04-09"],
        "templates": [
            ("{date}{project_name}有多少作业计划?", ["date", "project_name"]),
            ("{project_name}{date}有多少项作业计划?", ["project_name", "date"]),
            ("{date}公司{project_name}有多少作业计划?", ["date", "project_name"]),
            ("工程性质是{project_type}{date}有多少作业计划?", ["project_type", "date"]),
            ("工程性质是{project_type}{date}有多少项作业计划?", ["project_type", "date"]),
            ("工程性质是{project_type}{date}有多少条作业计划?", ["project_type", "date"]),
            ("{date}风险等级为{risk_level}的作业计划有多少?", ["date", "risk_level"]),
            ("公司{date}工程性质为{project_type}的有多少项作业计划?", ["date", "project_type"]),
            ("{date}工程性质为{project_type}的有多少条作业计划?", ["date", "project_type"]),
            ("公司工程性质为{project_type}{date}有多少作业计划?", ["project_type", "date"]),
            ("工程性质为{project_type}{date}有多少项作业计划?", ["project_type", "date"]),
            ("查询{project_name}{date}的作业计划数量", ["project_name", "date"]),
            ("{date}{project_type}类作业计划有多少?", ["date", "project_type"]),
            ("{project_type}{date}作业计划有多少?", ["project_type", "date"]),
            ("{construction_unit}{date}有多少作业计划?", ["construction_unit", "date"]),
            ("{construction_unit}{date}有多少项作业计划?", ["construction_unit", "date"]),
            ("{date}{construction_unit}有多少作业计划?", ["date", "construction_unit"]),
            ("{date}公司有多少项作业计划?", ["date"]),
            ("{date}有多少条作业计划?", ["date"]),
            ("公司{date}有多少作业计划?", ["date"]),
            ("{date}{operating}有多少项作业计划?", ["date", "operating"]),
            ("{date}公司{operating}有多少项作业计划?", ["date", "operating"]),
            ("{date}{implementation_organization}{project_department}有多少项作业计划?",
             ["date", "implementation_organization", "project_department"]),
            ("{date}{project_department}{implementation_organization}有多少项作业计划?",
             ["date", "project_department", "implementation_organization"]),
            ("{implementation_organization}{project_department}{date}有多少条作业计划?",
             ["implementation_organization", "project_department", "date"]),
            ("{date}{implementation_organization}有多少项作业计划?", ["date", "implementation_organization"]),
            ("{date}公司{project_department}有多少作业计划?", ["date", "project_department"]),
            ("{date}{project_department}{implementation_organization}有多少作业计划?",
             ["date", "project_department", "implementation_organization"]),
            ("{date}{project_department}有多少项作业计划?", ["date", "project_department"]),
            ("公司{project_department}{date}有多少项{risk_level}风险作业计划?",
             ["project_department", "date", "risk_level"]),
            ("{project_department}{date}有多少项{risk_level}风险作业计划?",
             ["project_department", "date", "risk_level"]),
            ("{project_department}{date}有多少{risk_level}风险作业计划?", ["project_department", "date", "risk_level"]),
            # Polite-request phrasing ("please look up ...")
            ("请帮我查一下{date}{project_manager}作业计划是多少?", ["date", "project_manager"]),
            ("请帮我查一下{date}{subcontractor}有多少条作业计划?", ["date", "subcontractor"]),
            ("请帮我查一下{date}{team_leader}有多少作业计划?", ["date", "team_leader"]),
            ("请帮我查一下{date}风险等级为{risk_level}的作业计划有多少?", ["date", "risk_level"]),
            ("请帮我查一下{date}公司{project_department}有多少{risk_level}风险作业计划?",
             ["date", "project_department", "risk_level"]),
            ("请帮我查一下{date}{project_department}有多少{risk_level}风险作业计划?", ["date", "project_department", "risk_level"]),
            ("请帮我查一下{date}{project_type}类风险等级为{risk_level}的作业计划有多少?", ["date", "project_type", "risk_level"]),
            ("请帮我查一下{date}{construction_unit}有多少{risk_level}风险作业计划?", ["date", "construction_unit", "risk_level"]),
            ("请帮我查一下{date}存在{risk_level}风险的有多少", ["date", "risk_level"]),
            ("请帮我查一下{implementation_organization}{date}{risk_level}风险的有多少", ["implementation_organization", "date", "risk_level"]),
            ("请帮我查一下{implementation_organization}{date}存在{risk_level}风险的有多少", ["implementation_organization", "date", "risk_level"]),
            ("{date}{project_type}{construction_unit}负责的作业计划有多少?",
             ["date", "project_type", "construction_unit"]),
            ("{date}{project_type}{implementation_organization}组织实施的作业计划有多少?",
             ["date", "project_type", "implementation_organization"]),
            ("{date}{project_department}管理的{project_type}类作业计划有多少?",
             ["date", "project_department", "project_type"]),
            ("{date}{subcontractor}承包的{project_type}类作业计划有多少?", ["date", "subcontractor", "project_type"]),
            ("{date}{project_manager}负责的{project_type}类作业计划有多少?",
             ["date", "project_manager", "project_type"]),
            ("{date}{team_leader}带领的{project_type}类作业计划有多少?", ["date", "team_leader", "project_type"]),
            ("{date}{project_name}{project_manager}作业计划有多少?", ["date", "project_name", "project_manager"]),
            ("{date}{project_name}中,风险等级为{risk_level}的作业计划有多少?", ["date", "project_name", "risk_level"]),
            ("{date}{project_manager}作业计划有多少?", ["date", "project_manager"]),
            ("{project_manager}{date}作业计划有多少?", ["project_manager", "date"]),
            ("{date}{implementation_organization}{project_manager}的作业计划数量",
             ["date", "implementation_organization", "project_manager"]),
            ("{implementation_organization}{project_manager}{date}的作业计划数量",
             ["implementation_organization", "project_manager", "date"]),
            # Team queries
            ("{date}{team_name}有多少项作业计划?", ["date", "team_name"]),
            ("{team_name}{date}有多少作业计划?", ["team_name", "date"]),
            ("{team_name}{date}有多少条作业计划?", ["team_name", "date"]),
            ("{team_name}{date}作业计划数量", ["team_name", "date"]),
            ("{date}{team_name}作业计划数量", ["date", "team_name"]),
            # Shorter phrasing that says "work" instead of "work plan"
            ("{date}{implementation_organization}{project_department}有多少作业?",
             ["date", "implementation_organization", "project_department"]),
            ("{implementation_organization}{project_department}{date}有多少条作业?",
             ["implementation_organization", "project_department", "date"]),
            ("{date}{project_department}{implementation_organization}有多少作业?",
             ["date", "project_department", "implementation_organization"]),
            ("{project_department}{implementation_organization}{date}有多少条作业?",
             ["project_department", "implementation_organization", "date"]),
            ("{date}{implementation_organization}有多少项作业?", ["date", "implementation_organization"]),
            ("{date}公司{project_department}有多少作业?", ["date", "project_department"]),
            ("{date}{project_department}有多少项作业?", ["date", "project_department"]),
            # Elliptical "how many" phrasing with no object noun
            ("{date}{implementation_organization}{project_department}有多少?",
             ["date", "implementation_organization", "project_department"]),
            ("{project_department}{implementation_organization}{date}有多少?",
             ["project_department", "implementation_organization", "date"]),
            ("{date}{implementation_organization}有多少?", ["date", "implementation_organization"]),
            ("{date}公司{project_department}有多少?", ["date", "project_department"]),
            ("{date}{project_department}有多少?", ["date", "project_department"]),
            ("{date}{project_name}有多少", ["date", "project_name"]),
            ("{project_name}{date}有多少", ["project_name", "date"]),
            ("{date}公司{project_name}有多少?", ["date", "project_name"]),
            # Names listed in placeholder order ({date} comes first in the text).
            ("{date}工程性质是{project_type}有多少", ["date", "project_type"]),
            ("工程性质是{project_type}{date}有多少", ["project_type", "date"]),
            ("{date}存在{operating}的有多少", ["date", "operating"]),
            ("{date}{operating}的有多少", ["date", "operating"]),
        ]
    },
    "周计划数量查询": {
        "date": ["本周", "上周", "上一周", "下周", "下一周", "最近一周", "本周内", "这一周"],
        "templates": [
            ("{date}{project_name}作业计划有多少?", ["date", "project_name"]),
            ("{project_name}{date}作业计划有多少?", ["project_name", "date"]),
            ("公司{project_name}{date}作业计划有多少?", ["project_name", "date"]),
            ("{construction_unit}{date}作业计划有多少?", ["construction_unit", "date"]),
            # date only
            ("{date}作业计划有多少?", ["date"]),
            # date + one other dimension
            ("{date}{project_name}有多少项作业计划?", ["date", "project_name"]),
            ("{date}{construction_unit}作业计划有多少?", ["date", "construction_unit"]),
            ("{date}{implementation_organization}作业计划有多少?", ["date", "implementation_organization"]),
            ("{date}{implementation_organization}{project_department}作业计划有多少?",
             ["date", "implementation_organization", "project_department"]),
            ("{date}{project_department}{implementation_organization}作业计划有多少?",
             ["date", "project_department", "implementation_organization"]),
            ("{date}{project_department}作业计划有多少?", ["date", "project_department"]),
            ("{date}{project_manager}作业计划有多少?", ["date", "project_manager"]),
            ("{date}{subcontractor}作业计划有多少?", ["date", "subcontractor"]),
            ("{date}{team_leader}作业计划有多少?", ["date", "team_leader"]),
            ("{date}{project_department}作业计划数量", ["date", "project_department"]),
            ("{date}{subcontractor}作业计划数量?", ["date", "subcontractor"]),
            # date + risk dimension
            ("{date}有多少项{risk_level}风险作业计划?", ["date", "risk_level"]),
            ("{date}有多少{risk_level}风险作业计划?", ["date", "risk_level"]),
            ("{date}存在{risk_level}风险的作业计划有多少?", ["date", "risk_level"]),
            ("{date}{risk_level}风险作业计划有多少", ["date", "risk_level"]),
            # date + construction_unit + risk_level
            ("{construction_unit}{date}有多少项{risk_level}风险作业计划", ["construction_unit", "date", "risk_level"]),
            # date + implementation_organization + risk_level
            ("{date}{implementation_organization}风险等级为{risk_level}的作业计划有多少?",
             ["date", "implementation_organization", "risk_level"]),
            # date + project_name + project_manager
            ("{date}{project_name}{project_manager}负责的作业计划有多少?", ["date", "project_name", "project_manager"]),
            # date + project_name + risk_level
            ("{date}{project_name}有多少项{risk_level}风险作业计划?", ["date", "project_name", "risk_level"]),
            # project_manager dimension
            ("公司{project_manager}{date}作业计划数量?", ["project_manager", "date"]),
            ("{project_manager}{date}作业计划有多少?", ["project_manager", "date"]),
            ("公司{project_manager}{date}作业计划有多少?", ["project_manager", "date"]),
            ("{project_manager}{date}负责的风险等级为{risk_level}的作业计划有多少?",
             ["project_manager", "date", "risk_level"]),
            ("{date}{team_name}有多少项作业计划?", ["date", "team_name"]),
            ("{team_name}{date}有多少作业计划?", ["team_name", "date"]),
            ("{team_name}{date}作业计划数量", ["team_name", "date"]),
            ("{date}{team_name}的作业计划数量", ["date", "team_name"]),
            # Elliptical "how many" phrasing with no object noun
            ("{date}{implementation_organization}{project_department}有多少?",
             ["date", "implementation_organization", "project_department"]),
            ("{implementation_organization}{project_department}{date}有多少?",
             ["implementation_organization", "project_department", "date"]),
            ("{date}{project_department}{implementation_organization}有多少项作业计划?",
             ["date", "project_department", "implementation_organization"]),
            ("请帮我查一下{date}{implementation_organization}有多少?", ["date", "implementation_organization"]),
            ("请帮我查一下{date}公司{project_department}有多少?", ["date", "project_department"]),
            ("请帮我查一下{date}{project_department}有多少?", ["date", "project_department"]),
            ("请帮我查一下{date}{project_name}有多少", ["date", "project_name"]),
            ("请帮我查一下{project_name}{date}有多少", ["project_name", "date"]),
            ("请帮我查一下{date}公司{project_name}有多少?", ["date", "project_name"]),
            # Names listed in placeholder order ({date} comes first in the text).
            ("请帮我查一下{date}工程性质是{project_type}有多少", ["date", "project_type"]),
            ("请帮我查一下工程性质是{project_type}{date}有多少", ["project_type", "date"])
        ]
    },
    "日计划作业内容": {
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"],
        "templates": [
            ("{date}{project_name}作业内容是什么?", ["date", "project_name"]),
            ("{project_name}{date}的作业有哪些", ["project_name", "date"]),
            ("{date}{project_type}类作业有哪些?", ["date", "project_type"]),
            ("{project_type}{date}具体作业内容是什么?", ["project_type", "date"]),
            ("{date}工程性质为{project_type}的作业内容是什么?", ["date", "project_type"]),
            ("工程性质为{project_type}{date}作业计划分别是什么?", ["project_type", "date"]),
            ("工程性质为{project_type}{date}4项作业计划分别是什么", ["project_type", "date"]),
            ("{construction_unit}{date}作业内容是什么?", ["construction_unit", "date"]),
            # Work arrangements for a given date and project
            ("{date}{project_name}作业是什么?", ["date", "project_name"]),
            ("{date}属于{operating}作业内容是什么?", ["date", "operating"]),
            ("{date}存在{operating}作业是什么?", ["date", "operating"]),
            # Plans for a given date and project type
            ("{date}{project_type}类作业有哪些?", ["date", "project_type"]),
            ("{date}{construction_unit}{risk_level}风险的作业内容是什么?", ["date", "construction_unit", "risk_level"]),
            ("{date}{implementation_organization}{risk_level}风险的作业是什么?",
             ["date", "implementation_organization", "risk_level"]),
            # Tasks for a given date and project manager
            ("{project_manager}{date}作业内容是什么?", ["project_manager", "date"]),
            ("{project_manager}{date}作业计划分别是什么?", ["project_manager", "date"]),
            ("{project_manager}{date}4项作业计划分别是什么", ["project_manager", "date"]),
            # Tasks for a given date and risk level
            ("{date}风险等级为{risk_level}的作业计划有哪些?", ["date", "risk_level"]),
            ("{date}风险等级为{risk_level}四项作业计划分别有哪些?", ["date", "risk_level"]),
            # Task content for a given date and implementing organization
            ("{implementation_organization}{date}作业内容是什么?", ["implementation_organization", "date"]),
            # Tasks for a given date and team leader
            ("{team_leader}{date}作业内容是什么?", ["team_leader", "date"]),
            # Tasks for a given date, project type and risk level
            ("{date}{project_type}类风险等级为{risk_level}的作业内容是什么?", ["date", "project_type", "risk_level"]),
            # Tasks for a given date and risk level
            ("{date}风险等级为{risk_level}2项作业计划分别是什么", ["date", "risk_level"]),
            ("{date}{risk_level}的作业内容是什么?", ["date", "risk_level"]),
            # Tasks for a given date and construction unit
            ("{construction_unit}{date}的作业计划是什么?", ["construction_unit", "date"]),
            ("{construction_unit}{date}作业有哪些", ["construction_unit", "date"]),
            # Tasks for a given date and project manager
            ("{project_manager}{date}作业内容是什么?", ["project_manager", "date"]),
            ("{project_manager}{date}三项作业计划分别是什么?", ["project_manager", "date"]),
            # Tasks for a given date, project manager and risk level
            ("{project_manager}{date}的风险等级为{risk_level}的作业内容是什么?",
             ["project_manager", "date", "risk_level"]),
            # All tasks for a given date
            ("{date}作业内容是什么?", ["date"]),
            ("{date}作业计划有哪些?", ["date"]),
            ("{date}作业计划是什么?", ["date"]),
            ("{date}三项作业计划分别是什么?", ["date"]),
            ("{date}作业是什么?", ["date"]),
            ("{date}作业有哪些?", ["date"]),
            ("{date}{implementation_organization}{project_department}作业内容是什么?",
             ["date", "implementation_organization", "project_department"]),
            ("{implementation_organization}{project_department}{date}作业计划有哪些?",
             ["implementation_organization", "project_department", "date"]),
            ("{implementation_organization}{project_department}{date}作业计划是什么?",
             ["implementation_organization", "project_department", "date"]),
            ("{implementation_organization}{project_department}{date}作业是什么?",
             ["implementation_organization", "project_department", "date"]),
            ("{implementation_organization}{project_department}{date}两项作业计划分别是什么?",
             ["implementation_organization", "project_department", "date"]),
            ("{project_department}{implementation_organization}{date}作业计划是什么?",
             ["project_department", "implementation_organization", "date"]),
            ("{date}{project_department}{implementation_organization}作业是什么?",
             ["date", "project_department", "implementation_organization"]),
            ("{date}{project_department}{implementation_organization}两项作业计划分别是什么?",
             ["date", "project_department", "implementation_organization"]),
            # Polite-request phrasing for a given date and project
            ("请帮我查一下{date}{project_name}作业内容是什么?", ["date", "project_name"]),
            # Team queries
            ("请帮我查一下{date}{team_name}作业是什么?", ["date", "team_name"]),
            ("请帮我查一下{team_name}{date}作业内容", ["team_name", "date"]),
            ("{team_leader}{date}具体的作业内容", ["team_leader", "date"]),
            # Tasks for a given date, project type and risk level
            ("{date}{project_type}类风险等级为{risk_level}具体的作业计划", ["date", "project_type", "risk_level"]),
            # Tasks for a given date and risk level
            ("{date}风险等级为{risk_level}具体的2项作业计划", ["date", "risk_level"]),
            ("{date}{risk_level}作业内容", ["date", "risk_level"]),
        ]
    },
    "周计划作业内容": {
        "date": ["本周", "上周", "上一周", "下周", "下一周", "最近一周", "本周内", "这一周"],
        "templates": [
            ("工程性质为{project_type}{date}作业是什么?", ["project_type", "date"]),
            ("{date}工程性质为{project_type}作业内容是什么?", ["date", "project_type"]),
            ("{date}{construction_unit}作业有哪些?", ["date", "construction_unit"]),
            ("{implementation_organization}{date}的作业有哪些?", ["implementation_organization", "date"]),
            ("{implementation_organization}{project_department}{date}2项作业计划分别是什么",
             ["implementation_organization", "project_department", "date"]),
            # All work plans of a project in the given week
            ("{project_name}{date}有哪些作业?", ["project_name", "date"]),
            # Work content by project type in the given week
            ("{date}{project_type}类作业内容是什么?", ["date", "project_type"]),
            # Work tasks of a construction unit in the given week
            ("{construction_unit}{date}作业计划分别是什么?", ["construction_unit", "date"]),
            # Work content of a project manager in the given week
            ("{project_manager}{date}作业内容是什么?", ["project_manager", "date"]),
            # Work arrangements of a team leader in the given week
            ("{team_leader}{date}作业内容是什么?", ["team_leader", "date"]),
            # Work content by project type and risk level in the given week
            ("{date}{project_type}类并且风险等级为{risk_level}的作业内容是什么?",
             ["date", "project_type", "risk_level"]),
            # Work content by risk level in the given week
            ("{date}风险等级为{risk_level}的作业内容是什么?", ["date", "risk_level"]),
            ("{date}{risk_level}风险的作业计划分别是什么?", ["date", "risk_level"]),
            ("{date}{risk_level}有哪些作业", ["date", "risk_level"]),
            # Work of a construction unit in the given week
            ("{construction_unit}{date}作业内容是什么?", ["construction_unit", "date"]),
            ("{construction_unit}{date}有哪些作业?", ["construction_unit", "date"]),
            # Work arrangements of a team in the given week
            ("{team_leader}领导的团队在{date}的作业有哪些?", ["team_leader", "date"]),
            # Work arrangements of a project department in the given week
            ("{project_department}{date}作业内容是什么?", ["project_department", "date"]),
            ("请帮我查一下{date}{team_name}作业内容是什么", ["date", "team_name"]),
            ("请帮我查一下{team_name}{date}2项作业计划", ["team_name", "date"]),
            ("请帮我查一下{team_name}{date}2项作业计划分别是什么", ["team_name", "date"]),
            ("请帮我查一下{team_name}{date}有哪些作业", ["team_name", "date"]),
            ("{date}{construction_unit}具体作业计划", ["date", "construction_unit"]),
            ("{implementation_organization}{date}的作业", ["implementation_organization", "date"]),
            ("{implementation_organization}{project_department}{date}具体2项作业计划",
             ["implementation_organization", "project_department", "date"]),
        ]
    },
    "施工人数": {
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天", "2025-04-09"],
        "templates": [
            ("{date}{project_name}施工人员有多少?", ["date", "project_name"]),
            ("{date}{project_name}施工人数是多少?", ["date", "project_name"]),
            ("{date}{project_name}现场有多少施工人员?", ["date", "project_name"]),
            ("{date}{project_name}现场施工人数是多少?", ["date", "project_name"]),
            ("{construction_unit}{date}的施工人数是多少?", ["construction_unit", "date"]),
            ("{construction_unit}{date}现场施工人数是多少?", ["construction_unit", "date"]),
            # Total headcount of a construction unit on the given date
            ("统计{construction_unit}{date}的施工人数是多少?", ["construction_unit", "date"]),
            ("{date}属于{operating}的施工人数是多少?", ["date", "operating"]),
            # Headcount by project type on the given date
            ("{date}{project_type}类有多少施工人员?", ["date", "project_type"]),
            ("{date}工程性质为{project_type}有多少施工人员?", ["date", "project_type"]),
            ("{date}工程性质为{project_type}有多少现场施工人员?", ["date", "project_type"]),
            ("{date}工程性质为{project_type}的施工人数是多少?", ["date", "project_type"]),
            ("工程性质为{project_type}{date}的施工人数是多少?", ["project_type", "date"]),
            ("工程性质为{project_type}{date}的现场施工人数是多少?", ["project_type", "date"]),
            ("工程性质为{project_type}{date}有多少施工人员?", ["project_type", "date"]),
            # Headcount of a construction unit on the given date
            ("{construction_unit}{date}的施工人数是多少?", ["construction_unit", "date"]),
            # Headcount on projects run by a project manager on the given date
            ("{project_manager}负责的项目在{date}的施工人数是多少?", ["project_manager", "date"]),
            ("{date}{project_manager}负责的项目的现场施工人数是多少?", ["date", "project_manager"]),
            # Headcount of a subcontractor on the given date
            ("{subcontractor}{date}施工人员有多少?", ["subcontractor", "date"]),
            ("{subcontractor}{date}的施工人数是多少?", ["subcontractor", "date"]),
            ("{date}{subcontractor}的施工人员有多少?", ["date", "subcontractor"]),
            ("{date}{subcontractor}的施工人数是多少?", ["date", "subcontractor"]),
            ("{implementation_organization}{date}现场施工人员有多少?", ["implementation_organization", "date"]),
            ("{implementation_organization}{date}现场有多少施工人员?", ["implementation_organization", "date"]),
            ("{implementation_organization}{date}的现场施工人数是多少?", ["implementation_organization", "date"]),
            ("{date}{implementation_organization}现场有多少施工人员?", ["date", "implementation_organization"]),
            ("{date}{implementation_organization}现场施工人数是多少?", ["date", "implementation_organization"]),
            ("{team_leader}{date}的施工人员有多少?", ["team_leader", "date"]),
            ("{team_leader}{date}的施工人数是多少?", ["team_leader", "date"]),
            ("{date}{team_leader}的施工人员有多少?", ["date", "team_leader"]),
            ("{date}{team_leader}的施工人数是多少?", ["date", "team_leader"]),
            # Total headcount of an implementing organization on the given date
            ("{implementation_organization}{date}的施工人数是多少?", ["implementation_organization", "date"]),
            ("{implementation_organization}{date}的施工人员有多少?", ["implementation_organization", "date"]),
            ("{date}{team_leader}的施工人员有多少?", ["date", "team_leader"]),
            ("{date}{team_leader}的施工人数是多少?", ["date", "team_leader"]),
            # Headcount of a project department on the given date
            ("{project_department}{date}的施工人员有多少?", ["project_department", "date"]),
            ("{project_department}{date}的施工人数是多少?", ["project_department", "date"]),
            # Headcount by risk level on the given date
            ("{date}{risk_level}风险的施工人数是多少?", ["date", "risk_level"]),
            # Headcount of a subcontractor
            ("{subcontractor}{date}的施工人数是多少?", ["subcontractor", "date"]),
            # Headcount of a construction unit at a given risk level
            ("{construction_unit}{date}风险等级为{risk_level}的施工人数是多少?",
             ["construction_unit", "date", "risk_level"]),
            ("{date}{team_name}施工人数是多少", ["date", "team_name"]),
            ("{date}{team_name}施工人数是什么", ["date", "team_name"]),
            ("{team_name}{date}施工人数有多少", ["team_name", "date"]),
            ("{team_name}{date}施工人数是什么", ["team_name", "date"]),
            # Implementing organization + project department combinations
            ("{date}{implementation_organization}{project_department}作业人数是多少?",
             ["date", "implementation_organization", "project_department"]),
            ("{implementation_organization}{project_department}{date}作业人员有多少?",
             ["implementation_organization", "project_department", "date"]),
            ("{implementation_organization}{project_department}{date}作业人数是多少?",
             ["implementation_organization", "project_department", "date"]),
            ("{date}{implementation_organization}{project_department}有多少作业人员?",
             ["date", "implementation_organization", "project_department"]),
            ("{implementation_organization}{project_department}{date}作业人数是多少?",
             ["implementation_organization", "project_department", "date"]),
            ("{project_department}{implementation_organization}{date}有多少作业人员?",
             ["project_department", "implementation_organization", "date"]),
            ("{date}{project_department}{implementation_organization}有多少作业人数?",
             ["date", "project_department", "implementation_organization"]),
            ("请帮我查一下{date}{project_department}{implementation_organization}作业人员有多少?",
             ["date", "project_department", "implementation_organization"]),
            ("请帮我查一下{project_department}{implementation_organization}{date}作业人员是多少",
             ["project_department", "implementation_organization", "date"]),
        ]
    },
    "作业考勤人数": {
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"],
        "templates": [
            ("{date}{project_name}作业考勤人数是多少", ["date", "project_name"]),
            ("{project_name}{date}作业考勤人数是多少", ["project_name", "date"]),
            ("查询{subcontractor}{date}的出勤记录", ["subcontractor", "date"]),
            ("查询{subcontractor}{date}的作业考勤人数是多少", ["subcontractor", "date"]),
            # "Turnout" wording instead of "attendance"
            ("{date}{project_name}出勤人数是多少", ["date", "project_name"]),
            ("{project_name}{date}出勤人数是多少", ["project_name", "date"]),
            ("查询{subcontractor}{date}的出勤记录", ["subcontractor", "date"]),
            ("查询{subcontractor}{date}的作业出勤人数是多少", ["subcontractor", "date"]),
            ("{date}{operating}的作业考勤人数是多少?", ["date", "operating"]),
            ("{team_leader}{date}的作业考勤人数是多少", ["team_leader", "date"]),
            # Attendance count of a construction unit on the given date
            ("统计{construction_unit}{date}的考勤人数", ["construction_unit", "date"]),
            # Attendance status of an implementing organization on the given date
            ("{implementation_organization}{date}的考勤情况如何?", ["implementation_organization", "date"]),
            # Attendance details by risk level on the given date
            ("{date}{risk_level}风险项目考勤详情", ["date", "risk_level"]),
            # Turnout by project type on the given date
            ("{date}{project_type}类出勤人数是多少?", ["date", "project_type"]),
            # Turnout and attendance rate of a project
            ("{project_name}{date}的出勤人数是多少?", ["project_name", "date"]),
            ("{project_name}{date}的考勤率是多少?", ["project_name", "date"]),
            ("{project_name}{date}的考勤率", ["project_name", "date"]),
            # Turnout status of a subcontractor
            ("{subcontractor}{date}的出勤情况如何?", ["subcontractor", "date"]),
            ("{subcontractor}{date}的出勤情况怎么样?", ["subcontractor", "date"]),
            ("请帮我查一下{date}{team_name}考勤人数是多少", ["date", "team_name"]),
            ("请帮我查一下{team_name}{date}考勤人数", ["team_name", "date"]),
        ]
    },
    "页面切换": {
        # No template of this intent uses {date}; the key is still required
        # because the generator reads config["date"] unconditionally.
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"],
        "templates": [
            ("打开{page}页面", ["page"]),
            ("打开{page}", ["page"]),
            ("打开{page}模块", ["page"]),
            ("进入{page}", ["page"]),
            ("进入{page}模块", ["page"]),
            ("进入{page}页面", ["page"]),
            ("跳转到{page}", ["page"]),
            ("跳转到{page}模块", ["page"]),
            ("跳转到{page}页面", ["page"]),
            ("访问{page}页面", ["page"]),
            ("访问{page}模块", ["page"]),
            ("访问{page}", ["page"]),
            ("请打开{page}模块", ["page"]),
            ("请打开{page}", ["page"]),
            ("请打开{page}页面", ["page"]),
            ("加载{page}模块", ["page"]),
            ("加载{page}", ["page"]),
            ("加载{page}页面", ["page"]),
        ]
    },
    "作业面查询": {
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"],
        "templates": [
            ("{date}{project_name}有多少作业面?", ["date", "project_name"]),
            ("{date}{construction_unit}作业面有多少?", ["date", "construction_unit"]),
            ("{date}{implementation_organization}作业面是多少?", ["date", "implementation_organization"]),
            ("{date}{implementation_organization}{project_department}有多少作业面?",
             ["date", "implementation_organization", "project_department"]),
            ("{date}{project_department}有多少作业面?", ["date", "project_department"]),
            ("{date}{project_manager}作业面是多少?", ["date", "project_manager"]),
            ("{date}{subcontractor}有多少作业面?", ["date", "subcontractor"]),
            ("{date}{team_leader}作业面是多少?", ["date", "team_leader"]),
            ("{date}{project_name}有多少作业面?", ["date", "project_name"]),
            ("{project_name}{date}有多少项作业面?", ["project_name", "date"]),
            ("{date}公司{project_name}有多少作业面?", ["date", "project_name"]),
            ("工程性质是{project_type}{date}有多少作业面?", ["project_type", "date"]),
            ("工程性质是{project_type}{date}有多少项作业面?", ["project_type", "date"]),
            ("工程性质是{project_type}{date}有多少条作业面?", ["project_type", "date"]),
            ("{date}风险等级为{risk_level}的作业面有多少?", ["date", "risk_level"]),
            ("请帮我查一下公司{date}工程性质为{project_type}作业面有多少?", ["date", "project_type"]),
            ("请帮我查一下{date}工程性质为{project_type}作业面有多少?", ["date", "project_type"]),
            ("请帮我查一下公司工程性质为{project_type}{date}作业面有多少?", ["project_type", "date"]),
        ]
    },
    "班组人数查询": {
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天", "2025-04-09"],
        "templates": [
            ("{date}{project_name}班组人员有多少?", ["date", "project_name"]),
            ("{date}{project_name}班组人数是多少?", ["date", "project_name"]),
            ("{date}{project_name}现场有多少班组人员?", ["date", "project_name"]),
            ("{date}{project_name}现场班组人数是多少?", ["date", "project_name"]),
            ("{construction_unit}{date}的班组人数是多少?", ["construction_unit", "date"]),
            ("{construction_unit}{date}现场班组人数是多少?", ["construction_unit", "date"]),
            # Total team headcount of a construction unit on the given date
            ("统计{construction_unit}{date}的班组人数是多少?", ["construction_unit", "date"]),
            ("{date}属于{operating}的班组人数是多少?", ["date", "operating"]),
            # Team headcount by project type on the given date
            ("{date}{project_type}类有多少班组人员?", ["date", "project_type"]),
            ("{date}工程性质为{project_type}有多少班组人员?", ["date", "project_type"]),
            ("{date}工程性质为{project_type}有多少现场班组人员?", ["date", "project_type"]),
            ("{date}工程性质为{project_type}的班组人数是多少?", ["date", "project_type"]),
            ("工程性质为{project_type}{date}有多少班组人员?", ["project_type", "date"]),
            ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]),
            ("工程性质为{project_type}{date}有多少班组人员?", ["project_type", "date"]),
            ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]),
            # Team headcount of a construction unit on the given date
            ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]),
            ("{construction_unit}{date}的班组人数是多少?", ["construction_unit", "date"]),
            ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]),
            # Team headcount on projects run by a project manager on the given date
            ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]),
            ("{project_manager}负责的项目在{date}的班组人数是多少?", ["project_manager", "date"]),
            ("{date}{project_manager}负责的项目的现场班组人数是多少?", ["date", "project_manager"]),
            # Implementing organization + project department, and team-name queries
            ("请帮我查一下{date}{implementation_organization}{project_department}班组人数是多少?",
             ["date", "implementation_organization", "project_department"]),
            ("请帮我查一下{implementation_organization}{project_department}{date}班组人员有多少?",
             ["implementation_organization", "project_department", "date"]),
            ("请帮我查一下{implementation_organization}{project_department}{date}班组人数是多少?",
             ["implementation_organization", "project_department", "date"]),
            ("{date}{team_name}有多少?", ["date", "team_name"]),
            ("请帮我查一下{date}{team_name}有多少?", ["date", "team_name"]),
        ]
    },
    "班组数查询": {
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天", "2025-04-09"],
        "templates": [
            ("{date}{project_name}现场有多少班组?", ["date", "project_name"]),
            ("{date}{project_name}现场有多少个班组?", ["date", "project_name"]),
            ("{date}{project_name}现场班组有多少个?", ["date", "project_name"]),
            ("{date}{project_name}班组数是多少?", ["date", "project_name"]),
            ("{date}{project_name}班组数是什么?", ["date", "project_name"]),
            ("{date}{project_name}班组是多少?", ["date", "project_name"]),
            ("{date}{project_name}班组有多少个?", ["date", "project_name"]),
            ("{date}{construction_unit}班组数有多少?", ["date", "construction_unit"]),
            ("{date}{implementation_organization}现场班组数是多少?", ["date", "implementation_organization"]),
            ("{date}{implementation_organization}{project_department}有多少班组?",
             ["date", "implementation_organization", "project_department"]),
            ("{date}{project_department}现场有多少个班组?", ["date", "project_department"]),
            ("{date}{project_manager}现场班组数是多少?", ["date", "project_manager"]),
            ("{date}{subcontractor}现场有多少班组?", ["date", "subcontractor"]),
            ("{date}{team_leader}班组数是多少?", ["date", "team_leader"]),
            ("{date}{team_leader}班组有多少?", ["date", "team_leader"]),
            ("{date}{project_name}有多少班组?", ["date", "project_name"]),
            ("{project_name}{date}有多少个班组?", ["project_name", "date"]),
            ("{date}公司{project_name}有多少班组?", ["date", "project_name"]),
            ("工程性质是{project_type}{date}有多少班组?", ["project_type", "date"]),
            ("工程性质是{project_type}{date}有多少班组?", ["project_type", "date"]),
            ("工程性质是{project_type}{date}有多少班组?", ["project_type", "date"]),
            ("请帮我查一下{date}风险等级为{risk_level}的班组有多少?", ["date", "risk_level"]),
            ("请帮我查一下公司{date}工程性质为{project_type}班组有多少个?", ["date", "project_type"]),
            ("请帮我查一下{date}工程性质为{project_type}现场班组有多少?", ["date", "project_type"]),
            ("请帮我查一下公司工程性质为{project_type}{date}有多少班组?", ["project_type", "date"]),
        ]
    },
    "作业面内容": {
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天", "2025-04-09"],
        "templates": [
            ("{date}{project_name}作业面是什么?", ["date", "project_name"]),
            ("{project_name}{date}的作业面有哪些", ["project_name", "date"]),
            ("{date}{project_type}类作业面有哪些?", ["date", "project_type"]),
            ("{project_type}{date}具体作业面内容是什么?", ["project_type", "date"]),
            ("{date}工程性质为{project_type}的作业面内容是什么?", ["date", "project_type"]),
            ("工程性质为{project_type}{date}作业面分别是什么?", ["project_type", "date"]),
            ("工程性质为{project_type}{date}4项作业面分别是什么", ["project_type", "date"]),
            ("{construction_unit}{date}作业面内容是什么?", ["construction_unit", "date"]),
            # Work faces for a given date and project
            ("{date}{project_name}作业面是什么?", ["date", "project_name"]),
            ("{date}属于{operating}作业面内容是什么?", ["date", "operating"]),
            ("{date}存在{operating}作业面是什么?", ["date", "operating"]),
            # Work faces for a given date and project type
            ("{date}{project_type}类具体作业面有哪些?", ["date", "project_type"]),
            ("{date}{construction_unit}{risk_level}风险的作业面内容是什么?", ["date", "construction_unit", "risk_level"]),
            ("{date}{implementation_organization}{risk_level}风险的作业面是什么?",
             ["date", "implementation_organization", "risk_level"]),
            # Work faces for a given date and project manager
            ("{project_manager}{date}作业面内容是什么?", ["project_manager", "date"]),
            ("{project_manager}{date}作业面分别是什么?", ["project_manager", "date"]),
            ("{project_manager}{date}4项作业面分别是什么", ["project_manager", "date"]),
            # Work faces for a given date and risk level
            ("{date}风险等级为{risk_level}的作业面有哪些?", ["date", "risk_level"]),
            ("{date}风险等级为{risk_level}四项作业面具体分别有哪些?", ["date", "risk_level"]),
            # Work faces for a given date and risk level
            ("{date}风险等级为{risk_level}2项作业面分别是什么", ["date", "risk_level"]),
            ("{date}{risk_level}的作业面具体内容是什么?", ["date", "risk_level"]),
            # Work faces for a given date and construction unit
            ("{construction_unit}{date}的作业面是什么?", ["construction_unit", "date"]),
            ("{construction_unit}{date}作业面具体内容有哪些", ["construction_unit", "date"]),
            # Work faces for a given date and project manager
            ("{project_manager}{date}三项作业面分别是什么?", ["project_manager", "date"]),
            # Work faces for a given date, project manager and risk level
            ("{project_manager}{date}的风险等级为{risk_level}的作业面内容是什么?",
             ["project_manager", "date", "risk_level"]),
            # All work faces for a given date
            ("{date}作业面内容是什么?", ["date"]),
            ("{date}作业面有哪些?", ["date"]),
            ("{date}作业面是什么?", ["date"]),
            ("{date}{implementation_organization}{project_department}具体作业面内容是什么?",
             ["date", "implementation_organization", "project_department"]),
            ("{implementation_organization}{project_department}{date}具体作业面有哪些?",
             ["implementation_organization", "project_department", "date"]),
            ("{implementation_organization}{project_department}{date}作业面是什么?",
             ["implementation_organization", "project_department", "date"]),
            ("{implementation_organization}{project_department}{date}两项作业面分别是什么?",
             ["implementation_organization", "project_department", "date"]),
            ("{project_department}{implementation_organization}{date}具体作业面是什么?",
             ["project_department", "implementation_organization", "date"]),
            ("{date}{project_department}{implementation_organization}两项作业面分别是什么?",
             ["date", "project_department", "implementation_organization"]),
        ]
    },
    "班组详情": {
        "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天", "2025-04-09"],
        "templates": [
            ("{date}{project_name}现场具体有哪些班组?", ["date", "project_name"]),
            ("{date}{project_name}现场班组详情是什么?", ["date", "project_name"]),
            ("{date}{project_name}现场班组情况", ["date", "project_name"]),
            ("{date}{project_name}具体班组情况是什么?", ["date", "project_name"]),
            ("{date}{project_name}具体班组详情是什么?", ["date", "project_name"]),
            ("{date}{project_name}班组详细情况是什么?", ["date", "project_name"]),
            ("{date}{project_name}班组详情", ["date", "project_name"]),
            ("{date}{construction_unit}具体有哪些班组", ["date", "construction_unit"]),
            ("{date}{implementation_organization}班组有哪些?", ["date", "implementation_organization"]),
            ("{date}{implementation_organization}{project_department}现场具体有哪些班组?",
             ["date", "implementation_organization", "project_department"]),
            ("{date}{project_department}现场班组详情是什么?", ["date", "project_department"]),
            ("{date}{project_manager}现场班组情况?", ["date", "project_manager"]),
            ("{date}{subcontractor}具体班组情况是什么?", ["date", "subcontractor"]),
            ("{date}{team_leader}具体班组详情是什么?", ["date", "team_leader"]),
            ("{date}{team_leader}班组详细情况是什么?", ["date", "team_leader"]),
            ("{date}{project_name}班组详情?", ["date", "project_name"]),
            ("{project_name}{date}具体有哪些班组?", ["project_name", "date"]),
            ("{date}公司{project_name}班组有哪些?", ["date", "project_name"]),
            ("工程性质是{project_type}{date}现场具体有哪些班组?", ["project_type", "date"]),
            ("工程性质是{project_type}{date}现场班组详情是什么?", ["project_type", "date"]),
            ("工程性质是{project_type}{date}现场班组情况?", ["project_type", "date"]),
            ("{date}风险等级为{risk_level}具体班组情况是什么?", ["date", "risk_level"]),
            ("公司{date}工程性质为{project_type}具体班组详情是什么?", ["date", "project_type"]),
            ("{date}工程性质为{project_type}班组详情?", ["date", "project_type"]),
            ("公司工程性质为{project_type}{date}具体有哪些班组?", ["project_type", "date"]),
            ("公司工程性质为{project_type}{date}班组有哪些?", ["project_type", "date"]),
        ]
    }
}
def _render_with_spans(segments, mapping):
    """Render pre-parsed template segments and record where each value lands.

    Args:
        segments: (literal_text, field_name) pairs as produced by
            ``string.Formatter().parse``; ``field_name`` is None for a
            trailing literal that is not followed by a placeholder.
        mapping: placeholder name -> substituted string.

    Returns:
        ``(text, annotations)`` where ``annotations`` holds one dict per
        placeholder, in text order, with ``text``/``start``/``end``/``label``.

    Raises:
        KeyError: if a placeholder has no entry in ``mapping``.
    """
    parts = []
    annotations = []
    cursor = 0  # length of the text rendered so far
    for literal, field in segments:
        parts.append(literal)
        cursor += len(literal)
        if field is None:
            continue
        value = mapping[field]
        annotations.append({
            "text": value,
            "start": cursor,
            "end": cursor + len(value),
            "label": field
        })
        parts.append(value)
        cursor += len(value)
    return "".join(parts), annotations


def generate_natural_samples(config, label):
    """Expand every template of one intent into annotated samples and save them.

    For each ``(template, variables)`` pair in ``config["templates"]`` the
    Cartesian product of the candidate values of ``variables`` is rendered.
    Each sample records the rendered text, the character span of every
    substituted value, and ``label`` under the ``"prompt"`` key. The samples
    are written to ``data/<label>.json`` and a summary line is printed.

    Spans are computed while rendering, so they are exact even when the
    ``variables`` list is ordered differently from the placeholders or when a
    value also occurs elsewhere in the text. (Searching the finished text
    from a moving cursor silently dropped annotations in those cases.)

    Args:
        config: dict with ``"date"`` (candidate date strings) and
            ``"templates"`` (list of ``(format string, placeholder names)``).
        label: intent label; used as the ``"prompt"`` value and file name.

    Returns:
        None.

    Raises:
        KeyError: if a template uses a placeholder that is not listed in its
            variable list, or lists a variable with no candidate pool.
    """
    # Function-scope import keeps this block self-contained.
    from string import Formatter

    variable_pool = {
        "project_name": BASE_DATA["project_names"],
        "project_type": BASE_DATA["project_types"],
        "construction_unit": BASE_DATA["construction_units"],
        "implementation_organization": BASE_DATA["implementation_organizations"],
        "subcontractor": BASE_DATA["subcontractors"],
        "team_leader": BASE_DATA["team_leaders"],
        "risk_level": BASE_DATA["risk_levels"],
        "date": config["date"],
        "project_department": BASE_DATA["project_departments"],
        "project_manager": BASE_DATA["project_managers"],
        "page": BASE_DATA["pages"],
        "operating": BASE_DATA["operatings"],
        "team_name": BASE_DATA["team_names"]
    }

    samples = []
    for template, variables in config["templates"]:
        # Parse once per template. Format specs and conversions are ignored:
        # the templates only use plain "{name}" placeholders.
        segments = [
            (literal, field)
            for literal, field, _spec, _conversion in Formatter().parse(template)
        ]
        for values in product(*(variable_pool[var] for var in variables)):
            text, annotations = _render_with_spans(segments, dict(zip(variables, values)))
            samples.append({
                "text": text,
                "annotations": annotations,
                "prompt": label
            })

    # Save the file; make sure the target directory exists even when this
    # function is called without the module-level setup having run.
    filename = f"data/{label}.json"
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(samples, f, ensure_ascii=False, indent=2)
    print(f"已生成 {len(samples)} 条自然语言 {label} 数据")
# Script entry point: write one JSON file per intent defined in TEMPLATE_CONFIG.
if __name__ == "__main__":
    for intent_label in TEMPLATE_CONFIG:
        intent_config = TEMPLATE_CONFIG[intent_label]
        generate_natural_samples(intent_config, intent_label)