import json import os from itertools import product # 目录路径 directory = "data" # 确保目录存在 if not os.path.exists(directory): os.makedirs(directory) # 基础数据定义 BASE_DATA = { # 实施组织 "implementation_organizations": ["宏源电力建设公司(变电)", "送一分公司", "变电分公司", "消防分公司", "安徽宏源电力建设有限公司(线路)", "检修试验分公司"], # 工程性质 "project_types": ["基建", "技改大修", "用户工程", "小型基建"], # 工程名 "project_names": [ "中心变", "1号工程", "国网安徽马鞍山供电公司220kV恒兴变电站220kV配电装置改造(PROJ-2024-0271)", "国网供电公司220kV恒兴变电站220kV配电装置改造(调试部分)", "安徽蚌埠濠州220kV变电站220千伏大唐凤阳红心镇光伏间隔扩建工程(电气安装)" "滁州堰陈110千伏变电站新建工程", "安徽蚌埠濠州220kV变电站220千伏大唐凤阳红心镇光伏间隔扩建工程(电气安装)(PROJ-2024-0794)", "金牛500kV变电站新建工程(建筑)(PROJ-2023-0506)", "谷岭220kV变电站220kV蕲城电厂、埇南间隔扩建工程(PROJ-2023-0466)", "渝北±800千伏换流站工程电气安装A包(PROJ-2024-0057)", "检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修(一期)(PROJ-2023-0179)" "明生科创基地项目", "安徽明生有限公司科创基地项目", "无人机智能巡检技术实验室项目", "九号线路项目", "埇南间隔更换项目", "国网滁州供电公司电流互感器更换项目", "谷岭220kV变电站220kV蕲城电厂-埇南间隔扩建项目(PROJ-2023-0466)", "安徽明生电力投资集团有限公司科创基地项目(一期)(PROJ-2024-1035)" "九号工程", "合州变电站", "合州换流站" ], # 建管单位 "construction_units": ["国网安徽省电力有限公司建设分公司", "国网安徽省电力有限公司马鞍山供电公司", "中铁二局集团电务工程有限公司"], # 项目部名称 "project_departments": ["调试一队", "第9项目管理部", "金上第十一项目部", "第八项目管理部(合肥)", "肥东9号项目部", "金上第一项目部管理部(池州黄山)", "第一项目部管理部(肥东)", "调试四队","第一项目部"], # 项目经理 "project_managers": ["陈少平项目经理", "范文立项目经理", "何东洋项目经理"], # 分包单位 "subcontractors": ["安徽劦力建筑装饰有限责任公司", "安徽苏亚建设集团有限公司"], # 班组名称 "team_names": ["张朵班组", "刘梁玉班组", "魏玉龙班组"], # 班组长 "team_leaders": ["李元帅班组长", "刘雨豪班组长"], # 风险等级 "risk_levels": ["1级", "一级", "二级", "5级", "四级"], # 8+2工况 "operatings": ["8+2工况", "8加2工况"], # 页面切换 "pages": ["风险管控", "日计划", "周风险", "日计划统计报表", "日计划推送", "生产管控中心", "考勤统计详情", "今日作业计划", "周风险统计报表", "周风险推送"] } # 自然语言模板配置 TEMPLATE_CONFIG = { "日计划数量查询": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天","2025-04-09"], "templates": [ ("{date}{project_name}有多少作业计划?", ["date", "project_name"]), ("{project_name}{date}有多少项作业计划?", ["project_name", "date"]), ("{date}公司{project_name}有多少作业计划?", ["date", "project_name"]), ("工程性质是{project_type}{date}有多少作业计划?", ["project_type", "date"]), ("工程性质是{project_type}{date}有多少项作业计划?", ["project_type", "date"]), ("工程性质是{project_type}{date}有多少条作业计划?", ["project_type", "date"]), ("{date}风险等级为{risk_level}的作业计划有多少?", ["date", "risk_level"]), ("公司{date}工程性质为{project_type}的有多少项作业计划?", ["date", "project_type"]), ("{date}工程性质为{project_type}的有多少条作业计划?", ["date", "project_type"]), ("公司工程性质为{project_type}{date}有多少作业计划?", ["project_type", "date"]), ("工程性质为{project_type}{date}有多少项作业计划?", ["project_type", "date"]), ("查询{project_name}在{date}的作业计划数量", ["project_name", "date"]), ("{date}{project_type}类作业计划有多少?", ["date", "project_type"]), ("{project_type}类{date}作业计划有多少?", ["project_type", "date"]), ("{construction_unit}在{date}有多少作业计划?", ["construction_unit", "date"]), ("{construction_unit}在{date}有多少项作业计划?", ["construction_unit", "date"]), ("{date}{construction_unit}有多少作业计划?", ["date", "construction_unit"]), ("{date}公司有多少项作业计划?", ["date"]), ("{date}有多少条作业计划?", ["date"]), ("公司{date}有多少作业计划?", ["date"]), ("{date}{operating}有多少项作业计划?", ["date", "operating"]), ("{date}公司{operating}有多少项作业计划?", ["date", "operating"]), ("{date}{implementation_organization}{project_department}有多少项作业计划?", ["date", "implementation_organization", "project_department"]), ("{date}{project_department}{implementation_organization}有多少项作业计划?", ["date", "project_department", "implementation_organization"]), ("{implementation_organization}{project_department}{date}有多少条作业计划?", ["implementation_organization", "project_department", "date"]), ("{date}{implementation_organization}有多少项作业计划?", ["date", "implementation_organization"]), ("{date}公司{project_department}有多少作业计划?", ["date", "project_department"]), ("{date}{project_department}{implementation_organization}有多少作业计划?", ["date", "project_department", "implementation_organization"]), ("{date}{project_department}有多少项作业计划?", ["date", "project_department"]), ("公司{project_department}{date}有多少项{risk_level}风险作业计划?", ["project_department", "date", "risk_level"]), ("{project_department}{date}有多少项{risk_level}风险作业计划?", ["project_department", "date", "risk_level"]), ("{project_department}{date}有多少{risk_level}风险作业计划?", ["project_department", "date", "risk_level"]), # 请帮我查一下 ("请帮我查一下{date}{project_manager}作业计划是多少?", ["date", "project_manager"]), ("请帮我查一下{date}{subcontractor}有多少条作业计划?", ["date", "subcontractor"]), ("请帮我查一下{date}{team_leader}有多少作业计划?", ["date", "team_leader"]), ("请帮我查一下{date}风险等级为{risk_level}的作业计划有多少?", ["date", "risk_level"]), ("请帮我查一下{date}公司{project_department}有多少{risk_level}风险作业计划?", ["date", "project_department", "risk_level"]), ("请帮我查一下{date}{project_department}有多少{risk_level}风险作业计划?", ["date", "project_department", "risk_level"]), ("请帮我查一下{date}{project_type}类风险等级为{risk_level}的作业计划有多少?", ["date", "project_type", "risk_level"]), ("请帮我查一下{date}{construction_unit}有多少{risk_level}风险作业计划?", ["date", "construction_unit", "risk_level"]), ("请帮我查一下{date}存在{risk_level}风险的有多少", ["date", "risk_level"]), ("请帮我查一下{implementation_organization}{date}{risk_level}风险的有多少", ["implementation_organization","date", "risk_level"]), ("请帮我查一下{implementation_organization}{date}存在{risk_level}风险的有多少", ["implementation_organization","date", "risk_level"]), ("{date}{project_type}类{construction_unit}负责的作业计划有多少?", ["date", "project_type", "construction_unit"]), ("{date}{project_type}类{implementation_organization}组织实施的作业计划有多少?", ["date", "project_type", "implementation_organization"]), ("{date}{project_department}管理的{project_type}类作业计划有多少?", ["date", "project_department", "project_type"]), ("{date}{subcontractor}承包的{project_type}类作业计划有多少?", ["date", "subcontractor", "project_type"]), ("{date}{project_manager}负责的{project_type}类作业计划有多少?", ["date", "project_manager", "project_type"]), ("{date}{team_leader}带领的{project_type}类作业计划有多少?", ["date", "team_leader", "project_type"]), ("{date}{project_name}由{project_manager}作业计划有多少?", ["date", "project_name", "project_manager"]), ("{date}{project_name}中,风险等级为{risk_level}的作业计划有多少?", ["date", "project_name", "risk_level"]), ("{date}{project_manager}作业计划有多少?", ["date", "project_manager"]), ("{project_manager}在{date}作业计划有多少?", ["project_manager", "date"]), ("{date}{implementation_organization}{project_manager}的作业计划数量", ["date", "implementation_organization", "project_manager"]), ("{implementation_organization}{project_manager}在{date}的作业计划数量", ["implementation_organization", "project_manager", "date"]), # 班组 ("{date}{team_name}有多少项作业计划?", ["date", "team_name"]), ("{team_name}{date}有多少作业计划?", ["team_name", "date"]), ("{team_name}{date}有多少条作业计划?", ["team_name", "date"]), ("{team_name}{date}作业计划数量", ["team_name", "date"]), ("{date}{team_name}作业计划数量", ["date", "team_name"]), #作业 ("{date}{implementation_organization}{project_department}有多少作业?", ["date", "implementation_organization", "project_department"]), ("{implementation_organization}{project_department}{date}有多少条作业?", ["implementation_organization", "project_department", "date"]), ("{date}{project_department}{implementation_organization}有多少作业?", ["date", "project_department", "implementation_organization"]), ("{project_department}{implementation_organization}{date}有多少条作业?", ["project_department", "implementation_organization", "date"]), ("{date}{implementation_organization}有多少项作业?", ["date", "implementation_organization"]), ("{date}公司{project_department}有多少作业?", ["date", "project_department"]), ("{date}{project_department}有多少项作业?", ["date", "project_department"]), #有多少 ("{date}{implementation_organization}{project_department}有多少?", ["date", "implementation_organization", "project_department"]), ("{project_department}{implementation_organization}{date}有多少?", ["project_department", "implementation_organization", "date"]), ("{date}{implementation_organization}有多少?", ["date", "implementation_organization"]), ("{date}公司{project_department}有多少?", ["date", "project_department"]), ("{date}{project_department}有多少?", ["date", "project_department"]), ("{date}{project_name}有多少", ["date", "project_name"]), ("{project_name}{date}有多少", ["project_name", "date"]), ("{date}公司{project_name}有多少?", ["date", "project_name"]), ("{date}工程性质是{project_type}有多少", ["project_type", "date"]), ("工程性质是{project_type}{date}有多少", ["project_type", "date"]), ("{date}存在{operating}的有多少", ["date", "operating"]), ("{date}{operating}的有多少", ["date", "operating"]), ] }, "周计划数量查询": { "date": ["本周", "上周", "上一周", "下周", "下一周", "最近一周", "本周内", "这一周"], "templates": [ ("{date}{project_name}作业计划有多少?", ["date", "project_name"]), ("{project_name}{date}作业计划有多少?", ["project_name", "date"]), ("公司{project_name}{date}作业计划有多少?", ["project_name", "date"]), ("{construction_unit}{date}作业计划有多少?", ["construction_unit", "date"]), # 🎯 仅 date 维度 ("{date}作业计划有多少?", ["date"]), # 🎯 date + 其他单个维度 ("{date}{project_name}有多少项作业计划?", ["date", "project_name"]), ("{date}{construction_unit}作业计划有多少?", ["date", "construction_unit"]), ("{date}{implementation_organization}作业计划有多少?", ["date", "implementation_organization"]), ("{date}{implementation_organization}{project_department}作业计划有多少?", ["date", "implementation_organization", "project_department"]), ("{date}{project_department}{implementation_organization}作业计划有多少?", ["date", "project_department", "implementation_organization"]), ("{date}{project_department}作业计划有多少?", ["date", "project_department"]), ("{date}{project_manager}作业计划有多少?", ["date", "project_manager"]), ("{date}{subcontractor}作业计划有多少?", ["date", "subcontractor"]), ("{date}{team_leader}作业计划有多少?", ["date", "team_leader"]), ("{date}{project_department}作业计划数量", ["date", "project_department"]), ("{date}{subcontractor}作业计划数量?", ["date", "subcontractor"]), # 🎯 date + 风险维度 ("{date}有多少项{risk_level}风险作业计划?", ["date", "risk_level"]), ("{date}有多少{risk_level}风险作业计划?", ["date", "risk_level"]), ("{date}存在{risk_level}风险的作业计划有多少?", ["date", "risk_level"]), ("{date}{risk_level}风险作业计划有多少", ["date", "risk_level"]), # 🎯 date + construction_unit + risk_level ("{construction_unit}{date}有多少项{risk_level}风险作业计划", ["construction_unit", "date", "risk_level"]), # 🎯 date + implementation_organization + risk_level ("{date}{implementation_organization}风险等级为{risk_level}的作业计划有多少?", ["date", "implementation_organization", "risk_level"]), # 🎯 date + project_name + project_manager ("{date}{project_name}{project_manager}负责的作业计划有多少?", ["date", "project_name", "project_manager"]), # 🎯 date + project_name + risk_level ("{date}{project_name}有多少项{risk_level}风险作业计划?", ["date", "project_name", "risk_level"]), # 🎯 project_manager 维度 ("公司{project_manager}{date}作业计划数量?", ["project_manager", "date"]), ("{project_manager}在{date}作业计划有多少?", ["project_manager", "date"]), ("公司{project_manager}在{date}作业计划有多少?", ["project_manager", "date"]), ("{project_manager}在{date}负责的风险等级为{risk_level}的作业计划有多少?", ["project_manager", "date", "risk_level"]), ("{date}{team_name}有多少项作业计划?", ["date", "team_name"]), ("{team_name}{date}有多少作业计划?", ["team_name", "date"]), ("{team_name}{date}作业计划数量", ["team_name", "date"]), ("{date}{team_name}的作业计划数量", ["date", "team_name"]), #有多少 ("{date}{implementation_organization}{project_department}有多少?", ["date", "implementation_organization", "project_department"]), ("{implementation_organization}{project_department}{date}有多少?", ["implementation_organization", "project_department", "date"]), ("{date}{project_department}{implementation_organization}有多少项作业计划?", ["date", "project_department", "implementation_organization"]), ("请帮我查一下{date}{implementation_organization}有多少?", ["date", "implementation_organization"]), ("请帮我查一下{date}公司{project_department}有多少?", ["date", "project_department"]), ("请帮我查一下{date}{project_department}有多少?", ["date", "project_department"]), ("请帮我查一下{date}{project_name}有多少", ["date", "project_name"]), ("请帮我查一下{project_name}{date}有多少", ["project_name", "date"]), ("请帮我查一下{date}公司{project_name}有多少?", ["date", "project_name"]), ("请帮我查一下{date}工程性质是{project_type}有多少", ["project_type", "date"]), ("请帮我查一下工程性质是{project_type}{date}有多少", ["project_type", "date"]) ] }, "日计划作业内容": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"], "templates": [ ("{date}{project_name}作业内容是什么?", ["date", "project_name"]), ("{project_name}在{date}的作业有哪些", ["project_name", "date"]), ("{date}{project_type}类作业有哪些?", ["date", "project_type"]), ("{project_type}类{date}具体作业内容是什么?", ["project_type", "date"]), ("{date}工程性质为{project_type}的作业内容是什么?", ["date", "project_type"]), ("工程性质为{project_type}的{date}作业计划分别是什么?", ["project_type", "date"]), ("工程性质为{project_type}的{date}4项作业计划分别是什么?", ["project_type", "date"]), ("{construction_unit}在{date}作业内容是什么?", ["construction_unit", "date"]), # 1. 查询特定日期和项目的作业安排 ("{date}{project_name}作业是什么?", ["date", "project_name"]), ("{date}属于{operating}作业内容是什么?", ["date", "operating"]), ("{date}存在{operating}作业是什么?", ["date", "operating"]), # 3. 查询特定日期和项目类型的工程计划 ("{date}{project_type}类作业有哪些?", ["date", "project_type"]), ("{date}{construction_unit}{risk_level}风险的作业内容是什么?", ["date", "construction_unit", "risk_level"]), ("{date}{implementation_organization}{risk_level}风险的作业是什么?", ["date", "implementation_organization", "risk_level"]), # 5. 查询特定日期和项目经理的任务安排 ("{project_manager}在{date}作业内容是什么?", ["project_manager", "date"]), ("{project_manager}在{date}作业计划分别是什么?", ["project_manager", "date"]), ("{project_manager}在{date}4项作业计划分别是什么?", ["project_manager", "date"]), # 6. 查询特定日期和风险等级的任务 ("{date}风险等级为{risk_level}的作业计划有哪些?", ["date", "risk_level"]), ("{date}风险等级为{risk_level}四项作业计划分别有哪些?", ["date", "risk_level"]), # 7. 查询特定日期和实施单位的任务内容 ("{implementation_organization}在{date}作业内容是什么?", ["implementation_organization", "date"]), # 8. 查询特定日期和团队领导的任务安排 ("{team_leader}在{date}作业内容是什么?", ["team_leader", "date"]), # 9. 查询特定日期和项目类型下的高风险任务 ("{date}的{project_type}类风险等级为{risk_level}的作业内容是什么?", ["date", "project_type", "risk_level"]), # 10. 查询特定日期和风险等级的任务安排 ("{date}风险等级为{risk_level}2项作业计划分别是什么?", ["date", "risk_level"]), ("{date}{risk_level}的作业内容是什么?", ["date", "risk_level"]), # 11. 查询特定日期和施工单位的任务进展 ("{construction_unit}{date}的作业计划是什么?", ["construction_unit", "date"]), ("{construction_unit}{date}作业有哪些", ["construction_unit", "date"]), # 12. 查询特定日期和项目经理完成的任务 ("{project_manager}在{date}作业内容是什么?", ["project_manager", "date"]), ("{project_manager}在{date}三项作业计划分别是什么?", ["project_manager", "date"]), # 13. 查询特定日期和项目经理的高风险任务 ("{project_manager}在{date}的风险等级为{risk_level}的作业内容是什么?", ["project_manager", "date", "risk_level"]), # 15. 查询特定日期和所有任务安排 ("{date}作业内容是什么?", ["date"]), ("{date}作业计划有哪些?", ["date"]), ("{date}作业计划是什么?", ["date"]), ("{date}三项作业计划分别是什么?", ["date"]), ("{date}作业是什么?", ["date"]), ("{date}作业有哪些?", ["date"]), ("{date}{implementation_organization}{project_department}作业内容是什么?", ["date", "implementation_organization", "project_department"]), ("{implementation_organization}{project_department}{date}作业计划有哪些?", ["implementation_organization", "project_department", "date"]), ("{implementation_organization}{project_department}{date}作业计划是什么?", ["implementation_organization", "project_department", "date"]), ("{implementation_organization}{project_department}{date}作业是什么?", ["implementation_organization", "project_department", "date"]), ("{implementation_organization}{project_department}{date}两项作业计划分别是什么?", ["implementation_organization", "project_department", "date"]), ("{project_department}{implementation_organization}{date}作业计划是什么?", ["project_department", "implementation_organization", "date"]), ("{date}{project_department}{implementation_organization}作业是什么?", ["date", "project_department", "implementation_organization"]), ("{date}{project_department}{implementation_organization}两项作业计划分别是什么?", ["date", "project_department", "implementation_organization"]), # 16. 查询特定日期和项目进度 ("请帮我查一下{date}{project_name}作业内容是什么?", ["date", "project_name"]), # 班组 ("请帮我查一下{date}{team_name}作业是什么?", ["date", "team_name"]), ("请帮我查一下{team_name}{date}作业内容", ["team_name", "date"]), ("{team_leader}在{date}具体的作业内容", ["team_leader", "date"]), # 9. 查询特定日期和项目类型下的高风险任务 ("{date}的{project_type}类风险等级为{risk_level}具体的作业计划", ["date", "project_type", "risk_level"]), # 10. 查询特定日期和风险等级的任务安排 ("{date}风险等级为{risk_level}具体的2项作业计划", ["date", "risk_level"]), ("{date}{risk_level}作业内容", ["date", "risk_level"]), ] }, "周计划作业内容": { "date": ["本周", "上周", "上一周", "下周", "下一周", "最近一周", "本周内", "这一周"], "templates": [ ("工程性质为{project_type}在{date}作业是什么?", ["project_type", "date"]), ("{date}工程性质为{project_type}作业内容是什么?", ["date", "project_type"]), ("{date}{construction_unit}作业有哪些?", ["date", "construction_unit"]), ("{implementation_organization}{date}的作业有哪些?", ["implementation_organization", "date"]), ("{implementation_organization}{project_department}{date}2项作业计划分别是什么?", ["implementation_organization", "project_department", "date"]), # 4. 查询某项目在指定周的所有作业计划 ("{project_name}在{date}有哪些作业?", ["project_name", "date"]), # 5. 查询指定周的所有项目类型作业内容 ("{date}{project_type}类作业内容是什么?", ["date", "project_type"]), # 6. 查询某施工单位在指定周的作业任务 ("{construction_unit}在{date}作业计划分别是什么?", ["construction_unit", "date"]), # 7. 查询某项目经理在指定周负责的作业内容 ("{project_manager}在{date}作业内容是什么?", ["project_manager", "date"]), # 8. 查询某团队负责人在指定周的作业安排 ("{team_leader}在{date}作业内容是什么?", ["team_leader", "date"]), # 9. 查询某项目类型在指定周的高风险作业内容 ("{date}的{project_type}类并且风险等级为{risk_level}的作业内容是什么?", ["date", "project_type", "risk_level"]), # 10. 查询某风险等级在指定周的作业内容 ("{date}风险等级为{risk_level}的作业内容是什么?", ["date", "risk_level"]), ("{date}{risk_level}风险的作业计划分别是什么?", ["date", "risk_level"]), ("{date}{risk_level}有哪些作业", ["date", "risk_level"]), # 11. 查询某施工单位在指定周的作业进展 ("{construction_unit}在{date}作业内容是什么?", ["construction_unit", "date"]), ("{construction_unit}在{date}有哪些作业?", ["construction_unit", "date"]), # 13. 查询某团队在指定周的作业安排 ("{team_leader}领导的团队在{date}的作业有哪些?", ["team_leader", "date"]), # 15. 查询某项目部门在指定周的作业安排 ("{project_department}在{date}作业内容是什么?", ["project_department", "date"]), ("请帮我查一下{date}{team_name}作业内容是什么", ["date", "team_name"]), ("请帮我查一下{team_name}{date}2项作业计划", ["team_name", "date"]), ("请帮我查一下{team_name}{date}2项作业计划分别是什么", ["team_name", "date"]), ("请帮我查一下{team_name}{date}有哪些作业", ["team_name", "date"]), ("{date}{construction_unit}具体作业计划", ["date", "construction_unit"]), ("{implementation_organization}{date}的作业", ["implementation_organization", "date"]), ("{implementation_organization}{project_department}{date}具体2项作业计划", ["implementation_organization", "project_department", "date"]), ] }, "施工人数": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天","2025-04-09"], "templates": [ ("{date}{project_name}施工人员有多少?", ["date", "project_name"]), ("{date}{project_name}施工人数是多少?", ["date", "project_name"]), ("{date}{project_name}现场有多少施工人员?", ["date", "project_name"]), ("{date}{project_name}现场施工人数是多少?", ["date", "project_name"]), ("{construction_unit}{date}的施工人数是多少?", ["construction_unit", "date"]), ("{construction_unit}{date}现场施工人数是多少?", ["construction_unit", "date"]), # 2. 统计某施工单位在指定日期的施工总人数 ("统计{construction_unit}在{date}的施工人数是多少?", ["construction_unit", "date"]), ("{date}属于{operating}的施工人数是多少?", ["date", "operating"]), # 4. 查询某项目类型在指定日期的施工人员需求 ("{date}{project_type}类有多少施工人员?", ["date", "project_type"]), ("{date}工程性质为{project_type}有多少施工人员?", ["date", "project_type"]), ("{date}工程性质为{project_type}有多少现场施工人员?", ["date", "project_type"]), ("{date}工程性质为{project_type}的施工人数是多少?", ["date", "project_type"]), ("工程性质为{project_type}{date}的施工人数是多少?", ["project_type", "date"]), ("工程性质为{project_type}{date}的现场施工人数是多少?", ["project_type", "date"]), ("工程性质为{project_type}{date}有多少施工人员?", ["project_type", "date"]), # 5. 统计某施工单位在指定日期的各项目施工人数 ("{construction_unit}在{date}的施工人数是多少?", ["construction_unit", "date"]), # 8. 统计某项目经理管理的项目在指定日期的施工总人数 ("{project_manager}负责的项目在{date}的施工人数是多少?", ["project_manager", "date"]), ("{date}{project_manager}负责的项目的现场施工人数是多少?", ["date", "project_manager"]), # 9. 查询某分包商在指定日期的施工人员投入 ("{subcontractor}{date}施工人员有多少?", ["subcontractor", "date"]), ("{subcontractor}{date}的施工人数是多少?", ["subcontractor", "date"]), ("{date}{subcontractor}的施工人员有多少?", ["date", "subcontractor"]), ("{date}{subcontractor}的施工人数是多少?", ["date", "subcontractor"]), ("{implementation_organization}{date}现场施工人员有多少?", ["implementation_organization", "date"]), ("{implementation_organization}{date}现场有多少施工人员?", ["implementation_organization", "date"]), ("{implementation_organization}{date}的现场施工人数是多少?", ["implementation_organization", "date"]), ("{date}{implementation_organization}现场有多少施工人员?", ["date", "implementation_organization"]), ("{date}{implementation_organization}现场施工人数是多少?", ["date", "implementation_organization"]), ("{team_leader}{date}的施工人员有多少?", ["team_leader", "date"]), ("{team_leader}{date}的施工人数是多少?", ["team_leader", "date"]), ("{date}{team_leader}的施工人员有多少?", ["date", "team_leader"]), ("{date}{team_leader}的施工人数是多少?", ["date", "team_leader"]), # 11. 查询某实施单位在指定日期的施工人员总数 ("{implementation_organization}{date}的施工人数是多少?", ["implementation_organization", "date"]), ("{implementation_organization}{date}的施工人员有多少?", ["implementation_organization", "date"]), ("{date}{team_leader}的施工人员有多少?", ["date", "team_leader"]), ("{date}{team_leader}的施工人数是多少?", ["date", "team_leader"]), # 16. 统计某项目部门在指定日期的施工人员数量 ("{project_department}{date}的施工人员有多少?", ["project_department", "date"]), ("{project_department}{date}的施工人数是多少?", ["project_department", "date"]), # 20. 统计某风险等级项目在指定日期的工种配置情况 ("{date}{risk_level}风险的施工人数是多少?", ["date", "risk_level"]), # 21. 查询某分包商在指定周的施工人员安排 ("{subcontractor}{date}的施工人数是多少?", ["subcontractor", "date"]), # 22. 统计某施工单位在指定周的高风险作业人员数量 ("{construction_unit}{date}风险等级为{risk_level}的施工人数是多少?", ["construction_unit", "date", "risk_level"]), ("{date}{team_name}施工人数是多少", ["date", "team_name"]), ("{date}{team_name}施工人数是什么", ["date", "team_name"]), ("{team_name}{date}施工人数有多少", ["team_name", "date"]), ("{team_name}{date}施工人数是什么", ["team_name", "date"]), # ("{date}{implementation_organization}{project_department}作业人数是多少?", ["date", "implementation_organization", "project_department"]), ("{implementation_organization}{project_department}{date}作业人员有多少?", ["implementation_organization", "project_department", "date"]), ("{implementation_organization}{project_department}{date}作业人数是多少?", ["implementation_organization", "project_department", "date"]), ("{date}{implementation_organization}{project_department}有多少作业人员?", ["date", "implementation_organization", "project_department"]), ("{implementation_organization}{project_department}{date}作业人数是多少?", ["implementation_organization", "project_department", "date"]), ("{project_department}{implementation_organization}{date}有多少作业人员?", ["project_department", "implementation_organization", "date"]), ("{date}{project_department}{implementation_organization}有多少作业人数?", ["date", "project_department", "implementation_organization"]), ("请帮我查一下{date}{project_department}{implementation_organization}作业人员有多少?", ["date", "project_department", "implementation_organization"]), ("请帮我查一下{project_department}{implementation_organization}{date}作业人员是多少", ["project_department", "implementation_organization", "date"]), ] }, "作业考勤人数": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"], "templates": [ ("{date}{project_name}作业考勤人数是多少", ["date", "project_name"]), ("{project_name}{date}作业考勤人数是多少", ["project_name", "date"]), ("查询{subcontractor}{date}的出勤记录", ["subcontractor", "date"]), ("查询{subcontractor}{date}的作业考勤人数是多少", ["subcontractor", "date"]), #出勤人 ("{date}{project_name}出勤人数是多少", ["date", "project_name"]), ("{project_name}{date}出勤人数是多少", ["project_name", "date"]), ("查询{subcontractor}{date}的出勤记录", ["subcontractor", "date"]), ("查询{subcontractor}{date}的作业出勤人数是多少", ["subcontractor", "date"]), ("{date}{operating}的作业考勤人数是多少?", ["date", "operating"]), ("{team_leader}{date}的作业考勤人数是多少", ["team_leader", "date"]), # 4. 统计某施工单位在指定日期的考勤人数 ("统计{construction_unit}{date}的考勤人数", ["construction_unit", "date"]), # 5. 查询某实施单位在指定日期的考勤情况 ("{implementation_organization}{date}的考勤情况如何?", ["implementation_organization", "date"]), # 6. 查询某风险等级项目在指定日期的考勤详情 ("{date}{risk_level}风险项目考勤详情", ["date", "risk_level"]), # 7. 统计某项目类型在指定日期的出勤人数 ("{date}{project_type}类出勤人数是多少?", ["date", "project_type"]), # 10. 统计某项目在指定周的出勤总人数 ("{project_name}{date}的出勤人数是多少?", ["project_name", "date"]), ("{project_name}{date}的考勤率是多少?", ["project_name", "date"]), ("{project_name}{date}的考勤率", ["project_name", "date"]), # 11. 查询某分包商在指定周的出勤情况 ("{subcontractor}在{date}的出勤情况如何?", ["subcontractor", "date"]), ("{subcontractor}在{date}的出勤情况怎么样?", ["subcontractor", "date"]), ("请帮我查一下{date}{team_name}考勤人数是多少", ["date", "team_name"]), ("请帮我查一下{team_name}{date}考勤人数", ["team_name", "date"]), ] }, "页面切换": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"], "templates": [ ("打开{page}页面", ["page"]), ("打开{page}", ["page"]), ("打开{page}模块", ["page"]), ("进入{page}", ["page"]), ("进入{page}模块", ["page"]), ("进入{page}页面", ["page"]), ("跳转到{page}", ["page"]), ("跳转到{page}模块", ["page"]), ("跳转到{page}页面", ["page"]), ("访问{page}页面", ["page"]), ("访问{page}模块", ["page"]), ("访问{page}", ["page"]), ("请打开{page}模块", ["page"]), ("请打开{page}", ["page"]), ("请打开{page}页面", ["page"]), ("加载{page}模块", ["page"]), ("加载{page}", ["page"]), ("加载{page}页面", ["page"]), ] }, "作业面查询": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"], "templates": [ ("{date}{project_name}有多少作业面?", ["date", "project_name"]), ("{date}{construction_unit}作业面有多少?", ["date", "construction_unit"]), ("{date}{implementation_organization}作业面是多少?", ["date", "implementation_organization"]), ("{date}{implementation_organization}{project_department}有多少作业面?", ["date", "implementation_organization", "project_department"]), ("{date}{project_department}有多少作业面?", ["date", "project_department"]), ("{date}{project_manager}作业面是多少?", ["date", "project_manager"]), ("{date}{subcontractor}有多少作业面?", ["date", "subcontractor"]), ("{date}{team_leader}作业面是多少?", ["date", "team_leader"]), ("{date}{project_name}有多少作业面?", ["date", "project_name"]), ("{project_name}{date}有多少项作业面?", ["project_name", "date"]), ("{date}公司{project_name}有多少作业面?", ["date", "project_name"]), ("工程性质是{project_type}{date}有多少作业面?", ["project_type", "date"]), ("工程性质是{project_type}{date}有多少项作业面?", ["project_type", "date"]), ("工程性质是{project_type}{date}有多少条作业面?", ["project_type", "date"]), ("{date}风险等级为{risk_level}的作业面有多少?", ["date", "risk_level"]), ("请帮我查一下公司{date}工程性质为{project_type}作业面有多少?", ["date", "project_type"]), ("请帮我查一下{date}工程性质为{project_type}作业面有多少?", ["date", "project_type"]), ("请帮我查一下公司工程性质为{project_type}{date}作业面有多少?", ["project_type", "date"]), ] }, "班组人数查询": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天","2025-04-09"], "templates": [ ("{date}{project_name}班组人员有多少?", ["date", "project_name"]), ("{date}{project_name}班组人数是多少?", ["date", "project_name"]), ("{date}{project_name}现场有多少班组人员?", ["date", "project_name"]), ("{date}{project_name}现场班组人数是多少?", ["date", "project_name"]), ("{construction_unit}{date}的班组人数是多少?", ["construction_unit", "date"]), ("{construction_unit}{date}现场班组人数是多少?", ["construction_unit", "date"]), # 2. 统计某施工单位在指定日期的班组总人数 ("统计{construction_unit}在{date}的班组人数是多少?", ["construction_unit", "date"]), ("{date}属于{operating}的班组人数是多少?", ["date", "operating"]), # 4. 查询某项目类型在指定日期的班组工人员需求 ("{date}{project_type}类有多少班组人员?", ["date", "project_type"]), ("{date}工程性质为{project_type}有多少班组人员?", ["date", "project_type"]), ("{date}工程性质为{project_type}有多少现场班组人员?", ["date", "project_type"]), ("{date}工程性质为{project_type}的班组人数是多少?", ["date", "project_type"]), ("工程性质为{project_type}{date}有多少班组人员?", ["project_type", "date"]), ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]), ("工程性质为{project_type}{date}有多少班组人员?", ["project_type", "date"]), ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]), # 5. 统计班组工单位在指定日期的各项目人数 ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]), ("{construction_unit}在{date}的班组人数是多少?", ["construction_unit", "date"]), ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]), # 8. 统计某项目经理管理的项目在指定日期的总人数 ("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]), ("{project_manager}负责的项目在{date}的班组人数是多少?", ["project_manager", "date"]), ("{date}{project_manager}负责的项目的现场班组人数是多少?", ["date", "project_manager"]), # ("请帮我查一下{date}{implementation_organization}{project_department}班组人数是多少?", ["date", "implementation_organization", "project_department"]), ("请帮我查一下{implementation_organization}{project_department}{date}班组人员有多少?", ["implementation_organization", "project_department", "date"]), ("请帮我查一下{implementation_organization}{project_department}{date}班组人数是多少?", ["implementation_organization", "project_department", "date"]), ("{date}{team_name}有多少?", ["date", "team_name"]), ("请帮我查一下{date}{team_name}有多少?", ["date", "team_name"]), ] }, "班组数查询": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天","2025-04-09"], "templates": [ ("{date}{project_name}现场有多少班组?", ["date", "project_name"]), ("{date}{project_name}现场有多少个班组?", ["date", "project_name"]), ("{date}{project_name}现场班组有多少个?", ["date", "project_name"]), ("{date}{project_name}班组数是多少?", ["date", "project_name"]), ("{date}{project_name}班组数是什么?", ["date", "project_name"]), ("{date}{project_name}班组是多少?", ["date", "project_name"]), ("{date}{project_name}班组有多少个?", ["date", "project_name"]), ("{date}{construction_unit}班组数有多少?", ["date", "construction_unit"]), ("{date}{implementation_organization}现场班组数是多少?", ["date", "implementation_organization"]), ("{date}{implementation_organization}{project_department}有多少班组?", ["date", "implementation_organization", "project_department"]), ("{date}{project_department}现场有多少个班组?", ["date", "project_department"]), ("{date}{project_manager}现场班组数是多少?", ["date", "project_manager"]), ("{date}{subcontractor}现场有多少班组?", ["date", "subcontractor"]), ("{date}{team_leader}班组数是多少?", ["date", "team_leader"]), ("{date}{team_leader}班组有多少?", ["date", "team_leader"]), ("{date}{project_name}有多少班组?", ["date", "project_name"]), ("{project_name}{date}有多少个班组?", ["project_name", "date"]), ("{date}公司{project_name}有多少班组?", ["date", "project_name"]), ("工程性质是{project_type}{date}有多少班组?", ["project_type", "date"]), ("工程性质是{project_type}{date}有多少班组?", ["project_type", "date"]), ("工程性质是{project_type}{date}有多少班组?", ["project_type", "date"]), ("请帮我查一下{date}风险等级为{risk_level}的班组有多少?", ["date", "risk_level"]), ("请帮我查一下公司{date}工程性质为{project_type}班组有多少个?", ["date", "project_type"]), ("请帮我查一下{date}工程性质为{project_type}现场班组有多少?", ["date", "project_type"]), ("请帮我查一下公司工程性质为{project_type}{date}有多少班组?", ["project_type", "date"]), ] }, "作业面内容": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天","2025-04-09"], "templates": [ ("{date}{project_name}作业面是什么?", ["date", "project_name"]), ("{project_name}在{date}的作业面有哪些", ["project_name", "date"]), ("{date}{project_type}类作业面有哪些?", ["date", "project_type"]), ("{project_type}类{date}具体作业面内容是什么?", ["project_type", "date"]), ("{date}工程性质为{project_type}的作业面内容是什么?", ["date", "project_type"]), ("工程性质为{project_type}的{date}作业面分别是什么?", ["project_type", "date"]), ("工程性质为{project_type}的{date}4项作业面分别是什么?", ["project_type", "date"]), ("{construction_unit}在{date}作业面内容是什么?", ["construction_unit", "date"]), # 1. 查询特定日期和项目的作业安排 ("{date}{project_name}作业面是什么?", ["date", "project_name"]), ("{date}属于{operating}作业面内容是什么?", ["date", "operating"]), ("{date}存在{operating}作业面是什么?", ["date", "operating"]), # 3. 查询特定日期和项目类型的工程计划 ("{date}{project_type}类具体作业面有哪些?", ["date", "project_type"]), ("{date}{construction_unit}{risk_level}风险的作业面内容是什么?", ["date", "construction_unit", "risk_level"]), ("{date}{implementation_organization}{risk_level}风险的作业面是什么?", ["date", "implementation_organization", "risk_level"]), # 5. 查询特定日期和项目经理的任务安排 ("{project_manager}在{date}作业面内容是什么?", ["project_manager", "date"]), ("{project_manager}在{date}作业面分别是什么?", ["project_manager", "date"]), ("{project_manager}在{date}4项作业面分别是什么?", ["project_manager", "date"]), # 6. 查询特定日期和风险等级的任务 ("{date}风险等级为{risk_level}的作业面有哪些?", ["date", "risk_level"]), ("{date}风险等级为{risk_level}四项作业面具体分别有哪些?", ["date", "risk_level"]), # 10. 查询特定日期和风险等级的任务安排 ("{date}风险等级为{risk_level}2项作业面分别是什么?", ["date", "risk_level"]), ("{date}{risk_level}的作业面具体内容是什么?", ["date", "risk_level"]), # 11. 查询特定日期和施工单位的任务进展 ("{construction_unit}{date}的作业面是什么?", ["construction_unit", "date"]), ("{construction_unit}{date}作业面具体内容有哪些", ["construction_unit", "date"]), # 12. 查询特定日期和项目经理完成的任务 ("{project_manager}在{date}三项作业面分别是什么?", ["project_manager", "date"]), # 13. 查询特定日期和项目经理的高风险任务 ("{project_manager}在{date}的风险等级为{risk_level}的作业面内容是什么?", ["project_manager", "date", "risk_level"]), # 15. 查询特定日期和所有任务安排 ("{date}作业面内容是什么?", ["date"]), ("{date}作业面有哪些?", ["date"]), ("{date}作业面是什么?", ["date"]), ("{date}{implementation_organization}{project_department}具体作业面内容是什么?", ["date", "implementation_organization", "project_department"]), ("{implementation_organization}{project_department}{date}具体作业面有哪些?", ["implementation_organization", "project_department", "date"]), ("{implementation_organization}{project_department}{date}作业面是什么?", ["implementation_organization", "project_department", "date"]), ("{implementation_organization}{project_department}{date}两项作业面分别是什么?", ["implementation_organization", "project_department", "date"]), ("{project_department}{implementation_organization}{date}具体作业面是什么?", ["project_department", "implementation_organization", "date"]), ("{date}{project_department}{implementation_organization}两项作业面分别是什么?", ["date", "project_department", "implementation_organization"]), ] }, "班组详情": { "date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天","2025-04-09"], "templates": [ ("{date}{project_name}现场具体有哪些班组?", ["date", "project_name"]), ("{date}{project_name}现场班组详情是什么?", ["date", "project_name"]), ("{date}{project_name}现场班组情况", ["date", "project_name"]), ("{date}{project_name}具体班组情况是什么?", ["date", "project_name"]), ("{date}{project_name}具体班组详情是什么?", ["date", "project_name"]), ("{date}{project_name}班组详细情况是什么?", ["date", "project_name"]), ("{date}{project_name}班组详情", ["date", "project_name"]), ("{date}{construction_unit}具体有哪些班组", ["date", "construction_unit"]), ("{date}{implementation_organization}班组有哪些?", ["date", "implementation_organization"]), ("{date}{implementation_organization}{project_department}现场具体有哪些班组?", ["date", "implementation_organization", "project_department"]), ("{date}{project_department}现场班组详情是什么?", ["date", "project_department"]), ("{date}{project_manager}现场班组情况?", ["date", "project_manager"]), ("{date}{subcontractor}具体班组情况是什么?", ["date", "subcontractor"]), ("{date}{team_leader}具体班组详情是什么?", ["date", "team_leader"]), ("{date}{team_leader}班组详细情况是什么?", ["date", "team_leader"]), ("{date}{project_name}班组详情?", ["date", "project_name"]), ("{project_name}{date}具体有哪些班组?", ["project_name", "date"]), ("{date}公司{project_name}班组有哪些?", ["date", "project_name"]), ("工程性质是{project_type}{date}现场具体有哪些班组?", ["project_type", "date"]), ("工程性质是{project_type}{date}现场班组详情是什么?", ["project_type", "date"]), ("工程性质是{project_type}{date}现场班组情况?", ["project_type", "date"]), ("{date}风险等级为{risk_level}具体班组情况是什么?", ["date", "risk_level"]), ("公司{date}工程性质为{project_type}具体班组详情是什么?", ["date", "project_type"]), ("{date}工程性质为{project_type}班组详情?", ["date", "project_type"]), ("公司工程性质为{project_type}{date}具体有哪些班组?", ["project_type", "date"]), ("公司工程性质为{project_type}{date}班组有哪些?", ["project_type", "date"]), ] } } def generate_natural_samples(config, label): """生成自然语言样本""" samples = [] variable_pool = { "project_name": BASE_DATA["project_names"], "project_type": BASE_DATA["project_types"], "construction_unit": BASE_DATA["construction_units"], "implementation_organization": BASE_DATA["implementation_organizations"], "subcontractor": BASE_DATA["subcontractors"], "team_leader": [f"{tl}" for tl in BASE_DATA["team_leaders"]], "risk_level": BASE_DATA["risk_levels"], "date": config["date"], "project_department": BASE_DATA["project_departments"], "project_manager": BASE_DATA["project_managers"], "page": BASE_DATA["pages"], "operating": BASE_DATA["operatings"], "team_name": BASE_DATA["team_names"] } for template, variables in config["templates"]: for values in product(*[variable_pool[var] for var in variables]): text = template.format(**dict(zip(variables, values))) # 生成标注信息 annotations = [] pos = 0 for var, val in zip(variables, values): start = text.find(val, pos) if start == -1: continue end = start + len(val) annotations.append({ "text": val, "start": start, "end": end, "label": var }) pos = end # 更新查找位置避免重复 samples.append({ "text": text, "annotations": annotations, "prompt": label }) # 保存文件 filename = f"data/{label}.json" with open(filename, "w", encoding="utf-8") as f: json.dump(samples, f, ensure_ascii=False, indent=2) print(f"已生成 {len(samples)} 条自然语言 {label} 数据") # 主执行流程 if __name__ == "__main__": for label, config in TEMPLATE_CONFIG.items(): generate_natural_samples(config, label)