新的多轮提取的提示词优化
This commit is contained in:
parent
168b57f1ab
commit
cd00c7efae
|
|
@ -1,3 +1,4 @@
|
|||
api_base_url = "http://36.33.26.201:27861/v1"
|
||||
api_key = 'EMPTY'
|
||||
model_name = 'qwen2.5-instruct'
|
||||
model_name = 'qwen2.5-instruct'
|
||||
|
||||
|
|
|
|||
253
api/main.py
253
api/main.py
|
|
@ -8,14 +8,13 @@ from intentRecognition import IntentRecognition
|
|||
from slotRecognition import SlotRecognition
|
||||
from utils import CheckResult, load_standard_name, generate_project_prompt, \
|
||||
load_standard_data, text_to_pinyin, multiple_standardize_single_name, \
|
||||
standardize_company_and_projectDepartment
|
||||
standardize_projectDepartment
|
||||
|
||||
from constants import PROJECT_NAME, PROJECT_DEPARTMENT, SIMILARITY_VALUE, IMPLEMENTATION_ORG, RISK_LEVEL
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from config import *
|
||||
|
||||
MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-16470"
|
||||
MODEL_UIE_PATH = R"../uie/output/checkpoint-17290"
|
||||
MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-30750"
|
||||
MODEL_UIE_PATH = R"../uie/output/checkpoint-31350"
|
||||
|
||||
# 类别名称列表
|
||||
labels = [
|
||||
|
|
@ -64,11 +63,10 @@ standard_company_name_pinyin_list = list(pinyin_to_standard_company_name_map.key
|
|||
|
||||
print(f"标准化的工程名是:{standard_project_name_list}", flush=True)
|
||||
print(f"pinyin标准化的工程名是 list:{standard_project_name_pinyin_list}", flush=True)
|
||||
print(f"pinyin-工程民对应关系 map:{pinyin_to_standard_company_name_map}", flush=True)
|
||||
print(f"pinyin-工程名对应关系 map:{pinyin_to_standard_company_name_map}", flush=True)
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
# 统一的异常处理函数
|
||||
@app.errorhandler(Exception)
|
||||
def handle_exception(e):
|
||||
|
|
@ -244,8 +242,8 @@ def agent():
|
|||
"answer": {"miss": sk},
|
||||
})
|
||||
|
||||
#工程名和项目名标准化
|
||||
result, information = check_project_standard_slot(predicted_id, entities)
|
||||
#工程名、分公司名和项目名标准化
|
||||
result, information = check_standard_name_slot(predicted_id, entities)
|
||||
if result == CheckResult.NEEDS_MORE_ROUNDS:
|
||||
return jsonify({
|
||||
"code": 10001, "msg": "成功",
|
||||
|
|
@ -267,58 +265,96 @@ def extract_multi_chat(messages):
|
|||
from openai import OpenAI
|
||||
client = OpenAI(base_url=api_base_url, api_key=api_key)
|
||||
|
||||
latest_message = messages[-1] # 最后一条用户提问
|
||||
if latest_message.role == "user":
|
||||
latest_user_question = latest_message.content.strip()
|
||||
time_prefixes = ["今天", "昨天", "本周", "下周", "明天", "今日"] # 可扩展的时间前缀列表
|
||||
if any(latest_user_question.startswith(prefix) for prefix in time_prefixes):
|
||||
history_messages = []
|
||||
else:
|
||||
history_messages = messages[:-1] # 除最后一条之外的历史记录
|
||||
|
||||
# 格式化对话历史
|
||||
chat_history = "\n".join([f"{msg.role}: {msg.content}" for msg in messages])
|
||||
chat_history = "\n".join([f"{msg.role}: {msg.content}" for msg in history_messages])
|
||||
latest_user_question = latest_message.content if latest_message.role == "user" else ""
|
||||
|
||||
prompt = f'''你是一个智能助手,需要从以下对话记录中提取用户最近一次提问的完整问题:
|
||||
1. **仅关注用户的最后一个问题**,无论之前用户提问了什么,**不要受到之前用户问题的影响**。
|
||||
2. **如果用户的最后一个问题包含指代词**(如“作业计划分别是什么”、“具体是哪2项”、“刚刚那个故事”、“明天呢”、“合肥中心变工程呢”等),请结合用户上一次的问题和**AI(助手)回答**,补充信息,使问题成为完整的句子。
|
||||
3. **如果用户的最后一个问题的主语是“公司”这个字眼(如“公司今天有多少四级风险作业计划”或“公司今天有多少4级风险的作业面”)则不要参考对话历史进行补全,保持用户原始表达,不要替换为具体的公司名,工程名或项目部名等。**
|
||||
4. **如果用户的最后一个问题本身是完整的**(即未使用上述2里的指代词),直接输出该问题,不要受前文影响。
|
||||
5. **如果问题缺少上下文信息**(如工程、项目部和时间等),仅在**最近的 AI 回答**提供了明确的上下文时进行补全,否则保持用户的原始输入,不要添加错误的补全信息。
|
||||
6. **如果用户的最新问题包含时间信息**(如“今天、明天、本周”),请确保其被保留,并且不改变时间表达方式。
|
||||
- **如果用户的提问本身省略了时间信息,但最近 AI 回答包含时间信息,则补全时间**。
|
||||
- **例如:用户问“具体是哪20项”时,最近 AI 回答是“今天送1分公司第二项目管理部有20项作业计划”,那么补全后的问题应为“今天送1分公司第二项目管理部具体是哪20项作业计划”**。
|
||||
7. **不要改写问题的主体和语序**,仅在需要时补全信息,避免误修改用户原始表达。
|
||||
8. 直接输出补全后的完整问题,不需要额外解释,也不需要输出“用户想了解的问题”这样的字眼。
|
||||
9. **当用户的最后一条消息使用了“第一个”、“第1个”、“第2个”……等指代方式,且上一条 AI 回复中列出了多个选项(如多个工程名、公司名、项目部等),你需要:**
|
||||
- 精确提取用户所指的序号(如“第3个”指第3个工程名、公司名或项目部名);
|
||||
- 将该工程、公司或项目部的完整名称(包括括号中的编号)提取出来;
|
||||
- **用完整名称替换掉用户上一个问题中出现的简称或模糊表达,并保留用户问题中的其它部分(如时间、计划数、内容)不变**;
|
||||
- 示例:
|
||||
- 原始问题:`2025年南苑调相机检修(PROJ-2023-0179)今天有多少作业计划`
|
||||
- AI 回答:列出多个工程,第1个是`检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修(PROJ-2023-0179)`
|
||||
- 用户回复:“第1个”
|
||||
- 则最终提问应为:
|
||||
`检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修(PROJ-2023-0179)今天有多少作业计划`
|
||||
**对话记录:**
|
||||
prompt = f'''
|
||||
你是一个意图识别与补全助手,你的任务是根据用户的最新问题判断是否需要补全,如果不需要补全,则原样返回用户的最新问题,否则需要结合对话记录请你补用户的最新问题,并只返回最终的完整问题。请严格按照如下逻辑判断并执行:
|
||||
|
||||
---
|
||||
|
||||
【规则判断与补全流程】
|
||||
|
||||
第一步:用户最新问题是否以“公司”为主语?→ 原样返回,无需补全
|
||||
- 若用户最新问题主语是“公司”,直接返回原句,无需补全。
|
||||
- 主语为“公司”的典型句式:
|
||||
- 以“公司”开头;
|
||||
- 以“今天”“昨天”“本周”“下周”等时间词开头,紧跟“公司”作为主语;
|
||||
- 示例:
|
||||
- 用户的最新问题:“今天公司有多少四级风险作业计划?”
|
||||
- 用户的最新问题:“今天公司有多少作业计划”
|
||||
- 用户的最新问题:“公司今天有多少4级风险的作业面?”
|
||||
- 最终提问均为: 原句不变。
|
||||
|
||||
第二步:用户最新问题是否是完整的问题?→ 原样返回,无需补全
|
||||
- 若用户最新问题中包含下列之一:具体的项目部名、工程名、分公司名、班组名、地区名等信息,且同时出现作业计划、作业面、班组等查询对象,视为完整问题,直接返回原句,无需补全。
|
||||
- 示例:
|
||||
- 用户最新问题:“今天张三班组有多少作业计划?”
|
||||
- 用户最新问题:“今天绿雪莲塘工程有多少作业计划”
|
||||
- 最终提问均为: 原句不变。
|
||||
|
||||
第三步:用户最新问题是否存在指代词?→ 结合用户最新问题和对话记录进行补全
|
||||
- 若用户最新问题问题中出现模糊表达,如“具体是哪些项”、“是哪两个”、“作业计划分别是什么”、“合肥中心变工程呢”、“具体是哪20项”等,请结合上一个用户问题和上一个AI回复补全问题信息。
|
||||
- 示例1:
|
||||
- 用户最新问题:“具体的作业计划分别是什么”
|
||||
- 对话记录的最后一个用户问题:“今天送一分公司有多少项作业计划”
|
||||
- 对话记录的最后一个AI回答:“今天送电一分公司有21项作业计划”
|
||||
- 则最终提问应为:
|
||||
“今天送电一分公司的21项作业计划分别是什么”
|
||||
- 示例2:
|
||||
- 用户的最新问题:“具体的作业内容是什么”
|
||||
- 对话记录的最后一个用户问题:今天送一分公司第一项目部有多少项作业计划
|
||||
- 对话记录的最后一个AI回答:今天送电一分公司第一项目管理部有21项作业计划
|
||||
- 则最终提问应为:
|
||||
“今天送电一分公司第一项目管理部的21项作业计划分别是什么”
|
||||
|
||||
第四步:用户最新问题是否为序号指代(第一个/第2个)?→ 用完整工程/项目/公司名替换补全
|
||||
- 精确提取用户所指的序号(如“第3个”指第3个工程名、公司名或项目部名);
|
||||
- 将该工程、公司或项目部的完整名称(包括括号中的编号)提取出来;
|
||||
- **用完整名称替换掉用户上一个问题中出现的简称或模糊表达,并保留用户问题中的其它部分原样不变(如时间、计划数、内容)不变**;
|
||||
- 示例1:
|
||||
- 用户最新问题:"第一个" 或"第1个"
|
||||
- 对话记录的最后一个用户问题:"2025年南苑调相机检修(PROJ-2023-0179)今天有多少作业计划""
|
||||
- 对话记录的最后一个的AI回答:列出多个工程名,第1个是`检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修(PROJ-2023-0179)`
|
||||
- 则最终提问应为:
|
||||
`检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修(PROJ-2023-0179)今天有多少作业计划`
|
||||
- 示例2:
|
||||
- 用户的最新问题:"第二个" 或"第2个"
|
||||
- 对话记录的最后一个用户问题:"宏源电力建设公司第三项目部今天有多少项作业计划""
|
||||
- 对话记录的最后一个AI回答:列出多个分公司名,第2个:"安徽宏源电力建设有限公司(线路)"
|
||||
- 则最终提问应为:
|
||||
"安徽宏源电力建设有限公司(线路)第三项目部今天有多少项作业计划"
|
||||
|
||||
第五步:输出最终问题
|
||||
- 直接输出最终问题(无解释、无多余前缀或后缀)
|
||||
- 保持句式自然清晰
|
||||
|
||||
---
|
||||
|
||||
对话记录:
|
||||
{chat_history}
|
||||
|
||||
请提取并补全用户的最新问题:'''
|
||||
|
||||
# prompt = f'''你是一个智能助手,需要从以下对话记录中提取用户最近一次提问的完整问题:
|
||||
# 1. **仅关注用户的最后一个问题**,无论之前用户提问了什么,**不要受到之前用户问题的影响**。
|
||||
# 2. **如果用户的最后一个问题包含指代词**(如“作业计划分别是什么”、“具体是哪2项”、“刚刚那个故事”、“明天呢”、“合肥中心变工程呢”等),请结合用户上一次的问题和**AI(助手)回答**,补充信息,使问题成为完整的句子。
|
||||
# 3. **如果用户的最后一个问题本身是完整的**(即未使用上述2里的指代词),直接输出该问题,不要受前文影响。
|
||||
# - **如果用户的最后一个问题包含“公司”字眼并且“公司”单独出现(如“公司今天有多少作业计划”)**,则不要参考对话历史进行补全,保持用户原始表达。
|
||||
# - **如果用户的最后一个问题里公司,工程,项目部等都不出现(如“今天有多少作业计划”),则不要参考对话历史进行补全,保持用户原始表达。
|
||||
# 4. **如果问题缺少上下文信息**(如工程、项目部和时间等),仅在**最近的 AI 回答**提供了明确的上下文时进行补全,否则保持用户的原始输入,不要添加错误的补全信息。
|
||||
# 5. **如果用户的最新问题包含时间信息**(如“今天、明天、本周”),请确保其被保留,并且不改变时间表达方式。
|
||||
# - **如果用户的提问本身省略了时间信息,但最近 AI 回答包含时间信息,则补全时间**。
|
||||
# - **例如:用户问“具体是哪20项”时,最近 AI 回答是“今天送1分公司第二项目管理部有20项作业计划”,那么补全后的问题应为“今天送1分公司第二项目管理部具体是哪20项作业计划”**。
|
||||
# 6. **不要改写问题的主体和语序**,仅在需要时补全信息,避免误修改用户原始表达。
|
||||
# 7. 直接输出补全后的完整问题,不需要额外解释,也不需要输出“用户想了解的问题”这样的字眼。
|
||||
#
|
||||
# **对话记录:**
|
||||
# {chat_history}
|
||||
#
|
||||
# 请提取并补全用户的最新问题:'''
|
||||
用户最新问题:
|
||||
{latest_user_question}
|
||||
|
||||
请输出最终问题:'''
|
||||
|
||||
message = [
|
||||
{"role": "system", "content": "你是一个智能助手,负责提取用户最近的问题,并自动补全缺失信息,使其成为完整的问题句子。"},
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
|
||||
print(f"message:{message}")
|
||||
|
||||
response = client.chat.completions.create(
|
||||
messages=message,
|
||||
model=model_name,
|
||||
|
|
@ -331,33 +367,6 @@ def extract_multi_chat(messages):
|
|||
print(f"多轮意图后用户想要的问题是:{res}", flush=True)
|
||||
return res
|
||||
|
||||
# def multi_slot_recognizer(intention_id, messages):
|
||||
# from openai import OpenAI
|
||||
# client = OpenAI(base_url=api_base_url, api_key=api_key)
|
||||
#
|
||||
# # prompt = f'''
|
||||
# # 根据用户的输入{messages},抽取出用户最近最想了解的一个问题,要求:保持客观真实,简单明了,不要多余解释和阐述,不需要输出如“用户想了解的问题”类似的字眼
|
||||
# # '''
|
||||
# prompt = f'''根据以下对话记录,提取用户最近一次提问的核心意图,根据关键信息和上下文的回答内容并且关注用户最后的问题,提取出的意图需表述为完整的问题句式:
|
||||
# 对话记录:{messages}'''
|
||||
#
|
||||
# message = [{"role": "system", "content": prompt}]
|
||||
# message.extend(messages)
|
||||
# # print(message)
|
||||
# response = client.chat.completions.create(
|
||||
# messages=message,
|
||||
# model=model_name,
|
||||
# max_tokens=1000,
|
||||
# temperature=0.001,
|
||||
# stream=False
|
||||
# )
|
||||
# res = response.choices[0].message.content
|
||||
#
|
||||
# print(f"多轮意图后用户想要的问题是{res}",flush=True)
|
||||
# entries = slot_recognizer.recognize(res)
|
||||
#
|
||||
# return entries
|
||||
|
||||
def check_lost(int_res, slot):
|
||||
#labels: ["天气查询","通用对话","页面切换","日计划数量查询","周计划数量查询","日计划作业内容","周计划作业内容","施工人数","作业考勤人数","知识问答"]
|
||||
mapping = {
|
||||
|
|
@ -411,7 +420,7 @@ def check_lost(int_res, slot):
|
|||
|
||||
|
||||
#标准化工程名
|
||||
def check_project_standard_slot(int_res, slot) -> tuple:
|
||||
def check_standard_name_slot(int_res, slot) -> tuple:
|
||||
intention_list = {3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15}
|
||||
if int_res not in intention_list:
|
||||
return CheckResult.NO_MATCH, ""
|
||||
|
|
@ -420,39 +429,13 @@ def check_project_standard_slot(int_res, slot) -> tuple:
|
|||
if PROJECT_DEPARTMENT in slot:
|
||||
if IMPLEMENTATION_ORG not in slot:
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, "请补充该项目部所属的分公司名称"
|
||||
else:
|
||||
standard_company, matched_projectDepartment = standardize_company_and_projectDepartment(slot[IMPLEMENTATION_ORG], slot[PROJECT_DEPARTMENT], standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
print(f"check_project_standard_slot : {slot[IMPLEMENTATION_ORG]}, {slot[PROJECT_DEPARTMENT]}")
|
||||
# if not standard_company:
|
||||
# return CheckResult.NEEDS_MORE_ROUNDS, f"未匹配到您说的分公司名:{slot[IMPLEMENTATION_ORG]},请提供更准确的分公司名"
|
||||
# if not matched_projectDepartment:
|
||||
# return CheckResult.NEEDS_MORE_ROUNDS, f"未匹配到您说的项目名:{slot[PROJECT_DEPARTMENT]},请提供更准确的项目名"
|
||||
# if len(standard_company) > 1:
|
||||
# prompt = generate_project_prompt(matched_projectDepartment)
|
||||
# return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
# if len(matched_projectDepartment) == 1:
|
||||
# slot[IMPLEMENTATION_ORG] = standard_company[0]
|
||||
# slot[PROJECT_DEPARTMENT] = matched_projectDepartment[0]
|
||||
# elif len(matched_projectDepartment) > 1:
|
||||
# prompt = generate_project_prompt(standard_company, original_name=slot[IMPLEMENTATION_ORG], type="分公司名")
|
||||
# return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
if not standard_company:
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, f"未匹配到您说的分公司名:{slot[IMPLEMENTATION_ORG]},请提供更准确的分公司名"
|
||||
if not matched_projectDepartment:
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, f"未匹配到您说的项目名:{slot[PROJECT_DEPARTMENT]},请提供更准确的项目名"
|
||||
if standard_company and len(matched_projectDepartment) == 1:
|
||||
slot[IMPLEMENTATION_ORG] = standard_company
|
||||
slot[PROJECT_DEPARTMENT] = matched_projectDepartment[0]
|
||||
elif standard_company and len(matched_projectDepartment) > 1:
|
||||
prompt = generate_project_prompt(matched_projectDepartment)
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
|
||||
#工程名和分公司名标准化
|
||||
#工程名和分公司名和项目名标准化
|
||||
for key, value in slot.items():
|
||||
if key == PROJECT_NAME:
|
||||
print(f"check_project_standard_slot original project : {slot[PROJECT_NAME]}")
|
||||
match_results = multiple_standardize_single_name(value, standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
print(f"standardize_single_name 工程名 :result:{match_results}",flush=True)
|
||||
print(f"check_standard_name_slot 原始工程名 : {slot[PROJECT_NAME]}")
|
||||
match_results = multiple_standardize_single_name(value, standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,80)
|
||||
print(f"check_standard_name_slot 匹配后工程名 :result:{match_results}",flush=True)
|
||||
if match_results and len(match_results) == 1:
|
||||
slot[key] = match_results[0]
|
||||
else:
|
||||
|
|
@ -460,14 +443,25 @@ def check_project_standard_slot(int_res, slot) -> tuple:
|
|||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
|
||||
if key == IMPLEMENTATION_ORG and slot[key] != "公司":
|
||||
print(f"check_project_standard_slot original company : {slot[IMPLEMENTATION_ORG]}")
|
||||
print(f"check_standard_name_slot 原始分公司名 : {slot[IMPLEMENTATION_ORG]}")
|
||||
match_results = multiple_standardize_single_name(value, standard_company_name_list, standard_company_name_pinyin_list, pinyin_to_standard_company_name_map, lower_score=50, high_score=80, isArabicNumConv = True)
|
||||
print(f"check_project_standard_slot 分公司名: result:{match_results}",flush=True)
|
||||
print(f"check_standard_name_slot 匹配后分公司名: result:{match_results}",flush=True)
|
||||
if match_results and len(match_results) == 1:
|
||||
slot[key] = match_results[0]
|
||||
else:
|
||||
prompt = generate_project_prompt(match_results, original_name=slot[IMPLEMENTATION_ORG], type="分公司名")
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
|
||||
if key == PROJECT_DEPARTMENT:
|
||||
print(f"check_standard_name_slot 原始项目部名 : {slot[PROJECT_DEPARTMENT]}")
|
||||
match_results = standardize_projectDepartment(slot[IMPLEMENTATION_ORG], value, standard_company_program, high_score=85)
|
||||
print(f"check_standard_name_slot 匹配后项目部名: result:{match_results}",flush=True)
|
||||
if match_results and len(match_results) == 1:
|
||||
slot[key] = match_results[0]
|
||||
else:
|
||||
prompt = generate_project_prompt(match_results, original_name=slot[PROJECT_DEPARTMENT], type="项目名")
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
|
||||
if key == RISK_LEVEL:
|
||||
if slot[RISK_LEVEL] not in["2级","3级","4级","5级"] and slot[RISK_LEVEL] not in["二级","三级","四级","五级"]:
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, "您查询的风险等级在系统中未找到,请确认风险等级后再次提问"
|
||||
|
|
@ -514,16 +508,34 @@ def check_project_standard_slot(int_res, slot) -> tuple:
|
|||
# print(f"输入: 芦集变电站-> 输出: {result}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("宋轶分公司", standard_company_name_list, standard_company_name_pinyin_list, pinyin_to_standard_company_name_map,75,80)
|
||||
# print(f"standardize_pinyin_single_name 输入: 宋轶分公司-> 输出: {match_results}")
|
||||
# #
|
||||
# match_results = multiple_standardize_single_name("合肥中心变", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,40,70)
|
||||
# print(f"standardize_pinyin_single_name 输入: 合肥中心变-> 输出: {match_results}")
|
||||
# print(f"multiple_standardize_single_name 输入: 宋轶分公司-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("淮南安丰", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,40,70)
|
||||
# print(f"standardize_pinyin_single_name 输入: 淮南安丰工程-> 输出: {match_results}")
|
||||
# match_results = multiple_standardize_single_name("宏源电力公司", standard_company_name_list, standard_company_name_pinyin_list, pinyin_to_standard_company_name_map,75,80)
|
||||
# print(f"multiple_standardize_single_name 输入: 宏源电力公司-> 输出: {match_results}")
|
||||
#
|
||||
# #
|
||||
# match_results = multiple_standardize_single_name("合肥中心变", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 合肥中心变-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("淮南安丰", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 淮南安丰工程-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("宿州萧砀新建工程", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 宿州萧砀新建工程-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("芦集变电站", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"standardize_pinyin_single_name 输入: 芦集变电站-> 输出: {match_results}")
|
||||
# print(f"multiple_standardize_single_name 输入: 芦集变电站-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("卢集变电站新建工程", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 卢集变电站-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("芦集古沟变电站新建工程", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 芦集古沟变电站新建工程-> 输出: {match_results}")
|
||||
# #
|
||||
# match_results = multiple_standardize_single_name("金牛变电站", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 金牛变电站-> 输出: {match_results}")
|
||||
#
|
||||
|
||||
#
|
||||
# company, project = standardize_company_and_projectDepartment("变电分公司","第一项目部", standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
# print(f"company:{company}, project:{project}")
|
||||
|
|
@ -532,8 +544,11 @@ def check_project_standard_slot(int_res, slot) -> tuple:
|
|||
# print(f"company:{company}, project:{project}")
|
||||
# company, project = standardize_company_and_projectDepartment("试验分公司","电缆班", standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
# print(f"company:{company}, project:{project}")
|
||||
# company, project = standardize_company_and_projectDepartment("宏源电力公司","第三项目部(六安线路)", standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
# company, project = standardize_company_and_projectDepartment("宏源电力投资有限公司","第三项目部", standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
# print(f"company:{company}, project:{project}")
|
||||
#
|
||||
|
||||
# match_results = standardize_projectDepartment("安徽宏源电力建设有限公司(变电)", "第3项目部", standard_company_program, high_score=85)
|
||||
# print(f"match_results:{match_results}")
|
||||
if __name__ == '__main__':
|
||||
app.run(host='0.0.0.0', port=18074, debug=True)
|
||||
|
|
|
|||
297
api/main_temp.py
297
api/main_temp.py
|
|
@ -8,15 +8,13 @@ from intentRecognition import IntentRecognition
|
|||
from slotRecognition import SlotRecognition
|
||||
from utils import CheckResult, load_standard_name, generate_project_prompt, \
|
||||
load_standard_data, text_to_pinyin, multiple_standardize_single_name, \
|
||||
standardize_company_and_projectDepartment
|
||||
standardize_projectDepartment
|
||||
|
||||
from constants import PROJECT_NAME, PROJECT_DEPARTMENT, SIMILARITY_VALUE, IMPLEMENTATION_ORG, RISK_LEVEL
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from config import *
|
||||
|
||||
# MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-16470"
|
||||
# MODEL_UIE_PATH = R"../uie/output_temp/checkpoint-17060"
|
||||
|
||||
MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-30750"
|
||||
MODEL_UIE_PATH = R"../uie/output_temp/checkpoint-31350"
|
||||
# 类别名称列表
|
||||
labels = [
|
||||
"天气查询", "互联网查询", "页面切换", "日计划数量查询", "周计划数量查询",
|
||||
|
|
@ -42,12 +40,6 @@ label_map = {
|
|||
13: 'B-teamName', 26: 'I-teamName',
|
||||
}
|
||||
|
||||
# # 初始化工具类
|
||||
# intent_recognizer = IntentRecognition(MODEL_ERNIE_PATH, labels)
|
||||
#
|
||||
# # 初始化槽位识别工具类
|
||||
# slot_recognizer = SlotRecognition(MODEL_UIE_PATH, label_map)
|
||||
# 设置Flask应用
|
||||
|
||||
#标准公司名和项目名
|
||||
standard_company_program = load_standard_data("./standard_data/standard_company_program.json")
|
||||
|
|
@ -62,13 +54,20 @@ standard_company_name_list = list(standard_company_program.keys())
|
|||
pinyin_to_standard_company_name_map = {text_to_pinyin(kw): kw for kw in standard_company_name_list}
|
||||
standard_company_name_pinyin_list = list(pinyin_to_standard_company_name_map.keys())
|
||||
|
||||
|
||||
# 初始化工具类
|
||||
intent_recognizer = IntentRecognition(MODEL_ERNIE_PATH, labels)
|
||||
|
||||
# 初始化槽位识别工具类
|
||||
slot_recognizer = SlotRecognition(MODEL_UIE_PATH, label_map)
|
||||
# 设置Flask应用
|
||||
|
||||
print(f"标准化的工程名是:{standard_project_name_list}", flush=True)
|
||||
print(f"pinyin标准化的工程名是 list:{standard_project_name_pinyin_list}", flush=True)
|
||||
print(f"pinyin-工程民对应关系 map:{pinyin_to_standard_company_name_map}", flush=True)
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
# 统一的异常处理函数
|
||||
@app.errorhandler(Exception)
|
||||
def handle_exception(e):
|
||||
|
|
@ -244,8 +243,8 @@ def agent():
|
|||
"answer": {"miss": sk},
|
||||
})
|
||||
|
||||
#工程名和项目名标准化
|
||||
result, information = check_project_standard_slot(predicted_id, entities)
|
||||
#工程名、分公司名和项目名标准化
|
||||
result, information = check_standard_name_slot(predicted_id, entities)
|
||||
if result == CheckResult.NEEDS_MORE_ROUNDS:
|
||||
return jsonify({
|
||||
"code": 10001, "msg": "成功",
|
||||
|
|
@ -267,64 +266,101 @@ def extract_multi_chat(messages):
|
|||
from openai import OpenAI
|
||||
client = OpenAI(base_url=api_base_url, api_key=api_key)
|
||||
|
||||
latest_message = messages[-1] # 最后一条用户提问
|
||||
if latest_message.role == "user":
|
||||
latest_user_question = latest_message.content.strip()
|
||||
time_prefixes = ["今天", "昨天", "本周", "下周", "明天", "今日"] # 可扩展的时间前缀列表
|
||||
if any(latest_user_question.startswith(prefix) for prefix in time_prefixes):
|
||||
history_messages = []
|
||||
else:
|
||||
history_messages = messages[:-1] # 除最后一条之外的历史记录
|
||||
|
||||
# 格式化对话历史
|
||||
chat_history = "\n".join([f"{msg.role}: {msg.content}" for msg in messages])
|
||||
chat_history = "\n".join([f"{msg.role}: {msg.content}" for msg in history_messages])
|
||||
latest_user_question = latest_message.content if latest_message.role == "user" else ""
|
||||
|
||||
prompt = f'''你是一个智能助手,需要从以下对话记录中提取用户最近一次提问的完整问题:
|
||||
1. **仅关注用户的最后一个问题**,无论之前用户提问了什么,**不要受到之前用户问题的影响**。
|
||||
2. **如果用户的最后一个问题包含指代词**(如“作业计划分别是什么”、“具体是哪2项”、“刚刚那个故事”、“明天呢”、“合肥中心变工程呢”等),请结合用户上一次的问题和**AI(助手)回答**,补充信息,使问题成为完整的句子。
|
||||
3. **如果用户的最后一个问题的主语是“公司”这个字眼(如“公司今天有多少四级风险作业计划”或“公司今天有多少4级风险的作业面”)则不要参考对话历史进行补全,保持用户原始表达,不要替换为具体的公司名,工程名或项目部名等。**
|
||||
4. **如果用户的最后一个问题本身是完整的**(即未使用上述2里的指代词),直接输出该问题,不要受前文影响。
|
||||
5. **如果问题缺少上下文信息**(如工程、项目部和时间等),仅在**最近的 AI 回答**提供了明确的上下文时进行补全,否则保持用户的原始输入,不要添加错误的补全信息。
|
||||
6. **如果用户的最新问题包含时间信息**(如“今天、明天、本周”),请确保其被保留,并且不改变时间表达方式。
|
||||
- **如果用户的提问本身省略了时间信息,但最近 AI 回答包含时间信息,则补全时间**。
|
||||
- **例如:用户问“具体是哪20项”时,最近 AI 回答是“今天送1分公司第二项目管理部有20项作业计划”,那么补全后的问题应为“今天送1分公司第二项目管理部具体是哪20项作业计划”**。
|
||||
7. **不要改写问题的主体和语序**,仅在需要时补全信息,避免误修改用户原始表达。
|
||||
8. 直接输出补全后的完整问题,不需要额外解释,也不需要输出“用户想了解的问题”这样的字眼。
|
||||
9. **当用户的最后一条消息使用了“第一个”、“第1个”、“第2个”……等指代方式,且上一条 AI 回复中列出了多个选项(如多个工程名、公司名、项目部等),你需要:**
|
||||
- 精确提取用户所指的序号(如“第3个”指第3个工程名、公司名或项目部名);
|
||||
- 将该工程、公司或项目部的完整名称(包括括号中的编号)提取出来;
|
||||
- **用完整名称替换掉用户上一个问题中出现的简称或模糊表达,并保留用户问题中的其它部分(如时间、计划数、内容)不变**;
|
||||
- 示例:
|
||||
- 原始问题:`2025年南苑调相机检修(PROJ-2023-0179)今天有多少作业计划`
|
||||
- AI 回答:列出多个工程,第1个是`检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修(PROJ-2023-0179)`
|
||||
- 用户回复:“第1个”
|
||||
- 则最终提问应为:
|
||||
`检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修(PROJ-2023-0179)今天有多少作业计划`
|
||||
**对话记录:**
|
||||
prompt = f'''
|
||||
你是一个意图识别与补全助手,你的任务是根据用户的最新问题判断是否需要补全,如果不需要补全,则原样返回用户的最新问题,否则需要结合对话记录请你补用户的最新问题,并只返回最终的完整问题。请严格按照如下逻辑判断并执行:
|
||||
|
||||
---
|
||||
|
||||
【规则判断与补全流程】
|
||||
|
||||
第一步:用户最新问题是否以“公司”为主语?→ 原样返回,无需补全
|
||||
- 若用户最新问题主语是“公司”,直接返回原句,无需补全。
|
||||
- 主语为“公司”的典型句式:
|
||||
- 以“公司”开头;
|
||||
- 以“今天”“昨天”“本周”“下周”等时间词开头,紧跟“公司”作为主语;
|
||||
- 示例:
|
||||
- 用户的最新问题:“今天公司有多少四级风险作业计划?”
|
||||
- 用户的最新问题:“今天公司有多少作业计划”
|
||||
- 用户的最新问题:“公司今天有多少4级风险的作业面?”
|
||||
- 最终提问均为: 原句不变。
|
||||
|
||||
第二步:用户最新问题是否是完整的问题?→ 原样返回,无需补全
|
||||
- 若用户最新问题中包含下列之一:具体的项目部名、工程名、分公司名、班组名、地区名等信息,且同时出现作业计划、作业面、班组等查询对象,视为完整问题,直接返回原句,无需补全。
|
||||
- 示例:
|
||||
- 用户最新问题:“今天张三班组有多少作业计划?”
|
||||
- 用户最新问题:“今天绿雪莲塘工程有多少作业计划”
|
||||
- 最终提问均为: 原句不变。
|
||||
|
||||
第三步:用户最新问题是否存在指代词?→ 结合用户最新问题和对话记录进行补全
|
||||
- 若用户最新问题问题中出现模糊表达,如“具体是哪些项”、“是哪两个”、“作业计划分别是什么”、“合肥中心变工程呢”、“具体是哪20项”等,请结合上一个用户问题和上一个AI回复补全问题信息。
|
||||
- 示例1:
|
||||
- 用户最新问题:“具体的作业计划分别是什么”
|
||||
- 对话记录的最后一个用户问题:“今天送一分公司有多少项作业计划”
|
||||
- 对话记录的最后一个AI回答:“今天送电一分公司有21项作业计划”
|
||||
- 则最终提问应为:
|
||||
“今天送电一分公司的21项作业计划分别是什么”
|
||||
- 示例2:
|
||||
- 用户的最新问题:“具体的作业内容是什么”
|
||||
- 对话记录的最后一个用户问题:今天送一分公司第一项目部有多少项作业计划
|
||||
- 对话记录的最后一个AI回答:今天送电一分公司第一项目管理部有21项作业计划
|
||||
- 则最终提问应为:
|
||||
“今天送电一分公司第一项目管理部的21项作业计划分别是什么”
|
||||
|
||||
第四步:用户最新问题是否为序号指代(第一个/第2个)?→ 用完整工程/项目/公司名替换补全
|
||||
- 精确提取用户所指的序号(如“第3个”指第3个工程名、公司名或项目部名);
|
||||
- 将该工程、公司或项目部的完整名称(包括括号中的编号)提取出来;
|
||||
- **用完整名称替换掉用户上一个问题中出现的简称或模糊表达,并保留用户问题中的其它部分原样不变(如时间、计划数、内容)不变**;
|
||||
- 示例1:
|
||||
- 用户最新问题:"第一个" 或"第1个"
|
||||
- 对话记录的最后一个用户问题:"2025年南苑调相机检修(PROJ-2023-0179)今天有多少作业计划""
|
||||
- 对话记录的最后一个的AI回答:列出多个工程名,第1个是`检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修(PROJ-2023-0179)`
|
||||
- 则最终提问应为:
|
||||
`检修公司调相机一二次设备检修维护和改造服务框架-2025年南苑调相机检修(PROJ-2023-0179)今天有多少作业计划`
|
||||
- 示例2:
|
||||
- 用户的最新问题:"第二个" 或"第2个"
|
||||
- 对话记录的最后一个用户问题:"宏源电力建设公司第三项目部今天有多少项作业计划""
|
||||
- 对话记录的最后一个AI回答:列出多个分公司名,第2个:"安徽宏源电力建设有限公司(线路)"
|
||||
- 则最终提问应为:
|
||||
"安徽宏源电力建设有限公司(线路)第三项目部今天有多少项作业计划"
|
||||
|
||||
第五步:输出最终问题
|
||||
- 直接输出最终问题(无解释、无多余前缀或后缀)
|
||||
- 保持句式自然清晰
|
||||
|
||||
---
|
||||
|
||||
对话记录:
|
||||
{chat_history}
|
||||
|
||||
请提取并补全用户的最新问题:'''
|
||||
|
||||
用户最新问题:
|
||||
{latest_user_question}
|
||||
|
||||
# prompt = f'''你是一个智能助手,需要从以下对话记录中提取用户最近一次提问的完整问题:
|
||||
# 1. **仅关注用户的最后一个问题**,无论之前用户提问了什么,**不要受到之前用户问题的影响**。
|
||||
# 2. **如果用户的最后一个问题包含指代词**(如“作业计划分别是什么”、“具体是哪2项”、“刚刚那个故事”、“明天呢”、“合肥中心变工程呢”等),请结合用户上一次的问题和**AI(助手)回答**,补充信息,使问题成为完整的句子。
|
||||
# 3. **如果用户的最后一个问题本身是完整的**(即未使用上述2里的指代词),直接输出该问题,不要受前文影响。
|
||||
# - **如果用户的最后一个问题包含“公司”字眼并且“公司”单独出现(如“公司今天有多少作业计划”)**,则不要参考对话历史进行补全,保持用户原始表达。
|
||||
# - **如果用户的最后一个问题里公司,工程,项目部等都不出现(如“今天有多少作业计划”),则不要参考对话历史进行补全,保持用户原始表达。
|
||||
# 4. **如果问题缺少上下文信息**(如工程、项目部和时间等),仅在**最近的 AI 回答**提供了明确的上下文时进行补全,否则保持用户的原始输入,不要添加错误的补全信息。
|
||||
# 5. **如果用户的最新问题包含时间信息**(如“今天、明天、本周”),请确保其被保留,并且不改变时间表达方式。
|
||||
# - **如果用户的提问本身省略了时间信息,但最近 AI 回答包含时间信息,则补全时间**。
|
||||
# - **例如:用户问“具体是哪20项”时,最近 AI 回答是“今天送1分公司第二项目管理部有20项作业计划”,那么补全后的问题应为“今天送1分公司第二项目管理部具体是哪20项作业计划”**。
|
||||
# 6. **不要改写问题的主体和语序**,仅在需要时补全信息,避免误修改用户原始表达。
|
||||
# 7. 直接输出补全后的完整问题,不需要额外解释,也不需要输出“用户想了解的问题”这样的字眼。
|
||||
#
|
||||
# **对话记录:**
|
||||
# {chat_history}
|
||||
#
|
||||
# 请提取并补全用户的最新问题:'''
|
||||
请输出最终问题:'''
|
||||
|
||||
message = [
|
||||
{"role": "system", "content": "你是一个智能助手,负责提取用户最近的问题,并自动补全缺失信息,使其成为完整的问题句子。"},
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
|
||||
print(f"message:{message}")
|
||||
|
||||
response = client.chat.completions.create(
|
||||
messages=message,
|
||||
model=model_name,
|
||||
max_tokens=100,
|
||||
temperature=0.3, # 降低随机性,提高确定性
|
||||
temperature=0.1, # 降低随机性,提高确定性
|
||||
stream=False
|
||||
)
|
||||
|
||||
|
|
@ -332,33 +368,6 @@ def extract_multi_chat(messages):
|
|||
print(f"多轮意图后用户想要的问题是:{res}", flush=True)
|
||||
return res
|
||||
|
||||
# def multi_slot_recognizer(intention_id, messages):
|
||||
# from openai import OpenAI
|
||||
# client = OpenAI(base_url=api_base_url, api_key=api_key)
|
||||
#
|
||||
# # prompt = f'''
|
||||
# # 根据用户的输入{messages},抽取出用户最近最想了解的一个问题,要求:保持客观真实,简单明了,不要多余解释和阐述,不需要输出如“用户想了解的问题”类似的字眼
|
||||
# # '''
|
||||
# prompt = f'''根据以下对话记录,提取用户最近一次提问的核心意图,根据关键信息和上下文的回答内容并且关注用户最后的问题,提取出的意图需表述为完整的问题句式:
|
||||
# 对话记录:{messages}'''
|
||||
#
|
||||
# message = [{"role": "system", "content": prompt}]
|
||||
# message.extend(messages)
|
||||
# # print(message)
|
||||
# response = client.chat.completions.create(
|
||||
# messages=message,
|
||||
# model=model_name,
|
||||
# max_tokens=1000,
|
||||
# temperature=0.001,
|
||||
# stream=False
|
||||
# )
|
||||
# res = response.choices[0].message.content
|
||||
#
|
||||
# print(f"多轮意图后用户想要的问题是{res}",flush=True)
|
||||
# entries = slot_recognizer.recognize(res)
|
||||
#
|
||||
# return entries
|
||||
|
||||
def check_lost(int_res, slot):
|
||||
#labels: ["天气查询","通用对话","页面切换","日计划数量查询","周计划数量查询","日计划作业内容","周计划作业内容","施工人数","作业考勤人数","知识问答"]
|
||||
mapping = {
|
||||
|
|
@ -412,7 +421,7 @@ def check_lost(int_res, slot):
|
|||
|
||||
|
||||
#标准化工程名
|
||||
def check_project_standard_slot(int_res, slot) -> tuple:
|
||||
def check_standard_name_slot(int_res, slot) -> tuple:
|
||||
intention_list = {3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15}
|
||||
if int_res not in intention_list:
|
||||
return CheckResult.NO_MATCH, ""
|
||||
|
|
@ -421,39 +430,13 @@ def check_project_standard_slot(int_res, slot) -> tuple:
|
|||
if PROJECT_DEPARTMENT in slot:
|
||||
if IMPLEMENTATION_ORG not in slot:
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, "请补充该项目部所属的分公司名称"
|
||||
else:
|
||||
standard_company, matched_projectDepartment = standardize_company_and_projectDepartment(slot[IMPLEMENTATION_ORG], slot[PROJECT_DEPARTMENT], standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
print(f"check_project_standard_slot : {slot[IMPLEMENTATION_ORG]}, {slot[PROJECT_DEPARTMENT]}")
|
||||
# if not standard_company:
|
||||
# return CheckResult.NEEDS_MORE_ROUNDS, f"未匹配到您说的分公司名:{slot[IMPLEMENTATION_ORG]},请提供更准确的分公司名"
|
||||
# if not matched_projectDepartment:
|
||||
# return CheckResult.NEEDS_MORE_ROUNDS, f"未匹配到您说的项目名:{slot[PROJECT_DEPARTMENT]},请提供更准确的项目名"
|
||||
# if len(standard_company) > 1:
|
||||
# prompt = generate_project_prompt(matched_projectDepartment)
|
||||
# return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
# if len(matched_projectDepartment) == 1:
|
||||
# slot[IMPLEMENTATION_ORG] = standard_company[0]
|
||||
# slot[PROJECT_DEPARTMENT] = matched_projectDepartment[0]
|
||||
# elif len(matched_projectDepartment) > 1:
|
||||
# prompt = generate_project_prompt(standard_company, original_name=slot[IMPLEMENTATION_ORG], type="分公司名")
|
||||
# return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
if not standard_company:
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, f"未匹配到您说的分公司名:{slot[IMPLEMENTATION_ORG]},请提供更准确的分公司名"
|
||||
if not matched_projectDepartment:
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, f"未匹配到您说的项目名:{slot[PROJECT_DEPARTMENT]},请提供更准确的项目名"
|
||||
if standard_company and len(matched_projectDepartment) == 1:
|
||||
slot[IMPLEMENTATION_ORG] = standard_company
|
||||
slot[PROJECT_DEPARTMENT] = matched_projectDepartment[0]
|
||||
elif standard_company and len(matched_projectDepartment) > 1:
|
||||
prompt = generate_project_prompt(matched_projectDepartment)
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
|
||||
#工程名和分公司名标准化
|
||||
#工程名和分公司名和项目名标准化
|
||||
for key, value in slot.items():
|
||||
if key == PROJECT_NAME:
|
||||
print(f"check_project_standard_slot original project : {slot[PROJECT_NAME]}")
|
||||
match_results = multiple_standardize_single_name(value, standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
print(f"standardize_single_name 工程名 :result:{match_results}",flush=True)
|
||||
print(f"check_standard_name_slot 原始工程名 : {slot[PROJECT_NAME]}")
|
||||
match_results = multiple_standardize_single_name(value, standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,80)
|
||||
print(f"check_standard_name_slot 匹配后工程名 :result:{match_results}",flush=True)
|
||||
if match_results and len(match_results) == 1:
|
||||
slot[key] = match_results[0]
|
||||
else:
|
||||
|
|
@ -461,14 +444,25 @@ def check_project_standard_slot(int_res, slot) -> tuple:
|
|||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
|
||||
if key == IMPLEMENTATION_ORG and slot[key] != "公司":
|
||||
print(f"check_project_standard_slot original company : {slot[IMPLEMENTATION_ORG]}")
|
||||
print(f"check_standard_name_slot 原始分公司名 : {slot[IMPLEMENTATION_ORG]}")
|
||||
match_results = multiple_standardize_single_name(value, standard_company_name_list, standard_company_name_pinyin_list, pinyin_to_standard_company_name_map, lower_score=50, high_score=80, isArabicNumConv = True)
|
||||
print(f"check_project_standard_slot 分公司名: result:{match_results}",flush=True)
|
||||
print(f"check_standard_name_slot 匹配后分公司名: result:{match_results}",flush=True)
|
||||
if match_results and len(match_results) == 1:
|
||||
slot[key] = match_results[0]
|
||||
else:
|
||||
prompt = generate_project_prompt(match_results, original_name=slot[IMPLEMENTATION_ORG], type="分公司名")
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
|
||||
if key == PROJECT_DEPARTMENT:
|
||||
print(f"check_standard_name_slot 原始项目部名 : {slot[PROJECT_DEPARTMENT]}")
|
||||
match_results = standardize_projectDepartment(slot[IMPLEMENTATION_ORG], value, standard_company_program, high_score=90)
|
||||
print(f"check_standard_name_slot 匹配后项目部名: result:{match_results}",flush=True)
|
||||
if match_results and len(match_results) == 1:
|
||||
slot[key] = match_results[0]
|
||||
else:
|
||||
prompt = generate_project_prompt(match_results, original_name=slot[PROJECT_DEPARTMENT], type="项目名")
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||
|
||||
if key == RISK_LEVEL:
|
||||
if slot[RISK_LEVEL] not in["2级","3级","4级","5级"] and slot[RISK_LEVEL] not in["二级","三级","四级","五级"]:
|
||||
return CheckResult.NEEDS_MORE_ROUNDS, "您查询的风险等级在系统中未找到,请确认风险等级后再次提问"
|
||||
|
|
@ -476,21 +470,19 @@ def check_project_standard_slot(int_res, slot) -> tuple:
|
|||
return CheckResult.NO_MATCH, ""
|
||||
|
||||
# test_cases = [
|
||||
# ("安徽宏源电力建设有限公司", "第三项目管理部"), # 期望返回所有"第三项目管理部"
|
||||
# ("安徽宏源电力建设有限公司", "第九项目部"), # 期望返回 "第九项目管理部"
|
||||
# ("顺安电网公司", "第二项目部"), # 期望匹配"顺安电网建设有限公司"下的"第二项目管理部"
|
||||
# ("送电一公司", "第三项目部"), # 期望返回"第三项目管理部"
|
||||
# ("送电2公司", "第三项目部"), # 期望返回"第三项目管理部"
|
||||
# ("消防分公司", "第七项目部"), # 期望返回"第七项目管理部
|
||||
# ("建筑分公司", "第七项目部"), # 期望返回"第七项目管理部"
|
||||
# ("建筑消防分公司", "第七项目部"), # 期望返回"第七项目管理部"
|
||||
# ("建筑分公司消防分公司", "第七项目部") # 期望返回"第七项目管理部"
|
||||
# ("安徽宏源电力建设有限公司(线路)", "第三项目管理部"), # 期望返回所有"第三项目管理部"
|
||||
# ("送电一分公司", "第8项目管理部"), # 期望返回 "第八项目管理部"
|
||||
# # ("顺安电网公司", "第二项目部"), # 期望匹配"顺安电网建设有限公司"下的"第二项目管理部"
|
||||
# # ("送电一公司", "第三项目部"), # 期望返回"第三项目管理部"
|
||||
# # ("送电2公司", "第三项目部"), # 期望返回"第三项目管理部"
|
||||
# # ("消防分公司", "第七项目部"), # 期望返回"第七项目管理部
|
||||
# # ("建筑分公司", "第七项目部"), # 期望返回"第七项目管理部"
|
||||
# # ("建筑消防分公司", "第七项目部"), # 期望返回"第七项目管理部"
|
||||
# # ("建筑分公司消防分公司", "第七项目部") # 期望返回"第七项目管理部"
|
||||
# ]
|
||||
#
|
||||
|
||||
# for company, project in test_cases:
|
||||
# # result = standardize_company_and_project(company, project,standard_company_program)
|
||||
# result = standardize_company_and_projectDepartment(company, project,standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
# # result = multiple_standardize_single_name("company", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,40,70)
|
||||
# result = standardize_projectDepartment(company, project,standard_company_program, high_score=90)
|
||||
# print(f"输入: {company}, {project} -> 输出: {result}")
|
||||
#
|
||||
# result = standardize_single_name("送电一公司", standard_company_name_list)
|
||||
|
|
@ -515,16 +507,34 @@ def check_project_standard_slot(int_res, slot) -> tuple:
|
|||
# print(f"输入: 芦集变电站-> 输出: {result}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("宋轶分公司", standard_company_name_list, standard_company_name_pinyin_list, pinyin_to_standard_company_name_map,75,80)
|
||||
# print(f"standardize_pinyin_single_name 输入: 宋轶分公司-> 输出: {match_results}")
|
||||
# #
|
||||
# match_results = multiple_standardize_single_name("合肥中心变", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,40,70)
|
||||
# print(f"standardize_pinyin_single_name 输入: 合肥中心变-> 输出: {match_results}")
|
||||
# print(f"multiple_standardize_single_name 输入: 宋轶分公司-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("淮南安丰", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,40,70)
|
||||
# print(f"standardize_pinyin_single_name 输入: 淮南安丰工程-> 输出: {match_results}")
|
||||
# match_results = multiple_standardize_single_name("宏源电力公司", standard_company_name_list, standard_company_name_pinyin_list, pinyin_to_standard_company_name_map,75,80)
|
||||
# print(f"multiple_standardize_single_name 输入: 宏源电力公司-> 输出: {match_results}")
|
||||
#
|
||||
# #
|
||||
# match_results = multiple_standardize_single_name("合肥中心变", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 合肥中心变-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("淮南安丰", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 淮南安丰工程-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("宿州萧砀新建工程", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 宿州萧砀新建工程-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("芦集变电站", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"standardize_pinyin_single_name 输入: 芦集变电站-> 输出: {match_results}")
|
||||
# print(f"multiple_standardize_single_name 输入: 芦集变电站-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("卢集变电站新建工程", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 卢集变电站-> 输出: {match_results}")
|
||||
#
|
||||
# match_results = multiple_standardize_single_name("芦集古沟变电站新建工程", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 芦集古沟变电站新建工程-> 输出: {match_results}")
|
||||
# #
|
||||
# match_results = multiple_standardize_single_name("金牛变电站", standard_project_name_list, standard_project_name_pinyin_list, pinyin_to_standard_project_name_map,20,70)
|
||||
# print(f"multiple_standardize_single_name 输入: 金牛变电站-> 输出: {match_results}")
|
||||
#
|
||||
|
||||
#
|
||||
# company, project = standardize_company_and_projectDepartment("变电分公司","第一项目部", standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
# print(f"company:{company}, project:{project}")
|
||||
|
|
@ -533,8 +543,11 @@ def check_project_standard_slot(int_res, slot) -> tuple:
|
|||
# print(f"company:{company}, project:{project}")
|
||||
# company, project = standardize_company_and_projectDepartment("试验分公司","电缆班", standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
# print(f"company:{company}, project:{project}")
|
||||
company, project = standardize_company_and_projectDepartment("宏源电力投资有限公司","第三项目部", standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
print(f"company:{company}, project:{project}")
|
||||
# company, project = standardize_company_and_projectDepartment("宏源电力投资有限公司","第三项目部", standard_company_name_list, standard_company_program, pinyin_to_standard_company_name_map)
|
||||
# print(f"company:{company}, project:{project}")
|
||||
#
|
||||
# if __name__ == '__main__':
|
||||
# app.run(host='0.0.0.0', port=18073, debug=True)
|
||||
|
||||
# match_results = standardize_projectDepartment("安徽宏源电力建设有限公司(变电)", "第3项目部", standard_company_program, high_score=85)
|
||||
# print(f"match_results:{match_results}")
|
||||
if __name__ == '__main__':
|
||||
app.run(host='0.0.0.0', port=18073, debug=True)
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ class SlotRecognition:
|
|||
#对所有实体进行替换:删除每个实体中的 '#' 字符
|
||||
entities[key] = value.replace('#', '')
|
||||
#暂时不支持分包商和监管单位的查询
|
||||
if key == SUBCONTRACTOR or key == CONSTRUCTION_UNIT:
|
||||
if (key == SUBCONTRACTOR or key == CONSTRUCTION_UNIT) and ("宏源" in value or "宏远" in value):
|
||||
updates[IMPLEMENTATION_ORG] = value # 统一映射到 IMPLEMENTATION_ORG
|
||||
else:
|
||||
updates[key] = value # 保留原 key
|
||||
|
|
|
|||
70
api/utils.py
70
api/utils.py
|
|
@ -49,12 +49,14 @@ def extract_number(text):
|
|||
return num_str # 中文数字直接返回
|
||||
return None
|
||||
|
||||
def standardize_company_and_projectDepartment(input_company, input_project, origianl_company_list , company_project_department_map, company_pinyin_to_original_map = None):
|
||||
|
||||
#标准化项目部名
|
||||
def standardize_projectDepartment(standard_company, input_project , company_project_department_map, high_score=85):
|
||||
"""
|
||||
将口语化的公司名和项目部名转换为标准化名称。
|
||||
|
||||
参数:
|
||||
input_company (str): 用户输入的公司名(可能是口语化或不完整的名称)。
|
||||
standard_company (str): 标准化公司名。
|
||||
input_project (str): 用户输入的项目部名(可能是口语化或不完整的名称)。
|
||||
company_project_department_map (dict): 标准化的公司名和项目部名数据,格式为 {公司名: [项目部名1, 项目部名2, ...]}。
|
||||
high_score (int): 最相似项目部名的相似度达到该分数(>=)时直接返回该项目部名,默认 85。
|
||||
|
|
@ -63,19 +65,11 @@ def standardize_company_and_projectDepartment(input_company, input_project, orig
|
|||
list: 匹配的项目部名列表;处理过程中发生异常时返回 None。
|
||||
"""
|
||||
try:
|
||||
# **1. 标准化公司名**
|
||||
best_company_match = multiple_standardize_single_name(input_company, origianl_company_list,list(company_pinyin_to_original_map.keys()),company_pinyin_to_original_map,50,85,True)
|
||||
|
||||
if not best_company_match:
|
||||
return None, None
|
||||
else:
|
||||
standard_company = best_company_match[0]
|
||||
|
||||
# **2. 先尝试直接匹配最相似的项目名**
|
||||
project_match = process.extractOne(input_project, company_project_department_map[standard_company], scorer=fuzz.ratio)
|
||||
print(f"项目部名称最相似:{project_match[0]},{project_match[1]}", flush=True)
|
||||
if project_match and project_match[1] >= 85:
|
||||
return standard_company, [project_match[0]] # 直接返回匹配的项目名
|
||||
if project_match and project_match[1] >= high_score:
|
||||
return [project_match[0]] # 直接返回匹配的项目名
|
||||
|
||||
# **3. 提取项目部的数字部分**
|
||||
query_number = extract_number(input_project)
|
||||
|
|
@ -87,10 +81,10 @@ def standardize_company_and_projectDepartment(input_company, input_project, orig
|
|||
if query_number and query_number == project_number:
|
||||
matched_projects.append(project)
|
||||
|
||||
return standard_company, matched_projects
|
||||
return matched_projects
|
||||
except Exception as e:
|
||||
print(f"standardize_company_and_projectDepartment:{e}", flush=True)
|
||||
return None,None
|
||||
print(f"standardize_projectDepartment:{e}", flush=True)
|
||||
return None
|
||||
|
||||
def multiple_standardize_single_name(origin_input_name, origin_name_list, pinyin_name_list = None, pinyin_to_original_map = None, lower_score=70, high_score=85, isArabicNumConv = False):
|
||||
"""
|
||||
|
|
@ -106,9 +100,7 @@ def multiple_standardize_single_name(origin_input_name, origin_name_list, pinyin
|
|||
#First round, 原始标准名的匹配性查找,能找到直接返回
|
||||
if isArabicNumConv:
|
||||
origin_input_name = arabic_to_chinese_number(origin_input_name)
|
||||
|
||||
match_results = process.extract(origin_input_name, origin_name_list, scorer=fuzz.token_sort_ratio, limit=len(origin_name_list))
|
||||
|
||||
# 找到所有相似度 >= lower_score 的匹配项
|
||||
original_high_confidence_matches = [(match[0], match[1]) for match in match_results if match[1] >= lower_score]
|
||||
print(f"standardize_pinyin_single_name 原始名匹配, high_confidence_matches:{original_high_confidence_matches[:3]}", flush=True)
|
||||
|
|
@ -116,10 +108,7 @@ def multiple_standardize_single_name(origin_input_name, origin_name_list, pinyin
|
|||
combined_low_confidence_matches = []
|
||||
if original_high_confidence_matches:
|
||||
origin_best_match = max(original_high_confidence_matches, key=lambda x: x[1], default=None)
|
||||
|
||||
# 直接返回最高相似度的单个匹配项
|
||||
# print(f"原始名匹配: {origin_best_match}", flush=True)
|
||||
if origin_best_match and origin_best_match[1] >= high_score:
|
||||
if origin_best_match and origin_best_match[1] > high_score:
|
||||
return [origin_best_match[0]]
|
||||
|
||||
else:
|
||||
|
|
@ -129,25 +118,26 @@ def multiple_standardize_single_name(origin_input_name, origin_name_list, pinyin
|
|||
return None #
|
||||
|
||||
#第二轮, 拼音名的匹配性查找,能找到直接返回
|
||||
pinyin_input_name = text_to_pinyin(origin_input_name)
|
||||
match_results = process.extract(pinyin_input_name, pinyin_name_list, scorer=fuzz.ratio, limit=len(pinyin_name_list))
|
||||
|
||||
# 筛选出匹配分数 > lower_score 的结果
|
||||
pinyin_high_confidence_matches = [(match[0], match[1]) for match in match_results if match[1] >= lower_score]
|
||||
print(f"standardize_pinyin_single_name 拼音匹配, input_name:{pinyin_input_name}, high_confidence_matches:{pinyin_high_confidence_matches[:3]}", flush=True)
|
||||
|
||||
if not pinyin_high_confidence_matches:
|
||||
return combined_low_confidence_matches # 没有找到匹配项
|
||||
|
||||
# 选择最高相似度的匹配项
|
||||
pinyin_best_match = max(pinyin_high_confidence_matches, key=lambda x: x[1], default=None)
|
||||
|
||||
if pinyin_best_match and pinyin_best_match[1] >= high_score:
|
||||
return [pinyin_to_original_map[pinyin_best_match[0]]] # 直接返回最高相似度的原始工程名
|
||||
|
||||
combined_low_confidence_matches.extend(
|
||||
[pinyin_to_original_map[match[0]] for match in pinyin_high_confidence_matches[:3]]
|
||||
)
|
||||
# pinyin_input_name = text_to_pinyin(origin_input_name)
|
||||
# #fuzz.partial_ratio
|
||||
# match_results = process.extract(pinyin_input_name, pinyin_name_list, scorer=fuzz.ratio, limit=len(pinyin_name_list))
|
||||
#
|
||||
# # 筛选出匹配分数 > lower_score 的结果
|
||||
# pinyin_high_confidence_matches = [(match[0], match[1]) for match in match_results if match[1] >= lower_score]
|
||||
# print(f"standardize_pinyin_single_name 拼音匹配, input_name:{pinyin_input_name}, high_confidence_matches:{pinyin_high_confidence_matches[:3]}", flush=True)
|
||||
#
|
||||
# if not pinyin_high_confidence_matches:
|
||||
# return combined_low_confidence_matches # 没有找到匹配项
|
||||
#
|
||||
# # 选择最高相似度的匹配项
|
||||
# pinyin_best_match = max(pinyin_high_confidence_matches, key=lambda x: x[1], default=None)
|
||||
#
|
||||
# if pinyin_best_match and pinyin_best_match[1] > high_score:
|
||||
# return [pinyin_to_original_map[pinyin_best_match[0]]] # 直接返回最高相似度的原始工程名
|
||||
#
|
||||
# combined_low_confidence_matches.extend(
|
||||
# [pinyin_to_original_map[match[0]] for match in pinyin_high_confidence_matches[:3]]
|
||||
# )
|
||||
# 返回所有匹配项对应的原始名,最多返回最低匹配项的前5个
|
||||
return list(dict.fromkeys(combined_low_confidence_matches))
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ from paddlenlp.trainer import Trainer, TrainingArguments
|
|||
import os
|
||||
from sklearn.metrics import precision_score, recall_score, f1_score
|
||||
|
||||
|
||||
def load_config(config_path):
|
||||
"""加载 YAML 配置文件"""
|
||||
try:
|
||||
|
|
@ -114,7 +113,7 @@ def main():
|
|||
|
||||
# 定义训练参数
|
||||
training_args = TrainingArguments(
|
||||
output_dir="./output",
|
||||
output_dir="./output_temp",
|
||||
evaluation_strategy="epoch",
|
||||
save_strategy="epoch",
|
||||
eval_steps=2000, # 每2000步评估一次(仅当 evaluation_strategy="steps" 时生效)
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ subcontractors = ["安徽远宏电力工程有限公司", "安徽京硚建设有
|
|||
"安徽苏亚建设集团有限公司"]
|
||||
team_leaders = ["李元帅班组长", "刘雨豪班组长", "马新欣班组长", "任家泉班组长", "王海峰班组长", "王书民班组长"]
|
||||
risk_levels = ["1级", "2级", "3级", "4级", "5级"]
|
||||
labels = ["天气查询", "互联网查询", "页面切换", "日计划数量查询", "周计划数量查询", "日计划作业内容", "周计划作业内容",
|
||||
labels = ["天气查询", "通用对话", "页面切换", "日计划数量查询", "周计划数量查询", "日计划作业内容", "周计划作业内容",
|
||||
"施工人数", "作业考勤人数", "知识问答"]
|
||||
|
||||
import json
|
||||
|
|
|
|||
|
|
@ -11,7 +11,9 @@ if not os.path.exists(directory):
|
|||
# 基础数据定义
|
||||
BASE_DATA = {
|
||||
# 实施组织
|
||||
"implementation_organizations": ["宏源电力建设公司(变电)", "送一分公司", "变电分公司", "消防分公司", "安徽宏源电力建设有限公司(线路)", "检修试验分公司"],
|
||||
"implementation_organizations": ["安徽宏源电力建设公司(变电)", "宏源电力公司变电","宏源电力建设公司","送一分公司","送二分公司",
|
||||
"送电一分公司","送电二分公司","变电分公司","建筑分公司","消防分公司",
|
||||
"安徽宏源电力建设有限公司(线路)", "检修试验分公司"],
|
||||
# 工程性质
|
||||
"project_types": ["基建", "技改大修", "用户工程", "小型基建"],
|
||||
# 工程名
|
||||
|
|
@ -39,16 +41,16 @@ BASE_DATA = {
|
|||
"合州变电站",
|
||||
"合州换流站"
|
||||
],
|
||||
# 建管单位
|
||||
"construction_units": ["国网安徽省电力有限公司建设分公司", "国网安徽省电力有限公司马鞍山供电公司",
|
||||
"中铁二局集团电务工程有限公司"],
|
||||
# 建管单位,"国网安徽省电力有限公司建设分公司", "国网安徽省电力有限公司马鞍山供电公司","马鞍山供电公司",
|
||||
"construction_units": ["合肥","马鞍山","滁州"],
|
||||
# 项目部名称
|
||||
"project_departments": ["调试一队", "第9项目管理部", "金上第十一项目部", "第八项目管理部(合肥)", "肥东9号项目部",
|
||||
"金上第一项目部管理部(池州黄山)", "第一项目部管理部(肥东)", "调试四队","第一项目部"],
|
||||
"project_departments": ["调试一队", "第9项目管理部","第9项目管理部门", "金上第十一项目部门", "第八项目管理部(合肥)", "肥东9号项目部",
|
||||
"金上第一项目部管理部(池州黄山)", "第一项目部管理部(肥东)", "调试四队","第一项目部","第10项目管理部特高压部门"],
|
||||
# 项目经理
|
||||
"project_managers": ["陈少平项目经理", "范文立项目经理", "何东洋项目经理"],
|
||||
# 分包单位
|
||||
"subcontractors": ["安徽劦力建筑装饰有限责任公司", "安徽苏亚建设集团有限公司"],
|
||||
"subcontractors": ["劦力建筑责任公司","安徽劦力建筑装饰有限责任公司", "安徽苏亚建设集团有限公司","大信电力建设有限公司","优越电力公司",
|
||||
"安徽国腾电力工程有限公司","安徽嘉昂建设工程有限公司","安徽京硚建设有限公司"],
|
||||
# 班组名称
|
||||
"team_names": ["张朵班组", "刘梁玉班组", "魏玉龙班组"],
|
||||
# 班组长
|
||||
|
|
@ -71,25 +73,30 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{project_name}有多少作业计划?", ["date", "project_name"]),
|
||||
("{project_name}{date}有多少项作业计划?", ["project_name", "date"]),
|
||||
("{date}公司{project_name}有多少作业计划?", ["date", "project_name"]),
|
||||
("{date}送电变公司{project_name}有多少作业计划?", ["date", "project_name"]),
|
||||
("工程性质是{project_type}{date}有多少作业计划?", ["project_type", "date"]),
|
||||
("工程性质是{project_type}{date}有多少项作业计划?", ["project_type", "date"]),
|
||||
("工程性质是{project_type}{date}有多少条作业计划?", ["project_type", "date"]),
|
||||
("{date}风险等级为{risk_level}的作业计划有多少?", ["date", "risk_level"]),
|
||||
("公司{date}工程性质为{project_type}的有多少项作业计划?", ["date", "project_type"]),
|
||||
("安徽送电变公司{date}工程性质为{project_type}的有多少项作业计划?", ["date", "project_type"]),
|
||||
("{date}工程性质为{project_type}的有多少条作业计划?", ["date", "project_type"]),
|
||||
("公司工程性质为{project_type}{date}有多少作业计划?", ["project_type", "date"]),
|
||||
("安徽送电变公司工程性质为{project_type}{date}有多少作业计划?", ["project_type", "date"]),
|
||||
("工程性质为{project_type}{date}有多少项作业计划?", ["project_type", "date"]),
|
||||
("查询{project_name}在{date}的作业计划数量", ["project_name", "date"]),
|
||||
("{date}{project_type}类作业计划有多少?", ["date", "project_type"]),
|
||||
("{project_type}类{date}作业计划有多少?", ["project_type", "date"]),
|
||||
("{construction_unit}在{date}有多少作业计划?", ["construction_unit", "date"]),
|
||||
("{construction_unit}在{date}有多少项作业计划?", ["construction_unit", "date"]),
|
||||
("{date}{construction_unit}有多少作业计划?", ["date", "construction_unit"]),
|
||||
("{date}公司有多少项作业计划?", ["date"]),
|
||||
("{date}送变电公司有多少项作业计划?", ["date"]),
|
||||
("{date}有多少条作业计划?", ["date"]),
|
||||
("公司{date}有多少作业计划?", ["date"]),
|
||||
("送变电公司{date}有多少作业计划?", ["date"]),
|
||||
("安徽送变电{date}有多少作业计划?", ["date"]),
|
||||
("{date}{operating}有多少项作业计划?", ["date", "operating"]),
|
||||
("{date}公司{operating}有多少项作业计划?", ["date", "operating"]),
|
||||
("{date}送变电公司{operating}有多少项作业计划?", ["date", "operating"]),
|
||||
("{date}安徽送变电{operating}有多少项作业计划?", ["date", "operating"]),
|
||||
("{date}{implementation_organization}{project_department}有多少项作业计划?",
|
||||
["date", "implementation_organization", "project_department"]),
|
||||
("{date}{project_department}{implementation_organization}有多少项作业计划?",
|
||||
|
|
@ -98,37 +105,44 @@ TEMPLATE_CONFIG = {
|
|||
["implementation_organization", "project_department", "date"]),
|
||||
("{date}{implementation_organization}有多少项作业计划?", ["date", "implementation_organization"]),
|
||||
("{date}公司{project_department}有多少作业计划?", ["date", "project_department"]),
|
||||
("{date}送变电公司{project_department}有多少作业计划?", ["date", "project_department"]),
|
||||
("{date}安徽送变电{project_department}有多少作业计划?", ["date", "project_department"]),
|
||||
("{date}{project_department}{implementation_organization}有多少作业计划?",
|
||||
["date", "project_department", "implementation_organization"]),
|
||||
("{date}{project_department}有多少项作业计划?", ["date", "project_department"]),
|
||||
("公司{project_department}{date}有多少项{risk_level}风险作业计划?",
|
||||
["project_department", "date", "risk_level"]),
|
||||
("送变电公司{project_department}{date}有多少项{risk_level}风险作业计划?",
|
||||
["project_department", "date", "risk_level"]),
|
||||
("安徽送变电{project_department}{date}有多少项{risk_level}风险作业计划?",
|
||||
["project_department", "date", "risk_level"]),
|
||||
("{project_department}{date}有多少项{risk_level}风险作业计划?",
|
||||
["project_department", "date", "risk_level"]),
|
||||
("{project_department}{date}有多少{risk_level}风险作业计划?", ["project_department", "date", "risk_level"]),
|
||||
# 请帮我查一下
|
||||
("请帮我查一下{date}{project_manager}作业计划是多少?", ["date", "project_manager"]),
|
||||
("请帮我查一下{date}{subcontractor}有多少条作业计划?", ["date", "subcontractor"]),
|
||||
("请帮我查一下{date}分包单位{subcontractor}有多少条作业计划?", ["date", "subcontractor"]),
|
||||
("请帮我查一下{date}{team_leader}有多少作业计划?", ["date", "team_leader"]),
|
||||
("请帮我查一下{date}风险等级为{risk_level}的作业计划有多少?", ["date", "risk_level"]),
|
||||
("请帮我查一下{date}公司{project_department}有多少{risk_level}风险作业计划?",
|
||||
["date", "project_department", "risk_level"]),
|
||||
("请帮我查一下{date}送变电公司{project_department}有多少{risk_level}风险作业计划?",
|
||||
["date", "project_department", "risk_level"]),
|
||||
("请帮我查一下{date}安徽送变电{project_department}有多少{risk_level}风险作业计划?",
|
||||
["date", "project_department", "risk_level"]),
|
||||
("请帮我查一下{date}{project_department}有多少{risk_level}风险作业计划?", ["date", "project_department", "risk_level"]),
|
||||
("请帮我查一下{date}{project_type}类风险等级为{risk_level}的作业计划有多少?", ["date", "project_type", "risk_level"]),
|
||||
("请帮我查一下{date}{construction_unit}有多少{risk_level}风险作业计划?", ["date", "construction_unit", "risk_level"]),
|
||||
|
||||
("请帮我查一下{date}存在{risk_level}风险的有多少", ["date", "risk_level"]),
|
||||
("请帮我查一下{implementation_organization}{date}{risk_level}风险的有多少", ["implementation_organization","date", "risk_level"]),
|
||||
("请帮我查一下{implementation_organization}{date}存在{risk_level}风险的有多少", ["implementation_organization","date", "risk_level"]),
|
||||
|
||||
("{date}{project_type}类{construction_unit}负责的作业计划有多少?",
|
||||
["date", "project_type", "construction_unit"]),
|
||||
|
||||
("{date}{project_type}类{implementation_organization}组织实施的作业计划有多少?",
|
||||
["date", "project_type", "implementation_organization"]),
|
||||
("{date}{project_department}管理的{project_type}类作业计划有多少?",
|
||||
["date", "project_department", "project_type"]),
|
||||
("{date}{subcontractor}承包的{project_type}类作业计划有多少?", ["date", "subcontractor", "project_type"]),
|
||||
("{date}{project_manager}负责的{project_type}类作业计划有多少?",
|
||||
("{date}分包单位{subcontractor}承包的{project_type}类作业计划有多少?", ["date", "subcontractor", "project_type"]),
|
||||
("{date}分包单位为{project_manager}负责的{project_type}类作业计划有多少?",
|
||||
["date", "project_manager", "project_type"]),
|
||||
("{date}{team_leader}带领的{project_type}类作业计划有多少?", ["date", "team_leader", "project_type"]),
|
||||
("{date}{project_name}由{project_manager}作业计划有多少?", ["date", "project_name", "project_manager"]),
|
||||
|
|
@ -159,6 +173,8 @@ TEMPLATE_CONFIG = {
|
|||
|
||||
("{date}{implementation_organization}有多少项作业?", ["date", "implementation_organization"]),
|
||||
("{date}公司{project_department}有多少作业?", ["date", "project_department"]),
|
||||
("{date}送变电公司{project_department}有多少作业?", ["date", "project_department"]),
|
||||
("{date}安徽送变电{project_department}有多少作业?", ["date", "project_department"]),
|
||||
("{date}{project_department}有多少项作业?", ["date", "project_department"]),
|
||||
#有多少
|
||||
("{date}{implementation_organization}{project_department}有多少?",
|
||||
|
|
@ -167,14 +183,34 @@ TEMPLATE_CONFIG = {
|
|||
["project_department", "implementation_organization", "date"]),
|
||||
("{date}{implementation_organization}有多少?", ["date", "implementation_organization"]),
|
||||
("{date}公司{project_department}有多少?", ["date", "project_department"]),
|
||||
("{date}送变电公司{project_department}有多少?", ["date", "project_department"]),
|
||||
("{date}安徽送变电{project_department}有多少?", ["date", "project_department"]),
|
||||
("{date}{project_department}有多少?", ["date", "project_department"]),
|
||||
("{date}{project_name}有多少", ["date", "project_name"]),
|
||||
("{project_name}{date}有多少", ["project_name", "date"]),
|
||||
("{date}公司{project_name}有多少?", ["date", "project_name"]),
|
||||
("{date}送变电公司{project_name}有多少?", ["date", "project_name"]),
|
||||
("{date}安徽送变电{project_name}有多少?", ["date", "project_name"]),
|
||||
("{date}工程性质是{project_type}有多少", ["project_type", "date"]),
|
||||
("工程性质是{project_type}{date}有多少", ["project_type", "date"]),
|
||||
("{date}存在{operating}的有多少", ["date", "operating"]),
|
||||
("{date}{operating}的有多少", ["date", "operating"]),
|
||||
|
||||
#建管单位
|
||||
("{construction_unit}地区在{date}有多少作业计划?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区在{date}有多少项作业计划?", ["construction_unit", "date"]),
|
||||
("{date}{construction_unit}地区有多少作业计划?", ["date", "construction_unit"]),
|
||||
("请帮我查一下{date}{construction_unit}地区有多少{risk_level}风险作业计划?", ["date", "construction_unit", "risk_level"]),
|
||||
("{date}{project_type}类{construction_unit}地区的作业计划有多少?", ["date", "project_type", "construction_unit"]),
|
||||
|
||||
#分包单位
|
||||
("分包单位为{subcontractor}在{date}有多少作业计划?", ["subcontractor", "date"]),
|
||||
("分包单位为{subcontractor}在{date}有多少项作业计划?", ["subcontractor", "date"]),
|
||||
("{date}{subcontractor}这个分包单位有多少作业计划?", ["date", "subcontractor"]),
|
||||
("请帮我查一下{date}{subcontractor}这个分包单位有多少{risk_level}风险作业计划?", ["date", "subcontractor", "risk_level"]),
|
||||
("{date}{project_type}类{subcontractor}这个分包单位的作业计划有多少?",
|
||||
["date", "project_type", "subcontractor"]),
|
||||
|
||||
]
|
||||
},
|
||||
|
||||
|
|
@ -184,14 +220,15 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{project_name}作业计划有多少?", ["date", "project_name"]),
|
||||
("{project_name}{date}作业计划有多少?", ["project_name", "date"]),
|
||||
("公司{project_name}{date}作业计划有多少?", ["project_name", "date"]),
|
||||
("{construction_unit}{date}作业计划有多少?", ["construction_unit", "date"]),
|
||||
("送变电公司{project_name}{date}作业计划有多少?", ["project_name", "date"]),
|
||||
("{construction_unit}地区{date}作业计划有多少?", ["construction_unit", "date"]),
|
||||
# 🎯 仅 date 维度
|
||||
("{date}作业计划有多少?", ["date"]),
|
||||
|
||||
# 🎯 date + 其他单个维度
|
||||
("{date}{project_name}有多少项作业计划?", ["date", "project_name"]),
|
||||
|
||||
("{date}{construction_unit}作业计划有多少?", ["date", "construction_unit"]),
|
||||
("{date}{construction_unit}地区作业计划有多少?", ["date", "construction_unit"]),
|
||||
("{date}{implementation_organization}作业计划有多少?", ["date", "implementation_organization"]),
|
||||
("{date}{implementation_organization}{project_department}作业计划有多少?",
|
||||
["date", "implementation_organization", "project_department"]),
|
||||
|
|
@ -199,11 +236,11 @@ TEMPLATE_CONFIG = {
|
|||
["date", "project_department", "implementation_organization"]),
|
||||
("{date}{project_department}作业计划有多少?", ["date", "project_department"]),
|
||||
("{date}{project_manager}作业计划有多少?", ["date", "project_manager"]),
|
||||
("{date}{subcontractor}作业计划有多少?", ["date", "subcontractor"]),
|
||||
("{date}分包单位为{subcontractor}作业计划有多少?", ["date", "subcontractor"]),
|
||||
("{date}{team_leader}作业计划有多少?", ["date", "team_leader"]),
|
||||
|
||||
("{date}{project_department}作业计划数量", ["date", "project_department"]),
|
||||
("{date}{subcontractor}作业计划数量?", ["date", "subcontractor"]),
|
||||
("{date}{subcontractor}分包单位作业计划数量?", ["date", "subcontractor"]),
|
||||
|
||||
# 🎯 date + 风险维度
|
||||
|
||||
|
|
@ -214,7 +251,7 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{risk_level}风险作业计划有多少", ["date", "risk_level"]),
|
||||
|
||||
# 🎯 date + construction_unit + risk_level
|
||||
("{construction_unit}{date}有多少项{risk_level}风险作业计划", ["construction_unit", "date", "risk_level"]),
|
||||
("{construction_unit}地区{date}有多少项{risk_level}风险作业计划", ["construction_unit", "date", "risk_level"]),
|
||||
|
||||
# 🎯 date + implementation_organization + risk_level
|
||||
("{date}{implementation_organization}风险等级为{risk_level}的作业计划有多少?",
|
||||
|
|
@ -228,8 +265,11 @@ TEMPLATE_CONFIG = {
|
|||
|
||||
# 🎯 project_manager 维度
|
||||
("公司{project_manager}{date}作业计划数量?", ["project_manager", "date"]),
|
||||
("送变电公司{project_manager}{date}作业计划数量?", ["project_manager", "date"]),
|
||||
("{project_manager}在{date}作业计划有多少?", ["project_manager", "date"]),
|
||||
("公司{project_manager}在{date}作业计划有多少?", ["project_manager", "date"]),
|
||||
("送变电公司{project_manager}在{date}作业计划有多少?", ["project_manager", "date"]),
|
||||
("安徽送变电{project_manager}在{date}作业计划有多少?", ["project_manager", "date"]),
|
||||
("{project_manager}在{date}负责的风险等级为{risk_level}的作业计划有多少?",
|
||||
["project_manager", "date", "risk_level"]),
|
||||
|
||||
|
|
@ -264,7 +304,7 @@ TEMPLATE_CONFIG = {
|
|||
("{date}工程性质为{project_type}的作业内容是什么?", ["date", "project_type"]),
|
||||
("工程性质为{project_type}的{date}作业计划分别是什么?", ["project_type", "date"]),
|
||||
("工程性质为{project_type}的{date}4项作业计划分别是什么?", ["project_type", "date"]),
|
||||
("{construction_unit}在{date}作业内容是什么?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区在{date}作业内容是什么?", ["construction_unit", "date"]),
|
||||
# 1. 查询特定日期和项目的作业安排
|
||||
("{date}{project_name}作业是什么?", ["date", "project_name"]),
|
||||
("{date}属于{operating}作业内容是什么?", ["date", "operating"]),
|
||||
|
|
@ -272,7 +312,7 @@ TEMPLATE_CONFIG = {
|
|||
# 3. 查询特定日期和项目类型的工程计划
|
||||
("{date}{project_type}类作业有哪些?", ["date", "project_type"]),
|
||||
|
||||
("{date}{construction_unit}{risk_level}风险的作业内容是什么?", ["date", "construction_unit", "risk_level"]),
|
||||
("{date}{construction_unit}地区{risk_level}风险的作业内容是什么?", ["date", "construction_unit", "risk_level"]),
|
||||
|
||||
("{date}{implementation_organization}{risk_level}风险的作业是什么?",
|
||||
["date", "implementation_organization", "risk_level"]),
|
||||
|
|
@ -301,8 +341,8 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{risk_level}的作业内容是什么?", ["date", "risk_level"]),
|
||||
|
||||
# 11. 查询特定日期和施工单位的任务进展
|
||||
("{construction_unit}{date}的作业计划是什么?", ["construction_unit", "date"]),
|
||||
("{construction_unit}{date}作业有哪些", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区{date}的作业计划是什么?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区{date}作业有哪些", ["construction_unit", "date"]),
|
||||
|
||||
# 12. 查询特定日期和项目经理完成的任务
|
||||
("{project_manager}在{date}作业内容是什么?", ["project_manager", "date"]),
|
||||
|
|
@ -361,7 +401,7 @@ TEMPLATE_CONFIG = {
|
|||
("工程性质为{project_type}在{date}作业是什么?", ["project_type", "date"]),
|
||||
("{date}工程性质为{project_type}作业内容是什么?", ["date", "project_type"]),
|
||||
|
||||
("{date}{construction_unit}作业有哪些?", ["date", "construction_unit"]),
|
||||
("{date}{construction_unit}地区作业有哪些?", ["date", "construction_unit"]),
|
||||
|
||||
("{implementation_organization}{date}的作业有哪些?", ["implementation_organization", "date"]),
|
||||
|
||||
|
|
@ -375,7 +415,7 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{project_type}类作业内容是什么?", ["date", "project_type"]),
|
||||
|
||||
# 6. 查询某施工单位在指定周的作业任务
|
||||
("{construction_unit}在{date}作业计划分别是什么?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区在{date}作业计划分别是什么?", ["construction_unit", "date"]),
|
||||
|
||||
# 7. 查询某项目经理在指定周负责的作业内容
|
||||
("{project_manager}在{date}作业内容是什么?", ["project_manager", "date"]),
|
||||
|
|
@ -393,8 +433,8 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{risk_level}有哪些作业", ["date", "risk_level"]),
|
||||
|
||||
# 11. 查询某施工单位在指定周的作业进展
|
||||
("{construction_unit}在{date}作业内容是什么?", ["construction_unit", "date"]),
|
||||
("{construction_unit}在{date}有哪些作业?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区在{date}作业内容是什么?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区在{date}有哪些作业?", ["construction_unit", "date"]),
|
||||
|
||||
# 13. 查询某团队在指定周的作业安排
|
||||
("{team_leader}领导的团队在{date}的作业有哪些?", ["team_leader", "date"]),
|
||||
|
|
@ -407,7 +447,7 @@ TEMPLATE_CONFIG = {
|
|||
("请帮我查一下{team_name}{date}2项作业计划分别是什么", ["team_name", "date"]),
|
||||
("请帮我查一下{team_name}{date}有哪些作业", ["team_name", "date"]),
|
||||
|
||||
("{date}{construction_unit}具体作业计划", ["date", "construction_unit"]),
|
||||
("{date}{construction_unit}地区具体作业计划", ["date", "construction_unit"]),
|
||||
|
||||
("{implementation_organization}{date}的作业", ["implementation_organization", "date"]),
|
||||
|
||||
|
|
@ -423,10 +463,10 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{project_name}现场有多少施工人员?", ["date", "project_name"]),
|
||||
("{date}{project_name}现场施工人数是多少?", ["date", "project_name"]),
|
||||
|
||||
("{construction_unit}{date}的施工人数是多少?", ["construction_unit", "date"]),
|
||||
("{construction_unit}{date}现场施工人数是多少?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区{date}的施工人数是多少?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区{date}现场施工人数是多少?", ["construction_unit", "date"]),
|
||||
# 2. 统计某施工单位在指定日期的施工总人数
|
||||
("统计{construction_unit}在{date}的施工人数是多少?", ["construction_unit", "date"]),
|
||||
("统计{construction_unit}地区在{date}的施工人数是多少?", ["construction_unit", "date"]),
|
||||
("{date}属于{operating}的施工人数是多少?", ["date", "operating"]),
|
||||
|
||||
# 4. 查询某项目类型在指定日期的施工人员需求
|
||||
|
|
@ -438,16 +478,16 @@ TEMPLATE_CONFIG = {
|
|||
("工程性质为{project_type}{date}的现场施工人数是多少?", ["project_type", "date"]),
|
||||
("工程性质为{project_type}{date}有多少施工人员?", ["project_type", "date"]),
|
||||
# 5. 统计某施工单位在指定日期的各项目施工人数
|
||||
("{construction_unit}在{date}的施工人数是多少?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区在{date}的施工人数是多少?", ["construction_unit", "date"]),
|
||||
# 8. 统计某项目经理管理的项目在指定日期的施工总人数
|
||||
("{project_manager}负责的项目在{date}的施工人数是多少?", ["project_manager", "date"]),
|
||||
("{date}{project_manager}负责的项目的现场施工人数是多少?", ["date", "project_manager"]),
|
||||
|
||||
# 9. 查询某分包商在指定日期的施工人员投入
|
||||
("{subcontractor}{date}施工人员有多少?", ["subcontractor", "date"]),
|
||||
("{subcontractor}{date}的施工人数是多少?", ["subcontractor", "date"]),
|
||||
("{date}{subcontractor}的施工人员有多少?", ["date", "subcontractor"]),
|
||||
("{date}{subcontractor}的施工人数是多少?", ["date", "subcontractor"]),
|
||||
("分包单位为{subcontractor}{date}施工人员有多少?", ["subcontractor", "date"]),
|
||||
("{subcontractor}分包单位{date}的施工人数是多少?", ["subcontractor", "date"]),
|
||||
("{date}分包单位为{subcontractor}的施工人员有多少?", ["date", "subcontractor"]),
|
||||
("{date}{subcontractor}分包单位的施工人数是多少?", ["date", "subcontractor"]),
|
||||
|
||||
("{implementation_organization}{date}现场施工人员有多少?", ["implementation_organization", "date"]),
|
||||
("{implementation_organization}{date}现场有多少施工人员?", ["implementation_organization", "date"]),
|
||||
|
|
@ -475,10 +515,10 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{risk_level}风险的施工人数是多少?", ["date", "risk_level"]),
|
||||
|
||||
# 21. 查询某分包商在指定周的施工人员安排
|
||||
("{subcontractor}{date}的施工人数是多少?", ["subcontractor", "date"]),
|
||||
("分包单位为{subcontractor}{date}的施工人数是多少?", ["subcontractor", "date"]),
|
||||
|
||||
# 22. 统计某施工单位在指定周的高风险作业人员数量
|
||||
("{construction_unit}{date}风险等级为{risk_level}的施工人数是多少?",
|
||||
("{construction_unit}地区{date}风险等级为{risk_level}的施工人数是多少?",
|
||||
["construction_unit", "date", "risk_level"]),
|
||||
|
||||
("{date}{team_name}施工人数是多少", ["date", "team_name"]),
|
||||
|
|
@ -512,18 +552,18 @@ TEMPLATE_CONFIG = {
|
|||
"templates": [
|
||||
("{date}{project_name}作业考勤人数是多少", ["date", "project_name"]),
|
||||
("{project_name}{date}作业考勤人数是多少", ["project_name", "date"]),
|
||||
("查询{subcontractor}{date}的出勤记录", ["subcontractor", "date"]),
|
||||
("查询{subcontractor}{date}的作业考勤人数是多少", ["subcontractor", "date"]),
|
||||
("查询分包单位为{subcontractor}{date}的出勤记录", ["subcontractor", "date"]),
|
||||
("查询{subcontractor}分包单位{date}的作业考勤人数是多少", ["subcontractor", "date"]),
|
||||
#出勤人
|
||||
("{date}{project_name}出勤人数是多少", ["date", "project_name"]),
|
||||
("{project_name}{date}出勤人数是多少", ["project_name", "date"]),
|
||||
("查询{subcontractor}{date}的出勤记录", ["subcontractor", "date"]),
|
||||
("查询{subcontractor}{date}的作业出勤人数是多少", ["subcontractor", "date"]),
|
||||
("查询分包单位为{subcontractor}{date}的出勤记录", ["subcontractor", "date"]),
|
||||
("查询{subcontractor}这个分包单位{date}的作业出勤人数是多少", ["subcontractor", "date"]),
|
||||
|
||||
("{date}{operating}的作业考勤人数是多少?", ["date", "operating"]),
|
||||
("{team_leader}{date}的作业考勤人数是多少", ["team_leader", "date"]),
|
||||
# 4. 统计某施工单位在指定日期的考勤人数
|
||||
("统计{construction_unit}{date}的考勤人数", ["construction_unit", "date"]),
|
||||
("统计{construction_unit}地区{date}的考勤人数", ["construction_unit", "date"]),
|
||||
|
||||
# 5. 查询某实施单位在指定日期的考勤情况
|
||||
("{implementation_organization}{date}的考勤情况如何?", ["implementation_organization", "date"]),
|
||||
|
|
@ -540,8 +580,8 @@ TEMPLATE_CONFIG = {
|
|||
("{project_name}{date}的考勤率", ["project_name", "date"]),
|
||||
|
||||
# 11. 查询某分包商在指定周的出勤情况
|
||||
("{subcontractor}在{date}的出勤情况如何?", ["subcontractor", "date"]),
|
||||
("{subcontractor}在{date}的出勤情况怎么样?", ["subcontractor", "date"]),
|
||||
("分包单位为{subcontractor}在{date}的出勤情况如何?", ["subcontractor", "date"]),
|
||||
("{subcontractor}这个分包单位在{date}的出勤情况怎么样?", ["subcontractor", "date"]),
|
||||
|
||||
("请帮我查一下{date}{team_name}考勤人数是多少", ["date", "team_name"]),
|
||||
("请帮我查一下{team_name}{date}考勤人数", ["team_name", "date"]),
|
||||
|
|
@ -575,13 +615,13 @@ TEMPLATE_CONFIG = {
|
|||
"templates": [
|
||||
("{date}{project_name}有多少作业面?", ["date", "project_name"]),
|
||||
|
||||
("{date}{construction_unit}作业面有多少?", ["date", "construction_unit"]),
|
||||
("{date}{construction_unit}地区作业面有多少?", ["date", "construction_unit"]),
|
||||
("{date}{implementation_organization}作业面是多少?", ["date", "implementation_organization"]),
|
||||
("{date}{implementation_organization}{project_department}有多少作业面?",
|
||||
["date", "implementation_organization", "project_department"]),
|
||||
("{date}{project_department}有多少作业面?", ["date", "project_department"]),
|
||||
("{date}{project_manager}作业面是多少?", ["date", "project_manager"]),
|
||||
("{date}{subcontractor}有多少作业面?", ["date", "subcontractor"]),
|
||||
("{date}{subcontractor}分包单位有多少作业面?", ["date", "subcontractor"]),
|
||||
("{date}{team_leader}作业面是多少?", ["date", "team_leader"]),
|
||||
("{date}{project_name}有多少作业面?", ["date", "project_name"]),
|
||||
("{project_name}{date}有多少项作业面?", ["project_name", "date"]),
|
||||
|
|
@ -603,10 +643,10 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{project_name}现场有多少班组人员?", ["date", "project_name"]),
|
||||
("{date}{project_name}现场班组人数是多少?", ["date", "project_name"]),
|
||||
|
||||
("{construction_unit}{date}的班组人数是多少?", ["construction_unit", "date"]),
|
||||
("{construction_unit}{date}现场班组人数是多少?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区{date}的班组人数是多少?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区{date}现场班组人数是多少?", ["construction_unit", "date"]),
|
||||
# 2. 统计某施工单位在指定日期的班组总人数
|
||||
("统计{construction_unit}在{date}的班组人数是多少?", ["construction_unit", "date"]),
|
||||
("统计{construction_unit}地区在{date}的班组人数是多少?", ["construction_unit", "date"]),
|
||||
("{date}属于{operating}的班组人数是多少?", ["date", "operating"]),
|
||||
|
||||
# 4. 查询某项目类型在指定日期的班组工人员需求
|
||||
|
|
@ -620,7 +660,7 @@ TEMPLATE_CONFIG = {
|
|||
("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]),
|
||||
# 5. 统计班组工单位在指定日期的各项目人数
|
||||
("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]),
|
||||
("{construction_unit}在{date}的班组人数是多少?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区在{date}的班组人数是多少?", ["construction_unit", "date"]),
|
||||
("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]),
|
||||
# 8. 统计某项目经理管理的项目在指定日期的总人数
|
||||
("工程性质为{project_type}{date}的现场班组人数是多少?", ["project_type", "date"]),
|
||||
|
|
@ -648,13 +688,13 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{project_name}班组数是什么?", ["date", "project_name"]),
|
||||
("{date}{project_name}班组是多少?", ["date", "project_name"]),
|
||||
("{date}{project_name}班组有多少个?", ["date", "project_name"]),
|
||||
("{date}{construction_unit}班组数有多少?", ["date", "construction_unit"]),
|
||||
("{date}{construction_unit}地区班组数有多少?", ["date", "construction_unit"]),
|
||||
("{date}{implementation_organization}现场班组数是多少?", ["date", "implementation_organization"]),
|
||||
("{date}{implementation_organization}{project_department}有多少班组?",
|
||||
["date", "implementation_organization", "project_department"]),
|
||||
("{date}{project_department}现场有多少个班组?", ["date", "project_department"]),
|
||||
("{date}{project_manager}现场班组数是多少?", ["date", "project_manager"]),
|
||||
("{date}{subcontractor}现场有多少班组?", ["date", "subcontractor"]),
|
||||
("{date}分包单位为{subcontractor}现场有多少班组?", ["date", "subcontractor"]),
|
||||
("{date}{team_leader}班组数是多少?", ["date", "team_leader"]),
|
||||
("{date}{team_leader}班组有多少?", ["date", "team_leader"]),
|
||||
("{date}{project_name}有多少班组?", ["date", "project_name"]),
|
||||
|
|
@ -679,7 +719,7 @@ TEMPLATE_CONFIG = {
|
|||
("{date}工程性质为{project_type}的作业面内容是什么?", ["date", "project_type"]),
|
||||
("工程性质为{project_type}的{date}作业面分别是什么?", ["project_type", "date"]),
|
||||
("工程性质为{project_type}的{date}4项作业面分别是什么?", ["project_type", "date"]),
|
||||
("{construction_unit}在{date}作业面内容是什么?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区在{date}作业面内容是什么?", ["construction_unit", "date"]),
|
||||
# 1. 查询特定日期和项目的作业安排
|
||||
("{date}{project_name}作业面是什么?", ["date", "project_name"]),
|
||||
("{date}属于{operating}作业面内容是什么?", ["date", "operating"]),
|
||||
|
|
@ -687,7 +727,7 @@ TEMPLATE_CONFIG = {
|
|||
# 3. 查询特定日期和项目类型的工程计划
|
||||
("{date}{project_type}类具体作业面有哪些?", ["date", "project_type"]),
|
||||
|
||||
("{date}{construction_unit}{risk_level}风险的作业面内容是什么?", ["date", "construction_unit", "risk_level"]),
|
||||
("{date}{construction_unit}地区{risk_level}风险的作业面内容是什么?", ["date", "construction_unit", "risk_level"]),
|
||||
|
||||
("{date}{implementation_organization}{risk_level}风险的作业面是什么?",
|
||||
["date", "implementation_organization", "risk_level"]),
|
||||
|
|
@ -708,8 +748,8 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{risk_level}的作业面具体内容是什么?", ["date", "risk_level"]),
|
||||
|
||||
# 11. 查询特定日期和施工单位的任务进展
|
||||
("{construction_unit}{date}的作业面是什么?", ["construction_unit", "date"]),
|
||||
("{construction_unit}{date}作业面具体内容有哪些", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区{date}的作业面是什么?", ["construction_unit", "date"]),
|
||||
("{construction_unit}地区{date}作业面具体内容有哪些", ["construction_unit", "date"]),
|
||||
|
||||
# 12. 查询特定日期和项目经理完成的任务
|
||||
("{project_manager}在{date}三项作业面分别是什么?", ["project_manager", "date"]),
|
||||
|
|
@ -747,13 +787,13 @@ TEMPLATE_CONFIG = {
|
|||
("{date}{project_name}具体班组详情是什么?", ["date", "project_name"]),
|
||||
("{date}{project_name}班组详细情况是什么?", ["date", "project_name"]),
|
||||
("{date}{project_name}班组详情", ["date", "project_name"]),
|
||||
("{date}{construction_unit}具体有哪些班组", ["date", "construction_unit"]),
|
||||
("{date}{construction_unit}地区具体有哪些班组", ["date", "construction_unit"]),
|
||||
("{date}{implementation_organization}班组有哪些?", ["date", "implementation_organization"]),
|
||||
("{date}{implementation_organization}{project_department}现场具体有哪些班组?",
|
||||
["date", "implementation_organization", "project_department"]),
|
||||
("{date}{project_department}现场班组详情是什么?", ["date", "project_department"]),
|
||||
("{date}{project_manager}现场班组情况?", ["date", "project_manager"]),
|
||||
("{date}{subcontractor}具体班组情况是什么?", ["date", "subcontractor"]),
|
||||
("{date}分包单位为{subcontractor}具体班组情况是什么?", ["date", "subcontractor"]),
|
||||
("{date}{team_leader}具体班组详情是什么?", ["date", "team_leader"]),
|
||||
("{date}{team_leader}班组详细情况是什么?", ["date", "team_leader"]),
|
||||
("{date}{project_name}班组详情?", ["date", "project_name"]),
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ from paddlenlp.transformers import ErnieForTokenClassification, ErnieTokenizer
|
|||
from paddlenlp.trainer import Trainer, TrainingArguments
|
||||
from paddlenlp.data import DataCollatorForTokenClassification
|
||||
|
||||
|
||||
# === 1. 加载数据 ===
|
||||
def load_dataset(data_path):
|
||||
with open(data_path, "r", encoding="utf-8") as f:
|
||||
|
|
@ -77,7 +76,7 @@ data_collator = DataCollatorForTokenClassification(tokenizer, padding=True)
|
|||
|
||||
# === 7. 训练参数 ===
|
||||
training_args = TrainingArguments(
|
||||
output_dir="./output",
|
||||
output_dir="./output_temp",
|
||||
evaluation_strategy="epoch",
|
||||
save_strategy="epoch",
|
||||
per_device_train_batch_size=16, # 你的显存较大,可调整 batch_size
|
||||
|
|
|
|||
Loading…
Reference in New Issue