打开方案,规程规范和图纸的功能的训练和标准化

This commit is contained in:
weiweiw 2025-05-22 18:33:10 +08:00
parent b2738705db
commit 5413911e64
7 changed files with 373 additions and 102 deletions

View File

@ -13,7 +13,7 @@ USELESS_PROGRAM_DEPARTMENT_WORDS = {"项目管理部", "项目部"}
#公司名标准化时需要过滤掉的词汇
USELESS_COMPANY_WORDS = ["公司","有限","责任","工程","科技"]
USELESS_DESIGN_WORDS = {"方案", "措施"}
#提取公司名热词需要过滤掉的词汇
# USELESS_COMPANY_WORDS = ["公司","有限","责任","工程","科技","安徽省","国网","四川省","安徽","集团","电力","建设","建筑","安装","股份"
# "装饰","结构","能源","发展","装饰","电气","股份"]
@ -43,8 +43,14 @@ RISK_LEVEL = "riskLevel"
TEAM_NAME = "teamName"
PAGE = "page"
PROGRAM_NAVIGATION = "programNavigation"
# PROGRAM_NAVIGATION = "programNavigation"
DESIGN_SPECIFICATION = "designSpecificationName"
DESIGN = "designName"
SPECIFICATION = "specificationName"
PICTURE = "picName"
#方案的后缀
design_suffix = ["措施","方案","规划","三措一案"]
# 意图识别和槽位抽取服务返回的关键提示语列表
SLOT_KEYWORDS = [

View File

@ -9,6 +9,7 @@ from config import redis_url
logger = setup_logger("GlobalData", level=logging.DEBUG)
class GlobalData:
# 数据字段
standard_company_program = {}
@ -32,6 +33,8 @@ class GlobalData:
simply_to_standard_team_leader_name_map = {}
pinyin_simply_to_standard_team_leader_name_map = {}
standard_design_pic = {}
@classmethod
def update_from_redis(cls):
import sys
@ -50,20 +53,22 @@ class GlobalData:
#建管单位
cls._update_list_data('SBD_QUERY_DATA:CONSTRUCTION_UNIT', './standard_data/construct_unit.txt',
cls.standard_construct_name_list, cls.simply_to_standard_construct_name_map,
cls.pinyin_simply_to_standard_construct_name_map, clean_useless_company_name)
cls.standard_construct_name_list, cls.simply_to_standard_construct_name_map,
cls.pinyin_simply_to_standard_construct_name_map, clean_useless_company_name)
logger.info(f"建管单位数量:{len(cls.standard_construct_name_list)}")
#分包单位
cls._update_list_data('SBD_QUERY_DATA:SUBCONTRACTOR', './standard_data/sub_contract.txt',
cls.standard_constractor_name_list, cls.simply_to_standard_constractor_name_map,
cls.pinyin_simply_to_standard_constractor_name_map, clean_useless_company_name)
cls.standard_constractor_name_list, cls.simply_to_standard_constractor_name_map,
cls.pinyin_simply_to_standard_constractor_name_map, clean_useless_company_name)
logger.info(f"分包单位数量:{len(cls.standard_constractor_name_list)}")
#班组名称
cls._update_list_data('SBD_QUERY_DATA:TEAM', './standard_data/team_leader.txt',
cls.standard_team_leader_name_list, cls.simply_to_standard_team_leader_name_map,
cls.pinyin_simply_to_standard_team_leader_name_map, clean_useless_team_leader_name)
cls.standard_team_leader_name_list, cls.simply_to_standard_team_leader_name_map,
cls.pinyin_simply_to_standard_team_leader_name_map, clean_useless_team_leader_name)
cls.update_design_pic_info()
logger.info(f"班组名称数量:{len(cls.standard_team_leader_name_list)}")
@classmethod
@ -80,7 +85,7 @@ class GlobalData:
json_str = r.get('SBD_QUERY_DATA:STANDARD_COMPANY_PROGRAM')
if json_str:
temp_data = json.loads(json_str)
save_dict_to_file(temp_data,"./standard_data/standard_company_program.json")
save_dict_to_file(temp_data, "./standard_data/standard_company_program.json")
logger.info("[Info] Loaded STANDARD_COMPANY_PROGRAM from Redis")
else:
raise ValueError("Redis key not found")
@ -122,7 +127,7 @@ class GlobalData:
if json_str:
try:
temp_list = json.loads(json_str)
save_standard_name_list_to_file(temp_list,local_path)
save_standard_name_list_to_file(temp_list, local_path)
logger.info(f"[Info] Loaded {redis_key} from Redis")
except json.JSONDecodeError as e:
logger.info(f"[Warning] JSON decode error on key '{redis_key}': {e}")
@ -144,3 +149,87 @@ class GlobalData:
pinyin_map.update({
text_to_pinyin(cleaner(kw)): kw for kw in temp_list
})
@classmethod
def update_design_pic_info(cls):
    """Refresh ``cls.standard_design_pic`` from Redis, falling back to a local file.

    Reads the Redis key ``SBD_QUERY_DATA:STANDARD_DESIGN_PIC_INFO``. When the
    key holds a value, it is JSON-decoded and written to
    ``./standard_data/standard_project_info.json``. On any failure (missing
    key, connection error, invalid JSON) the data is loaded from that same
    file instead. The class-level cache is only rewritten when the loaded
    data differs from what is already cached, and it is updated in place so
    existing references to the dict stay valid.
    """
    # Imported lazily inside the method, as in the original block
    # (NOTE(review): presumably to avoid a circular import with utils).
    from utils import (
        load_standard_json_data,
        save_dict_to_file,
    )
    local_path = "./standard_data/standard_project_info.json"
    try:
        r = redis.from_url(redis_url, decode_responses=True)
        json_str = r.get('SBD_QUERY_DATA:STANDARD_DESIGN_PIC_INFO')
        if not json_str:
            # Routed through the except branch below so that a missing key
            # and a Redis failure share the same local-file fallback.
            raise ValueError("Redis key not found")
        temp_data = json.loads(json_str)
        save_dict_to_file(temp_data, local_path)
        logger.info("[Info] Loaded STANDARD_DESIGN_PIC_INFO from Redis")
    except Exception as e:
        logger.error(f"[Error] Error loading STANDARD_DESIGN_PIC_INFO: {e}")
        temp_data = load_standard_json_data(local_path)
    # Logged at debug level instead of printed: the payload can be large.
    logger.debug(f"STANDARD_DESIGN_PIC_INFO:{temp_data}")
    if temp_data != cls.standard_design_pic:
        cls.standard_design_pic.clear()
        cls.standard_design_pic.update(temp_data)
@classmethod
def get_all_company_from_design_info(cls):
    """Return the top-level keys of ``cls.standard_design_pic`` as a new list.

    The original comment describes these keys as the branch-company names.
    """
    return [company for company in cls.standard_design_pic]
@classmethod
def get_project_from_design_info(cls):
    """Return every second-level key of ``cls.standard_design_pic`` in one flat list.

    The original comment describes these keys as project names. Keys are
    collected company by company in dictionary order; duplicates are kept.
    """
    return [
        project_name
        for projects in cls.standard_design_pic.values()
        for project_name in projects.keys()
    ]
@classmethod
def get_contents_by_company_proj(cls, company_name, project_name):
    """Look up schemes, drawings and specifications by company and/or project.

    The lookup never modifies ``cls.standard_design_pic``.

    :param company_name: branch-company key, or a falsy value when unknown
    :param project_name: project key, or a falsy value when unknown
    :return:
        * both given: the entry stored for that company/project, or ``{}``
          when either key is missing;
        * project only: the entry of the first company that contains the
          project, or ``None`` when no company does;
        * company only: a new dict with the keys "方案", "图纸" and
          "规范规程", each holding the concatenated lists of all projects
          of that company;
        * neither given: ``None``.
    """
    data = cls.standard_design_pic

    if company_name and project_name:
        return data.get(company_name, {}).get(project_name, {})

    if project_name:
        for projects in data.values():
            if project_name in projects:
                return projects[project_name]
        return None

    if company_name:
        merged = {
            "方案": [],
            "图纸": [],
            "规范规程": []
        }
        for proj_key, project in data.get(company_name, {}).items():
            # "@type" is a metadata entry, not a project. It is skipped
            # rather than deleted so the shared cache stays untouched.
            if proj_key == "@type":
                continue
            for key, bucket in merged.items():
                bucket.extend(project.get(key, []))
        return merged

    return None
@classmethod
def get_contents_by_proj(cls, project_name):
    """Return the entry stored for ``project_name`` under the first company that has it.

    Companies are scanned in dictionary order.

    :param project_name: project key to look for
    :return: the stored entry (per the original comment: the project's
        schemes / drawings / specifications), or ``None`` when no company
        contains the project
    """
    return next(
        (
            projects[project_name]
            for projects in cls.standard_design_pic.values()
            if project_name in projects
        ),
        None,
    )

View File

@ -16,7 +16,7 @@ from globalData import GlobalData
from apscheduler.schedulers.background import BackgroundScheduler
MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-14672"
MODEL_UIE_PATH = R"../uie/output/checkpoint-16380"
MODEL_UIE_PATH = R"../uie/output_temp/checkpoint-18774"
# 类别名称列表
@ -48,8 +48,8 @@ label_map = {
15: 'B-personName', 34: 'I-personName',
16: 'B-personQueryType', 35: 'I-personQueryType',
17: 'B-projectStatus', 36: 'I-projectStatus',
18: 'B-skyNet', 37: 'I-skyNet',
19: 'B-programNavigation', 38: 'I-programNavigation'
18: 'B-picName', 37: 'I-picName',
19: 'B-designSpecificationName', 38: 'I-designSpecificationName'
}
logger = setup_logger("main", level=logging.DEBUG)
@ -406,6 +406,7 @@ def extract_multi_chat(messages):
请你仅输出还原后的完整问题不要输出任何变量中间步骤或解释说明确保结果自然通顺语义完整
'''
message = [
{"role": "user", "content": prompt}
]

View File

@ -16,7 +16,7 @@ from globalData import GlobalData
from apscheduler.schedulers.background import BackgroundScheduler
MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-14672"
MODEL_UIE_PATH = R"../uie/output/checkpoint-16380"
MODEL_UIE_PATH = R"../uie/output_temp/checkpoint-20860"
# 类别名称列表
@ -48,8 +48,8 @@ label_map = {
15: 'B-personName', 34: 'I-personName',
16: 'B-personQueryType', 35: 'I-personQueryType',
17: 'B-projectStatus', 36: 'I-projectStatus',
18: 'B-skyNet', 37: 'I-skyNet',
19: 'B-programNavigation', 38: 'I-programNavigation'
18: 'B-picName', 37: 'I-picName',
19: 'B-designSpecificationName', 38: 'I-designSpecificationName'
}
logger = setup_logger("main", level=logging.DEBUG)
@ -386,37 +386,16 @@ def extract_multi_chat(messages):
示例补全模糊表达("今天送一分公司有多少作业计划", "具体是哪些") 返回 "今天送一分公司具体有哪些作业计划"
函数 是查询新属性(文本, 新问题):
如果新问题中没有查询主体仅有查询对象 则返回TRUE
如果新问题中仅有查询主体但没有查询对象 则返回TRUE
如果新问题中提取不到主体 且仅能提取到查询属性
且这个查询属性和文本中提取到的查询属性不同 则返回TRUE
其他情况均返回FALSE
示例是查询新属性("今天送一分公司有多少作业计划", "作业内容") 返回 True
函数 删除数量词(文本):
删除有多少多少几条几个等数量问句词
函数 替换查询属性(文本, 新查询属性):
说明
本函数用于在删除数量词后将原句中与新查询属性同类型的核心查询词替换为新查询属性并确保其他内容保持不变且语义自然
函数 替换新属性(文本新查询属性):
先删除文本中的"有多少"等类似的表达数量表达
再将文本里的查询属性替换为新查询属性并保持其他内容不变并返回 且保持新查询属性的语气
示例替换新属性("今天送一分公司有多少作业计划", "作业内容") 返回 "今天送一分公司的作业内容"
处理步骤
1. 删除文本中的数量类词语例如有多少多少几个几条
2. 识别原句中的核心查询属性词判断其与新查询属性是否属于相同类别如均为对象地点组织等
3. 将原有核心查询词替换为新查询属性保留句中其余上下文结构不变
4. 保持句子语气自然避免引入是什么有多少等疑问表达
返回
返回替换后的文本语义清晰语气自然
示例
替换查询属性("今天送一分公司有多少作业计划", "作业内容")
"今天送一分公司的作业内容"
替换查询属性("今天送一分公司的班组详情", "送二分公司")
"今天送二分公司的班组详情"
替换查询属性("今天送一分公司的班组详情", "明天呢")
"明天送二分公司的班组详情"
函数 有完整的句意(新问题):
如果新问题里有主体同时有操作对象或查询对象则返回TRUE
其他情况均返回FALSE
@ -427,6 +406,7 @@ def extract_multi_chat(messages):
请你仅输出还原后的完整问题不要输出任何变量中间步骤或解释说明确保结果自然通顺语义完整
'''
message = [
{"role": "user", "content": prompt}
]

View File

@ -2,8 +2,9 @@ import paddle
from paddlenlp.transformers import ErnieForTokenClassification, ErnieTokenizer
from globalData import GlobalData
from utils import standardize_name_only_high_score, clean_useless_company_name
from constants import SUBCONTRACTOR, CONSTRUCTION_UNIT, IMPLEMENTATION_ORG, PAGE, PROGRAM_NAVIGATION, PROJECT_DEPARTMENT
from utils import standardize_name_only_high_score, clean_useless_company_name, is_design_file
from constants import SUBCONTRACTOR, CONSTRUCTION_UNIT, IMPLEMENTATION_ORG, PAGE, \
PROJECT_DEPARTMENT, DESIGN_SPECIFICATION, DESIGN, SPECIFICATION
import paddle.nn.functional as F
@ -178,14 +179,13 @@ class SlotRecognition:
else:
updates[key] = value
prob_updates[key] = slot_probabilities[key]
elif key == PROGRAM_NAVIGATION or key == PAGE:
if "" in value:
updates[key] = "施工生产管理平台"
elif key == DESIGN_SPECIFICATION:
if is_design_file(value):
updates[DESIGN] = value
prob_updates[DESIGN] = 1
else:
updates[key] = value
prob_updates[key] = slot_probabilities[key]
# 先不处理 PROJECT_DEPARTMENT后续单独处理
updates[SPECIFICATION] = value
prob_updates[SPECIFICATION] = 1
elif key != PROJECT_DEPARTMENT:
updates[key] = value
prob_updates[key] = slot_probabilities[key]

View File

@ -13,7 +13,7 @@ import re
from globalData import GlobalData
from constants import USELESS_COMPANY_WORDS, USELESS_PROJECT_WORDS, CONSTRUCTION_UNIT, IMPLEMENTATION_ORG, \
SUBCONTRACTOR, PROJECT_NAME, PROJECT_DEPARTMENT, RISK_LEVEL, TEAM_NAME, USELESS_PROGRAM_DEPARTMENT_WORDS, \
SLOT_KEYWORDS
SLOT_KEYWORDS, design_suffix, DESIGN, SPECIFICATION, PICTURE, USELESS_DESIGN_WORDS
from logger_util import setup_logger
@ -64,6 +64,7 @@ def load_standard_json_data(path):
# print(f"[Error] Failed to load local JSON file: {e}")
return {}
#将字典序列化为 JSON 并存入本地文件
def save_dict_to_file(data: dict, file_path: str):
"""
@ -83,6 +84,7 @@ def save_dict_to_file(data: dict, file_path: str):
# print(f"[Error] 写入 JSON 文件失败:{e}")
logger.error("[Error] 写入 JSON 文件失败:", exc_info=e)
#从指定文件中加载标准化的名称列表。
def load_standard_name_list(file_path: str):
"""
@ -111,6 +113,7 @@ def load_standard_name_list(file_path: str):
# print(f"读取文件时发生错误:{e}", flush=True)
raise Exception(f"错误:文件 {file_path} 不存在")
#将标准化名称列表写入指定文件中,每行一个名称。
def save_standard_name_list_to_file(name_list, file_path):
"""
@ -130,6 +133,7 @@ def save_standard_name_list_to_file(name_list, file_path):
except Exception as e:
logger.error(f"[Error] 写入文件失败:{e}")
def extract_number(text):
"""
提取项目部中的数字支持阿拉伯数字和中文数字并转换为统一格式中文数字
@ -148,6 +152,7 @@ def replace_arabic_with_chinese(text):
将字符串中所有连续的阿拉伯数字转换为对应的中文数字
示例2024年25号 -> 二千零二十四年二十五号
"""
def convert(match):
num_str = match.group()
try:
@ -187,6 +192,7 @@ def fuzzy_match_and_filter(input_key, match_pool, mapping_dict, lower_score=70,
else:
return [mapping_dict[m[0]] for m in high_conf_matches[:top_k]]
def fuzzy_match_and_filter_only_high_score(input_key, match_pool, mapping_dict, high_score=90, top_k=3):
"""
对输入字符串在候选池中执行模糊匹配并返回匹配程度高的映射原始值
@ -212,6 +218,7 @@ def fuzzy_match_and_filter_only_high_score(input_key, match_pool, mapping_dict,
else:
return [mapping_dict[m[0]] for m in high_conf_matches[:top_k]]
def standardize_name(input_name, clean_func, simply_map, pinyin_map, lower_score=70, high_score=85):
"""
通用名称标准化函数按中文 清洗 简化匹配 拼音匹配 的顺序进行处理
@ -234,6 +241,7 @@ def standardize_name(input_name, clean_func, simply_map, pinyin_map, lower_score
result = fuzzy_match_and_filter(pinyin_input, list(pinyin_map.keys()), pinyin_map, lower_score, high_score)
return result
def standardize_name_only_high_score(input_name, clean_func, simply_map, pinyin_map, high_score=90):
"""
通用名称标准化函数按中文 清洗 简化匹配 拼音匹配 的顺序进行处理
@ -256,6 +264,7 @@ def standardize_name_only_high_score(input_name, clean_func, simply_map, pinyin_
result = fuzzy_match_and_filter_only_high_score(pinyin_input, list(pinyin_map.keys()), pinyin_map, high_score)
return result
#标准化班组名称
def standardize_team_name(input_name, simply_map, pinyin_map, lower_score=70, high_score=90):
"""
@ -283,7 +292,9 @@ def standardize_sub_company(input_name, simply_map, pinyin_map, lower_score=55,
:return: 匹配的标准公司名列表
"""
temp_input_name = replace_arabic_with_chinese(input_name)
return standardize_name(temp_input_name, clean_useless_company_name, simply_map, pinyin_map, lower_score, high_score)
return standardize_name(temp_input_name, clean_useless_company_name, simply_map, pinyin_map, lower_score,
high_score)
def standardize_project_name(input_name, simply_map, pinyin_map, lower_score=70, high_score=90):
"""
@ -365,7 +376,8 @@ def multiple_standardize_single_name(origin_input_name, origin_name_list, pinyin
limit=len(origin_name_list))
# 找到所有相似度 >= lower_score 的匹配项
original_high_confidence_matches = [(match[0], match[1]) for match in match_results if match[1] >= lower_score]
logger.info(f"standardize_pinyin_single_name 原始名匹配, high_confidence_matches:{original_high_confidence_matches[:3]}")
logger.info(
f"standardize_pinyin_single_name 原始名匹配, high_confidence_matches:{original_high_confidence_matches[:3]}")
combined_low_confidence_matches = []
if original_high_confidence_matches:
@ -382,7 +394,7 @@ def multiple_standardize_single_name(origin_input_name, origin_name_list, pinyin
return list(dict.fromkeys(combined_low_confidence_matches))
def generate_project_prompt_with_key(matched_projects, original_name="", slot_key = IMPLEMENTATION_ORG):
def generate_project_prompt_with_key(matched_projects, original_name="", slot_key=IMPLEMENTATION_ORG):
"""
生成提示信息用于让用户确认匹配的项目名或分公司名或项目名
@ -393,7 +405,8 @@ def generate_project_prompt_with_key(matched_projects, original_name="", slot_ke
返回:
str: 生成的提示信息如果未找到匹配项返回提示用户提供更准确信息的字符串
"""
logger.info(f"generate_project_prompt_with_key slot_key:{slot_key},original_name:{original_name},matched_projects:{matched_projects} ")
logger.info(
f"generate_project_prompt_with_key slot_key:{slot_key},original_name:{original_name},matched_projects:{matched_projects} ")
type = ""
if slot_key == CONSTRUCTION_UNIT:
type = "建管单位名"
@ -409,7 +422,7 @@ def generate_project_prompt_with_key(matched_projects, original_name="", slot_ke
# print(f"generate_project_prompt_with_key type:{type} ")
logger.info(f"generate_project_prompt_with_key type:{type} ")
if not matched_projects:
if slot_key in (CONSTRUCTION_UNIT,IMPLEMENTATION_ORG,SUBCONTRACTOR):
if slot_key in (CONSTRUCTION_UNIT, IMPLEMENTATION_ORG, SUBCONTRACTOR):
return f"<p>未找到匹配的<strong>公司名</strong>{original_name},请提供更准确的公司名信息。</p>"
else:
return f"<p>未找到匹配的:{original_name},请提供更准确的信息。</p>"
@ -424,6 +437,7 @@ def generate_project_prompt_with_key(matched_projects, original_name="", slot_ke
html_parts.append("<p>请确认您要选择哪一个?</p>")
return "\n".join(html_parts)
def generate_project_prompt(matched_projects, original_name="", type="项目部名"):
"""
生成提示信息用于让用户确认匹配的项目名或分公司名或项目名
@ -448,6 +462,7 @@ def generate_project_prompt(matched_projects, original_name="", type="项目部
html_parts.append("<p>请确认您要选择哪一个?</p>")
return "\n".join(html_parts)
def generate_confirm_prompt(matched_projects, original_name="", type="项目部名"):
"""
生成提示信息用于让用户确认匹配的项目名或分公司名或项目名
@ -495,6 +510,8 @@ company_symbols_pattern = re.compile(r"[\s\W_]+")
useless_team_leader_words_pattern = re.compile("班组")
useless_design_words_pattern = re.compile("|".join(USELESS_DESIGN_WORDS))
def clean_useless_project_name(name: str) -> str:
# 去掉无意义词
@ -510,11 +527,13 @@ def clean_useless_company_name(name: str) -> str:
name = company_symbols_pattern.sub("", name)
return name.strip()
def clean_useless_team_leader_name(name: str) -> str:
# 去掉无意义词
name = useless_team_leader_words_pattern.sub("", name)
return name.strip()
#去掉项目部里面的不重要词汇
def clean_useless_program_departement_name(name: str) -> str:
# 去掉无意义词
@ -523,6 +542,21 @@ def clean_useless_program_departement_name(name: str) -> str:
name = project_symbols_pattern.sub("", name)
return name.strip()
def clean_useless_design_name(name: str) -> str:
    """Simplify a scheme name before fuzzy matching.

    First removes every match of ``useless_design_words_pattern`` (built from
    ``USELESS_DESIGN_WORDS``), then every match of ``project_symbols_pattern``
    (per the original comment: digits, letters and symbols), and finally
    strips surrounding whitespace.
    """
    without_filler_words = useless_design_words_pattern.sub("", name)
    without_symbols = project_symbols_pattern.sub("", without_filler_words)
    return without_symbols.strip()
def clean_useless_specification_name(name: str) -> str:
    """Simplify a specification name before fuzzy matching.

    Removes every match of ``project_symbols_pattern`` (per the original
    comment: digits, letters and symbols) and strips surrounding whitespace.
    """
    return project_symbols_pattern.sub("", name).strip()
#槽位缺失检查
def check_lost(int_res, slot):
#labels: ["天气查询","通用对话","页面切换","日计划数量查询","周计划数量查询","日计划作业内容","周计划作业内容","施工人数","作业考勤人数","知识问答"]
@ -572,7 +606,7 @@ def check_lost(int_res, slot):
apologize_str = "非常抱歉,"
# if int_res == 2:
# return CheckResult.NEEDS_MORE_ROUNDS, f"{apologize_str}请问你想查询哪个页面?"
if int_res in [3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15,16]:
if int_res in [3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16]:
return CheckResult.NEEDS_MORE_ROUNDS, f"{apologize_str}请问你想查询什么时间的{intention_mapping[int_res]}"
@ -607,7 +641,8 @@ def check_standard_name_slot_probability(int_res, slot) -> tuple:
if match_results and len(match_results) == 1:
slot[key] = match_results[0]
else:
prompt = generate_project_prompt_with_key(match_results, original_name=slot[IMPLEMENTATION_ORG], slot_key= IMPLEMENTATION_ORG)
prompt = generate_project_prompt_with_key(match_results, original_name=slot[IMPLEMENTATION_ORG],
slot_key=IMPLEMENTATION_ORG)
return CheckResult.NEEDS_MORE_ROUNDS, prompt
if key == CONSTRUCTION_UNIT:
@ -618,7 +653,8 @@ def check_standard_name_slot_probability(int_res, slot) -> tuple:
if match_results and len(match_results) == 1:
slot[key] = match_results[0]
else:
prompt = generate_project_prompt_with_key(match_results, original_name=slot[CONSTRUCTION_UNIT], slot_key= CONSTRUCTION_UNIT)
prompt = generate_project_prompt_with_key(match_results, original_name=slot[CONSTRUCTION_UNIT],
slot_key=CONSTRUCTION_UNIT)
return CheckResult.NEEDS_MORE_ROUNDS, prompt
if key == SUBCONTRACTOR:
@ -629,12 +665,14 @@ def check_standard_name_slot_probability(int_res, slot) -> tuple:
if match_results and len(match_results) == 1:
slot[key] = match_results[0]
else:
prompt = generate_project_prompt_with_key(match_results, original_name=slot[SUBCONTRACTOR], slot_key= SUBCONTRACTOR)
prompt = generate_project_prompt_with_key(match_results, original_name=slot[SUBCONTRACTOR],
slot_key=SUBCONTRACTOR)
return CheckResult.NEEDS_MORE_ROUNDS, prompt
if key == PROJECT_DEPARTMENT:
logger.info(f"check_standard_name_slot 原始项目部名 : {slot[PROJECT_DEPARTMENT]}")
match_results = standardize_projectDepartment(slot[IMPLEMENTATION_ORG], value, GlobalData.standard_company_program,
match_results = standardize_projectDepartment(slot[IMPLEMENTATION_ORG], value,
GlobalData.standard_company_program,
high_score=95)
logger.info(f"check_standard_name_slot 匹配后项目部名: result:{match_results}")
if match_results and len(match_results) == 1:
@ -657,9 +695,139 @@ def check_standard_name_slot_probability(int_res, slot) -> tuple:
"五级"]:
return CheckResult.NEEDS_MORE_ROUNDS, "您查询的风险等级在系统中未找到,请确认风险等级后再次提问"
#前提已经做过公司名和工程名的标准化
return standardize_specification_design_pic(slot)
# return CheckResult.NO_MATCH, ""
def standardize_implement_company(slot_item) -> tuple:
    """Replace the IMPLEMENTATION_ORG slot with its standard company name.

    :param slot_item: slot mapping; updated in place on a unique match
    :return: ``(CheckResult.NO_MATCH, "")`` when the slot is absent or was
        resolved to exactly one standard name; otherwise
        ``(CheckResult.NEEDS_MORE_ROUNDS, prompt)`` where ``prompt`` is built
        from the candidate list.
    """
    if IMPLEMENTATION_ORG not in slot_item:
        return CheckResult.NO_MATCH, ""

    raw_name = slot_item[IMPLEMENTATION_ORG]
    logger.info(f"standardize_specification_design_pic 原始分公司名 : {raw_name}")
    candidates = standardize_sub_company(
        raw_name,
        GlobalData.simply_to_standard_company_name_map,
        GlobalData.pinyin_simply_to_standard_company_name_map,
        70,
        90,
    )
    logger.info(f"standardize_specification_design_pic 匹配后分公司名: result:{candidates}")

    # Zero or several candidates: the caller has to ask the user to pick.
    if not candidates or len(candidates) != 1:
        prompt = generate_project_prompt_with_key(
            candidates,
            original_name=raw_name,
            slot_key=IMPLEMENTATION_ORG,
        )
        return CheckResult.NEEDS_MORE_ROUNDS, prompt

    slot_item[IMPLEMENTATION_ORG] = candidates[0]
    return CheckResult.NO_MATCH, ""
def standardize_project(slot_item) -> tuple:
    """Replace the PROJECT_NAME slot with its standard project name.

    :param slot_item: slot mapping; updated in place on a unique match
    :return: ``(CheckResult.NO_MATCH, "")`` when the slot is absent or was
        resolved to exactly one standard name; otherwise
        ``(CheckResult.NEEDS_MORE_ROUNDS, prompt)`` where ``prompt`` is built
        from the candidate list.
    """
    if PROJECT_NAME not in slot_item:
        return CheckResult.NO_MATCH, ""

    raw_name = slot_item[PROJECT_NAME]
    logger.info(f"standardize_specification_design_pic 原始工程名 : {raw_name}")
    candidates = standardize_project_name(
        raw_name,
        GlobalData.simply_to_standard_project_name_map,
        GlobalData.pinyin_simply_to_standard_project_name_map,
        70,
        90,
    )
    logger.info(f"standardize_specification_design_pic 匹配后工程名 result:{candidates}")

    # Zero or several candidates: the caller has to ask the user to pick.
    if not candidates or len(candidates) != 1:
        prompt = generate_project_prompt(candidates, original_name=raw_name, type="工程名")
        return CheckResult.NEEDS_MORE_ROUNDS, prompt

    slot_item[PROJECT_NAME] = candidates[0]
    return CheckResult.NO_MATCH, ""
def standardize_design(slot_item) -> tuple:
    """Standardize the PROJECT_NAME slot; identical to ``standardize_project``.

    :param slot_item: slot mapping; updated in place on a unique match
    :return: whatever ``standardize_project(slot_item)`` returns
    """
    # NOTE(review): the previous body was a line-for-line copy of
    # standardize_project and never touched the DESIGN slot, despite the
    # function name. It now delegates so the two cannot drift apart.
    # Confirm whether a DESIGN-specific implementation was intended.
    return standardize_project(slot_item)
def _standardize_content_slot(slot, slot_key, content_key, type_label, cleaner,
                              company_name, project_name) -> tuple:
    """Fuzzy-match ``slot[slot_key]`` against the names stored under ``content_key``.

    :param slot: slot mapping; ``slot[slot_key]`` is replaced on a unique match
    :param slot_key: slot to standardize (DESIGN, SPECIFICATION or PICTURE)
    :param content_key: category key in the company/project contents
    :param type_label: label passed to ``generate_project_prompt``
    :param cleaner: callable applied to both the input and the candidates
        before matching, or ``None`` to match the raw strings
    :param company_name: company used for the contents lookup (may be "")
    :param project_name: project used for the contents lookup (may be "")
    :return: ``(CheckResult.NO_MATCH, '')`` on a unique match, otherwise
        ``(CheckResult.NEEDS_MORE_ROUNDS, prompt)``
    """
    original_name = slot[slot_key]
    contents = GlobalData.get_contents_by_company_proj(company_name, project_name)
    if not contents:
        # Nothing known for this company/project: ask for a better name.
        prompt = generate_project_prompt([], original_name=original_name, type=type_label)
        return CheckResult.NEEDS_MORE_ROUNDS, prompt

    if cleaner is None:
        def cleaner(text):
            return text

    # A category can be absent from the stored contents; treat it as empty.
    candidates = contents.get(content_key, [])
    simplified_to_standard = {cleaner(kw): kw for kw in candidates}
    result = fuzzy_match_and_filter(cleaner(original_name), list(simplified_to_standard.keys()),
                                    simplified_to_standard, 70, 90)
    if result and len(result) == 1:
        slot[slot_key] = result[0]
        return CheckResult.NO_MATCH, ''

    prompt = generate_project_prompt(result, original_name=original_name, type=type_label)
    logger.info(f"{type_label}标准化返回:{prompt}")
    return CheckResult.NEEDS_MORE_ROUNDS, prompt


def standardize_specification_design_pic(slot) -> tuple:
    """Standardize the scheme, specification or drawing name held in ``slot``.

    Only the first slot present is processed, in the order DESIGN,
    SPECIFICATION, PICTURE. The company and project names are read from the
    slot as-is; the caller is expected to have standardized them already.

    :param slot: slot mapping; the processed entry is replaced in place when
        exactly one standard name matches
    :return: ``(CheckResult.NO_MATCH, '')`` when none of the three slots is
        present or the name was resolved; otherwise
        ``(CheckResult.NEEDS_MORE_ROUNDS, prompt)``
    """
    standard_implement_company = slot[IMPLEMENTATION_ORG] if IMPLEMENTATION_ORG in slot else ""
    standard_project = slot[PROJECT_NAME] if PROJECT_NAME in slot else ""

    # (slot key, contents category, prompt label, cleaner)
    handlers = (
        (DESIGN, "方案", "方案名", clean_useless_design_name),
        (SPECIFICATION, "规范规程", "规范规程名", clean_useless_specification_name),
        (PICTURE, "图纸", "图纸名", None),
    )
    for slot_key, content_key, type_label, cleaner in handlers:
        if slot_key in slot:
            return _standardize_content_slot(slot, slot_key, content_key, type_label, cleaner,
                                             standard_implement_company, standard_project)
    return CheckResult.NO_MATCH, ''
def process_msg_content(content):
if not any(keyword in content for keyword in SLOT_KEYWORDS):
match = re.search(r"^.*?[。!?.!?:]", content)
@ -669,4 +837,10 @@ def process_msg_content(content):
else:
return content.strip()
else:
return content
return content
def is_design_file(file_name):
    """Tell whether ``file_name`` ends with one of the ``design_suffix`` entries.

    Trailing closing brackets and whitespace are removed before the suffix
    check, so a name wrapped in book-title marks still matches.

    :param file_name: name to classify
    :return: ``True`` when the trimmed name ends with any configured suffix
    """
    trailing_noise = r"[》〉》】】))>>」』」》))》」』)】\s]+$"
    trimmed = re.sub(trailing_noise, "", file_name)
    suffixes = tuple(design_suffix)
    return trimmed.endswith(suffixes)

View File

@ -50,7 +50,8 @@ BASE_DATA = {
"宿州萧砀线路工程建筑部分",
"1000kV淮芜线(PROJ-2020-0204-0003)",
"35kV接地极线路雁淮线",
"110kV接地极线路吉泉线(PROJ-2020-0204-0002)"
"110kV接地极线路吉泉线(PROJ-2020-0204-0002)",
"国网安徽宣城供电公司500kV河沥变加装固定融冰装置项目工程"
],
# 项目部名称
"project_departments": ["第一项目部金上","调试一队", "第9项目管理部","第9项目管理部门", "金上第十一项目部门", "第八项目管理部(合肥)", "肥东9号项目部",
@ -77,10 +78,11 @@ BASE_DATA = {
"risk_levels": ["1级", "一级", "二级", "5级", "四级"],
# 8+2工况
"operatings": ["8+2工况", "8加2工况"],
# 页面切换
# 页面切换,不能有方案图纸和规程规范这些数据集出现在pages否则会冲突
"pages": ["风险管控", "日计划", "周风险", "日计划统计报表", "日计划推送", "生产管控中心", "考勤统计详情",
"今日作业计划", "周风险统计报表", "周风险推送", "进度管理", "技术管理", "项目团队", "质量管理",
"云上会议", "项目巡航", "施工生产管理平台"],
"云上会议", "项目巡航", "施工生产管理平台", "数字化项目部","数字化项目部管理平台","施工生产管理平台",
"经营管理", "物资管理", "共享资料", "党建+", "党建加", "摄像头", "视频"],
# 具体人名
"person_names": ["何东洋", "李东","王孙强林"],
# 人名查询目标
@ -89,10 +91,22 @@ BASE_DATA = {
# 工程状态
"project_status_s": ["在建", "在作业", "在施工",""],
"pic_names": ["四号线施工图", "框架柱详图", "500kvgis室吊车梁布置图", "_站区道路及进站道路详图_A2", "_辅助用房建筑设计说明一_A2","平断面定位图目录",
"基础明细表","杆塔明细表","(500-SJC31151)_1-110 ","接地装置施工图","平断面定位图卷册说明"],
"design_specification_names": [
"《35kV电力电缆交流耐压试验方案》","220kV南蒙2753线拆线、拆塔施工方案","悬索封网实验方案","灌注桩承台基础施工方案"
"一般跨越施工措施", "省道专项施工方案","吊车组立角钢塔施工方案","承台基础及接地施工措施","项目管理实施规划","电力电缆方案","线路拆旧跨越110kV线路施工方案",
"断面悬浮抱杆组塔施工方案","灌注桩基础及接地施工措施"
"110kV-750kV架空输电线路铁塔基础施工工艺导则","国网基建2112-2022 国家电网有限公司输变电工程建设质量管理规定","1000kV架空输电线路施工质量检验及评定规程","《国家电网有限公司施工项目部标准化管理手册线路工程分册》",
"国家电网有限公司输变电工程标准工艺电缆工程分册2022版","架空输电线路螺旋锚基础施工及质量验收规范","国家电网有限公司安全生产反违章工作管理办法"],
#皖送天网
"sky_nets": ["摄像头", "视频"],
# "sky_nets": ["摄像头", "视频"],
#项目巡航
"program_navigations": ["数字化项目部", "数字化项目部管理平台", "施工生产管理平台"],
# "program_navigations": ["数字化项目部", "数字化项目部管理平台", "施工生产管理平台"],
}
# 自然语言模板配置
@ -748,39 +762,43 @@ TEMPLATE_CONFIG = {
"date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"],
"templates": [
("打开{page}页面", ["page"]),
("打开{page}", ["page"]),
("切换{page}模块", ["page"]),
("切换到{page}页面", ["page"]),
("跳转到{page}", ["page"]),
("跳转到{page}模块", ["page"]),
("打开{page}<页面>", ["page"]),
("切换到{page}页面", ["page"]),
("切换{page}模块", ["page"]),
("请打开{page}模块", ["page"]),
("请打开{page}", ["page"]),
("请切换到{page}页面", ["page"]),
("切换{page}", ["page"]),
#施工生产管理平台
("打开{program_navigation}", ["program_navigation"]),
("打开{program_navigation}", ["program_navigation"]),
#项目巡航:分公司
("打开{implementation_organization}{program_navigation}", ["implementation_organization", "program_navigation"]),
("打开{implementation_organization}{program_navigation}", ["implementation_organization", "program_navigation"]),
#项目巡航:分公司、项目部
("打开{implementation_organization}{project_department}{program_navigation}",
["implementation_organization", "project_department", "program_navigation"]),
#项目巡航:分公司
("切换到{implementation_organization}{program_navigation}",
["implementation_organization", "program_navigation"]),
#项目巡航,工程
("打开{project_name}{program_navigation}", ["project_name", "program_navigation"]),
#皖智天网,工程名摄像头
("打开{project_name}{sky_net}", ["project_name", "sky_net"]),
("切换到{project_name}{sky_net}", ["project_name", "sky_net"]),
#皖智天网,班组名摄像头
("切换到{team_name}{sky_net}", ["team_name", "sky_net"]),
("切换{team_name}{sky_net}", ["team_name", "sky_net"]),
("打开{team_name}{sky_net}", ["team_name", "sky_net"]),
#施工生产管理平台
("切换{page}模块", ["page"]),
("打开{implementation_organization}{page}", ["implementation_organization", "page"]),
("打开{implementation_organization}{project_department}{page}",
["implementation_organization", "project_department", "page"]),
("切换到{implementation_organization}{page}",
["implementation_organization", "page"]),
("打开{project_name}{page}", ["project_name", "page"]),
("切换到{project_name}{page}", ["project_name", "page"]),
("切换到{team_name}{page}", ["team_name", "page"]),
("打开{team_name}{page}", ["team_name", "page"]),
# design_names, pic_names,specification_names
#方案和规程规范
("打开{design_specification_name}<方案>", ["project_name", "design_specification_name"]),
("打开{design_specification_name}", ["project_name", "design_specification_name"]),
("打开{project_name}{design_specification_name}<方案>", ["project_name", "design_specification_name"]),
("打开{project_name}{design_specification_name}", ["project_name", "design_specification_name"]),
("打开{implementation_organization}{project_name}{design_specification_name}",
["implementation_organization", "project_name", "design_specification_name"]),
#图纸
("打开{pic_name}", ["pic_name"]),
("打开{project_name}{pic_name}", ["project_name", "pic_name"]),
("打开{project_name}{pic_name}<图纸>", ["project_name", "pic_name"]),
("打开{project_name}{pic_name}", ["project_name", "pic_name"]),
("打开{implementation_organization}{project_name}{pic_name}",
["implementation_organization", "project_name", "pic_name"]),
]
},
"作业面查询": {
@ -1348,8 +1366,11 @@ def generate_natural_samples(config, label):
"person_name": BASE_DATA["person_names"],
"person_query_type": BASE_DATA["person_query_types"],
"project_status": BASE_DATA["project_status_s"],
"sky_net": BASE_DATA["sky_nets"],
"program_navigation": BASE_DATA["program_navigations"],
"pic_name": BASE_DATA["pic_names"],
"design_specification_name": BASE_DATA["design_specification_names"],
# "sky_net": BASE_DATA["sky_nets"],
# "program_navigation": BASE_DATA["program_navigations"],
}
for template, variables in config["templates"]: