打开方案,规程规范和图纸的功能的训练和标准化
This commit is contained in:
parent
b2738705db
commit
5413911e64
|
|
@ -13,7 +13,7 @@ USELESS_PROGRAM_DEPARTMENT_WORDS = {"项目管理部", "项目部"}
|
||||||
|
|
||||||
#公司名标准化时需要过滤掉的词汇
|
#公司名标准化时需要过滤掉的词汇
|
||||||
USELESS_COMPANY_WORDS = ["公司","有限","责任","工程","科技"]
|
USELESS_COMPANY_WORDS = ["公司","有限","责任","工程","科技"]
|
||||||
|
USELESS_DESIGN_WORDS = {"方案", "措施"}
|
||||||
#提取公司名热词需要过滤掉的词汇
|
#提取公司名热词需要过滤掉的词汇
|
||||||
# USELESS_COMPANY_WORDS = ["公司","有限","责任","工程","科技","安徽省","国网","四川省","安徽","集团","电力","建设","建筑","安装","股份"
|
# USELESS_COMPANY_WORDS = ["公司","有限","责任","工程","科技","安徽省","国网","四川省","安徽","集团","电力","建设","建筑","安装","股份"
|
||||||
# "装饰","结构","能源","发展","装饰","电气","股份"]
|
# "装饰","结构","能源","发展","装饰","电气","股份"]
|
||||||
|
|
@ -43,8 +43,14 @@ RISK_LEVEL = "riskLevel"
|
||||||
TEAM_NAME = "teamName"
|
TEAM_NAME = "teamName"
|
||||||
|
|
||||||
PAGE = "page"
|
PAGE = "page"
|
||||||
PROGRAM_NAVIGATION = "programNavigation"
|
# PROGRAM_NAVIGATION = "programNavigation"
|
||||||
|
DESIGN_SPECIFICATION = "designSpecificationName"
|
||||||
|
DESIGN = "designName"
|
||||||
|
SPECIFICATION = "specificationName"
|
||||||
|
|
||||||
|
PICTURE = "picName"
|
||||||
|
#方案的后缀
|
||||||
|
design_suffix = ["措施","方案","规划","三措一案"]
|
||||||
|
|
||||||
# 意图识别和槽位抽取服务返回的关键提示语列表
|
# 意图识别和槽位抽取服务返回的关键提示语列表
|
||||||
SLOT_KEYWORDS = [
|
SLOT_KEYWORDS = [
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from config import redis_url
|
||||||
|
|
||||||
logger = setup_logger("GlobalData", level=logging.DEBUG)
|
logger = setup_logger("GlobalData", level=logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
class GlobalData:
|
class GlobalData:
|
||||||
# 数据字段
|
# 数据字段
|
||||||
standard_company_program = {}
|
standard_company_program = {}
|
||||||
|
|
@ -32,6 +33,8 @@ class GlobalData:
|
||||||
simply_to_standard_team_leader_name_map = {}
|
simply_to_standard_team_leader_name_map = {}
|
||||||
pinyin_simply_to_standard_team_leader_name_map = {}
|
pinyin_simply_to_standard_team_leader_name_map = {}
|
||||||
|
|
||||||
|
standard_design_pic = {}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def update_from_redis(cls):
|
def update_from_redis(cls):
|
||||||
import sys
|
import sys
|
||||||
|
|
@ -50,20 +53,22 @@ class GlobalData:
|
||||||
|
|
||||||
#建管单位
|
#建管单位
|
||||||
cls._update_list_data('SBD_QUERY_DATA:CONSTRUCTION_UNIT', './standard_data/construct_unit.txt',
|
cls._update_list_data('SBD_QUERY_DATA:CONSTRUCTION_UNIT', './standard_data/construct_unit.txt',
|
||||||
cls.standard_construct_name_list, cls.simply_to_standard_construct_name_map,
|
cls.standard_construct_name_list, cls.simply_to_standard_construct_name_map,
|
||||||
cls.pinyin_simply_to_standard_construct_name_map, clean_useless_company_name)
|
cls.pinyin_simply_to_standard_construct_name_map, clean_useless_company_name)
|
||||||
logger.info(f"建管单位数量:{len(cls.standard_construct_name_list)}")
|
logger.info(f"建管单位数量:{len(cls.standard_construct_name_list)}")
|
||||||
|
|
||||||
#分包单位
|
#分包单位
|
||||||
cls._update_list_data('SBD_QUERY_DATA:SUBCONTRACTOR', './standard_data/sub_contract.txt',
|
cls._update_list_data('SBD_QUERY_DATA:SUBCONTRACTOR', './standard_data/sub_contract.txt',
|
||||||
cls.standard_constractor_name_list, cls.simply_to_standard_constractor_name_map,
|
cls.standard_constractor_name_list, cls.simply_to_standard_constractor_name_map,
|
||||||
cls.pinyin_simply_to_standard_constractor_name_map, clean_useless_company_name)
|
cls.pinyin_simply_to_standard_constractor_name_map, clean_useless_company_name)
|
||||||
logger.info(f"分包单位数量:{len(cls.standard_constractor_name_list)}")
|
logger.info(f"分包单位数量:{len(cls.standard_constractor_name_list)}")
|
||||||
|
|
||||||
#班组名称
|
#班组名称
|
||||||
cls._update_list_data('SBD_QUERY_DATA:TEAM', './standard_data/team_leader.txt',
|
cls._update_list_data('SBD_QUERY_DATA:TEAM', './standard_data/team_leader.txt',
|
||||||
cls.standard_team_leader_name_list, cls.simply_to_standard_team_leader_name_map,
|
cls.standard_team_leader_name_list, cls.simply_to_standard_team_leader_name_map,
|
||||||
cls.pinyin_simply_to_standard_team_leader_name_map, clean_useless_team_leader_name)
|
cls.pinyin_simply_to_standard_team_leader_name_map, clean_useless_team_leader_name)
|
||||||
|
|
||||||
|
cls.update_design_pic_info()
|
||||||
logger.info(f"班组名称数量:{len(cls.standard_team_leader_name_list)}")
|
logger.info(f"班组名称数量:{len(cls.standard_team_leader_name_list)}")
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
@ -80,7 +85,7 @@ class GlobalData:
|
||||||
json_str = r.get('SBD_QUERY_DATA:STANDARD_COMPANY_PROGRAM')
|
json_str = r.get('SBD_QUERY_DATA:STANDARD_COMPANY_PROGRAM')
|
||||||
if json_str:
|
if json_str:
|
||||||
temp_data = json.loads(json_str)
|
temp_data = json.loads(json_str)
|
||||||
save_dict_to_file(temp_data,"./standard_data/standard_company_program.json")
|
save_dict_to_file(temp_data, "./standard_data/standard_company_program.json")
|
||||||
logger.info("[Info] Loaded STANDARD_COMPANY_PROGRAM from Redis")
|
logger.info("[Info] Loaded STANDARD_COMPANY_PROGRAM from Redis")
|
||||||
else:
|
else:
|
||||||
raise ValueError("Redis key not found")
|
raise ValueError("Redis key not found")
|
||||||
|
|
@ -122,7 +127,7 @@ class GlobalData:
|
||||||
if json_str:
|
if json_str:
|
||||||
try:
|
try:
|
||||||
temp_list = json.loads(json_str)
|
temp_list = json.loads(json_str)
|
||||||
save_standard_name_list_to_file(temp_list,local_path)
|
save_standard_name_list_to_file(temp_list, local_path)
|
||||||
logger.info(f"[Info] Loaded {redis_key} from Redis")
|
logger.info(f"[Info] Loaded {redis_key} from Redis")
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logger.info(f"[Warning] JSON decode error on key '{redis_key}': {e}")
|
logger.info(f"[Warning] JSON decode error on key '{redis_key}': {e}")
|
||||||
|
|
@ -144,3 +149,87 @@ class GlobalData:
|
||||||
pinyin_map.update({
|
pinyin_map.update({
|
||||||
text_to_pinyin(cleaner(kw)): kw for kw in temp_list
|
text_to_pinyin(cleaner(kw)): kw for kw in temp_list
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@classmethod
def update_design_pic_info(cls):
    """Refresh ``cls.standard_design_pic`` from Redis, with a local-file fallback.

    The JSON stored under ``SBD_QUERY_DATA:STANDARD_DESIGN_PIC_INFO`` is
    parsed and mirrored to a local JSON file. If Redis is unreachable, the
    key is missing/empty, or the payload cannot be parsed, the error is
    logged and the local mirror is loaded instead. The class-level dict is
    updated in place (clear + update), so existing references to it stay
    valid.
    """
    # Imported lazily, as in the original: utils imports GlobalData at module
    # level, so a top-level import here would be circular.
    from utils import load_standard_json_data, save_dict_to_file

    # NOTE(review): the file is named "standard_project_info" although it
    # holds the design/drawing/specification data -- confirm it does not
    # collide with another cache file.
    local_path = "./standard_data/standard_project_info.json"

    try:
        r = redis.from_url(redis_url, decode_responses=True)
        json_str = r.get('SBD_QUERY_DATA:STANDARD_DESIGN_PIC_INFO')
        if not json_str:
            # Handled by the except below so a missing key takes the same
            # fallback path as a connection or parse failure.
            raise ValueError("Redis key not found")
        temp_data = json.loads(json_str)
        save_dict_to_file(temp_data, local_path)
        logger.info("[Info] Loaded STANDARD_DESIGN_PIC_INFO from Redis")
    except Exception as e:
        logger.error(f"[Error] Error loading STANDARD_DESIGN_PIC_INFO: {e}")
        # NOTE(review): the loader appears to return {} when the file cannot
        # be read, in which case the in-memory data is emptied below.
        temp_data = load_standard_json_data(local_path)

    # Full payload goes to the debug log instead of stdout.
    logger.debug(f"STANDARD_DESIGN_PIC_INFO:{temp_data}")
    if temp_data != cls.standard_design_pic:
        cls.standard_design_pic.clear()
        cls.standard_design_pic.update(temp_data)
|
||||||
|
|
||||||
|
@classmethod
def get_all_company_from_design_info(cls):
    """Return the names of all branch companies in ``cls.standard_design_pic``.

    Returns:
        list: the top-level keys of the design/drawing/specification data,
        in insertion order, excluding the ``"@type"`` metadata key.
    """
    # "@type" is serializer metadata, not a company; the per-company lookup
    # in this class ignores it at the nested levels as well.
    return [company for company in cls.standard_design_pic if company != "@type"]
|
||||||
|
|
||||||
|
@classmethod
def get_project_from_design_info(cls):
    """Return the names of all projects across every company.

    Returns:
        list: project names gathered company by company in insertion order.
        ``"@type"`` metadata keys and non-dict company entries are skipped.
    """
    project_list = []
    for projects in cls.standard_design_pic.values():
        # A non-dict value (e.g. a top-level "@type" string) holds no projects.
        if not isinstance(projects, dict):
            continue
        project_list.extend(name for name in projects if name != "@type")
    return project_list
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
def get_contents_by_company_proj(cls, company_name, project_name):
    """Look up schemes, drawings and specifications by company and/or project.

    Args:
        company_name: branch-company name, or a falsy value if unknown.
        project_name: project name, or a falsy value if unknown.

    Returns:
        * both given: the stored entry for that project, or ``{}`` if the
          company or project is unknown;
        * project only: the entry of the first company containing the
          project, or ``None`` if no company has it;
        * company only: a new dict with keys "方案", "图纸" and "规范规程",
          each holding the concatenated lists of all that company's projects;
        * neither given: ``None``.

    The cached data is never modified; ``"@type"`` metadata entries are
    skipped while reading instead of being deleted from the shared dict.
    """
    data = cls.standard_design_pic

    if company_name and project_name:
        return data.get(company_name, {}).get(project_name, {})

    if project_name:
        for projects in data.values():
            # Skip non-dict values: "in" on a string would be a substring test.
            if isinstance(projects, dict) and project_name in projects:
                return projects[project_name]
        return None

    if company_name:
        result = {
            "方案": [],
            "图纸": [],
            "规范规程": []
        }
        company_projects = data.get(company_name, {})
        if not isinstance(company_projects, dict):
            return result
        for proj_key, project in company_projects.items():
            # Ignore the "@type" metadata entry and anything that is not a project dict.
            if proj_key == "@type" or not isinstance(project, dict):
                continue
            for key in result:
                result[key].extend(project.get(key) or [])
        return result

    return None
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
def get_contents_by_proj(cls, project_name):
    """Return the schemes/drawings/specifications entry for a project name.

    Companies are scanned in insertion order and the first one containing
    ``project_name`` wins.

    Returns:
        The stored entry for the project, or ``None`` if no company has it.
    """
    for projects in cls.standard_design_pic.values():
        # Only dict values can hold projects; on a string value "in" would be
        # a substring test and the subscript below would raise TypeError.
        if isinstance(projects, dict) and project_name in projects:
            return projects[project_name]
    return None  # not found
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ from globalData import GlobalData
|
||||||
from apscheduler.schedulers.background import BackgroundScheduler
|
from apscheduler.schedulers.background import BackgroundScheduler
|
||||||
|
|
||||||
MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-14672"
|
MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-14672"
|
||||||
MODEL_UIE_PATH = R"../uie/output/checkpoint-16380"
|
MODEL_UIE_PATH = R"../uie/output_temp/checkpoint-18774"
|
||||||
|
|
||||||
|
|
||||||
# 类别名称列表
|
# 类别名称列表
|
||||||
|
|
@ -48,8 +48,8 @@ label_map = {
|
||||||
15: 'B-personName', 34: 'I-personName',
|
15: 'B-personName', 34: 'I-personName',
|
||||||
16: 'B-personQueryType', 35: 'I-personQueryType',
|
16: 'B-personQueryType', 35: 'I-personQueryType',
|
||||||
17: 'B-projectStatus', 36: 'I-projectStatus',
|
17: 'B-projectStatus', 36: 'I-projectStatus',
|
||||||
18: 'B-skyNet', 37: 'I-skyNet',
|
18: 'B-picName', 37: 'I-picName',
|
||||||
19: 'B-programNavigation', 38: 'I-programNavigation'
|
19: 'B-designSpecificationName', 38: 'I-designSpecificationName'
|
||||||
}
|
}
|
||||||
|
|
||||||
logger = setup_logger("main", level=logging.DEBUG)
|
logger = setup_logger("main", level=logging.DEBUG)
|
||||||
|
|
@ -406,6 +406,7 @@ def extract_multi_chat(messages):
|
||||||
请你仅输出还原后的完整问题,不要输出任何变量、中间步骤或解释说明,确保结果自然通顺,语义完整。
|
请你仅输出还原后的完整问题,不要输出任何变量、中间步骤或解释说明,确保结果自然通顺,语义完整。
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
message = [
|
message = [
|
||||||
{"role": "user", "content": prompt}
|
{"role": "user", "content": prompt}
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ from globalData import GlobalData
|
||||||
from apscheduler.schedulers.background import BackgroundScheduler
|
from apscheduler.schedulers.background import BackgroundScheduler
|
||||||
|
|
||||||
MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-14672"
|
MODEL_ERNIE_PATH = R"../ernie/output/checkpoint-14672"
|
||||||
MODEL_UIE_PATH = R"../uie/output/checkpoint-16380"
|
MODEL_UIE_PATH = R"../uie/output_temp/checkpoint-20860"
|
||||||
|
|
||||||
|
|
||||||
# 类别名称列表
|
# 类别名称列表
|
||||||
|
|
@ -48,8 +48,8 @@ label_map = {
|
||||||
15: 'B-personName', 34: 'I-personName',
|
15: 'B-personName', 34: 'I-personName',
|
||||||
16: 'B-personQueryType', 35: 'I-personQueryType',
|
16: 'B-personQueryType', 35: 'I-personQueryType',
|
||||||
17: 'B-projectStatus', 36: 'I-projectStatus',
|
17: 'B-projectStatus', 36: 'I-projectStatus',
|
||||||
18: 'B-skyNet', 37: 'I-skyNet',
|
18: 'B-picName', 37: 'I-picName',
|
||||||
19: 'B-programNavigation', 38: 'I-programNavigation'
|
19: 'B-designSpecificationName', 38: 'I-designSpecificationName'
|
||||||
}
|
}
|
||||||
|
|
||||||
logger = setup_logger("main", level=logging.DEBUG)
|
logger = setup_logger("main", level=logging.DEBUG)
|
||||||
|
|
@ -386,37 +386,16 @@ def extract_multi_chat(messages):
|
||||||
示例:补全模糊表达("今天送一分公司有多少作业计划", "具体是哪些") 返回 "今天送一分公司具体有哪些作业计划"
|
示例:补全模糊表达("今天送一分公司有多少作业计划", "具体是哪些") 返回 "今天送一分公司具体有哪些作业计划"
|
||||||
|
|
||||||
函数 是查询新属性(文本, 新问题):
|
函数 是查询新属性(文本, 新问题):
|
||||||
如果新问题中没有查询主体仅有查询对象 则返回TRUE
|
如果新问题中提取不到主体 且仅能提取到查询属性
|
||||||
如果新问题中仅有查询主体但没有查询对象 则返回TRUE
|
且这个查询属性和文本中提取到的查询属性不同 则返回TRUE
|
||||||
其他情况均返回FALSE
|
其他情况均返回FALSE
|
||||||
示例:是查询新属性("今天送一分公司有多少作业计划", "作业内容") 返回 True
|
示例:是查询新属性("今天送一分公司有多少作业计划", "作业内容") 返回 True
|
||||||
|
|
||||||
函数 删除数量词(文本):
|
函数 替换新属性(文本,新查询属性):
|
||||||
删除“有多少”、“多少”、“几条”、“几个”等数量问句词
|
先删除文本中的"有多少"等类似的表达数量表达,
|
||||||
|
再将文本里的查询属性替换为新查询属性,并保持其他内容不变并返回 且保持新查询属性的语气
|
||||||
函数 替换查询属性(文本, 新查询属性):
|
示例:替换新属性("今天送一分公司有多少作业计划", "作业内容") 返回 "今天送一分公司的作业内容"
|
||||||
说明:
|
|
||||||
本函数用于在删除数量词后,将原句中与“新查询属性”同类型的核心查询词替换为“新查询属性”,并确保其他内容保持不变且语义自然。
|
|
||||||
|
|
||||||
处理步骤:
|
|
||||||
1. 删除文本中的数量类词语,例如“有多少”、“多少”、“几个”、“几条”等。
|
|
||||||
2. 识别原句中的核心查询属性词,判断其与“新查询属性”是否属于相同类别(如均为对象、地点、组织等)。
|
|
||||||
3. 将原有核心查询词替换为“新查询属性”,保留句中其余上下文结构不变。
|
|
||||||
4. 保持句子语气自然,避免引入“是什么”、“有多少”等疑问表达。
|
|
||||||
|
|
||||||
返回:
|
|
||||||
返回替换后的文本,语义清晰、语气自然。
|
|
||||||
|
|
||||||
示例:
|
|
||||||
替换查询属性("今天送一分公司有多少作业计划", "作业内容")
|
|
||||||
→ "今天送一分公司的作业内容"
|
|
||||||
|
|
||||||
替换查询属性("今天送一分公司的班组详情", "送二分公司")
|
|
||||||
→ "今天送二分公司的班组详情"
|
|
||||||
|
|
||||||
替换查询属性("今天送一分公司的班组详情", "明天呢")
|
|
||||||
→ "明天送二分公司的班组详情"
|
|
||||||
|
|
||||||
函数 有完整的句意(新问题):
|
函数 有完整的句意(新问题):
|
||||||
如果新问题里有主体同时有操作对象或查询对象则返回TRUE
|
如果新问题里有主体同时有操作对象或查询对象则返回TRUE
|
||||||
其他情况均返回FALSE
|
其他情况均返回FALSE
|
||||||
|
|
@ -427,6 +406,7 @@ def extract_multi_chat(messages):
|
||||||
请你仅输出还原后的完整问题,不要输出任何变量、中间步骤或解释说明,确保结果自然通顺,语义完整。
|
请你仅输出还原后的完整问题,不要输出任何变量、中间步骤或解释说明,确保结果自然通顺,语义完整。
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
message = [
|
message = [
|
||||||
{"role": "user", "content": prompt}
|
{"role": "user", "content": prompt}
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,9 @@ import paddle
|
||||||
from paddlenlp.transformers import ErnieForTokenClassification, ErnieTokenizer
|
from paddlenlp.transformers import ErnieForTokenClassification, ErnieTokenizer
|
||||||
|
|
||||||
from globalData import GlobalData
|
from globalData import GlobalData
|
||||||
from utils import standardize_name_only_high_score, clean_useless_company_name
|
from utils import standardize_name_only_high_score, clean_useless_company_name, is_design_file
|
||||||
from constants import SUBCONTRACTOR, CONSTRUCTION_UNIT, IMPLEMENTATION_ORG, PAGE, PROGRAM_NAVIGATION, PROJECT_DEPARTMENT
|
from constants import SUBCONTRACTOR, CONSTRUCTION_UNIT, IMPLEMENTATION_ORG, PAGE, \
|
||||||
|
PROJECT_DEPARTMENT, DESIGN_SPECIFICATION, DESIGN, SPECIFICATION
|
||||||
import paddle.nn.functional as F
|
import paddle.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -178,14 +179,13 @@ class SlotRecognition:
|
||||||
else:
|
else:
|
||||||
updates[key] = value
|
updates[key] = value
|
||||||
prob_updates[key] = slot_probabilities[key]
|
prob_updates[key] = slot_probabilities[key]
|
||||||
elif key == PROGRAM_NAVIGATION or key == PAGE:
|
elif key == DESIGN_SPECIFICATION:
|
||||||
if "施" in value:
|
if is_design_file(value):
|
||||||
updates[key] = "施工生产管理平台"
|
updates[DESIGN] = value
|
||||||
|
prob_updates[DESIGN] = 1
|
||||||
else:
|
else:
|
||||||
updates[key] = value
|
updates[SPECIFICATION] = value
|
||||||
prob_updates[key] = slot_probabilities[key]
|
prob_updates[SPECIFICATION] = 1
|
||||||
|
|
||||||
# 先不处理 PROJECT_DEPARTMENT,后续单独处理
|
|
||||||
elif key != PROJECT_DEPARTMENT:
|
elif key != PROJECT_DEPARTMENT:
|
||||||
updates[key] = value
|
updates[key] = value
|
||||||
prob_updates[key] = slot_probabilities[key]
|
prob_updates[key] = slot_probabilities[key]
|
||||||
|
|
|
||||||
198
api/utils.py
198
api/utils.py
|
|
@ -13,7 +13,7 @@ import re
|
||||||
from globalData import GlobalData
|
from globalData import GlobalData
|
||||||
from constants import USELESS_COMPANY_WORDS, USELESS_PROJECT_WORDS, CONSTRUCTION_UNIT, IMPLEMENTATION_ORG, \
|
from constants import USELESS_COMPANY_WORDS, USELESS_PROJECT_WORDS, CONSTRUCTION_UNIT, IMPLEMENTATION_ORG, \
|
||||||
SUBCONTRACTOR, PROJECT_NAME, PROJECT_DEPARTMENT, RISK_LEVEL, TEAM_NAME, USELESS_PROGRAM_DEPARTMENT_WORDS, \
|
SUBCONTRACTOR, PROJECT_NAME, PROJECT_DEPARTMENT, RISK_LEVEL, TEAM_NAME, USELESS_PROGRAM_DEPARTMENT_WORDS, \
|
||||||
SLOT_KEYWORDS
|
SLOT_KEYWORDS, design_suffix, DESIGN, SPECIFICATION, PICTURE, USELESS_DESIGN_WORDS
|
||||||
|
|
||||||
from logger_util import setup_logger
|
from logger_util import setup_logger
|
||||||
|
|
||||||
|
|
@ -64,6 +64,7 @@ def load_standard_json_data(path):
|
||||||
# print(f"[Error] Failed to load local JSON file: {e}")
|
# print(f"[Error] Failed to load local JSON file: {e}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
#将字典序列的josn 存入本地文件
|
#将字典序列的josn 存入本地文件
|
||||||
def save_dict_to_file(data: dict, file_path: str):
|
def save_dict_to_file(data: dict, file_path: str):
|
||||||
"""
|
"""
|
||||||
|
|
@ -83,6 +84,7 @@ def save_dict_to_file(data: dict, file_path: str):
|
||||||
# print(f"[Error] 写入 JSON 文件失败:{e}")
|
# print(f"[Error] 写入 JSON 文件失败:{e}")
|
||||||
logger.error("[Error] 写入 JSON 文件失败:", exc_info=e)
|
logger.error("[Error] 写入 JSON 文件失败:", exc_info=e)
|
||||||
|
|
||||||
|
|
||||||
#从指定文件中加载标准化的名称列表。
|
#从指定文件中加载标准化的名称列表。
|
||||||
def load_standard_name_list(file_path: str):
|
def load_standard_name_list(file_path: str):
|
||||||
"""
|
"""
|
||||||
|
|
@ -111,6 +113,7 @@ def load_standard_name_list(file_path: str):
|
||||||
# print(f"读取文件时发生错误:{e}", flush=True)
|
# print(f"读取文件时发生错误:{e}", flush=True)
|
||||||
raise Exception(f"错误:文件 {file_path} 不存在")
|
raise Exception(f"错误:文件 {file_path} 不存在")
|
||||||
|
|
||||||
|
|
||||||
#将标准化名称列表写入指定文件中,每行一个名称。
|
#将标准化名称列表写入指定文件中,每行一个名称。
|
||||||
def save_standard_name_list_to_file(name_list, file_path):
|
def save_standard_name_list_to_file(name_list, file_path):
|
||||||
"""
|
"""
|
||||||
|
|
@ -130,6 +133,7 @@ def save_standard_name_list_to_file(name_list, file_path):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[Error] 写入文件失败:{e}")
|
logger.error(f"[Error] 写入文件失败:{e}")
|
||||||
|
|
||||||
|
|
||||||
def extract_number(text):
|
def extract_number(text):
|
||||||
"""
|
"""
|
||||||
提取项目部中的数字(支持阿拉伯数字和中文数字),并转换为统一格式(中文数字)。
|
提取项目部中的数字(支持阿拉伯数字和中文数字),并转换为统一格式(中文数字)。
|
||||||
|
|
@ -148,6 +152,7 @@ def replace_arabic_with_chinese(text):
|
||||||
将字符串中所有连续的阿拉伯数字转换为对应的中文数字。
|
将字符串中所有连续的阿拉伯数字转换为对应的中文数字。
|
||||||
示例:2024年25号 -> 二千零二十四年二十五号
|
示例:2024年25号 -> 二千零二十四年二十五号
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def convert(match):
|
def convert(match):
|
||||||
num_str = match.group()
|
num_str = match.group()
|
||||||
try:
|
try:
|
||||||
|
|
@ -187,6 +192,7 @@ def fuzzy_match_and_filter(input_key, match_pool, mapping_dict, lower_score=70,
|
||||||
else:
|
else:
|
||||||
return [mapping_dict[m[0]] for m in high_conf_matches[:top_k]]
|
return [mapping_dict[m[0]] for m in high_conf_matches[:top_k]]
|
||||||
|
|
||||||
|
|
||||||
def fuzzy_match_and_filter_only_high_score(input_key, match_pool, mapping_dict, high_score=90, top_k=3):
|
def fuzzy_match_and_filter_only_high_score(input_key, match_pool, mapping_dict, high_score=90, top_k=3):
|
||||||
"""
|
"""
|
||||||
对输入字符串在候选池中执行模糊匹配,并返回匹配程度高的映射原始值。
|
对输入字符串在候选池中执行模糊匹配,并返回匹配程度高的映射原始值。
|
||||||
|
|
@ -212,6 +218,7 @@ def fuzzy_match_and_filter_only_high_score(input_key, match_pool, mapping_dict,
|
||||||
else:
|
else:
|
||||||
return [mapping_dict[m[0]] for m in high_conf_matches[:top_k]]
|
return [mapping_dict[m[0]] for m in high_conf_matches[:top_k]]
|
||||||
|
|
||||||
|
|
||||||
def standardize_name(input_name, clean_func, simply_map, pinyin_map, lower_score=70, high_score=85):
|
def standardize_name(input_name, clean_func, simply_map, pinyin_map, lower_score=70, high_score=85):
|
||||||
"""
|
"""
|
||||||
通用名称标准化函数,按中文 → 清洗 → 简化匹配 → 拼音匹配 的顺序进行处理。
|
通用名称标准化函数,按中文 → 清洗 → 简化匹配 → 拼音匹配 的顺序进行处理。
|
||||||
|
|
@ -234,6 +241,7 @@ def standardize_name(input_name, clean_func, simply_map, pinyin_map, lower_score
|
||||||
result = fuzzy_match_and_filter(pinyin_input, list(pinyin_map.keys()), pinyin_map, lower_score, high_score)
|
result = fuzzy_match_and_filter(pinyin_input, list(pinyin_map.keys()), pinyin_map, lower_score, high_score)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def standardize_name_only_high_score(input_name, clean_func, simply_map, pinyin_map, high_score=90):
|
def standardize_name_only_high_score(input_name, clean_func, simply_map, pinyin_map, high_score=90):
|
||||||
"""
|
"""
|
||||||
通用名称标准化函数,按中文 → 清洗 → 简化匹配 → 拼音匹配 的顺序进行处理。
|
通用名称标准化函数,按中文 → 清洗 → 简化匹配 → 拼音匹配 的顺序进行处理。
|
||||||
|
|
@ -256,6 +264,7 @@ def standardize_name_only_high_score(input_name, clean_func, simply_map, pinyin_
|
||||||
result = fuzzy_match_and_filter_only_high_score(pinyin_input, list(pinyin_map.keys()), pinyin_map, high_score)
|
result = fuzzy_match_and_filter_only_high_score(pinyin_input, list(pinyin_map.keys()), pinyin_map, high_score)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
#标准化班组名称
|
#标准化班组名称
|
||||||
def standardize_team_name(input_name, simply_map, pinyin_map, lower_score=70, high_score=90):
|
def standardize_team_name(input_name, simply_map, pinyin_map, lower_score=70, high_score=90):
|
||||||
"""
|
"""
|
||||||
|
|
@ -283,7 +292,9 @@ def standardize_sub_company(input_name, simply_map, pinyin_map, lower_score=55,
|
||||||
:return: 匹配的标准公司名列表
|
:return: 匹配的标准公司名列表
|
||||||
"""
|
"""
|
||||||
temp_input_name = replace_arabic_with_chinese(input_name)
|
temp_input_name = replace_arabic_with_chinese(input_name)
|
||||||
return standardize_name(temp_input_name, clean_useless_company_name, simply_map, pinyin_map, lower_score, high_score)
|
return standardize_name(temp_input_name, clean_useless_company_name, simply_map, pinyin_map, lower_score,
|
||||||
|
high_score)
|
||||||
|
|
||||||
|
|
||||||
def standardize_project_name(input_name, simply_map, pinyin_map, lower_score=70, high_score=90):
|
def standardize_project_name(input_name, simply_map, pinyin_map, lower_score=70, high_score=90):
|
||||||
"""
|
"""
|
||||||
|
|
@ -365,7 +376,8 @@ def multiple_standardize_single_name(origin_input_name, origin_name_list, pinyin
|
||||||
limit=len(origin_name_list))
|
limit=len(origin_name_list))
|
||||||
# 找到所有相似度 > 80 的匹配项
|
# 找到所有相似度 > 80 的匹配项
|
||||||
original_high_confidence_matches = [(match[0], match[1]) for match in match_results if match[1] >= lower_score]
|
original_high_confidence_matches = [(match[0], match[1]) for match in match_results if match[1] >= lower_score]
|
||||||
logger.info(f"standardize_pinyin_single_name 原始名匹配, high_confidence_matches:{original_high_confidence_matches[:3]}")
|
logger.info(
|
||||||
|
f"standardize_pinyin_single_name 原始名匹配, high_confidence_matches:{original_high_confidence_matches[:3]}")
|
||||||
|
|
||||||
combined_low_confidence_matches = []
|
combined_low_confidence_matches = []
|
||||||
if original_high_confidence_matches:
|
if original_high_confidence_matches:
|
||||||
|
|
@ -382,7 +394,7 @@ def multiple_standardize_single_name(origin_input_name, origin_name_list, pinyin
|
||||||
return list(dict.fromkeys(combined_low_confidence_matches))
|
return list(dict.fromkeys(combined_low_confidence_matches))
|
||||||
|
|
||||||
|
|
||||||
def generate_project_prompt_with_key(matched_projects, original_name="", slot_key = IMPLEMENTATION_ORG):
|
def generate_project_prompt_with_key(matched_projects, original_name="", slot_key=IMPLEMENTATION_ORG):
|
||||||
"""
|
"""
|
||||||
生成提示信息,用于让用户确认匹配的项目名或分公司名或项目名。
|
生成提示信息,用于让用户确认匹配的项目名或分公司名或项目名。
|
||||||
|
|
||||||
|
|
@ -393,7 +405,8 @@ def generate_project_prompt_with_key(matched_projects, original_name="", slot_ke
|
||||||
返回:
|
返回:
|
||||||
str: 生成的提示信息。如果未找到匹配项,返回提示用户提供更准确信息的字符串。
|
str: 生成的提示信息。如果未找到匹配项,返回提示用户提供更准确信息的字符串。
|
||||||
"""
|
"""
|
||||||
logger.info(f"generate_project_prompt_with_key slot_key:{slot_key},original_name:{original_name},matched_projects:{matched_projects} ")
|
logger.info(
|
||||||
|
f"generate_project_prompt_with_key slot_key:{slot_key},original_name:{original_name},matched_projects:{matched_projects} ")
|
||||||
type = ""
|
type = ""
|
||||||
if slot_key == CONSTRUCTION_UNIT:
|
if slot_key == CONSTRUCTION_UNIT:
|
||||||
type = "建管单位名"
|
type = "建管单位名"
|
||||||
|
|
@ -409,7 +422,7 @@ def generate_project_prompt_with_key(matched_projects, original_name="", slot_ke
|
||||||
# print(f"generate_project_prompt_with_key type:{type} ")
|
# print(f"generate_project_prompt_with_key type:{type} ")
|
||||||
logger.info(f"generate_project_prompt_with_key type:{type} ")
|
logger.info(f"generate_project_prompt_with_key type:{type} ")
|
||||||
if not matched_projects:
|
if not matched_projects:
|
||||||
if slot_key in (CONSTRUCTION_UNIT,IMPLEMENTATION_ORG,SUBCONTRACTOR):
|
if slot_key in (CONSTRUCTION_UNIT, IMPLEMENTATION_ORG, SUBCONTRACTOR):
|
||||||
return f"<p>未找到匹配的<strong>公司名</strong>:{original_name},请提供更准确的公司名信息。</p>"
|
return f"<p>未找到匹配的<strong>公司名</strong>:{original_name},请提供更准确的公司名信息。</p>"
|
||||||
else:
|
else:
|
||||||
return f"<p>未找到匹配的:{original_name},请提供更准确的信息。</p>"
|
return f"<p>未找到匹配的:{original_name},请提供更准确的信息。</p>"
|
||||||
|
|
@ -424,6 +437,7 @@ def generate_project_prompt_with_key(matched_projects, original_name="", slot_ke
|
||||||
html_parts.append("<p>请确认您要选择哪一个?</p>")
|
html_parts.append("<p>请确认您要选择哪一个?</p>")
|
||||||
return "\n".join(html_parts)
|
return "\n".join(html_parts)
|
||||||
|
|
||||||
|
|
||||||
def generate_project_prompt(matched_projects, original_name="", type="项目部名"):
|
def generate_project_prompt(matched_projects, original_name="", type="项目部名"):
|
||||||
"""
|
"""
|
||||||
生成提示信息,用于让用户确认匹配的项目名或分公司名或项目名。
|
生成提示信息,用于让用户确认匹配的项目名或分公司名或项目名。
|
||||||
|
|
@ -448,6 +462,7 @@ def generate_project_prompt(matched_projects, original_name="", type="项目部
|
||||||
html_parts.append("<p>请确认您要选择哪一个?</p>")
|
html_parts.append("<p>请确认您要选择哪一个?</p>")
|
||||||
return "\n".join(html_parts)
|
return "\n".join(html_parts)
|
||||||
|
|
||||||
|
|
||||||
def generate_confirm_prompt(matched_projects, original_name="", type="项目部名"):
|
def generate_confirm_prompt(matched_projects, original_name="", type="项目部名"):
|
||||||
"""
|
"""
|
||||||
生成提示信息,用于让用户确认匹配的项目名或分公司名或项目名。
|
生成提示信息,用于让用户确认匹配的项目名或分公司名或项目名。
|
||||||
|
|
@ -495,6 +510,8 @@ company_symbols_pattern = re.compile(r"[\s\W_]+")
|
||||||
|
|
||||||
useless_team_leader_words_pattern = re.compile("班组")
|
useless_team_leader_words_pattern = re.compile("班组")
|
||||||
|
|
||||||
|
useless_design_words_pattern = re.compile("|".join(USELESS_DESIGN_WORDS))
|
||||||
|
|
||||||
|
|
||||||
def clean_useless_project_name(name: str) -> str:
|
def clean_useless_project_name(name: str) -> str:
|
||||||
# 去掉无意义词
|
# 去掉无意义词
|
||||||
|
|
@ -510,11 +527,13 @@ def clean_useless_company_name(name: str) -> str:
|
||||||
name = company_symbols_pattern.sub("", name)
|
name = company_symbols_pattern.sub("", name)
|
||||||
return name.strip()
|
return name.strip()
|
||||||
|
|
||||||
|
|
||||||
def clean_useless_team_leader_name(name: str) -> str:
|
def clean_useless_team_leader_name(name: str) -> str:
|
||||||
# 去掉无意义词
|
# 去掉无意义词
|
||||||
name = useless_team_leader_words_pattern.sub("", name)
|
name = useless_team_leader_words_pattern.sub("", name)
|
||||||
return name.strip()
|
return name.strip()
|
||||||
|
|
||||||
|
|
||||||
#去掉项目部里面的不重要词汇
|
#去掉项目部里面的不重要词汇
|
||||||
def clean_useless_program_departement_name(name: str) -> str:
|
def clean_useless_program_departement_name(name: str) -> str:
|
||||||
# 去掉无意义词
|
# 去掉无意义词
|
||||||
|
|
@ -523,6 +542,21 @@ def clean_useless_program_departement_name(name: str) -> str:
|
||||||
name = project_symbols_pattern.sub("", name)
|
name = project_symbols_pattern.sub("", name)
|
||||||
return name.strip()
|
return name.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def clean_useless_design_name(name: str) -> str:
    """Normalize a scheme (design) name before matching.

    Removes every word matched by ``useless_design_words_pattern`` (built
    from ``USELESS_DESIGN_WORDS``), then everything matched by
    ``project_symbols_pattern`` (per the original comment: digits, letters
    and symbols), and finally trims surrounding whitespace.
    """
    without_filler_words = useless_design_words_pattern.sub("", name)
    without_symbols = project_symbols_pattern.sub("", without_filler_words)
    return without_symbols.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def clean_useless_specification_name(name: str) -> str:
    """Normalize a specification name before matching.

    Removes everything matched by ``project_symbols_pattern`` (per the
    original comment: digits, letters and symbols) and trims surrounding
    whitespace.
    """
    return project_symbols_pattern.sub("", name).strip()
|
||||||
|
|
||||||
|
|
||||||
#槽位缺失检查
|
#槽位缺失检查
|
||||||
def check_lost(int_res, slot):
|
def check_lost(int_res, slot):
|
||||||
#labels: ["天气查询","通用对话","页面切换","日计划数量查询","周计划数量查询","日计划作业内容","周计划作业内容","施工人数","作业考勤人数","知识问答"]
|
#labels: ["天气查询","通用对话","页面切换","日计划数量查询","周计划数量查询","日计划作业内容","周计划作业内容","施工人数","作业考勤人数","知识问答"]
|
||||||
|
|
@ -572,7 +606,7 @@ def check_lost(int_res, slot):
|
||||||
apologize_str = "非常抱歉,"
|
apologize_str = "非常抱歉,"
|
||||||
# if int_res == 2:
|
# if int_res == 2:
|
||||||
# return CheckResult.NEEDS_MORE_ROUNDS, f"{apologize_str}请问你想查询哪个页面?"
|
# return CheckResult.NEEDS_MORE_ROUNDS, f"{apologize_str}请问你想查询哪个页面?"
|
||||||
if int_res in [3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15,16]:
|
if int_res in [3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16]:
|
||||||
return CheckResult.NEEDS_MORE_ROUNDS, f"{apologize_str}请问你想查询什么时间的{intention_mapping[int_res]}?"
|
return CheckResult.NEEDS_MORE_ROUNDS, f"{apologize_str}请问你想查询什么时间的{intention_mapping[int_res]}?"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -607,7 +641,8 @@ def check_standard_name_slot_probability(int_res, slot) -> tuple:
|
||||||
if match_results and len(match_results) == 1:
|
if match_results and len(match_results) == 1:
|
||||||
slot[key] = match_results[0]
|
slot[key] = match_results[0]
|
||||||
else:
|
else:
|
||||||
prompt = generate_project_prompt_with_key(match_results, original_name=slot[IMPLEMENTATION_ORG], slot_key= IMPLEMENTATION_ORG)
|
prompt = generate_project_prompt_with_key(match_results, original_name=slot[IMPLEMENTATION_ORG],
|
||||||
|
slot_key=IMPLEMENTATION_ORG)
|
||||||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||||
|
|
||||||
if key == CONSTRUCTION_UNIT:
|
if key == CONSTRUCTION_UNIT:
|
||||||
|
|
@ -618,7 +653,8 @@ def check_standard_name_slot_probability(int_res, slot) -> tuple:
|
||||||
if match_results and len(match_results) == 1:
|
if match_results and len(match_results) == 1:
|
||||||
slot[key] = match_results[0]
|
slot[key] = match_results[0]
|
||||||
else:
|
else:
|
||||||
prompt = generate_project_prompt_with_key(match_results, original_name=slot[CONSTRUCTION_UNIT], slot_key= CONSTRUCTION_UNIT)
|
prompt = generate_project_prompt_with_key(match_results, original_name=slot[CONSTRUCTION_UNIT],
|
||||||
|
slot_key=CONSTRUCTION_UNIT)
|
||||||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||||
|
|
||||||
if key == SUBCONTRACTOR:
|
if key == SUBCONTRACTOR:
|
||||||
|
|
@ -629,12 +665,14 @@ def check_standard_name_slot_probability(int_res, slot) -> tuple:
|
||||||
if match_results and len(match_results) == 1:
|
if match_results and len(match_results) == 1:
|
||||||
slot[key] = match_results[0]
|
slot[key] = match_results[0]
|
||||||
else:
|
else:
|
||||||
prompt = generate_project_prompt_with_key(match_results, original_name=slot[SUBCONTRACTOR], slot_key= SUBCONTRACTOR)
|
prompt = generate_project_prompt_with_key(match_results, original_name=slot[SUBCONTRACTOR],
|
||||||
|
slot_key=SUBCONTRACTOR)
|
||||||
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
return CheckResult.NEEDS_MORE_ROUNDS, prompt
|
||||||
|
|
||||||
if key == PROJECT_DEPARTMENT:
|
if key == PROJECT_DEPARTMENT:
|
||||||
logger.info(f"check_standard_name_slot 原始项目部名 : {slot[PROJECT_DEPARTMENT]}")
|
logger.info(f"check_standard_name_slot 原始项目部名 : {slot[PROJECT_DEPARTMENT]}")
|
||||||
match_results = standardize_projectDepartment(slot[IMPLEMENTATION_ORG], value, GlobalData.standard_company_program,
|
match_results = standardize_projectDepartment(slot[IMPLEMENTATION_ORG], value,
|
||||||
|
GlobalData.standard_company_program,
|
||||||
high_score=95)
|
high_score=95)
|
||||||
logger.info(f"check_standard_name_slot 匹配后项目部名: result:{match_results}")
|
logger.info(f"check_standard_name_slot 匹配后项目部名: result:{match_results}")
|
||||||
if match_results and len(match_results) == 1:
|
if match_results and len(match_results) == 1:
|
||||||
|
|
@ -657,9 +695,139 @@ def check_standard_name_slot_probability(int_res, slot) -> tuple:
|
||||||
"五级"]:
|
"五级"]:
|
||||||
return CheckResult.NEEDS_MORE_ROUNDS, "您查询的风险等级在系统中未找到,请确认风险等级后再次提问"
|
return CheckResult.NEEDS_MORE_ROUNDS, "您查询的风险等级在系统中未找到,请确认风险等级后再次提问"
|
||||||
|
|
||||||
|
#前提已经做过公司名和工程名的标准化
|
||||||
|
return standardize_specification_design_pic(slot)
|
||||||
|
# return CheckResult.NO_MATCH, ""
|
||||||
|
|
||||||
|
|
||||||
|
def standardize_implement_company(slot_item) -> tuple:
    """Standardize the branch-company name stored under IMPLEMENTATION_ORG.

    The raw value is passed to ``standardize_sub_company`` together with the
    simplified-name and pinyin lookup maps held on ``GlobalData``.

    Args:
        slot_item: Slot mapping; updated in place when exactly one standard
            name is matched.

    Returns:
        ``(CheckResult.NO_MATCH, "")`` when the key is absent or a single
        standard name was found and written back;
        ``(CheckResult.NEEDS_MORE_ROUNDS, prompt)`` when zero or several
        candidates were returned and the user has to disambiguate.
    """
    # Nothing to standardize when the slot carries no company name.
    if IMPLEMENTATION_ORG not in slot_item:
        return CheckResult.NO_MATCH, ""

    raw_name = slot_item[IMPLEMENTATION_ORG]
    logger.info(f"standardize_specification_design_pic 原始分公司名 : {raw_name}")
    # 70 / 90 are forwarded unchanged; presumably the low / high fuzzy-score
    # thresholds of standardize_sub_company -- TODO confirm against the helper.
    candidates = standardize_sub_company(
        raw_name,
        GlobalData.simply_to_standard_company_name_map,
        GlobalData.pinyin_simply_to_standard_company_name_map,
        70,
        90,
    )
    logger.info(f"standardize_specification_design_pic 匹配后分公司名: result:{candidates}")

    # Anything other than exactly one candidate needs another dialogue round.
    if not candidates or len(candidates) != 1:
        prompt = generate_project_prompt_with_key(
            candidates,
            original_name=raw_name,
            slot_key=IMPLEMENTATION_ORG,
        )
        return CheckResult.NEEDS_MORE_ROUNDS, prompt

    slot_item[IMPLEMENTATION_ORG] = candidates[0]
    return CheckResult.NO_MATCH, ""
|
||||||
|
|
||||||
|
|
||||||
|
def standardize_project(slot_item) -> tuple:
    """Standardize the project name stored under PROJECT_NAME.

    The raw value is passed to ``standardize_project_name`` together with the
    simplified-name and pinyin lookup maps held on ``GlobalData``.

    Args:
        slot_item: Slot mapping; updated in place when exactly one standard
            project name is matched.

    Returns:
        ``(CheckResult.NO_MATCH, "")`` when the key is absent or a single
        standard name was found and written back;
        ``(CheckResult.NEEDS_MORE_ROUNDS, prompt)`` when zero or several
        candidates were returned and the user has to disambiguate.
    """
    # Nothing to standardize when the slot carries no project name.
    if PROJECT_NAME not in slot_item:
        return CheckResult.NO_MATCH, ""

    raw_name = slot_item[PROJECT_NAME]
    logger.info(f"standardize_specification_design_pic 原始工程名 : {raw_name}")
    # 70 / 90 are forwarded unchanged; presumably the low / high fuzzy-score
    # thresholds of standardize_project_name -- TODO confirm against the helper.
    candidates = standardize_project_name(
        raw_name,
        GlobalData.simply_to_standard_project_name_map,
        GlobalData.pinyin_simply_to_standard_project_name_map,
        70,
        90,
    )
    logger.info(f"standardize_specification_design_pic 匹配后工程名 :result:{candidates}")

    # Anything other than exactly one candidate needs another dialogue round.
    if not candidates or len(candidates) != 1:
        prompt = generate_project_prompt(candidates, original_name=raw_name, type="工程名")
        return CheckResult.NEEDS_MORE_ROUNDS, prompt

    slot_item[PROJECT_NAME] = candidates[0]
    return CheckResult.NO_MATCH, ""
|
||||||
|
|
||||||
|
|
||||||
|
def standardize_design(slot_item) -> tuple:
    """Standardize the project name in ``slot_item`` (alias of standardize_project).

    The original body was a line-for-line copy of ``standardize_project``: it
    reads and rewrites PROJECT_NAME and never looks at the design slot. To
    keep a single implementation, this function now delegates to it.

    NOTE(review): despite the name, no design/scheme name is standardized
    here; that work is done by ``standardize_specification_design_pic``.
    Confirm whether this function is still needed or should be removed.

    Args:
        slot_item: Slot mapping; PROJECT_NAME is updated in place on a unique match.

    Returns:
        The ``(CheckResult, prompt)`` tuple produced by ``standardize_project``.
    """
    return standardize_project(slot_item)
|
||||||
|
|
||||||
|
|
||||||
|
def _standardize_document_slot(slot, slot_key, content_key, type_label, log_label,
                               company, project, cleaner=None) -> tuple:
    """Standardize one document-name slot against the contents of a project.

    Args:
        slot: Slot mapping; ``slot[slot_key]`` is replaced in place on a unique match.
        slot_key: Key of the document name inside ``slot``.
        content_key: Category key inside the mapping returned by
            ``GlobalData.get_contents_by_company_proj``.
        type_label: Label forwarded as ``type`` to ``generate_project_prompt``.
        log_label: Label used in the log line written when a prompt is returned.
        company: Company name used to look up the contents.
        project: Project name used to look up the contents.
        cleaner: Optional callable applied to both the candidate names and the
            user input before fuzzy matching; ``None`` means names are compared as-is.

    Returns:
        ``(CheckResult.NO_MATCH, '')`` when exactly one candidate matched,
        otherwise ``(CheckResult.NEEDS_MORE_ROUNDS, prompt)``.
    """
    original_name = slot[slot_key]
    contents = GlobalData.get_contents_by_company_proj(company, project)
    if not contents:
        # No contents are known for this company/project: ask the user again.
        prompt = generate_project_prompt([], original_name=original_name, type=type_label)
        return CheckResult.NEEDS_MORE_ROUNDS, prompt

    normalize = cleaner if cleaner is not None else (lambda name: name)
    # Map the simplified form of every candidate back to its standard name.
    simple_to_standard = {normalize(name): name for name in contents[content_key]}
    # 70 / 90 are kept from the original call; presumably low / high
    # fuzzy-score thresholds -- TODO confirm against fuzzy_match_and_filter.
    result = fuzzy_match_and_filter(normalize(original_name), list(simple_to_standard.keys()),
                                    simple_to_standard, 70, 90)
    if result and len(result) == 1:
        slot[slot_key] = result[0]
        return CheckResult.NO_MATCH, ''

    prompt = generate_project_prompt(result, original_name=original_name, type=type_label)
    # Previously a bare print(); routed through the module logger instead.
    logger.info(f"{log_label}标准化返回:{prompt}")
    return CheckResult.NEEDS_MORE_ROUNDS, prompt


def standardize_specification_design_pic(slot) -> tuple:
    """Standardize the design, specification or drawing name found in ``slot``.

    Only the first key present among DESIGN, SPECIFICATION and PICTURE is
    processed, in that order. The company and project names already in
    ``slot`` are used as-is to look up the candidate documents; the caller is
    expected to have standardized them beforehand.

    Fixes over the previous version: the specification and drawing branches
    no longer read ``slot[DESIGN]`` (a KeyError when that key is absent) when
    no contents are found, and the drawing branch now uses the drawing label
    instead of the specification label in that prompt.

    Args:
        slot: Slot mapping; the matched document key is updated in place.

    Returns:
        ``(CheckResult.NO_MATCH, '')`` when nothing had to be standardized or a
        unique match was written back; ``(CheckResult.NEEDS_MORE_ROUNDS, prompt)``
        when the user must clarify the document name.
    """
    company = slot[IMPLEMENTATION_ORG] if IMPLEMENTATION_ORG in slot else ""
    project = slot[PROJECT_NAME] if PROJECT_NAME in slot else ""

    if DESIGN in slot:
        return _standardize_document_slot(slot, DESIGN, "方案", "方案名", "方案名",
                                          company, project,
                                          cleaner=clean_useless_design_name)
    if SPECIFICATION in slot:
        return _standardize_document_slot(slot, SPECIFICATION, "规范规程", "规范规程名", "规程规范名",
                                          company, project,
                                          cleaner=clean_useless_specification_name)
    if PICTURE in slot:
        # Drawing names are matched without any simplification step.
        return _standardize_document_slot(slot, PICTURE, "图纸", "图纸名", "图纸名",
                                          company, project)

    return CheckResult.NO_MATCH, ''
|
||||||
|
|
||||||
|
|
||||||
def process_msg_content(content):
|
def process_msg_content(content):
|
||||||
if not any(keyword in content for keyword in SLOT_KEYWORDS):
|
if not any(keyword in content for keyword in SLOT_KEYWORDS):
|
||||||
match = re.search(r"^.*?[。!?.!?::]", content)
|
match = re.search(r"^.*?[。!?.!?::]", content)
|
||||||
|
|
@ -669,4 +837,10 @@ def process_msg_content(content):
|
||||||
else:
|
else:
|
||||||
return content.strip()
|
return content.strip()
|
||||||
else:
|
else:
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
def is_design_file(file_name):
    """Return True when ``file_name`` ends with one of the ``design_suffix`` entries.

    Trailing closing brackets, closing quotes and whitespace are stripped
    before the suffix comparison.
    """
    trailing_noise = r"[》〉》】】))>)>」』」》))》」』)】\s]+$"
    stripped_name = re.sub(trailing_noise, "", file_name)
    return any(stripped_name.endswith(suffix) for suffix in design_suffix)
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,8 @@ BASE_DATA = {
|
||||||
"宿州萧砀线路工程建筑部分",
|
"宿州萧砀线路工程建筑部分",
|
||||||
"1000kV淮芜Ⅰ线(PROJ-2020-0204-0003)",
|
"1000kV淮芜Ⅰ线(PROJ-2020-0204-0003)",
|
||||||
"35kV接地极线路雁淮线",
|
"35kV接地极线路雁淮线",
|
||||||
"110kV接地极线路(吉泉线)(PROJ-2020-0204-0002)"
|
"110kV接地极线路(吉泉线)(PROJ-2020-0204-0002)",
|
||||||
|
"国网安徽宣城供电公司500kV河沥变加装固定融冰装置项目工程"
|
||||||
],
|
],
|
||||||
# 项目部名称
|
# 项目部名称
|
||||||
"project_departments": ["第一项目部金上","调试一队", "第9项目管理部","第9项目管理部门", "金上第十一项目部门", "第八项目管理部(合肥)", "肥东9号项目部",
|
"project_departments": ["第一项目部金上","调试一队", "第9项目管理部","第9项目管理部门", "金上第十一项目部门", "第八项目管理部(合肥)", "肥东9号项目部",
|
||||||
|
|
@ -77,10 +78,11 @@ BASE_DATA = {
|
||||||
"risk_levels": ["1级", "一级", "二级", "5级", "四级"],
|
"risk_levels": ["1级", "一级", "二级", "5级", "四级"],
|
||||||
# 8+2工况
|
# 8+2工况
|
||||||
"operatings": ["8+2工况", "8加2工况"],
|
"operatings": ["8+2工况", "8加2工况"],
|
||||||
# 页面切换
|
# 页面切换,不能有方案,图纸和规程规范这些数据集出现在pages,否则会冲突
|
||||||
"pages": ["风险管控", "日计划", "周风险", "日计划统计报表", "日计划推送", "生产管控中心", "考勤统计详情",
|
"pages": ["风险管控", "日计划", "周风险", "日计划统计报表", "日计划推送", "生产管控中心", "考勤统计详情",
|
||||||
"今日作业计划", "周风险统计报表", "周风险推送", "进度管理", "技术管理", "项目团队", "质量管理",
|
"今日作业计划", "周风险统计报表", "周风险推送", "进度管理", "技术管理", "项目团队", "质量管理",
|
||||||
"云上会议", "项目巡航", "施工生产管理平台"],
|
"云上会议", "项目巡航", "施工生产管理平台", "数字化项目部","数字化项目部管理平台","施工生产管理平台",
|
||||||
|
"经营管理", "物资管理", "共享资料", "党建+", "党建加", "摄像头", "视频"],
|
||||||
# 具体人名
|
# 具体人名
|
||||||
"person_names": ["何东洋", "李东","王孙强林"],
|
"person_names": ["何东洋", "李东","王孙强林"],
|
||||||
# 人名查询目标
|
# 人名查询目标
|
||||||
|
|
@ -89,10 +91,22 @@ BASE_DATA = {
|
||||||
# 工程状态
|
# 工程状态
|
||||||
"project_status_s": ["在建", "在作业", "在施工",""],
|
"project_status_s": ["在建", "在作业", "在施工",""],
|
||||||
|
|
||||||
|
"pic_names": ["四号线施工图", "框架柱详图", "500kvgis室吊车梁布置图", "_站区道路及进站道路详图_A2", "_辅助用房建筑设计说明一_A2","平断面定位图目录",
|
||||||
|
"基础明细表","杆塔明细表","(500-SJC31151)_1-110 ","接地装置施工图","平断面定位图卷册说明"],
|
||||||
|
|
||||||
|
"design_specification_names": [
|
||||||
|
"《35kV电力电缆交流耐压试验方案》","220kV南蒙2753线拆线、拆塔施工方案","悬索封网实验方案","灌注桩承台基础施工方案"
|
||||||
|
"一般跨越施工措施", "省道专项施工方案","吊车组立角钢塔施工方案","承台基础及接地施工措施","项目管理实施规划","电力电缆方案","线路拆旧跨越110kV线路施工方案",
|
||||||
|
"断面悬浮抱杆组塔施工方案","灌注桩基础及接地施工措施"
|
||||||
|
|
||||||
|
"110kV-750kV架空输电线路铁塔基础施工工艺导则","国网(基建2)112-2022 国家电网有限公司输变电工程建设质量管理规定","1000kV架空输电线路施工质量检验及评定规程","《国家电网有限公司施工项目部标准化管理手册线路工程分册》",
|
||||||
|
"国家电网有限公司输变电工程标准工艺(电缆工程分册)2022版","架空输电线路螺旋锚基础施工及质量验收规范","国家电网有限公司安全生产反违章工作管理办法"],
|
||||||
|
|
||||||
|
|
||||||
#皖送天网
|
#皖送天网
|
||||||
"sky_nets": ["摄像头", "视频"],
|
# "sky_nets": ["摄像头", "视频"],
|
||||||
#项目巡航
|
#项目巡航
|
||||||
"program_navigations": ["数字化项目部", "数字化项目部管理平台", "施工生产管理平台"],
|
# "program_navigations": ["数字化项目部", "数字化项目部管理平台", "施工生产管理平台"],
|
||||||
}
|
}
|
||||||
|
|
||||||
# 自然语言模板配置
|
# 自然语言模板配置
|
||||||
|
|
@ -748,39 +762,43 @@ TEMPLATE_CONFIG = {
|
||||||
"date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"],
|
"date": ["今日", "昨日", "2024年5月24日", "5月24日", "今天", "昨天"],
|
||||||
"templates": [
|
"templates": [
|
||||||
("打开{page}页面", ["page"]),
|
("打开{page}页面", ["page"]),
|
||||||
("打开{page}", ["page"]),
|
("打开{page}<页面>", ["page"]),
|
||||||
("切换{page}模块", ["page"]),
|
|
||||||
("切换到{page}页面", ["page"]),
|
|
||||||
("跳转到{page}。", ["page"]),
|
|
||||||
("跳转到{page}模块", ["page"]),
|
|
||||||
("切换到{page}页面", ["page"]),
|
("切换到{page}页面", ["page"]),
|
||||||
("切换{page}模块", ["page"]),
|
("切换{page}模块", ["page"]),
|
||||||
("请打开{page}模块", ["page"]),
|
("切换{page}模块", ["page"]),
|
||||||
("请打开{page}。", ["page"]),
|
|
||||||
("请切换到{page}页面", ["page"]),
|
("打开{implementation_organization}{page}", ["implementation_organization", "page"]),
|
||||||
("切换{page}", ["page"]),
|
|
||||||
#施工生产管理平台
|
("打开{implementation_organization}{project_department}{page}",
|
||||||
("打开{program_navigation}", ["program_navigation"]),
|
["implementation_organization", "project_department", "page"]),
|
||||||
("打开{program_navigation}。", ["program_navigation"]),
|
|
||||||
#项目巡航:分公司
|
("切换到{implementation_organization}{page}",
|
||||||
("打开{implementation_organization}{program_navigation}", ["implementation_organization", "program_navigation"]),
|
["implementation_organization", "page"]),
|
||||||
("打开{implementation_organization}{program_navigation}。", ["implementation_organization", "program_navigation"]),
|
|
||||||
#项目巡航:分公司、项目部
|
("打开{project_name}{page}", ["project_name", "page"]),
|
||||||
("打开{implementation_organization}{project_department}{program_navigation}",
|
("切换到{project_name}{page}", ["project_name", "page"]),
|
||||||
["implementation_organization", "project_department", "program_navigation"]),
|
|
||||||
#项目巡航:分公司
|
("切换到{team_name}{page}", ["team_name", "page"]),
|
||||||
("切换到{implementation_organization}{program_navigation}",
|
("打开{team_name}{page}", ["team_name", "page"]),
|
||||||
["implementation_organization", "program_navigation"]),
|
|
||||||
#项目巡航,工程
|
# design_names, pic_names,specification_names
|
||||||
("打开{project_name}{program_navigation}", ["project_name", "program_navigation"]),
|
#方案和规程规范
|
||||||
#皖智天网,工程名摄像头
|
("打开{design_specification_name}<方案>", ["project_name", "design_specification_name"]),
|
||||||
("打开{project_name}{sky_net}", ["project_name", "sky_net"]),
|
("打开{design_specification_name}", ["project_name", "design_specification_name"]),
|
||||||
("切换到{project_name}{sky_net}", ["project_name", "sky_net"]),
|
("打开{project_name}{design_specification_name}<方案>", ["project_name", "design_specification_name"]),
|
||||||
#皖智天网,班组名摄像头
|
|
||||||
("切换到{team_name}{sky_net}", ["team_name", "sky_net"]),
|
("打开{project_name}的{design_specification_name}", ["project_name", "design_specification_name"]),
|
||||||
("切换{team_name}{sky_net}", ["team_name", "sky_net"]),
|
|
||||||
("打开{team_name}{sky_net}", ["team_name", "sky_net"]),
|
("打开{implementation_organization}{project_name}的{design_specification_name}",
|
||||||
#施工生产管理平台
|
["implementation_organization", "project_name", "design_specification_name"]),
|
||||||
|
|
||||||
|
#图纸
|
||||||
|
("打开{pic_name}", ["pic_name"]),
|
||||||
|
("打开{project_name}{pic_name}", ["project_name", "pic_name"]),
|
||||||
|
("打开{project_name}{pic_name}<图纸>", ["project_name", "pic_name"]),
|
||||||
|
("打开{project_name}的{pic_name}", ["project_name", "pic_name"]),
|
||||||
|
("打开{implementation_organization}{project_name}的{pic_name}",
|
||||||
|
["implementation_organization", "project_name", "pic_name"]),
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"作业面查询": {
|
"作业面查询": {
|
||||||
|
|
@ -1348,8 +1366,11 @@ def generate_natural_samples(config, label):
|
||||||
"person_name": BASE_DATA["person_names"],
|
"person_name": BASE_DATA["person_names"],
|
||||||
"person_query_type": BASE_DATA["person_query_types"],
|
"person_query_type": BASE_DATA["person_query_types"],
|
||||||
"project_status": BASE_DATA["project_status_s"],
|
"project_status": BASE_DATA["project_status_s"],
|
||||||
"sky_net": BASE_DATA["sky_nets"],
|
"pic_name": BASE_DATA["pic_names"],
|
||||||
"program_navigation": BASE_DATA["program_navigations"],
|
"design_specification_name": BASE_DATA["design_specification_names"],
|
||||||
|
|
||||||
|
# "sky_net": BASE_DATA["sky_nets"],
|
||||||
|
# "program_navigation": BASE_DATA["program_navigations"],
|
||||||
}
|
}
|
||||||
|
|
||||||
for template, variables in config["templates"]:
|
for template, variables in config["templates"]:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue