# File-viewer metadata (not part of the program): 362 lines, 14 KiB, Python.
from globalData import GlobalData
|
||
from utils import standardize_name, clean_useless_team_leader_name, standardize_sub_company, standardize_project_name, \
|
||
standardize_projectDepartment, standardize_team_name, check_standard_name_slot_probability
|
||
import time
|
||
|
||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||
from globalData import GlobalData
|
||
|
||
# def job():
|
||
# print("[Info] Executing update_from_redis...")
|
||
# GlobalData.update_from_redis()
|
||
#
|
||
#
|
||
# Runs at module level, before any helper below is called. Presumably this
# populates the GlobalData name maps that the helpers read -- confirm against
# globalData.GlobalData.update_from_redis.
GlobalData.update_from_redis()
|
||
|
||
def check_standard_name_slot_probability_test():
    """Print what check_standard_name_slot_probability yields for sample slots.

    Each sample is a dict holding one name-like field (construction unit,
    implementation organization, subcontractor or team name); all but the
    first also carry a "date" entry.
    """
    today = "今天"
    sample_slots = (
        {"constructionUnit": "合肥供电公司"},
        {"date": today, "constructionUnit": "芜湖供电公司"},
        {"date": today, "implementationOrganization": "送电一分公司"},
        {"date": today, "subcontractor": "百瑞建设发展有限公司"},
        {"date": today, "subcontractor": "安徽宝德电力建设工程有限"},
        {"date": today, "teamName": "徐局班组"},
    )
    for raw_slot in sample_slots:
        # NOTE(review): the meaning of the leading 12 is not visible in this
        # file -- confirm against utils.check_standard_name_slot_probability.
        outcome = check_standard_name_slot_probability(12, raw_slot)
        print(f"加权混合策略 项目部名称 输入: 原始槽位:{raw_slot},输出: {outcome}")
|
||
|
||
|
||
def standardize_team_leader_test():
    """Run standardize_team_name over sample team names and print each result.

    Every sample is matched against the team-leader maps held by GlobalData,
    with lower_score=70 and high_score=90.
    """
    spoken_team_names = (
        "李东班组", "磐基班组", "章永班组", "张勇班组",
        "王治国班组", "代贵华班组", "黄安印班组", "刘闩班组",
        "王虎班组", "周勇勇班组", "魏在华班组", "王礼良班组",
        "林学刚班组", "崔新荣班组", "江军班组", "笪淦班组",
        "杨海平班组", "蔡来云班组", "贺中林班组", "何勇班组",
        "韦幸朝班组", "刘文虎班组", "金生班组", "段宝强班组",
        "何计划班组", "刘兆班组", "徐南班组", "贺广飞班组",
        "孙泽栋班组", "钱小林班组", "朱锋东班组",
    )
    score_window = {"lower_score": 70, "high_score": 90}

    for team_name in spoken_team_names:
        outcome = standardize_team_name(
            team_name,
            GlobalData.simply_to_standard_team_leader_name_map,
            GlobalData.pinyin_simply_to_standard_team_leader_name_map,
            **score_window,
        )
        # Disabled alternative, kept for reference: the generic matcher with
        # an explicit cleaning function.
        # outcome = standardize_name(team_name, clean_useless_team_leader_name, GlobalData.simply_to_standard_team_leader_name_map,
        #                            GlobalData.pinyin_simply_to_standard_team_leader_name_map, lower_score=70, high_score=90)
        print(f"班组长名匹配 输入: {team_name}-> 输出: {outcome}")
|
||
|
||
|
||
def standardize_company_test():
    """Match sample sub-company names and print each result plus total time.

    The samples mix full names, shortened names and misspellings; each one is
    passed to standardize_sub_company with the company maps from GlobalData
    and the positional thresholds 70 and 90.
    """
    spoken_names = (
        "送一分公司", "送二分公司", "变电分公司", "建筑分公司", "检修试验分公司",
        "宏源电力公司", "宏源电力限公司", "宏源电力限公司线路", "宏源电力限公司变电",
        "送一分", "送二分", "变电分", "建筑分", "检修试验分",
        "宏源电力", "红源电力", "宏源电力有限", "宏源电力限线路", "宏源电力限变电",
    )

    print("加权混合策略 分公司名匹配**********************")
    began = time.perf_counter()
    for spoken in spoken_names:
        outcome = standardize_sub_company(
            spoken,
            GlobalData.simply_to_standard_company_name_map,
            GlobalData.pinyin_simply_to_standard_company_name_map,
            70,
            90,
        )
        print(f"加权混合策略 分公司名匹配 输入: {spoken}-> 输出: {outcome}")
    # The measured span includes the per-item prints, not just the matching.
    elapsed = time.perf_counter() - began
    print(f"加权混合策略 耗时: {elapsed:.4f} 秒")
|
||
|
||
|
||
def standardize_construction_test():
    """Match sample construction-unit names and print each result plus timing.

    Each sample is passed to standardize_sub_company with the construction
    maps from GlobalData and the positional thresholds 70 and 90.

    The original started a perf_counter timer but never reported it; the
    elapsed time is now printed in the same format standardize_company_test
    uses.
    """
    test_cases = [
        "合肥供电公司",
        "淮北供电公司",
        "六安市城郊供电公司",
    ]
    print("加权混合策略 建管单位名匹配**********************")
    start = time.perf_counter()
    for item in test_cases:
        match_results = standardize_sub_company(
            item,
            GlobalData.simply_to_standard_construct_name_map,
            GlobalData.pinyin_simply_to_standard_construct_name_map,
            70,
            90,
        )
        print(f"加权混合策略 建管单位名匹配 输入: {item}-> 输出: {match_results}")
    # Includes the per-item prints, as in the sibling timing tests.
    end = time.perf_counter()
    print(f"加权混合策略 耗时: {end - start:.4f} 秒")
|
||
|
||
def standardize_project_test():
    """Match sample project names and print each result plus total time.

    Each sample is passed to standardize_project_name with the project maps
    from GlobalData and the positional thresholds 70 and 90.
    """
    spoken_projects = (
        "金牛变电站新建建筑",
        "金牛变电站建筑工程",
        "金牛新建工程",
        "金牛新建工程调试",
        "金牛新建调试工程",
        "金牛变电站工程",
        "芦集",
        "芦集变电站",
        "安庆四变电站",
        "锦绣变电站",
        "滁州护桥变电站",
        "合州换流站",
        "陕北合州换流站",
        "陕北安徽合州换流站",
        "金牛变电站",
        "香涧鹭岛工程",
        "延庆换流站",
        "国网延庆换流站",
        "国网北京延庆换流站",
        "陶楼广银线路工程",
        "紫蓬变电站",
        "宿州萧砀变电站",
        "冯井变电站",
        "富邦秋浦变电站",
        "包河玉龙变电站",
        "绿雪莲塘工程",
        "合肥循环园工程",
        "合肥长临河工程",
        "合肥中心变",
        "锁库变电站工程",
        "槽坊工程",
        "富东2798线",
        # Disabled samples (full names that carry a PROJ id):
        # "安庆四500kV变电站新建工程(PROJ-2024-0862)",
        # "锦绣-常青π入中心变电站220kV架空线路工程(PROJ-2024-1206)",
        # "渝北±800千伏换流站电气安装A包(调试部分)(PROJ-2024-1192)",
        # "先锋-泉河π入安庆四变电站220kV线路工程(PROJ-2024-0834)",
        # "安徽滁州护桥220kV变电站2号主变扩建工程(PROJ-2024-0821)",
        # "合州士800千伏换流站电气安装A包(PROJ-2025-0056)",
        # "卫田-陶楼T接首业变电站110kV电缆线路工程(PROJ-2024-1236)",
        # "谯城(亳三)-希夷220kV线路工程(PROJ-2024-1205)",
    )

    print("去不重要词汇 工程名匹配******************************************")
    began = time.perf_counter()
    for spoken in spoken_projects:
        outcome = standardize_project_name(
            spoken,
            GlobalData.simply_to_standard_project_name_map,
            GlobalData.pinyin_simply_to_standard_project_name_map,
            70,
            90,
        )
        print(f"***************工程名匹配 输入: {spoken}-> 输出: {outcome}")
    # The measured span includes the per-item prints, not just the matching.
    elapsed = time.perf_counter() - began
    print(f"词集匹配 耗时: {elapsed:.4f} 秒")
|
||
|
||
def standardize_program_test():
    """Match sample project-department names for every known company.

    For each entry of GlobalData.standard_company_name_list, every sample is
    passed to standardize_projectDepartment together with
    GlobalData.standard_company_program and high_score=90, and the result is
    printed.
    """
    print("项目名匹配******************************************")

    spoken_departments = (
        "金上第一项目部",
        "第一项目部金上",
        # Disabled samples:
        # "第1项目部",  # expected to return every "第三项目管理部"
        # "第2项目部",
        # "第3项目部",
        # "第4项目部",
        # "第5项目部",
        # "第6项目部",
        # "第7项目部",
        # "第8项目部",
        # "第9项目部",
        # "第10项目部",
        # "第11项目部",
        # "第12项目部",
        # "第13项目部",
        # "电缆班",
        # "调试1队",
        # "调试2队",
        # "调试3队",
        # "调试4队",
        # "调试5队",
        # "第一项目管理部",
        # "第二项目管理部",
        # "第五项目管理部",
        # "第十一项目管理部(萧砀线路)",
        # "第三项目管理部(张店线路)",
        # "第三项目管理部(岳西线路)",
        # "第五项目管理部(蚌埠)",
        # "第三项目管理部(六安线路)",
        # "第十一项目管理部(宿州线路)",
        # "调试一队",
        # "调试二队",
        # "调试三队",
        # "电缆班",
    )

    for company in GlobalData.standard_company_name_list:
        for department in spoken_departments:
            outcome = standardize_projectDepartment(
                company,
                department,
                GlobalData.standard_company_program,
                high_score=90,
            )
            print(f"加权混合策略 项目部名称 输入: 公司:{company},项目部:{department}-> 输出: {outcome}")
|
||
|
||
|
||
|
||
def standardize_sub_constractor_test():
    """Match sample subcontractor names and print each result plus timing.

    Each sample is passed to standardize_sub_company with the subcontractor
    maps from GlobalData and the positional thresholds 70 and 90.

    The original started a perf_counter timer but never reported it; the
    elapsed time is now printed in the same format standardize_company_test
    uses.
    """
    test_cases = [
        "怀电能源科技",
        "泰央建设有限责任公司",
        "泓源电力建设有限公司",
        "怀电能源科技公司",
        "宝德电力公司",
        "亿甲建筑公司",
    ]
    print("加权混合策略 分包单位名匹配**********************")
    start = time.perf_counter()
    for item in test_cases:
        match_results = standardize_sub_company(
            item,
            GlobalData.simply_to_standard_constractor_name_map,
            GlobalData.pinyin_simply_to_standard_constractor_name_map,
            70,
            90,
        )
        print(f"分包单位名匹配 输入: {item}-> 输出: {match_results}")
    # Includes the per-item prints, as in the sibling timing tests.
    end = time.perf_counter()
    print(f"加权混合策略 耗时: {end - start:.4f} 秒")
|
||
|
||
|
||
def get_file_name(
    target_dir='/Users/wangvivi/Desktop/Work/2025项目材料/送变电大模型/知识库文档/送变电文档合并V5',
    output_path='name.txt',
):
    """Write the name of every file under ``target_dir`` to ``output_path``.

    The directory tree is walked recursively with ``os.walk``; only base file
    names are recorded (no directory part), one per line, UTF-8 encoded.

    Args:
        target_dir: Root directory to scan. Defaults to the path the original
            script hard-coded, so existing no-argument calls behave as before.
        output_path: File to (over)write with the collected names. A relative
            path is resolved against the current working directory.

    Note:
        ``os.walk`` ignores directory-listing errors by default, so a missing
        or unreadable ``target_dir`` produces an empty output file rather
        than an exception.
    """
    import os

    # Flatten the per-directory file lists in the order os.walk yields them.
    all_file_names = [
        name
        for _root, _dirs, files in os.walk(target_dir)
        for name in files
    ]

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(f"{name}\n" for name in all_file_names)

    print(f"共收集了 {len(all_file_names)} 个文件名,已写入 {output_path}")
|
||
|
||
|
||
class Message:
    """A single chat message: who said it (``role``) and what (``content``).

    The file previously defined this class twice with identical bodies; the
    second definition only rebound the name, so one definition is kept.
    """

    def __init__(self, role, content):
        self.role = role
        self.content = content

    def __repr__(self):
        return f"{type(self).__name__}(role={self.role!r}, content={self.content!r})"
|
||
|
||
def history_message(messages=None):
    """Split a conversation into history and latest user question, then print.

    The last message is treated as the latest question, and only if its role
    is "user". If that question contains one of the relative-time words, the
    history is discarded; otherwise the history is every message before the
    last one. The first two and last two history entries are printed as
    "role: content" lines.

    Args:
        messages: Optional sequence of objects exposing ``role`` and
            ``content``. When omitted, the built-in sample conversation is
            used, which is what the original no-argument version did. An
            empty sequence is accepted and yields an empty question and
            empty history instead of raising IndexError.
    """
    from collections import namedtuple

    # Local record type; inside this function it shadows any module-level
    # Message.
    Message = namedtuple("Message", ["role", "content"])

    if messages is None:
        messages = [
            # Disabled earlier turns of the sample conversation:
            # Message("user", "延庆换流站今天有多少作业计划"),
            # Message("assistant", "2025-04-23 ±500KV延庆换流站备用换流变安装(PROJ-2025-0162)风险等级为2级的有0项,3级的有0项,4级的有1项,5级的有0项,一共有1项作业计划"),
            # Message("user", "河州换流站今天有多少作业计划"),
            # Message("assistant", "您说的工程名可能是: 第1个:合州±800千伏换流站电气安装A包(PROJ-2025-0056) 第2个:合州换流站-文都500千伏线路工程(PROJ-2024-1089) 第3个:陕北-安徽直流工程合州±800千伏换流站土建A包(PROJ-2024-0312) 第4个:文都-官山改接入合州换流站500千伏线路工程(PROJ-2024-1090) 请确认您要选择哪一个"),
            Message("user", "第一个"),
        ]

    # Guard against an empty conversation: there is no latest message then.
    latest_message = messages[-1] if messages else None
    if latest_message is not None and latest_message.role == "user":
        latest_user_question = latest_message.content
    else:
        latest_user_question = ""

    time_prefixes = ["今天", "昨天", "本周", "下周", "明天", "今日"]
    # A question containing a relative-time word is handled without history.
    if any(prefix in latest_user_question for prefix in time_prefixes):
        history_messages = []
    else:
        history_messages = messages[:-1]

    print("len(history_messages):\n", len(history_messages))
    oldest_chat_history = "\n".join(f"{msg.role}: {msg.content}" for msg in history_messages[:2])
    last_chat_history = "\n".join(f"{msg.role}: {msg.content}" for msg in history_messages[-2:])

    print("oldest_chat_history:\n", oldest_chat_history)
    print("last_chat_history:\n", last_chat_history)
    print("latest_user_question:\n", latest_user_question)
|
||
|
||
|
||
def standardize_program():
    """Print rapidfuzz's best WRatio match for a fixed query and candidates.

    The query and candidates are shortened forms of the full department names
    kept in the disabled sample further down.
    """
    from rapidfuzz import fuzz, process

    needle = "第一金上"
    # NOTE(review): "第九马鞍山)" ends with a closing parenthesis that the
    # other entries lack; it looks like a leftover from shortening the full
    # name -- confirm before relying on its score.
    candidates = [
        "第五阜阳", "第一金上", "第二香鹭西段", "第十一宣城",
        "第八芜湖", "第十三黄山", "第六滁州", "第四甘浙",
        "第九马鞍山)", "第三香鹭东段", "第一天津",
    ]

    # Disabled sample with the unshortened names:
    # needle = "金上第一项目"
    # candidates = [
    #     "第五项目管理部(阜阳)",
    #     "第一项目管理部(金上)",
    #     "第二项目管理部(香鹭西段)",
    #     "第十一项目管理部(宣城)",
    #     "第八项目管理部(芜湖)",
    #     "第十三项目管理部(黄山)",
    #     "第六项目管理部(滁州)",
    #     "第四项目管理部(甘浙)",
    #     "第九项目管理部(马鞍山)",
    #     "第三项目管理部(香鹭东段)",
    #     "第一项目管理部(天津)"
    # ]

    best = process.extractOne(needle, candidates, scorer=fuzz.WRatio)
    print(best)
|
||
def get_size():
    """Print the combined ``sys.getsizeof`` of each GlobalData name table.

    For every category (project, construction unit, subcontractor, team
    leader) the standard-name list, the simplified-name map and the pinyin
    map are summed and reported in bytes.

    Fix: the team-leader total previously added the subcontractor pinyin map
    (a copy-paste slip); it now uses
    ``pinyin_simply_to_standard_team_leader_name_map``.

    Note:
        ``sys.getsizeof`` is shallow: it counts the container object itself,
        not the strings or nested objects it refers to, so these figures
        understate the real footprint.
    """
    import sys

    groups = (
        ("standard_project", (
            GlobalData.standard_project_name_list,
            GlobalData.simply_to_standard_project_name_map,
            GlobalData.pinyin_simply_to_standard_project_name_map,
        )),
        ("standard_construct", (
            GlobalData.standard_construct_name_list,
            GlobalData.simply_to_standard_construct_name_map,
            GlobalData.pinyin_simply_to_standard_construct_name_map,
        )),
        ("standard_constractor", (
            GlobalData.standard_constractor_name_list,
            GlobalData.simply_to_standard_constractor_name_map,
            GlobalData.pinyin_simply_to_standard_constractor_name_map,
        )),
        ("standard_team", (
            GlobalData.standard_team_leader_name_list,
            GlobalData.simply_to_standard_team_leader_name_map,
            GlobalData.pinyin_simply_to_standard_team_leader_name_map,
        )),
    )
    for label, containers in groups:
        total_size = sum(sys.getsizeof(container) for container in containers)
        print(f"{label} size: {total_size} bytes")
|
||
|
||
# Executed at module level. Only the project-name check is enabled; the other
# helpers are switched on by uncommenting their calls below.
standardize_project_test()
|
||
# standardize_program()
|
||
# history_message()
|
||
|
||
# standardize_team_leader_test()
|
||
#
|
||
# standardize_sub_constractor_test()
|
||
#
|
||
# check_standard_name_slot_probability_test()
|
||
#
|
||
# standardize_construction_test()
|
||
# standardize_program_test()
|
||
|