"""Manual smoke tests for the name-standardization helpers in ``utils``.

Each ``*_test`` function feeds a list of colloquial names into one of the
matching helpers and prints the result.  Nothing is asserted; the output is
meant to be read by a person.  Enable a scenario by (un)commenting the
corresponding call at the bottom of the file.
"""

import time

from apscheduler.schedulers.blocking import BlockingScheduler

from globalData import GlobalData
from utils import (
    standardize_name,
    clean_useless_team_leader_name,
    standardize_sub_company,
    standardize_project_name,
    standardize_projectDepartment,
    standardize_team_name,
    check_standard_name_slot_probability,
)

# Disabled: manual refresh of GlobalData from redis.
# def job():
#     print("[Info] Executing update_from_redis...")
#     GlobalData.update_from_redis()
#
# GlobalData.update_from_redis()

# Default directory scanned by get_file_name(); replace with your own path.
_DEFAULT_DOC_DIR = '/Users/wangvivi/Desktop/Work/2025项目材料/送变电大模型/知识库文档/送变电文档合并V5'


def check_standard_name_slot_probability_test() -> None:
    """Run ``check_standard_name_slot_probability`` over sample slot dicts and print each result."""
    slot_list = [
        {"constructionUnit": "合肥供电公司"},
        {"date": "今天", "constructionUnit": "芜湖供电公司"},
        {"date": "今天", "implementationOrganization": "送电一分公司"},
        {"date": "今天", "subcontractor": "百瑞建设发展有限公司"},
        {"date": "今天", "subcontractor": "安徽宝德电力建设工程有限"},
        {"date": "今天", "teamName": "徐局班组"},
    ]
    for slot in slot_list:
        # The first argument (12) is passed through unchanged from the original script.
        match_results = check_standard_name_slot_probability(12, slot)
        print(f"加权混合策略 项目部名称 输入: 原始槽位:{slot},输出: {match_results}")


def standardize_team_leader_test() -> None:
    """Match sample team names with ``standardize_team_name`` and print each result."""
    team_leader_list = [
        "李东班组",
        "磐基班组",
        "章永班组",
        "张勇班组",
        "王治国班组",
        "代贵华班组",
        "黄安印班组",
        "刘闩班组",
        "王虎班组",
        "周勇勇班组",
        "魏在华班组",
        "王礼良班组",
        "林学刚班组",
        "崔新荣班组",
        "江军班组",
        "笪淦班组",
        "杨海平班组",
        "蔡来云班组",
        "贺中林班组",
        "何勇班组",
        "韦幸朝班组",
        "刘文虎班组",
        "金生班组",
        "段宝强班组",
        "何计划班组",
        "刘兆班组",
        "徐南班组",
        "贺广飞班组",
        "孙泽栋班组",
        "钱小林班组",
        "朱锋东班组",
    ]
    for item in team_leader_list:
        match_results = standardize_team_name(
            item,
            GlobalData.simply_to_standard_team_leader_name_map,
            GlobalData.pinyin_simply_to_standard_team_leader_name_map,
            lower_score=70,
            high_score=90,
        )
        # Disabled alternative using the generic matcher:
        # match_results = standardize_name(item, clean_useless_team_leader_name, GlobalData.simply_to_standard_team_leader_name_map,
        #                                  GlobalData.pinyin_simply_to_standard_team_leader_name_map, lower_score=70, high_score=90)
        print(f"班组长名匹配 输入: {item}-> 输出: {match_results}")


def standardize_company_test() -> None:
    """Match sample sub-company names, print each result and the total elapsed time."""
    test_cases = [
        "送一分公司",
        "送二分公司",
        "变电分公司",
        "建筑分公司",
        "检修试验分公司",
        "宏源电力公司",
        "宏源电力限公司",
        "宏源电力限公司线路",
        "宏源电力限公司变电",
        "送一分",
        "送二分",
        "变电分",
        "建筑分",
        "检修试验分",
        "宏源电力",
        "红源电力",
        "宏源电力有限",
        "宏源电力限线路",
        "宏源电力限变电",
    ]
    print("加权混合策略 分公司名匹配**********************")
    start = time.perf_counter()
    for item in test_cases:
        match_results = standardize_sub_company(
            item,
            GlobalData.simply_to_standard_company_name_map,
            GlobalData.pinyin_simply_to_standard_company_name_map,
            70,
            90,
        )
        print(f"加权混合策略 分公司名匹配 输入: {item}-> 输出: {match_results}")
    end = time.perf_counter()
    print(f"加权混合策略 耗时: {end - start:.4f} 秒")


def standardize_construction_test() -> None:
    """Match sample construction-management unit names and print each result."""
    test_cases = [
        "合肥供电公司",
        "淮北供电公司",
        "六安市城郊供电公司",
    ]
    print("加权混合策略 建管单位名匹配**********************")
    for item in test_cases:
        match_results = standardize_sub_company(
            item,
            GlobalData.simply_to_standard_construct_name_map,
            GlobalData.pinyin_simply_to_standard_construct_name_map,
            70,
            90,
        )
        print(f"加权混合策略 建管单位名匹配 输入: {item}-> 输出: {match_results}")


def standardize_project_test() -> None:
    """Match sample project names, print each result and the total elapsed time."""
    test_cases = [
        "金牛变电站新建建筑",
        "金牛变电站建筑工程",
        "金牛新建工程",
        "金牛新建工程调试",
        "金牛新建调试工程",
        "金牛变电站工程",
        "芦集",
        "芦集变电站",
        "安庆四变电站",
        "锦绣变电站",
        "滁州护桥变电站",
        "合州换流站",
        "陕北合州换流站",
        "陕北安徽合州换流站",
        "金牛变电站",
        "香涧鹭岛工程",
        "延庆换流站",
        "国网延庆换流站",
        "国网北京延庆换流站",
        "陶楼广银线路工程",
        "紫蓬变电站",
        "宿州萧砀变电站",
        "冯井变电站",
        "富邦秋浦变电站",
        "包河玉龙变电站",
        "绿雪莲塘工程",
        "合肥循环园工程",
        "合肥长临河工程",
        "合肥中心变",
        "锁库变电站工程",
        "槽坊工程",
        "富东2798线",
        # Disabled cases (full standard names):
        # ("安庆四500kV变电站新建工程(PROJ-2024-0862)"),
        # ("锦绣-常青π入中心变电站220kV架空线路工程(PROJ-2024-1206)"),
        # ("渝北±800千伏换流站电气安装A包(调试部分)(PROJ-2024-1192)"),
        # ("先锋-泉河π入安庆四变电站220kV线路工程(PROJ-2024-0834)"),
        # ("安徽滁州护桥220kV变电站2号主变扩建工程(PROJ-2024-0821)"),
        # ("合州士800千伏换流站电气安装A包(PROJ-2025-0056)"),
        # ("卫田-陶楼T接首业变电站110kV电缆线路工程(PROJ-2024-1236)"),
        # ("谯城(亳三)-希夷220kV线路工程(PROJ-2024-1205)"),
    ]
    print("去不重要词汇 工程名匹配******************************************")
    start = time.perf_counter()
    for item in test_cases:
        match_results = standardize_project_name(
            item,
            GlobalData.simply_to_standard_project_name_map,
            GlobalData.pinyin_simply_to_standard_project_name_map,
            70,
            90,
        )
        print(f"***************工程名匹配 输入: {item}-> 输出: {match_results}")
    end = time.perf_counter()
    print(f"词集匹配 耗时: {end - start:.4f} 秒")


def standardize_program_test() -> None:
    """Match sample project-department names against every standard company and print each result."""
    print("项目名匹配******************************************")
    oral_program_name_list = [
        "金上第一项目部",
        "第一项目部金上",
        # Disabled cases:
        # ("第1项目部"),  # expected to return every "第三项目管理部"
        # ("第2项目部"),
        # ("第3项目部"),
        # ("第4项目部"),
        # ("第5项目部"),
        # ("第6项目部"),
        # ("第7项目部"),
        # ("第8项目部"),
        # ("第9项目部"),
        # ("第10项目部"),
        # ("第11项目部"),
        # ("第12项目部"),
        # ("第13项目部"),
        # ("电缆班"),
        # ("调试1队"),
        # ("调试2队"),
        # ("调试3队"),
        # ("调试4队"),
        # ("调试5队"),
        # ("第一项目管理部"),
        # ("第二项目管理部"),
        # ("第五项目管理部"),
        # ("第十一项目管理部(萧砀线路)"),
        # ("第三项目管理部(张店线路)"),
        # ("第三项目管理部(岳西线路)"),
        # ("第五项目管理部(蚌埠)"),
        # ("第三项目管理部(六安线路)"),
        # ("第十一项目管理部(宿州线路)"),
        # ("调试一队"),
        # ("调试二队"),
        # ("调试三队"),
        # ("电缆班"),
    ]
    for company in GlobalData.standard_company_name_list:
        for program in oral_program_name_list:
            match_results = standardize_projectDepartment(
                company,
                program,
                GlobalData.standard_company_program,
                high_score=90,
            )
            print(f"加权混合策略 项目部名称 输入: 公司:{company},项目部:{program}-> 输出: {match_results}")


def standardize_sub_constractor_test() -> None:
    """Match sample subcontractor names and print each result."""
    test_cases = [
        "怀电能源科技",
        "泰央建设有限责任公司",
        "泓源电力建设有限公司",
        "怀电能源科技公司",
        "宝德电力公司",
        "亿甲建筑公司",
    ]
    print("加权混合策略 分包单位名匹配**********************")
    for item in test_cases:
        match_results = standardize_sub_company(
            item,
            GlobalData.simply_to_standard_constractor_name_map,
            GlobalData.pinyin_simply_to_standard_constractor_name_map,
            70,
            90,
        )
        print(f"分包单位名匹配 输入: {item}-> 输出: {match_results}")


def get_file_name(target_dir: str = _DEFAULT_DOC_DIR, output_path: str = 'name.txt') -> None:
    """Write the name of every file under *target_dir* to *output_path*.

    Args:
        target_dir: Directory to walk recursively.  Defaults to the path
            that was previously hard-coded in this function.
        output_path: Text file to (over)write, one file name per line.
            A relative path is resolved against the current working directory.
    """
    import os

    # Only the bare file names are collected; the containing directories are dropped.
    all_file_names = [
        file_name
        for _root, _dirs, files in os.walk(target_dir)
        for file_name in files
    ]

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(name + '\n' for name in all_file_names)

    print(f"共收集了 {len(all_file_names)} 个文件名,已写入 {output_path}")


class Message:
    """Minimal chat message holding a role and its text content."""

    def __init__(self, role, content):
        self.role = role
        self.content = content


def history_message() -> None:
    """Print how a chat history is split into oldest/latest parts around the latest user question."""
    from collections import namedtuple

    # Local namedtuple deliberately shadows the module-level Message class inside this function.
    Message = namedtuple("Message", ["role", "content"])
    messages = [
        # Disabled earlier turns:
        # Message("user", "延庆换流站今天有多少作业计划"),
        # Message("assistant", "2025-04-23 ±500KV延庆换流站备用换流变安装(PROJ-2025-0162)风险等级为2级的有0项,3级的有0项,4级的有1项,5级的有0项,一共有1项作业计划"),
        # Message("user", "河州换流站今天有多少作业计划"),
        # Message("assistant", "您说的工程名可能是: 第1个:合州±800千伏换流站电气安装A包(PROJ-2025-0056) 第2个:合州换流站-文都500千伏线路工程(PROJ-2024-1089) 第3个:陕北-安徽直流工程合州±800千伏换流站土建A包(PROJ-2024-0312) 第4个:文都-官山改接入合州换流站500千伏线路工程(PROJ-2024-1090) 请确认您要选择哪一个"),
        Message("user", "第一个")
    ]
    latest_message = messages[-1]
    latest_user_question = latest_message.content if latest_message.role == "user" else ""

    # When the latest question contains one of these time words, the history is discarded.
    time_prefixes = ["今天", "昨天", "本周", "下周", "明天", "今日"]
    if any(prefix in latest_user_question for prefix in time_prefixes):
        history_messages = []
    else:
        history_messages = messages[:-1]
    print("len(history_messages):\n", len(history_messages))

    # First two and last two history entries, rendered as "role: content" lines.
    oldest_chat_history = "\n".join(f"{msg.role}: {msg.content}" for msg in history_messages[:2])
    last_chat_history = "\n".join(f"{msg.role}: {msg.content}" for msg in history_messages[-2:])
    print("oldest_chat_history:\n", oldest_chat_history)
    print("last_chat_history:\n", last_chat_history)
    print("latest_user_question:\n", latest_user_question)


def standardize_program() -> None:
    """Print the best rapidfuzz ``WRatio`` match for a shortened department name."""
    from rapidfuzz import process, fuzz

    # Disabled variant using the full department names:
    # query = "金上第一项目"
    # choices = [
    #     "第五项目管理部(阜阳)",
    #     "第一项目管理部(金上)",
    #     "第二项目管理部(香鹭西段)",
    #     "第十一项目管理部(宣城)",
    #     "第八项目管理部(芜湖)",
    #     "第十三项目管理部(黄山)",
    #     "第六项目管理部(滁州)",
    #     "第四项目管理部(甘浙)",
    #     "第九项目管理部(马鞍山)",
    #     "第三项目管理部(香鹭东段)",
    #     "第一项目管理部(天津)"
    # ]
    query = "第一金上"
    choices = [
        "第五阜阳",
        "第一金上",
        "第二香鹭西段",
        "第十一宣城",
        "第八芜湖",
        "第十三黄山",
        "第六滁州",
        "第四甘浙",
        # NOTE(review): the trailing ")" looks like a leftover from the full name above; confirm before changing.
        "第九马鞍山)",
        "第三香鹭东段",
        "第一天津"
    ]
    match = process.extractOne(query, choices, scorer=fuzz.WRatio)
    print(match)


def get_size() -> None:
    """Print the combined ``sys.getsizeof`` of each group of GlobalData lookup containers.

    ``sys.getsizeof`` is shallow: it measures the container objects only, not
    the strings they reference, so the figures are a lower bound.
    """
    import sys

    def _shallow_total(*containers) -> int:
        # Sum of the shallow sizes of the given containers.
        return sum(sys.getsizeof(container) for container in containers)

    total_size = _shallow_total(
        GlobalData.standard_project_name_list,
        GlobalData.simply_to_standard_project_name_map,
        GlobalData.pinyin_simply_to_standard_project_name_map,
    )
    print(f"standard_project size: {total_size} bytes")

    total_size = _shallow_total(
        GlobalData.standard_construct_name_list,
        GlobalData.simply_to_standard_construct_name_map,
        GlobalData.pinyin_simply_to_standard_construct_name_map,
    )
    print(f"standard_construct size: {total_size} bytes")

    total_size = _shallow_total(
        GlobalData.standard_constractor_name_list,
        GlobalData.simply_to_standard_constractor_name_map,
        GlobalData.pinyin_simply_to_standard_constractor_name_map,
    )
    print(f"standard_constractor size: {total_size} bytes")

    # Uses the team-leader pinyin map; the previous code added the
    # subcontractor pinyin map here by mistake.
    total_size = _shallow_total(
        GlobalData.standard_team_leader_name_list,
        GlobalData.simply_to_standard_team_leader_name_map,
        GlobalData.pinyin_simply_to_standard_team_leader_name_map,
    )
    print(f"standard_team size: {total_size} bytes")


# NOTE(review): this call runs on import as well as on direct execution;
# wrap it in an ``if __name__ == "__main__":`` guard if this module is ever imported.
standardize_project_test()
# standardize_program()
# history_message()
# standardize_team_leader_test()
#
# standardize_sub_constractor_test()
#
# check_standard_name_slot_probability_test()
#
# standardize_construction_test()
# standardize_program_test()