"""Fuzzy-match a project-department name against the standard name list.

The standard names are embedded once when this module is loaded; each query
is embedded on demand and compared to them with cosine similarity.
"""
import numpy as np
from langchain_openai import OpenAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity

from utils import CheckResult, StandardType, load_standard_name

# Standard project-department names that every query is matched against.
standard_program_name_list = load_standard_name('./standard_data/standard_program.txt')

# Settings passed straight to OpenAIEmbeddings (model name and endpoint).
# NOTE(review): keep real credentials out of source control; load them from
# the environment if this ever points at a non-local endpoint.
params = {
    'model': 'bge-large-zh-v1.5',
    'openai_api_base': 'http://127.0.0.1:9997/v1',
    'openai_api_key': 'EMPTY',
    'openai_proxy': '',
}

# Embedding client built from the settings above.
embedding = OpenAIEmbeddings(**params)

# Embeddings of the standard names, computed once and reused by every query.
standard_embeddings = embedding.embed_documents(standard_program_name_list, chunk_size=500)


def fuzzy_match(query):
    """Return the standard name most similar to *query* and its similarity.

    Args:
        query: The name to look up.

    Returns:
        A ``(name, similarity)`` tuple, where ``name`` is the entry of
        ``standard_program_name_list`` with the highest cosine similarity to
        the query embedding. If embedding or comparison fails, the error is
        printed and ``(None, 0.0)`` is returned so that callers which unpack
        the result keep working.
    """
    try:
        query_embedding = embedding.embed_query(query)
        # cosine_similarity returns a 1 x N matrix; row 0 holds the scores
        # of the query against each standard name.
        similarities = cosine_similarity([query_embedding], standard_embeddings)[0]
        best_index = int(np.argmax(similarities))
        best_name = standard_program_name_list[best_index]
        best_score = similarities[best_index]
    except Exception as e:
        # Best-effort lookup: report the failure instead of propagating it,
        # but still hand back a two-element tuple.
        print(f"相似性判断错误{e}")
        return None, 0.0

    print(f"输入名称: {query}")
    print(f"最相似的项目部名称: {best_name}")
    print(f"相似度: {best_score:.4f}")
    return best_name, best_score


if __name__ == "__main__":
    # Manual smoke test; guarded so importing this module does not run it.
    match_program, match_possibility = fuzzy_match("第一项目部定西")
    print(f"fuzzy_match program result:{match_program}, {match_possibility}")