81 lines
3.0 KiB
Python
81 lines
3.0 KiB
Python
# from langchain_openai import OpenAIEmbeddings
|
|
# from utils import CheckResult, StandardType, load_standard_name
|
|
#
|
|
# standard_program_name_list = load_standard_name('./standard_data/standard_program.txt')
|
|
#
|
|
# params = {'model': 'bge-large-zh-v1.5',
|
|
# 'openai_api_base': 'http://218.23.122.14:63015/v1-openai/',
|
|
# 'openai_api_key': '<REDACTED>',  # NOTE: a real key was committed here; rotate it and load credentials from an environment variable instead
|
|
# 'openai_proxy': ''}
|
|
#
|
|
# try:
|
|
# embedding = OpenAIEmbeddings(**params)
|
|
# result = embedding.embed_documents(standard_program_name_list,chunk_size=500)
|
|
#
|
|
# print(f"embedding.embed_documents 结果:{result}")
|
|
#
|
|
#
|
|
# except Exception as e:
|
|
# print(f"failed to create Embeddings for model. {e}")
|
|
|
|
|
|
from langchain_openai import OpenAIEmbeddings
|
|
from utils import CheckResult, StandardType, load_standard_name
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
import numpy as np
|
|
|
|
# Load the list of standard project-department names to match against.
standard_program_name_list = load_standard_name('./standard_data/standard_program.txt')

# Keyword arguments handed to OpenAIEmbeddings below.
params = dict(
    model='bge-large-zh-v1.5',
    openai_api_base='http://127.0.0.1:9997/v1',
    openai_api_key='EMPTY',
    openai_proxy='',
)

# Build the embedding client.
embedding = OpenAIEmbeddings(**params)

# Embed every standard name once at import time; fuzzy_match compares each
# query against these vectors.
standard_embeddings = embedding.embed_documents(
    standard_program_name_list,
    chunk_size=500,
)
|
|
|
|
|
|
def fuzzy_match(query):
    """Return the standard program name most similar to ``query``.

    The query is embedded with the module-level ``embedding`` client and
    scored against the precomputed ``standard_embeddings`` using cosine
    similarity. The entry of ``standard_program_name_list`` at the
    highest-scoring index is selected, and the input, match and score are
    printed.

    Args:
        query: The name to look up.

    Returns:
        A ``(name, similarity)`` tuple. If embedding or scoring fails, the
        error is printed and ``(None, 0.0)`` is returned, so callers that
        unpack the result never receive a bare ``None``.
    """
    try:
        query_embedding = embedding.embed_query(query)

        # The query vector is wrapped in a list to form a one-row matrix;
        # row 0 of the result holds its similarity to every standard name.
        similarities = cosine_similarity([query_embedding], standard_embeddings)[0]

        most_similar_index = int(np.argmax(similarities))
        most_similar_name = standard_program_name_list[most_similar_index]
        most_similar_score = float(similarities[most_similar_index])
    except Exception as e:
        # Best-effort lookup: report the problem but keep the two-element
        # return shape that callers unpack.
        print(f"相似性判断错误{e}")
        return None, 0.0

    print(f"输入名称: {query}")
    print(f"最相似的项目部名称: {most_similar_name}")
    print(f"相似度: {most_similar_score:.4f}")
    return most_similar_name, most_similar_score
|
|
|
|
# try:
|
|
# # 查询名称
|
|
# query = "定西第一项目部"
|
|
# query_embedding = embedding.embed_query(query)
|
|
#
|
|
# # 计算相似度
|
|
# similarities = cosine_similarity([query_embedding], standard_embeddings)[0]
|
|
#
|
|
# # 找到最相似的项目部名称
|
|
# most_similar_index = np.argmax(similarities)
|
|
# most_similar_name = standard_program_name_list[most_similar_index]
|
|
#
|
|
# print(f"输入名称: {query}")
|
|
# print(f"最相似的项目部名称: {most_similar_name}")
|
|
# print(f"相似度: {similarities[most_similar_index]:.4f}")
|
|
#
|
|
# except Exception as e:
|
|
# print(f"Failed to create embeddings or compute similarity: {e}")
|
|
|
|
# Module-level sample run: match one example name and print the outcome.
match_program, match_possibility = fuzzy_match("第一项目部定西")
print(f"fuzzy_match program result:{match_program}, {match_possibility}")