Intention/api/similiary.py

81 lines
3.0 KiB
Python

# from langchain_openai import OpenAIEmbeddings
# from utils import CheckResult, StandardType, load_standard_name
#
# standard_program_name_list = load_standard_name('./standard_data/standard_program.txt')
#
# params = {'model': 'bge-large-zh-v1.5',
# 'openai_api_base': 'http://218.23.122.14:63015/v1-openai/',
# 'openai_api_key': 'gpustack_baacebfd27bb3d01_092ce528ae05cb7d05acb052e6490090',
# 'openai_proxy': ''}
#
# try:
# embedding = OpenAIEmbeddings(**params)
# result = embedding.embed_documents(standard_program_name_list,chunk_size=500)
#
# print(f"mbedding.embed_documents 结果:{result}")
#
#
# except Exception as e:
# print(f"failed to create Embeddings for model. {e}")
from langchain_openai import OpenAIEmbeddings
from utils import CheckResult, StandardType, load_standard_name
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# Load the list of standard program (project department) names from disk.
standard_program_name_list = load_standard_name(
    './standard_data/standard_program.txt'
)

# Keyword arguments for the embedding client. The base URL points at a local
# server and the key is a placeholder value.
params = dict(
    model='bge-large-zh-v1.5',
    openai_api_base='http://127.0.0.1:9997/v1',
    openai_api_key='EMPTY',
    openai_proxy='',
)

# Build the embedding client from the settings above.
embedding = OpenAIEmbeddings(**params)

# Embed every standard name once, at import time, so that each later query
# only has to embed its own text before comparing.
standard_embeddings = embedding.embed_documents(
    standard_program_name_list,
    chunk_size=500,
)
def fuzzy_match(query):
    """Return the standard program name most similar to *query*.

    The query is embedded with the module-level ``embedding`` client and
    compared, by cosine similarity, against the precomputed
    ``standard_embeddings``; the name with the highest score wins.

    Args:
        query: The free-form program name to look up.

    Returns:
        A ``(name, similarity)`` tuple. When the lookup fails for any reason
        (embedding request error, empty standard list, ...) the error is
        printed and ``(None, 0.0)`` is returned, so callers can always unpack
        the result into two values.
    """
    try:
        # Embed the query text.
        query_embedding = embedding.embed_query(query)
        # One row of scores: the query against every standard name.
        similarities = cosine_similarity([query_embedding], standard_embeddings)[0]
        # Pick the best-scoring standard name.
        most_similar_index = np.argmax(similarities)
        most_similar_name = standard_program_name_list[most_similar_index]
        best_score = similarities[most_similar_index]
    except Exception as e:
        # Best-effort lookup: report the problem, but keep the return shape a
        # 2-tuple instead of falling through to an implicit ``None``.
        print(f"相似性判断错误{e}")
        return None, 0.0

    print(f"输入名称: {query}")
    print(f"最相似的项目部名称: {most_similar_name}")
    print(f"相似度: {best_score:.4f}")
    return most_similar_name, best_score
# try:
# # 查询名称
# query = "定西第一项目部"
# query_embedding = embedding.embed_query(query)
#
# # 计算相似度
# similarities = cosine_similarity([query_embedding], standard_embeddings)[0]
#
# # 找到最相似的项目部名称
# most_similar_index = np.argmax(similarities)
# most_similar_name = standard_program_name_list[most_similar_index]
#
# print(f"输入名称: {query}")
# print(f"最相似的项目部名称: {most_similar_name}")
# print(f"相似度: {similarities[most_similar_index]:.4f}")
#
# except Exception as e:
# print(f"Failed to create embeddings or compute similarity: {e}")
# Module-level sample lookup: match one example name against the standard
# list and print the name and score that come back.
match_program, match_possibility = fuzzy_match(query="第一项目部定西")
print(f"fuzzy_match program result:{match_program}, {match_possibility}")