This commit is contained in:
wvivi2023 2024-01-02 15:22:35 +08:00
parent 7b7a180323
commit df348e96a5
6 changed files with 11 additions and 0 deletions

View File

@@ -77,3 +77,4 @@ streamlit-chatbox==1.1.11
streamlit-modal>=0.1.0
streamlit-aggrid>=0.3.4.post3
watchdog>=3.0.0
docx2txt

View File

@@ -67,3 +67,4 @@ arxiv>=2.0.0
youtube-search>=2.1.2
duckduckgo-search>=3.9.9
metaphor-python>=0.1.23
docx2txt

View File

@@ -7,3 +7,4 @@ streamlit-modal>=0.1.0
streamlit-aggrid>=0.3.4.post3
httpx[brotli,http2,socks]>=0.25.2
watchdog>=3.0.0
docx2txt

View File

@@ -35,7 +35,9 @@ def search_docs(
data = []
if kb is not None:
if query:
print(f"search_docs, query:{query}")
docs = kb.search_docs(query, top_k, score_threshold)
print(f"search_docs, docs:{docs}")
data = [DocumentWithVSId(**x[0].dict(), score=x[1], id=x[0].metadata.get("id")) for x in docs]
elif file_name or metadata:
data = kb.list_docs(file_name=file_name, metadata=metadata)
@@ -155,6 +157,8 @@ def upload_docs(
failed_files = {}
file_names = list(docs.keys())
print(f"upload_docs, file_names:{file_names}")
# 先将上传的文件保存到磁盘
for result in _save_files_in_thread(files, knowledge_base_name=knowledge_base_name, override=override):
filename = result["data"]["file_name"]
@@ -164,7 +168,9 @@ def upload_docs(
if filename not in file_names:
file_names.append(filename)
# 对保存的文件进行向量化
print(f"upload_docs, to_vector_store:{to_vector_store}")
if to_vector_store:
result = update_docs(
knowledge_base_name=knowledge_base_name,

View File

@@ -141,6 +141,7 @@ class ESKBService(KBService):
def do_search(self, query:str, top_k: int, score_threshold: float):
# 文本相似性检索
print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}")
docs = self.db_init.similarity_search_with_score(query=query,
k=top_k)
return docs

View File

@@ -62,6 +62,7 @@ class FaissKBService(KBService):
top_k: int,
score_threshold: float = SCORE_THRESHOLD,
) -> List[Document]:
print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}")
embed_func = EmbeddingsFunAdapter(self.embed_model)
embeddings = embed_func.embed_query(query)
with self.load_vector_store().acquire() as vs: