This commit is contained in:
wvivi2023 2024-01-04 18:02:43 +08:00
parent df348e96a5
commit 3593e6ca2c
6 changed files with 30 additions and 16 deletions

View File

@@ -78,3 +78,4 @@ streamlit-modal>=0.1.0
streamlit-aggrid>=0.3.4.post3
watchdog>=3.0.0
docx2txt
elasticsearch

View File

@@ -78,11 +78,12 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
max_tokens=max_tokens,
callbacks=[callback],
)
docs = await run_in_threadpool(search_docs,
query=query,
knowledge_base_name=knowledge_base_name,
top_k=top_k,
score_threshold=score_threshold)
docs = search_docs(query, knowledge_base_name, 10, score_threshold)
# docs = await run_in_threadpool(search_docs,
# query=query,
# knowledge_base_name=knowledge_base_name,
# top_k=10,
# score_threshold=score_threshold)
# 加入reranker
if USE_RERANKER:
@@ -99,6 +100,7 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
query=query)
print("---------after rerank------------------")
print(docs)
context = "\n".join([doc.page_content for doc in docs])
if len(docs) == 0: # 如果没有找到相关文档使用empty模板

View File

@@ -36,12 +36,12 @@ class ESKBService(KBService):
except Exception as e:
logger.error(f"Error 发生 : {e}")
raise e
try:
# 首先尝试通过es_client_python创建
self.es_client_python.indices.create(index=self.index_name)
except BadRequestError as e:
logger.error("创建索引失败,重新")
logger.error(e)
# try:
# # 首先尝试通过es_client_python创建
# self.es_client_python.indices.create(index=self.index_name)
# except BadRequestError as e:
# logger.error("创建索引失败,重新")
# logger.error(e)
try:
# langchain ES 连接、创建索引
@@ -156,15 +156,17 @@ class ESKBService(KBService):
logger.error(f"ES Docs Delete Error! {e}")
def do_delete_doc(self, kb_file, **kwargs):
base_file_name = os.path.basename(kb_file.filepath)
if self.es_client_python.indices.exists(index=self.index_name):
# 从向量数据库中删除索引(文档名称是Keyword)
query = {
"query": {
"term": {
"metadata.source.keyword": kb_file.filepath
"metadata.source.keyword": base_file_name
}
}
}
print(f"***do_delete_doc: kb_file.filepath:{kb_file.filepath}, base_file_name:{base_file_name}")
# 注意设置size默认返回10个。
search_results = self.es_client_python.search(body=query, size=50)
delete_list = [hit["_id"] for hit in search_results['hits']['hits']]

View File

@@ -66,6 +66,7 @@ def list_files_from_folder(kb_name: str):
if is_skiped_path(entry.path):
return
if entry.is_symlink():
target_path = os.path.realpath(entry.path)
with os.scandir(target_path) as target_it:
@@ -79,10 +80,16 @@ def list_files_from_folder(kb_name: str):
for sub_entry in it:
process_entry(sub_entry)
with os.scandir(doc_path) as it:
for entry in it:
process_entry(entry)
#added by weiweiwang 2024.1.3 for catch exception
try:
print(f"list_files_from_folder,doc_path:{doc_path}")
with os.scandir(doc_path) as it:
for entry in it:
process_entry(entry)
except Exception as e:
logger.error(f"Error 发生 : {e}")
return result
#PDFPlumberLoader

View File

@@ -46,7 +46,7 @@ class LangchainReranker(BaseDocumentCompressor):
# self.activation_fct=activation_fct
# self.apply_softmax=apply_softmax
self._model = CrossEncoder(model_name=model_name_or_path, max_length=1024, device=device)
self._model = CrossEncoder(model_name=model_name_or_path, max_length=512, device=device)
super().__init__(
top_n=top_n,
model_name_or_path=model_name_or_path,

View File

@@ -285,6 +285,7 @@ def knowledge_base_page(api: ApiRequest, is_lite: bool = None):
st.divider()
#added by weiweiw 2024.1.3
# cols = st.columns(3)
# if cols[0].button(
@@ -318,6 +319,7 @@ def knowledge_base_page(api: ApiRequest, is_lite: bool = None):
# with st.sidebar:
# keyword = st.text_input("查询关键字")
# top_k = st.slider("匹配条数", 1, 100, 3)
#ending added by weiweiw 2024.1.3
st.write("文件内文档列表。双击进行修改,在删除列填入 Y 可删除对应行。")
docs = []