update
This commit is contained in:
parent
df348e96a5
commit
3593e6ca2c
|
|
@ -78,3 +78,4 @@ streamlit-modal>=0.1.0
|
|||
streamlit-aggrid>=0.3.4.post3
|
||||
watchdog>=3.0.0
|
||||
docx2txt
|
||||
elasticsearch
|
||||
|
|
@ -78,11 +78,12 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
|
|||
max_tokens=max_tokens,
|
||||
callbacks=[callback],
|
||||
)
|
||||
docs = await run_in_threadpool(search_docs,
|
||||
query=query,
|
||||
knowledge_base_name=knowledge_base_name,
|
||||
top_k=top_k,
|
||||
score_threshold=score_threshold)
|
||||
docs = search_docs(query, knowledge_base_name, 10, score_threshold)
|
||||
# docs = await run_in_threadpool(search_docs,
|
||||
# query=query,
|
||||
# knowledge_base_name=knowledge_base_name,
|
||||
# top_k=10,
|
||||
# score_threshold=score_threshold)
|
||||
|
||||
# 加入reranker
|
||||
if USE_RERANKER:
|
||||
|
|
@ -99,6 +100,7 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
|
|||
query=query)
|
||||
print("---------after rerank------------------")
|
||||
print(docs)
|
||||
|
||||
context = "\n".join([doc.page_content for doc in docs])
|
||||
|
||||
if len(docs) == 0: # 如果没有找到相关文档,使用empty模板
|
||||
|
|
|
|||
|
|
@ -36,12 +36,12 @@ class ESKBService(KBService):
|
|||
except Exception as e:
|
||||
logger.error(f"Error 发生 : {e}")
|
||||
raise e
|
||||
try:
|
||||
# 首先尝试通过es_client_python创建
|
||||
self.es_client_python.indices.create(index=self.index_name)
|
||||
except BadRequestError as e:
|
||||
logger.error("创建索引失败,重新")
|
||||
logger.error(e)
|
||||
# try:
|
||||
# # 首先尝试通过es_client_python创建
|
||||
# self.es_client_python.indices.create(index=self.index_name)
|
||||
# except BadRequestError as e:
|
||||
# logger.error("创建索引失败,重新")
|
||||
# logger.error(e)
|
||||
|
||||
try:
|
||||
# langchain ES 连接、创建索引
|
||||
|
|
@ -156,15 +156,17 @@ class ESKBService(KBService):
|
|||
logger.error(f"ES Docs Delete Error! {e}")
|
||||
|
||||
def do_delete_doc(self, kb_file, **kwargs):
|
||||
base_file_name = os.path.basename(kb_file.filepath)
|
||||
if self.es_client_python.indices.exists(index=self.index_name):
|
||||
# 从向量数据库中删除索引(文档名称是Keyword)
|
||||
query = {
|
||||
"query": {
|
||||
"term": {
|
||||
"metadata.source.keyword": kb_file.filepath
|
||||
"metadata.source.keyword": base_file_name
|
||||
}
|
||||
}
|
||||
}
|
||||
print(f"***do_delete_doc: kb_file.filepath:{kb_file.filepath}, base_file_name:{base_file_name}")
|
||||
# 注意设置size,默认返回10个。
|
||||
search_results = self.es_client_python.search(body=query, size=50)
|
||||
delete_list = [hit["_id"] for hit in search_results['hits']['hits']]
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ def list_files_from_folder(kb_name: str):
|
|||
if is_skiped_path(entry.path):
|
||||
return
|
||||
|
||||
|
||||
if entry.is_symlink():
|
||||
target_path = os.path.realpath(entry.path)
|
||||
with os.scandir(target_path) as target_it:
|
||||
|
|
@ -79,10 +80,16 @@ def list_files_from_folder(kb_name: str):
|
|||
for sub_entry in it:
|
||||
process_entry(sub_entry)
|
||||
|
||||
with os.scandir(doc_path) as it:
|
||||
for entry in it:
|
||||
process_entry(entry)
|
||||
#added by weiweiwang 2024.1.3 for catch exception
|
||||
try:
|
||||
print(f"list_files_from_folder,doc_path:{doc_path}")
|
||||
with os.scandir(doc_path) as it:
|
||||
for entry in it:
|
||||
process_entry(entry)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error 发生 : {e}")
|
||||
|
||||
return result
|
||||
|
||||
#PDFPlumberLoader
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class LangchainReranker(BaseDocumentCompressor):
|
|||
# self.activation_fct=activation_fct
|
||||
# self.apply_softmax=apply_softmax
|
||||
|
||||
self._model = CrossEncoder(model_name=model_name_or_path, max_length=1024, device=device)
|
||||
self._model = CrossEncoder(model_name=model_name_or_path, max_length=512, device=device)
|
||||
super().__init__(
|
||||
top_n=top_n,
|
||||
model_name_or_path=model_name_or_path,
|
||||
|
|
|
|||
|
|
@ -285,6 +285,7 @@ def knowledge_base_page(api: ApiRequest, is_lite: bool = None):
|
|||
|
||||
st.divider()
|
||||
|
||||
#added by weiweiw 2024.1.3
|
||||
# cols = st.columns(3)
|
||||
|
||||
# if cols[0].button(
|
||||
|
|
@ -318,6 +319,7 @@ def knowledge_base_page(api: ApiRequest, is_lite: bool = None):
|
|||
# with st.sidebar:
|
||||
# keyword = st.text_input("查询关键字")
|
||||
# top_k = st.slider("匹配条数", 1, 100, 3)
|
||||
#ending added by weiweiw 2024.1.3
|
||||
|
||||
st.write("文件内文档列表。双击进行修改,在删除列填入 Y 可删除对应行。")
|
||||
docs = []
|
||||
|
|
|
|||
Loading…
Reference in New Issue