Add title enhancement and remove the query-scope logic
This commit is contained in:
parent
b1abafeb50
commit
9fa7a1fbbf
Binary file not shown.
|
|
@ -63,15 +63,16 @@ def search_docs(query: str = Body(..., description="用户输入", examples=["
|
|||
#print(f"chain1._call, result:{result},similiarit text:{query1}")
|
||||
|
||||
|
||||
pre_doc = kb.search_docs(query, 1, None)
|
||||
print(f"len(pre_doc):{len(pre_doc)}")
|
||||
if len(pre_doc) > 0:
|
||||
print(f"search_docs, pre_doc:{pre_doc}")
|
||||
filpath = pre_doc[0][0].metadata['source']
|
||||
file_name = os.path.basename(filpath)
|
||||
file_name, file_extension = os.path.splitext(file_name)
|
||||
query = "根据" +file_name + ","+ query
|
||||
# pre_doc = kb.search_docs(query, 1, None)
|
||||
# print(f"len(pre_doc):{len(pre_doc)}")
|
||||
# if len(pre_doc) > 0:
|
||||
# print(f"search_docs, pre_doc:{pre_doc}")
|
||||
# filpath = pre_doc[0][0].metadata['source']
|
||||
# file_name = os.path.basename(filpath)
|
||||
# file_name, file_extension = os.path.splitext(file_name)
|
||||
# query = "根据" +file_name + ","+ query
|
||||
|
||||
pre_doc = []
|
||||
print(f"search_docs, query:{query}")
|
||||
docs = kb.search_docs(query, top_k, score_threshold)
|
||||
print(f"search_docs, docs:{docs}")
|
||||
|
|
|
|||
|
|
@ -275,7 +275,9 @@ class KnowledgeFile:
|
|||
'''
|
||||
self.kb_name = knowledge_base_name
|
||||
self.filename = filename
|
||||
self.ext = os.path.splitext(filename)[-1].lower()
|
||||
#self.ext = os.path.splitext(filename)[-1].lower()
|
||||
self.doc_title_name, file_extension = os.path.splitext(filename)
|
||||
self.ext = file_extension.lower()
|
||||
if self.ext not in SUPPORTED_EXTS:
|
||||
raise ValueError(f"暂未支持的文件格式 {self.ext}")
|
||||
self.filepath = get_file_path(knowledge_base_name, filename)
|
||||
|
|
@ -301,6 +303,14 @@ class KnowledgeFile:
|
|||
chunk_overlap: int = OVERLAP_SIZE,
|
||||
text_splitter: TextSplitter = None,
|
||||
):
|
||||
def customize_zh_title_enhance(docs: "list[Document]") -> "list[Document]":
    """Prefix each chunk's text with a sentence naming the source document.

    Every element of ``docs`` has its ``page_content`` rewritten in place to
    start with ``下文与(<title>)有关。`` where ``<title>`` is
    ``self.doc_title_name`` taken from the enclosing method's ``self``.

    Args:
        docs: The split document chunks to annotate.

    Returns:
        The same list with each chunk mutated in place, or an empty list
        when there is nothing to annotate.
    """
    if not docs:
        print("文件不存在")
        # Return an empty list rather than falling through with None:
        # the caller rebinds ``docs`` to this result and iterates over it.
        return []

    for doc in docs:
        doc.page_content = f"下文与({self.doc_title_name})有关。{doc.page_content}"
    return docs
|
||||
|
||||
docs = docs or self.file2docs(refresh=refresh)
|
||||
file_name_without_extension, file_extension = os.path.splitext(self.filepath)
|
||||
print(f"filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}")
|
||||
|
|
@ -339,10 +349,22 @@ class KnowledgeFile:
|
|||
i = i+1
|
||||
|
||||
if zh_title_enhance:
|
||||
docs = func_zh_title_enhance(docs)
|
||||
docs = customize_zh_title_enhance(docs)
|
||||
i = 1
|
||||
outputfile = file_name_without_extension + "_split.txt"
|
||||
# 打开文件以写入模式
|
||||
with open(outputfile, 'w') as file:
|
||||
for doc in docs:
|
||||
print(f"**********切分段{i}:{doc}")
|
||||
file.write(f"\n**********切分段{i}")
|
||||
file.write(doc.page_content)
|
||||
i = i+1
|
||||
|
||||
self.splited_docs = docs
|
||||
return self.splited_docs
|
||||
|
||||
|
||||
|
||||
def file2text(
|
||||
self,
|
||||
zh_title_enhance: bool = ZH_TITLE_ENHANCE,
|
||||
|
|
|
|||
Loading…
Reference in New Issue