parent
4cdd2a5e79
commit
60a12c05f6
Binary file not shown.
|
After Width: | Height: | Size: 146 KiB |
Binary file not shown.
|
|
@ -41,11 +41,13 @@ def search_docs(query: str = Body(..., description="用户输入", examples=["
|
|||
|
||||
print(f"search_docs, query:{query}")
|
||||
docs = kb.search_docs(query, top_k, score_threshold)
|
||||
if len(pre_doc) > 0:
|
||||
if docs is not None:
|
||||
docs.append(pre_doc[0])
|
||||
else:
|
||||
docs = pre_doc[0]
|
||||
data = [DocumentWithScore(**x[0].dict(), score=x[1]) for x in docs]
|
||||
# i = 1
|
||||
# for x in docs:
|
||||
# print(f"相似文档 {i}: {x}")
|
||||
# i = i+1
|
||||
|
||||
|
||||
return data
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ class FaissKBService(KBService):
|
|||
print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}")
|
||||
with self.load_vector_store().acquire() as vs:
|
||||
docs = vs.similarity_search_with_score(query, k=top_k, score_threshold=score_threshold)
|
||||
print(f"do_search,docs:{docs}")
|
||||
#print(f"do_search,docs:{docs}")
|
||||
return docs
|
||||
|
||||
def do_add_doc(self,
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ def load_embeddings(model: str = EMBEDDING_MODEL, device: str = embedding_device
|
|||
from server.knowledge_base.kb_cache.base import embeddings_pool
|
||||
return embeddings_pool.load_embeddings(model=model, device=device)
|
||||
|
||||
|
||||
#PDFPlumberLoader
|
||||
LOADER_DICT = {"UnstructuredHTMLLoader": ['.html'],
|
||||
"UnstructuredMarkdownLoader": ['.md'],
|
||||
"CustomJSONLoader": [".json"],
|
||||
|
|
@ -302,6 +302,8 @@ class KnowledgeFile:
|
|||
text_splitter: TextSplitter = None,
|
||||
):
|
||||
docs = docs or self.file2docs(refresh=refresh)
|
||||
file_name_without_extension, file_extension = os.path.splitext(self.filepath)
|
||||
print(f"filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}")
|
||||
if not docs:
|
||||
return []
|
||||
if self.ext not in [".csv"]:
|
||||
|
|
@ -314,13 +316,27 @@ class KnowledgeFile:
|
|||
if doc.metadata:
|
||||
doc.metadata["source"] = os.path.basename(self.filepath)
|
||||
else:
|
||||
print(f"**********************docs2texts: text_splitter.split_documents(docs)")
|
||||
outputfile = file_name_without_extension + "_source.txt"
|
||||
with open(outputfile, 'w') as file:
|
||||
for doc in docs:
|
||||
file.write(doc.page_content)
|
||||
docs = text_splitter.split_documents(docs)
|
||||
|
||||
#print(f"文档切分示例:{docs[0]}")
|
||||
i = 0
|
||||
for doc in docs:
|
||||
print(f"**********切分段{i}:{doc}")
|
||||
i = i+1
|
||||
# print(f"KnowledgeFile: filepath:{self.filepath}")
|
||||
# file_name_without_extension, file_extension = os.path.splitext(self.filepath)
|
||||
# print("filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}")
|
||||
|
||||
i = 1
|
||||
outputfile = file_name_without_extension + "_split.txt"
|
||||
# 打开文件以写入模式
|
||||
with open(outputfile, 'w') as file:
|
||||
for doc in docs:
|
||||
print(f"**********切分段{i}:{doc}")
|
||||
file.write(f"分段{i}")
|
||||
file.write(doc.page_content)
|
||||
i = i+1
|
||||
|
||||
if zh_title_enhance:
|
||||
docs = func_zh_title_enhance(docs)
|
||||
|
|
@ -407,7 +423,8 @@ if __name__ == "__main__":
|
|||
kb_file = KnowledgeFile(filename="test.txt", knowledge_base_name="samples")
|
||||
# kb_file.text_splitter_name = "RecursiveCharacterTextSplitter"
|
||||
docs = kb_file.file2docs()
|
||||
pprint(docs[-1])
|
||||
#pprint(docs[-1])
|
||||
|
||||
docs = kb_file.file2text()
|
||||
pprint(docs[-1])
|
||||
docs = kb_file.docs2texts()
|
||||
#docs = kb_file.file2text()
|
||||
#pprint(docs[-1])
|
||||
|
|
|
|||
4
test.py
4
test.py
|
|
@ -13,9 +13,9 @@ if __name__ == '__main__':
|
|||
# pprint(docs[-1])
|
||||
|
||||
faissService = FaissKBService("test")
|
||||
faissService.add_doc(KnowledgeFile("国网安徽信通公司安全准入实施要求_修订.docx", "test"))
|
||||
faissService.add_doc(KnowledgeFile("电力电缆故障测寻车技术规范.docx", "test"))
|
||||
# faissService.delete_doc(KnowledgeFile("README.md", "test"))
|
||||
# faissService.do_drop_kb()
|
||||
print(faissService.search_docs("准入手续的内容是什么?"))
|
||||
#print(faissService.search_docs("准入手续的内容是什么?"))
|
||||
|
||||
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
5
webui.py
5
webui.py
|
|
@ -17,7 +17,7 @@ if __name__ == "__main__":
|
|||
menu_items={
|
||||
'Get Help': 'https://github.com/chatchat-space/Langchain-Chatchat',
|
||||
'Report a bug': "https://github.com/chatchat-space/Langchain-Chatchat/issues",
|
||||
'About': f"""欢迎使用 Langchain-Chatchat WebUI {VERSION}!"""
|
||||
'About': f"""欢迎使用 思极大模型 WebUI {VERSION}!"""
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -36,7 +36,8 @@ if __name__ == "__main__":
|
|||
st.image(
|
||||
os.path.join(
|
||||
"img",
|
||||
"logo-long-chatchat-trans-v2.png"
|
||||
"siji.jpg"
|
||||
#"logo-long-chatchat-trans-v2.png"
|
||||
),
|
||||
use_column_width=True
|
||||
)
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Reference in New Issue