parent
4cdd2a5e79
commit
60a12c05f6
Binary file not shown.
|
After Width: | Height: | Size: 146 KiB |
Binary file not shown.
|
|
@ -41,11 +41,13 @@ def search_docs(query: str = Body(..., description="用户输入", examples=["
|
||||||
|
|
||||||
print(f"search_docs, query:{query}")
|
print(f"search_docs, query:{query}")
|
||||||
docs = kb.search_docs(query, top_k, score_threshold)
|
docs = kb.search_docs(query, top_k, score_threshold)
|
||||||
|
if len(pre_doc) > 0:
|
||||||
|
if docs is not None:
|
||||||
|
docs.append(pre_doc[0])
|
||||||
|
else:
|
||||||
|
docs = pre_doc[0]
|
||||||
data = [DocumentWithScore(**x[0].dict(), score=x[1]) for x in docs]
|
data = [DocumentWithScore(**x[0].dict(), score=x[1]) for x in docs]
|
||||||
# i = 1
|
|
||||||
# for x in docs:
|
|
||||||
# print(f"相似文档 {i}: {x}")
|
|
||||||
# i = i+1
|
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ class FaissKBService(KBService):
|
||||||
print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}")
|
print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}")
|
||||||
with self.load_vector_store().acquire() as vs:
|
with self.load_vector_store().acquire() as vs:
|
||||||
docs = vs.similarity_search_with_score(query, k=top_k, score_threshold=score_threshold)
|
docs = vs.similarity_search_with_score(query, k=top_k, score_threshold=score_threshold)
|
||||||
print(f"do_search,docs:{docs}")
|
#print(f"do_search,docs:{docs}")
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
def do_add_doc(self,
|
def do_add_doc(self,
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ def load_embeddings(model: str = EMBEDDING_MODEL, device: str = embedding_device
|
||||||
from server.knowledge_base.kb_cache.base import embeddings_pool
|
from server.knowledge_base.kb_cache.base import embeddings_pool
|
||||||
return embeddings_pool.load_embeddings(model=model, device=device)
|
return embeddings_pool.load_embeddings(model=model, device=device)
|
||||||
|
|
||||||
|
#PDFPlumberLoader
|
||||||
LOADER_DICT = {"UnstructuredHTMLLoader": ['.html'],
|
LOADER_DICT = {"UnstructuredHTMLLoader": ['.html'],
|
||||||
"UnstructuredMarkdownLoader": ['.md'],
|
"UnstructuredMarkdownLoader": ['.md'],
|
||||||
"CustomJSONLoader": [".json"],
|
"CustomJSONLoader": [".json"],
|
||||||
|
|
@ -302,6 +302,8 @@ class KnowledgeFile:
|
||||||
text_splitter: TextSplitter = None,
|
text_splitter: TextSplitter = None,
|
||||||
):
|
):
|
||||||
docs = docs or self.file2docs(refresh=refresh)
|
docs = docs or self.file2docs(refresh=refresh)
|
||||||
|
file_name_without_extension, file_extension = os.path.splitext(self.filepath)
|
||||||
|
print(f"filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}")
|
||||||
if not docs:
|
if not docs:
|
||||||
return []
|
return []
|
||||||
if self.ext not in [".csv"]:
|
if self.ext not in [".csv"]:
|
||||||
|
|
@ -314,12 +316,26 @@ class KnowledgeFile:
|
||||||
if doc.metadata:
|
if doc.metadata:
|
||||||
doc.metadata["source"] = os.path.basename(self.filepath)
|
doc.metadata["source"] = os.path.basename(self.filepath)
|
||||||
else:
|
else:
|
||||||
|
print(f"**********************docs2texts: text_splitter.split_documents(docs)")
|
||||||
|
outputfile = file_name_without_extension + "_source.txt"
|
||||||
|
with open(outputfile, 'w') as file:
|
||||||
|
for doc in docs:
|
||||||
|
file.write(doc.page_content)
|
||||||
docs = text_splitter.split_documents(docs)
|
docs = text_splitter.split_documents(docs)
|
||||||
|
|
||||||
#print(f"文档切分示例:{docs[0]}")
|
#print(f"文档切分示例:{docs[0]}")
|
||||||
i = 0
|
# print(f"KnowledgeFile: filepath:{self.filepath}")
|
||||||
|
# file_name_without_extension, file_extension = os.path.splitext(self.filepath)
|
||||||
|
# print("filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}")
|
||||||
|
|
||||||
|
i = 1
|
||||||
|
outputfile = file_name_without_extension + "_split.txt"
|
||||||
|
# 打开文件以写入模式
|
||||||
|
with open(outputfile, 'w') as file:
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
print(f"**********切分段{i}:{doc}")
|
print(f"**********切分段{i}:{doc}")
|
||||||
|
file.write(f"分段{i}")
|
||||||
|
file.write(doc.page_content)
|
||||||
i = i+1
|
i = i+1
|
||||||
|
|
||||||
if zh_title_enhance:
|
if zh_title_enhance:
|
||||||
|
|
@ -407,7 +423,8 @@ if __name__ == "__main__":
|
||||||
kb_file = KnowledgeFile(filename="test.txt", knowledge_base_name="samples")
|
kb_file = KnowledgeFile(filename="test.txt", knowledge_base_name="samples")
|
||||||
# kb_file.text_splitter_name = "RecursiveCharacterTextSplitter"
|
# kb_file.text_splitter_name = "RecursiveCharacterTextSplitter"
|
||||||
docs = kb_file.file2docs()
|
docs = kb_file.file2docs()
|
||||||
pprint(docs[-1])
|
#pprint(docs[-1])
|
||||||
|
|
||||||
docs = kb_file.file2text()
|
docs = kb_file.docs2texts()
|
||||||
pprint(docs[-1])
|
#docs = kb_file.file2text()
|
||||||
|
#pprint(docs[-1])
|
||||||
|
|
|
||||||
4
test.py
4
test.py
|
|
@ -13,9 +13,9 @@ if __name__ == '__main__':
|
||||||
# pprint(docs[-1])
|
# pprint(docs[-1])
|
||||||
|
|
||||||
faissService = FaissKBService("test")
|
faissService = FaissKBService("test")
|
||||||
faissService.add_doc(KnowledgeFile("国网安徽信通公司安全准入实施要求_修订.docx", "test"))
|
faissService.add_doc(KnowledgeFile("电力电缆故障测寻车技术规范.docx", "test"))
|
||||||
# faissService.delete_doc(KnowledgeFile("README.md", "test"))
|
# faissService.delete_doc(KnowledgeFile("README.md", "test"))
|
||||||
# faissService.do_drop_kb()
|
# faissService.do_drop_kb()
|
||||||
print(faissService.search_docs("准入手续的内容是什么?"))
|
#print(faissService.search_docs("准入手续的内容是什么?"))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
5
webui.py
5
webui.py
|
|
@ -17,7 +17,7 @@ if __name__ == "__main__":
|
||||||
menu_items={
|
menu_items={
|
||||||
'Get Help': 'https://github.com/chatchat-space/Langchain-Chatchat',
|
'Get Help': 'https://github.com/chatchat-space/Langchain-Chatchat',
|
||||||
'Report a bug': "https://github.com/chatchat-space/Langchain-Chatchat/issues",
|
'Report a bug': "https://github.com/chatchat-space/Langchain-Chatchat/issues",
|
||||||
'About': f"""欢迎使用 Langchain-Chatchat WebUI {VERSION}!"""
|
'About': f"""欢迎使用 思极大模型 WebUI {VERSION}!"""
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -36,7 +36,8 @@ if __name__ == "__main__":
|
||||||
st.image(
|
st.image(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
"img",
|
"img",
|
||||||
"logo-long-chatchat-trans-v2.png"
|
"siji.jpg"
|
||||||
|
#"logo-long-chatchat-trans-v2.png"
|
||||||
),
|
),
|
||||||
use_column_width=True
|
use_column_width=True
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Binary file not shown.
Loading…
Reference in New Issue