增加日志

This commit is contained in:
weiweiw 2025-01-14 11:02:23 +08:00
parent 345c57fbca
commit 464436dd20
1 changed file with 14 additions and 13 deletions

View File

@@ -228,6 +228,7 @@ def make_text_splitter(splitter_name, chunk_size, chunk_overlap):
"""
根据参数获取特定的分词器
"""
logger.info(f"make_text_splitter start....splitter_name:{splitter_name}")
splitter_name = splitter_name or "SpacyTextSplitter"
try:
if (
@@ -523,16 +524,16 @@ def format_reference(kb_name: str, docs: List[Dict], api_base_url: str="") -> Li
return source_documents
if __name__ == "__main__":
    # Ad-hoc manual check: build a KnowledgeFile for one local file, convert
    # it to docs, convert those docs to texts, and print each text in turn.
    from pprint import pprint

    kb_file = KnowledgeFile(
        filename="E:\\LLM\\Data\\Test.md",
        knowledge_base_name="samples",
    )

    # Splitter used for this run; "RecursiveCharacterTextSplitter" is the
    # alternative that was previously tried here.
    kb_file.text_splitter_name = "MarkdownHeaderTextSplitter"

    docs = kb_file.file2docs()
    # pprint(docs[-1]) can be re-enabled here to inspect the last loaded doc.
    for text in kb_file.docs2texts(docs):
        print(text)
# if __name__ == "__main__":
# from pprint import pprint
#
# kb_file = KnowledgeFile(
# filename="E:\\LLM\\Data\\Test.md", knowledge_base_name="samples"
# )
# # kb_file.text_splitter_name = "RecursiveCharacterTextSplitter"
# kb_file.text_splitter_name = "MarkdownHeaderTextSplitter"
# docs = kb_file.file2docs()
# # pprint(docs[-1])
# texts = kb_file.docs2texts(docs)
# for text in texts:
# print(text)