增加日志

This commit is contained in:
weiweiw 2025-01-14 11:02:23 +08:00
parent 345c57fbca
commit 464436dd20
1 changed file with 14 additions and 13 deletions

View File

@@ -228,6 +228,7 @@ def make_text_splitter(splitter_name, chunk_size, chunk_overlap):
""" """
根据参数获取特定的分词器 根据参数获取特定的分词器
""" """
logger.info(f"make_text_splitter start....splitter_name:{splitter_name}")
splitter_name = splitter_name or "SpacyTextSplitter" splitter_name = splitter_name or "SpacyTextSplitter"
try: try:
if ( if (
@@ -523,16 +524,16 @@ def format_reference(kb_name: str, docs: List[Dict], api_base_url: str="") -> Li
return source_documents return source_documents
if __name__ == "__main__": # if __name__ == "__main__":
from pprint import pprint # from pprint import pprint
#
kb_file = KnowledgeFile( # kb_file = KnowledgeFile(
filename="E:\\LLM\\Data\\Test.md", knowledge_base_name="samples" # filename="E:\\LLM\\Data\\Test.md", knowledge_base_name="samples"
) # )
# kb_file.text_splitter_name = "RecursiveCharacterTextSplitter" # # kb_file.text_splitter_name = "RecursiveCharacterTextSplitter"
kb_file.text_splitter_name = "MarkdownHeaderTextSplitter" # kb_file.text_splitter_name = "MarkdownHeaderTextSplitter"
docs = kb_file.file2docs() # docs = kb_file.file2docs()
# pprint(docs[-1]) # # pprint(docs[-1])
texts = kb_file.docs2texts(docs) # texts = kb_file.docs2texts(docs)
for text in texts: # for text in texts:
print(text) # print(text)