Update PDF read strategy: load PDFs with UnstructuredFileLoader using strategy="fast"

This commit is contained in:
imClumsyPanda 2023-05-11 10:54:45 +08:00
parent 2987c9cd52
commit 5852b4c62e
1 changed file with 1 addition and 1 deletion

View File

@@ -22,7 +22,7 @@ def load_file(filepath, sentence_size=SENTENCE_SIZE):
loader = UnstructuredFileLoader(filepath, mode="elements")
docs = loader.load()
elif filepath.lower().endswith(".pdf"):
-loader = UnstructuredFileLoader(filepath)
+loader = UnstructuredFileLoader(filepath, strategy="fast")
textsplitter = ChineseTextSplitter(pdf=True, sentence_size=sentence_size)
docs = loader.load_and_split(textsplitter)
else: