From 5852b4c62e5dbfdafccc87a34d8500bac6b5a90e Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 11 May 2023 10:54:45 +0800 Subject: [PATCH] update pdf read strategy --- chains/local_doc_qa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chains/local_doc_qa.py b/chains/local_doc_qa.py index ea4b0e1..51e47d4 100644 --- a/chains/local_doc_qa.py +++ b/chains/local_doc_qa.py @@ -22,7 +22,7 @@ def load_file(filepath, sentence_size=SENTENCE_SIZE): loader = UnstructuredFileLoader(filepath, mode="elements") docs = loader.load() elif filepath.lower().endswith(".pdf"): - loader = UnstructuredFileLoader(filepath) + loader = UnstructuredFileLoader(filepath, strategy="fast") textsplitter = ChineseTextSplitter(pdf=True, sentence_size=sentence_size) docs = loader.load_and_split(textsplitter) else: