diff --git a/document_loaders/mypdfloader.py b/document_loaders/mypdfloader.py index 71e063d..a3153a8 100644 --- a/document_loaders/mypdfloader.py +++ b/document_loaders/mypdfloader.py @@ -5,7 +5,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader class RapidOCRPDFLoader(UnstructuredFileLoader): def _get_elements(self) -> List: def pdf2text(filepath): - import fitz + import fitz # pyMuPDF里面的fitz包,不要与pip install fitz混淆 from rapidocr_onnxruntime import RapidOCR import numpy as np ocr = RapidOCR()