From 196bde3ead6091724fa10ae0809866a11376e2bd Mon Sep 17 00:00:00 2001 From: roki1031 <52522880+roki1031@users.noreply.github.com> Date: Fri, 8 Sep 2023 12:20:20 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8F=90=E9=86=92fitz=E5=8C=85=E6=98=AF?= =?UTF-8?q?=E6=9D=A5=E8=87=AApyMuPDF=20(#1407)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- document_loaders/mypdfloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/document_loaders/mypdfloader.py b/document_loaders/mypdfloader.py index 71e063d..a3153a8 100644 --- a/document_loaders/mypdfloader.py +++ b/document_loaders/mypdfloader.py @@ -5,7 +5,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader class RapidOCRPDFLoader(UnstructuredFileLoader): def _get_elements(self) -> List: def pdf2text(filepath): - import fitz + import fitz # pyMuPDF里面的fitz包,不要与pip install fitz混淆 from rapidocr_onnxruntime import RapidOCR import numpy as np ocr = RapidOCR()