提醒fitz包是来自pyMuPDF (#1407)
This commit is contained in:
parent
f422575a17
commit
196bde3ead
|
|
@ -5,7 +5,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
|
||||||
class RapidOCRPDFLoader(UnstructuredFileLoader):
|
class RapidOCRPDFLoader(UnstructuredFileLoader):
|
||||||
def _get_elements(self) -> List:
|
def _get_elements(self) -> List:
|
||||||
def pdf2text(filepath):
|
def pdf2text(filepath):
|
||||||
import fitz
|
import fitz # pyMuPDF里面的fitz包,不要与pip install fitz混淆
|
||||||
from rapidocr_onnxruntime import RapidOCR
|
from rapidocr_onnxruntime import RapidOCR
|
||||||
import numpy as np
|
import numpy as np
|
||||||
ocr = RapidOCR()
|
ocr = RapidOCR()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue