From 4fae7d4884fe045b2f95dfaa4880947a12d81c81 Mon Sep 17 00:00:00 2001
From: imClumsyPanda
Date: Sun, 3 Sep 2023 16:13:37 +0800
Subject: [PATCH] add unit test for RapidOCRLoader and RapidOCRPDFLoader

---
 tests/document_loader/test_imgloader.py | 28 ++++++++++++++++++++++++++++
 tests/document_loader/test_pdfloader.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 tests/document_loader/test_imgloader.py
 create mode 100644 tests/document_loader/test_pdfloader.py

diff --git a/tests/document_loader/test_imgloader.py b/tests/document_loader/test_imgloader.py
new file mode 100644
--- /dev/null
+++ b/tests/document_loader/test_imgloader.py
@@ -0,0 +1,28 @@
+"""Unit test for RapidOCRLoader using the sample image under tests/samples."""
+import sys
+from pathlib import Path
+from pprint import pprint
+
+# Add the repository root to sys.path so `document_loaders` can be imported
+# regardless of the directory the tests are started from.
+root_path = Path(__file__).parent.parent.parent
+sys.path.append(str(root_path))
+
+test_files = {
+    "ocr_test.jpg": str(root_path / "tests" / "samples" / "ocr_test.jpg"),
+}
+
+
+def test_rapidocrloader():
+    """Load the sample image and expect a non-empty list of text documents."""
+    img_path = test_files["ocr_test.jpg"]
+    # Imported inside the test, after sys.path has been extended above.
+    from document_loaders import RapidOCRLoader
+
+    loader = RapidOCRLoader(img_path)
+    docs = loader.load()
+    pprint(docs)
+    # Separate asserts so a failure pinpoints which expectation broke.
+    assert isinstance(docs, list)
+    assert len(docs) > 0
+    assert isinstance(docs[0].page_content, str)
diff --git a/tests/document_loader/test_pdfloader.py b/tests/document_loader/test_pdfloader.py
new file mode 100644
--- /dev/null
+++ b/tests/document_loader/test_pdfloader.py
@@ -0,0 +1,28 @@
+"""Unit test for RapidOCRPDFLoader using the sample PDF under tests/samples."""
+import sys
+from pathlib import Path
+from pprint import pprint
+
+# Add the repository root to sys.path so `document_loaders` can be imported
+# regardless of the directory the tests are started from.
+root_path = Path(__file__).parent.parent.parent
+sys.path.append(str(root_path))
+
+test_files = {
+    "ocr_test.pdf": str(root_path / "tests" / "samples" / "ocr_test.pdf"),
+}
+
+
+def test_rapidocrpdfloader():
+    """Load the sample PDF and expect a non-empty list of text documents."""
+    pdf_path = test_files["ocr_test.pdf"]
+    # Imported inside the test, after sys.path has been extended above.
+    from document_loaders import RapidOCRPDFLoader
+
+    loader = RapidOCRPDFLoader(pdf_path)
+    docs = loader.load()
+    pprint(docs)
+    # Separate asserts so a failure pinpoints which expectation broke.
+    assert isinstance(docs, list)
+    assert len(docs) > 0
+    assert isinstance(docs[0].page_content, str)