diff --git a/tests/document_loader/test_imgloader.py b/tests/document_loader/test_imgloader.py
new file mode 100644
--- /dev/null
+++ b/tests/document_loader/test_imgloader.py
@@ -0,0 +1,29 @@
+"""Smoke test for the RapidOCR image loader."""
+
+import sys
+from pathlib import Path
+from pprint import pprint
+
+# Put the repository root on sys.path so that `document_loaders` can be
+# imported inside the test below.
+root_path = Path(__file__).parent.parent.parent
+sys.path.append(str(root_path))
+
+test_files = {
+    "ocr_test.jpg": str(root_path / "tests" / "samples" / "ocr_test.jpg"),
+}
+
+
+def test_rapidocrloader():
+    """Load the sample image and check that text documents are returned."""
+    img_path = test_files["ocr_test.jpg"]
+    # Imported here, after sys.path has been extended above.
+    from document_loaders import RapidOCRLoader
+
+    loader = RapidOCRLoader(img_path)
+    docs = loader.load()
+    pprint(docs)
+    # Separate asserts so a failure pinpoints which expectation broke.
+    assert isinstance(docs, list)
+    assert len(docs) > 0
+    assert isinstance(docs[0].page_content, str)
diff --git a/tests/document_loader/test_pdfloader.py b/tests/document_loader/test_pdfloader.py
new file mode 100644
--- /dev/null
+++ b/tests/document_loader/test_pdfloader.py
@@ -0,0 +1,29 @@
+"""Smoke test for the RapidOCR PDF loader."""
+
+import sys
+from pathlib import Path
+from pprint import pprint
+
+# Put the repository root on sys.path so that `document_loaders` can be
+# imported inside the test below.
+root_path = Path(__file__).parent.parent.parent
+sys.path.append(str(root_path))
+
+test_files = {
+    "ocr_test.pdf": str(root_path / "tests" / "samples" / "ocr_test.pdf"),
+}
+
+
+def test_rapidocrpdfloader():
+    """Load the sample PDF and check that text documents are returned."""
+    pdf_path = test_files["ocr_test.pdf"]
+    # Imported here, after sys.path has been extended above.
+    from document_loaders import RapidOCRPDFLoader
+
+    loader = RapidOCRPDFLoader(pdf_path)
+    docs = loader.load()
+    pprint(docs)
+    # Separate asserts so a failure pinpoints which expectation broke.
+    assert isinstance(docs, list)
+    assert len(docs) > 0
+    assert isinstance(docs[0].page_content, str)