diff --git a/loader/image_loader.py b/loader/image_loader.py index ec32459..4ac4c51 100644 --- a/loader/image_loader.py +++ b/loader/image_loader.py @@ -5,9 +5,6 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader from paddleocr import PaddleOCR import os import nltk -from configs.model_config import NLTK_DATA_PATH - -nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path class UnstructuredPaddleImageLoader(UnstructuredFileLoader): """Loader that uses unstructured to load image files, such as PNGs and JPGs.""" @@ -35,6 +32,10 @@ class UnstructuredPaddleImageLoader(UnstructuredFileLoader): if __name__ == "__main__": import sys sys.path.append(os.path.dirname(os.path.dirname(__file__))) + + from configs.model_config import NLTK_DATA_PATH + nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path + filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base", "samples", "content", "test.jpg") loader = UnstructuredPaddleImageLoader(filepath, mode="elements") docs = loader.load()