From 5acea5e4fcfa5eed939ae3a1ae60502659fac7a3 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Sun, 21 May 2023 23:52:35 +0800 Subject: [PATCH] update loaders --- api.py | 3 +-- loader/image_loader.py | 3 +++ loader/pdf_loader.py | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/api.py b/api.py index c0cd1cc..af20303 100644 --- a/api.py +++ b/api.py @@ -16,9 +16,8 @@ from starlette.responses import RedirectResponse from chains.local_doc_qa import LocalDocQA from configs.model_config import (VS_ROOT_PATH, UPLOAD_ROOT_PATH, EMBEDDING_DEVICE, - EMBEDDING_MODEL, LLM_MODEL, NLTK_DATA_PATH, + EMBEDDING_MODEL, NLTK_DATA_PATH, VECTOR_SEARCH_TOP_K, LLM_HISTORY_LEN, OPEN_CROSS_DOMAIN) -from agent import bing_search import models.shared as shared from models.loader.args import parser from models.loader import LoaderCheckPoint diff --git a/loader/image_loader.py b/loader/image_loader.py index d9e468e..48b9d57 100644 --- a/loader/image_loader.py +++ b/loader/image_loader.py @@ -4,7 +4,10 @@ from typing import List from langchain.document_loaders.unstructured import UnstructuredFileLoader from paddleocr import PaddleOCR import os +import nltk +from configs.model_config import NLTK_DATA_PATH +nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path class UnstructuredPaddleImageLoader(UnstructuredFileLoader): """Loader that uses unstructured to load image files, such as PNGs and JPGs.""" diff --git a/loader/pdf_loader.py b/loader/pdf_loader.py index 67eb826..8f55a94 100644 --- a/loader/pdf_loader.py +++ b/loader/pdf_loader.py @@ -5,7 +5,10 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader from paddleocr import PaddleOCR import os import fitz +import nltk +from configs.model_config import NLTK_DATA_PATH +nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path class UnstructuredPaddlePDFLoader(UnstructuredFileLoader): """Loader that uses unstructured to load image files, such as PNGs and JPGs."""