Add support for CSV files

This commit is contained in:
imClumsyPanda 2023-06-07 22:14:08 +08:00
parent 46b872a854
commit 8b7c2e417c
1 changed file with 4 additions and 1 deletion

View File

@@ -1,6 +1,6 @@
from langchain.embeddings.huggingface import HuggingFaceEmbeddings from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS from langchain.vectorstores import FAISS
from langchain.document_loaders import UnstructuredFileLoader, TextLoader from langchain.document_loaders import UnstructuredFileLoader, TextLoader, CSVLoader
from configs.model_config import * from configs.model_config import *
import datetime import datetime
from textsplitter import ChineseTextSplitter from textsplitter import ChineseTextSplitter
@@ -74,6 +74,9 @@ def load_file(filepath, sentence_size=SENTENCE_SIZE):
loader = UnstructuredPaddleImageLoader(filepath, mode="elements") loader = UnstructuredPaddleImageLoader(filepath, mode="elements")
textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size) textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
docs = loader.load_and_split(text_splitter=textsplitter) docs = loader.load_and_split(text_splitter=textsplitter)
elif filepath.lower().endswith(".csv"):
loader = CSVLoader(filepath)
docs = loader.load()
else: else:
loader = UnstructuredFileLoader(filepath, mode="elements") loader = UnstructuredFileLoader(filepath, mode="elements")
textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size) textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)