parent
26393f488b
commit
2c72a00954
|
|
@ -9,6 +9,7 @@ from typing import List, Tuple
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from utils import torch_gc
|
from utils import torch_gc
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
DEVICE_ = EMBEDDING_DEVICE
|
DEVICE_ = EMBEDDING_DEVICE
|
||||||
|
|
@ -136,6 +137,7 @@ class LocalDocQA:
|
||||||
filepath: str or List[str],
|
filepath: str or List[str],
|
||||||
vs_path: str or os.PathLike = None):
|
vs_path: str or os.PathLike = None):
|
||||||
loaded_files = []
|
loaded_files = []
|
||||||
|
failed_files = []
|
||||||
if isinstance(filepath, str):
|
if isinstance(filepath, str):
|
||||||
if not os.path.exists(filepath):
|
if not os.path.exists(filepath):
|
||||||
print("路径不存在")
|
print("路径不存在")
|
||||||
|
|
@ -152,15 +154,19 @@ class LocalDocQA:
|
||||||
return None
|
return None
|
||||||
elif os.path.isdir(filepath):
|
elif os.path.isdir(filepath):
|
||||||
docs = []
|
docs = []
|
||||||
for file in os.listdir(filepath):
|
for file in tqdm(os.listdir(filepath), desc="加载文件"):
|
||||||
fullfilepath = os.path.join(filepath, file)
|
fullfilepath = os.path.join(filepath, file)
|
||||||
try:
|
try:
|
||||||
docs += load_file(fullfilepath)
|
docs += load_file(fullfilepath)
|
||||||
print(f"{file} 已成功加载")
|
|
||||||
loaded_files.append(fullfilepath)
|
loaded_files.append(fullfilepath)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
failed_files.append(file)
|
||||||
print(f"{file} 未能成功加载")
|
|
||||||
|
if len(failed_files) > 0:
|
||||||
|
print("以下文件未能成功加载:")
|
||||||
|
for file in failed_files:
|
||||||
|
print(file,end="\n")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
docs = []
|
docs = []
|
||||||
for file in filepath:
|
for file in filepath:
|
||||||
|
|
@ -172,6 +178,7 @@ class LocalDocQA:
|
||||||
print(e)
|
print(e)
|
||||||
print(f"{file} 未能成功加载")
|
print(f"{file} 未能成功加载")
|
||||||
if len(docs) > 0:
|
if len(docs) > 0:
|
||||||
|
print("文件加载完毕,正在生成向量库")
|
||||||
if vs_path and os.path.isdir(vs_path):
|
if vs_path and os.path.isdir(vs_path):
|
||||||
vector_store = FAISS.load_local(vs_path, self.embeddings)
|
vector_store = FAISS.load_local(vs_path, self.embeddings)
|
||||||
vector_store.add_documents(docs)
|
vector_store.add_documents(docs)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue