use lru_cache to optimize loading of local vector store (#496)
* Add files via upload * Update README.md * use lru_cache to optimize loading of local vector store --------- Co-authored-by: imClumsyPanda <littlepanda0716@gmail.com> Co-authored-by: liunux <liunux@office>
This commit is contained in:
parent
0a605bf4b5
commit
88fee48d8c
|
|
@ -221,6 +221,6 @@ Web UI 可以实现如下功能:
|
|||
- [x] VUE 前端
|
||||
|
||||
## 项目交流群
|
||||

|
||||

|
||||
|
||||
🎉 langchain-ChatGLM 项目交流群,如果你也对本项目感兴趣,欢迎加入群聊参与讨论交流。
|
||||
|
|
|
|||
|
|
@ -18,6 +18,21 @@ from models.loader import LoaderCheckPoint
|
|||
import models.shared as shared
|
||||
from agent import bing_search
|
||||
from langchain.docstore.document import Document
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
# patch HuggingFaceEmbeddings to make it hashable
|
||||
def _embeddings_hash(self):
    """Return a hash for an embeddings object derived from its ``model_name``.

    Intended to be installed as ``__hash__`` on the embeddings class so that
    instances can be passed to ``functools.lru_cache``-decorated functions,
    which require hashable arguments.
    """
    model_name = self.model_name
    return hash(model_name)
|
||||
|
||||
|
||||
# Install the model-name-based hash on the class so that embeddings instances
# can be used as arguments (cache keys) of the lru_cache-wrapped loader.
HuggingFaceEmbeddings.__hash__ = _embeddings_hash
|
||||
|
||||
|
||||
# cache up to CACHED_VS_NUM loaded vector stores (least recently used is evicted)
|
||||
@lru_cache(maxsize=CACHED_VS_NUM)
def load_vector_store(vs_path, embeddings):
    """Load the FAISS vector store saved at ``vs_path``, memoizing the result.

    Results are held in an LRU cache of size ``CACHED_VS_NUM`` keyed on
    ``(vs_path, embeddings)``, so both arguments must be hashable. A repeated
    call with the same arguments returns the same store object instead of
    calling ``FAISS.load_local`` again.

    NOTE: because the cached object is shared, any mutation a caller performs
    on the returned store (e.g. ``add_documents``) is visible to later callers
    that hit the same cache entry.
    """
    store = FAISS.load_local(vs_path, embeddings)
    return store
|
||||
|
||||
|
||||
def tree(filepath, ignore_dir_names=None, ignore_file_names=None):
|
||||
|
|
@ -238,7 +253,7 @@ class LocalDocQA:
|
|||
if len(docs) > 0:
|
||||
logger.info("文件加载完毕,正在生成向量库")
|
||||
if vs_path and os.path.isdir(vs_path):
|
||||
vector_store = FAISS.load_local(vs_path, self.embeddings)
|
||||
vector_store = load_vector_store(vs_path, self.embeddings)
|
||||
vector_store.add_documents(docs)
|
||||
torch_gc()
|
||||
else:
|
||||
|
|
@ -264,7 +279,7 @@ class LocalDocQA:
|
|||
text_splitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
|
||||
docs = text_splitter.split_documents(docs)
|
||||
if os.path.isdir(vs_path):
|
||||
vector_store = FAISS.load_local(vs_path, self.embeddings)
|
||||
vector_store = load_vector_store(vs_path, self.embeddings)
|
||||
vector_store.add_documents(docs)
|
||||
else:
|
||||
vector_store = FAISS.from_documents(docs, self.embeddings) ##docs 为Document列表
|
||||
|
|
@ -276,7 +291,7 @@ class LocalDocQA:
|
|||
return None, [one_title]
|
||||
|
||||
def get_knowledge_based_answer(self, query, vs_path, chat_history=[], streaming: bool = STREAMING):
|
||||
vector_store = FAISS.load_local(vs_path, self.embeddings)
|
||||
vector_store = load_vector_store(vs_path, self.embeddings)
|
||||
FAISS.similarity_search_with_score_by_vector = similarity_search_with_score_by_vector
|
||||
vector_store.chunk_size = self.chunk_size
|
||||
vector_store.chunk_conent = self.chunk_conent
|
||||
|
|
@ -304,7 +319,7 @@ class LocalDocQA:
|
|||
def get_knowledge_based_conent_test(self, query, vs_path, chunk_conent,
|
||||
score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
|
||||
vector_search_top_k=VECTOR_SEARCH_TOP_K, chunk_size=CHUNK_SIZE):
|
||||
vector_store = FAISS.load_local(vs_path, self.embeddings)
|
||||
vector_store = load_vector_store(vs_path, self.embeddings)
|
||||
FAISS.similarity_search_with_score_by_vector = similarity_search_with_score_by_vector
|
||||
vector_store.chunk_conent = chunk_conent
|
||||
vector_store.score_threshold = score_threshold
|
||||
|
|
|
|||
|
|
@ -114,6 +114,9 @@ PROMPT_TEMPLATE = """已知信息:
|
|||
|
||||
根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}"""
|
||||
|
||||
# Number of knowledge-base vector stores to keep cached
|
||||
CACHED_VS_NUM = 1
|
||||
|
||||
# Sentence length used when splitting text
|
||||
SENTENCE_SIZE = 100
|
||||
|
||||
|
|
|
|||
Binary file not shown.
|
After Width: | Height: | Size: 266 KiB |
Loading…
Reference in New Issue