Optimize vector store recreation: for FAISS, save the vector store only once, after all docs have been parsed

This commit is contained in:
liunux4odoo 2023-08-21 08:50:15 +08:00
parent f40bb69224
commit c571585ffd
4 changed files with 29 additions and 8 deletions

View File

@ -2,6 +2,8 @@ from server.knowledge_base.migrate import create_tables, folder2db, recreate_all
from configs.model_config import NLTK_DATA_PATH from configs.model_config import NLTK_DATA_PATH
import nltk import nltk
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
from startup import dump_server_info
if __name__ == "__main__": if __name__ == "__main__":
import argparse import argparse
@ -21,6 +23,8 @@ if __name__ == "__main__":
) )
args = parser.parse_args() args = parser.parse_args()
dump_server_info()
create_tables() create_tables()
print("database talbes created") print("database talbes created")

View File

@ -201,7 +201,11 @@ async def recreate_vector_store(
"finished": i, "finished": i,
"doc": doc, "doc": doc,
}, ensure_ascii=False) }, ensure_ascii=False)
kb.add_doc(kb_file) if i == len(docs) - 1:
not_refresh_vs_cache = False
else:
not_refresh_vs_cache = True
kb.add_doc(kb_file, not_refresh_vs_cache=not_refresh_vs_cache)
except Exception as e: except Exception as e:
print(e) print(e)
yield json.dumps({ yield json.dumps({

View File

@ -43,7 +43,11 @@ def folder2db(
kb_file = KnowledgeFile(doc, kb_name) kb_file = KnowledgeFile(doc, kb_name)
if callable(callback_before): if callable(callback_before):
callback_before(kb_file, i, docs) callback_before(kb_file, i, docs)
kb.add_doc(kb_file) if i == len(docs) - 1:
not_refresh_vs_cache = False
else:
not_refresh_vs_cache = True
kb.add_doc(kb_file, not_refresh_vs_cache=not_refresh_vs_cache)
if callable(callback_after): if callable(callback_after):
callback_after(kb_file, i, docs) callback_after(kb_file, i, docs)
except Exception as e: except Exception as e:
@ -67,7 +71,11 @@ def folder2db(
kb_file = KnowledgeFile(doc, kb_name) kb_file = KnowledgeFile(doc, kb_name)
if callable(callback_before): if callable(callback_before):
callback_before(kb_file, i, docs) callback_before(kb_file, i, docs)
kb.update_doc(kb_file) if i == len(docs) - 1:
not_refresh_vs_cache = False
else:
not_refresh_vs_cache = True
kb.update_doc(kb_file, not_refresh_vs_cache=not_refresh_vs_cache)
if callable(callback_after): if callable(callback_after):
callback_after(kb_file, i, docs) callback_after(kb_file, i, docs)
except Exception as e: except Exception as e:
@ -81,7 +89,11 @@ def folder2db(
kb_file = KnowledgeFile(doc, kb_name) kb_file = KnowledgeFile(doc, kb_name)
if callable(callback_before): if callable(callback_before):
callback_before(kb_file, i, docs) callback_before(kb_file, i, docs)
kb.add_doc(kb_file) if i == len(docs) - 1:
not_refresh_vs_cache = False
else:
not_refresh_vs_cache = True
kb.add_doc(kb_file, not_refresh_vs_cache=not_refresh_vs_cache)
if callable(callback_after): if callable(callback_after):
callback_after(kb_file, i, docs) callback_after(kb_file, i, docs)
except Exception as e: except Exception as e:

View File

@ -317,6 +317,11 @@ def parse_args() -> argparse.ArgumentParser:
def dump_server_info(after_start=False): def dump_server_info(after_start=False):
import platform
import langchain
import fastchat
from configs.server_config import api_address, webui_address
print("\n\n") print("\n\n")
print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30) print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
print(f"操作系统:{platform.platform()}.") print(f"操作系统:{platform.platform()}.")
@ -342,11 +347,7 @@ def dump_server_info(after_start=False):
if __name__ == "__main__": if __name__ == "__main__":
import platform
import time import time
import langchain
import fastchat
from configs.server_config import api_address, webui_address
mp.set_start_method("spawn") mp.set_start_method("spawn")
queue = Queue() queue = Queue()