Optimize vector store recreation: for FAISS, save the vector store only once, after all docs have been parsed
This commit is contained in:
parent
f40bb69224
commit
c571585ffd
|
|
@ -2,6 +2,8 @@ from server.knowledge_base.migrate import create_tables, folder2db, recreate_all
|
||||||
from configs.model_config import NLTK_DATA_PATH
|
from configs.model_config import NLTK_DATA_PATH
|
||||||
import nltk
|
import nltk
|
||||||
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
|
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
|
||||||
|
from startup import dump_server_info
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import argparse
|
import argparse
|
||||||
|
|
@ -21,6 +23,8 @@ if __name__ == "__main__":
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
dump_server_info()
|
||||||
|
|
||||||
create_tables()
|
create_tables()
|
||||||
print("database talbes created")
|
print("database talbes created")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -201,7 +201,11 @@ async def recreate_vector_store(
|
||||||
"finished": i,
|
"finished": i,
|
||||||
"doc": doc,
|
"doc": doc,
|
||||||
}, ensure_ascii=False)
|
}, ensure_ascii=False)
|
||||||
kb.add_doc(kb_file)
|
if i == len(docs) - 1:
|
||||||
|
not_refresh_vs_cache = False
|
||||||
|
else:
|
||||||
|
not_refresh_vs_cache = True
|
||||||
|
kb.add_doc(kb_file, not_refresh_vs_cache=not_refresh_vs_cache)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
yield json.dumps({
|
yield json.dumps({
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,11 @@ def folder2db(
|
||||||
kb_file = KnowledgeFile(doc, kb_name)
|
kb_file = KnowledgeFile(doc, kb_name)
|
||||||
if callable(callback_before):
|
if callable(callback_before):
|
||||||
callback_before(kb_file, i, docs)
|
callback_before(kb_file, i, docs)
|
||||||
kb.add_doc(kb_file)
|
if i == len(docs) - 1:
|
||||||
|
not_refresh_vs_cache = False
|
||||||
|
else:
|
||||||
|
not_refresh_vs_cache = True
|
||||||
|
kb.add_doc(kb_file, not_refresh_vs_cache=not_refresh_vs_cache)
|
||||||
if callable(callback_after):
|
if callable(callback_after):
|
||||||
callback_after(kb_file, i, docs)
|
callback_after(kb_file, i, docs)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -67,7 +71,11 @@ def folder2db(
|
||||||
kb_file = KnowledgeFile(doc, kb_name)
|
kb_file = KnowledgeFile(doc, kb_name)
|
||||||
if callable(callback_before):
|
if callable(callback_before):
|
||||||
callback_before(kb_file, i, docs)
|
callback_before(kb_file, i, docs)
|
||||||
kb.update_doc(kb_file)
|
if i == len(docs) - 1:
|
||||||
|
not_refresh_vs_cache = False
|
||||||
|
else:
|
||||||
|
not_refresh_vs_cache = True
|
||||||
|
kb.update_doc(kb_file, not_refresh_vs_cache=not_refresh_vs_cache)
|
||||||
if callable(callback_after):
|
if callable(callback_after):
|
||||||
callback_after(kb_file, i, docs)
|
callback_after(kb_file, i, docs)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -81,7 +89,11 @@ def folder2db(
|
||||||
kb_file = KnowledgeFile(doc, kb_name)
|
kb_file = KnowledgeFile(doc, kb_name)
|
||||||
if callable(callback_before):
|
if callable(callback_before):
|
||||||
callback_before(kb_file, i, docs)
|
callback_before(kb_file, i, docs)
|
||||||
kb.add_doc(kb_file)
|
if i == len(docs) - 1:
|
||||||
|
not_refresh_vs_cache = False
|
||||||
|
else:
|
||||||
|
not_refresh_vs_cache = True
|
||||||
|
kb.add_doc(kb_file, not_refresh_vs_cache=not_refresh_vs_cache)
|
||||||
if callable(callback_after):
|
if callable(callback_after):
|
||||||
callback_after(kb_file, i, docs)
|
callback_after(kb_file, i, docs)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -317,6 +317,11 @@ def parse_args() -> argparse.ArgumentParser:
|
||||||
|
|
||||||
|
|
||||||
def dump_server_info(after_start=False):
|
def dump_server_info(after_start=False):
|
||||||
|
import platform
|
||||||
|
import langchain
|
||||||
|
import fastchat
|
||||||
|
from configs.server_config import api_address, webui_address
|
||||||
|
|
||||||
print("\n\n")
|
print("\n\n")
|
||||||
print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
|
print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
|
||||||
print(f"操作系统:{platform.platform()}.")
|
print(f"操作系统:{platform.platform()}.")
|
||||||
|
|
@ -342,11 +347,7 @@ def dump_server_info(after_start=False):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import platform
|
|
||||||
import time
|
import time
|
||||||
import langchain
|
|
||||||
import fastchat
|
|
||||||
from configs.server_config import api_address, webui_address
|
|
||||||
|
|
||||||
mp.set_start_method("spawn")
|
mp.set_start_method("spawn")
|
||||||
queue = Queue()
|
queue = Queue()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue