165 lines
4.5 KiB
Python
165 lines
4.5 KiB
Python
# Description: 初始化数据库,包括创建表、导入数据、更新向量空间等操作
|
||
from datetime import datetime
|
||
import multiprocessing as mp
|
||
import sys
|
||
import time
|
||
from typing import Dict
|
||
|
||
import click
|
||
|
||
from chatchat.settings import Settings
|
||
from chatchat.server.knowledge_base.migrate import (
|
||
create_tables,
|
||
folder2db,
|
||
import_from_db,
|
||
prune_db_docs,
|
||
prune_folder_files,
|
||
reset_tables,
|
||
)
|
||
from chatchat.utils import build_logger
|
||
from chatchat.server.utils import get_default_embedding
|
||
|
||
|
||
logger = build_logger()
|
||
|
||
|
||
def worker(args: dict):
|
||
start_time = datetime.now()
|
||
|
||
try:
|
||
if args.get("create_tables"):
|
||
create_tables() # confirm tables exist
|
||
|
||
if args.get("clear_tables"):
|
||
reset_tables()
|
||
print("database tables reset")
|
||
|
||
if args.get("recreate_vs"):
|
||
create_tables()
|
||
print("recreating all vector stores")
|
||
folder2db(
|
||
kb_names=args.get("kb_name"), mode="recreate_vs", embed_model=args.get("embed_model")
|
||
)
|
||
elif args.get("import_db"):
|
||
import_from_db(args.get("import_db"))
|
||
elif args.get("update_in_db"):
|
||
folder2db(
|
||
kb_names=args.get("kb_name"), mode="update_in_db", embed_model=args.get("embed_model")
|
||
)
|
||
elif args.get("increment"):
|
||
folder2db(
|
||
kb_names=args.get("kb_name"), mode="increment", embed_model=args.get("embed_model")
|
||
)
|
||
elif args.get("prune_db"):
|
||
prune_db_docs(args.get("kb_name"))
|
||
elif args.get("prune_folder"):
|
||
prune_folder_files(args.get("kb_name"))
|
||
|
||
end_time = datetime.now()
|
||
print(f"总计用时\t:{end_time-start_time}\n")
|
||
except Exception as e:
|
||
logger.exception(e)
|
||
|
||
|
||
@click.command(help="知识库相关功能")
|
||
@click.option(
|
||
"-r",
|
||
"--recreate-vs",
|
||
is_flag=True,
|
||
help=(
|
||
"""
|
||
recreate vector store.
|
||
use this option if you have copied document files to the content folder, but vector store has not been populated or DEFAUL_VS_TYPE/DEFAULT_EMBEDDING_MODEL changed.
|
||
"""
|
||
),
|
||
)
|
||
@click.option(
|
||
"--create-tables",
|
||
is_flag=True,
|
||
help=("create empty tables if not existed"),
|
||
)
|
||
@click.option(
|
||
"--clear-tables",
|
||
is_flag=True,
|
||
help=(
|
||
"create empty tables, or drop the database tables before recreate vector stores"
|
||
),
|
||
)
|
||
@click.option(
|
||
"-u",
|
||
"--update-in-db",
|
||
is_flag=True,
|
||
help=(
|
||
"""
|
||
update vector store for files exist in database.
|
||
use this option if you want to recreate vectors for files exist in db and skip files exist in local folder only.
|
||
"""
|
||
),
|
||
)
|
||
@click.option(
|
||
"-i",
|
||
"--increment",
|
||
is_flag=True,
|
||
help=(
|
||
"""
|
||
update vector store for files exist in local folder and not exist in database.
|
||
use this option if you want to create vectors incrementally.
|
||
"""
|
||
),
|
||
)
|
||
@click.option(
|
||
"--prune-db",
|
||
is_flag=True,
|
||
help=(
|
||
"""
|
||
delete docs in database that not existed in local folder.
|
||
it is used to delete database docs after user deleted some doc files in file browser
|
||
"""
|
||
),
|
||
)
|
||
@click.option(
|
||
"--prune-folder",
|
||
is_flag=True,
|
||
help=(
|
||
"""
|
||
delete doc files in local folder that not existed in database.
|
||
is is used to free local disk space by delete unused doc files.
|
||
"""
|
||
),
|
||
)
|
||
@click.option(
|
||
"-n",
|
||
"--kb-name",
|
||
multiple=True,
|
||
default=[],
|
||
help=(
|
||
"specify knowledge base names to operate on. default is all folders exist in KB_ROOT_PATH."
|
||
),
|
||
)
|
||
@click.option(
|
||
"-e",
|
||
"--embed-model",
|
||
type=str,
|
||
default=get_default_embedding(),
|
||
help=("specify embeddings model."),
|
||
)
|
||
@click.option(
|
||
"--import-db",
|
||
help="import tables from specified sqlite database"
|
||
)
|
||
def main(**kwds):
|
||
p = mp.Process(target=worker, args=(kwds,), daemon=True)
|
||
p.start()
|
||
while p.is_alive():
|
||
try:
|
||
time.sleep(0.1)
|
||
except KeyboardInterrupt:
|
||
logger.warning("Caught KeyboardInterrupt! Setting stop event...")
|
||
p.terminate()
|
||
sys.exit()
|
||
|
||
|
||
if __name__ == "__main__":
|
||
mp.set_start_method("spawn")
|
||
main()
|