reformat server/knowledge_base/migrate.py

This commit is contained in:
imClumsyPanda 2023-11-06 22:49:35 +08:00
parent fb32c31a70
commit e233e4f220
1 changed file with 29 additions and 24 deletions

View File

@@ -1,9 +1,13 @@
from configs import (EMBEDDING_MODEL, DEFAULT_VS_TYPE, ZH_TITLE_ENHANCE,
CHUNK_SIZE, OVERLAP_SIZE,
logger, log_verbose)
from server.knowledge_base.utils import (get_file_path, list_kbs_from_folder,
list_files_from_folder,files2docs_in_thread,
KnowledgeFile,)
from configs import (
EMBEDDING_MODEL, DEFAULT_VS_TYPE, ZH_TITLE_ENHANCE,
CHUNK_SIZE, OVERLAP_SIZE,
logger, log_verbose
)
from server.knowledge_base.utils import (
get_file_path, list_kbs_from_folder,
list_files_from_folder, files2docs_in_thread,
KnowledgeFile
)
from server.knowledge_base.kb_service.base import KBServiceFactory
from server.db.models.chat_history_model import ChatHistoryModel
from server.db.repository.knowledge_file_repository import add_file_to_db # ensure Models are imported
@@ -24,15 +28,15 @@ def reset_tables():
def import_from_db(
sqlite_path: str = None,
# csv_path: str = None,
sqlite_path: str = None,
# csv_path: str = None,
) -> bool:
'''
"""
在知识库与向量库无变化的情况下从备份数据库中导入数据到 info.db
适用于版本升级时info.db 结构变化但无需重新向量化的情况
请确保两边数据库表名一致需要导入的字段名一致
当前仅支持 sqlite
'''
"""
import sqlite3 as sql
from pprint import pprint
@@ -76,22 +80,23 @@ def file_to_kbfile(kb_name: str, files: List[str]) -> List[KnowledgeFile]:
def folder2db(
kb_names: List[str],
mode: Literal["recreate_vs", "update_in_db", "increament"],
vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE,
embed_model: str = EMBEDDING_MODEL,
chunk_size: int = CHUNK_SIZE,
chunk_overlap: int = OVERLAP_SIZE,
zh_title_enhance: bool = ZH_TITLE_ENHANCE,
kb_names: List[str],
mode: Literal["recreate_vs", "update_in_db", "increament"],
vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE,
embed_model: str = EMBEDDING_MODEL,
chunk_size: int = CHUNK_SIZE,
chunk_overlap: int = OVERLAP_SIZE,
zh_title_enhance: bool = ZH_TITLE_ENHANCE,
):
'''
"""
use existed files in local folder to populate database and/or vector store.
set parameter `mode` to:
recreate_vs: recreate all vector store and fill info to database using existed files in local folder
fill_info_only(disabled): do not create vector store, fill info to db using existed files only
update_in_db: update vector store and database info using local files that existed in database only
increament: create vector store and database info for local files that not existed in database only
'''
"""
def files2vs(kb_name: str, kb_files: List[KnowledgeFile]):
for success, result in files2docs_in_thread(kb_files,
chunk_size=chunk_size,
@@ -146,10 +151,10 @@ def folder2db(
def prune_db_docs(kb_names: List[str]):
'''
"""
delete docs in database that not existed in local folder.
it is used to delete database docs after user deleted some doc files in file browser
'''
"""
for kb_name in kb_names:
kb = KBServiceFactory.get_service_by_name(kb_name)
if kb and kb.exists():
@@ -164,10 +169,10 @@ def prune_db_docs(kb_names: List[str]):
def prune_folder_files(kb_names: List[str]):
'''
"""
delete doc files in local folder that not existed in database.
is is used to free local disk space by delete unused doc files.
'''
it is used to free local disk space by delete unused doc files.
"""
for kb_name in kb_names:
kb = KBServiceFactory.get_service_by_name(kb_name)
if kb and kb.exists():