diff --git a/server/knowledge_base/migrate.py b/server/knowledge_base/migrate.py index 58a2ffd..146dc77 100644 --- a/server/knowledge_base/migrate.py +++ b/server/knowledge_base/migrate.py @@ -1,9 +1,13 @@ -from configs import (EMBEDDING_MODEL, DEFAULT_VS_TYPE, ZH_TITLE_ENHANCE, - CHUNK_SIZE, OVERLAP_SIZE, - logger, log_verbose) -from server.knowledge_base.utils import (get_file_path, list_kbs_from_folder, - list_files_from_folder,files2docs_in_thread, - KnowledgeFile,) +from configs import ( + EMBEDDING_MODEL, DEFAULT_VS_TYPE, ZH_TITLE_ENHANCE, + CHUNK_SIZE, OVERLAP_SIZE, + logger, log_verbose +) +from server.knowledge_base.utils import ( + get_file_path, list_kbs_from_folder, + list_files_from_folder, files2docs_in_thread, + KnowledgeFile +) from server.knowledge_base.kb_service.base import KBServiceFactory from server.db.models.chat_history_model import ChatHistoryModel from server.db.repository.knowledge_file_repository import add_file_to_db # ensure Models are imported @@ -24,15 +28,15 @@ def reset_tables(): def import_from_db( - sqlite_path: str = None, - # csv_path: str = None, + sqlite_path: str = None, + # csv_path: str = None, ) -> bool: - ''' + """ 在知识库与向量库无变化的情况下,从备份数据库中导入数据到 info.db。 适用于版本升级时,info.db 结构变化,但无需重新向量化的情况。 请确保两边数据库表名一致,需要导入的字段名一致 当前仅支持 sqlite - ''' + """ import sqlite3 as sql from pprint import pprint @@ -76,22 +80,23 @@ def file_to_kbfile(kb_name: str, files: List[str]) -> List[KnowledgeFile]: def folder2db( - kb_names: List[str], - mode: Literal["recreate_vs", "update_in_db", "increament"], - vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE, - embed_model: str = EMBEDDING_MODEL, - chunk_size: int = CHUNK_SIZE, - chunk_overlap: int = OVERLAP_SIZE, - zh_title_enhance: bool = ZH_TITLE_ENHANCE, + kb_names: List[str], + mode: Literal["recreate_vs", "update_in_db", "increament"], + vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE, + embed_model: str = EMBEDDING_MODEL, + chunk_size: int = CHUNK_SIZE, + chunk_overlap: int = OVERLAP_SIZE, + zh_title_enhance: bool = ZH_TITLE_ENHANCE, ): - ''' + """ use existed files in local folder to populate database and/or vector store. set parameter `mode` to: recreate_vs: recreate all vector store and fill info to database using existed files in local folder fill_info_only(disabled): do not create vector store, fill info to db using existed files only update_in_db: update vector store and database info using local files that existed in database only increament: create vector store and database info for local files that not existed in database only - ''' + """ + def files2vs(kb_name: str, kb_files: List[KnowledgeFile]): for success, result in files2docs_in_thread(kb_files, chunk_size=chunk_size, @@ -146,10 +151,10 @@ def folder2db( def prune_db_docs(kb_names: List[str]): - ''' + """ delete docs in database that not existed in local folder. it is used to delete database docs after user deleted some doc files in file browser - ''' + """ for kb_name in kb_names: kb = KBServiceFactory.get_service_by_name(kb_name) if kb and kb.exists(): @@ -164,10 +169,10 @@ def prune_db_docs(kb_names: List[str]): def prune_folder_files(kb_names: List[str]): - ''' + """ delete doc files in local folder that not existed in database. - is is used to free local disk space by delete unused doc files. - ''' + it is used to free local disk space by delete unused doc files. + """ for kb_name in kb_names: kb = KBServiceFactory.get_service_by_name(kb_name) if kb and kb.exists():