reformat server/knowledge_base/migrate.py
This commit is contained in:
parent
fb32c31a70
commit
e233e4f220
|
|
@ -1,9 +1,13 @@
|
|||
from configs import (EMBEDDING_MODEL, DEFAULT_VS_TYPE, ZH_TITLE_ENHANCE,
|
||||
CHUNK_SIZE, OVERLAP_SIZE,
|
||||
logger, log_verbose)
|
||||
from server.knowledge_base.utils import (get_file_path, list_kbs_from_folder,
|
||||
list_files_from_folder,files2docs_in_thread,
|
||||
KnowledgeFile,)
|
||||
from configs import (
|
||||
EMBEDDING_MODEL, DEFAULT_VS_TYPE, ZH_TITLE_ENHANCE,
|
||||
CHUNK_SIZE, OVERLAP_SIZE,
|
||||
logger, log_verbose
|
||||
)
|
||||
from server.knowledge_base.utils import (
|
||||
get_file_path, list_kbs_from_folder,
|
||||
list_files_from_folder, files2docs_in_thread,
|
||||
KnowledgeFile
|
||||
)
|
||||
from server.knowledge_base.kb_service.base import KBServiceFactory
|
||||
from server.db.models.chat_history_model import ChatHistoryModel
|
||||
from server.db.repository.knowledge_file_repository import add_file_to_db # ensure Models are imported
|
||||
|
|
@ -24,15 +28,15 @@ def reset_tables():
|
|||
|
||||
|
||||
def import_from_db(
|
||||
sqlite_path: str = None,
|
||||
# csv_path: str = None,
|
||||
sqlite_path: str = None,
|
||||
# csv_path: str = None,
|
||||
) -> bool:
|
||||
'''
|
||||
"""
|
||||
在知识库与向量库无变化的情况下,从备份数据库中导入数据到 info.db。
|
||||
适用于版本升级时,info.db 结构变化,但无需重新向量化的情况。
|
||||
请确保两边数据库表名一致,需要导入的字段名一致
|
||||
当前仅支持 sqlite
|
||||
'''
|
||||
"""
|
||||
import sqlite3 as sql
|
||||
from pprint import pprint
|
||||
|
||||
|
|
@ -76,22 +80,23 @@ def file_to_kbfile(kb_name: str, files: List[str]) -> List[KnowledgeFile]:
|
|||
|
||||
|
||||
def folder2db(
|
||||
kb_names: List[str],
|
||||
mode: Literal["recreate_vs", "update_in_db", "increament"],
|
||||
vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE,
|
||||
embed_model: str = EMBEDDING_MODEL,
|
||||
chunk_size: int = CHUNK_SIZE,
|
||||
chunk_overlap: int = OVERLAP_SIZE,
|
||||
zh_title_enhance: bool = ZH_TITLE_ENHANCE,
|
||||
kb_names: List[str],
|
||||
mode: Literal["recreate_vs", "update_in_db", "increament"],
|
||||
vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE,
|
||||
embed_model: str = EMBEDDING_MODEL,
|
||||
chunk_size: int = CHUNK_SIZE,
|
||||
chunk_overlap: int = OVERLAP_SIZE,
|
||||
zh_title_enhance: bool = ZH_TITLE_ENHANCE,
|
||||
):
|
||||
'''
|
||||
"""
|
||||
use existed files in local folder to populate database and/or vector store.
|
||||
set parameter `mode` to:
|
||||
recreate_vs: recreate all vector store and fill info to database using existed files in local folder
|
||||
fill_info_only(disabled): do not create vector store, fill info to db using existed files only
|
||||
update_in_db: update vector store and database info using local files that existed in database only
|
||||
increament: create vector store and database info for local files that not existed in database only
|
||||
'''
|
||||
"""
|
||||
|
||||
def files2vs(kb_name: str, kb_files: List[KnowledgeFile]):
|
||||
for success, result in files2docs_in_thread(kb_files,
|
||||
chunk_size=chunk_size,
|
||||
|
|
@ -146,10 +151,10 @@ def folder2db(
|
|||
|
||||
|
||||
def prune_db_docs(kb_names: List[str]):
|
||||
'''
|
||||
"""
|
||||
delete docs in database that not existed in local folder.
|
||||
it is used to delete database docs after user deleted some doc files in file browser
|
||||
'''
|
||||
"""
|
||||
for kb_name in kb_names:
|
||||
kb = KBServiceFactory.get_service_by_name(kb_name)
|
||||
if kb and kb.exists():
|
||||
|
|
@ -164,10 +169,10 @@ def prune_db_docs(kb_names: List[str]):
|
|||
|
||||
|
||||
def prune_folder_files(kb_names: List[str]):
|
||||
'''
|
||||
"""
|
||||
delete doc files in local folder that not existed in database.
|
||||
is is used to free local disk space by delete unused doc files.
|
||||
'''
|
||||
it is used to free local disk space by delete unused doc files.
|
||||
"""
|
||||
for kb_name in kb_names:
|
||||
kb = KBServiceFactory.get_service_by_name(kb_name)
|
||||
if kb and kb.exists():
|
||||
|
|
|
|||
Loading…
Reference in New Issue