diff --git a/init_database.py b/init_database.py index dc4743a..a394b75 100644 --- a/init_database.py +++ b/init_database.py @@ -6,7 +6,6 @@ from configs.model_config import NLTK_DATA_PATH, EMBEDDING_MODEL import nltk nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path from datetime import datetime -import sys if __name__ == "__main__": @@ -50,11 +49,11 @@ if __name__ == "__main__": ) parser.add_argument( "-i", - "--increament", + "--increment", action="store_true", help=(''' update vector store for files exist in local folder and not exist in database. - use this option if you want to create vectors increamentally. + use this option if you want to create vectors incrementally. ''' ) ) @@ -100,7 +99,7 @@ if __name__ == "__main__": if args.clear_tables: reset_tables() - print("database talbes reseted") + print("database tables reset") if args.recreate_vs: create_tables() @@ -110,8 +109,8 @@ if __name__ == "__main__": import_from_db(args.import_db) elif args.update_in_db: folder2db(kb_names=args.kb_name, mode="update_in_db", embed_model=args.embed_model) - elif args.increament: - folder2db(kb_names=args.kb_name, mode="increament", embed_model=args.embed_model) + elif args.increment: + folder2db(kb_names=args.kb_name, mode="increment", embed_model=args.embed_model) elif args.prune_db: prune_db_docs(args.kb_name) elif args.prune_folder: diff --git a/server/knowledge_base/migrate.py b/server/knowledge_base/migrate.py index bde6e1f..596e1f6 100644 --- a/server/knowledge_base/migrate.py +++ b/server/knowledge_base/migrate.py @@ -84,7 +84,7 @@ def file_to_kbfile(kb_name: str, files: List[str]) -> List[KnowledgeFile]: def folder2db( kb_names: List[str], - mode: Literal["recreate_vs", "update_in_db", "increament"], + mode: Literal["recreate_vs", "update_in_db", "increment"], vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE, embed_model: str = EMBEDDING_MODEL, chunk_size: int = CHUNK_SIZE, @@ -97,7 +97,7 @@ def folder2db( recreate_vs: recreate all vector store and fill info to database using existed files in local folder fill_info_only(disabled): do not create vector store, fill info to db using existed files only update_in_db: update vector store and database info using local files that existed in database only - increament: create vector store and database info for local files that not existed in database only + increment: create vector store and database info for local files that not existed in database only """ def files2vs(kb_name: str, kb_files: List[KnowledgeFile]): @@ -142,7 +142,7 @@ def folder2db( files2vs(kb_name, kb_files) kb.save_vector_store() # 对比本地目录与数据库中的文件列表,进行增量向量化 - elif mode == "increament": + elif mode == "increment": db_files = kb.list_files() folder_files = list_files_from_folder(kb_name) files = list(set(folder_files) - set(db_files)) @@ -150,7 +150,7 @@ def folder2db( files2vs(kb_name, kb_files) kb.save_vector_store() else: - print(f"unspported migrate mode: {mode}") + print(f"unsupported migrate mode: {mode}") def prune_db_docs(kb_names: List[str]): diff --git a/tests/test_migrate.py b/tests/test_migrate.py index b794b02..0a967d1 100644 --- a/tests/test_migrate.py +++ b/tests/test_migrate.py @@ -14,8 +14,7 @@ from server.knowledge_base.migrate import folder2db, prune_db_docs, prune_folder # setup test knowledge base kb_name = "test_kb_for_migrate" test_files = { - "faq.md": str(root_path / "docs" / "faq.md"), - "install.md": str(root_path / "docs" / "install.md"), + "readme.md": str(root_path / "readme.md"), } @@ -56,13 +55,13 @@ def test_recreate_vs(): assert doc.metadata["source"] == name -def test_increament(): +def test_increment(): kb = KBServiceFactory.get_service_by_name(kb_name) kb.clear_vs() assert kb.list_files() == [] assert kb.list_docs() == [] - folder2db([kb_name], "increament") + folder2db([kb_name], "increment") files = kb.list_files() print(files)