删除重复的引入和纠正拼写错误 (#2599)

* 1.删除重复的引入
2.拼写错误

* 1.参数或者文档拼写错误纠正
2.doc下的faq、install已经删除,更新为ES部署指南,考虑到doc下的文档经常更新,即使扫描doc文件夹,也可能为空的情况,readme.md大概率不会删除。
This commit is contained in:
天地 2024-01-11 18:45:09 +08:00 committed by GitHub
parent b653c25fbc
commit 3da68b5ce3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 14 deletions

View File

@ -6,7 +6,6 @@ from configs.model_config import NLTK_DATA_PATH, EMBEDDING_MODEL
import nltk
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
from datetime import datetime
import sys
if __name__ == "__main__":
@ -50,11 +49,11 @@ if __name__ == "__main__":
)
parser.add_argument(
"-i",
"--increament",
"--increment",
action="store_true",
help=('''
update vector store for files exist in local folder and not exist in database.
use this option if you want to create vectors increamentally.
use this option if you want to create vectors incrementally.
'''
)
)
@ -100,7 +99,7 @@ if __name__ == "__main__":
if args.clear_tables:
reset_tables()
print("database talbes reseted")
print("database tables reset")
if args.recreate_vs:
create_tables()
@ -110,8 +109,8 @@ if __name__ == "__main__":
import_from_db(args.import_db)
elif args.update_in_db:
folder2db(kb_names=args.kb_name, mode="update_in_db", embed_model=args.embed_model)
elif args.increament:
folder2db(kb_names=args.kb_name, mode="increament", embed_model=args.embed_model)
elif args.increment:
folder2db(kb_names=args.kb_name, mode="increment", embed_model=args.embed_model)
elif args.prune_db:
prune_db_docs(args.kb_name)
elif args.prune_folder:

View File

@ -84,7 +84,7 @@ def file_to_kbfile(kb_name: str, files: List[str]) -> List[KnowledgeFile]:
def folder2db(
kb_names: List[str],
mode: Literal["recreate_vs", "update_in_db", "increament"],
mode: Literal["recreate_vs", "update_in_db", "increment"],
vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE,
embed_model: str = EMBEDDING_MODEL,
chunk_size: int = CHUNK_SIZE,
@ -97,7 +97,7 @@ def folder2db(
recreate_vs: recreate all vector store and fill info to database using existed files in local folder
fill_info_only(disabled): do not create vector store, fill info to db using existed files only
update_in_db: update vector store and database info using local files that existed in database only
increament: create vector store and database info for local files that not existed in database only
increment: create vector store and database info for local files that not existed in database only
"""
def files2vs(kb_name: str, kb_files: List[KnowledgeFile]):
@ -142,7 +142,7 @@ def folder2db(
files2vs(kb_name, kb_files)
kb.save_vector_store()
# 对比本地目录与数据库中的文件列表,进行增量向量化
elif mode == "increament":
elif mode == "increment":
db_files = kb.list_files()
folder_files = list_files_from_folder(kb_name)
files = list(set(folder_files) - set(db_files))
@ -150,7 +150,7 @@ def folder2db(
files2vs(kb_name, kb_files)
kb.save_vector_store()
else:
print(f"unspported migrate mode: {mode}")
print(f"unsupported migrate mode: {mode}")
def prune_db_docs(kb_names: List[str]):

View File

@ -14,8 +14,7 @@ from server.knowledge_base.migrate import folder2db, prune_db_docs, prune_folder
# setup test knowledge base
kb_name = "test_kb_for_migrate"
test_files = {
"faq.md": str(root_path / "docs" / "faq.md"),
"install.md": str(root_path / "docs" / "install.md"),
"readme.md": str(root_path / "readme.md"),
}
@ -56,13 +55,13 @@ def test_recreate_vs():
assert doc.metadata["source"] == name
def test_increament():
def test_increment():
kb = KBServiceFactory.get_service_by_name(kb_name)
kb.clear_vs()
assert kb.list_files() == []
assert kb.list_docs() == []
folder2db([kb_name], "increament")
folder2db([kb_name], "increment")
files = kb.list_files()
print(files)