删除重复的引入和纠正拼写错误 (#2599)
* 1. 删除重复的引入 2. 纠正拼写错误
* 1. 纠正参数和文档中的拼写错误 2. docs 下的 faq.md、install.md 已被删除(更新为 ES 部署指南);考虑到 docs 下的文档经常变动,即使改为扫描 docs 文件夹,也可能出现结果为空的情况,而 readme.md 大概率不会被删除,因此测试文件仅保留 readme.md。
This commit is contained in:
parent
b653c25fbc
commit
3da68b5ce3
|
|
@ -6,7 +6,6 @@ from configs.model_config import NLTK_DATA_PATH, EMBEDDING_MODEL
|
|||
import nltk
|
||||
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
|
||||
from datetime import datetime
|
||||
import sys
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -50,11 +49,11 @@ if __name__ == "__main__":
|
|||
)
|
||||
parser.add_argument(
|
||||
"-i",
|
||||
"--increament",
|
||||
"--increment",
|
||||
action="store_true",
|
||||
help=('''
|
||||
update vector store for files exist in local folder and not exist in database.
|
||||
use this option if you want to create vectors increamentally.
|
||||
use this option if you want to create vectors incrementally.
|
||||
'''
|
||||
)
|
||||
)
|
||||
|
|
@ -100,7 +99,7 @@ if __name__ == "__main__":
|
|||
|
||||
if args.clear_tables:
|
||||
reset_tables()
|
||||
print("database talbes reseted")
|
||||
print("database tables reset")
|
||||
|
||||
if args.recreate_vs:
|
||||
create_tables()
|
||||
|
|
@ -110,8 +109,8 @@ if __name__ == "__main__":
|
|||
import_from_db(args.import_db)
|
||||
elif args.update_in_db:
|
||||
folder2db(kb_names=args.kb_name, mode="update_in_db", embed_model=args.embed_model)
|
||||
elif args.increament:
|
||||
folder2db(kb_names=args.kb_name, mode="increament", embed_model=args.embed_model)
|
||||
elif args.increment:
|
||||
folder2db(kb_names=args.kb_name, mode="increment", embed_model=args.embed_model)
|
||||
elif args.prune_db:
|
||||
prune_db_docs(args.kb_name)
|
||||
elif args.prune_folder:
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ def file_to_kbfile(kb_name: str, files: List[str]) -> List[KnowledgeFile]:
|
|||
|
||||
def folder2db(
|
||||
kb_names: List[str],
|
||||
mode: Literal["recreate_vs", "update_in_db", "increament"],
|
||||
mode: Literal["recreate_vs", "update_in_db", "increment"],
|
||||
vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE,
|
||||
embed_model: str = EMBEDDING_MODEL,
|
||||
chunk_size: int = CHUNK_SIZE,
|
||||
|
|
@ -97,7 +97,7 @@ def folder2db(
|
|||
recreate_vs: recreate all vector store and fill info to database using existed files in local folder
|
||||
fill_info_only(disabled): do not create vector store, fill info to db using existed files only
|
||||
update_in_db: update vector store and database info using local files that existed in database only
|
||||
increament: create vector store and database info for local files that not existed in database only
|
||||
increment: create vector store and database info for local files that not existed in database only
|
||||
"""
|
||||
|
||||
def files2vs(kb_name: str, kb_files: List[KnowledgeFile]):
|
||||
|
|
@ -142,7 +142,7 @@ def folder2db(
|
|||
files2vs(kb_name, kb_files)
|
||||
kb.save_vector_store()
|
||||
# 对比本地目录与数据库中的文件列表,进行增量向量化
|
||||
elif mode == "increament":
|
||||
elif mode == "increment":
|
||||
db_files = kb.list_files()
|
||||
folder_files = list_files_from_folder(kb_name)
|
||||
files = list(set(folder_files) - set(db_files))
|
||||
|
|
@ -150,7 +150,7 @@ def folder2db(
|
|||
files2vs(kb_name, kb_files)
|
||||
kb.save_vector_store()
|
||||
else:
|
||||
print(f"unspported migrate mode: {mode}")
|
||||
print(f"unsupported migrate mode: {mode}")
|
||||
|
||||
|
||||
def prune_db_docs(kb_names: List[str]):
|
||||
|
|
|
|||
|
|
@ -14,8 +14,7 @@ from server.knowledge_base.migrate import folder2db, prune_db_docs, prune_folder
|
|||
# setup test knowledge base
|
||||
kb_name = "test_kb_for_migrate"
|
||||
test_files = {
|
||||
"faq.md": str(root_path / "docs" / "faq.md"),
|
||||
"install.md": str(root_path / "docs" / "install.md"),
|
||||
"readme.md": str(root_path / "readme.md"),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -56,13 +55,13 @@ def test_recreate_vs():
|
|||
assert doc.metadata["source"] == name
|
||||
|
||||
|
||||
def test_increament():
|
||||
def test_increment():
|
||||
kb = KBServiceFactory.get_service_by_name(kb_name)
|
||||
kb.clear_vs()
|
||||
assert kb.list_files() == []
|
||||
assert kb.list_docs() == []
|
||||
|
||||
folder2db([kb_name], "increament")
|
||||
folder2db([kb_name], "increment")
|
||||
|
||||
files = kb.list_files()
|
||||
print(files)
|
||||
|
|
|
|||
Loading…
Reference in New Issue