From e918244159ae33bb8a151140cb7503b5e0d9b068 Mon Sep 17 00:00:00 2001 From: liunux4odoo Date: Tue, 8 Aug 2023 17:41:58 +0800 Subject: [PATCH] move KBServiceFactory to server.knowledge_base.kb_service.base --- configs/model_config.py.example | 2 +- server/chat/knowledge_base_chat.py | 3 +- server/knowledge_base/__init__.py | 6 ++-- server/knowledge_base/kb_api.py | 3 +- server/knowledge_base/kb_doc_api.py | 3 +- server/knowledge_base/kb_service/base.py | 34 ++++++++++++++++++++-- server/knowledge_base/utils.py | 37 +++--------------------- 7 files changed, 45 insertions(+), 43 deletions(-) diff --git a/configs/model_config.py.example b/configs/model_config.py.example index 5d2170e..1183432 100644 --- a/configs/model_config.py.example +++ b/configs/model_config.py.example @@ -242,7 +242,7 @@ KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowled # 数据库默认存储路径。 # 如果使用sqlite,可以直接修改DB_ROOT_PATH;如果使用其它数据库,请直接修改SQLALCHEMY_DATABASE_URI。 DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db") -SQLALCHEMY_DATABASE_URI = f"sqlite://{DB_ROOT_PATH}" +SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}" # 缓存向量库数量 CACHED_VS_NUM = 1 diff --git a/server/chat/knowledge_base_chat.py b/server/chat/knowledge_base_chat.py index 8998f0d..5723d08 100644 --- a/server/chat/knowledge_base_chat.py +++ b/server/chat/knowledge_base_chat.py @@ -10,8 +10,7 @@ from langchain.callbacks import AsyncIteratorCallbackHandler from typing import AsyncIterable import asyncio from langchain.prompts import PromptTemplate -from server.knowledge_base.utils import KBServiceFactory -from server.knowledge_base.kb_service.base import KBService +from server.knowledge_base.kb_service.base import KBService, KBServiceFactory import json diff --git a/server/knowledge_base/__init__.py b/server/knowledge_base/__init__.py index 4556f7d..19de504 100644 --- a/server/knowledge_base/__init__.py +++ b/server/knowledge_base/__init__.py @@ -1,3 +1,3 @@ -from .kb_api import list_kbs, create_kb, delete_kb -from .kb_doc_api import list_docs, upload_doc, delete_doc, update_doc, download_doc, recreate_vector_store -from .utils import KnowledgeFile, KBServiceFactory +# from .kb_api import list_kbs, create_kb, delete_kb +# from .kb_doc_api import list_docs, upload_doc, delete_doc, update_doc, download_doc, recreate_vector_store +# from .utils import KnowledgeFile, KBServiceFactory diff --git a/server/knowledge_base/kb_api.py b/server/knowledge_base/kb_api.py index 0ae7ac1..84c8298 100644 --- a/server/knowledge_base/kb_api.py +++ b/server/knowledge_base/kb_api.py @@ -1,6 +1,7 @@ import urllib from server.utils import BaseResponse, ListResponse -from server.knowledge_base.utils import validate_kb_name, KBServiceFactory +from server.knowledge_base.utils import validate_kb_name +from server.knowledge_base.kb_service.base import KBServiceFactory from server.db.repository.knowledge_base_repository import list_kbs_from_db from configs.model_config import EMBEDDING_MODEL diff --git a/server/knowledge_base/kb_doc_api.py b/server/knowledge_base/kb_doc_api.py index 7721ebe..41c399e 100644 --- a/server/knowledge_base/kb_doc_api.py +++ b/server/knowledge_base/kb_doc_api.py @@ -5,7 +5,8 @@ from server.utils import BaseResponse, ListResponse from server.knowledge_base.utils import (validate_kb_name) from fastapi.responses import StreamingResponse import json -from server.knowledge_base.utils import KnowledgeFile, KBServiceFactory, list_docs_from_folder +from server.knowledge_base.utils import KnowledgeFile, list_docs_from_folder +from server.knowledge_base.kb_service.base import KBServiceFactory from server.knowledge_base.kb_service.base import SupportedVSType from server.knowledge_base.kb_service.faiss_kb_service import refresh_vs_cache diff --git a/server/knowledge_base/kb_service/base.py b/server/knowledge_base/kb_service/base.py index 0b542e5..c7cbf46 100644 --- a/server/knowledge_base/kb_service/base.py +++ b/server/knowledge_base/kb_service/base.py @@ -7,13 +7,13 @@ from langchain.embeddings import HuggingFaceEmbeddings from langchain.embeddings.base import Embeddings from langchain.docstore.document import Document -from server.db.repository.knowledge_base_repository import add_kb_to_db, delete_kb_from_db, list_kbs_from_db, kb_exists +from server.db.repository.knowledge_base_repository import add_kb_to_db, delete_kb_from_db, list_kbs_from_db, kb_exists, load_kb_from_db from server.db.repository.knowledge_file_repository import add_doc_to_db, delete_file_from_db, doc_exists, \ list_docs_from_db from configs.model_config import (DB_ROOT_PATH, kbs_config, VECTOR_SEARCH_TOP_K, embedding_model_dict, EMBEDDING_DEVICE, EMBEDDING_MODEL) from server.knowledge_base.utils import (get_kb_path, get_doc_path, load_embeddings, KnowledgeFile) -from typing import List +from typing import List, Union class SupportedVSType: @@ -162,3 +162,33 @@ class KBService(ABC): 从知识库删除全部向量子类实自己逻辑 """ pass + + +class KBServiceFactory: + + @staticmethod + def get_service(kb_name: str, + vector_store_type: Union[str, SupportedVSType], + embed_model: str = EMBEDDING_MODEL, + ) -> KBService: + if isinstance(vector_store_type, str): + vector_store_type = getattr(SupportedVSType, vector_store_type.upper()) + if SupportedVSType.FAISS == vector_store_type: + from server.knowledge_base.kb_service.faiss_kb_service import FaissKBService + return FaissKBService(kb_name, embed_model=embed_model) + elif SupportedVSType.MILVUS == vector_store_type: + from server.knowledge_base.kb_service.milvus_kb_service import MilvusKBService + return MilvusKBService(kb_name, embed_model=embed_model) # other milvus parameters are set in model_config.kbs_config + elif SupportedVSType.DEFAULT == vector_store_type: # kb_exists of default kbservice is False, to make validation easier. + return DefaultKBService(kb_name) + + @staticmethod + def get_service_by_name(kb_name: str + ) -> KBService: + kb_name, vs_type, embed_model = load_kb_from_db(kb_name) + return KBServiceFactory.get_service(kb_name, vs_type, embed_model) + + @staticmethod + def get_default(): + return KBServiceFactory.get_service("default", SupportedVSType.DEFAULT) + diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index 91e9b40..a616424 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -3,9 +3,6 @@ import os from langchain.embeddings.huggingface import HuggingFaceEmbeddings from configs.model_config import (embedding_model_dict, KB_ROOT_PATH, EMBEDDING_MODEL, kbs_config) from functools import lru_cache -from server.knowledge_base.kb_service.base import KBService, SupportedVSType -from server.db.repository.knowledge_base_repository import load_kb_from_db -from server.knowledge_base.kb_service.default_kb_service import DefaultKBService def validate_kb_name(knowledge_base_id: str) -> bool: @@ -26,6 +23,10 @@ def get_vs_path(knowledge_base_name: str): def get_file_path(knowledge_base_name: str, doc_name: str): return os.path.join(get_doc_path(knowledge_base_name), doc_name) +def list_kbs_from_folder(): + return [f for f in os.listdir(KB_ROOT_PATH) + if os.path.isdir(os.path.join(KB_ROOT_PATH, f))] + def list_docs_from_folder(kb_name: str): doc_path = get_doc_path(kb_name) return [file for file in os.listdir(doc_path) @@ -78,33 +79,3 @@ class KnowledgeFile: TextSplitter = getattr(sys.modules['langchain.text_splitter'], self.text_splitter_name) text_splitter = TextSplitter(chunk_size=500, chunk_overlap=200) return loader.load_and_split(text_splitter) - - -class KBServiceFactory: - - @staticmethod - def get_service(kb_name: str, - vector_store_type: Union[str, SupportedVSType], - embed_model: str = EMBEDDING_MODEL, - ) -> KBService: - if isinstance(vector_store_type, str): - vector_store_type = getattr(SupportedVSType, vector_store_type.upper()) - if SupportedVSType.FAISS == vector_store_type: - from server.knowledge_base.kb_service.faiss_kb_service import FaissKBService - return FaissKBService(kb_name, embed_model=embed_model) - elif SupportedVSType.MILVUS == vector_store_type: - from server.knowledge_base.kb_service.milvus_kb_service import MilvusKBService - return MilvusKBService(kb_name, embed_model=embed_model) # other milvus parameters are set in model_config.kbs_config - elif SupportedVSType.DEFAULT == vector_store_type: # kb_exists of default kbservice is False, to make validation easier. - return DefaultKBService(kb_name) - - @staticmethod - def get_service_by_name(kb_name: str - ) -> KBService: - kb_name, vs_type, embed_model = load_kb_from_db(kb_name) - return KBServiceFactory.get_service(kb_name, vs_type, embed_model) - - @staticmethod - def get_default(): - return KBServiceFactory.get_service("default", SupportedVSType.DEFAULT) -