Add recreate_vector_store to kb_doc_api and register its route in api.py.

This commit is contained in:
liunux4odoo 2023-08-04 20:26:14 +08:00
parent 46c7d8d169
commit 5d1a0ec15d
3 changed files with 63 additions and 4 deletions

View File

@ -11,7 +11,8 @@ from starlette.responses import RedirectResponse
from server.chat import (chat, knowledge_base_chat, openai_chat,
search_engine_chat)
from server.knowledge_base import (list_kbs, create_kb, delete_kb,
list_docs, upload_doc, delete_doc, update_doc)
list_docs, upload_doc, delete_doc,
update_doc, recreate_vector_store)
from server.utils import BaseResponse, ListResponse
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
@ -97,6 +98,9 @@ def create_app():
response_model=BaseResponse,
summary="上传文件到知识库,并删除另一个文件"
)(update_doc)
app.post("/knowledge_base/recreate_vector_store",
summary="根据content中文档重建向量库流式输出处理进度。"
)(recreate_vector_store)
return app
app = create_app()

View File

@ -5,7 +5,10 @@ from fastapi import File, Form, UploadFile
from server.utils import BaseResponse, ListResponse
from server.knowledge_base.utils import (validate_kb_name, get_kb_path, get_doc_path,
get_file_path, file2text, docs2vs,
refresh_vs_cache, )
refresh_vs_cache, get_vs_path, )
from fastapi.responses import StreamingResponse
import json
import shutil
async def list_docs(knowledge_base_name: str):
@ -98,3 +101,30 @@ async def update_doc():
async def download_doc():
# TODO: 下载文件
pass
async def recreate_vector_store(knowledge_base_name: str):
    '''
    Recreate the vector store of a knowledge base from its content folder.

    This is useful when the user copies files into the content folder directly
    instead of uploading them through the network.

    Returns a StreamingResponse that emits one JSON object per processed file
    with the keys:
        "total":    number of files listed for the knowledge base,
        "finished": number of files processed so far,
        "doc":      name of the file that was just processed.

    NOTE(review): a bare `str` parameter on a POST route is read by FastAPI as
    a query parameter, not from a JSON body - confirm callers send it that way.
    '''
    async def output(kb):
        vs_path = get_vs_path(kb)
        # Start from an empty vector store directory.
        if os.path.isdir(vs_path):
            shutil.rmtree(vs_path)
        os.mkdir(vs_path)
        print(f"start to recreate vectore in {vs_path}")

        filenames = (await list_docs(kb)).data
        # Computed once so every progress message reports the same file count.
        total = len(filenames)
        for i, filename in enumerate(filenames):
            filepath = get_file_path(kb, filename)
            print(f"processing {filepath} to vector store.")
            # Use a separate name for the per-file documents so the list of
            # file names (and therefore "total") is not overwritten.
            file_docs = file2text(filepath)
            docs2vs(file_docs, kb)
            yield json.dumps({
                "total": total,
                "finished": i + 1,
                "doc": filename,
            })

    return StreamingResponse(output(knowledge_base_name), media_type="text/event-stream")

View File

@ -444,9 +444,31 @@ class ApiRequest:
)
return response.json()
def recreate_vector_store(
    self,
    knowledge_base_name: str,
    no_remote_api: bool = None,
):
    '''
    Client for the /knowledge_base/recreate_vector_store endpoint of api.py.

    knowledge_base_name: name of the knowledge base to rebuild.
    no_remote_api: when None, falls back to self.no_remote_api. When truthy,
        the server-side function is called in-process; otherwise the request
        is sent through self.post.

    Returns the result of the matching stream-to-generator helper, called
    with as_json=True.
    '''
    call_locally = self.no_remote_api if no_remote_api is None else no_remote_api

    if not call_locally:
        # Remote mode: send the request to the API server.
        http_stream = self.post(
            "/knowledge_base/recreate_vector_store",
            json={"knowledge_base_name": knowledge_base_name},
        )
        return self._httpx_stream2generator(http_stream, as_json=True)

    # Local mode: import lazily so the server package is only needed here.
    from server.knowledge_base.kb_doc_api import recreate_vector_store
    local_stream = run_async(recreate_vector_store(knowledge_base_name))
    return self._fastapi_stream2generator(local_stream, as_json=True)
if __name__ == "__main__":
api = ApiRequest()
api = ApiRequest(no_remote_api=True)
# print(api.chat_fastchat(
# messages=[{"role": "user", "content": "hello"}]
@ -464,4 +486,7 @@ if __name__ == "__main__":
# for t in r:
# print(t)
print(api.list_knowledge_bases())
# print(api.list_knowledge_bases())
for t in api.recreate_vector_store('kblog'):
print(t)