Add recreate_vector_store to kb_doc_api, and register its route in api.py.
This commit is contained in:
parent
46c7d8d169
commit
5d1a0ec15d
|
|
@ -11,7 +11,8 @@ from starlette.responses import RedirectResponse
|
|||
from server.chat import (chat, knowledge_base_chat, openai_chat,
|
||||
search_engine_chat)
|
||||
from server.knowledge_base import (list_kbs, create_kb, delete_kb,
|
||||
list_docs, upload_doc, delete_doc, update_doc)
|
||||
list_docs, upload_doc, delete_doc,
|
||||
update_doc, recreate_vector_store)
|
||||
from server.utils import BaseResponse, ListResponse
|
||||
|
||||
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
|
||||
|
|
@ -97,6 +98,9 @@ def create_app():
|
|||
response_model=BaseResponse,
|
||||
summary="上传文件到知识库,并删除另一个文件"
|
||||
)(update_doc)
|
||||
app.post("/knowledge_base/recreate_vector_store",
|
||||
summary="根据content中文档重建向量库,流式输出处理进度。"
|
||||
)(recreate_vector_store)
|
||||
return app
|
||||
|
||||
app = create_app()
|
||||
|
|
|
|||
|
|
@ -5,7 +5,10 @@ from fastapi import File, Form, UploadFile
|
|||
from server.utils import BaseResponse, ListResponse
|
||||
from server.knowledge_base.utils import (validate_kb_name, get_kb_path, get_doc_path,
|
||||
get_file_path, file2text, docs2vs,
|
||||
refresh_vs_cache, )
|
||||
refresh_vs_cache, get_vs_path, )
|
||||
from fastapi.responses import StreamingResponse
|
||||
import json
|
||||
import shutil
|
||||
|
||||
|
||||
async def list_docs(knowledge_base_name: str):
|
||||
|
|
@ -98,3 +101,30 @@ async def update_doc():
|
|||
async def download_doc():
    """Placeholder for the document download endpoint; currently does nothing."""
    # TODO: implement file download
    return None
|
||||
|
||||
|
||||
async def recreate_vector_store(knowledge_base_name: str):
    """
    Recreate the vector store of a knowledge base from its content folder.

    This is useful when files were copied into the content folder directly
    instead of being uploaded through the network.

    Args:
        knowledge_base_name: name of the knowledge base to rebuild.
            NOTE(review): declared as a bare ``str``, FastAPI will presumably
            read this from the query string, while the client added in the
            same commit posts it as a JSON body -- confirm the two agree.

    Returns:
        A StreamingResponse that emits one JSON object per processed file
        with the keys ``total`` (number of files to process), ``finished``
        (number of files processed so far) and ``doc`` (the file just
        processed).
    """
    async def output(kb):
        # Wipe any existing vector store directory so the rebuild starts empty.
        vs_path = get_vs_path(kb)
        if os.path.isdir(vs_path):
            shutil.rmtree(vs_path)
        os.mkdir(vs_path)
        print(f"start to recreate vectore in {vs_path}")

        filenames = (await list_docs(kb)).data
        # Computed once, before the loop: the progress total is the number of
        # files, not the number of text chunks produced from the current file.
        total = len(filenames)
        for i, filename in enumerate(filenames):
            filepath = get_file_path(kb, filename)
            print(f"processing {filepath} to vector store.")
            # Use a dedicated name so the file list is never shadowed.
            file_docs = file2text(filepath)
            docs2vs(file_docs, kb)
            yield json.dumps({
                "total": total,
                "finished": i + 1,
                "doc": filename,
            })

    return StreamingResponse(output(knowledge_base_name), media_type="text/event-stream")
|
||||
|
|
|
|||
|
|
@ -444,9 +444,31 @@ class ApiRequest:
|
|||
)
|
||||
return response.json()
|
||||
|
||||
def recreate_vector_store(
    self,
    knowledge_base_name: str,
    no_remote_api: bool = None,
):
    """
    Client-side counterpart of the /knowledge_base/recreate_vector_store
    endpoint in api.py.

    When ``no_remote_api`` is None, the instance-level ``self.no_remote_api``
    setting is used. In local mode the server function is invoked directly;
    otherwise the request is sent over HTTP. Either way the response is
    converted to a generator with ``as_json=True``.
    """
    use_local = self.no_remote_api if no_remote_api is None else no_remote_api

    if not use_local:
        # Remote mode: go through the HTTP API.
        remote_stream = self.post(
            "/knowledge_base/recreate_vector_store",
            json={"knowledge_base_name": knowledge_base_name},
        )
        return self._httpx_stream2generator(remote_stream, as_json=True)

    # Local mode: import lazily and call the server function in-process.
    from server.knowledge_base.kb_doc_api import recreate_vector_store
    local_stream = run_async(recreate_vector_store(knowledge_base_name))
    return self._fastapi_stream2generator(local_stream, as_json=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
api = ApiRequest()
|
||||
api = ApiRequest(no_remote_api=True)
|
||||
|
||||
# print(api.chat_fastchat(
|
||||
# messages=[{"role": "user", "content": "hello"}]
|
||||
|
|
@ -464,4 +486,7 @@ if __name__ == "__main__":
|
|||
# for t in r:
|
||||
# print(t)
|
||||
|
||||
print(api.list_knowledge_bases())
|
||||
# print(api.list_knowledge_bases())
|
||||
|
||||
for t in api.recreate_vector_store('kblog'):
|
||||
print(t)
|
||||
|
|
|
|||
Loading…
Reference in New Issue