add recreate_vector_store to kb_doc_api, and the router in api.py.
This commit is contained in:
parent
46c7d8d169
commit
5d1a0ec15d
|
|
@ -11,7 +11,8 @@ from starlette.responses import RedirectResponse
|
||||||
from server.chat import (chat, knowledge_base_chat, openai_chat,
|
from server.chat import (chat, knowledge_base_chat, openai_chat,
|
||||||
search_engine_chat)
|
search_engine_chat)
|
||||||
from server.knowledge_base import (list_kbs, create_kb, delete_kb,
|
from server.knowledge_base import (list_kbs, create_kb, delete_kb,
|
||||||
list_docs, upload_doc, delete_doc, update_doc)
|
list_docs, upload_doc, delete_doc,
|
||||||
|
update_doc, recreate_vector_store)
|
||||||
from server.utils import BaseResponse, ListResponse
|
from server.utils import BaseResponse, ListResponse
|
||||||
|
|
||||||
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
|
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
|
||||||
|
|
@ -97,6 +98,9 @@ def create_app():
|
||||||
response_model=BaseResponse,
|
response_model=BaseResponse,
|
||||||
summary="上传文件到知识库,并删除另一个文件"
|
summary="上传文件到知识库,并删除另一个文件"
|
||||||
)(update_doc)
|
)(update_doc)
|
||||||
|
app.post("/knowledge_base/recreate_vector_store",
|
||||||
|
summary="根据content中文档重建向量库,流式输出处理进度。"
|
||||||
|
)(recreate_vector_store)
|
||||||
return app
|
return app
|
||||||
|
|
||||||
app = create_app()
|
app = create_app()
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,10 @@ from fastapi import File, Form, UploadFile
|
||||||
from server.utils import BaseResponse, ListResponse
|
from server.utils import BaseResponse, ListResponse
|
||||||
from server.knowledge_base.utils import (validate_kb_name, get_kb_path, get_doc_path,
|
from server.knowledge_base.utils import (validate_kb_name, get_kb_path, get_doc_path,
|
||||||
get_file_path, file2text, docs2vs,
|
get_file_path, file2text, docs2vs,
|
||||||
refresh_vs_cache, )
|
refresh_vs_cache, get_vs_path, )
|
||||||
|
from fastapi.responses import StreamingResponse
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
async def list_docs(knowledge_base_name: str):
|
async def list_docs(knowledge_base_name: str):
|
||||||
|
|
@ -98,3 +101,30 @@ async def update_doc():
|
||||||
async def download_doc():
|
async def download_doc():
|
||||||
# TODO: 下载文件
|
# TODO: 下载文件
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
async def recreate_vector_store(knowledge_base_name: str):
    '''
    Recreate the vector store of a knowledge base from its content folder.

    This is useful when users copy files into the content folder directly
    instead of uploading them through the network.

    :param knowledge_base_name: name of the knowledge base to rebuild.
    :return: a StreamingResponse (media type "text/event-stream") that emits one
        JSON object per processed file with the keys:
        "total"    - number of files listed for the knowledge base,
        "finished" - 1-based count of files processed so far,
        "doc"      - name of the file that was just processed.
    '''
    async def output(kb):
        vs_path = get_vs_path(kb)
        # Rebuild from scratch: drop any existing vector store directory first.
        # NOTE(review): `kb` comes from the request and reaches rmtree before any
        # validation visible in this function - confirm path-traversal names are
        # rejected upstream (e.g. via validate_kb_name).
        if os.path.isdir(vs_path):
            shutil.rmtree(vs_path)
        os.mkdir(vs_path)
        print(f"start to recreate vectore in {vs_path}")

        filenames = (await list_docs(kb)).data
        # Computed once so every progress message reports the same file count.
        total = len(filenames)
        for i, filename in enumerate(filenames):
            filepath = get_file_path(kb, filename)
            print(f"processing {filepath} to vector store.")
            # Keep the per-file result under its own name; reusing the file-list
            # name here made "total" report the size of the current file's
            # result instead of the number of files.
            file_docs = file2text(filepath)
            docs2vs(file_docs, kb)
            yield json.dumps({
                "total": total,
                "finished": i + 1,
                "doc": filename,
            })

    return StreamingResponse(output(knowledge_base_name), media_type="text/event-stream")
|
||||||
|
|
|
||||||
|
|
@ -444,9 +444,31 @@ class ApiRequest:
|
||||||
)
|
)
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
|
def recreate_vector_store(
    self,
    knowledge_base_name: str,
    no_remote_api: bool = None,
):
    '''
    Client-side counterpart of the api.py /knowledge_base/recreate_vector_store endpoint.

    :param knowledge_base_name: name of the knowledge base whose vector store is rebuilt.
    :param no_remote_api: when truthy, call the server function in-process instead of
        going through HTTP; when None, fall back to self.no_remote_api.
    :return: the generator produced by the matching stream adapter, requested with
        as_json=True.
    '''
    # An explicit argument wins; only None defers to the instance-level setting.
    call_locally = self.no_remote_api if no_remote_api is None else no_remote_api

    if not call_locally:
        # Remote mode: POST to the HTTP endpoint and adapt the httpx stream.
        # NOTE(review): the knowledge base name is sent as a JSON body - confirm
        # the server endpoint reads it from the body rather than the query string.
        http_response = self.post(
            "/knowledge_base/recreate_vector_store",
            json={"knowledge_base_name": knowledge_base_name},
        )
        return self._httpx_stream2generator(http_response, as_json=True)

    # Local mode: import lazily so the server package is only needed when used.
    from server.knowledge_base.kb_doc_api import recreate_vector_store
    local_response = run_async(recreate_vector_store(knowledge_base_name))
    return self._fastapi_stream2generator(local_response, as_json=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
api = ApiRequest()
|
api = ApiRequest(no_remote_api=True)
|
||||||
|
|
||||||
# print(api.chat_fastchat(
|
# print(api.chat_fastchat(
|
||||||
# messages=[{"role": "user", "content": "hello"}]
|
# messages=[{"role": "user", "content": "hello"}]
|
||||||
|
|
@ -464,4 +486,7 @@ if __name__ == "__main__":
|
||||||
# for t in r:
|
# for t in r:
|
||||||
# print(t)
|
# print(t)
|
||||||
|
|
||||||
print(api.list_knowledge_bases())
|
# print(api.list_knowledge_bases())
|
||||||
|
|
||||||
|
for t in api.recreate_vector_store('kblog'):
|
||||||
|
print(t)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue