enhance
This commit is contained in:
parent
c3321a6a34
commit
173b23ad7d
|
|
@ -1,3 +1,8 @@
|
||||||
Langchain-Chatchat
|
技术要求
|
||||||
数据科学与大数据技术
|
直流输电线路
|
||||||
人工智能与先进计算
|
直流架空输电线路
|
||||||
|
交流输电线路
|
||||||
|
交流架空输电线路
|
||||||
|
交流紧凑型输电线路
|
||||||
|
交流同塔双回线路
|
||||||
|
送电线路
|
||||||
|
|
@ -72,6 +72,7 @@ class ChineseRecursiveTextSplitter(RecursiveCharacterTextSplitter):
|
||||||
text = re.sub(r'(\n+(一、|二、|三、|四、|五、|六、|七、|八、|九、|十、|十一、|十二、|十三、|十四、|十五、|十六、|十七、|十八、|十九、|二十、))', r"\n\n\n\n\n\n\n\n\1", text) # 通过第 条
|
text = re.sub(r'(\n+(一、|二、|三、|四、|五、|六、|七、|八、|九、|十、|十一、|十二、|十三、|十四、|十五、|十六、|十七、|十八、|十九、|二十、))', r"\n\n\n\n\n\n\n\n\1", text) # 通过第 条
|
||||||
|
|
||||||
text = re.sub(r'(\n+(?<!\.|[a-zA-Z0-9])[a-zA-Z0-9]+\s*\.\s*[a-zA-Z0-9]+\s*\.\s*[a-zA-Z0-9]+[^\S\n]+[^\s\.]+(?!\.|[a-zA-Z0-9]))', r"\n\n\n\n\n\n\1", text) # 再通过 1.2.3
|
text = re.sub(r'(\n+(?<!\.|[a-zA-Z0-9])[a-zA-Z0-9]+\s*\.\s*[a-zA-Z0-9]+\s*\.\s*[a-zA-Z0-9]+[^\S\n]+[^\s\.]+(?!\.|[a-zA-Z0-9]))', r"\n\n\n\n\n\n\1", text) # 再通过 1.2.3
|
||||||
|
text = re.sub(r'(\n+((一)|(二)|(三)|(四)|(五)|(六)|(七)|(八)|(九)|(十)|(十一)|(十二)|(十三)|(十四)|(十五)|(十六)|(十七)|(十八)|(十九)|(二十)))', r"\n\n\n\n\n\n\1", text) # 通过第 条
|
||||||
text = text.rstrip() # 段尾如果有多余的\n就去掉它
|
text = text.rstrip() # 段尾如果有多余的\n就去掉它
|
||||||
self.is_recursive = True
|
self.is_recursive = True
|
||||||
for i, _s in enumerate(separators):
|
for i, _s in enumerate(separators):
|
||||||
|
|
|
||||||
|
|
@ -285,80 +285,80 @@ def knowledge_base_page(api: ApiRequest, is_lite: bool = None):
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
# cols = st.columns(3)
|
cols = st.columns(3)
|
||||||
|
|
||||||
# if cols[0].button(
|
if cols[0].button(
|
||||||
# "依据源文件重建向量库",
|
"依据源文件重建向量库",
|
||||||
# # help="无需上传文件,通过其它方式将文档拷贝到对应知识库content目录下,点击本按钮即可重建知识库。",
|
# help="无需上传文件,通过其它方式将文档拷贝到对应知识库content目录下,点击本按钮即可重建知识库。",
|
||||||
# use_container_width=True,
|
use_container_width=True,
|
||||||
# type="primary",
|
type="primary",
|
||||||
# ):
|
):
|
||||||
# with st.spinner("向量库重构中,请耐心等待,勿刷新或关闭页面。"):
|
with st.spinner("向量库重构中,请耐心等待,勿刷新或关闭页面。"):
|
||||||
# empty = st.empty()
|
empty = st.empty()
|
||||||
# empty.progress(0.0, "")
|
empty.progress(0.0, "")
|
||||||
# for d in api.recreate_vector_store(kb,
|
for d in api.recreate_vector_store(kb,
|
||||||
# chunk_size=chunk_size,
|
chunk_size=chunk_size,
|
||||||
# chunk_overlap=chunk_overlap,
|
chunk_overlap=chunk_overlap,
|
||||||
# zh_title_enhance=zh_title_enhance):
|
zh_title_enhance=zh_title_enhance):
|
||||||
# if msg := check_error_msg(d):
|
if msg := check_error_msg(d):
|
||||||
# st.toast(msg)
|
st.toast(msg)
|
||||||
# else:
|
else:
|
||||||
# empty.progress(d["finished"] / d["total"], d["msg"])
|
empty.progress(d["finished"] / d["total"], d["msg"])
|
||||||
# st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
# if cols[2].button(
|
if cols[2].button(
|
||||||
# "删除知识库",
|
"删除知识库",
|
||||||
# use_container_width=True,
|
use_container_width=True,
|
||||||
# ):
|
):
|
||||||
# ret = api.delete_knowledge_base(kb)
|
ret = api.delete_knowledge_base(kb)
|
||||||
# st.toast(ret.get("msg", " "))
|
st.toast(ret.get("msg", " "))
|
||||||
# time.sleep(1)
|
time.sleep(1)
|
||||||
# st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
# with st.sidebar:
|
# with st.sidebar:
|
||||||
# keyword = st.text_input("查询关键字")
|
# keyword = st.text_input("查询关键字")
|
||||||
# top_k = st.slider("匹配条数", 1, 100, 3)
|
# top_k = st.slider("匹配条数", 1, 100, 3)
|
||||||
|
|
||||||
st.write("文件内文档列表。双击进行修改,在删除列填入 Y 可删除对应行。")
|
# st.write("文件内文档列表。双击进行修改,在删除列填入 Y 可删除对应行。")
|
||||||
docs = []
|
# docs = []
|
||||||
df = pd.DataFrame([], columns=["seq", "id", "content", "source"])
|
# df = pd.DataFrame([], columns=["seq", "id", "content", "source"])
|
||||||
if selected_rows:
|
# if selected_rows:
|
||||||
file_name = selected_rows[0]["file_name"]
|
# file_name = selected_rows[0]["file_name"]
|
||||||
docs = api.search_kb_docs(knowledge_base_name=selected_kb, file_name=file_name)
|
# docs = api.search_kb_docs(knowledge_base_name=selected_kb, file_name=file_name)
|
||||||
data = [{"seq": i+1, "id": x["id"], "page_content": x["page_content"], "source": x["metadata"].get("source"),
|
# data = [{"seq": i+1, "id": x["id"], "page_content": x["page_content"], "source": x["metadata"].get("source"),
|
||||||
"type": x["type"],
|
# "type": x["type"],
|
||||||
"metadata": json.dumps(x["metadata"], ensure_ascii=False),
|
# "metadata": json.dumps(x["metadata"], ensure_ascii=False),
|
||||||
"to_del": "",
|
# "to_del": "",
|
||||||
} for i, x in enumerate(docs)]
|
# } for i, x in enumerate(docs)]
|
||||||
df = pd.DataFrame(data)
|
# df = pd.DataFrame(data)
|
||||||
|
|
||||||
gb = GridOptionsBuilder.from_dataframe(df)
|
# gb = GridOptionsBuilder.from_dataframe(df)
|
||||||
gb.configure_columns(["id", "source", "type", "metadata"], hide=True)
|
# gb.configure_columns(["id", "source", "type", "metadata"], hide=True)
|
||||||
gb.configure_column("seq", "No.", width=50)
|
# gb.configure_column("seq", "No.", width=50)
|
||||||
gb.configure_column("page_content", "内容", editable=True, autoHeight=True, wrapText=True, flex=1,
|
# gb.configure_column("page_content", "内容", editable=True, autoHeight=True, wrapText=True, flex=1,
|
||||||
cellEditor="agLargeTextCellEditor", cellEditorPopup=True)
|
# cellEditor="agLargeTextCellEditor", cellEditorPopup=True)
|
||||||
gb.configure_column("to_del", "删除", editable=True, width=50, wrapHeaderText=True,
|
# gb.configure_column("to_del", "删除", editable=True, width=50, wrapHeaderText=True,
|
||||||
cellEditor="agCheckboxCellEditor", cellRender="agCheckboxCellRenderer")
|
# cellEditor="agCheckboxCellEditor", cellRender="agCheckboxCellRenderer")
|
||||||
gb.configure_selection()
|
# gb.configure_selection()
|
||||||
edit_docs = AgGrid(df, gb.build())
|
# edit_docs = AgGrid(df, gb.build())
|
||||||
|
|
||||||
if st.button("保存更改"):
|
# if st.button("保存更改"):
|
||||||
# origin_docs = {x["id"]: {"page_content": x["page_content"], "type": x["type"], "metadata": x["metadata"]} for x in docs}
|
# # origin_docs = {x["id"]: {"page_content": x["page_content"], "type": x["type"], "metadata": x["metadata"]} for x in docs}
|
||||||
changed_docs = []
|
# changed_docs = []
|
||||||
for index, row in edit_docs.data.iterrows():
|
# for index, row in edit_docs.data.iterrows():
|
||||||
# origin_doc = origin_docs[row["id"]]
|
# # origin_doc = origin_docs[row["id"]]
|
||||||
# if row["page_content"] != origin_doc["page_content"]:
|
# # if row["page_content"] != origin_doc["page_content"]:
|
||||||
if row["to_del"] not in ["Y", "y", 1]:
|
# if row["to_del"] not in ["Y", "y", 1]:
|
||||||
changed_docs.append({
|
# changed_docs.append({
|
||||||
"page_content": row["page_content"],
|
# "page_content": row["page_content"],
|
||||||
"type": row["type"],
|
# "type": row["type"],
|
||||||
"metadata": json.loads(row["metadata"]),
|
# "metadata": json.loads(row["metadata"]),
|
||||||
})
|
# })
|
||||||
|
|
||||||
if changed_docs:
|
# if changed_docs:
|
||||||
if api.update_kb_docs(knowledge_base_name=selected_kb,
|
# if api.update_kb_docs(knowledge_base_name=selected_kb,
|
||||||
file_names=[file_name],
|
# file_names=[file_name],
|
||||||
docs={file_name: changed_docs}):
|
# docs={file_name: changed_docs}):
|
||||||
st.toast("更新文档成功")
|
# st.toast("更新文档成功")
|
||||||
else:
|
# else:
|
||||||
st.toast("更新文档失败")
|
# st.toast("更新文档失败")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue