From 173b23ad7da0835f8655d0395fdb9b796b757e58 Mon Sep 17 00:00:00 2001 From: wvivi2023 Date: Thu, 18 Jan 2024 15:44:14 +0800 Subject: [PATCH] enhance --- embeddings/embedding_keywords.txt | 11 +- .../chinese_recursive_text_splitter.py | 1 + webui_pages/knowledge_base/knowledge_base.py | 134 +++++++++--------- 3 files changed, 76 insertions(+), 70 deletions(-) diff --git a/embeddings/embedding_keywords.txt b/embeddings/embedding_keywords.txt index 3822b99..57d79d6 100644 --- a/embeddings/embedding_keywords.txt +++ b/embeddings/embedding_keywords.txt @@ -1,3 +1,8 @@ -Langchain-Chatchat -数据科学与大数据技术 -人工智能与先进计算 \ No newline at end of file +技术要求 +直流输电线路 +直流架空输电线路 +交流输电线路 +交流架空输电线路 +交流紧凑型输电线路 +交流同塔双回线路 +送电线路 \ No newline at end of file diff --git a/text_splitter/chinese_recursive_text_splitter.py b/text_splitter/chinese_recursive_text_splitter.py index 0a9f232..82c2539 100644 --- a/text_splitter/chinese_recursive_text_splitter.py +++ b/text_splitter/chinese_recursive_text_splitter.py @@ -72,6 +72,7 @@ class ChineseRecursiveTextSplitter(RecursiveCharacterTextSplitter): text = re.sub(r'(\n+(一、|二、|三、|四、|五、|六、|七、|八、|九、|十、|十一、|十二、|十三、|十四、|十五、|十六、|十七、|十八、|十九、|二十、))', r"\n\n\n\n\n\n\n\n\1", text) # 通过第 条 text = re.sub(r'(\n+(?