diff --git a/embeddings/embedding_keywords.txt b/embeddings/embedding_keywords.txt index 3822b99..57d79d6 100644 --- a/embeddings/embedding_keywords.txt +++ b/embeddings/embedding_keywords.txt @@ -1,3 +1,8 @@ -Langchain-Chatchat -数据科学与大数据技术 -人工智能与先进计算 \ No newline at end of file +技术要求 +直流输电线路 +直流架空输电线路 +交流输电线路 +交流架空输电线路 +交流紧凑型输电线路 +交流同塔双回线路 +送电线路 \ No newline at end of file diff --git a/text_splitter/chinese_recursive_text_splitter.py b/text_splitter/chinese_recursive_text_splitter.py index 0a9f232..82c2539 100644 --- a/text_splitter/chinese_recursive_text_splitter.py +++ b/text_splitter/chinese_recursive_text_splitter.py @@ -72,6 +72,7 @@ class ChineseRecursiveTextSplitter(RecursiveCharacterTextSplitter): text = re.sub(r'(\n+(一、|二、|三、|四、|五、|六、|七、|八、|九、|十、|十一、|十二、|十三、|十四、|十五、|十六、|十七、|十八、|十九、|二十、))', r"\n\n\n\n\n\n\n\n\1", text) # 通过第 条 text = re.sub(r'(\n+(?