From 1e12944a5dfac1d0c5f38aa29b92be6d6b77bbe7 Mon Sep 17 00:00:00 2001 From: hzg0601 Date: Wed, 16 Aug 2023 22:58:59 +0800 Subject: [PATCH 01/12] =?UTF-8?q?=E6=9B=B4=E6=96=B0shutdown=5Fall.sh:?= =?UTF-8?q?=E6=9B=B4=E6=96=B0mac=E8=AE=BE=E5=A4=87=E7=9A=84=E9=A2=9D?= =?UTF-8?q?=E5=A4=96=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- shutdown_all.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/shutdown_all.sh b/shutdown_all.sh index 961260d..0218147 100644 --- a/shutdown_all.sh +++ b/shutdown_all.sh @@ -1 +1,2 @@ +# mac设备上的grep命令可能不支持grep -P选项,请使用Homebrew安装;或使用ggrep命令 ps -eo pid,user,cmd|grep -P 'server/api.py|webui.py|fastchat.serve'|grep -v grep|awk '{print $1}'|xargs kill -9 \ No newline at end of file From cb3fe84fe3f49167d8b846df4b2bdd92b52772d3 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Sun, 20 Aug 2023 10:41:28 +0800 Subject: [PATCH 02/12] update VERSION --- configs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/__init__.py b/configs/__init__.py index 6e0ad13..dc9dd40 100644 --- a/configs/__init__.py +++ b/configs/__init__.py @@ -1,4 +1,4 @@ from .model_config import * from .server_config import * -VERSION = "v0.2.1" +VERSION = "v0.2.2-preview" From cc0bd4efd99106a461a0074b6ab9bf7f074d2b44 Mon Sep 17 00:00:00 2001 From: hzg0601 Date: Fri, 25 Aug 2023 11:27:39 +0800 Subject: [PATCH 03/12] =?UTF-8?q?update=20server=5Fconfig.py.example:=20?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=A4=9A=E5=8D=A1=E5=90=AF=E5=8A=A8=E7=9A=84?= =?UTF-8?q?=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- configs/server_config.py.example | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/server_config.py.example b/configs/server_config.py.example index 5f37779..b0f37bf 100644 --- a/configs/server_config.py.example +++ b/configs/server_config.py.example @@ -34,11 +34,11 @@ FSCHAT_MODEL_WORKERS = { "port": 20002, "device": LLM_DEVICE, # todo: 多卡加载需要配置的参数 - "gpus": None, - "numgpus": 1, + "gpus": None, # 使用的GPU,以str的格式指定,如"0,1" + "num_gpus": 1, # 使用GPU的数量 # 以下为非常用参数,可根据需要配置 - # "max_gpu_memory": "20GiB", - # "load_8bit": False, + # "max_gpu_memory": "20GiB", # 每个GPU占用的最大显存 + # "load_8bit": False, # 开启8bit量化 # "cpu_offloading": None, # "gptq_ckpt": None, # "gptq_wbits": 16, From 33ce276e3ea75a360206896e06a489d426ce6fd8 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Fri, 25 Aug 2023 15:18:43 +0800 Subject: [PATCH 04/12] change default search_engine option to 'duckduckgo' --- webui_pages/dialogue/dialogue.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/webui_pages/dialogue/dialogue.py b/webui_pages/dialogue/dialogue.py index a317aba..04ece7d 100644 --- a/webui_pages/dialogue/dialogue.py +++ b/webui_pages/dialogue/dialogue.py @@ -80,8 +80,13 @@ def dialogue_page(api: ApiRequest): # chunk_content = st.checkbox("关联上下文", False, disabled=True) # chunk_size = st.slider("关联长度:", 0, 500, 250, disabled=True) elif dialogue_mode == "搜索引擎问答": + search_engine_list = list(SEARCH_ENGINES.keys()) with st.expander("搜索引擎配置", True): - search_engine = st.selectbox("请选择搜索引擎", SEARCH_ENGINES.keys(), 0) + search_engine = st.selectbox( + label="请选择搜索引擎", + options=search_engine_list, + index=search_engine_list.index("duckduckgo") if "duckduckgo" in search_engine_list else 0, + ) se_top_k = st.number_input("匹配搜索结果条数:", 1, 20, 3) # Display chat messages from history on app rerun @@ -125,11 +130,12 
@@ def dialogue_page(api: ApiRequest): ]) text = "" for d in api.search_engine_chat(prompt, search_engine, se_top_k): - if error_msg := check_error_msg(d): # check whether error occured + if error_msg := check_error_msg(d): # check whether error occured st.error(error_msg) - text += d["answer"] - chat_box.update_msg(text, 0) - chat_box.update_msg("\n\n".join(d["docs"]), 1, streaming=False) + else: + text += d["answer"] + chat_box.update_msg(text, 0) + chat_box.update_msg("\n\n".join(d["docs"]), 1, streaming=False) chat_box.update_msg(text, 0, streaming=False) now = datetime.now() From faba1b3877623891cfef60ac08f13780b1713586 Mon Sep 17 00:00:00 2001 From: hzg0601 Date: Fri, 25 Aug 2023 16:16:44 +0800 Subject: [PATCH 05/12] =?UTF-8?q?update=20readme.md,=20shutdown=5Fall.sh:?= =?UTF-8?q?=20=E5=9C=A8Linux=E4=B8=8A=E4=BD=BF=E7=94=A8ctrl+C=E9=80=80?= =?UTF-8?q?=E5=87=BA=E5=8F=AF=E8=83=BD=E4=BC=9A=E7=94=B1=E4=BA=8Elinux?= =?UTF-8?q?=E7=9A=84=E5=A4=9A=E8=BF=9B=E7=A8=8B=E6=9C=BA=E5=88=B6=E5=AF=BC?= =?UTF-8?q?=E8=87=B4multiprocessing=E9=81=97=E7=95=99=E5=AD=A4=E5=84=BF?= =?UTF-8?q?=E8=BF=9B=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 16 +++++++--------- shutdown_all.sh | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index bf3a5ae..208d5de 100644 --- a/README.md +++ b/README.md @@ -208,7 +208,8 @@ embedding_model_dict = { "m3e-base": "/Users/xxx/Downloads/m3e-base", } ``` -如果你选择使用OpenAI的Embedding模型,请将模型的```key```写入`embedding_model_dict`中。使用该模型,你需要鞥能够访问OpenAI官的API,或设置代理。 + +如果你选择使用OpenAI的Embedding模型,请将模型的 ``key``写入 `embedding_model_dict`中。使用该模型,你需要鞥能够访问OpenAI官的API,或设置代理。 ### 4. 知识库初始化与迁移 @@ -219,7 +220,7 @@ embedding_model_dict = { ```shell $ python init_database.py ``` -- 如果您是第一次运行本项目,知识库尚未建立,或者配置文件中的知识库类型、嵌入模型发生变化,或者之前的向量库没有开启`normalize_L2`,需要以下命令初始化或重建知识库: +- 如果您是第一次运行本项目,知识库尚未建立,或者配置文件中的知识库类型、嵌入模型发生变化,或者之前的向量库没有开启 `normalize_L2`,需要以下命令初始化或重建知识库: ```shell $ python init_database.py --recreate-vs @@ -308,7 +309,6 @@ $ python server/llm_api_shutdown.py --serve all ![image](https://github.com/chatchat-space/Langchain-Chatchat/assets/22924096/4e056c1c-5c4b-4865-a1af-859cd58a625d) - #### 5.2 启动 API 服务 本地部署情况下,按照 [5.1 节](README.md#5.1-启动-LLM-服务)**启动 LLM 服务后**,再执行 [server/api.py](server/api.py) 脚本启动 **API** 服务; @@ -366,17 +366,13 @@ $ python startup.py -a 并可使用 `Ctrl + C` 直接关闭所有运行服务。如果一次结束不了,可以多按几次。 -可选参数包括 `-a (或--all-webui)`, `--all-api`, `--llm-api`, `-c (或--controller)`, `--openai-api`, +可选参数包括 `-a (或--all-webui)`, `--all-api`, `--llm-api`, `-c (或--controller)`, `--openai-api`, `-m (或--model-worker)`, `--api`, `--webui`,其中: - `--all-webui` 为一键启动 WebUI 所有依赖服务; - - `--all-api` 为一键启动 API 所有依赖服务; - - `--llm-api` 为一键启动 Fastchat 所有依赖的 LLM 服务; - - `--openai-api` 为仅启动 FastChat 的 controller 和 openai-api-server 服务; - - 其他为单独服务启动选项。 若想指定非默认模型,需要用 `--model-name` 选项,示例: @@ -385,7 +381,7 @@ $ python startup.py -a $ python startup.py --all-webui --model-name Qwen-7B-Chat ``` -更多信息可通过`python startup.py -h`查看。 +更多信息可通过 `python startup.py -h`查看。 **注意:** @@ -393,6 +389,8 @@ $ python startup.py --all-webui --model-name Qwen-7B-Chat **2.服务启动时间示设备不同而不同,约 3-10 分钟,如长时间没有启动请前往 `./logs`目录下监控日志,定位问题。** +**3. 
在Linux上使用ctrl+C退出可能会由于linux的多进程机制导致multiprocessing遗留孤儿进程,可通过shutdown_all.sh进行退出** + ## 常见问题 参见 [常见问题](docs/FAQ.md)。 diff --git a/shutdown_all.sh b/shutdown_all.sh index 0218147..8c64806 100644 --- a/shutdown_all.sh +++ b/shutdown_all.sh @@ -1,2 +1,2 @@ # mac设备上的grep命令可能不支持grep -P选项,请使用Homebrew安装;或使用ggrep命令 -ps -eo pid,user,cmd|grep -P 'server/api.py|webui.py|fastchat.serve'|grep -v grep|awk '{print $1}'|xargs kill -9 \ No newline at end of file +ps -eo pid,user,cmd|grep -P 'server/api.py|webui.py|fastchat.serve|multiprocessing'|grep -v grep|awk '{print $1}'|xargs kill -9 \ No newline at end of file From 79d225be36d24b05c7ac4522f20e1a2c2cc64841 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Fri, 25 Aug 2023 16:26:51 +0800 Subject: [PATCH 06/12] update VERSION --- configs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/__init__.py b/configs/__init__.py index dc9dd40..7c105f4 100644 --- a/configs/__init__.py +++ b/configs/__init__.py @@ -1,4 +1,4 @@ from .model_config import * from .server_config import * -VERSION = "v0.2.2-preview" +VERSION = "v0.2.2" From 021677d5d66cf624cd0c8ebfe9d9a2b34c4a41a5 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Fri, 25 Aug 2023 18:04:06 +0800 Subject: [PATCH 07/12] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 208d5de..c5c2d68 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ 🚩 本项目未涉及微调、训练过程,但可利用微调或训练对本项目效果进行优化。 -🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/imClumsyPanda/langchain-ChatGLM/Langchain-Chatchat) 中 `v5` 版本所使用代码已更新至本项目 `0.2.0` 版本。 +🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/imClumsyPanda/langchain-ChatGLM/Langchain-Chatchat) 中 `v6` 版本所使用代码已更新至本项目 `0.2.2` 版本。 🐳 [Docker 镜像](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.0) From 2e930cf43c49fb3287155c31736b63d6f2768221 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Fri, 25 Aug 2023 22:14:56 +0800 Subject: [PATCH 08/12] update requirements.txt --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements.txt b/requirements.txt index 93908dd..171e2a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,8 @@ python-magic-bin; sys_platform == 'win32' SQLAlchemy==2.0.19 faiss-cpu nltk +accelerate +spacy # uncomment libs if you want to use corresponding vector store # pymilvus==2.1.3 # requires milvus==2.1.3 @@ -28,3 +30,4 @@ streamlit-antd-components>=0.1.11 streamlit-chatbox>=1.1.6 streamlit-aggrid>=0.3.4.post3 httpx~=0.24.1 +watchdog \ No newline at end of file From ab5d1df2a34d0ebbae6f901b0705bcfd09021f36 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Fri, 25 Aug 2023 22:16:49 +0800 Subject: [PATCH 09/12] update requirements --- requirements_api.txt | 2 ++ requirements_webui.txt | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements_api.txt b/requirements_api.txt index f567f9f..58dbc0c 100644 --- a/requirements_api.txt +++ b/requirements_api.txt @@ -14,6 +14,8 @@ python-magic-bin; sys_platform == 'win32' SQLAlchemy==2.0.19 faiss-cpu nltk +accelerate +spacy # uncomment libs if you want to use corresponding vector store # pymilvus==2.1.3 # requires milvus==2.1.3 diff --git a/requirements_webui.txt b/requirements_webui.txt index a832550..1645fe1 100644 --- a/requirements_webui.txt +++ b/requirements_webui.txt @@ -6,4 +6,5 @@ streamlit-antd-components>=0.1.11 streamlit-chatbox>=1.1.6 streamlit-aggrid>=0.3.4.post3 httpx~=0.24.1 -nltk \ No newline at end of file +nltk +watchdog \ No newline at end of 
file From 32bf508745254e74c4b845a926cba38079c04e16 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Fri, 25 Aug 2023 22:53:49 +0800 Subject: [PATCH 10/12] update requirements --- requirements.txt | 2 +- requirements_webui.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 171e2a7..a72c760 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ spacy numpy~=1.24.4 pandas~=2.0.3 -streamlit>=1.25.0 +streamlit>=1.26.0 streamlit-option-menu>=0.3.6 streamlit-antd-components>=0.1.11 streamlit-chatbox>=1.1.6 diff --git a/requirements_webui.txt b/requirements_webui.txt index 1645fe1..da66c30 100644 --- a/requirements_webui.txt +++ b/requirements_webui.txt @@ -1,6 +1,6 @@ numpy~=1.24.4 pandas~=2.0.3 -streamlit>=1.25.0 +streamlit>=1.26.0 streamlit-option-menu>=0.3.6 streamlit-antd-components>=0.1.11 streamlit-chatbox>=1.1.6 From fea7e8ddf172eb5ad66213608f05c981f0e333b1 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Sat, 26 Aug 2023 11:10:45 +0800 Subject: [PATCH 11/12] update VERSION --- configs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/__init__.py b/configs/__init__.py index 7c105f4..b42f0b5 100644 --- a/configs/__init__.py +++ b/configs/__init__.py @@ -1,4 +1,4 @@ from .model_config import * from .server_config import * -VERSION = "v0.2.2" +VERSION = "v0.2.3-preview" From f95d41ef471707ddc0f0a2430c1cbc9faf2dfdcc Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Sat, 26 Aug 2023 11:45:01 +0800 Subject: [PATCH 12/12] =?UTF-8?q?[BUG]=20=E4=BF=AE=E5=A4=8Dcsv=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E8=AF=BB=E5=8F=96=E5=90=8E=EF=BC=8C=E5=8D=95=E8=A1=8C?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E8=A2=AB=E5=88=86=E6=88=90=E5=A4=9A=E6=AE=B5?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/knowledge_base/utils.py | 43 ++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index da53049..34f2083 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -104,32 +104,35 @@ class KnowledgeFile: else: loader = DocumentLoader(self.filepath) - try: - if self.text_splitter_name is None: + if self.ext in ".csv": + docs = loader.load() + else: + try: + if self.text_splitter_name is None: + text_splitter_module = importlib.import_module('langchain.text_splitter') + TextSplitter = getattr(text_splitter_module, "SpacyTextSplitter") + text_splitter = TextSplitter( + pipeline="zh_core_web_sm", + chunk_size=CHUNK_SIZE, + chunk_overlap=OVERLAP_SIZE, + ) + self.text_splitter_name = "SpacyTextSplitter" + else: + text_splitter_module = importlib.import_module('langchain.text_splitter') + TextSplitter = getattr(text_splitter_module, self.text_splitter_name) + text_splitter = TextSplitter( + chunk_size=CHUNK_SIZE, + chunk_overlap=OVERLAP_SIZE) + except Exception as e: + print(e) text_splitter_module = importlib.import_module('langchain.text_splitter') - TextSplitter = getattr(text_splitter_module, "SpacyTextSplitter") + TextSplitter = getattr(text_splitter_module, "RecursiveCharacterTextSplitter") text_splitter = TextSplitter( - pipeline="zh_core_web_sm", chunk_size=CHUNK_SIZE, chunk_overlap=OVERLAP_SIZE, ) - self.text_splitter_name = "SpacyTextSplitter" - else: - text_splitter_module = importlib.import_module('langchain.text_splitter') - TextSplitter = getattr(text_splitter_module, self.text_splitter_name) - 
text_splitter = TextSplitter( - chunk_size=CHUNK_SIZE, - chunk_overlap=OVERLAP_SIZE) - except Exception as e: - print(e) - text_splitter_module = importlib.import_module('langchain.text_splitter') - TextSplitter = getattr(text_splitter_module, "RecursiveCharacterTextSplitter") - text_splitter = TextSplitter( - chunk_size=CHUNK_SIZE, - chunk_overlap=OVERLAP_SIZE, - ) - docs = loader.load_and_split(text_splitter) + docs = loader.load_and_split(text_splitter) print(docs[0]) if using_zh_title_enhance: docs = zh_title_enhance(docs)
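The final patch above fixes CSV handling by loading `.csv` files with `loader.load()` (one document per row, never re-split), while all other formats still go through a text splitter chosen dynamically from `langchain.text_splitter`, falling back to `RecursiveCharacterTextSplitter` if the configured splitter cannot be built. The sketch below is a minimal, standalone illustration of that selection-with-fallback pattern, not the project's actual `KnowledgeFile` code: the helper names `make_text_splitter` and `load_docs` are hypothetical, the `CHUNK_SIZE`/`OVERLAP_SIZE` values are placeholders for the project's config values, and it assumes the `langchain.text_splitter` API available around the time of these patches (August 2023).

```python
import importlib
from typing import Optional

# Illustrative values only; the project reads CHUNK_SIZE / OVERLAP_SIZE from its configs.
CHUNK_SIZE = 250
OVERLAP_SIZE = 50


def make_text_splitter(splitter_name: Optional[str] = None):
    """Pick a text splitter by name, mirroring the fallback idea in the patch:
    no name -> SpacyTextSplitter with a Chinese pipeline; any failure ->
    RecursiveCharacterTextSplitter, which needs no extra language model."""
    module = importlib.import_module("langchain.text_splitter")
    try:
        if splitter_name is None:
            TextSplitter = getattr(module, "SpacyTextSplitter")
            return TextSplitter(
                pipeline="zh_core_web_sm",  # requires spacy and this model to be installed
                chunk_size=CHUNK_SIZE,
                chunk_overlap=OVERLAP_SIZE,
            )
        TextSplitter = getattr(module, splitter_name)
        return TextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=OVERLAP_SIZE)
    except Exception as e:
        # Fall back to a splitter that always works, as the patch does.
        print(e)
        TextSplitter = getattr(module, "RecursiveCharacterTextSplitter")
        return TextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=OVERLAP_SIZE)


def load_docs(loader, ext: str, splitter_name: Optional[str] = None):
    """CSV files are loaded row-per-document without splitting (the bug fix above);
    everything else is split with the selected text splitter."""
    if ext == ".csv":
        return loader.load()
    return loader.load_and_split(make_text_splitter(splitter_name))
```

One small difference from the patch: the sketch compares the extension with `==`, whereas the patch uses `if self.ext in ".csv"`, a substring test that happens to work for `.csv` but is less explicit about its intent.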