diff --git a/.gitignore b/.gitignore index eac0805..ced8d9e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,169 @@ *.log *.log.* +*.bak logs -.idea/ -__pycache__/ /knowledge_base/ /configs/*.py .vscode/ + +# below are standard python ignore files +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ .pytest_cache/ -*.bak +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+.idea/ diff --git a/README.md b/README.md index e1fd464..a8df59b 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ 🚩 本项目未涉及微调、训练过程,但可利用微调或训练对本项目效果进行优化。 -🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/imClumsyPanda/langchain-ChatGLM/Langchain-Chatchat) 中 `v8` 版本所使用代码已更新至本项目 `v0.2.4` 版本。 +🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) 中 `v8` 版本所使用代码已更新至本项目 `v0.2.4` 版本。 🐳 [Docker 镜像](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.3) @@ -416,6 +416,6 @@ CUDA_VISIBLE_DEVICES=0,1 python startup.py -a ## 项目交流群 -<img src="img/qr_code_61.jpg" alt="二维码" /> +<img src="img/qr_code_62.jpg" alt="二维码" /> 🎉 langchain-ChatGLM 项目微信交流群,如果你也对本项目感兴趣,欢迎加入群聊参与讨论交流。 diff --git a/img/qr_code_61.jpg b/img/qr_code_61.jpg deleted file mode 100644 index 3d7d163..0000000 Binary files a/img/qr_code_61.jpg and /dev/null differ diff --git a/img/qr_code_62.jpg b/img/qr_code_62.jpg new file mode 100644 index 0000000..e90b353 Binary files /dev/null and b/img/qr_code_62.jpg differ diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index 49a5421..aa286ca 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -25,6 +25,7 @@ from concurrent.futures import ThreadPoolExecutor from server.utils import run_in_thread_pool, embedding_device, get_model_worker_config import io from typing import List, Union, Callable, Dict, Optional, Tuple, Generator +import chardet def validate_kb_name(knowledge_base_id: str) -> bool: @@ -166,7 +167,14 @@ def get_loader(loader_name: str, file_path_or_content: Union[str, bytes, io.Stri if loader_name == "UnstructuredFileLoader": loader = DocumentLoader(file_path_or_content, autodetect_encoding=True) elif loader_name == "CSVLoader": - loader = DocumentLoader(file_path_or_content, encoding="utf-8") + # 自动识别文件编码类型,避免langchain loader 加载文件报编码错误 + with open(file_path_or_content, 'rb') as struct_file: + encode_detect = chardet.detect(struct_file.read()) + if encode_detect: + loader = DocumentLoader(file_path_or_content,
encoding=encode_detect["encoding"]) + else: + loader = DocumentLoader(file_path_or_content, encoding="utf-8") + elif loader_name == "JSONLoader": loader = DocumentLoader(file_path_or_content, jq_schema=".", text_content=False) elif loader_name == "CustomJSONLoader": diff --git a/server/llm_api_stale.py b/server/llm_api_stale.py index cb02e0d..f0ac9a4 100644 --- a/server/llm_api_stale.py +++ b/server/llm_api_stale.py @@ -26,7 +26,7 @@ parser.add_argument('--model-path-address', default="THUDM/chatglm2-6b@localhost@20002", nargs="+", type=str, - help="model path, host, and port, formatted as model-path@host@path") + help="model path, host, and port, formatted as model-path@host@port") # ---------------controller------------------------- parser.add_argument("--controller-host", type=str, default="localhost")