From 3cf7422c21acadbc5a5eb3b97360fea99b6334fc Mon Sep 17 00:00:00 2001 From: liunux4odoo <41217877+liunux4odoo@users.noreply.github.com> Date: Tue, 21 Nov 2023 21:00:46 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=EF=BC=9A=E7=9F=A5=E8=AF=86?= =?UTF-8?q?=E5=BA=93json=E6=96=87=E4=BB=B6=E7=9A=84=E4=B8=AD=E6=96=87?= =?UTF-8?q?=E8=A2=AB=E8=BD=AC=E4=B8=BAunicode=E7=A0=81=EF=BC=8C=E5=AF=BC?= =?UTF-8?q?=E8=87=B4=E6=97=A0=E6=B3=95=E5=8C=B9=E9=85=8D=20(#2128)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/knowledge_base/utils.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index d302a04..28863c3 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -17,7 +17,7 @@ from langchain.docstore.document import Document from langchain.text_splitter import TextSplitter from pathlib import Path from server.utils import run_in_thread_pool, get_model_worker_config -import io +import json from typing import List, Union,Dict, Tuple, Generator import chardet @@ -101,6 +101,16 @@ LOADER_DICT = {"UnstructuredHTMLLoader": ['.html'], SUPPORTED_EXTS = [ext for sublist in LOADER_DICT.values() for ext in sublist] +# patch json.dumps to disable ensure_ascii +def _new_json_dumps(obj, **kwargs): + kwargs["ensure_ascii"] = False + return _origin_json_dumps(obj, **kwargs) + +if json.dumps is not _new_json_dumps: + _origin_json_dumps = json.dumps + json.dumps = _new_json_dumps + + class JSONLinesLoader(langchain.document_loaders.JSONLoader): ''' 行式 Json 加载器,要求文件扩展名为 .jsonl