修复:知识库json文件的中文被转为unicode码,导致无法匹配 (#2128)
This commit is contained in:
parent
a9cf191532
commit
3cf7422c21
|
|
@ -17,7 +17,7 @@ from langchain.docstore.document import Document
|
||||||
from langchain.text_splitter import TextSplitter
|
from langchain.text_splitter import TextSplitter
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from server.utils import run_in_thread_pool, get_model_worker_config
|
from server.utils import run_in_thread_pool, get_model_worker_config
|
||||||
import io
|
import json
|
||||||
from typing import List, Union,Dict, Tuple, Generator
|
from typing import List, Union,Dict, Tuple, Generator
|
||||||
import chardet
|
import chardet
|
||||||
|
|
||||||
|
|
@ -101,6 +101,16 @@ LOADER_DICT = {"UnstructuredHTMLLoader": ['.html'],
|
||||||
SUPPORTED_EXTS = [ext for sublist in LOADER_DICT.values() for ext in sublist]
|
SUPPORTED_EXTS = [ext for sublist in LOADER_DICT.values() for ext in sublist]
|
||||||
|
|
||||||
|
|
||||||
|
# Patch json.dumps so that it never escapes non-ASCII characters.
# Per the commit description, Chinese text in knowledge-base JSON files was
# being turned into \uXXXX escapes, which broke matching against the content.
def _new_json_dumps(obj, **kwargs):
    """Serialize *obj* with the original ``json.dumps``, forcing ``ensure_ascii=False``.

    Args:
        obj: The object to serialize.
        **kwargs: Keyword arguments forwarded to the original ``json.dumps``.
            Any ``ensure_ascii`` value supplied by the caller is overridden.

    Returns:
        The JSON string, with non-ASCII characters emitted verbatim.
    """
    kwargs["ensure_ascii"] = False
    return _origin_json_dumps(obj, **kwargs)


# Always wrap the genuine json.dumps, never a previously installed wrapper.
# A freshly defined function is a new object every time this module body runs,
# so an identity check against it cannot detect an earlier patch; on a module
# reload that would rebind _origin_json_dumps to the old wrapper, which then
# looks up the same global and calls itself until RecursionError.
# Stashing the real function on the wrapper lets a later run recover it.
_origin_json_dumps = getattr(json.dumps, "_origin_json_dumps", json.dumps)
_new_json_dumps._origin_json_dumps = _origin_json_dumps
json.dumps = _new_json_dumps
|
||||||
|
|
||||||
|
|
||||||
class JSONLinesLoader(langchain.document_loaders.JSONLoader):
|
class JSONLinesLoader(langchain.document_loaders.JSONLoader):
|
||||||
'''
|
'''
|
||||||
行式 Json 加载器,要求文件扩展名为 .jsonl
|
行式 Json 加载器,要求文件扩展名为 .jsonl
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue