diff --git a/document_loaders/__init__.py b/document_loaders/__init__.py index ff1f046..8ad1da6 100644 --- a/document_loaders/__init__.py +++ b/document_loaders/__init__.py @@ -2,4 +2,4 @@ from .mypdfloader import RapidOCRPDFLoader from .myimgloader import RapidOCRLoader from .customiedpdfloader import CustomizedPDFLoader from .mywordload import RapidWordLoader -from .customercore import custom_group_broken_paragraphs +#from .customercore import custom_group_broken_paragraphs diff --git a/document_loaders/mywordload.py b/document_loaders/mywordload.py index eecb653..9721a79 100644 --- a/document_loaders/mywordload.py +++ b/document_loaders/mywordload.py @@ -5,12 +5,12 @@ from docx.document import Document as _Document from docx.table import _Cell from docx.oxml.text.paragraph import CT_P from docx.oxml.table import CT_Tbl -from docx.oxml.table import CT_TblGrid +#from docx.oxml.table import CT_TblGrid from docx.table import _Cell, Table from docx.text.paragraph import Paragraph from unstructured.partition.text import partition_text import unstructured.cleaners.core -from customercore import custom_group_broken_paragraphs +from .customercore import custom_group_broken_paragraphs unstructured.cleaners.core.group_broken_paragraphs = custom_group_broken_paragraphs class RapidWordLoader(UnstructuredFileLoader): @@ -33,10 +33,10 @@ class RapidWordLoader(UnstructuredFileLoader): yield Paragraph(child, parent) elif isinstance(child, CT_Tbl): yield Table(child, parent) - elif isinstance(child, CT_TblGrid): - yield Table(child, parent) - else: - print(f"都不属于") + # elif isinstance(child, CT_TblGrid): + # yield Table(child, parent) + # else: + # print(f"都不属于") def read_table(table): # 获取表格列标题 @@ -66,7 +66,7 @@ class RapidWordLoader(UnstructuredFileLoader): doc = docxDocument(filepath) for block in iter_block_items(doc): if isinstance(block,Paragraph): - print(f"Paragraph:{block.text}") + #print(f"Paragraph:{block.text}") resp += (block.text + "\n\n") elif isinstance(block, Table): resp += read_table(block) + "\n" diff --git a/server/utils.py b/server/utils.py index e0aff28..e5e0476 100644 --- a/server/utils.py +++ b/server/utils.py @@ -9,8 +9,7 @@ from configs import (LLM_MODELS, LLM_DEVICE, EMBEDDING_DEVICE, FSCHAT_MODEL_WORKERS, HTTPX_DEFAULT_TIMEOUT) import os from concurrent.futures import ThreadPoolExecutor, as_completed -#from langchain.chat_models import ChatOpenAI -from langchain._api import ChatOpenAI +from langchain.chat_models import ChatOpenAI from langchain.llms import OpenAI, AzureOpenAI, Anthropic import httpx from typing import Literal, Optional, Callable, Generator, Dict, Any, Awaitable, Union, Tuple @@ -673,4 +672,4 @@ def get_temp_dir(id: str = None) -> Tuple[str, str]: return path, id path = tempfile.mkdtemp(dir=BASE_TEMP_DIR) - return path, os.path.basename(path) + return path, os.path.basename(path) \ No newline at end of file