roll back the last commit

This commit is contained in:
wvivi2023 2024-01-26 14:32:17 +08:00
parent 1d12f84310
commit 5a9c25d010
3 changed files with 10 additions and 11 deletions

View File

@@ -2,4 +2,4 @@ from .mypdfloader import RapidOCRPDFLoader
from .myimgloader import RapidOCRLoader
from .customiedpdfloader import CustomizedPDFLoader
from .mywordload import RapidWordLoader
from .customercore import custom_group_broken_paragraphs
#from .customercore import custom_group_broken_paragraphs

View File

@@ -5,12 +5,12 @@ from docx.document import Document as _Document
from docx.table import _Cell
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.oxml.table import CT_TblGrid
#from docx.oxml.table import CT_TblGrid
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
from unstructured.partition.text import partition_text
import unstructured.cleaners.core
from customercore import custom_group_broken_paragraphs
from .customercore import custom_group_broken_paragraphs
unstructured.cleaners.core.group_broken_paragraphs = custom_group_broken_paragraphs
class RapidWordLoader(UnstructuredFileLoader):
@@ -33,10 +33,10 @@ class RapidWordLoader(UnstructuredFileLoader):
yield Paragraph(child, parent)
elif isinstance(child, CT_Tbl):
yield Table(child, parent)
elif isinstance(child, CT_TblGrid):
yield Table(child, parent)
else:
print(f"都不属于")
# elif isinstance(child, CT_TblGrid):
# yield Table(child, parent)
# else:
# print(f"都不属于")
def read_table(table):
# 获取表格列标题
@@ -66,7 +66,7 @@ class RapidWordLoader(UnstructuredFileLoader):
doc = docxDocument(filepath)
for block in iter_block_items(doc):
if isinstance(block,Paragraph):
print(f"Paragraph:{block.text}")
#print(f"Paragraph:{block.text}")
resp += (block.text + "\n\n")
elif isinstance(block, Table):
resp += read_table(block) + "\n"

View File

@@ -9,8 +9,7 @@ from configs import (LLM_MODELS, LLM_DEVICE, EMBEDDING_DEVICE,
FSCHAT_MODEL_WORKERS, HTTPX_DEFAULT_TIMEOUT)
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
#from langchain.chat_models import ChatOpenAI
from langchain._api import ChatOpenAI
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI, AzureOpenAI, Anthropic
import httpx
from typing import Literal, Optional, Callable, Generator, Dict, Any, Awaitable, Union, Tuple
@@ -673,4 +672,4 @@ def get_temp_dir(id: str = None) -> Tuple[str, str]:
return path, id
path = tempfile.mkdtemp(dir=BASE_TEMP_DIR)
return path, os.path.basename(path)
return path, os.path.basename(path)