roll back the last commit

This commit is contained in:
wvivi2023 2024-01-26 14:32:17 +08:00
parent 1d12f84310
commit 5a9c25d010
3 changed files with 10 additions and 11 deletions

View File

@@ -2,4 +2,4 @@ from .mypdfloader import RapidOCRPDFLoader
 from .myimgloader import RapidOCRLoader
 from .customiedpdfloader import CustomizedPDFLoader
 from .mywordload import RapidWordLoader
-from .customercore import custom_group_broken_paragraphs
+#from .customercore import custom_group_broken_paragraphs

View File

@@ -5,12 +5,12 @@ from docx.document import Document as _Document
 from docx.table import _Cell
 from docx.oxml.text.paragraph import CT_P
 from docx.oxml.table import CT_Tbl
-from docx.oxml.table import CT_TblGrid
+#from docx.oxml.table import CT_TblGrid
 from docx.table import _Cell, Table
 from docx.text.paragraph import Paragraph
 from unstructured.partition.text import partition_text
 import unstructured.cleaners.core
-from customercore import custom_group_broken_paragraphs
+from .customercore import custom_group_broken_paragraphs
 unstructured.cleaners.core.group_broken_paragraphs = custom_group_broken_paragraphs
 class RapidWordLoader(UnstructuredFileLoader):
@@ -33,10 +33,10 @@ class RapidWordLoader(UnstructuredFileLoader):
 yield Paragraph(child, parent)
 elif isinstance(child, CT_Tbl):
 yield Table(child, parent)
-elif isinstance(child, CT_TblGrid):
-yield Table(child, parent)
-else:
-print(f"都不属于")
+# elif isinstance(child, CT_TblGrid):
+# yield Table(child, parent)
+# else:
+# print(f"都不属于")
 def read_table(table):
 # 获取表格列标题
@@ -66,7 +66,7 @@ class RapidWordLoader(UnstructuredFileLoader):
 doc = docxDocument(filepath)
 for block in iter_block_items(doc):
 if isinstance(block,Paragraph):
-print(f"Paragraph:{block.text}")
+#print(f"Paragraph:{block.text}")
 resp += (block.text + "\n\n")
 elif isinstance(block, Table):
 resp += read_table(block) + "\n"

View File

@@ -9,8 +9,7 @@ from configs import (LLM_MODELS, LLM_DEVICE, EMBEDDING_DEVICE,
 FSCHAT_MODEL_WORKERS, HTTPX_DEFAULT_TIMEOUT)
 import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
-#from langchain.chat_models import ChatOpenAI
-from langchain._api import ChatOpenAI
+from langchain.chat_models import ChatOpenAI
 from langchain.llms import OpenAI, AzureOpenAI, Anthropic
 import httpx
 from typing import Literal, Optional, Callable, Generator, Dict, Any, Awaitable, Union, Tuple