# YuXin_Liu/图像分割/sishu.py

import os
import uvicorn
from fastapi import FastAPI, HTTPException
from paddleocr import PaddleOCR
from fastapi.responses import JSONResponse
import json
import re

# Module-level OCR engine, constructed once at import time with lang='ch'
# and reused by extract_text() for every request.
ocr = PaddleOCR(lang='ch')

# FastAPI application object; the route below registers itself on it.
app = FastAPI()

@app.post("/sishu_ocr")
def sishu_ocr(path: str):
    """Run OCR on the image at *path* and return the parsed label fields.

    Args:
        path: Location of the image file on the server's filesystem.

    Returns:
        A JSONResponse. When *path* is empty or does not name an existing
        regular file, the body is ``{"code": "201"}`` with HTTP status 201.
        Otherwise the body is the dict produced by ``parse_text`` with HTTP
        status 200.
    """
    # NOTE(review): `path` is supplied by the client and opened as-is on the
    # server. If this endpoint is reachable by untrusted callers, restrict it
    # to an allowed directory.
    #
    # isfile() rather than exists(): a directory "exists" but cannot be read
    # as an image, so it is rejected here like any other unusable path.
    if not path or not os.path.isfile(path):
        return JSONResponse(content={"code": "201"}, status_code=201)

    text = extract_text(path)
    result = parse_text(text)
    return JSONResponse(content=result, status_code=200)


def extract_text(path: str) -> str:
    """Run the shared OCR engine on *path* and return the recognised text.

    Args:
        path: Image location passed straight to ``ocr.ocr``.

    Returns:
        Every recognised text line followed by ``'\\n'``, concatenated in the
        order the engine reports them. An empty string when nothing was
        recognised.
    """
    pages = ocr.ocr(path, cls=False)

    recognised = []
    # NOTE(review): some PaddleOCR releases appear to report a page without
    # any detected text as None (and may return None overall) -- confirm
    # against the installed version. Both cases are treated as "no lines"
    # instead of crashing on iteration.
    for page in pages or []:
        if not page:
            continue
        # Each entry carries its text at [1][0].
        recognised.extend(entry[1][0] + '\n' for entry in page)
    return "".join(recognised)

def is_chinese_only(s):
    """Return True when every character of *s* lies in U+4E00..U+9FFF.

    The check is per character, so an empty string (no characters to
    reject) also yields True.
    """
    return all('\u4e00' <= ch <= '\u9fff' for ch in s)


def parse_text(text: str) -> dict:
    """Pull the label fields out of OCR text.

    ASCII spaces are removed and full-width parentheses are folded to ASCII
    before matching. The normalised text is printed to stdout.

    Args:
        text: Newline-separated OCR output.

    Returns:
        A dict with the keys ``code`` (always 200), ``packNo``, ``lotno``,
        ``spec``, ``specstr`` and ``company``. A field that is not found
        keeps its default: an empty string, or the preset company name.
    """
    text = text.replace(' ', '').replace('（', '(').replace('）', ')')
    print(f"{text}")
    data = {
        "code": 200,
        "packNo": "",
        "lotno": "",
        "spec": "",
        "specstr": "",
        "company": "双维伊士曼纤维有限公司"
    }

    # One pattern per field; the first match in the text wins and group 1 is
    # the value. `(?:\n|\Z)` lets the last line match even when the text does
    # not end with a newline.
    field_patterns = (
        # A digit run not preceded by a digit or dot, ending its line.
        ("packNo", r"(?<![0-9.])(\d+)(?:\n|\Z)"),
        # digits/dots, a hyphen, digits/dots, ending its line (e.g. 3.0-309).
        ("lotno", r"\n*([0-9][0-9.]+-[0-9.]+)(?:\n|\Z)"),
        # number, 'Y', number (e.g. 3.33Y3.56).
        ("spec", r"\n*([0-9.]+Y[0-9.]+)"),
        # Parenthesised number, 'Y', digits (e.g. (3.0Y32000)).
        ("specstr", r"\((\d+\.*\d+Y\d+)\)"),
    )
    for key, pattern in field_patterns:
        found = re.search(pattern, text)
        if found:
            data[key] = found.group(1).strip()

    # The first line made up only of Chinese characters is taken as the
    # company name. Blank lines are skipped explicitly: is_chinese_only("")
    # is True, which would otherwise replace the default with "".
    for line in text.strip().split('\n'):
        line = line.strip()
        if line and is_chinese_only(line):
            data["company"] = line
            break

    return data


# Script entry point: serve `app` with uvicorn on all interfaces (0.0.0.0),
# port 8008, using a single worker.
if __name__ == '__main__':
    uvicorn.run(app, host='0.0.0.0', port=8008, workers=1)


# Commented-out manual check of parse_text() against a sample OCR transcript:
# # # # text = extract_text("./sisu2/6.jpg")
# text =  """
# 3.33Y3.56
# 6044060302
# 双维伊士曼纤维有限公司
# (3.0 Y  3 2000)
# ho:3.0-309
# """
# result = parse_text(text)
#
# print(f"result:\n {result}")