# YuXin_Liu/图像分割/sishu.py
#
# 107 lines
# 2.6 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import uvicorn
from fastapi import FastAPI, HTTPException
from paddleocr import PaddleOCR
from fastapi.responses import JSONResponse
import json
import re
# Module-level OCR engine, constructed once at import time with lang='ch';
# extract_text() calls it for every request.
ocr = PaddleOCR(lang='ch')
# FastAPI application object; the /sishu_ocr route is registered on it and it
# is the object handed to uvicorn in the script entry point.
app = FastAPI()
@app.post("/sishu_ocr")
def sishu_ocr(path: str):
    """Run OCR on a local image file and return the parsed label fields.

    Args:
        path: Filesystem path of the image, as seen by this server process.

    Returns:
        A JSONResponse. When ``path`` is empty or does not name an existing
        regular file, the body is ``{"code": "201"}`` with HTTP status 201.
        Otherwise the body is the dict produced by ``parse_text`` with HTTP
        status 200.
    """
    # NOTE(review): ``path`` is supplied by the caller and read from the
    # server's own filesystem; consider restricting it to a known directory.
    # isfile() rather than exists(): a directory path is rejected here instead
    # of failing later inside the OCR call.
    if not path or not os.path.isfile(path):
        return JSONResponse(content={"code": "201"}, status_code=201)
    text = extract_text(path)
    return JSONResponse(content=parse_text(text), status_code=200)
def extract_text(path: str) -> str:
    """Run the module-level OCR engine on *path* and return the recognized text.

    Each recognized line contributes its text followed by a newline, in the
    order the engine reports them. An image in which nothing is recognized
    yields the empty string.

    Args:
        path: Path of the image to recognize.

    Returns:
        The recognized lines joined into one newline-terminated string.
    """
    result = ocr.ocr(path, cls=False)
    recognized = []
    # NOTE(review): PaddleOCR is reported to return None (for the whole result
    # or for an individual page) when no text is detected; confirm against the
    # installed version. Both cases are skipped instead of raising TypeError.
    for page in result or []:
        if not page:
            continue
        # Each entry is indexed as entry[1][0] to obtain the recognized string.
        recognized.extend(entry[1][0] + '\n' for entry in page)
    return "".join(recognized)
def is_chinese_only(s):
    """Return True if *s* is non-empty and consists only of Chinese characters.

    A character counts as Chinese when it lies in the range U+4E00..U+9FFF.
    The empty string returns False, so that a blank line is never mistaken
    for a line of Chinese text.
    """
    return bool(s) and all('\u4e00' <= char <= '\u9fff' for char in s)
def parse_text(text: str):
    """Extract the label fields from OCR text.

    The text is normalized first (ASCII spaces removed, fullwidth parentheses
    converted to ASCII) and printed, then searched for each field. The first
    match of each pattern wins:

    * ``packNo``  - a run of digits that ends a line and is not preceded by a
      digit or a dot.
    * ``lotno``   - digits/dots, a hyphen, digits/dots, ending a line.
    * ``spec``    - digits/dots, the letter ``Y``, digits/dots.
    * ``specstr`` - the ``<number>Y<digits>`` text found inside parentheses.
    * ``company`` - the first non-blank line made up only of Chinese
      characters; a preset company name is used when no such line exists.

    Args:
        text: Newline-separated OCR output.

    Returns:
        A dict with keys ``code`` (always 200), ``packNo``, ``lotno``,
        ``spec``, ``specstr`` and ``company``. Fields that are not found are
        left as empty strings.
    """
    # Single pass: drop ASCII spaces and map the fullwidth parentheses
    # U+FF08 / U+FF09 to "(" / ")" so the specstr pattern below can match.
    # The code points are spelled out numerically so they cannot be lost or
    # confused with their ASCII look-alikes.
    text = text.translate({0x20: None, 0xFF08: "(", 0xFF09: ")"})
    print(f"{text}")
    data = {
        "code": 200,
        "packNo": "",
        "lotno": "",
        "spec": "",
        "specstr": "",
        "company": "双维伊士曼纤维有限公司"
    }
    field_patterns = (
        ("packNo", r"(?<![0-9.])(\d+)\n"),
        ("lotno", r"\n*([0-9][0-9.]+-[0-9.]+)\n"),
        ("spec", r"\n*([0-9.]+Y[0-9.]+)"),
        ("specstr", r'\((\d+\.*\d+Y\d+)\)'),
    )
    for key, pattern in field_patterns:
        found = re.search(pattern, text)
        if found:
            data[key] = found.group(1).strip()
    for line in text.strip().split('\n'):
        line = line.strip()
        # Skip blank lines so an empty string never replaces the default name.
        if line and is_chinese_only(line):
            data["company"] = line
            break
    return data
# Script entry point: when run directly, serve the FastAPI app with uvicorn
# on host 0.0.0.0, port 8008, using a single worker.
if __name__ == "__main__":
    server_options = {"host": "0.0.0.0", "port": 8008, "workers": 1}
    uvicorn.run(app, **server_options)
# # #
# # # # text = extract_text("./sisu2/6.jpg")
# text = """
# 3.33Y3.56
# 6044060302
# 双维伊士曼纤维有限公司
# (3.0 Y 3 2000)
# ho:3.0-309
# """
# result = parse_text(text)
#
# print(f"result:\n {result}")