107 lines
2.6 KiB
Python
107 lines
2.6 KiB
Python
import os
|
||
import uvicorn
|
||
from fastapi import FastAPI, HTTPException
|
||
from paddleocr import PaddleOCR
|
||
from fastapi.responses import JSONResponse
|
||
import json
|
||
import re
|
||
|
||
ocr = PaddleOCR(lang='ch')
|
||
|
||
app = FastAPI()
|
||
|
||
@app.post("/sishu_ocr")
def sishu_ocr(path: str):
    """Run OCR on the file at ``path`` and respond with the parsed fields.

    Responds with HTTP 201 and ``{"code": "201"}`` when ``path`` is empty or
    does not exist on disk. Otherwise responds with HTTP 200 and the
    dictionary built by ``parse_text`` from the text that ``extract_text``
    recognised.
    """
    path_is_usable = bool(path) and os.path.exists(path)
    if not path_is_usable:
        return JSONResponse(content={"code": "201"}, status_code=201)

    recognised = extract_text(path)
    fields = parse_text(recognised)
    return JSONResponse(content=fields, status_code=200)
|
||
|
||
|
||
def extract_text(path: str) -> str:
    """Run the module-level ``ocr`` engine on ``path`` and return its text.

    Args:
        path: Location of the image passed to ``ocr.ocr``.

    Returns:
        Every recognised line, each terminated by a newline, in the order the
        engine reported them. An empty string when nothing was recognised.
    """
    pages = ocr.ocr(path, cls=False)
    recognised = []
    # PaddleOCR reports a page on which no text was detected as ``None``
    # (see the PaddleOCR documentation); iterating over it would raise
    # ``TypeError``, so such pages are skipped.
    for page in pages or []:
        if not page:
            continue
        # The recognised string of each entry is read from entry[1][0].
        recognised.extend(entry[1][0] + '\n' for entry in page)
    return "".join(recognised)
|
||
|
||
def is_chinese_only(s):
    """Return True when every character of ``s`` lies in U+4E00..U+9FFF.

    An empty string yields True, because it contains no character outside
    that range.
    """
    # U+4E00..U+9FFF is the range this file treats as Chinese characters.
    return all('\u4e00' <= char <= '\u9fff' for char in s)
|
||
|
||
|
||
def parse_text(text: str) -> dict:
    """Extract pack number, lot number, spec strings and company from OCR text.

    The text is expected to hold one recognised line per newline-terminated
    row, as produced by ``extract_text``.

    Args:
        text: Raw OCR text. Spaces are removed and full-width parentheses are
            converted to ASCII before matching.

    Returns:
        A dictionary with the keys ``code`` (always ``200``), ``packNo``,
        ``lotno``, ``spec``, ``specstr`` and ``company``. A field that is not
        found stays an empty string; ``company`` keeps its default value
        unless a non-empty line consisting only of Chinese characters is found.
    """
    # Drop spaces, then map full-width parentheses to ASCII so that the
    # ``specstr`` pattern (which looks for ASCII parentheses) can match them.
    text = (
        text.replace(' ', '')
        .replace('\uff08', '(')
        .replace('\uff09', ')')
    )
    print(text)
    data = {
        "code": 200,
        "packNo": "",
        "lotno": "",
        "spec": "",
        "specstr": "",
        "company": "双维伊士曼纤维有限公司",
    }

    # (result key, pattern) pairs; group 1 of the first match is stored.
    field_patterns = (
        # A run of digits ending a line and not preceded by a digit or a dot.
        ("packNo", r"(?<![0-9.])(\d+)\n"),
        # "<number>-<number>" at the end of a line.
        ("lotno", r"\n*([0-9][0-9.]+-[0-9.]+)\n"),
        # "<number>Y<number>" anywhere in the text.
        ("spec", r"\n*([0-9.]+Y[0-9.]+)"),
        # "<number>Y<digits>" enclosed in parentheses.
        ("specstr", r'\((\d+\.*\d+Y\d+)\)'),
    )

    try:
        for key, pattern in field_patterns:
            found = re.search(pattern, text)
            if found:
                data[key] = found.group(1).strip()

        for line in text.strip().split('\n'):
            line = line.strip()
            # A blank line is vacuously "Chinese only"; skip it so that it
            # cannot overwrite the default company with an empty string.
            if line and is_chinese_only(line):
                data["company"] = line
                break
    except Exception as e:
        # Best effort: report the problem and return what was parsed so far.
        print(e)
    return data
|
||
|
||
|
||
if __name__ == '__main__':
    # When executed as a script, serve ``app`` with uvicorn on all interfaces,
    # port 8008, using a single worker.
    server_options = {"host": '0.0.0.0', "port": 8008, "workers": 1}
    uvicorn.run(app, **server_options)
|
||
|
||
|
||
# # #
|
||
# # # # text = extract_text("./sisu2/6.jpg")
|
||
# text = """
|
||
# 3.33Y3.56
|
||
# 6044060302
|
||
# 双维伊士曼纤维有限公司
|
||
# (3.0 Y 3 2000)
|
||
# ho:3.0-309
|
||
# """
|
||
# result = parse_text(text)
|
||
#
|
||
# print(f"result:\n {result}") |