From 4ee54de8ac35f1c223eca6b2483f068030045845 Mon Sep 17 00:00:00 2001 From: cwchen <1048842385@qq.com> Date: Mon, 24 Nov 2025 10:22:53 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=81=E4=B8=9A=E5=BA=93ocr=20=E8=AF=86?= =?UTF-8?q?=E5=88=AB=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/main/resources/application-ocr.yml | 4 +- .../common/domain/ocr/vo/OcrResponse.java | 27 +- .../com/bonus/ocr/service/OcrService.java | 247 +++++++++++++----- 3 files changed, 210 insertions(+), 68 deletions(-) diff --git a/bonus-admin/src/main/resources/application-ocr.yml b/bonus-admin/src/main/resources/application-ocr.yml index d60be1d..dcee7c6 100644 --- a/bonus-admin/src/main/resources/application-ocr.yml +++ b/bonus-admin/src/main/resources/application-ocr.yml @@ -1,6 +1,6 @@ ocr: service: - url: http://192.168.0.37:8000/extract/ # ocr 请求地址 - healthUrl: http://192.168.0.37:8000/ # ocr 服务健康检查 + url: http://192.168.0.37:9091/extract # ocr 请求地址 + healthUrl: http://192.168.0.37:9091 # ocr 服务健康检查 timeout: 30000 # ocr 请求超时时间 max-connections: 100 \ No newline at end of file diff --git a/bonus-common/src/main/java/com/bonus/common/domain/ocr/vo/OcrResponse.java b/bonus-common/src/main/java/com/bonus/common/domain/ocr/vo/OcrResponse.java index 2098b97..21c1b8b 100644 --- a/bonus-common/src/main/java/com/bonus/common/domain/ocr/vo/OcrResponse.java +++ b/bonus-common/src/main/java/com/bonus/common/domain/ocr/vo/OcrResponse.java @@ -3,6 +3,10 @@ package com.bonus.common.domain.ocr.vo; import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + /** * @className:OcrResponse * @author:cwchen @@ -14,15 +18,32 @@ import lombok.Data; public class OcrResponse { @JsonProperty("status_code") - private Integer statusCode; // 状态码 + private Integer statusCode; // 状态码 @JsonProperty("message") - private String message; // 消息 + private String message; // 消息 @JsonProperty("data") - private OcrData data; // 识别数据 + private Map data; // 识别数据 public boolean isSuccess() { return statusCode != null && statusCode == 200; } + + /** + * 获取识别文本 + */ + public String getText() { + if (data == null) return null; + return data.values().stream() + .filter(Objects::nonNull) + .collect(Collectors.joining(" ")); + } + + /** + * 获取特定字段值 + */ + public String getFieldValue(String fieldName) { + return data != null ? data.get(fieldName) : null; + } } diff --git a/bonus-ocr/src/main/java/com/bonus/ocr/service/OcrService.java b/bonus-ocr/src/main/java/com/bonus/ocr/service/OcrService.java index 230d0eb..88615c7 100644 --- a/bonus-ocr/src/main/java/com/bonus/ocr/service/OcrService.java +++ b/bonus-ocr/src/main/java/com/bonus/ocr/service/OcrService.java @@ -17,37 +17,38 @@ import org.apache.http.entity.mime.MultipartEntityBuilder; import org.apache.http.entity.mime.HttpMultipartMode; import org.apache.http.entity.mime.content.FileBody; import org.apache.http.entity.ContentType; -import java.nio.charset.StandardCharsets; + import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Map; +import java.util.Optional; /** - * @className:OcrService - * @author:cwchen - * @date:2025-10-16-10:35 - * @version:1.0 + * @className: OcrService + * @author: cwchen + * @date: 2025-10-16-10:35 + * @version: 1.0 * @description: OCR服务实现类 */ @Service(value = "OcrService") @Slf4j public class OcrService { + private static final String UTF_8 = "UTF-8"; + private static final String FILE_PART_NAME = "file"; + private static final String TYPE_PART_NAME = "type"; + private static final String FIELDS_JSON_PART_NAME = "fields_json"; @Value("${ocr.service.url}") private String ocrServiceUrl; - @Value("${ocr.service.healthUrl}") - private String healthUrl; - @Value("${ocr.service.timeout}") private int timeout; private final CloseableHttpClient httpClient; - private final ObjectMapper objectMapper; public OcrService() { - // 配置HTTP客户端 RequestConfig requestConfig = RequestConfig.custom() .setConnectTimeout(timeout) .setSocketTimeout(timeout) @@ -62,71 +63,181 @@ public class OcrService { /** * 调用OCR服务 + * + * @param ocrRequest OCR请求参数 + * @return OCR响应结果 + * @throws IOException 当OCR服务调用失败时抛出 */ public OcrResponse callOcrService(OcrRequest ocrRequest) throws IOException { + validateOcrRequest(ocrRequest); + + HttpPost httpPost = null; + try { + httpPost = createHttpPost(ocrRequest); + return executeOcrRequest(httpPost); + } catch (IOException e) { + log.error("调用OCR服务失败", e); + throw new IOException("OCR服务调用失败: " + e.getMessage(), e); + } finally { + cleanupResources(ocrRequest, httpPost); + } + } + + /** + * 验证OCR请求参数 + */ + private void validateOcrRequest(OcrRequest ocrRequest) { + if (ocrRequest == null) { + throw new IllegalArgumentException("OCR请求参数不能为空"); + } + if (ocrRequest.getFile() == null || !ocrRequest.getFile().exists()) { + throw new IllegalArgumentException("OCR文件不能为空或文件不存在"); + } + if (ocrRequest.getType() == null || ocrRequest.getType().trim().isEmpty()) { + throw new IllegalArgumentException("OCR类型不能为空"); + } + } + + /** + * 创建HTTP POST请求 + */ + private HttpPost createHttpPost(OcrRequest ocrRequest) { HttpPost httpPost = new HttpPost(ocrServiceUrl); + MultipartEntityBuilder builder = MultipartEntityBuilder.create(); + builder.setCharset(StandardCharsets.UTF_8); + builder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE); + + // 添加文件字段 + builder.addPart(FILE_PART_NAME, + new FileBody(ocrRequest.getFile(), + ContentType.MULTIPART_FORM_DATA, + ocrRequest.getFile().getName())); + + // 添加类型字段 + builder.addTextBody(TYPE_PART_NAME, + ocrRequest.getType(), + ContentType.TEXT_PLAIN.withCharset(UTF_8)); + + // 添加解析内容字段 + if (ocrRequest.getFields_json() != null) { + builder.addTextBody(FIELDS_JSON_PART_NAME, + ocrRequest.getFields_json(), + ContentType.TEXT_PLAIN.withCharset(UTF_8)); + } + + httpPost.setEntity(builder.build()); + httpPost.setHeader("Accept", "application/json"); + + return httpPost; + } + + /** + * 执行OCR请求 + */ + private OcrResponse executeOcrRequest(HttpPost httpPost) throws IOException { + log.info("开始调用OCR服务识别"); + + try (CloseableHttpResponse response = httpClient.execute(httpPost)) { + return processHttpResponse(response); + } + } + + /** + * 处理HTTP响应 + */ + private OcrResponse processHttpResponse(CloseableHttpResponse response) throws IOException { + int statusCode = response.getStatusLine().getStatusCode(); + String responseBody = getResponseBody(response); + + log.info("OCR服务响应状态: {}", statusCode); + log.debug("OCR服务响应内容: {}", responseBody); // 改为debug级别,避免日志过大 + + // 检查HTTP状态码 + if (statusCode != 200) { + log.error("OCR服务HTTP请求失败,状态码: {}, 响应: {}", statusCode, responseBody); + throw new IOException("OCR服务HTTP请求失败,状态码: " + statusCode); + } + + OcrResponse ocrResponse = parseResponseBody(responseBody); + handleOcrResult(ocrResponse); + + return ocrResponse; + } + + /** + * 获取响应体 + */ + private String getResponseBody(CloseableHttpResponse response) throws IOException { + HttpEntity entity = response.getEntity(); + return EntityUtils.toString(entity, UTF_8); + } + + /** + * 解析响应体 + */ + private OcrResponse parseResponseBody(String responseBody) throws IOException { try { - // 创建 multipart entity builder - MultipartEntityBuilder builder = MultipartEntityBuilder.create(); - builder.setCharset(StandardCharsets.UTF_8); - builder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE); + return objectMapper.readValue(responseBody, OcrResponse.class); + } catch (IOException e) { + log.error("解析OCR响应失败,响应内容: {}", responseBody, e); + throw new IOException("解析OCR响应失败: " + e.getMessage(), e); + } + } - // 添加文件字段 - builder.addPart("file", - new FileBody(ocrRequest.getFile(), ContentType.MULTIPART_FORM_DATA, ocrRequest.getFile().getName())); - // 添加类型字段 - builder.addTextBody("type", ocrRequest.getType(), - ContentType.TEXT_PLAIN.withCharset("UTF-8")); - // 添加解析内容字段 - builder.addTextBody("fields_json", ocrRequest.getFields_json(), - ContentType.TEXT_PLAIN.withCharset("UTF-8")); + /** + * 处理OCR识别结果 + */ + private void handleOcrResult(OcrResponse ocrResponse) { + if (ocrResponse.isSuccess()) { + log.info("OCR识别成功"); + logOcrResults(ocrResponse); + } else { + log.warn("OCR识别失败: {}", ocrResponse.getMessage()); + } + } - // 设置请求实体 - 注意:这里会自动设置正确的 Content-Type 和 boundary - httpPost.setEntity(builder.build()); - // 设置 Accept 头 - httpPost.setHeader("Accept", "application/json"); - - log.info("OCR服务开始识别"); - // 执行请求 - try (CloseableHttpResponse response = httpClient.execute(httpPost)) { - HttpEntity entity = response.getEntity(); - String responseBody = EntityUtils.toString(entity, "UTF-8"); - log.info("OCR服务响应状态: {}", response.getStatusLine().getStatusCode()); - log.info("OCR服务响应内容: {}", responseBody); - - // 解析响应 - OcrResponse ocrResponse = objectMapper.readValue(responseBody, OcrResponse.class); - - if (ocrResponse.isSuccess()) { - log.info("OCR识别成功"); - // 获取识别结果 - if (ocrResponse.getData() != null && ocrResponse.getData().getChatRes() != null) { - Map chatRes = ocrResponse.getData().getChatRes(); - chatRes.forEach((k, v) -> { - log.info("识别结果 - key: {}, value: {}", k, v); - }); + /** + * 记录OCR识别结果 + */ + private void logOcrResults(OcrResponse ocrResponse) { + Optional.ofNullable(ocrResponse.getData()) + .ifPresent(data -> { + if (log.isInfoEnabled()) { + data.forEach((key, value) -> + log.info("识别结果 - key: {}, value: {}", key, value)); } - } else { - log.warn("OCR识别失败: {}", ocrResponse.getMessage()); - } + }); + } - return ocrResponse; - } -// return null; - } catch (Exception e) { - log.error("调用OCR服务失败", e); - return null; -// throw new IOException("OCR服务调用失败: " + e.getMessage(), e); - } finally { - // 清理临时文件 - if (ocrRequest.getFile() != null && ocrRequest.getFile().exists()) { + /** + * 清理资源 + */ + private void cleanupResources(OcrRequest ocrRequest, HttpPost httpPost) { + // 清理HTTP连接 + if (httpPost != null) { + httpPost.releaseConnection(); + } + + // 清理临时文件 + cleanupTempFile(ocrRequest); + } + + /** + * 清理临时文件 + */ + private void cleanupTempFile(OcrRequest ocrRequest) { + if (ocrRequest.getFile() != null && ocrRequest.getFile().exists()) { + try { boolean deleted = ocrRequest.getFile().delete(); if (!deleted) { log.warn("临时文件删除失败: {}", ocrRequest.getFile().getAbsolutePath()); + } else { + log.debug("临时文件已删除: {}", ocrRequest.getFile().getAbsolutePath()); } + } catch (SecurityException e) { + log.error("删除临时文件时发生安全异常: {}", ocrRequest.getFile().getAbsolutePath(), e); } - httpPost.releaseConnection(); } } @@ -135,9 +246,19 @@ public class OcrService { */ public void close() { try { - httpClient.close(); + if (httpClient != null) { + httpClient.close(); + log.info("OCR服务HTTP客户端已关闭"); + } } catch (IOException e) { log.error("关闭HTTP客户端失败", e); } } -} + + /** + * 销毁方法,用于Spring容器关闭时调用 + */ + public void destroy() { + close(); + } +} \ No newline at end of file