企业库ocr 识别修改
This commit is contained in:
parent
084f83e3be
commit
4ee54de8ac
|
|
@ -1,6 +1,6 @@
|
|||
ocr:
|
||||
service:
|
||||
url: http://192.168.0.37:8000/extract/ # ocr 请求地址
|
||||
healthUrl: http://192.168.0.37:8000/ # ocr 服务健康检查
|
||||
url: http://192.168.0.37:9091/extract # ocr 请求地址
|
||||
healthUrl: http://192.168.0.37:9091 # ocr 服务健康检查
|
||||
timeout: 30000 # ocr 请求超时时间
|
||||
max-connections: 100
|
||||
|
|
@ -3,6 +3,10 @@ package com.bonus.common.domain.ocr.vo;
|
|||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* @className:OcrResponse
|
||||
* @author:cwchen
|
||||
|
|
@ -14,15 +18,32 @@ import lombok.Data;
|
|||
public class OcrResponse {
|
||||
|
||||
@JsonProperty("status_code")
|
||||
private Integer statusCode; // 状态码
|
||||
private Integer statusCode; // 状态码
|
||||
|
||||
@JsonProperty("message")
|
||||
private String message; // 消息
|
||||
private String message; // 消息
|
||||
|
||||
@JsonProperty("data")
|
||||
private OcrData data; // 识别数据
|
||||
private Map<String, String> data; // 识别数据
|
||||
|
||||
public boolean isSuccess() {
|
||||
return statusCode != null && statusCode == 200;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取识别文本
|
||||
*/
|
||||
public String getText() {
|
||||
if (data == null) return null;
|
||||
return data.values().stream()
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.joining(" "));
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取特定字段值
|
||||
*/
|
||||
public String getFieldValue(String fieldName) {
|
||||
return data != null ? data.get(fieldName) : null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,37 +17,38 @@ import org.apache.http.entity.mime.MultipartEntityBuilder;
|
|||
import org.apache.http.entity.mime.HttpMultipartMode;
|
||||
import org.apache.http.entity.mime.content.FileBody;
|
||||
import org.apache.http.entity.ContentType;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* @className:OcrService
|
||||
* @author:cwchen
|
||||
* @date:2025-10-16-10:35
|
||||
* @version:1.0
|
||||
* @className: OcrService
|
||||
* @author: cwchen
|
||||
* @date: 2025-10-16-10:35
|
||||
* @version: 1.0
|
||||
* @description: OCR服务实现类
|
||||
*/
|
||||
@Service(value = "OcrService")
|
||||
@Slf4j
|
||||
public class OcrService {
|
||||
|
||||
private static final String UTF_8 = "UTF-8";
|
||||
private static final String FILE_PART_NAME = "file";
|
||||
private static final String TYPE_PART_NAME = "type";
|
||||
private static final String FIELDS_JSON_PART_NAME = "fields_json";
|
||||
|
||||
@Value("${ocr.service.url}")
|
||||
private String ocrServiceUrl;
|
||||
|
||||
@Value("${ocr.service.healthUrl}")
|
||||
private String healthUrl;
|
||||
|
||||
@Value("${ocr.service.timeout}")
|
||||
private int timeout;
|
||||
|
||||
private final CloseableHttpClient httpClient;
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
public OcrService() {
|
||||
// 配置HTTP客户端
|
||||
RequestConfig requestConfig = RequestConfig.custom()
|
||||
.setConnectTimeout(timeout)
|
||||
.setSocketTimeout(timeout)
|
||||
|
|
@ -62,71 +63,181 @@ public class OcrService {
|
|||
|
||||
/**
|
||||
* 调用OCR服务
|
||||
*
|
||||
* @param ocrRequest OCR请求参数
|
||||
* @return OCR响应结果
|
||||
* @throws IOException 当OCR服务调用失败时抛出
|
||||
*/
|
||||
public OcrResponse callOcrService(OcrRequest ocrRequest) throws IOException {
|
||||
validateOcrRequest(ocrRequest);
|
||||
|
||||
HttpPost httpPost = null;
|
||||
try {
|
||||
httpPost = createHttpPost(ocrRequest);
|
||||
return executeOcrRequest(httpPost);
|
||||
} catch (IOException e) {
|
||||
log.error("调用OCR服务失败", e);
|
||||
throw new IOException("OCR服务调用失败: " + e.getMessage(), e);
|
||||
} finally {
|
||||
cleanupResources(ocrRequest, httpPost);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 验证OCR请求参数
|
||||
*/
|
||||
private void validateOcrRequest(OcrRequest ocrRequest) {
|
||||
if (ocrRequest == null) {
|
||||
throw new IllegalArgumentException("OCR请求参数不能为空");
|
||||
}
|
||||
if (ocrRequest.getFile() == null || !ocrRequest.getFile().exists()) {
|
||||
throw new IllegalArgumentException("OCR文件不能为空或文件不存在");
|
||||
}
|
||||
if (ocrRequest.getType() == null || ocrRequest.getType().trim().isEmpty()) {
|
||||
throw new IllegalArgumentException("OCR类型不能为空");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建HTTP POST请求
|
||||
*/
|
||||
private HttpPost createHttpPost(OcrRequest ocrRequest) {
|
||||
HttpPost httpPost = new HttpPost(ocrServiceUrl);
|
||||
|
||||
MultipartEntityBuilder builder = MultipartEntityBuilder.create();
|
||||
builder.setCharset(StandardCharsets.UTF_8);
|
||||
builder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE);
|
||||
|
||||
// 添加文件字段
|
||||
builder.addPart(FILE_PART_NAME,
|
||||
new FileBody(ocrRequest.getFile(),
|
||||
ContentType.MULTIPART_FORM_DATA,
|
||||
ocrRequest.getFile().getName()));
|
||||
|
||||
// 添加类型字段
|
||||
builder.addTextBody(TYPE_PART_NAME,
|
||||
ocrRequest.getType(),
|
||||
ContentType.TEXT_PLAIN.withCharset(UTF_8));
|
||||
|
||||
// 添加解析内容字段
|
||||
if (ocrRequest.getFields_json() != null) {
|
||||
builder.addTextBody(FIELDS_JSON_PART_NAME,
|
||||
ocrRequest.getFields_json(),
|
||||
ContentType.TEXT_PLAIN.withCharset(UTF_8));
|
||||
}
|
||||
|
||||
httpPost.setEntity(builder.build());
|
||||
httpPost.setHeader("Accept", "application/json");
|
||||
|
||||
return httpPost;
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行OCR请求
|
||||
*/
|
||||
private OcrResponse executeOcrRequest(HttpPost httpPost) throws IOException {
|
||||
log.info("开始调用OCR服务识别");
|
||||
|
||||
try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
|
||||
return processHttpResponse(response);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理HTTP响应
|
||||
*/
|
||||
private OcrResponse processHttpResponse(CloseableHttpResponse response) throws IOException {
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
String responseBody = getResponseBody(response);
|
||||
|
||||
log.info("OCR服务响应状态: {}", statusCode);
|
||||
log.debug("OCR服务响应内容: {}", responseBody); // 改为debug级别,避免日志过大
|
||||
|
||||
// 检查HTTP状态码
|
||||
if (statusCode != 200) {
|
||||
log.error("OCR服务HTTP请求失败,状态码: {}, 响应: {}", statusCode, responseBody);
|
||||
throw new IOException("OCR服务HTTP请求失败,状态码: " + statusCode);
|
||||
}
|
||||
|
||||
OcrResponse ocrResponse = parseResponseBody(responseBody);
|
||||
handleOcrResult(ocrResponse);
|
||||
|
||||
return ocrResponse;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取响应体
|
||||
*/
|
||||
private String getResponseBody(CloseableHttpResponse response) throws IOException {
|
||||
HttpEntity entity = response.getEntity();
|
||||
return EntityUtils.toString(entity, UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析响应体
|
||||
*/
|
||||
private OcrResponse parseResponseBody(String responseBody) throws IOException {
|
||||
try {
|
||||
// 创建 multipart entity builder
|
||||
MultipartEntityBuilder builder = MultipartEntityBuilder.create();
|
||||
builder.setCharset(StandardCharsets.UTF_8);
|
||||
builder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE);
|
||||
return objectMapper.readValue(responseBody, OcrResponse.class);
|
||||
} catch (IOException e) {
|
||||
log.error("解析OCR响应失败,响应内容: {}", responseBody, e);
|
||||
throw new IOException("解析OCR响应失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
// 添加文件字段
|
||||
builder.addPart("file",
|
||||
new FileBody(ocrRequest.getFile(), ContentType.MULTIPART_FORM_DATA, ocrRequest.getFile().getName()));
|
||||
// 添加类型字段
|
||||
builder.addTextBody("type", ocrRequest.getType(),
|
||||
ContentType.TEXT_PLAIN.withCharset("UTF-8"));
|
||||
// 添加解析内容字段
|
||||
builder.addTextBody("fields_json", ocrRequest.getFields_json(),
|
||||
ContentType.TEXT_PLAIN.withCharset("UTF-8"));
|
||||
/**
|
||||
* 处理OCR识别结果
|
||||
*/
|
||||
private void handleOcrResult(OcrResponse ocrResponse) {
|
||||
if (ocrResponse.isSuccess()) {
|
||||
log.info("OCR识别成功");
|
||||
logOcrResults(ocrResponse);
|
||||
} else {
|
||||
log.warn("OCR识别失败: {}", ocrResponse.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// 设置请求实体 - 注意:这里会自动设置正确的 Content-Type 和 boundary
|
||||
httpPost.setEntity(builder.build());
|
||||
// 设置 Accept 头
|
||||
httpPost.setHeader("Accept", "application/json");
|
||||
|
||||
log.info("OCR服务开始识别");
|
||||
// 执行请求
|
||||
try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
|
||||
HttpEntity entity = response.getEntity();
|
||||
String responseBody = EntityUtils.toString(entity, "UTF-8");
|
||||
log.info("OCR服务响应状态: {}", response.getStatusLine().getStatusCode());
|
||||
log.info("OCR服务响应内容: {}", responseBody);
|
||||
|
||||
// 解析响应
|
||||
OcrResponse ocrResponse = objectMapper.readValue(responseBody, OcrResponse.class);
|
||||
|
||||
if (ocrResponse.isSuccess()) {
|
||||
log.info("OCR识别成功");
|
||||
// 获取识别结果
|
||||
if (ocrResponse.getData() != null && ocrResponse.getData().getChatRes() != null) {
|
||||
Map<String, String> chatRes = ocrResponse.getData().getChatRes();
|
||||
chatRes.forEach((k, v) -> {
|
||||
log.info("识别结果 - key: {}, value: {}", k, v);
|
||||
});
|
||||
/**
|
||||
* 记录OCR识别结果
|
||||
*/
|
||||
private void logOcrResults(OcrResponse ocrResponse) {
|
||||
Optional.ofNullable(ocrResponse.getData())
|
||||
.ifPresent(data -> {
|
||||
if (log.isInfoEnabled()) {
|
||||
data.forEach((key, value) ->
|
||||
log.info("识别结果 - key: {}, value: {}", key, value));
|
||||
}
|
||||
} else {
|
||||
log.warn("OCR识别失败: {}", ocrResponse.getMessage());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return ocrResponse;
|
||||
}
|
||||
// return null;
|
||||
} catch (Exception e) {
|
||||
log.error("调用OCR服务失败", e);
|
||||
return null;
|
||||
// throw new IOException("OCR服务调用失败: " + e.getMessage(), e);
|
||||
} finally {
|
||||
// 清理临时文件
|
||||
if (ocrRequest.getFile() != null && ocrRequest.getFile().exists()) {
|
||||
/**
|
||||
* 清理资源
|
||||
*/
|
||||
private void cleanupResources(OcrRequest ocrRequest, HttpPost httpPost) {
|
||||
// 清理HTTP连接
|
||||
if (httpPost != null) {
|
||||
httpPost.releaseConnection();
|
||||
}
|
||||
|
||||
// 清理临时文件
|
||||
cleanupTempFile(ocrRequest);
|
||||
}
|
||||
|
||||
/**
|
||||
* 清理临时文件
|
||||
*/
|
||||
private void cleanupTempFile(OcrRequest ocrRequest) {
|
||||
if (ocrRequest.getFile() != null && ocrRequest.getFile().exists()) {
|
||||
try {
|
||||
boolean deleted = ocrRequest.getFile().delete();
|
||||
if (!deleted) {
|
||||
log.warn("临时文件删除失败: {}", ocrRequest.getFile().getAbsolutePath());
|
||||
} else {
|
||||
log.debug("临时文件已删除: {}", ocrRequest.getFile().getAbsolutePath());
|
||||
}
|
||||
} catch (SecurityException e) {
|
||||
log.error("删除临时文件时发生安全异常: {}", ocrRequest.getFile().getAbsolutePath(), e);
|
||||
}
|
||||
httpPost.releaseConnection();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -135,9 +246,19 @@ public class OcrService {
|
|||
*/
|
||||
public void close() {
|
||||
try {
|
||||
httpClient.close();
|
||||
if (httpClient != null) {
|
||||
httpClient.close();
|
||||
log.info("OCR服务HTTP客户端已关闭");
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("关闭HTTP客户端失败", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 销毁方法,用于Spring容器关闭时调用
|
||||
*/
|
||||
public void destroy() {
|
||||
close();
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue