招标解析算法服务

This commit is contained in:
cwchen 2025-11-29 13:31:51 +08:00
parent 31521edfd3
commit a808af2289
5 changed files with 385 additions and 11 deletions

View File

@ -1,7 +1,7 @@
ocr:
service:
url: http://192.168.0.37:9091/extract # OCR request endpoint
healthUrl: http://192.168.0.37:9091 # OCR service health-check endpoint
convertUrl: http://192.168.0.37:10000/extract # Word-to-PDF conversion request endpoint
analysisUrl: http://192.168.0.37:10001/extract # tender-document analysis algorithm service request endpoint
timeout: 30000 # OCR request timeout (presumably milliseconds; it is passed to the HTTP client timeouts - confirm)
max-connections: 100

View File

@ -0,0 +1,32 @@
package com.bonus.common.domain.ocr.vo;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
/**
 * Response payload of the tender-document analysis algorithm service.
 *
 * <p>Field names are bound to the JSON keys {@code status}, {@code message} and {@code data}.
 *
 * @author cwchen
 * @date 2025-10-16
 * @version 1.0
 */
@Data
public class AnalysisResponse {

    /** Value of {@code status} that marks a successful call. */
    private static final String STATUS_SUCCESS = "success";

    /** Status reported by the service. */
    @JsonProperty("status")
    private String status;

    /** Message accompanying the status. */
    @JsonProperty("message")
    private String message;

    /** Recognised data as string key/value pairs. */
    @JsonProperty("data")
    private Map<String, String> data;

    /**
     * Tells whether the service reported success.
     *
     * @return {@code true} only when {@code status} equals {@code "success"};
     *         {@code false} otherwise, including when {@code status} is {@code null}
     */
    public boolean isSuccess() {
        return Objects.equals(STATUS_SUCCESS, status);
    }
}

View File

@ -74,7 +74,7 @@ public class FileUtil {
/**
* 根据文件名获取MIME类型
*/
private static String getMimeTypeByFilename(String filename) {
public static String getMimeTypeByFilename(String filename) {
if (filename == null) {
return "application/octet-stream";
}

View File

@ -0,0 +1,261 @@
package com.bonus.ocr.service;
import com.bonus.common.domain.ocr.dto.OcrRequest;
import com.bonus.common.domain.ocr.vo.AnalysisResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.HttpMultipartMode;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
/**
 * Client for the tender-document analysis algorithm service.
 *
 * <p>Sends the request file as a multipart POST to the URL configured under
 * {@code ocr.service.analysisUrl} and maps the JSON reply onto {@link AnalysisResponse}.
 * The file referenced by the request is deleted once the call has finished, whether it
 * succeeded or not.
 *
 * @author cwchen
 * @date 2025-10-16
 * @version 1.0
 */
@Service(value = "AnalysisOcrService")
@Slf4j
public class AnalysisOcrService implements DisposableBean {

    private static final String FILE_PART_NAME = "file";
    private static final String TYPE_PART_NAME = "type";
    private static final String FIELDS_JSON_PART_NAME = "fields_json";

    @Value("${ocr.service.analysisUrl}")
    private String ocrServiceUrl;

    @Value("${ocr.service.timeout}")
    private int timeout;

    private final CloseableHttpClient httpClient;
    private final ObjectMapper objectMapper;

    /**
     * Creates the shared HTTP client and JSON mapper.
     *
     * <p>The configured timeout is deliberately NOT read here: {@code @Value} fields are
     * populated only after the constructor has returned, so {@code timeout} is still 0 at
     * this point (which HttpClient interprets as "wait forever"). The timeout is applied to
     * every request in {@link #createHttpPost(OcrRequest)} instead.
     */
    public AnalysisOcrService() {
        this.httpClient = HttpClients.custom().build();
        this.objectMapper = new ObjectMapper();
    }

    /**
     * Calls the tender-document analysis service.
     *
     * @param ocrRequest request carrying the file, its type and optional {@code fields_json}
     * @return the parsed response, or {@code null} when the call fails with an I/O error,
     *         the service answers with a non-200 status, or the body cannot be parsed
     * @throws IllegalArgumentException if the request, its file or its type is missing
     * @throws IOException declared for backward compatibility; I/O failures are logged and
     *         reported as a {@code null} result
     */
    public AnalysisResponse callOcrService(OcrRequest ocrRequest) throws IOException {
        validateOcrRequest(ocrRequest);
        HttpPost httpPost = null;
        try {
            httpPost = createHttpPost(ocrRequest);
            return executeOcrRequest(httpPost);
        } catch (IOException e) {
            log.error("调用招标解析算法服务失败", e);
            return null;
        } finally {
            cleanupResources(ocrRequest, httpPost);
        }
    }

    /**
     * Validates the request before any resource is allocated.
     *
     * @throws IllegalArgumentException if the request is {@code null}, the file is missing
     *         or does not exist, or the type is blank
     */
    private void validateOcrRequest(OcrRequest ocrRequest) {
        if (ocrRequest == null) {
            throw new IllegalArgumentException("招标解析算法请求参数不能为空");
        }
        if (ocrRequest.getFile() == null || !ocrRequest.getFile().exists()) {
            throw new IllegalArgumentException("招标解析文件不能为空或文件不存在");
        }
        if (ocrRequest.getType() == null || ocrRequest.getType().trim().isEmpty()) {
            throw new IllegalArgumentException("招标解析类型不能为空");
        }
    }

    /**
     * Builds the timeout configuration from the injected {@code ocr.service.timeout} value.
     * Built per request because the value is not yet available in the constructor.
     */
    private RequestConfig buildRequestConfig() {
        return RequestConfig.custom()
                .setConnectTimeout(timeout)
                .setSocketTimeout(timeout)
                .setConnectionRequestTimeout(timeout)
                .build();
    }

    /**
     * Builds the multipart POST request: the file part, the type part and, when present,
     * the {@code fields_json} part.
     */
    private HttpPost createHttpPost(OcrRequest ocrRequest) {
        HttpPost httpPost = new HttpPost(ocrServiceUrl);
        httpPost.setConfig(buildRequestConfig());

        MultipartEntityBuilder builder = MultipartEntityBuilder.create();
        builder.setCharset(StandardCharsets.UTF_8);
        builder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE);

        // File part.
        // NOTE(review): the part is labelled multipart/form-data rather than the file's own
        // media type; kept unchanged because the receiving service's expectations are not
        // visible here - confirm.
        builder.addPart(FILE_PART_NAME,
                new FileBody(ocrRequest.getFile(),
                        ContentType.MULTIPART_FORM_DATA,
                        ocrRequest.getFile().getName()));

        // Type part.
        builder.addTextBody(TYPE_PART_NAME,
                ocrRequest.getType(),
                ContentType.TEXT_PLAIN.withCharset(StandardCharsets.UTF_8));

        // Optional description of the fields to extract.
        if (ocrRequest.getFields_json() != null) {
            builder.addTextBody(FIELDS_JSON_PART_NAME,
                    ocrRequest.getFields_json(),
                    ContentType.TEXT_PLAIN.withCharset(StandardCharsets.UTF_8));
        }

        httpPost.setEntity(builder.build());
        httpPost.setHeader("Accept", "application/json");
        return httpPost;
    }

    /**
     * Executes the request and converts the reply; the HTTP response is always closed.
     */
    private AnalysisResponse executeOcrRequest(HttpPost httpPost) throws IOException {
        log.info("开始调用招标解析算法服务");
        try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
            return processHttpResponse(response);
        }
    }

    /**
     * Reads the response body, checks the HTTP status and parses the JSON payload.
     *
     * @return the parsed response, or {@code null} for a non-200 status or an unusable body
     */
    private AnalysisResponse processHttpResponse(CloseableHttpResponse response) throws IOException {
        int statusCode = response.getStatusLine().getStatusCode();
        String responseBody = getResponseBody(response);
        log.info("招标解析算法服务响应状态: {}", statusCode);
        // Body is logged at debug level only, to keep the log volume down.
        log.debug("招标解析算法响应内容: {}", responseBody);

        if (statusCode != 200) {
            log.error("招标解析算法HTTP请求失败状态码: {}, 响应: {}", statusCode, responseBody);
            return null;
        }
        return parseResponseBody(responseBody);
    }

    /**
     * Reads the response body as UTF-8 text.
     *
     * @return the body, or {@code null} when the response carries no entity
     */
    private String getResponseBody(CloseableHttpResponse response) throws IOException {
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            // EntityUtils.toString rejects a null entity with IllegalArgumentException.
            return null;
        }
        return EntityUtils.toString(entity, StandardCharsets.UTF_8);
    }

    /**
     * Parses the JSON body into an {@link AnalysisResponse}.
     *
     * @return the parsed response, or {@code null} when the body is missing, blank or not
     *         valid JSON for the target type
     */
    private AnalysisResponse parseResponseBody(String responseBody) {
        if (responseBody == null || responseBody.trim().isEmpty()) {
            log.error("解析招标解析算法响应失败,响应内容: {}", responseBody);
            return null;
        }
        try {
            return objectMapper.readValue(responseBody, AnalysisResponse.class);
        } catch (IOException e) {
            log.error("解析招标解析算法响应失败,响应内容: {}", responseBody, e);
            return null;
        }
    }

    /**
     * Logs the outcome of a recognition call. Not invoked anywhere in this class at present.
     */
    private void handleOcrResult(AnalysisResponse analysisResponse) {
        if (analysisResponse.isSuccess()) {
            log.info("OCR识别成功");
            logOcrResults(analysisResponse);
        } else {
            // The placeholder is required, otherwise the service message is never printed.
            log.warn("OCR识别失败: {}", analysisResponse.getMessage());
        }
    }

    /**
     * Logs every key/value pair of the recognised data at info level.
     */
    private void logOcrResults(AnalysisResponse analysisResponse) {
        Optional.ofNullable(analysisResponse.getData())
                .ifPresent(data -> {
                    if (log.isInfoEnabled()) {
                        data.forEach((key, value) ->
                                log.info("识别结果 - key: {}, value: {}", key, value));
                    }
                });
    }

    /**
     * Releases the HTTP connection and deletes the request file.
     */
    private void cleanupResources(OcrRequest ocrRequest, HttpPost httpPost) {
        if (httpPost != null) {
            httpPost.releaseConnection();
        }
        cleanupTempFile(ocrRequest);
    }

    /**
     * Deletes the file referenced by the request, if it still exists. A failed deletion is
     * logged and otherwise ignored.
     */
    private void cleanupTempFile(OcrRequest ocrRequest) {
        if (ocrRequest.getFile() != null && ocrRequest.getFile().exists()) {
            try {
                boolean deleted = ocrRequest.getFile().delete();
                if (!deleted) {
                    log.warn("临时文件删除失败: {}", ocrRequest.getFile().getAbsolutePath());
                } else {
                    log.debug("临时文件已删除: {}", ocrRequest.getFile().getAbsolutePath());
                }
            } catch (SecurityException e) {
                log.error("删除临时文件时发生安全异常: {}", ocrRequest.getFile().getAbsolutePath(), e);
            }
        }
    }

    /**
     * Closes the underlying HTTP client; a failure to close is logged, not rethrown.
     */
    public void close() {
        try {
            if (httpClient != null) {
                httpClient.close();
                log.info("招标解析算法服务HTTP客户端已关闭");
            }
        } catch (IOException e) {
            log.error("关闭HTTP客户端失败", e);
        }
    }

    /**
     * Shutdown hook: implementing {@link DisposableBean} is what makes the Spring container
     * call this method when the bean is destroyed.
     */
    @Override
    public void destroy() {
        close();
    }
}

View File

@ -1,6 +1,12 @@
package com.bonus.rabbitmq.consumer;
import com.bonus.common.domain.ocr.dto.OcrRequest;
import com.bonus.common.domain.ocr.vo.AnalysisResponse;
import com.bonus.common.domain.rabbitmq.dto.RabbitMqMessage;
import com.bonus.common.utils.FileUtil;
import com.bonus.file.config.MinioConfig;
import com.bonus.file.util.MinioUtil;
import com.bonus.ocr.service.AnalysisOcrService;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.rabbitmq.client.Channel;
import lombok.extern.slf4j.Slf4j;
@ -8,8 +14,11 @@ import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
/**
* @className:RabbitMQConsumerService
@ -24,6 +33,15 @@ public class RabbitMQConsumerService {
private final ObjectMapper objectMapper = new ObjectMapper();
@Resource
private MinioConfig minioConfig;
@Resource
private MinioUtil minioUtil;
@Resource(name = "AnalysisOcrService")
private AnalysisOcrService analysisOcrService;
@RabbitListener(
queues = "myQueue",
containerFactory = "multiConsumerFactory" // 使用上面配置的工厂保证按顺序消费
@ -34,7 +52,6 @@ public class RabbitMQConsumerService {
log.info("🎯 RabbitMQConsumerService 收到原始消息 - deliveryTag: {}, body: {}",
deliveryTag, rawBody);
RabbitMqMessage message;
try {
// 手动反序列化 JSON RabbitMqMessage避免类型映射问题导致监听方法不执行
@ -56,14 +73,7 @@ public class RabbitMQConsumerService {
try {
log.info("🛠 开始处理消息内容 - ID: {}, 任务: {}, 业务数据: {}",
messageId, taskName, message.getBusinessData());
// 模拟少量业务处理耗时避免长时间阻塞
/*try {
Thread.sleep(500);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}*/
processBusiness(message);
// 处理成功手动确认消息
channel.basicAck(deliveryTag, false);
log.info("✅ 消息处理完成并确认 - ID: {}, 投递标签: {}", messageId, deliveryTag);
@ -78,4 +88,75 @@ public class RabbitMQConsumerService {
}
}
}
/**
 * Handles one tender-analysis message: downloads the uploaded file from MinIO and submits
 * it to the analysis algorithm service.
 *
 * <p>The analysis result is not consumed here yet; failures surface as exceptions thrown
 * by the helper methods.
 *
 * @param message queue message whose upload path identifies the object to analyse
 * @throws IllegalArgumentException if the message carries no upload path
 * @author cwchen
 * @date 2025/11/29 13:25
 */
private void processBusiness(RabbitMqMessage message) {
    String uploadPath = message.getUploadPath();
    // Fail fast with a clear reason instead of an opaque storage error further down.
    if (uploadPath == null || uploadPath.trim().isEmpty()) {
        throw new IllegalArgumentException("消息中的文件上传路径不能为空");
    }
    File fileFromMinio = getFileFromMinio(uploadPath);
    performAnalysisRecognition(fileFromMinio);
}
/**
 * Fetches a file from MinIO, using the bucket configured in {@code minioConfig}.
 *
 * @param uploadPath object path inside the bucket
 * @return the fetched file; never {@code null} and always existing
 * @throws RuntimeException if the download fails (original exception kept as the cause) or
 *         if no existing file is returned
 * @author cwchen
 * @date 2025/11/29 13:23
 */
private File getFileFromMinio(String uploadPath) {
    File file;
    try {
        file = minioUtil.getFileFromMinio(minioConfig.getBucketName(), uploadPath);
    } catch (Exception e) {
        throw new RuntimeException("获取Minio文件失败: " + uploadPath, e);
    }
    // Checked outside the try block so the "not found" error is not caught and re-wrapped
    // by this method's own catch clause.
    if (file == null || !file.exists()) {
        throw new RuntimeException("Minio文件不存在: " + uploadPath);
    }
    return file;
}
/**
 * Submits a file to the tender-document analysis algorithm service.
 *
 * <p>A response whose status is not successful is logged as a warning and still returned,
 * so the caller must check {@link AnalysisResponse#isSuccess()} before using the data.
 *
 * @param file file to analyse
 * @return the service response; never {@code null}
 * @throws RuntimeException if the call fails (original exception kept as the cause) or the
 *         service yields no response
 * @author cwchen
 * @date 2025/11/29 13:30
 */
private AnalysisResponse performAnalysisRecognition(File file) {
    AnalysisResponse ocrResponse;
    try {
        ocrResponse = analysisOcrService.callOcrService(buildOcrRequest(file));
    } catch (Exception e) {
        log.error("OCR识别失败", e);
        throw new RuntimeException("OCR识别失败: " + e.getMessage(), e);
    }
    // Checked outside the try block so this error is not caught and re-wrapped above.
    if (Objects.isNull(ocrResponse)) {
        log.error("OCR识别失败: 招标解析算法服务返回结果为空");
        throw new RuntimeException("招标解析算法服务返回结果为空");
    }
    if (ocrResponse.isSuccess()) {
        log.info("OCR识别成功 - 数据: {}", ocrResponse.getData());
    } else {
        // Do not report success when the service itself signalled a failure.
        log.warn("招标解析算法服务返回失败状态 - status: {}, message: {}",
                ocrResponse.getStatus(), ocrResponse.getMessage());
    }
    return ocrResponse;
}
/**
 * Assembles the request sent to the tender-document analysis algorithm service.
 *
 * <p>The request type is the MIME type looked up from the file name.
 *
 * @param file file to analyse
 * @return a request carrying the file and its type
 * @author cwchen
 * @date 2025/11/29 13:29
 */
private OcrRequest buildOcrRequest(File file) {
    String mimeType = FileUtil.getMimeTypeByFilename(file.getName());
    OcrRequest request = new OcrRequest();
    request.setFile(file);
    request.setType(mimeType);
    return request;
}
}