ocr_recognition/src/main/java/com/ytlk/ocr/table/HandleDataUtil.java

175 lines
7.5 KiB
Java

package com.ytlk.ocr.table;
import com.ytlk.ocr.OCRSwingArea;
import com.ytlk.ocr.util.CompareDataUtil;
import com.ytlk.ocr.util.FileUtils;
import com.ytlk.ocr.vo.UserErrorVo;
import com.ytlk.ocr.vo.UserVo;
import com.ytlk.util.OcrUtil;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import java.io.File;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
/**
 * Reads the files listed in {@code OCRSwingArea.table} (Excel) and
 * {@code OCRSwingArea.table2} (PDF), validates the rows that were read and
 * compares the Excel rows with the PDF rows of the same base file name.
 *
 * @author cwchen
 * @version 1.0 (2025-03-11)
 */
public class HandleDataUtil {

    /** Map key holding the source file name (with extension). */
    private static final String KEY_FILE_NAME = "fileName";
    /** Map key holding the rows read from a file. */
    private static final String KEY_LIST = "list";
    /** Map key under which {@link #mergeData} stores the rows of the matching PDF. */
    private static final String KEY_PDF_LIST = "pdfList";
    /** Map key holding the comparison findings for one file. */
    private static final String KEY_ERROR_LIST = "errorList";

    /**
     * Loads every Excel and PDF file referenced by the two tables, validates the
     * loaded rows and then compares each Excel file with its PDF counterpart.
     *
     * <p>The files are read sequentially; only the comparison step runs on a
     * thread pool, one task per merged Excel entry. The pool is created only
     * when there is at least one task, so two empty tables no longer trigger an
     * {@code IllegalArgumentException} from a zero-sized pool.
     *
     * @return a {@code String} with the first validation message when
     *         {@link #checkData(Map)} rejects a file; otherwise a
     *         {@code List<Map<String, Object>>} with one map per Excel file
     *         containing {@code "fileName"} (base name without extension) and
     *         {@code "errorList"} (the collected {@code UserErrorVo} findings).
     *         If any step throws, the stack trace is printed and the list
     *         collected so far (possibly empty) is returned.
     */
    public static Object handleData() {
        List<Map<String, Object>> errorDataList = new ArrayList<>();
        ExecutorService executor = null;
        try {
            // Read everything first, then validate: Excel files before PDF files.
            List<Map<String, Object>> excelDataList = readExcelRows();
            List<Map<String, Object>> pdfDataList = readPdfRows();

            String validationMessage = firstValidationMessage(excelDataList);
            if (StringUtils.isNotBlank(validationMessage)) {
                return validationMessage;
            }
            validationMessage = firstValidationMessage(pdfDataList);
            if (StringUtils.isNotBlank(validationMessage)) {
                return validationMessage;
            }

            // Attach the PDF rows to the Excel entry with the same base name.
            List<Map<String, Object>> mergedDataList = mergeData(excelDataList, pdfDataList);
            if (!mergedDataList.isEmpty()) {
                // The inputs are already loaded, so more threads than cores are
                // unlikely to help; never ask for a pool of size zero.
                int poolSize = Math.max(1,
                        Math.min(mergedDataList.size(), Runtime.getRuntime().availableProcessors()));
                executor = Executors.newFixedThreadPool(poolSize);

                List<Future<Map<String, Object>>> pending = new ArrayList<>(mergedDataList.size());
                for (Map<String, Object> merged : mergedDataList) {
                    Callable<Map<String, Object>> task = () -> compareMergedEntry(merged);
                    pending.add(executor.submit(task));
                }
                // Collect in submission order so the result order follows the Excel table.
                for (Future<Map<String, Object>> future : pending) {
                    errorDataList.add(future.get());
                }
            }
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller's thread instead of losing it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            // NOTE(review): failures are only printed and the caller receives a
            // partial/empty list, which looks the same as "no differences" - confirm
            // whether callers should be told about the failure.
            e.printStackTrace();
        } finally {
            if (executor != null) {
                executor.shutdown();
            }
        }
        return errorDataList;
    }

    /**
     * Validates the rows of one loaded file.
     *
     * @param map entry with {@code "fileName"} (String) and {@code "list"}
     *            (rows as {@code List<UserVo>})
     * @return a user-facing message describing the first problem found (empty
     *         row list, blank name, or {@code null} wage), or {@code null} when
     *         every row passes
     * @author cwchen
     */
    public static String checkData(Map<String, Object> map) {
        String fileName = (String) map.get(KEY_FILE_NAME);
        @SuppressWarnings("unchecked") // "list" is always filled with List<UserVo> by this class
        List<UserVo> rows = (List<UserVo>) map.get(KEY_LIST);

        // The file must contain at least one row.
        if (CollectionUtils.isEmpty(rows)) {
            return "文件:" + fileName + ",数据为空";
        }
        // Every row needs a name and a wage; report the first offender (1-based row number).
        int rowNumber = 0;
        for (UserVo row : rows) {
            rowNumber++;
            if (StringUtils.isBlank(row.getName())) {
                return "文件:" + fileName + ",表格中数据行第" + rowNumber + "行,姓名为空,请检查";
            }
            if (row.getWage() == null) {
                return "文件:" + fileName + ",表格中数据行第" + rowNumber + "行,实发工资为空,请检查";
            }
        }
        return null;
    }

    /**
     * Attaches to each Excel entry the rows of the PDF entry that has the same
     * file name once the extension is removed.
     *
     * <p>The Excel maps are modified in place: a matching PDF's {@code "list"}
     * value is stored under {@code "pdfList"}. When several PDF entries share a
     * base name the last one wins. Excel entries without a matching PDF get no
     * {@code "pdfList"} key, and PDF entries without a matching Excel entry are
     * ignored. File names without a dot are compared as-is.
     *
     * @param excelDataList entries read from Excel files
     * @param pdfDataList   entries read from PDF files
     * @return the same {@code excelDataList} instance that was passed in
     * @author cwchen
     */
    public static List<Map<String, Object>> mergeData(List<Map<String, Object>> excelDataList, List<Map<String, Object>> pdfDataList) {
        if (excelDataList.isEmpty()) {
            return excelDataList;
        }
        // Index the PDF rows by base name; later duplicates overwrite earlier ones.
        Map<String, Object> pdfRowsByBaseName = new HashMap<>();
        for (Map<String, Object> pdfEntry : pdfDataList) {
            String pdfBaseName = stripExtension((String) pdfEntry.get(KEY_FILE_NAME));
            pdfRowsByBaseName.put(pdfBaseName, pdfEntry.get(KEY_LIST));
        }
        for (Map<String, Object> excelEntry : excelDataList) {
            String excelBaseName = stripExtension((String) excelEntry.get(KEY_FILE_NAME));
            if (pdfRowsByBaseName.containsKey(excelBaseName)) {
                excelEntry.put(KEY_PDF_LIST, pdfRowsByBaseName.get(excelBaseName));
            }
        }
        return excelDataList;
    }

    /** Reads every file in column 0 of {@code OCRSwingArea.table} via {@code FileUtils.getExcelUsers}. */
    private static List<Map<String, Object>> readExcelRows() throws Exception {
        int rowCount = OCRSwingArea.table.getRowCount();
        List<Map<String, Object>> entries = new ArrayList<>(rowCount);
        for (int i = 0; i < rowCount; i++) {
            File file = (File) OCRSwingArea.table.getValueAt(i, 0);
            String path = file.getAbsoluteFile().toPath().toString();
            entries.add(fileEntry(file, FileUtils.getExcelUsers(path)));
        }
        return entries;
    }

    /** Reads every file in column 0 of {@code OCRSwingArea.table2} via {@code OcrUtil.ocrHandle}. */
    private static List<Map<String, Object>> readPdfRows() throws Exception {
        int rowCount = OCRSwingArea.table2.getRowCount();
        List<Map<String, Object>> entries = new ArrayList<>(rowCount);
        for (int i = 0; i < rowCount; i++) {
            File file = (File) OCRSwingArea.table2.getValueAt(i, 0);
            String path = file.getAbsoluteFile().toPath().toString();
            entries.add(fileEntry(file, OcrUtil.ocrHandle(path)));
        }
        return entries;
    }

    /** Builds the {@code fileName}/{@code list} map used throughout this class for one loaded file. */
    private static Map<String, Object> fileEntry(File file, List<UserVo> users) {
        Map<String, Object> entry = new HashMap<>();
        entry.put(KEY_FILE_NAME, file.getName());
        entry.put(KEY_LIST, users);
        return entry;
    }

    /** Returns the first non-blank {@link #checkData(Map)} message among the entries, or {@code null}. */
    private static String firstValidationMessage(List<Map<String, Object>> entries) {
        for (Map<String, Object> entry : entries) {
            String message = checkData(entry);
            if (StringUtils.isNotBlank(message)) {
                return message;
            }
        }
        return null;
    }

    /**
     * Runs the four {@code CompareDataUtil} comparisons for one merged entry and
     * gathers their findings, in call order, under {@code "errorList"}.
     */
    @SuppressWarnings("unchecked") // "list"/"pdfList" are always filled with List<UserVo> by this class
    private static Map<String, Object> compareMergedEntry(Map<String, Object> merged) throws Exception {
        String fileName = (String) merged.get(KEY_FILE_NAME);
        List<UserVo> excelList = (List<UserVo>) merged.get(KEY_LIST);
        // NOTE(review): pdfList is null when no PDF matched this Excel file;
        // confirm that CompareDataUtil tolerates a null list.
        List<UserVo> pdfList = (List<UserVo>) merged.get(KEY_PDF_LIST);

        // Per the original notes: people in Excel but not in the PDF ...
        List<UserErrorVo> missingInPdf = CompareDataUtil.compareData(excelList, pdfList);
        // ... people in the PDF but not in Excel ...
        List<UserErrorVo> missingInExcel = CompareDataUtil.compareData2(excelList, pdfList);
        // ... people whose wage differs between the two ...
        List<UserErrorVo> wageMismatches = CompareDataUtil.compareData3(excelList, pdfList);
        // ... and duplicated people.
        List<UserErrorVo> duplicates = CompareDataUtil.compareData4(excelList, pdfList);

        List<UserErrorVo> findings = new ArrayList<>();
        findings.addAll(missingInPdf);
        findings.addAll(missingInExcel);
        findings.addAll(wageMismatches);
        findings.addAll(duplicates);

        Map<String, Object> result = new HashMap<>();
        result.put(KEY_FILE_NAME, stripExtension(fileName));
        result.put(KEY_ERROR_LIST, findings);
        return result;
    }

    /**
     * Drops everything from the last {@code '.'} onwards. A name without a dot
     * is returned unchanged instead of raising
     * {@code StringIndexOutOfBoundsException}.
     */
    private static String stripExtension(String fileName) {
        int lastDot = fileName.lastIndexOf('.');
        return lastDot < 0 ? fileName : fileName.substring(0, lastDot);
    }
}