diff --git a/src/main/java/com/ytlk/entity/ConfigVo.java b/src/main/java/com/ytlk/entity/ConfigVo.java
new file mode 100644
index 0000000..4a348d1
--- /dev/null
+++ b/src/main/java/com/ytlk/entity/ConfigVo.java
@@ -0,0 +1,19 @@
+package com.ytlk.entity;
+
+import lombok.Data;
+
+/**
+ * @className:ConfigVo
+ * @author:cwchen
+ * @date:2025-04-16-11:20
+ * @version:1.0
+ * @description:Configuration entity bound from the OCR config.json file
+ */
+@Data
+public class ConfigVo {
+
+    private String python; // Python interpreter executable used to launch the OCR script
+    private String pythonScript; // path of the OCR script handed to the interpreter
+    private String output; // base output directory; each run uses a timestamped sub-directory
+    private String directory; // working directory of the Python process
+}
diff --git a/src/main/java/com/ytlk/entity/OcrHandleVo.java b/src/main/java/com/ytlk/entity/OcrHandleVo.java
new file mode 100644
index 0000000..98e78cd
--- /dev/null
+++ b/src/main/java/com/ytlk/entity/OcrHandleVo.java
@@ -0,0 +1,19 @@
+package com.ytlk.entity;
+
+import lombok.Data;
+
+/**
+ * @className:OcrHandleVo
+ * @author:cwchen
+ * @date:2025-04-16-13:42
+ * @version:1.0
+ * @description:OCR recognition result VO, parsed from the script's *_result.json file
+ */
+@Data
+public class OcrHandleVo {
+
+    private String pdf_file;
+    private String[] name;
+    private String[] money;
+    private String[] name_money; // entries of the form "name-money"; OcrUtil splits them on "-"
+}
diff --git a/src/main/java/com/ytlk/fusion/test.java b/src/main/java/com/ytlk/fusion/test.java
new file mode 100644
index 0000000..d547250
--- /dev/null
+++ b/src/main/java/com/ytlk/fusion/test.java
@@ -0,0 +1,46 @@
+package com.ytlk.fusion;
+
+import com.alibaba.fastjson.JSONObject;
+
+import java.io.*;
+
+/**
+ * @className:test
+ * @author:cwchen
+ * @date:2025-04-16-9:06
+ * @version:1.0
+ * @description:
+ */
+public class test {
+
+
+    public static void main(String[] args) {
+        try {
+            // Build the ProcessBuilder
+            String python = "D:\\miniconda3\\envs\\ocr\\python.exe";
+            String pythonScript = new File("C:\\Users\\10488\\Desktop\\ocr-release\\main.py").getAbsolutePath();
+            String param1 = "C:\\Users\\10488\\Desktop\\1.pdf";
+            String param2 = "C:\\Users\\10488\\Desktop\\test (2)";
+            ProcessBuilder pb = new ProcessBuilder(
+                    python,
+                    pythonScript,
+                    "--input", param1,
+                    "--output", param2
+            );
+            pb.directory(new File("C:\\Users\\10488\\Desktop\\ocr-release"));
+            Process p = pb.start();
+            // Read the output
+            BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(p.getInputStream()));
+            String line;
+            while ((line = reader.readLine()) != null) {
+                System.out.println(line);
+            }
+            // Wait for the process to finish
+            int exitCode = p.waitFor();
+            System.out.println("Python脚本执行完毕,退出码: " + exitCode);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+}
diff --git a/src/main/java/com/ytlk/fusion/test2.java b/src/main/java/com/ytlk/fusion/test2.java
new file mode 100644
index 0000000..f7fb3b5
--- /dev/null
+++ b/src/main/java/com/ytlk/fusion/test2.java
@@ -0,0 +1,21 @@
+package com.ytlk.fusion;
+
+import com.ytlk.ocr.vo.UserVo;
+import com.ytlk.util.OcrUtil;
+
+import java.util.List;
+
+/**
+ * @className:test2
+ * @author:cwchen
+ * @date:2025-04-16-11:24
+ * @version:1.0
+ * @description:
+ */
+public class test2 {
+
+    public static void main(String[] args) {
+        List<UserVo> userVos = OcrUtil.ocrHandle("C:\\Users\\10488\\Desktop\\3.pdf");
+        System.err.println(userVos);
+    }
+}
diff --git a/src/main/java/com/ytlk/ocr/OCRSwingArea.java b/src/main/java/com/ytlk/ocr/OCRSwingArea.java
index cd3013e..710466e 100644
--- a/src/main/java/com/ytlk/ocr/OCRSwingArea.java
+++ b/src/main/java/com/ytlk/ocr/OCRSwingArea.java
@@ -162,8 +162,8 @@ public class OCRSwingArea extends JFrame {
         JFileChooser fc = new JFileChooser();
         fc.setMultiSelectionEnabled(true);
         fc.setFileSelectionMode(JFileChooser.FILES_ONLY);
-//        fc.setFileFilter(new FileNameExtensionFilter("PDF Documents", "pdf"));
-        fc.setFileFilter(new FileNameExtensionFilter("Excel Files (*.xls, *.xlsx)", "xls", "xlsx"));
+        fc.setFileFilter(new FileNameExtensionFilter("PDF Documents", "pdf"));
+//        fc.setFileFilter(new FileNameExtensionFilter("Excel Files (*.xls, *.xlsx)", "xls", "xlsx"));
         fc.setAcceptAllFileFilterUsed(false);
         int val = fc.showOpenDialog(null); // 文件打开对话框
         if (val == fc.APPROVE_OPTION) {
diff --git a/src/main/java/com/ytlk/ocr/table/HandleDataUtil.java b/src/main/java/com/ytlk/ocr/table/HandleDataUtil.java
index 3edffb8..aef3578 100644
--- a/src/main/java/com/ytlk/ocr/table/HandleDataUtil.java
+++ b/src/main/java/com/ytlk/ocr/table/HandleDataUtil.java
@@ -5,6 +5,7 @@ import com.ytlk.ocr.util.CompareDataUtil;
 import com.ytlk.ocr.util.FileUtils;
 import com.ytlk.ocr.vo.UserErrorVo;
 import com.ytlk.ocr.vo.UserVo;
+import com.ytlk.util.OcrUtil;
 import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
 
@@ -40,46 +41,22 @@ public class HandleDataUtil {
         List<Future<Map<String, Object>>> futures3 = new ArrayList<>();
         // excel表格读取数据
         for (int i = 0; i < rowCount; i++) {
-            int finalI = i;
-            Future<Map<String, Object>> future = executor.submit(new Callable<Map<String, Object>>() {
-                @Override
-                public Map<String, Object> call() throws Exception {
-                    Map<String, Object> map = new HashMap<>();
-                    File file = (File) OCRSwingArea.table.getValueAt(finalI, 0);
-                    System.out.println(file.getName());
-                    String path = file.getAbsoluteFile().toPath().toString();
-                    List<UserVo> userVos = FileUtils.getExcelUsers(path);
-                    map.put("fileName", file.getName());
-                    map.put("list", userVos);
-                    return map;
-                }
-            });
-            futures.add(future);
+            Map<String, Object> map = new HashMap<>();
+            File file = (File) OCRSwingArea.table.getValueAt(i, 0);
+            String path = file.getAbsoluteFile().toPath().toString();
+            List<UserVo> userVos = FileUtils.getExcelUsers(path);
+            map.put("fileName", file.getName());
+            map.put("list", userVos);
+            excelDataList.add(map);
         }
         // pdf读取数据
         for (int i = 0; i < rowCount2; i++) {
-            int finalI = i;
-            Future<Map<String, Object>> future = executor.submit(new Callable<Map<String, Object>>() {
-                @Override
-                public Map<String, Object> call() throws Exception {
-                    Map<String, Object> map = new HashMap<>();
-                    File file = (File) OCRSwingArea.table2.getValueAt(finalI, 0);
-                    System.out.println(file.getName());
-                    String path = file.getAbsoluteFile().toPath().toString();
-                    List<UserVo> userVos = FileUtils.getExcelUsers(path);
-                    map.put("fileName", file.getName());
-                    map.put("list", userVos);
-                    return map;
-                }
-            });
-            futures2.add(future);
-        }
-        for (Future<Map<String, Object>> future : futures) {
-            Map<String, Object> map = future.get();
-            excelDataList.add(map);
-        }
-        for (Future<Map<String, Object>> future : futures2) {
-            Map<String, Object> map = future.get();
+            Map<String, Object> map = new HashMap<>();
+            File file = (File) OCRSwingArea.table2.getValueAt(i, 0);
+            String path = file.getAbsoluteFile().toPath().toString();
+            List<UserVo> userVos = OcrUtil.ocrHandle(path);
+            map.put("fileName", file.getName());
+            map.put("list", userVos);
             pdfDataList.add(map);
         }
         // 校验excel表格数据
diff --git a/src/main/java/com/ytlk/util/FileUtil.java b/src/main/java/com/ytlk/util/FileUtil.java
new file mode 100644
index 0000000..b1afd06
--- /dev/null
+++ b/src/main/java/com/ytlk/util/FileUtil.java
@@ -0,0 +1,42 @@
+package com.ytlk.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+/**
+ * @className:FileUtil
+ * @author:cwchen
+ * @date:2025-04-16-11:18
+ * @version:1.0
+ * @description:File utility class
+ */
+public class FileUtil {
+
+    /**
+     * Reads a text file and returns its lines concatenated without line separators.
+     *
+     * @param filePath path of the file to read
+     * @return the concatenated content, or {@code null} when the path is null, the file
+     *         does not exist, or reading fails
+     */
+    public static String readDataFromFile(String filePath) {
+        if (filePath == null || !new File(filePath).exists()) {
+            return null;
+        }
+        StringBuilder sb = new StringBuilder();
+        // FileReader decodes with the JVM default charset (unchanged behaviour).
+        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                sb.append(line);
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+            // Do not return partial content; previously a failed open caused an NPE here.
+            return null;
+        }
+        return sb.toString();
+    }
+}
diff --git a/src/main/java/com/ytlk/util/OcrUtil.java b/src/main/java/com/ytlk/util/OcrUtil.java
new file mode 100644
index 0000000..0b003c2
--- /dev/null
+++ b/src/main/java/com/ytlk/util/OcrUtil.java
@@ -0,0 +1,243 @@
+package com.ytlk.util;
+
+import com.alibaba.fastjson.JSON;
+import com.ytlk.entity.ConfigVo;
+import com.ytlk.entity.OcrHandleVo;
+import com.ytlk.ocr.vo.UserVo;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * @className:OcrUtil
+ * @author:cwchen
+ * @date:2025-04-16-11:08
+ * @version:1.0
+ * @description:OCR recognition utility class
+ */
+public class OcrUtil {
+
+    static ConfigVo configVo = new ConfigVo();
+
+    /** Path the configuration file is read from. */
+    private static final String configPath = "C:\\Users\\10488\\Desktop\\orc\\config.json";
+
+    private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy_MM_dd_HH_mm_ss");
+    private static final DateTimeFormatter FORMATTER2 = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+
+    static {
+        configVo = loadConfig();
+    }
+
+    /**
+     * Loads the configuration from {@link #configPath}.
+     *
+     * @return the parsed configuration, or an empty {@link ConfigVo} when the file is missing,
+     *         blank or not valid JSON, so that class initialisation never fails
+     */
+    private static ConfigVo loadConfig() {
+        try {
+            String str = FileUtil.readDataFromFile(configPath);
+            if (StringUtils.isNotBlank(str)) {
+                ConfigVo loaded = JSON.parseObject(str, ConfigVo.class);
+                if (loaded != null) {
+                    return loaded;
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        System.err.println("配置文件读取失败: " + configPath);
+        return new ConfigVo();
+    }
+
+    /**
+     * Runs the configured Python OCR script on one file and converts its result to users.
+     * <p>
+     * The script is started as {@code python script --input filePath --output dir}, where
+     * {@code dir} is a fresh timestamped directory below the configured output directory.
+     * When the script exits with code 0, {@code <dir>/<baseName>_result.json} is parsed.
+     *
+     * @param filePath path of the file to recognise
+     * @return the recognised users; an empty list when the configuration is incomplete, the
+     *         script fails or times out, or any exception occurs (never {@code null})
+     */
+    public static List<UserVo> ocrHandle(String filePath){
+        List<UserVo> dataList = null;
+        try {
+            System.err.println("开始识别--" + getBaseName(filePath) + "文件," + "开始时间" + LocalDateTime.now().format(FORMATTER2));
+            long startTime = System.currentTimeMillis();
+            // Fail with a clear message instead of an NPE when the config was not loaded.
+            if (StringUtils.isAnyBlank(configVo.getPython(), configVo.getPythonScript(),
+                    configVo.getOutput(), configVo.getDirectory())) {
+                System.err.println("配置不完整: " + configPath);
+                return new ArrayList<>();
+            }
+            // Build the command line
+            String python = configVo.getPython();
+            String pythonScript = new File(configVo.getPythonScript()).getAbsolutePath();
+            String param1 = filePath;
+            String param2 = configVo.getOutput() + File.separator + getCurrentDateTimeFormatted();
+            // Create the output directory
+            createDirectories(param2);
+            ProcessBuilder pb = new ProcessBuilder(
+                    python,
+                    pythonScript,
+                    "--input", param1,
+                    "--output", param2
+            );
+            pb.redirectErrorStream(true);
+            // Tell Python which encoding to use for its stdout/stderr
+            Map<String, String> env = pb.environment();
+            env.put("PYTHONIOENCODING", "UTF-8"); // or GBK etc.; must match the decoder below
+            // Set the working directory
+            pb.directory(new File(configVo.getDirectory()));
+            Process p = pb.start();
+            // Echo the script output; closing the reader releases the process pipe
+            try (BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(p.getInputStream(), StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    System.out.println(line);
+                }
+            }
+            // NOTE(review): the loop above only ends once the script closes its output, so this
+            // 30 second limit bounds the wait after that point, not the total OCR run time.
+            boolean finished = p.waitFor(30, TimeUnit.SECONDS);
+            if (!finished) {
+                p.destroyForcibly(); // kill the process
+                System.err.println("Python脚本执行超时");
+            } else {
+                int exitCode = p.exitValue();
+                System.out.println("Python脚本执行完毕,退出码: " + exitCode);
+                if(exitCode == 0){
+                    String path = param2 + File.separator + getBaseName(filePath) + "_result.json";
+                    dataList = ocrHandleResult(path);
+                }
+                long endTime = System.currentTimeMillis();
+                System.err.println("结束识别--" + getBaseName(filePath) + "文件," + "结束时间" + LocalDateTime.now().format(FORMATTER2));
+                long duration = endTime - startTime;
+                System.err.println("执行耗时: " + duration + " 毫秒");
+            }
+        } catch (Exception e) {
+            if (e instanceof InterruptedException) {
+                Thread.currentThread().interrupt(); // keep the interrupt visible to callers
+            }
+            e.printStackTrace();
+        }
+        System.err.println("识别结果:" + dataList);
+        return Optional.ofNullable(dataList).orElseGet(ArrayList::new);
+    }
+
+    /**
+     * @return the current local date-time formatted as {@code yyyy_MM_dd_HH_mm_ss}
+     */
+    public static String getCurrentDateTimeFormatted() {
+        return LocalDateTime.now().format(FORMATTER);
+    }
+
+    /**
+     * Parses the result JSON written by the OCR script into users.
+     * <p>
+     * Each {@code name_money} entry is expected to look like {@code name-money}. Entries that
+     * contain "未知姓名" are ignored, and malformed entries are skipped individually so one bad
+     * entry no longer discards the entries after it.
+     *
+     * @param filePath path of the result JSON file
+     * @return the parsed users; empty when the file is missing, blank or cannot be parsed
+     */
+    public static List<UserVo> ocrHandleResult(String filePath){
+        List<UserVo> dataList = new ArrayList<>();
+        if(!new File(filePath).exists()){
+            return dataList;
+        }
+        try {
+            String str = FileUtil.readDataFromFile(filePath);
+            if(StringUtils.isBlank(str)){
+                return dataList;
+            }
+            OcrHandleVo ocrHandleVo = JSON.parseObject(str, OcrHandleVo.class);
+            System.err.println(ocrHandleVo);
+            String[] nameMoney = ocrHandleVo == null ? null : ocrHandleVo.getName_money();
+            if (nameMoney == null) {
+                return dataList;
+            }
+            for (String value : nameMoney) {
+                if (value == null || value.contains("未知姓名")) {
+                    continue;
+                }
+                String[] split = value.split("-");
+                if (split.length < 2) {
+                    // No "name-money" pair: skip instead of failing on split[1]
+                    continue;
+                }
+                UserVo userVo = new UserVo();
+                userVo.setName(split[0]);
+                userVo.setWage(convertToDouble(split[1]));
+                dataList.add(userVo);
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            return dataList;
+        }
+        return dataList;
+    }
+
+    /**
+     * Returns the file name of a path without its last extension.
+     *
+     * @param filePath path whose file name is wanted
+     * @return the file name up to the last dot, or the whole name when it has no dot
+     */
+    public static String getBaseName(String filePath) {
+        Path path = Paths.get(filePath);
+        String fileName = path.getFileName().toString();
+        int dotIndex = fileName.lastIndexOf('.');
+        return (dotIndex == -1) ? fileName : fileName.substring(0, dotIndex);
+    }
+
+    /**
+     * Creates a directory together with any missing parent directories.
+     * A failure is only reported on stderr.
+     *
+     * @param multiLevelPath directory path to create
+     */
+    public static void createDirectories(String multiLevelPath){
+        try {
+            Path path = Paths.get(multiLevelPath);
+            Files.createDirectories(path); // also creates missing parent directories
+        } catch (Exception e) {
+            System.err.println("创建失败: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Converts text to a double after removing all whitespace.
+     *
+     * @param input text to convert, may be {@code null}
+     * @return the parsed value, or {@code 0.0} when input is {@code null} or not a number
+     */
+    public static double convertToDouble(String input) {
+        if (input == null) {
+            return 0.0;
+        }
+        // Remove every whitespace character (spaces, tabs, line breaks, ...)
+        String trimmed = input.replaceAll("\\s+", "");
+        try {
+            return Double.parseDouble(trimmed);
+        } catch (NumberFormatException e) {
+            return 0.0;
+        }
+    }
+}