This commit is contained in:
hayu 2025-09-15 19:19:37 +08:00
parent 0279307413
commit f32ff8cb6b
5 changed files with 576 additions and 4 deletions

View File

@ -0,0 +1,499 @@
package com.bonus.material.common.utils;
import com.bonus.common.core.web.domain.AjaxResult;
import com.bonus.material.materialStation.domain.ProAuthorizeDetails;
import com.bonus.system.api.RemoteFileService;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.usermodel.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.web.multipart.MultipartFile;
import java.io.*;
import java.net.URL;
import java.net.URI;
import java.net.URLEncoder;
import java.nio.file.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
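/**
 * Word parsing utility. Accepts either a local path or an http(s) URL and extracts
 * text and embedded pictures from .docx documents (legacy .doc files are rejected).
 * Depends on Apache POI (poi-ooxml, poi-scratchpad).
 *
 * Usage:
 * Map<String, Object> result = WordParserUtil.parseWordFile(multipartFile, fileName, filePathUrlOrLocalPath, sysFileService);
 *
 * Note: sysFileService.upload(...) is expected to return an AjaxResult (or a similar structure)
 * whose "data" entry contains a "url" field, as produced by the project's upload logic.
 */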
public class WordParserUtil {
private static final Logger logger = LoggerFactory.getLogger(WordParserUtil.class);
// Prefix of the temporary directories used when a document has to be copied or downloaded before parsing.
private static final String TEMP_PREFIX = "word_parse_";
// ID-card number: 18 characters (17 digits plus a digit or X/x) or the legacy 15 digits.
// The 18-character alternative must come first: regex alternation is ordered, so with
// \d{15} first an 18-character number would match only its first 15 digits. The negative
// lookahead rejects matches that are merely the prefix of a longer digit run.
private static final Pattern ID_PATTERN = Pattern.compile("(\\d{17}[0-9Xx]|\\d{15})(?![0-9Xx])");
/**
 * Single entry point: parses an uploaded Word (.docx) authorization letter.
 *
 * <p>The document is read from {@code filePath}: downloaded first when it is an http(s) URL,
 * read in place when it is an existing local path, and otherwise taken from the uploaded
 * {@code file} itself. Failures are never propagated; they are logged and reported through
 * the returned map. Any temporary copy created here is deleted before returning.
 *
 * @param file original MultipartFile uploaded by the front end
 * @param fileName file name echoed back in the result
 * @param filePath access path returned by the upload service; an http(s) URL or a local path
 * @param sysFileService file service used to upload the pictures extracted from the document
 * @return map with keys {@code success}, {@code filePath}, {@code fileName}, plus
 *         {@code receivers} on success or {@code message} on failure
 */
public static Map<String, Object> parseWordFile(MultipartFile file,String fileName, String filePath, RemoteFileService sysFileService) throws Exception {
    Map<String, Object> result = new HashMap<>();
    // The document path and name are always returned, whatever the outcome.
    result.put("filePath", filePath);
    result.put("fileName", fileName);

    File localFile = null;
    boolean temporaryCopy = false;
    try {
        String originalName = file.getOriginalFilename();
        if (originalName == null) {
            originalName = "upload.docx";
        }
        // Check the extension before any download/copy so unsupported files cost nothing,
        // and report the specific reason instead of the generic parse failure.
        if (!originalName.toLowerCase(Locale.ROOT).endsWith(".docx")) {
            result.put("success", false);
            result.put("message", "文件格式不对请下载模版仅支持docx");
            return result;
        }

        if (filePath != null && (filePath.startsWith("http://") || filePath.startsWith("https://"))) {
            localFile = downloadToLocal(filePath);
            temporaryCopy = true;
        } else {
            Path p = filePath == null ? null : Paths.get(filePath);
            if (p != null && Files.exists(p)) {
                // Pre-existing local file: parsed in place and never deleted.
                localFile = p.toFile();
            } else {
                localFile = writeMultipartToTemp(file);
                temporaryCopy = true;
            }
        }

        List<ProAuthorizeDetails> receivers;
        try (FileInputStream in = new FileInputStream(localFile)) {
            receivers = parseDocx(in, sysFileService);
        }
        result.put("success", true);
        result.put("receivers", receivers); // recognised delegates
    } catch (Exception e) {
        // Keep the best-effort contract (no exception reaches the caller) but do not lose the cause.
        logger.error("文档解析失败: fileName={}, filePath={}", fileName, filePath, e);
        result.put("success", false);
        result.put("message", "文档解析失败");
    } finally {
        if (temporaryCopy && localFile != null) {
            try {
                Path tempFile = localFile.toPath();
                Files.deleteIfExists(tempFile);
                Path parent = tempFile.getParent();
                // Only remove directories this utility created itself.
                if (parent != null && parent.getFileName() != null
                        && parent.getFileName().toString().startsWith(TEMP_PREFIX)) {
                    Files.deleteIfExists(parent);
                }
            } catch (IOException cleanupError) {
                logger.warn("临时文件清理失败: {}", localFile, cleanupError);
            }
        }
    }
    return result;
}
/* ================= helpers: download / write temporary files ================= */
/**
 * Downloads a remote file into a freshly created temporary directory.
 *
 * @param fileUrl http(s) URL of the file; its last path segment may contain Chinese characters or spaces
 * @return the downloaded file, located inside a new directory whose name starts with TEMP_PREFIX
 * @throws IOException if the connection or the copy fails (the temporary directory is removed first)
 */
private static File downloadToLocal(String fileUrl) throws IOException {
    // Encode the file-name part of the URL (Chinese characters / spaces).
    String encoded = encodeChineseInUrl(fileUrl);
    URL url = new URL(encoded);

    // Local name: last path segment without the query string, with characters that are
    // illegal in file names on common platforms replaced.
    int queryStart = fileUrl.indexOf('?');
    String pathPart = queryStart >= 0 ? fileUrl.substring(0, queryStart) : fileUrl;
    String fileName = pathPart.substring(pathPart.lastIndexOf('/') + 1).replaceAll("[\\\\:*?\"<>|]", "_");
    if (fileName.isEmpty() || ".".equals(fileName) || "..".equals(fileName)) {
        fileName = UUID.randomUUID().toString() + ".docx";
    }

    Path tempDir = Files.createTempDirectory(TEMP_PREFIX);
    Path target = tempDir.resolve(fileName);
    try {
        java.net.URLConnection connection = url.openConnection();
        // Without timeouts an unresponsive file server would block the request thread indefinitely.
        connection.setConnectTimeout(10_000);
        connection.setReadTimeout(60_000);
        try (InputStream in = connection.getInputStream()) {
            Files.copy(in, target, StandardCopyOption.REPLACE_EXISTING);
        }
    } catch (IOException e) {
        logger.error("下载远程文件失败: {}, encodedUrl={}", fileUrl, encoded, e);
        // Do not leave a half-written file or an empty directory behind.
        try {
            Files.deleteIfExists(target);
            Files.deleteIfExists(tempDir);
        } catch (IOException cleanupError) {
            e.addSuppressed(cleanupError);
        }
        throw e;
    }
    return target.toFile();
}
/**
 * Percent-encodes the file-name segment (the text after the last '/' of the path) of a URL
 * as UTF-8, so that Chinese characters and spaces are safe to request. The part up to and
 * including that '/' is returned untouched, and so is any query string.
 *
 * @param url URL, or a bare file name when it contains no '/'
 * @return the URL with its last path segment percent-encoded (spaces become %20)
 * @throws UnsupportedEncodingException never in practice; UTF-8 is always available
 */
private static String encodeChineseInUrl(String url) throws UnsupportedEncodingException {
    // Split the query string off first: encoding '?', '=' and '&' would turn the query into
    // part of the path, and a '/' inside the query would be mistaken for the last path separator.
    int queryStart = url.indexOf('?');
    String query = queryStart >= 0 ? url.substring(queryStart) : "";
    String path = queryStart >= 0 ? url.substring(0, queryStart) : url;

    int idx = path.lastIndexOf('/');
    // When there is no '/', idx is -1: the prefix is empty and the whole path is encoded.
    String prefix = path.substring(0, idx + 1);
    String fileName = path.substring(idx + 1);
    // URLEncoder produces form encoding ('+' for a space); URLs paths need %20 instead.
    return prefix + URLEncoder.encode(fileName, "UTF-8").replace("+", "%20") + query;
}
/**
 * Copies the uploaded file into a freshly created temporary directory.
 *
 * <p>The original file name is supplied by the client, so only its last path component is
 * used; anything that could escape the temporary directory is replaced by a random name.
 *
 * @param file uploaded file
 * @return the temporary copy, located inside a new directory whose name starts with TEMP_PREFIX
 * @throws IOException if the copy fails or the target would fall outside the temporary directory
 */
private static File writeMultipartToTemp(MultipartFile file) throws IOException {
    String original = file.getOriginalFilename();
    String fileName = null;
    if (original != null) {
        // Some clients send a full path; attackers may send "../" sequences. Keep the base name only.
        int lastSeparator = Math.max(original.lastIndexOf('/'), original.lastIndexOf('\\'));
        String baseName = original.substring(lastSeparator + 1).trim();
        if (!baseName.isEmpty() && !".".equals(baseName) && !"..".equals(baseName)) {
            fileName = baseName;
        }
    }
    if (fileName == null) {
        fileName = UUID.randomUUID().toString() + ".docx";
    }

    Path tempDir = Files.createTempDirectory(TEMP_PREFIX).toAbsolutePath().normalize();
    Path target;
    try {
        target = tempDir.resolve(fileName).normalize();
    } catch (InvalidPathException e) {
        // Name contains characters the local file system rejects: fall back to a generated one.
        target = tempDir.resolve(UUID.randomUUID().toString() + ".docx");
    }
    // Defence in depth: never write outside the directory created above.
    if (!target.startsWith(tempDir)) {
        throw new IOException("Illegal upload file name: " + original);
    }
    try (InputStream in = file.getInputStream()) {
        Files.copy(in, target, StandardCopyOption.REPLACE_EXISTING);
    }
    return target.toFile();
}
/* ================= Text extraction (robust) ================= */
/**
 * Normalizes raw document text: non-breaking spaces become plain spaces, zero-width
 * characters and the byte-order mark are removed, control characters other than CR/LF/TAB
 * become spaces, whitespace runs collapse to a single space, and the result is trimmed.
 *
 * @param text raw text, may be null
 * @return the normalized text; an empty string when {@code text} is null
 */
private static String normalizeText(String text) {
    if (text == null) {
        return "";
    }
    String cleaned = text
            .replace('\u00A0', ' ')                          // NBSP
            .replaceAll("[\\u200B-\\u200F\\uFEFF]", "")
            .replaceAll("[\\p{Cntrl}&&[^\r\n\t]]", " ")
            .replaceAll("\\s+", " ");
    return cleaned.trim();
}
// Name directly followed by the ID label, e.g. "(孙贤红)身份证号123..." or "孙贤红 身份证号123...".
// The 18-character ID alternative comes first and a lookahead forbids a trailing digit/X:
// regex alternation is ordered, so with \d{15} first every 18-character ID was cut to 15 digits.
private static final Pattern NAME_THEN_ID_PATTERN = Pattern.compile("[\\(\\[]?\\s*([\\p{IsHan}·•・]{2,10})\\s*[\\)\\]]?\\s*身份证号\\s*(\\d{17}[0-9Xx]|\\d{15})(?![0-9Xx])");
// Labelled name (被授权人姓名 / 委托人姓名 / 委托人 / 被授权人) followed within 50 characters by the ID label.
private static final Pattern LABELLED_NAME_ID_PATTERN = Pattern.compile("(被授权人姓名|委托人姓名|委托人|被授权人)[:]?\\s*([\\p{IsHan}·•・]{2,10}).{0,50}?身份证号\\s*(\\d{17}[0-9Xx]|\\d{15})(?![0-9Xx])");
/**
 * Extracts (name, ID-card number) pairs from document text.
 *
 * <p>The text is normalized first. The "name followed by ID label" pattern is tried first;
 * the labelled pattern is used only when the first one finds nothing.
 *
 * @param text full document text, may be null or blank
 * @return the recognised persons in document order; empty when nothing matches
 */
private static List<ProAuthorizeDetails> extractReceivers(String text) {
    List<ProAuthorizeDetails> list = new ArrayList<>();
    if (text == null || text.trim().isEmpty()) {
        return list;
    }
    String norm = normalizeText(text);

    Matcher direct = NAME_THEN_ID_PATTERN.matcher(norm);
    while (direct.find()) {
        ProAuthorizeDetails d = new ProAuthorizeDetails();
        d.setName(direct.group(1).trim());
        d.setIdNumber(direct.group(2).trim());
        list.add(d);
    }
    if (!list.isEmpty()) {
        return list;
    }

    // Fallback: patterns introduced by a label such as 被授权人姓名 / 委托人.
    Matcher labelled = LABELLED_NAME_ID_PATTERN.matcher(norm);
    while (labelled.find()) {
        ProAuthorizeDetails d = new ProAuthorizeDetails();
        d.setName(labelled.group(2).trim());
        d.setIdNumber(labelled.group(3).trim());
        list.add(d);
    }
    return list;
}
/* ================= Context classification (front / back side) ================= */
/**
 * Tells whether the surrounding text hints at the front side of an ID card.
 *
 * @param ctx text near a picture, may be null
 * @return true when {@code ctx} contains one of the front-side keywords (正面, 头像, 人像)
 */
private static boolean contextSuggestsFront(String ctx) {
    if (ctx == null) {
        return false;
    }
    // The former extra check contains("") is true for every string and made this method
    // return true for any non-null context; only real keywords are tested now.
    // The keywords are Han characters, which have no letter case, so no lower-casing is needed.
    return ctx.contains("正面") || ctx.contains("头像") || ctx.contains("人像");
}
/**
 * Tells whether the surrounding text hints at the back side of an ID card.
 *
 * @param ctx text near a picture, may be null
 * @return true when {@code ctx} contains one of the back-side keywords (背面, 国徽)
 */
private static boolean contextSuggestsBack(String ctx) {
    if (ctx == null) {
        return false;
    }
    // The former extra check contains("") is true for every string and made this method
    // return true for any non-null context; only real keywords are tested now.
    // The keywords are Han characters, which have no letter case, so no lower-casing is needed.
    return ctx.contains("背面") || ctx.contains("国徽");
}
/* ================= Picture upload (through sysFileService.upload) ================= */
/**
 * Uploads one extracted picture through the file service and returns its URL.
 *
 * <p>The upload result is expected to be a successful AjaxResult whose "data" entry is a map
 * containing a "url" field. This is a best-effort operation: every failure is logged and
 * reported as {@code null}.
 *
 * @param data raw picture bytes
 * @param ext file extension suggested for the picture; null or empty falls back to "jpg"
 * @param sysFileService file service performing the upload
 * @return the URL of the uploaded picture, or null when there is nothing to upload or the upload fails
 */
private static String saveImageWithSysFileService(byte[] data, String ext, RemoteFileService sysFileService) {
    if (data == null || data.length == 0) {
        return null;
    }
    try {
        // An empty extension would otherwise give a name ending in "." and the content type "image/".
        String extension = (ext == null || ext.isEmpty()) ? "jpg" : ext;
        // "image/jpg" is not a registered media type; the subtype for .jpg files is "jpeg".
        String subtype = "jpg".equalsIgnoreCase(extension) ? "jpeg" : extension.toLowerCase(Locale.ROOT);
        String fileName = UUID.randomUUID().toString().replace("-", "") + "." + extension;
        MultipartFile multipartFile = new MockMultipartFile(fileName, fileName, "image/" + subtype, data);
        AjaxResult uploadRes = sysFileService.upload(multipartFile);
        if (uploadRes != null && uploadRes.isSuccess()) {
            Object payload = uploadRes.get("data");
            if (payload instanceof Map) {
                Object url = ((Map<?, ?>) payload).get("url");
                if (url != null) {
                    return url.toString();
                }
            }
        }
    } catch (Exception e) {
        // Trailing throwable argument keeps the stack trace in the log.
        logger.warn("图片上传到 sysFileService 失败: {}", e.getMessage(), e);
    }
    return null;
}
/* ================= docx parsing (text + pictures) ================= */
/**
 * Parses a .docx stream: recognises the delegates (name + ID number) from the full text,
 * then attaches the uploaded ID-card pictures to them.
 *
 * <p>Pictures are matched to delegates through marker paragraphs (text containing 委托人 and
 * 正面 or 背面). A back-side marker closes the current delegate. Delegates still missing a
 * side afterwards are completed with the remaining pictures in document order.
 *
 * @param inputStream .docx content; the caller remains responsible for closing it
 * @param sysFileService file service used to upload the pictures
 * @return the recognised delegates, possibly empty
 * @throws Exception if the document cannot be opened or read
 */
private static List<ProAuthorizeDetails> parseDocx(InputStream inputStream, RemoteFileService sysFileService) throws Exception {
    // try-with-resources: closing the document releases the OPC package opened from the stream.
    try (XWPFDocument doc = new XWPFDocument(OPCPackage.open(inputStream))) {
        // 1) Extract the full text and recognise delegate names / ID numbers.
        StringBuilder sb = new StringBuilder();
        for (XWPFParagraph p : doc.getParagraphs()) {
            sb.append(p.getText()).append(" ");
        }
        for (XWPFTable t : doc.getTables()) {
            for (XWPFTableRow r : t.getRows()) {
                for (XWPFTableCell c : r.getTableCells()) {
                    sb.append(c.getText()).append(" ");
                }
            }
        }
        List<ProAuthorizeDetails> receivers = extractReceivers(sb.toString());
        if (receivers.isEmpty()) {
            // Nobody to attach pictures to: nothing is uploaded.
            return receivers;
        }

        // 2) Walk the body in document order, collecting pictures and markers.
        //    The paragraph index is global across body paragraphs and table-cell paragraphs.
        List<ImgHolder> images = new ArrayList<>();
        List<Marker> markers = new ArrayList<>();
        int globalParaIndex = 0;
        int tableIdx = 0;
        for (IBodyElement be : doc.getBodyElements()) {
            if (be.getElementType() == BodyElementType.PARAGRAPH) {
                collectMarkersAndImages((XWPFParagraph) be, "body", globalParaIndex, markers, images);
                globalParaIndex++;
            } else if (be.getElementType() == BodyElementType.TABLE) {
                List<XWPFTableRow> rows = ((XWPFTable) be).getRows();
                for (int r = 0; r < rows.size(); r++) {
                    List<XWPFTableCell> cells = rows.get(r).getTableCells();
                    for (int c = 0; c < cells.size(); c++) {
                        String cellKey = "t" + tableIdx + "_r" + r + "_c" + c;
                        for (XWPFParagraph p : cells.get(c).getParagraphs()) {
                            collectMarkersAndImages(p, cellKey, globalParaIndex, markers, images);
                            globalParaIndex++;
                        }
                    }
                }
                tableIdx++;
            }
        }

        // 3) Assign pictures following the markers, 4) then fill what is still missing.
        Set<Integer> usedImageIndices = new HashSet<>();
        assignImagesByMarkers(markers, images, receivers, usedImageIndices, sysFileService);
        fillMissingSides(images, receivers, usedImageIndices, sysFileService);
        return receivers;
    }
}
/** Records the marker (if any) and every embedded picture of one paragraph. */
private static void collectMarkersAndImages(XWPFParagraph p, String cellKey, int paraIndex,
        List<Marker> markers, List<ImgHolder> images) {
    String text = normalizeText(p.getText());
    // Only delegate-related captions count as markers.
    if (text.contains("委托人") && (text.contains("正面") || text.contains("背面"))) {
        Marker mk = new Marker();
        mk.isFront = text.contains("正面");
        mk.paraIndex = paraIndex;
        mk.cellKey = cellKey;
        mk.text = text;
        markers.add(mk);
    }
    if (p.getRuns() == null) {
        return;
    }
    for (XWPFRun run : p.getRuns()) {
        List<XWPFPicture> pics = run.getEmbeddedPictures();
        if (pics == null) {
            continue;
        }
        for (XWPFPicture pic : pics) {
            XWPFPictureData pd = pic.getPictureData();
            if (pd == null) {
                // Picture whose data part cannot be resolved: skip instead of failing the whole parse.
                continue;
            }
            ImgHolder ih = new ImgHolder();
            ih.data = pd.getData();
            ih.ext = pd.suggestFileExtension();
            ih.cellKey = cellKey;
            ih.paraIndex = paraIndex;
            ih.url = null;
            // Position in the list doubles as the global, unique picture index.
            ih.imageIndex = images.size();
            images.add(ih);
        }
    }
}
/**
 * Picks the picture for a marker among the unused ones: first one at or after the marker in
 * the same cell, else first one at or after the marker anywhere, else first unused one.
 */
private static ImgHolder findImageForMarker(Marker mk, List<ImgHolder> images, Set<Integer> usedImageIndices) {
    ImgHolder firstAfterMarker = null;
    ImgHolder firstUnused = null;
    for (ImgHolder ih : images) {
        if (usedImageIndices.contains(ih.imageIndex)) {
            continue;
        }
        if (firstUnused == null) {
            firstUnused = ih;
        }
        if (ih.paraIndex >= mk.paraIndex) {
            if (Objects.equals(ih.cellKey, mk.cellKey)) {
                return ih;
            }
            if (firstAfterMarker == null) {
                firstAfterMarker = ih;
            }
        }
    }
    return firstAfterMarker != null ? firstAfterMarker : firstUnused;
}
/** Walks the markers in order and attaches the matching picture to the current delegate. */
private static void assignImagesByMarkers(List<Marker> markers, List<ImgHolder> images,
        List<ProAuthorizeDetails> receivers, Set<Integer> usedImageIndices, RemoteFileService sysFileService) {
    int delegateIdx = 0; // 0-based index of the delegate currently being filled
    for (Marker mk : markers) {
        // Extra markers beyond the recognised delegates fall onto the last delegate.
        ProAuthorizeDetails target = receivers.get(Math.min(delegateIdx, receivers.size() - 1));
        ImgHolder found = findImageForMarker(mk, images, usedImageIndices);
        if (found != null) {
            // Lazy upload: each picture is uploaded at most once.
            if (found.url == null) {
                found.url = saveImageWithSysFileService(found.data, found.ext, sysFileService);
            }
            if (found.url != null) {
                if (mk.isFront) {
                    target.setFrontUrl(found.url);
                } else {
                    target.setBackUrl(found.url);
                }
            }
            // Consumed even when the upload failed, so it is not offered to another marker.
            usedImageIndices.add(found.imageIndex);
        }
        // A back-side marker completes the current delegate; the next marker starts the next one.
        if (!mk.isFront) {
            delegateIdx++;
        }
    }
}
/** Completes delegates missing a front or back picture with unused pictures; never overwrites. */
private static void fillMissingSides(List<ImgHolder> images, List<ProAuthorizeDetails> receivers,
        Set<Integer> usedImageIndices, RemoteFileService sysFileService) {
    for (ProAuthorizeDetails p : receivers) {
        if (!isNullOrEmpty(p.getFrontUrl()) && !isNullOrEmpty(p.getBackUrl())) {
            continue;
        }
        for (ImgHolder ih : images) {
            if (usedImageIndices.contains(ih.imageIndex)) {
                continue;
            }
            if (ih.url == null) {
                ih.url = saveImageWithSysFileService(ih.data, ih.ext, sysFileService);
            }
            if (ih.url == null) {
                continue;
            }
            if (isNullOrEmpty(p.getFrontUrl())) {
                p.setFrontUrl(ih.url);
                usedImageIndices.add(ih.imageIndex);
            } else if (isNullOrEmpty(p.getBackUrl())) {
                p.setBackUrl(ih.url);
                usedImageIndices.add(ih.imageIndex);
            }
            if (!isNullOrEmpty(p.getFrontUrl()) && !isNullOrEmpty(p.getBackUrl())) {
                break;
            }
        }
    }
}
/** True when the string is null or has zero length. */
private static boolean isNullOrEmpty(String s) {
    return s == null || s.isEmpty();
}
/**
 * Handles one paragraph captioned as a delegate's ID-card front or back side: uploads its
 * embedded pictures and attaches them to the delegate at {@code personIndex}.
 *
 * @param p paragraph to inspect
 * @param receivers recognised delegates
 * @param sysFileService file service used to upload the pictures
 * @param personIndex 0-based index of the delegate currently being filled
 * @return the index to use for the next paragraph; advanced after each back-side picture
 */
private static int handleParagraph(XWPFParagraph p, List<ProAuthorizeDetails> receivers,
        RemoteFileService sysFileService, int personIndex) {
    String text = normalizeText(p.getText());
    boolean idCardCaption = text.contains("身份证(正面)") || text.contains("身份证(背面)");
    // Decide before uploading: pictures that cannot be attached to a delegate are not
    // uploaded at all, which avoids orphan files on the file server.
    if (!idCardCaption || !text.contains("委托人") || receivers.isEmpty()) {
        return personIndex;
    }
    boolean front = text.contains("正面");
    for (XWPFRun run : p.getRuns()) {
        for (XWPFPicture pic : run.getEmbeddedPictures()) {
            XWPFPictureData pd = pic.getPictureData();
            if (pd == null) {
                // Picture whose data part cannot be resolved.
                continue;
            }
            String url = saveImageWithSysFileService(pd.getData(), pd.suggestFileExtension(), sysFileService);
            if (url == null) {
                continue;
            }
            ProAuthorizeDetails person = receivers.get(Math.min(personIndex, receivers.size() - 1));
            if (front) {
                person.setFrontUrl(url);
            } else {
                person.setBackUrl(url);
                personIndex++; // back side done: move on to the next delegate
            }
        }
    }
    return personIndex;
}
// One embedded picture found while walking the document, with where it was found.
private static class ImgHolder {
// Raw picture bytes (from XWPFPictureData.getData()).
byte[] data;
// File extension suggested by POI for the picture.
String ext;
String cellKey; // "body" for body paragraphs, or a table-cell key such as "t0_r1_c2"
// Global paragraph index of the paragraph holding the picture.
int paraIndex;
String url; // URL returned by the upload; assigned lazily, null until uploaded
int imageIndex; // global order index; also the key used to mark the picture as consumed
}
// A caption paragraph (contains 委托人 plus 正面 or 背面) telling which ID-card side follows.
private static class Marker {
boolean isFront; // true = front side (正面), false = back side (背面)
// Global paragraph index of the caption paragraph.
int paraIndex;
// "body" or the table-cell key of the caption, same format as ImgHolder.cellKey.
String cellKey;
String text; // normalized caption text
}
}

View File

@ -11,6 +11,7 @@ import com.bonus.common.core.web.page.TableDataInfo;
import com.bonus.common.log.annotation.SysLog;
import com.bonus.common.log.enums.OperaType;
import com.bonus.material.common.annotation.PreventRepeatSubmit;
import com.bonus.material.common.utils.WordParserUtil;
import com.bonus.material.materialStation.domain.ProAuthorizeDetails;
import com.bonus.material.materialStation.domain.ProAuthorizeInfo;
import com.bonus.material.materialStation.service.ProAuthorizeService;
@ -19,13 +20,17 @@ import com.bonus.system.api.RemoteFileService;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.context.request.RequestContextHolder;
import org.springframework.web.context.request.ServletRequestAttributes;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.Resource;
import javax.servlet.http.HttpServletResponse;
import javax.validation.constraints.NotNull;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
@ -81,6 +86,26 @@ public class ProAuthorizeController extends BaseController {
}
}
/**
 * Uploads a Word authorization letter to the file service, then parses it.
 *
 * @param file uploaded .docx document
 * @return on success, the map produced by WordParserUtil.parseWordFile (it carries its own
 *         "success" flag); an error result when the upload fails or parsing throws
 */
@PostMapping("/parseWord")
public AjaxResult parseWord(@RequestParam("file") MultipartFile file) {
    if (file == null || file.isEmpty()) {
        return AjaxResult.error("上传文件不能为空");
    }
    try {
        AjaxResult result = sysFileService.upload(file);
        if (result == null || !result.isSuccess()) {
            return AjaxResult.error("文件上传失败");
        }
        // A "successful" upload without a usable URL is still an upload failure; report it as
        // such instead of letting a NullPointerException surface as a template error.
        Object uploadData = result.get("data");
        Object url = uploadData instanceof Map ? ((Map<?, ?>) uploadData).get("url") : null;
        if (url == null) {
            return AjaxResult.error("文件上传失败");
        }
        String fileName = file.getOriginalFilename();
        String filePath = url.toString();
        Map<String, Object> data = WordParserUtil.parseWordFile(file,fileName, filePath, sysFileService);
        return AjaxResult.success(data);
    } catch (Exception e) {
        logger.error("解析失败", e);
        return AjaxResult.error("解析失败,请按模版填写内容");
    }
}
/**
* 授权提交
*/
@ -138,5 +163,15 @@ public class ProAuthorizeController extends BaseController {
return AjaxResult.success(ListPagingUtil.paging(pageIndex, pageSize, new ArrayList<>()));
}
}
/**
 * Downloads the authorization-letter template by handing the current HTTP response to the service.
 */
@ApiOperation(value = "授权委托书模版下载")
@PostMapping("/downLoad")
public void downLoadExcelFile(){
    // Fails fast with a NullPointerException when called outside a request.
    ServletRequestAttributes attributes =
            (ServletRequestAttributes) Objects.requireNonNull(RequestContextHolder.getRequestAttributes());
    HttpServletResponse resp = attributes.getResponse();
    service.downLoadTemplate(resp);
}
}

View File

@ -9,6 +9,7 @@ import com.bonus.material.materialStation.domain.ProAuthorizeInfo;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.multipart.MultipartFile;
import javax.servlet.http.HttpServletResponse;
import java.util.List;
/**
@ -57,4 +58,10 @@ public interface ProAuthorizeService {
int updateAuthorizeInfoSign(ProAuthorizeDetails bean);
List<ProAuthorizeInfo> getAuthorList(ProAuthorizeInfo bean);
/**
 * Downloads the authorization-letter template.
 * @param resp HTTP response the template is written to
 */
void downLoadTemplate(HttpServletResponse resp);
}

View File

@ -14,6 +14,7 @@ import com.bonus.material.materialStation.mapper.ProAuthorizeMapper;
import com.bonus.material.materialStation.service.ProAuthorizeService;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.apache.commons.io.IOUtils;
import org.hibernate.validator.internal.util.StringHelper;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
@ -22,10 +23,8 @@ import org.springframework.util.CollectionUtils;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.Resource;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
@ -283,6 +282,38 @@ public class ProAuthorizeServiceImpl implements ProAuthorizeService {
}
}
/**
 * Streams the authorization-letter template from the classpath to the HTTP response as an attachment.
 *
 * <p>Responds with 404 when the template resource is missing. I/O failures are logged and not rethrown.
 * NOTE(review): the template is a .doc file while WordParserUtil.parseWordFile accepts only
 * .docx uploads; confirm the template format with the product owner.
 *
 * @param response HTTP response to write the template to
 */
@Override
public void downLoadTemplate(HttpServletResponse response) {
    // Template name, used for both the classpath lookup and the download file name.
    String templateName = "授权委托书模板.doc";
    String resourcePath = "template/" + templateName;
    try (InputStream input = this.getClass().getClassLoader().getResourceAsStream(resourcePath)) {
        if (input == null) {
            // Tell the client explicitly; previously this ended as a 200 response with an empty body.
            log.error("下载模板失败: {}", "模板文件不存在: " + templateName);
            response.sendError(HttpServletResponse.SC_NOT_FOUND);
            return;
        }
        // Response headers
        response.setCharacterEncoding("UTF-8");
        response.setContentType("application/msword");
        response.setHeader(
                "Content-Disposition",
                "attachment;filename=" + new String(templateName.getBytes("UTF-8"), "ISO-8859-1")
        );
        response.setHeader("Access-Control-Expose-Headers", "Content-Disposition");
        // The output stream is opened only once the template is known to exist.
        try (OutputStream out = response.getOutputStream()) {
            byte[] buffer = new byte[1024];
            int bytesRead;
            while ((bytesRead = input.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
            out.flush();
        }
    } catch (IOException e) {
        log.error("下载模板失败: {}", e.getMessage(), e);
    }
}
/**
* 判断关键字是否包含在item中
* @param item