package com.example.demo.service; import com.example.demo.model.*; import org.apache.pdfbox.Loader; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.util.Matrix; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import javax.imageio.ImageIO; import java.awt.*; import java.awt.image.BufferedImage; import java.io.File; import java.math.BigDecimal; import java.math.RoundingMode; import java.util.List; @Service public class PDFBoxService { @Autowired private GetPictureNumService getPictureNumService; @Autowired private DifyService difyService; public void test() throws Exception { PDDocument document = Loader.loadPDF(new File("D:\\demo\\file2\\202410725352_CN118294200A.pdf")); String instruction = this.getPatentInstruction(document); // List difyGetNumVOS = difyService.getAn(); List difyGetNumVOS = difyService.getAnswerFromAI(instruction); int pageNum = 0; Integer a = document.getNumberOfPages(); System.out.println(a); for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) { PDPage page = document.getPage(pageIndex); PDResources resources = page.getResources(); // 遍历页面资源中的图片对象 for (COSName name : resources.getXObjectNames()) { PDXObject xObject = resources.getXObject(name); if (xObject instanceof PDFormXObject) { PDFormXObject pdFormXObject = (PDFormXObject) xObject; PDResources resources2 = pdFormXObject.getResources(); for (COSName name1 : resources2.getXObjectNames()) { PDXObject xObject2 = resources2.getXObject(name1); if (xObject2 instanceof PDImageXObject) { PDImageXObject oldImage = (PDImageXObject) xObject2; BufferedImage bufferedImage = oldImage.getImage(); String path = "D:\\demo\\file2\\temp_roi" + pageNum + ".jpg"; // ✅ 关键1:获取原图位置和尺寸 File file = new File(path); ImageIO.write(bufferedImage, "jpg", file); getPictureNumService.drawPictures(path, difyGetNumVOS); PDImageXObject newImage = PDImageXObject.createFromFile(path, document); syncImageProperties(oldImage, newImage); pageNum++; resources2.put(name1, newImage); } } } } } document.save(new File("D:\\demo\\file2\\temp_roi41.pdf")); document.close(); } private static void syncImageProperties(PDImageXObject src, PDImageXObject dest) throws Exception { dest.setWidth(src.getWidth()); dest.setHeight(src.getHeight()); dest.setColorSpace(src.getColorSpace()); dest.setBitsPerComponent(src.getBitsPerComponent()); if (src.getMetadata() != null) { dest.setMetadata(src.getMetadata()); } } /** * 提取pdf具体实施方式部分 */ public String getPatentInstruction(PDDocument document) throws Exception { Integer numberOfPages = document.getNumberOfPages(); StringBuilder stringBuilder = new StringBuilder(); boolean ifAddText = false; for (int i = 7; i < numberOfPages; i++) { PDPage page = document.getPage(i); // 提取页面文本 PDFTextStripper textStripper = new PDFTextStripper(); textStripper.setStartPage(i + 1); textStripper.setEndPage(i + 1); String pageText = textStripper.getText(document); if (pageText != null) { String targetText = "具体实施方式"; if (ifAddText) { stringBuilder.append(pageText); } else if (pageText.contains(targetText)) { Integer index = pageText.indexOf("具体实施方式"); pageText = pageText.substring(index + targetText.length()); stringBuilder.append(pageText); ifAddText = true; } } } return stringBuilder.toString(); } public void test2() throws Exception { PDDocument document = Loader.loadPDF(new File("D:\\demo\\file2\\202410725352_CN118294200A.pdf")); String instruction = this.getPatentInstruction(document); List difyGetNumVOS = difyService.getAn(); // List difyGetNumVOS = difyService.getAnswerFromAI(instruction); int pageNum = 0; Integer a = document.getNumberOfPages(); System.out.println(a); for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) { PDPage page = document.getPage(pageIndex); PDResources resources = page.getResources(); ImageCoordinateExtractor locator = new ImageCoordinateExtractor(); locator.processPage(page); List picturePositionVOList = locator.picturePositionVOList; // 遍历页面资源中的图片对象 for (COSName name : resources.getXObjectNames()) { PDXObject xObject = resources.getXObject(name); if (xObject instanceof PDFormXObject) { PDFormXObject pdFormXObject = (PDFormXObject) xObject; PDResources resources2 = pdFormXObject.getResources(); for (COSName name1 : resources2.getXObjectNames()) { PDXObject xObject2 = resources2.getXObject(name1); if (xObject2 instanceof PDImageXObject) { PDImageXObject oldImage = (PDImageXObject) xObject2; BufferedImage bufferedImage = oldImage.getImage(); Integer imgWidth = bufferedImage.getWidth(); Integer imgHeight = bufferedImage.getHeight(); PicturePositionVO picturePositionVO = picturePositionVOList.stream().filter(item -> item.getPdImageXObject().equals(xObject2)).findFirst().orElse(null); Float basex=picturePositionVO.getX(); Float basey=picturePositionVO.getY(); String path = "D:\\demo\\file2\\temp_roi" + pageNum + ".jpg"; // ✅ 关键1:获取原图位置和尺寸 File file = new File(path); ImageIO.write(bufferedImage, "jpg", file); if (picturePositionVO != null) { List drawMessVOList = getPictureNumService.getPDFDrawMessVO(path, difyGetNumVOS); for (DrawMessVO drawMessVO : drawMessVOList) { drawMessVO = getPictureNumService.getTextDrawMessVO(drawMessVO, imgWidth); Integer textSize=drawMessVO.getTextSize()*72/300; TextPositionVO textPositionVO= this.getTextPositionVO(drawMessVO,imgWidth,imgHeight,picturePositionVO.getWidth(),picturePositionVO.getHeight()); addTextBox(document, page, new BigDecimal(basex.toString()).add(new BigDecimal(textPositionVO.getX().toString())).floatValue(), new BigDecimal(basey.toString()).add(new BigDecimal(textPositionVO.getY().toString())).floatValue(), drawMessVO.getValue(), "D:\\demo\\src\\main\\resources\\font\\simsun.ttf", textSize, Color.RED); } } pageNum++; } } } } } document.save(new File("D:\\demo\\file2\\temp_roi41.pdf")); document.close(); } private static void addTextBox(PDDocument doc, PDPage page, float x, float y, String text, String fontPath, int fontSize, Color color) throws Exception { // PDType0Font font = PDType0Font.load(doc, new File(fontPath)); PDFont customFont = PDType0Font.load(doc,new File(fontPath)); try (PDPageContentStream cs = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, true)) { cs.setFont(customFont, fontSize); cs.setNonStrokingColor(color); cs.beginText(); cs.newLineAtOffset(x, y); cs.showText(text); cs.endText(); } } public TextPositionVO getTextPositionVO(DrawMessVO drawMessVO, Integer imgWidth, Integer imgHeight, Float pdfImgWidth, Float pdfImgHeight) { Integer tx = drawMessVO.getX1(); Integer ty = drawMessVO.getY1(); Float a = new BigDecimal(tx.toString()).divide(new BigDecimal(imgWidth.toString()),10, RoundingMode.HALF_UP).multiply(new BigDecimal(pdfImgWidth.toString())).floatValue(); // float a=tx/imgWidth*pdfImgWidth; Float c = new BigDecimal(imgHeight.toString()).subtract(new BigDecimal(ty.toString())).floatValue(); Float b = new BigDecimal(c.toString()).divide(new BigDecimal(imgHeight.toString()),10, RoundingMode.HALF_UP).multiply(new BigDecimal(pdfImgHeight.toString())).floatValue(); // float b = (imgHeight - ty - drawMessVO.getHeight()) / imgHeight * pdfImgHeight; TextPositionVO textPositionVO = new TextPositionVO(); textPositionVO.setX(a); textPositionVO.setY(b); return textPositionVO; } }