|
@@ -0,0 +1,334 @@
|
|
|
|
+package com.test.xiaoshi.test.controller;
|
|
|
|
+
|
|
|
|
+import org.apache.commons.codec.binary.Base64;
|
|
|
|
+import org.apache.pdfbox.cos.COSName;
|
|
|
|
+import org.apache.pdfbox.multipdf.PDFMergerUtility;
|
|
|
|
+import org.apache.pdfbox.multipdf.Splitter;
|
|
|
|
+import org.apache.pdfbox.pdmodel.*;
|
|
|
|
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
|
|
|
+import org.apache.pdfbox.pdmodel.font.PDFont;
|
|
|
|
+import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
|
|
|
+import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
|
|
|
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
|
|
|
+import org.apache.pdfbox.rendering.PDFRenderer;
|
|
|
|
+import org.apache.pdfbox.text.PDFTextStripper;
|
|
|
|
+
|
|
|
|
+import javax.imageio.ImageIO;
|
|
|
|
+import java.awt.image.BufferedImage;
|
|
|
|
+import java.io.*;
|
|
|
|
+import java.util.ArrayList;
|
|
|
|
+import java.util.Iterator;
|
|
|
|
+import java.util.List;
|
|
|
|
+
|
|
|
|
+public class PdfBoxUtil {
|
|
|
|
+ public static void main(String args[]) throws IOException {
|
|
|
|
+ String filePath = "C:\\Users\\Administrator\\Desktop\\1.pdf";
|
|
|
|
+
|
|
|
|
+ // 1、读取pdf文件
|
|
|
|
+// String content = readPdf(filePath);
|
|
|
|
+// System.out.println("读取内容:" + content);
|
|
|
|
+
|
|
|
|
+ //2、获取pdf图片
|
|
|
|
+ getImg(filePath);
|
|
|
|
+
|
|
|
|
+ // 3、pdf中插入文字
|
|
|
|
+// String inFilePath = "F:\\image_test\\sample.pdf";
|
|
|
|
+// String outFilePath = "F:\\image_test\\sample2.pdf";
|
|
|
|
+// try {
|
|
|
|
+// insertWordContent(inFilePath,outFilePath,1,"插入的小狗文字");
|
|
|
|
+// } catch (Exception e) {
|
|
|
|
+// e.printStackTrace();
|
|
|
|
+// }
|
|
|
|
+
|
|
|
|
+ // 4、pdf文件插入图片
|
|
|
|
+// String inFilePath = "F:\\image_test\\sample.pdf";
|
|
|
|
+// String imagePath = "F:\\image_test\\sun1.jpg";
|
|
|
|
+// String outFilePath = "F:\\image_test\\sample3.pdf";
|
|
|
|
+// try {
|
|
|
|
+// insertImageContent(inFilePath,imagePath,outFilePath,1);
|
|
|
|
+// } catch (Exception e) {
|
|
|
|
+// e.printStackTrace();
|
|
|
|
+// }
|
|
|
|
+
|
|
|
|
+ // 5、合并pdf文件
|
|
|
|
+// String filePath1 = "F:\\image_test\\sample.pdf";
|
|
|
|
+// String filePath2 = "F:\\image_test\\sample2.pdf";
|
|
|
|
+// String outFilePath = "F:\\image_test\\sample4.pdf";
|
|
|
|
+// List<String> filePathList = new ArrayList<>();
|
|
|
|
+// filePathList.add(filePath1);
|
|
|
|
+// filePathList.add(filePath2);
|
|
|
|
+// try {
|
|
|
|
+// mergePdf(filePathList, outFilePath);
|
|
|
|
+// } catch (Exception e) {
|
|
|
|
+// e.printStackTrace();
|
|
|
|
+// }
|
|
|
|
+
|
|
|
|
+ // 6、拆分pdf文件
|
|
|
|
+// String inFilePath = "F:\\image_test\\sample4.pdf";
|
|
|
|
+// String targetPath = "F:\\image_test\\11";
|
|
|
|
+// String targetFileName = "aa";
|
|
|
|
+// try {
|
|
|
|
+// spiltPdf(inFilePath, targetPath, targetFileName);
|
|
|
|
+// } catch (Exception e) {
|
|
|
|
+// e.printStackTrace();
|
|
|
|
+// }
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 读取pdf中文字信息(全部)
|
|
|
|
+ *
|
|
|
|
+ * @param inputFile
|
|
|
|
+ * @return
|
|
|
|
+ */
|
|
|
|
+ public static String readPdf(String inputFile) {
|
|
|
|
+ //创建文档对象
|
|
|
|
+ PDDocument doc = null;
|
|
|
|
+ String content = "";
|
|
|
|
+ try {
|
|
|
|
+ //加载一个pdf对象
|
|
|
|
+ doc = PDDocument.load(new File(inputFile));
|
|
|
|
+ //获取一个PDFTextStripper文本剥离对象
|
|
|
|
+ PDFTextStripper textStripper = new PDFTextStripper();
|
|
|
|
+ content = textStripper.getText(doc);
|
|
|
|
+// System.out.println("内容:" + content);
|
|
|
|
+// System.out.println("全部页数" + doc.getNumberOfPages());
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ } finally {
|
|
|
|
+ try {
|
|
|
|
+ //关闭文档
|
|
|
|
+ if (doc != null) {
|
|
|
|
+ doc.close();
|
|
|
|
+ }
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return content;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 插入文字到pdf中
|
|
|
|
+ *
|
|
|
|
+ * @param inputFilePath
|
|
|
|
+ * @param outputFilePath
|
|
|
|
+ * @param pageNum
|
|
|
|
+ * @param message
|
|
|
|
+ * @throws Exception
|
|
|
|
+ */
|
|
|
|
+ public static void insertWordContent(String inputFilePath, String outputFilePath, Integer pageNum, String message) throws Exception {
|
|
|
|
+ File inputPDFFile = new File(inputFilePath);
|
|
|
|
+ File outputPDFFile = new File(outputFilePath);
|
|
|
|
+ PDDocument doc = null;
|
|
|
|
+ try {
|
|
|
|
+ doc = PDDocument.load(inputPDFFile);
|
|
|
|
+ PDPageTree allPages = doc.getDocumentCatalog().getPages();
|
|
|
|
+// PDFont font = PDType1Font.HELVETICA_BOLD;
|
|
|
|
+ PDFont font = PDType0Font.load(doc, new File("C:\\Users\\DELL\\Desktop\\FZLTHJW.TTF"));
|
|
|
|
+ // 字体大小
|
|
|
|
+ float fontSize = 36.0f;
|
|
|
|
+ PDPage page = (PDPage) allPages.get(pageNum - 1);
|
|
|
|
+ PDRectangle pageSize = page.getMediaBox();
|
|
|
|
+ float stringWidth = font.getStringWidth(message) * fontSize / 1000f;
|
|
|
|
+ // 计算页面的中心位置
|
|
|
|
+ int rotation = page.getRotation();
|
|
|
|
+ boolean rotate = rotation == 90 || rotation == 270;
|
|
|
|
+ float pageWidth = rotate ? pageSize.getHeight() : pageSize.getWidth();
|
|
|
|
+ float pageHeight = rotate ? pageSize.getWidth() : pageSize.getHeight();
|
|
|
|
+ double centeredXPosition = rotate ? pageHeight / 2f : (pageWidth - stringWidth) / 2f;
|
|
|
|
+ double centeredYPosition = rotate ? (pageWidth - stringWidth) / 2f : pageHeight / 2f;
|
|
|
|
+ // append the content to the existing stream
|
|
|
|
+ PDPageContentStream contentStream = new PDPageContentStream(doc, page, true, true, true);
|
|
|
|
+ contentStream.beginText();
|
|
|
|
+ // 设置字体和字体大小
|
|
|
|
+ contentStream.setFont(font, fontSize);
|
|
|
|
+ // 设置字体颜色(如下为红色)
|
|
|
|
+ contentStream.setNonStrokingColor(255, 0, 0);
|
|
|
|
+ if (rotate) {
|
|
|
|
+ // rotate the text according to the page rotation
|
|
|
|
+ contentStream.setTextRotation(Math.PI / 2, centeredXPosition, centeredYPosition);
|
|
|
|
+ } else {
|
|
|
|
+ contentStream.setTextTranslation(centeredXPosition, centeredYPosition);
|
|
|
|
+ }
|
|
|
|
+ // 写入文字
|
|
|
|
+ contentStream.drawString(message);
|
|
|
|
+ contentStream.endText();
|
|
|
|
+ contentStream.close();
|
|
|
|
+ // 保存到新文档中
|
|
|
|
+ doc.save(outputPDFFile);
|
|
|
|
+ System.out.println("成功向pdf插入文字");
|
|
|
|
+ } finally {
|
|
|
|
+ if (doc != null) {
|
|
|
|
+ doc.close();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 在pdf中插入图片
|
|
|
|
+ *
|
|
|
|
+ * @param inputFilePath
|
|
|
|
+ * @param imagePath
|
|
|
|
+ * @param outputFilePath
|
|
|
|
+ * @param pageNum
|
|
|
|
+ * @throws Exception
|
|
|
|
+ */
|
|
|
|
+ public static void insertImageContent(String inputFilePath, String imagePath, String outputFilePath, Integer pageNum) throws Exception {
|
|
|
|
+ File inputPDFFile = new File(inputFilePath);
|
|
|
|
+ File outputPDFFile = new File(outputFilePath);
|
|
|
|
+
|
|
|
|
+ try {
|
|
|
|
+ PDDocument doc = PDDocument.load(inputPDFFile);
|
|
|
|
+ PDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc);
|
|
|
|
+
|
|
|
|
+ PDPage page = doc.getPage(0);
|
|
|
|
+ //注释的这行代码会覆盖原内容,没注释的那行不会覆盖
|
|
|
|
+// PDPageContentStream contentStream = new PDPageContentStream(doc, page);
|
|
|
|
+ PDPageContentStream contentStream = new PDPageContentStream(doc, page, true, true, true);
|
|
|
|
+ contentStream.drawImage(pdImage, 70, 250);
|
|
|
|
+ contentStream.close();
|
|
|
|
+ doc.save(outputPDFFile);
|
|
|
|
+ doc.close();
|
|
|
|
+ System.out.println("成功插入图片");
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 合并pdf文件
|
|
|
|
+ *
|
|
|
|
+ * @param pathList
|
|
|
|
+ * @param targetPDFPath
|
|
|
|
+ * @throws Exception
|
|
|
|
+ */
|
|
|
|
+ public static void mergePdf(List<String> pathList, String targetPDFPath) throws Exception {
|
|
|
|
+ List<InputStream> inputStreams = new ArrayList<>();
|
|
|
|
+ for (String path : pathList) {
|
|
|
|
+ inputStreams.add(new FileInputStream(new File(path)));
|
|
|
|
+ }
|
|
|
|
+ PDFMergerUtility mergePdf = new PDFMergerUtility();
|
|
|
|
+ File file = new File(targetPDFPath);
|
|
|
|
+
|
|
|
|
+ if (!file.exists()) {
|
|
|
|
+ file.delete();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ mergePdf.addSources(inputStreams);
|
|
|
|
+ mergePdf.setDestinationFileName(targetPDFPath);
|
|
|
|
+ mergePdf.mergeDocuments();
|
|
|
|
+ for (InputStream in : inputStreams) {
|
|
|
|
+ if (in != null) {
|
|
|
|
+ in.close();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 将pdf文件分割成多个
|
|
|
|
+ *
|
|
|
|
+ * @param sourcePdfPath
|
|
|
|
+ * @param splitPath
|
|
|
|
+ * @param splitFileName
|
|
|
|
+ * @throws Exception
|
|
|
|
+ */
|
|
|
|
+ public static void spiltPdf(String sourcePdfPath, String splitPath, String splitFileName) throws Exception {
|
|
|
|
+ File targetDir = new File(splitPath);
|
|
|
|
+ if (!targetDir.exists()) {
|
|
|
|
+ targetDir.mkdirs();
|
|
|
|
+ }
|
|
|
|
+ int j = 1;
|
|
|
|
+ String splitPdf = splitPath + File.separator + splitFileName + "_";
|
|
|
|
+
|
|
|
|
+ // Loading an existing PDF document
|
|
|
|
+ File file = new File(sourcePdfPath);
|
|
|
|
+ PDDocument document = PDDocument.load(file);
|
|
|
|
+ // Instantiating Splitter class
|
|
|
|
+ Splitter splitter = new Splitter();
|
|
|
|
+ splitter.setStartPage(1);
|
|
|
|
+ splitter.setSplitAtPage(1);
|
|
|
|
+ splitter.setEndPage(5);
|
|
|
|
+ // splitting the pages of a PDF document
|
|
|
|
+ List<PDDocument> Pages = splitter.split(document);
|
|
|
|
+ // Creating an iterator
|
|
|
|
+ Iterator<PDDocument> iterator = Pages.listIterator();
|
|
|
|
+ // Saving each page as an individual document
|
|
|
|
+ while (iterator.hasNext()) {
|
|
|
|
+ PDDocument pd = iterator.next();
|
|
|
|
+ String pdfName = splitPdf + j++ + ".pdf";
|
|
|
|
+ pd.save(pdfName);
|
|
|
|
+ }
|
|
|
|
+ document.close();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static void getImg(String filePath) throws IOException {
|
|
|
|
+ PDDocument document = PDDocument.load(new File(filePath));
|
|
|
|
+ PDPage pdfpage = document.getPage(1);
|
|
|
|
+ int i = 1;
|
|
|
|
+ PDResources pdResources = pdfpage.getResources();
|
|
|
|
+ for (COSName c : pdResources.getXObjectNames()) {
|
|
|
|
+ PDXObject o = pdResources.getXObject(c);
|
|
|
|
+ if (o instanceof org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) {
|
|
|
|
+ File file = new File(i + ".png");
|
|
|
|
+ i++;
|
|
|
|
+ ImageIO.write(((PDImageXObject) o).getImage(), "png", file);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 提取指定PDF页面的图片转换为Base64的List
|
|
|
|
+ * 注意:图片提取的顺序是PDF创建时图片插入的顺序
|
|
|
|
+ *
|
|
|
|
+ * @throws Exception
|
|
|
|
+ */
|
|
|
|
+ public static List<String> getImg2(String filePath) {
|
|
|
|
+ List<String> photos = new ArrayList<>();
|
|
|
|
+ try (PDDocument document = PDDocument.load(new File(filePath))) {
|
|
|
|
+ //TODO 下标从0开始,所以-1
|
|
|
|
+ for (int i = 0; i < document.getNumberOfPages(); i++) {
|
|
|
|
+ PDPage pdfpage = document.getPage(i);
|
|
|
|
+ // get resource of pdf
|
|
|
|
+ PDResources pdResources = pdfpage.getResources();
|
|
|
|
+ Iterable<COSName> xObjectNames = pdResources.getXObjectNames();
|
|
|
|
+ Iterator<COSName> iterator = xObjectNames.iterator();
|
|
|
|
+ while (iterator.hasNext()) {
|
|
|
|
+ PDXObject o = pdResources.getXObject(iterator.next());
|
|
|
|
+ if (o instanceof PDImageXObject) {
|
|
|
|
+ //得到BufferedImage对象
|
|
|
|
+ BufferedImage image = ((PDImageXObject) o).getImage();
|
|
|
|
+ String base64img = convertimgtoBase64(image);
|
|
|
|
+ // 可以打印到本地,查看输出顺序
|
|
|
|
+ //String imglocation = "C:\\CER\\AE EMC lab_Report template\\pdf img by page\\";
|
|
|
|
+ //File imgfile = new File(imglocation + StringUtil.get32UUID() + ".png");
|
|
|
|
+ //ImageIO.write(image, "png", imgfile);
|
|
|
|
+
|
|
|
|
+ photos.add("data:image/jpg;base64," + base64img);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ return photos;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static String convertimgtoBase64(BufferedImage image) {
|
|
|
|
+ String png_base64 = "";
|
|
|
|
+ try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
|
|
|
+ ImageIO.write(image, "png", baos);//写入流中
|
|
|
|
+ byte[] bytes = baos.toByteArray();//转换成字节
|
|
|
|
+// png_base64 = new BASE64Encoder().encode(bytes);//jdk1.8写法
|
|
|
|
+ png_base64 = Base64.encodeBase64String(bytes);//JDK11写法
|
|
|
|
+// String png_base64 = Base64.encodeBase64String(bytes).trim();//转换成base64串
|
|
|
|
+ png_base64 = png_base64.replaceAll("\n", "").replaceAll("\r", "");//删除 \r\n
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ return png_base64;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+}
|