浏览代码

3/8 邮件发送模块添加

lwhhszx 1 年之前
父节点
当前提交
6b6d15cb84
共有 1 个文件被更改,包括 79 次插入和 0 次删除
  1. 79 0
      src/test/java/cn/cslg/pas/service/PDFBoxTests.java

+ 79 - 0
src/test/java/cn/cslg/pas/service/PDFBoxTests.java

@@ -1,13 +1,92 @@
 package cn.cslg.pas.service;
 
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDResources;
+import org.apache.pdfbox.pdmodel.common.PDStream;
+import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.junit.Test;
 import org.springframework.boot.test.context.SpringBootTest;
 
+import javax.imageio.ImageIO;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Set;
+
 @SpringBootTest
 public class PDFBoxTests {
+    @Test
     public  void test(){
+        File file = new File("D:\\PAS\\target\\file\\20230913\\0a01ae69c6b540789ae7848157433cf7.pdf");
+        FileInputStream in = null;
+        try {
+
+            // 获取解析后得到的PDF文档对象
+            PDDocument pdfdocument = PDDocument.load(file);
+
+            int pageNumbers=pdfdocument.getNumberOfPages();
+            System.out.println("PDF总页数:"+pageNumbers);
+            //新建一个PDF文本剥离器
+            PDFTextStripper stripper = new PDFTextStripper();
+            //sort设置为true 则按照行进行读取,默认是false
+            stripper.setSortByPosition(true);
+
+
+            for(int i=1;i<=pdfdocument.getNumberOfPages();i++)
+            {
+                //读取文字
+                System.out.println("第 " + i + " 页 " );
+                stripper.setStartPage(i);
+                stripper.setEndPage(i);
+                System.out.println("PDF文件的文本内容如下:");
+
 
+                //读取图片
+                PDPage page=pdfdocument.getPage(i-1);
+                PDResources pdResources = page.getResources();
+                int index=1;
+                //获取页中的对象
+                System.out.println("PDF文件的图片:");
+                Iterable<COSName> cosNames =pdResources.getXObjectNames();
+                for(COSName csName : cosNames)
+                {
+                    PDXObject pdxObject = pdResources.getXObject(csName);
+                    if(pdxObject instanceof PDImageXObject){
+                        PDStream pdStream = pdxObject.getStream();
+                        PDImageXObject image = new PDImageXObject(pdStream, pdResources);
+                        File imgFile = new File(String.format("D:\\PAS\\target\\file\\2023"+File.separator+"page%d-image%d.jpeg", i,index++));
+                        ImageIO.write(image.getImage(), "jpeg", imgFile);
+                    }
+                    else if(pdxObject instanceof PDFormXObject)
+                    {
+                        ((PDFormXObject) pdxObject).getResources();
+                        PDStream pdStream = pdxObject.getStream();
+                        PDImageXObject image = new PDImageXObject(pdStream, pdResources);
+                        File imgFile = new File(String.format("D:\\PAS\\target\\file\\2023"+File.separator+"page%d-image%d.jpeg", i,index++));
+                        ImageIO.write(image.getImage(), "jpeg", imgFile);
+                    }
+                }
 
+            }
+        }
+        catch (Exception e) {
+            System.out.println("读取PDF文件" + file.getAbsolutePath() + "生失败!" + e);
+            e.printStackTrace();
+        } finally {
+            if (in != null) {
+                try {
+                    in.close();
+                } catch (IOException e1) {
+                }
+            }
 
 
+        }
     }
 }