|
@@ -0,0 +1,62 @@
|
|
|
+package cn.cslg.pas.service;
|
|
|
+
|
|
|
+import org.apache.pdfbox.cos.COSName;
|
|
|
+import org.apache.pdfbox.pdmodel.PDDocument;
|
|
|
+import org.apache.pdfbox.pdmodel.PDPage;
|
|
|
+import org.apache.pdfbox.pdmodel.PDResources;
|
|
|
+import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
|
|
+import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
|
|
|
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
|
|
+import org.junit.jupiter.api.Test;
|
|
|
+import org.springframework.boot.test.context.SpringBootTest;
|
|
|
+
|
|
|
+import javax.imageio.ImageIO;
|
|
|
+import java.io.File;
|
|
|
+import java.io.IOException;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+
|
|
|
+/**
|
|
|
+ * 测试使用 Apache PDFBox框架解析获得pdf中的图片
|
|
|
+ *
|
|
|
+ * @author chenyu
|
|
|
+ * @date 2023/9/5
|
|
|
+ */
|
|
|
+@SpringBootTest
|
|
|
+public class PdfBoxGetImagesTests {
|
|
|
+
|
|
|
+ @Test
|
|
|
+ void main() throws Exception {
|
|
|
+ File file = new File("C:\\Users\\Administrator\\Desktop\\专利接口调用帮助文档_2000041.pdf");
|
|
|
+ PDDocument document = PDDocument.load(file);
|
|
|
+
|
|
|
+ List<PDImageXObject> images = new ArrayList<>();
|
|
|
+ int numberOfPages = document.getNumberOfPages();
|
|
|
+ int numberOfPages2 = document.getPages().getCount();
|
|
|
+ System.out.println("pdf总页数为:" + numberOfPages + "或是:" + numberOfPages2);
|
|
|
+
|
|
|
+ for (PDPage page : document.getPages()) {
|
|
|
+ images.addAll(getImagesFromResources(page.getResources()));
|
|
|
+ }
|
|
|
+ for (PDImageXObject n : images) {
|
|
|
+ int i = 0;
|
|
|
+ File outputfile = new File("说明书中的附图" + (++i) + "." + n.getSuffix());
|
|
|
+ ImageIO.write(n.getImage(), n.getSuffix(), outputfile);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ List<PDImageXObject> getImagesFromResources(PDResources resources) throws IOException {
|
|
|
+ List<PDImageXObject> images = new ArrayList<>();
|
|
|
+ for (COSName xObjectName : resources.getXObjectNames()) {
|
|
|
+ PDXObject xObject = resources.getXObject(xObjectName);
|
|
|
+ if (xObject instanceof PDFormXObject) {
|
|
|
+ System.out.println("是PDFormXObject类型");
|
|
|
+ } else if (xObject instanceof PDImageXObject) {
|
|
|
+ System.out.println("是PDImageXObject类型");
|
|
|
+ images.add((PDImageXObject) xObject);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return images;
|
|
|
+ }
|
|
|
+
|
|
|
+}
|