|
@@ -1,58 +0,0 @@
|
|
-package com.test.xiaoshi.test.controller;
|
|
|
|
-
|
|
|
|
-import org.apache.pdfbox.pdmodel.PDDocument;
|
|
|
|
-import org.apache.pdfbox.rendering.PDFRenderer;
|
|
|
|
-import org.apache.pdfbox.text.PDFTextStripper;
|
|
|
|
-
|
|
|
|
-import javax.imageio.ImageIO;
|
|
|
|
-import java.awt.image.BufferedImage;
|
|
|
|
-import java.io.File;
|
|
|
|
-import java.io.IOException;
|
|
|
|
-
|
|
|
|
-/**
|
|
|
|
- * @author chenyu
|
|
|
|
- * @date 2023/10/19
|
|
|
|
- */
|
|
|
|
-public class PdfTest {
|
|
|
|
- public static void main(String[] args) throws IOException {
|
|
|
|
- getImgFromPdf("2.pdf");
|
|
|
|
-
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * 读取pdf中文字信息(全部)
|
|
|
|
- *
|
|
|
|
- * @param inputFile
|
|
|
|
- * @return
|
|
|
|
- */
|
|
|
|
- public static String readPdf(String inputFile) {
|
|
|
|
- //创建文档对象
|
|
|
|
- PDDocument doc = null;
|
|
|
|
- String content = "";
|
|
|
|
- try {
|
|
|
|
- //加载一个pdf对象
|
|
|
|
- doc = PDDocument.load(new File(inputFile));
|
|
|
|
- //获取一个PDFTextStripper文本剥离对象
|
|
|
|
- PDFTextStripper textStripper = new PDFTextStripper();
|
|
|
|
- content = textStripper.getText(doc);
|
|
|
|
- System.out.println("内容:" + content);
|
|
|
|
- System.out.println("全部页数" + doc.getNumberOfPages());
|
|
|
|
- //关闭文档
|
|
|
|
- doc.close();
|
|
|
|
- } catch (Exception e) {
|
|
|
|
- e.printStackTrace();
|
|
|
|
- }
|
|
|
|
- return content;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- public static void getImgFromPdf(String inputFile) throws IOException {
|
|
|
|
- File file = new File(inputFile);
|
|
|
|
- PDDocument doc = PDDocument.load(file);
|
|
|
|
- PDFRenderer renderer = new PDFRenderer(doc);
|
|
|
|
- BufferedImage image = renderer.renderImage(3);
|
|
|
|
- ImageIO.write(image, "JPEG", new File("pdf第1页中的图片.jpg"));
|
|
|
|
- doc.close();
|
|
|
|
-
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-}
|
|
|