Parcourir la source

合并多个专利的说明书pdf首页

chendayu il y a 2 ans
Parent
commit
6345820529

+ 18 - 12
PAS/pom.xml

@@ -98,7 +98,7 @@
         <dependency>
             <groupId>com.google.code.gson</groupId>
             <artifactId>gson</artifactId>
-<!--            <version>${google.code.gson.version}</version>-->
+            <!--            <version>${google.code.gson.version}</version>-->
         </dependency>
         <dependency>
             <groupId>com.baomidou</groupId>
@@ -135,12 +135,12 @@
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
         </dependency>
-<!--        <dependency>-->
-<!--            <groupId>org.openjdk.nashorn</groupId>-->
-<!--            <artifactId>nashorn-core</artifactId>-->
-<!--            <version>15.3</version>-->
-<!--        </dependency>-->
-<!--      jdk17后需要此依赖-->
+        <!--        <dependency>-->
+        <!--            <groupId>org.openjdk.nashorn</groupId>-->
+        <!--            <artifactId>nashorn-core</artifactId>-->
+        <!--            <version>15.3</version>-->
+        <!--        </dependency>-->
+        <!--      jdk17后需要此依赖-->
 
         <dependency>
             <groupId>org.apache.poi</groupId>
@@ -198,6 +198,12 @@
             <artifactId>antlr4-runtime</artifactId>
             <version>4.12.0</version>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>2.0.16</version>
+        </dependency>
     </dependencies>
 
 
@@ -243,11 +249,11 @@
                 <enabled>false</enabled>
             </snapshots>
         </repository>
-    <repository>
-        <id>com.e-iceblue</id>
-        <name>e-iceblue</name>
-        <url>http://repo.e-iceblue.com/nexus/content/groups/public/</url>
-    </repository>
+        <repository>
+            <id>com.e-iceblue</id>
+            <name>e-iceblue</name>
+            <url>http://repo.e-iceblue.com/nexus/content/groups/public/</url>
+        </repository>
     </repositories>
 
 </project>

+ 1 - 1
PAS/src/main/java/cn/cslg/pas/common/config/WebSocketConfig.java

@@ -6,7 +6,7 @@ import org.springframework.web.socket.server.standard.ServerEndpointExporter;
 
 @Configuration
 public class WebSocketConfig {
-    @Bean
+    //@Bean
     public ServerEndpointExporter serverEndpointExporter() {
         return new ServerEndpointExporter();
     }

+ 10 - 0
PAS/src/main/java/cn/cslg/pas/controller/PatentInstructionController.java

@@ -10,6 +10,7 @@ import cn.cslg.pas.common.utils.SecurityUtils.LoginUtils;
 import cn.cslg.pas.common.utils.auth.checkAuth;
 import cn.cslg.pas.domain.PatentInstruction;
 import cn.cslg.pas.service.PatentInstructionService;
+import cn.cslg.pas.service.patentPDF.PatentPDFService;
 import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.io.IoUtil;
 import io.swagger.v3.oas.annotations.Operation;
@@ -25,6 +26,7 @@ import org.springframework.web.multipart.MultipartFile;
 import javax.servlet.ServletOutputStream;
 import javax.servlet.http.HttpServletResponse;
 import java.io.IOException;
+import java.util.List;
 
 /**
  * <p>
@@ -40,6 +42,7 @@ import java.io.IOException;
 @RequiredArgsConstructor(onConstructor_ = {@Lazy})
 public class PatentInstructionController {
     private final PatentInstructionService patentInstructionService;
+    private final PatentPDFService patentPDFService;
     private final FileUtils fileUtils;
     private final LoginUtils loginUtils;
 
@@ -95,5 +98,12 @@ public class PatentInstructionController {
             e.printStackTrace();
         }
     }
+
+    /**
+     * Triggers the export of the first page of each listed patent's specification PDF.
+     *
+     * <p>The patent numbers are read from the request body. Without an explicit binding
+     * annotation Spring MVC treats a {@code List} parameter as a model attribute and fails
+     * because the {@code List} interface cannot be instantiated.
+     *
+     * @param patentNos patent numbers whose specification PDFs should be processed
+     * @throws IOException if the underlying service fails to read or write a PDF
+     */
+    @PostMapping("/pdfFirstPage")
+    @Operation(summary = "获取说明书首页")
+    public void pdfFileFirstPage(@org.springframework.web.bind.annotation.RequestBody List<String> patentNos) throws IOException {
+        patentPDFService.queryPatentPdfFirstPages(patentNos);
+    }
+
 }
 

+ 1 - 1
PAS/src/main/java/cn/cslg/pas/domain/PatentInstruction.java

@@ -53,7 +53,7 @@ public class PatentInstruction extends SuperEntity<PatentInstruction> {
 
 
     /**
-     * 文件流
+     * base64位文件流
      */
     @TableField(exist = false)
     private String pictureStringData;

+ 56 - 0
PAS/src/main/java/cn/cslg/pas/service/patentPDF/PatentPDFService.java

@@ -0,0 +1,56 @@
+package cn.cslg.pas.service.patentPDF;
+
+import cn.cslg.pas.common.utils.FileUtils;
+import cn.cslg.pas.domain.PatentInstruction;
+import cn.cslg.pas.mapper.PatentInstructionMapper;
+import cn.cslg.pas.service.PatentInstructionService;
+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.pdfbox.multipdf.PDFMergerUtility;
+import org.apache.pdfbox.multipdf.Splitter;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.ImageType;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.springframework.stereotype.Service;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Collects the first page of each patent's specification PDF into a single PDF file.
+ *
+ * @author chenyu
+ * @date 2023/9/6
+ */
+@RequiredArgsConstructor
+@Slf4j
+@Service
+public class PatentPDFService extends ServiceImpl<PatentInstructionMapper, PatentInstruction> {
+    private final PatentInstructionService patentInstructionService;
+    private final FileUtils fileUtils;
+
+    /**
+     * Looks up the specification PDFs of the given patents and writes one PDF that contains
+     * the first page of each of them, in query-result order.
+     *
+     * <p>Nothing is written when {@code patentNos} is null or empty, or when no page could
+     * be collected.
+     *
+     * @param patentNos patent numbers to look up
+     * @throws IOException if a source PDF cannot be loaded or the output cannot be saved
+     */
+    public void queryPatentPdfFirstPages(List<String> patentNos) throws IOException {
+        log.info("开始处理【导出多件专利pdf首页】的业务,业务参数为patentNos:{}", patentNos);
+
+        // An empty collection would be passed straight into the IN (...) condition below.
+        if (patentNos == null || patentNos.isEmpty()) {
+            return;
+        }
+
+        // Fetch every specification record matching the requested patent numbers.
+        List<PatentInstruction> patentInstructions = patentInstructionService.list(new LambdaQueryWrapper<PatentInstruction>().in(PatentInstruction::getPatentNo, patentNos));
+
+        // Imported pages keep referencing their source document, so every source stays
+        // open until the merged document has been saved.
+        List<PDDocument> sources = new java.util.ArrayList<>();
+        try (PDDocument merged = new PDDocument()) {
+            for (PatentInstruction patentInstruction : patentInstructions) {
+                String filePath = fileUtils.getSystemPath() + patentInstruction.getUrl();
+                PDDocument source = PDDocument.load(new File(filePath));
+                sources.add(source);
+                // Skip PDFs without pages instead of failing the whole export.
+                if (source.getNumberOfPages() > 0) {
+                    merged.importPage(source.getPage(0));
+                }
+            }
+
+            // Save once, after all first pages were collected, so earlier pages are not overwritten.
+            if (merged.getNumberOfPages() > 0) {
+                merged.save(new File("合并多件专利的pdf首页.pdf"));
+            }
+        } finally {
+            for (PDDocument source : sources) {
+                try {
+                    source.close();
+                } catch (IOException e) {
+                    log.warn("Failed to close source PDF document", e);
+                }
+            }
+        }
+    }
+
+}

+ 24 - 0
PAS/src/test/java/cn/cslg/pas/service/PatentPDFServiceTests.java

@@ -0,0 +1,24 @@
+//package cn.cslg.pas.service;
+//
+//import cn.cslg.pas.service.patentPDF.PatentPDFService;
+//import org.junit.jupiter.api.Test;
+//import org.springframework.beans.factory.annotation.Autowired;
+//import org.springframework.boot.test.context.SpringBootTest;
+//
+//import java.io.IOException;
+//
+///**
+// * @author chenyu
+// * @date 2023/9/6
+// */
+//@SpringBootTest
+//public class PatentPDFServiceTests {
+//    @Autowired
+//    private PatentPDFService patentPDFService;
+//
+//    @Test
+//    void test() throws IOException {
+//        patentPDFService.queryPatentPdfFirstPages("CN201080016105.6");
+//    }
+//
+//}

+ 42 - 0
PAS/src/test/java/cn/cslg/pas/service/PdfBoxGetFirstPageTests.java

@@ -0,0 +1,42 @@
+package cn.cslg.pas.service;
+
+import org.apache.pdfbox.multipdf.Splitter;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.junit.jupiter.api.Test;
+import org.springframework.boot.test.context.SpringBootTest;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.ListIterator;
+
+/**
+ * Manual experiment: uses Apache PDFBox to split the first page out of a PDF.
+ *
+ * <p>Reads a PDF from a fixed local path, saves its first page to a temporary file in the
+ * working directory and deletes that file again.
+ *
+ * @author chenyu
+ * @date 2023/9/6
+ */
+@SpringBootTest
+public class PdfBoxGetFirstPageTests {
+
+    @Test
+    void main() throws IOException {
+        File file = new File("C:\\Users\\Administrator\\Desktop\\专利接口调用帮助文档_2000041.pdf");
+        File pdfFirstPageFile = new File("新pdf首页文件.pdf");
+        // try-with-resources closes both documents even when splitting or saving fails.
+        try (PDDocument document = PDDocument.load(file)) {
+            Splitter splitter = new Splitter();
+            // Page numbers are 1-based here; start == end == 1 keeps only the first page.
+            splitter.setStartPage(1);
+            splitter.setEndPage(1);
+            List<PDDocument> pages = splitter.split(document);
+            try (PDDocument firstPage = pages.get(0)) {
+                firstPage.save(pdfFirstPageFile);
+            }
+        } finally {
+            // Always remove the temporary output, including after a failure.
+            pdfFirstPageFile.delete();
+        }
+
+    }
+
+}

+ 62 - 0
PAS/src/test/java/cn/cslg/pas/service/PdfBoxGetImagesTests.java

@@ -0,0 +1,62 @@
+package cn.cslg.pas.service;
+
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDResources;
+import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+import org.junit.jupiter.api.Test;
+import org.springframework.boot.test.context.SpringBootTest;
+
+import javax.imageio.ImageIO;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Manual experiment: uses Apache PDFBox to extract the images embedded in a PDF.
+ *
+ * <p>Reads a PDF from a fixed local path and writes every image XObject found directly in
+ * the page resources to a numbered file in the working directory.
+ *
+ * @author chenyu
+ * @date 2023/9/5
+ */
+@SpringBootTest
+public class PdfBoxGetImagesTests {
+
+    @Test
+    void main() throws Exception {
+        File file = new File("C:\\Users\\Administrator\\Desktop\\专利接口调用帮助文档_2000041.pdf");
+        // The document must stay open while the images are decoded and is closed afterwards.
+        try (PDDocument document = PDDocument.load(file)) {
+            List<PDImageXObject> images = new ArrayList<>();
+            int numberOfPages = document.getNumberOfPages();
+            int numberOfPages2 = document.getPages().getCount();
+            System.out.println("pdf总页数为:" + numberOfPages + "或是:" + numberOfPages2);
+
+            for (PDPage page : document.getPages()) {
+                images.addAll(getImagesFromResources(page.getResources()));
+            }
+            // The counter lives outside the loop so each image gets its own file name
+            // instead of all of them overwriting the file numbered 1.
+            int i = 0;
+            for (PDImageXObject n : images) {
+                File outputfile = new File("说明书中的附图" + (++i) + "." + n.getSuffix());
+                ImageIO.write(n.getImage(), n.getSuffix(), outputfile);
+            }
+        }
+    }
+
+    /**
+     * Collects the image XObjects listed directly in the given resources.
+     *
+     * <p>Form XObjects are only reported on stdout; they are not searched for nested images.
+     *
+     * @param resources page resources to scan; may be null
+     * @return the images found, or an empty list when {@code resources} is null
+     * @throws IOException if an XObject cannot be read
+     */
+    List<PDImageXObject> getImagesFromResources(PDResources resources) throws IOException {
+        List<PDImageXObject> images = new ArrayList<>();
+        // A page without a resource dictionary has nothing to extract.
+        if (resources == null) {
+            return images;
+        }
+        for (COSName xObjectName : resources.getXObjectNames()) {
+            PDXObject xObject = resources.getXObject(xObjectName);
+            if (xObject instanceof PDFormXObject) {
+                System.out.println("是PDFormXObject类型");
+            } else if (xObject instanceof PDImageXObject) {
+                System.out.println("是PDImageXObject类型");
+                images.add((PDImageXObject) xObject);
+            }
+        }
+        return images;
+    }
+
+}

+ 0 - 36
PAS/src/test/java/cn/cslg/pas/service/test.java

@@ -1,36 +0,0 @@
-package cn.cslg.pas.service;
-
-import org.junit.jupiter.api.Test;
-import org.springframework.boot.test.context.SpringBootTest;
-
-/**
- * @author chenyu
- * @date 2023/9/1
- */
-@SpringBootTest
-public class test {
-
-    @Test
-    void test() {
-        String text = "【請求項1】\n" +
-                "複数の電源と;\n" +
-                "各々が前記電源の1つに関連し、前記関連する電源の性能データを収集する複数の監視モジュールと;\n" +
-                "各々が前記監視モジュールの1つに関連し、前記性能データを送信する複数の送信機と;\n" +
-                "各々が前記送信機の1つに関連し、タイマーの初期化からの経過時間に応じて送信イベントを制御する複数のコントローラと\n" +
-                "を具えていることを特徴とする分散型DC電力設備用監視システム。\n";
-        boolean result1 = text.contains("、");
-        boolean result2 = text.substring(0, text.indexOf("、")).contains("請求項");
-        boolean result3 = text.contains("、");
-        boolean result4 = text.contains("請求項");
-
-        if ((text.contains("、") && text.substring(0, text.indexOf("、")).contains("請求項")) || (!text.contains("、") && text.contains("請求項"))) {
-            System.out.println("通过");
-        }
-
-//        if ((strs[i].contains("、") && strs[i].substring(0, strs[i].indexOf("、")).contains("請求項")) || (!strs[i].contains("、") && strs[i].contains("請求項"))) {
-//
-//        }
-
-    }
-
-}