lwhhszx 1 年間 前
コミット
95f049b894

+ 10 - 0
PAS/pom.xml

@@ -204,6 +204,16 @@
             <artifactId>pdfbox</artifactId>
             <version>2.0.16</version>
         </dependency>
+        <dependency>
+            <groupId>jaxen</groupId>
+            <artifactId>jaxen</artifactId>
+            <version>1.2.0</version> <!-- jaxen supplies the XPath engine that dom4j's createXPath/selectNodes rely on -->
+        </dependency>
+        <dependency>
+            <groupId>dom4j</groupId>
+            <artifactId>dom4j</artifactId>
+            <version>1.6.1</version>
+        </dependency>
     </dependencies>
 
 

+ 21 - 0
PAS/src/main/java/cn/cslg/pas/common/core/IgnoreDTDEntityResolver.java

@@ -0,0 +1,21 @@
+package cn.cslg.pas.common.core;
+
+
+
+
+import org.xml.sax.EntityResolver;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+public class IgnoreDTDEntityResolver implements EntityResolver { // SAX EntityResolver that answers every entity lookup with the same in-memory stub
+    // NOTE(review): presumably installed so the parser never fetches an external DTD (the class name suggests this) - confirm against the XML inputs
+ @Override
+ public InputSource resolveEntity(String publicId, String systemId) // neither publicId nor systemId is read
+   throws SAXException, IOException {
+        return new InputSource(new ByteArrayInputStream("<?xml version='1.0' encoding='UTF-8'?>".getBytes())); // stub content is only an XML declaration; getBytes() with no argument encodes using the JVM default charset
+ }
+
+}

+ 78 - 33
PAS/src/main/java/cn/cslg/pas/service/upLoadPatent/ExcutePatentDataStar.java

@@ -1,5 +1,6 @@
 package cn.cslg.pas.service.upLoadPatent;
 
+import cn.cslg.pas.common.core.IgnoreDTDEntityResolver;
 import cn.cslg.pas.common.model.dto.UploadFileDTO;
 import cn.cslg.pas.common.model.outApi.PatentStarListDto;
 import cn.cslg.pas.common.model.vo.KeyValueVO;
@@ -23,6 +24,10 @@ import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.fileupload.FileItem;
 import org.apache.commons.fileupload.disk.DiskFileItemFactory;
 import org.apache.commons.lang3.StringUtils;
+import org.dom4j.Document;
+import org.dom4j.Element;
+import org.dom4j.XPath;
+import org.dom4j.io.SAXReader;
 import org.springframework.stereotype.Service;
 import org.springframework.web.multipart.MultipartFile;
 import org.springframework.web.multipart.commons.CommonsMultipartFile;
@@ -95,7 +100,6 @@ public class ExcutePatentDataStar implements IExcutePatentData {
                         continue;
                     }
 
-
                     //至此,即找到了当前检索式即为要下载的专利开始位置所属的检索式,重新计算开始位置startNumber(即要下载的专利的开始位置,对应了当前检索式中该专利位置)
                     startNumber = startNumber - (countForStart - patentStarListDto.getTotal());
                     //第一个检索式下载完成后,从第二个检索式开始 startNumber都是从1开始下载
@@ -692,30 +696,51 @@ public class ExcutePatentDataStar implements IExcutePatentData {
                     return true;
                 }
 
-                //使用正则表达式拼接出权要原文
-                String regex = "(?<=<claim id=.{1,110}>)[\\w\\W]+?(?=</claim>)";
-                Pattern compile = Pattern.compile(regex);
-                Matcher matcher = compile.matcher(cnFullXmlStr);
-                StringBuilder builder = new StringBuilder();
-                while (matcher.find()) {
-                    String macherGroup = matcher.group();
-                    String right = macherGroup.replaceAll("<!--.*?-->", "").trim();
-                    right = right.replaceAll("<.*?>", "").trim();
-                    right = right.replaceAll(" +", "");
-                    right = right.replaceAll("<br/>", "");
-                    right = right.replace("\r\n", "").replace("\n", "").replace("\t", "");
-                    builder.append(right).append("\r\n");
+//                //使用正则表达式拼接出权要原文
+//                String regex = "(?<=<claim id=.{1,110}>)[\\w\\W]+?(?=</claim>)";
+//                Pattern compile = Pattern.compile(regex);
+//                Matcher matcher = compile.matcher(cnFullXmlStr);
+//                StringBuilder builder = new StringBuilder();
+//                while (matcher.find()) {
+//                    String macherGroup = matcher.group();
+//                    String right = macherGroup.replaceAll("<!--.*?-->", "").trim();
+//                    right = right.replaceAll("<.*?>", "").trim();
+//                    right = right.replaceAll(" +", "");
+//                    right = right.replaceAll("<br/>", "");
+//                    right = right.replace("\r\n", "").replace("\n", "").replace("\t", "");
+//                    builder.append(right).append("\r\n");
+//                }
+//
+//                String patentRightText = builder + "";
+//
+//                //使用工具类去除字符串文本中的所有HTML格式标签
+//                patentRightText = RemoveHtmlTagsUtils.removeHtmlTags(patentRightText);
+//                patentRightText = patentRightText.trim();
+                SAXReader reader = new SAXReader();
+                reader.setEntityResolver(new IgnoreDTDEntityResolver());
+                Reader stringReader = new StringReader(cnFullXmlStr);
+                Document document = reader.read(stringReader);
+                XPath xpath = document.createXPath("//claim-text");
+                List<Element> element = (List<Element>) xpath.selectNodes(document);
+                if (element.size() == 0) {
+                    xpath = document.createXPath("//business:ClaimText");
+                    element = (List<Element>) xpath.selectNodes(document);
+                }
+                List<String> reClaims = new ArrayList<>();
+                element.forEach(item -> {
+                    String claim = item.getText();
+                    claim =   claim.replaceAll("\r\n|\r|\n| ", "");
+                    reClaims.add(claim);
+
+                });
+                String reClaim = "";
+                if (reClaims.size() != 0) {
+                    reClaim = cn.cslg.pas.common.utils.StringUtils.join(reClaims, "\r\n");
                 }
-
-                String patentRightText = builder + "";
-
-                //使用工具类去除字符串文本中的所有HTML格式标签
-                patentRightText = RemoveHtmlTagsUtils.removeHtmlTags(patentRightText);
-                patentRightText = patentRightText.trim();
 
                 //装载权利要求原文
                 PatentRight patentRight = new PatentRight();
-                patentRight.setContent(patentRightText);
+                patentRight.setContent(reClaim);
                 uploadParamsVO.setPatentRight(patentRight);
             }
 
@@ -753,21 +778,41 @@ public class ExcutePatentDataStar implements IExcutePatentData {
                     return true;
                 }
 
-                //使用正则表达式拼接出说明书文本全文
-                String regex = "(?<=<description>)[\\w\\W]+?(?=</description>)";
-                Pattern compile = Pattern.compile(regex);
-                Matcher matcher = compile.matcher(cnFullXmlStr);
-                String text = "";
-                while (matcher.find()) {
-                    text = matcher.group();
-                    text = text.replaceAll("<invention-title[^>]*>.*?(\\n{0,}|\\s{0,}?)</invention-title>", "");
-                    text = text.replaceAll("<br/>", "");
-                    text = text.replaceAll("\\s", "");
+                SAXReader reader = new SAXReader();
+                reader.setEntityResolver(new IgnoreDTDEntityResolver());
+                Reader stringReader = new StringReader(cnFullXmlStr);
+                Document document = reader.read(stringReader);
+                XPath xpath = document.createXPath("//description//p");
+                List<Element> elements = (List<Element>) xpath.selectNodes(document);
+                if (elements.size() == 0) {
+                    xpath = document.createXPath("//business:Description//base:Paragraphs");
+                    elements = (List<Element>) xpath.selectNodes(document);
+                }
+                List<String> fullTexts = new ArrayList<>();
+                Integer count=1;
+                for (Element item:elements){
+                    String fullText = item.getText().replaceAll("<br/>", "");
+                    fullText =  fullText.replaceAll("\r\n|\r|\n| ", "");
+                    if (!fullText.trim().equals("技术领域")
+                            && !fullText.trim().equals("发明内容")
+                            && !fullText.trim().equals("附图说明")
+                            && !fullText.trim().equals("具体实施方式")
+                            &&!fullText.trim().equals("背景技术")
+                            &&!fullText.trim().equals("实用新型内容"))
+
+                    {String formattedNum = String.format("%04d", count);
+                        fullText ="["+formattedNum+"]"+fullText;
+                        count++;
+                    }
+                    fullTexts.add(fullText);
+                }
+                String fullText = "";
+                if (fullTexts.size() != 0) {
+                    fullText = cn.cslg.pas.common.utils.StringUtils.join(fullTexts, "\r\n");
                 }
-
                 //装载说明书文本全文
                 PatentInstructionText patentInstructionText = new PatentInstructionText();
-                patentInstructionText.setManual(text.trim());
+                patentInstructionText.setManual(fullText);
                 uploadParamsVO.setPatentInstructionText(patentInstructionText);
             }
 

+ 1 - 1
PAS/src/main/resources/application.yml

@@ -20,7 +20,7 @@ spring:
       max-file-size: 1000MB
       max-request-size: 1000MB
   profiles:
-    active: dev
+    active: prodNetIn
   jackson:
     default-property-inclusion: non_null
     serialization: