1 year ago · 95f049b894
--- a/PAS/pom.xml
+++ b/PAS/pom.xml
@@ -204,6 +204,16 @@
 
																             <artifactId>pdfbox</artifactId>
															
 
																             <version>2.0.16</version>
															
 
																         </dependency>
															
 
																+        <dependency>
															
 
																+            <groupId>jaxen</groupId>
															
 
																+            <artifactId>jaxen</artifactId>
															
 
																+            <version>1.2.0</version> <!-- XPath engine used by dom4j's createXPath/selectNodes; adjust the version as needed -->
															
 
																+        </dependency>
															
 
																+        <dependency>
															
 
																+            <groupId>dom4j</groupId>
															
 
																+            <artifactId>dom4j</artifactId>
															
 
																+            <version>1.6.1</version>
															
 
																+        </dependency>
															
 
																     </dependencies>
															
--- a/PAS/src/main/java/cn/cslg/pas/common/core/IgnoreDTDEntityResolver.java
+++ b/PAS/src/main/java/cn/cslg/pas/common/core/IgnoreDTDEntityResolver.java
@@ -0,0 +1,21 @@
 
																+package cn.cslg.pas.common.core;
															
 
																+
															
 
																+
															
 
																+
															
 
																+
															
 
																+import org.xml.sax.EntityResolver;
															
 
																+import org.xml.sax.InputSource;
															
 
																+import org.xml.sax.SAXException;
															
 
																+
															
 
																+import java.io.ByteArrayInputStream;
															
 
																+import java.io.IOException;
															
 
																+
															
 
																+/**
																+ * SAX {@link EntityResolver} that answers every external-entity lookup
																+ * (for example a {@code <!DOCTYPE ... SYSTEM "x.dtd">} reference) with a tiny
																+ * in-memory stub instead of the real resource.
																+ *
																+ * <p>Per the SAX {@code EntityResolver} contract, a non-null return value is
																+ * read by the parser in place of the system identifier, so installing this
																+ * resolver keeps the parser from opening the referenced DTD.
																+ */
																+public class IgnoreDTDEntityResolver implements EntityResolver {
																+
																+    /** Stub handed back for every entity: an XML/text declaration and nothing else. */
																+    private static final String EMPTY_ENTITY = "<?xml version='1.0' encoding='UTF-8'?>";
																+
																+    /**
																+     * Returns the stub entity regardless of which entity was requested.
																+     *
																+     * @param publicId public identifier of the requested entity; ignored
																+     * @param systemId system identifier of the requested entity; ignored
																+     * @return a fresh {@link InputSource} over the stub bytes, never {@code null}
																+     * @throws SAXException declared by the interface; not thrown by this implementation
																+     * @throws IOException  declared by the interface; not thrown by this implementation
																+     */
																+    @Override
																+    public InputSource resolveEntity(String publicId, String systemId)
																+            throws SAXException, IOException {
																+        // Encode explicitly as UTF-8: the stub declares encoding='UTF-8', and a bare
																+        // getBytes() would use the platform default charset instead.
																+        byte[] stub = EMPTY_ENTITY.getBytes(java.nio.charset.StandardCharsets.UTF_8);
																+        return new InputSource(new ByteArrayInputStream(stub));
																+    }
																+}
															
--- a/PAS/src/main/java/cn/cslg/pas/service/upLoadPatent/ExcutePatentDataStar.java
+++ b/PAS/src/main/java/cn/cslg/pas/service/upLoadPatent/ExcutePatentDataStar.java
@@ -1,5 +1,6 @@
 
																 package cn.cslg.pas.service.upLoadPatent;
															
 
																+import cn.cslg.pas.common.core.IgnoreDTDEntityResolver;
															
 
																 import cn.cslg.pas.common.model.dto.UploadFileDTO;
															
 
																 import cn.cslg.pas.common.model.outApi.PatentStarListDto;
															
 
																 import cn.cslg.pas.common.model.vo.KeyValueVO;
															
@@ -23,6 +24,10 @@ import lombok.extern.slf4j.Slf4j;
 
																 import org.apache.commons.fileupload.FileItem;
															
 
																 import org.apache.commons.fileupload.disk.DiskFileItemFactory;
															
 
																 import org.apache.commons.lang3.StringUtils;
															
 
																+import org.dom4j.Document;
															
 
																+import org.dom4j.Element;
															
 
																+import org.dom4j.XPath;
															
 
																+import org.dom4j.io.SAXReader;
															
 
																 import org.springframework.stereotype.Service;
															
 
																 import org.springframework.web.multipart.MultipartFile;
															
 
																 import org.springframework.web.multipart.commons.CommonsMultipartFile;
															
@@ -95,7 +100,6 @@ public class ExcutePatentDataStar implements IExcutePatentData {
 
																                         continue;
															
 
																                     }
															
 
																-
															
 
																                     //至此，即找到了当前检索式即为要下载的专利开始位置所属的检索式，重新计算开始位置startNumber（即要下载的专利的开始位置，对应了当前检索式中该专利位置）
															
 
																                     startNumber = startNumber - (countForStart - patentStarListDto.getTotal());
															
 
																                     //第一个检索式下载完成后，从第二个检索式开始 startNumber都是从1开始下载
															
@@ -692,30 +696,51 @@ public class ExcutePatentDataStar implements IExcutePatentData {
 
																                     return true;
															
 
																                 }
															
 
																-                //使用正则表达式拼接出权要原文
															
 
																-                String regex = "(?<=<claim id=.{1,110}>)[\\w\\W]+?(?=</claim>)";
															
 
																-                Pattern compile = Pattern.compile(regex);
															
 
																-                Matcher matcher = compile.matcher(cnFullXmlStr);
															
 
																-                StringBuilder builder = new StringBuilder();
															
 
																-                while (matcher.find()) {
															
 
																-                    String macherGroup = matcher.group();
															
 
																-                    String right = macherGroup.replaceAll("<!--.*?-->", "").trim();
															
 
																-                    right = right.replaceAll("<.*?>", "").trim();
															
 
																-                    right = right.replaceAll(" +", "");
															
 
																-                    right = right.replaceAll("<br/>", "");
															
 
																-                    right = right.replace("\r\n", "").replace("\n", "").replace("\t", "");
															
 
																-                    builder.append(right).append("\r\n");
															
 
																+//                //使用正则表达式拼接出权要原文
															
 
																+//                String regex = "(?<=<claim id=.{1,110}>)[\\w\\W]+?(?=</claim>)";
															
 
																+//                Pattern compile = Pattern.compile(regex);
															
 
																+//                Matcher matcher = compile.matcher(cnFullXmlStr);
															
 
																+//                StringBuilder builder = new StringBuilder();
															
 
																+//                while (matcher.find()) {
															
 
																+//                    String macherGroup = matcher.group();
															
 
																+//                    String right = macherGroup.replaceAll("<!--.*?-->", "").trim();
															
 
																+//                    right = right.replaceAll("<.*?>", "").trim();
															
 
																+//                    right = right.replaceAll(" +", "");
															
 
																+//                    right = right.replaceAll("<br/>", "");
															
 
																+//                    right = right.replace("\r\n", "").replace("\n", "").replace("\t", "");
															
 
																+//                    builder.append(right).append("\r\n");
															
 
																+//                }
															
 
																+//
															
 
																+//                String patentRightText = builder + "";
															
 
																+//
															
 
																+//                //使用工具类去除字符串文本中的所有HTML格式标签
															
 
																+//                patentRightText = RemoveHtmlTagsUtils.removeHtmlTags(patentRightText);
															
 
																+//                patentRightText = patentRightText.trim();
															
 
																+                SAXReader reader = new SAXReader();
															
 
																+                reader.setEntityResolver(new IgnoreDTDEntityResolver());
															
 
																+                Reader stringReader = new StringReader(cnFullXmlStr);
															
 
																+                Document document = reader.read(stringReader);
															
 
																+                XPath xpath = document.createXPath("//claim-text");
															
 
																+                List<Element> element = (List<Element>) xpath.selectNodes(document);
															
 
																+                if (element.size() == 0) {
															
 
																+                    xpath = document.createXPath("//business:ClaimText");
															
 
																+                    element = (List<Element>) xpath.selectNodes(document);
															
 
																+                }
															
 
																+                List<String> reClaims = new ArrayList<>();
															
 
																+                element.forEach(item -> {
															
 
																+                    String claim = item.getText();
															
 
																+                    claim =   claim.replaceAll("\r\n|\r|\n| ", "");
															
 
																+                    reClaims.add(claim);
															
 
																+
															
 
																+                });
															
 
																+                String reClaim = "";
															
 
																+                if (reClaims.size() != 0) {
															
 
																+                    reClaim = cn.cslg.pas.common.utils.StringUtils.join(reClaims, "\r\n");
															
 
																                 }
															
 
																-
															
 
																-                String patentRightText = builder + "";
															
 
																-
															
 
																-                //使用工具类去除字符串文本中的所有HTML格式标签
															
 
																-                patentRightText = RemoveHtmlTagsUtils.removeHtmlTags(patentRightText);
															
 
																-                patentRightText = patentRightText.trim();
															
 
																                 //装载权利要求原文
															
 
																                 PatentRight patentRight = new PatentRight();
															
 
																-                patentRight.setContent(patentRightText);
															
 
																+                patentRight.setContent(reClaim);
															
 
																                 uploadParamsVO.setPatentRight(patentRight);
															
 
																             }
															
@@ -753,21 +778,41 @@ public class ExcutePatentDataStar implements IExcutePatentData {
 
																                     return true;
															
 
																                 }
															
 
																-                //使用正则表达式拼接出说明书文本全文
															
 
																-                String regex = "(?<=<description>)[\\w\\W]+?(?=</description>)";
															
 
																-                Pattern compile = Pattern.compile(regex);
															
 
																-                Matcher matcher = compile.matcher(cnFullXmlStr);
															
 
																-                String text = "";
															
 
																-                while (matcher.find()) {
															
 
																-                    text = matcher.group();
															
 
																-                    text = text.replaceAll("<invention-title[^>]*>.*?(\\n{0,}|\\s{0,}?)</invention-title>", "");
															
 
																-                    text = text.replaceAll("<br/>", "");
															
 
																-                    text = text.replaceAll("\\s", "");
															
 
																+                SAXReader reader = new SAXReader();
															
 
																+                reader.setEntityResolver(new IgnoreDTDEntityResolver());
															
 
																+                Reader stringReader = new StringReader(cnFullXmlStr);
															
 
																+                Document document = reader.read(stringReader);
															
 
																+                XPath xpath = document.createXPath("//description//p");
															
 
																+                List<Element> elements = (List<Element>) xpath.selectNodes(document);
															
 
																+                if (elements.size() == 0) {
															
 
																+                    xpath = document.createXPath("//business:Description//base:Paragraphs");
															
 
																+                    elements = (List<Element>) xpath.selectNodes(document);
															
 
																+                }
															
 
																+                List<String> fullTexts = new ArrayList<>();
															
 
																+                Integer count=1;
															
 
																+                for (Element item:elements){
															
 
																+                    String fullText = item.getText().replaceAll("<br/>", "");
															
 
																+                    fullText =  fullText.replaceAll("\r\n|\r|\n| ", "");
															
 
																+                    if (!fullText.trim().equals("技术领域")
															
 
																+                            && !fullText.trim().equals("发明内容")
															
 
																+                            && !fullText.trim().equals("附图说明")
															
 
																+                            && !fullText.trim().equals("具体实施方式")
															
 
																+                            &&!fullText.trim().equals("背景技术")
															
 
																+                            &&!fullText.trim().equals("实用新型内容"))
															
 
																+
															
 
																+                    {String formattedNum = String.format("%04d", count);
															
 
																+                        fullText ="["+formattedNum+"]"+fullText;
															
 
																+                        count++;
															
 
																+                    }
															
 
																+                    fullTexts.add(fullText);
															
 
																+                }
															
 
																+                String fullText = "";
															
 
																+                if (fullTexts.size() != 0) {
															
 
																+                    fullText = cn.cslg.pas.common.utils.StringUtils.join(fullTexts, "\r\n");
															
 
																                 }
															
 
																-
															
 
																                 //装载说明书文本全文
															
 
																                 PatentInstructionText patentInstructionText = new PatentInstructionText();
															
 
																-                patentInstructionText.setManual(text.trim());
															
 
																+                patentInstructionText.setManual(fullText);
															
 
																                 uploadParamsVO.setPatentInstructionText(patentInstructionText);
															
 
																             }
															
--- a/PAS/src/main/resources/application.yml
+++ b/PAS/src/main/resources/application.yml
@@ -20,7 +20,7 @@ spring:
 
																       max-file-size: 1000MB
															
 
																       max-request-size: 1000MB
															
 
																   profiles:
															
 
																-    active: dev
															
 
																+    active: prodNetIn
															
 
																   jackson:
															
 
																     default-property-inclusion: non_null
															
 
																     serialization: