|
@@ -1,5 +1,6 @@
|
|
package cn.cslg.pas.service.upLoadPatent;
|
|
package cn.cslg.pas.service.upLoadPatent;
|
|
|
|
|
|
|
|
+import cn.cslg.pas.common.core.IgnoreDTDEntityResolver;
|
|
import cn.cslg.pas.common.model.dto.UploadFileDTO;
|
|
import cn.cslg.pas.common.model.dto.UploadFileDTO;
|
|
import cn.cslg.pas.common.model.outApi.PatentStarListDto;
|
|
import cn.cslg.pas.common.model.outApi.PatentStarListDto;
|
|
import cn.cslg.pas.common.model.vo.KeyValueVO;
|
|
import cn.cslg.pas.common.model.vo.KeyValueVO;
|
|
@@ -23,6 +24,10 @@ import lombok.extern.slf4j.Slf4j;
|
|
import org.apache.commons.fileupload.FileItem;
|
|
import org.apache.commons.fileupload.FileItem;
|
|
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
|
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
|
import org.apache.commons.lang3.StringUtils;
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
+import org.dom4j.Document;
|
|
|
|
+import org.dom4j.Element;
|
|
|
|
+import org.dom4j.XPath;
|
|
|
|
+import org.dom4j.io.SAXReader;
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.web.multipart.MultipartFile;
|
|
import org.springframework.web.multipart.MultipartFile;
|
|
import org.springframework.web.multipart.commons.CommonsMultipartFile;
|
|
import org.springframework.web.multipart.commons.CommonsMultipartFile;
|
|
@@ -95,7 +100,6 @@ public class ExcutePatentDataStar implements IExcutePatentData {
|
|
continue;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
|
|
-
|
|
|
|
//至此,即找到了当前检索式即为要下载的专利开始位置所属的检索式,重新计算开始位置startNumber(即要下载的专利的开始位置,对应了当前检索式中该专利位置)
|
|
//至此,即找到了当前检索式即为要下载的专利开始位置所属的检索式,重新计算开始位置startNumber(即要下载的专利的开始位置,对应了当前检索式中该专利位置)
|
|
startNumber = startNumber - (countForStart - patentStarListDto.getTotal());
|
|
startNumber = startNumber - (countForStart - patentStarListDto.getTotal());
|
|
//第一个检索式下载完成后,从第二个检索式开始 startNumber都是从1开始下载
|
|
//第一个检索式下载完成后,从第二个检索式开始 startNumber都是从1开始下载
|
|
@@ -692,30 +696,51 @@ public class ExcutePatentDataStar implements IExcutePatentData {
|
|
return true;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
|
|
- //使用正则表达式拼接出权要原文
|
|
|
|
- String regex = "(?<=<claim id=.{1,110}>)[\\w\\W]+?(?=</claim>)";
|
|
|
|
- Pattern compile = Pattern.compile(regex);
|
|
|
|
- Matcher matcher = compile.matcher(cnFullXmlStr);
|
|
|
|
- StringBuilder builder = new StringBuilder();
|
|
|
|
- while (matcher.find()) {
|
|
|
|
- String macherGroup = matcher.group();
|
|
|
|
- String right = macherGroup.replaceAll("<!--.*?-->", "").trim();
|
|
|
|
- right = right.replaceAll("<.*?>", "").trim();
|
|
|
|
- right = right.replaceAll(" +", "");
|
|
|
|
- right = right.replaceAll("<br/>", "");
|
|
|
|
- right = right.replace("\r\n", "").replace("\n", "").replace("\t", "");
|
|
|
|
- builder.append(right).append("\r\n");
|
|
|
|
|
|
+// //使用正则表达式拼接出权要原文
|
|
|
|
+// String regex = "(?<=<claim id=.{1,110}>)[\\w\\W]+?(?=</claim>)";
|
|
|
|
+// Pattern compile = Pattern.compile(regex);
|
|
|
|
+// Matcher matcher = compile.matcher(cnFullXmlStr);
|
|
|
|
+// StringBuilder builder = new StringBuilder();
|
|
|
|
+// while (matcher.find()) {
|
|
|
|
+// String macherGroup = matcher.group();
|
|
|
|
+// String right = macherGroup.replaceAll("<!--.*?-->", "").trim();
|
|
|
|
+// right = right.replaceAll("<.*?>", "").trim();
|
|
|
|
+// right = right.replaceAll(" +", "");
|
|
|
|
+// right = right.replaceAll("<br/>", "");
|
|
|
|
+// right = right.replace("\r\n", "").replace("\n", "").replace("\t", "");
|
|
|
|
+// builder.append(right).append("\r\n");
|
|
|
|
+// }
|
|
|
|
+//
|
|
|
|
+// String patentRightText = builder + "";
|
|
|
|
+//
|
|
|
|
+// //使用工具类去除字符串文本中的所有HTML格式标签
|
|
|
|
+// patentRightText = RemoveHtmlTagsUtils.removeHtmlTags(patentRightText);
|
|
|
|
+// patentRightText = patentRightText.trim();
|
|
|
|
+ SAXReader reader = new SAXReader();
|
|
|
|
+ reader.setEntityResolver(new IgnoreDTDEntityResolver());
|
|
|
|
+ Reader stringReader = new StringReader(cnFullXmlStr);
|
|
|
|
+ Document document = reader.read(stringReader);
|
|
|
|
+ XPath xpath = document.createXPath("//claim-text");
|
|
|
|
+ List<Element> element = (List<Element>) xpath.selectNodes(document);
|
|
|
|
+ if (element.size() == 0) {
|
|
|
|
+ xpath = document.createXPath("//business:ClaimText");
|
|
|
|
+ element = (List<Element>) xpath.selectNodes(document);
|
|
|
|
+ }
|
|
|
|
+ List<String> reClaims = new ArrayList<>();
|
|
|
|
+ element.forEach(item -> {
|
|
|
|
+ String claim = item.getText();
|
|
|
|
+ claim = claim.replaceAll("\r\n|\r|\n| ", "");
|
|
|
|
+ reClaims.add(claim);
|
|
|
|
+
|
|
|
|
+ });
|
|
|
|
+ String reClaim = "";
|
|
|
|
+ if (reClaims.size() != 0) {
|
|
|
|
+ reClaim = cn.cslg.pas.common.utils.StringUtils.join(reClaims, "\r\n");
|
|
}
|
|
}
|
|
-
|
|
|
|
- String patentRightText = builder + "";
|
|
|
|
-
|
|
|
|
- //使用工具类去除字符串文本中的所有HTML格式标签
|
|
|
|
- patentRightText = RemoveHtmlTagsUtils.removeHtmlTags(patentRightText);
|
|
|
|
- patentRightText = patentRightText.trim();
|
|
|
|
|
|
|
|
//装载权利要求原文
|
|
//装载权利要求原文
|
|
PatentRight patentRight = new PatentRight();
|
|
PatentRight patentRight = new PatentRight();
|
|
- patentRight.setContent(patentRightText);
|
|
|
|
|
|
+ patentRight.setContent(reClaim);
|
|
uploadParamsVO.setPatentRight(patentRight);
|
|
uploadParamsVO.setPatentRight(patentRight);
|
|
}
|
|
}
|
|
|
|
|
|
@@ -753,21 +778,41 @@ public class ExcutePatentDataStar implements IExcutePatentData {
|
|
return true;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
|
|
- //使用正则表达式拼接出说明书文本全文
|
|
|
|
- String regex = "(?<=<description>)[\\w\\W]+?(?=</description>)";
|
|
|
|
- Pattern compile = Pattern.compile(regex);
|
|
|
|
- Matcher matcher = compile.matcher(cnFullXmlStr);
|
|
|
|
- String text = "";
|
|
|
|
- while (matcher.find()) {
|
|
|
|
- text = matcher.group();
|
|
|
|
- text = text.replaceAll("<invention-title[^>]*>.*?(\\n{0,}|\\s{0,}?)</invention-title>", "");
|
|
|
|
- text = text.replaceAll("<br/>", "");
|
|
|
|
- text = text.replaceAll("\\s", "");
|
|
|
|
|
|
+ SAXReader reader = new SAXReader();
|
|
|
|
+ reader.setEntityResolver(new IgnoreDTDEntityResolver());
|
|
|
|
+ Reader stringReader = new StringReader(cnFullXmlStr);
|
|
|
|
+ Document document = reader.read(stringReader);
|
|
|
|
+ XPath xpath = document.createXPath("//description//p");
|
|
|
|
+ List<Element> elements = (List<Element>) xpath.selectNodes(document);
|
|
|
|
+ if (elements.size() == 0) {
|
|
|
|
+ xpath = document.createXPath("//business:Description//base:Paragraphs");
|
|
|
|
+ elements = (List<Element>) xpath.selectNodes(document);
|
|
|
|
+ }
|
|
|
|
+ List<String> fullTexts = new ArrayList<>();
|
|
|
|
+ Integer count=1;
|
|
|
|
+ for (Element item:elements){
|
|
|
|
+ String fullText = item.getText().replaceAll("<br/>", "");
|
|
|
|
+ fullText = fullText.replaceAll("\r\n|\r|\n| ", "");
|
|
|
|
+ if (!fullText.trim().equals("技术领域")
|
|
|
|
+ && !fullText.trim().equals("发明内容")
|
|
|
|
+ && !fullText.trim().equals("附图说明")
|
|
|
|
+ && !fullText.trim().equals("具体实施方式")
|
|
|
|
+ &&!fullText.trim().equals("背景技术")
|
|
|
|
+ &&!fullText.trim().equals("实用新型内容"))
|
|
|
|
+
|
|
|
|
+ {String formattedNum = String.format("%04d", count);
|
|
|
|
+ fullText ="["+formattedNum+"]"+fullText;
|
|
|
|
+ count++;
|
|
|
|
+ }
|
|
|
|
+ fullTexts.add(fullText);
|
|
|
|
+ }
|
|
|
|
+ String fullText = "";
|
|
|
|
+ if (fullTexts.size() != 0) {
|
|
|
|
+ fullText = cn.cslg.pas.common.utils.StringUtils.join(fullTexts, "\r\n");
|
|
}
|
|
}
|
|
-
|
|
|
|
//装载说明书文本全文
|
|
//装载说明书文本全文
|
|
PatentInstructionText patentInstructionText = new PatentInstructionText();
|
|
PatentInstructionText patentInstructionText = new PatentInstructionText();
|
|
- patentInstructionText.setManual(text.trim());
|
|
|
|
|
|
+ patentInstructionText.setManual(fullText);
|
|
uploadParamsVO.setPatentInstructionText(patentInstructionText);
|
|
uploadParamsVO.setPatentInstructionText(patentInstructionText);
|
|
}
|
|
}
|
|
|
|
|