123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269 |
- package cn.cslg.pas.common.utils;
- import cn.cslg.pas.common.model.params.PatentRightParams;
- import cn.cslg.pas.domain.PatentRight;
- import org.springframework.stereotype.Component;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- /**
- * 拆分权要工具类
- *
- * @Author chenyu
- * @Date 2023/4/25
- */
- @Component
- public class PatentRightUtils {
- public List<PatentRight> formatPatentRight(PatentRightParams params) {
- //国家
- String country = params.getPatentNo().substring(0, 2);
- //原文
- String content = params.getContent();
- //译文
- String contentOut = params.getContentOut();
- //创建一个权要集合
- ArrayList<PatentRight> patentRights = new ArrayList<>();
- //若权要原文为空,则直接返回空集合
- if (content == null || content.equals("")) {
- return patentRights;
- }
- //先去掉所有换行符,使原文连成一个长字符串
- if (content.contains("\r\n")) {
- content = content.replaceAll("\r\n", "");
- } else {
- content = content.replaceAll("\n", "");
- }
- if (contentOut == null) {
- contentOut = "";
- }
- if (contentOut.contains("\r\n")) {
- contentOut = contentOut.replaceAll("\r\n", "");
- } else {
- contentOut = contentOut.replaceAll("\n", "");
- }
- //去掉译文的所有空格
- contentOut = contentOut.replaceAll(" +", "");
- //中日韩专利↓
- if (country.equals("CN") || country.equals("JP") || country.equals("KR")) {
- String regex;
- if (content.contains("。2.")) {
- regex = "。[0-9]+\\.";
- } else if (content.contains("。2、")) {
- regex = "。[0-9]+、";
- } else if (content.contains("[001]")) {
- regex = "\\[[0-9]+]";
- } else if (content.contains("請求項")) {
- regex = "。【請求項[0-9]+】";
- } else if (content.contains(".청구항 2")) {
- regex = "\\.청구항 [0-9]+";
- } else {
- regex = "";
- }
- Matcher matcher = Pattern.compile(regex).matcher(content);
- Matcher matcherOut = Pattern.compile(regex).matcher(contentOut);
- while (matcher.find()) {
- if (matcher.group().contains(".청구항")) {
- //".청구항 2" -> "\\.청구항 2"
- String matcherGroup = matcher.group().replace(".", "\\.");
- //".청구항 2" -> ".\n청구항 2"
- content = content.replaceFirst(matcherGroup, ".\n" + matcherGroup.substring(matcherGroup.indexOf(".") + 1));
- } else if (matcher.group().contains(".")) {
- //"。2." -> "。2\\."
- String matcherGroup = matcher.group().replace(".", "\\.");
- //"。2." -> "。\n2."
- content = content.replaceFirst(matcherGroup, "。\n" + matcherGroup.substring(matcherGroup.indexOf("。") + 1, matcherGroup.indexOf("\\")) + ".");
- } else if (matcher.group().contains("、") || matcher.group().contains("請求項")) {
- content = content.replaceFirst(matcher.group(), "。\n" + matcher.group().substring(matcher.group().indexOf("。") + 1));
- }
- }
- String[] strs = content.split("\n");
- while (matcherOut.find()) {
- if (matcherOut.group().contains(".")) {
- String matcherOutGroup = matcherOut.group().replace(".", "\\.");
- contentOut = contentOut.replaceFirst(matcherOutGroup, "。\n" + matcherOutGroup.substring(matcherOutGroup.indexOf("。") + 1, matcherOutGroup.indexOf("\\")) + ".");
- } else if (matcherOut.group().contains("、")) {
- contentOut = contentOut.replaceFirst(matcherOut.group(), "。\n" + matcherOut.group().substring(matcherOut.group().indexOf("。") + 1));
- }
- }
- String[] strsOut = contentOut.split("\n");
- //定义父权要排序号
- String regex1 = "权利要求[0-9]+";
- String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+";
- for (int i = 0; i < strs.length; i++) {
- //之前:if (strs[i].contains("权利要求")) {
- //现在:if(该权要有逗号,并且它第一句话中有"权利要求"4个字 || 该权要没有逗号,并且它有"权利要求"4个字) {
- if ((strs[i].contains(",") && strs[i].substring(0, strs[i].indexOf(",")).contains("权利要求")) || (!strs[i].contains(",") && strs[i].contains("权利要求"))) {
- Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
- Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
- if (matcher2.find()) {
- String parentNum = matcher2.group().substring(matcher2.group().indexOf("求") + 1);
- String[] parentNums;
- if (parentNum.contains("~")) {
- parentNums = parentNum.split("~");
- } else if (parentNum.contains("-")) {
- parentNums = parentNum.split("-");
- } else if (parentNum.contains("至")) {
- parentNums = parentNum.split("至");
- } else if (parentNum.contains("或")) {
- parentNums = parentNum.split("或");
- } else {
- parentNums = new String[0];
- }
- StringBuilder builder = new StringBuilder();
- for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
- if ((builder + "").equals("")) {
- builder.append(j);
- } else {
- builder.append(",").append(j);
- }
- }
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(0)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort(builder + "");
- if (strsOut.length - 1 >= i) {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- } else if (matcher1.find()) {
- String parentNum = matcher1.group().substring(matcher1.group().indexOf("求") + 1);
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(0)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort((Integer.parseInt(parentNum) - 1) + "");
- if (strsOut.length - 1 >= i) {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- }
- } else {
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(1)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort("-1");
- if (strsOut.length - 1 >= i) {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- }
- }
- //英文专利的权要拆分 ↓
- } else {
- content = content.substring(content.indexOf("1."));
- if (contentOut.contains("1.")) {
- contentOut = contentOut.substring(contentOut.indexOf("1."));
- } else {
- contentOut = contentOut.substring(contentOut.indexOf("1、"));
- }
- String regex;
- if (content.contains("1.")) {
- regex = "\\.[0-9]+\\. ";
- } else if (content.contains("1、")) {
- regex = "\\.[0-9]+\\. ";
- } else if (content.contains("[001]")) {
- regex = "\\[[0-9]+]";
- } else {
- regex = "";
- }
- Matcher matcher = Pattern.compile(regex).matcher(content);
- Matcher matcherOut = Pattern.compile("。[0-9]+\\.").matcher(contentOut);
- while (matcher.find()) {
- String matcherGroup = matcher.group().replace(".", "\\.");
- content = content.replaceFirst(matcherGroup, ".\n" + matcherGroup.substring(matcherGroup.indexOf(".") + 1, matcherGroup.lastIndexOf("\\")) + ". ");
- }
- String[] strs = content.split("\n");
- while (matcherOut.find()) {
- String matcherOutGroup = matcherOut.group().replace(".", "\\.");
- contentOut = contentOut.replaceFirst(matcherOutGroup, "。\n" + matcherOutGroup.substring(matcherOutGroup.indexOf("。") + 1, matcherOutGroup.indexOf("\\")) + ".");
- }
- String[] strsOut = contentOut.split("\n");
- //定义父权要排序号
- String regex1 = "claim [0-9]+";
- String regex2 = "claims [0-9]+ or [0-9]+";
- for (int i = 0; i < strs.length; i++) {
- if (strs[i].contains("claim")) {
- Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
- Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
- if (matcher2.find()) {
- String parentNum = matcher2.group().substring(matcher2.group().indexOf("s") + 2);
- String[] parentNums;
- if (parentNum.contains(" or ")) {
- parentNums = parentNum.split(" or ");
- } else if (parentNum.contains(" - ")) {
- parentNums = parentNum.split(" - ");
- } else if (parentNum.contains(" to ")) {
- parentNums = parentNum.split(" to ");
- } else if (parentNum.contains("或")) {
- parentNums = parentNum.split("或");
- } else {
- parentNums = new String[0];
- }
- StringBuilder builder = new StringBuilder();
- for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
- if ((builder + "").equals("")) {
- builder.append(j);
- } else {
- builder.append(",").append(j);
- }
- }
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(0)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort(builder + "");
- if (strsOut.length - 1 >= i) {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- } else if (matcher1.find()) {
- String parentNum = matcher1.group().substring(matcher1.group().indexOf("m") + 2);
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(0)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort((Integer.parseInt(parentNum) - 1) + "");
- if (strsOut.length - 1 >= i) {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- }
- } else {
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(1)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort("-1");
- if (strsOut.length - 1 >= i) {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- }
- }
- }
- return patentRights;
- }
- }
|