package cn.cslg.pas.common.utils;

import cn.cslg.pas.common.model.params.PatentRightParams;
import cn.cslg.pas.domain.PatentRight;
import org.springframework.stereotype.Component;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility that splits the claims text of a patent into individual claims.
 *
 * @Author chenyu
 * @Date 2023/4/25
 */
@Component
public class PatentRightUtils {

    /** Replacement that re-emits the sentence terminator, a line break, then the claim marker. */
    private static final String BREAK_BEFORE_MARKER = "$1\n$2";

    /** Claim boundary of the form "。2." (also used for the translation of non-CJK patents). */
    private static final Pattern CJK_DOT_MARKER = Pattern.compile("(。)([0-9]+\\.)");
    /** Claim boundary of the form "。2、". */
    private static final Pattern CJK_COMMA_MARKER = Pattern.compile("(。)([0-9]+、)");
    /** Claim boundary of the form "。【請求項2】". */
    private static final Pattern JP_MARKER = Pattern.compile("(。)(【請求項[0-9]+】)");
    /** Claim boundary of the form ".청구항 2". */
    private static final Pattern KR_MARKER = Pattern.compile("(\\.)(청구항 [0-9]+)");
    /** Claim boundary of the form ".2. " used for the original text of non-CJK patents. */
    private static final Pattern EN_MARKER = Pattern.compile("(\\.)([0-9]+\\. )");

    /** Reference to a single claim, e.g. "权利要求3". */
    private static final Pattern CJK_SINGLE_REF = Pattern.compile("权利要求([0-9]+)");
    /** Reference to a span of claims, e.g. "权利要求1至3". */
    private static final Pattern CJK_RANGE_REF = Pattern.compile("权利要求([0-9]+)[至或~-]+([0-9]+)");
    /** Reference to a single claim, e.g. "claim 3". */
    private static final Pattern EN_SINGLE_REF = Pattern.compile("claim ([0-9]+)");
    /** Reference to a span of claims, e.g. "claims 1 or 3", "claims 1 to 3", "claims 1 - 3". */
    private static final Pattern EN_RANGE_REF = Pattern.compile("claims ([0-9]+) (?:or|to|-) ([0-9]+)");

    /**
     * Splits the claims text (and its translation) into one {@link PatentRight} per claim.
     *
     * <p>The first two characters of the patent number select the rule set: "CN", "JP" and "KR"
     * use the CJK rules, everything else (including a missing or one-character patent number)
     * uses the rules for English text. All line breaks are removed from both texts and all
     * spaces from the translation before splitting.
     *
     * <p>A claim that refers to another claim number is stored with type 0 and a parent sort of
     * {@code number - 1} (a comma-separated list for a span of claims). Every other claim,
     * including one that mentions the claim keyword without a parsable number, is stored with
     * type 1 and parent sort "-1". The translation fragment at the same index is attached when
     * one exists.
     *
     * @param params carrier of the patent id, patent number, claims text and translated text
     * @return the claims in document order; empty when the claims text is null or empty
     */
    public List<PatentRight> formatPatentRight(PatentRightParams params) {
        List<PatentRight> patentRights = new ArrayList<>();

        // Nothing to split: return the empty list.
        String content = params.getContent();
        if (content == null || content.isEmpty()) {
            return patentRights;
        }
        // Join everything into one long string so that claim boundaries are the only line breaks.
        content = stripLineBreaks(content);

        String contentOut = params.getContentOut();
        // The translation additionally loses every space.
        contentOut = contentOut == null ? "" : stripLineBreaks(contentOut).replace(" ", "");

        // A missing or too-short patent number has no country prefix; it falls through to the
        // non-CJK rules instead of throwing.
        String patentNo = params.getPatentNo();
        String country = patentNo != null && patentNo.length() >= 2 ? patentNo.substring(0, 2) : "";
        boolean cjk = country.equals("CN") || country.equals("JP") || country.equals("KR");

        String[] claims;
        String[] claimsOut;
        Pattern rangeRef;
        Pattern singleRef;
        if (cjk) {
            Pattern splitter = selectCjkSplitter(content);
            claims = splitClaims(content, splitter);
            // Only the "。N." and "。N、" styles are applied to the translation.
            // NOTE(review): the 請求項 style is not applied to the translation - confirm intended.
            boolean splitsTranslation = splitter == CJK_DOT_MARKER || splitter == CJK_COMMA_MARKER;
            claimsOut = splitClaims(contentOut, splitsTranslation ? splitter : null);
            rangeRef = CJK_RANGE_REF;
            singleRef = CJK_SINGLE_REF;
        } else {
            // Drop any preamble in front of the first claim; text without a first-claim marker
            // (for example an empty translation) is kept as it is.
            claims = splitClaims(dropPreamble(content, "1."), EN_MARKER);
            claimsOut = splitClaims(dropPreamble(contentOut, "1.", "1、"), CJK_DOT_MARKER);
            rangeRef = EN_RANGE_REF;
            singleRef = EN_SINGLE_REF;
        }

        for (int i = 0; i < claims.length; i++) {
            String claim = claims[i];
            String parentSort = referencesOtherClaim(claim, cjk)
                    ? resolveParentSort(claim, rangeRef, singleRef)
                    : null;
            // No resolvable parent: keep the claim as type 1 rather than dropping it.
            boolean hasParent = parentSort != null;

            PatentRight patentRight = new PatentRight()
                    .setPatentId(params.getPatentId())
                    .setType(hasParent ? 0 : 1)
                    .setContent(claim)
                    .setSort(i)
                    .setParentSort(hasParent ? parentSort : "-1");
            if (i < claimsOut.length) {
                patentRight.setContentOut(claimsOut[i]);
            }
            patentRights.add(patentRight);
        }
        return patentRights;
    }

    /** Removes every Windows and Unix line break from the text. */
    private static String stripLineBreaks(String text) {
        return text.replace("\r\n", "").replace("\n", "");
    }

    /**
     * Picks the claim-boundary pattern that matches the numbering style found in CJK text, or
     * {@code null} when the text should not be split.
     */
    private static Pattern selectCjkSplitter(String content) {
        if (content.contains("。2.")) {
            return CJK_DOT_MARKER;
        }
        if (content.contains("。2、")) {
            return CJK_COMMA_MARKER;
        }
        if (content.contains("[001]")) {
            // NOTE(review): this numbering style is detected but there is no split rule for it,
            // so the text stays one single claim - confirm where the break should go.
            return null;
        }
        if (content.contains("請求項")) {
            return JP_MARKER;
        }
        if (content.contains(".청구항 2")) {
            return KR_MARKER;
        }
        return null;
    }

    /** Returns the text starting at the first marker that occurs in it, or the text unchanged. */
    private static String dropPreamble(String text, String... firstClaimMarkers) {
        for (String marker : firstClaimMarkers) {
            int start = text.indexOf(marker);
            if (start >= 0) {
                return text.substring(start);
            }
        }
        return text;
    }

    /** Inserts a line break at every claim boundary and splits there; no splitter means no boundaries. */
    private static String[] splitClaims(String text, Pattern splitter) {
        String separated = splitter == null
                ? text
                : splitter.matcher(text).replaceAll(BREAK_BEFORE_MARKER);
        return separated.split("\n");
    }

    /**
     * Tells whether the claim mentions the claim keyword: for CJK text only the part before the
     * first comma is inspected (the whole claim when there is no comma), for other text the whole
     * claim.
     */
    private static boolean referencesOtherClaim(String claim, boolean cjk) {
        if (!cjk) {
            return claim.contains("claim");
        }
        int firstComma = claim.indexOf(",");
        String firstClause = firstComma >= 0 ? claim.substring(0, firstComma) : claim;
        return firstClause.contains("权利要求");
    }

    /**
     * Builds the parent sort value from the first claim reference found anywhere in the claim: a
     * span "a..b" yields "a-1,...,b-1", a single number "a" yields "a-1". Returns {@code null}
     * when the claim contains no numbered reference.
     */
    private static String resolveParentSort(String claim, Pattern rangeRef, Pattern singleRef) {
        Matcher range = rangeRef.matcher(claim);
        if (range.find()) {
            int first = Integer.parseInt(range.group(1));
            int last = Integer.parseInt(range.group(2));
            StringBuilder parents = new StringBuilder();
            // Sort numbers are zero-based while claim numbers start at 1.
            for (int sort = first - 1; sort < last; sort++) {
                if (parents.length() > 0) {
                    parents.append(',');
                }
                parents.append(sort);
            }
            return parents.toString();
        }
        Matcher single = singleRef.matcher(claim);
        if (single.find()) {
            return String.valueOf(Integer.parseInt(single.group(1)) - 1);
        }
        return null;
    }
}