package cn.cslg.pas.common.utils; import cn.cslg.pas.common.model.params.PatentRightParams; import cn.cslg.pas.domain.PatentRight; import org.springframework.stereotype.Component; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * 拆分权要工具类 * * @Author chenyu * @Date 2023/4/25 */ @Component public class PatentRightUtils { public static List formatPatentRight(PatentRightParams params) { try { //国家 String country = params.getPatentNo().substring(0, 2); //原文 String content = params.getContent(); //译文 String contentOut = params.getContentOut(); //创建一个权要集合 ArrayList patentRights = new ArrayList<>(); //若原文为null或空串,则直接结束并返回空集合;若译文为null,则置为空串 if (content == null || content.equals("")) { return patentRights; } if (contentOut == null) { contentOut = ""; } //将原文和译文的所有换行符 "\r\n"或是"\n" 替换成 "@",这样首先可以使得全文连在一起,其次再根据特殊符号@拆分权要 if (content.contains("\r\n")) { content = content.replaceAll("\r\n", "@"); } else { content = content.replaceAll("\n", "@"); } if (contentOut.contains("\r\n")) { contentOut = contentOut.replaceAll("\r\n", "@"); } else { contentOut = contentOut.replaceAll("\n", "@"); } //去掉译文的所有空格 contentOut = contentOut.replaceAll(" +", ""); //中日韩权要↓ if (country.equals("CN") || country.equals("JP") || country.equals("KR") || content.contains("权利要求")) { String regex; if (content.contains("@2")) { regex = "@[0-9]+"; } else if (content.contains("@[00")) { regex = "@\\[[0-9]+]"; } else if (content.contains("@請求項")) { regex = "@【請求項[0-9]+】"; } else if (content.contains("@청구항")) { regex = "@청구항 [0-9]+"; } else { regex = ""; } String[] strs; //原文数组 String[] strsOut; //译文数组 //若以上没有匹配到,则权要直接以句号 "。" 拆分 if (regex.equals("")) { content = content.replaceAll("@", ""); contentOut = contentOut.replaceAll("@", ""); strs = content.split("。"); strsOut = contentOut.split("。"); //若匹配到,则根据不同情形拆分 } else { Matcher matcher = Pattern.compile(regex).matcher(content); Matcher matcherOut = Pattern.compile(regex).matcher(contentOut); while (matcher.find()) { content = content.replaceFirst(matcher.group(), "\n" + matcher.group().substring(matcher.group().indexOf("@") + 1)); } while (matcherOut.find()) { contentOut = contentOut.replaceFirst(matcherOut.group(), "\n" + matcherOut.group().substring(matcherOut.group().indexOf("@") + 1)); } content = content.replaceAll("@", ""); contentOut = contentOut.replaceAll("@", ""); strs = content.split("\n"); strsOut = contentOut.split("\n"); } //定义父权要排序号 String regex1 = "权利要求[0-9]+"; String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+"; for (int i = 0; i < strs.length; i++) { //之前:if (strs[i].contains("权利要求")) { //现在:if(该权要有逗号,并且它第一句话中有"权利要求"4个字 || 该权要没有逗号,并且它有"权利要求"4个字) { if ((strs[i].contains(",") && strs[i].substring(0, strs[i].indexOf(",")).contains("权利要求")) || (!strs[i].contains(",") && strs[i].contains("权利要求"))) { Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]); Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]); if (matcher2.find()) { String parentNum = matcher2.group().substring(matcher2.group().indexOf("求") + 1); String[] parentNums; if (parentNum.contains("~")) { parentNums = parentNum.split("~"); } else if (parentNum.contains("-")) { parentNums = parentNum.split("-"); } else if (parentNum.contains("至")) { parentNums = parentNum.split("至"); } else if (parentNum.contains("或")) { parentNums = parentNum.split("或"); } else { parentNums = new String[0]; } StringBuilder builder = new StringBuilder(); for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) { if ((builder + "").equals("")) { builder.append(j); } else { builder.append(",").append(j); } } PatentRight patentRight = new PatentRight() .setPatentId(params.getPatentId()) .setType(0) .setContent(strs[i]) .setSort(i) .setParentSort(builder + ""); if (strsOut.length - 1 >= i) { //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文 if(strsOut.length>strs.length&&strs.length==(i+1)){ patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length))); } else { patentRight.setContentOut(strsOut[i]); } } patentRights.add(patentRight); } else if (matcher1.find()) { String parentNum = matcher1.group().substring(matcher1.group().indexOf("求") + 1); PatentRight patentRight = new PatentRight() .setPatentId(params.getPatentId()) .setType(0) .setContent(strs[i]) .setSort(i) .setParentSort((Integer.parseInt(parentNum) - 1) + ""); if (strsOut.length - 1 >= i) { patentRight.setContentOut(strsOut[i]); } patentRights.add(patentRight); } } else { PatentRight patentRight = new PatentRight() .setPatentId(params.getPatentId()) .setType(1) .setContent(strs[i]) .setSort(i) .setParentSort("-1"); if (strsOut.length - 1 >= i) { //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文 if(strsOut.length>strs.length&&strs.length==(i+1)){ patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length))); } else { patentRight.setContentOut(strsOut[i]); } } patentRights.add(patentRight); } } //英文专利的权要拆分 ↓ } else { if (!content.equals("") && content.contains("1")) { content = content.substring(content.indexOf("1")); } if (!contentOut.equals("") && contentOut.contains("1")) { contentOut = contentOut.substring(contentOut.indexOf("1")); } String regex; if (content.contains("@")) { regex = "@[0-9]+"; } else if (content.contains("[001]")) { regex = "\\[[0-9]+]"; } else { regex = ""; } Matcher matcher = Pattern.compile(regex).matcher(content); Matcher matcherOut = Pattern.compile("@[0-9]+").matcher(contentOut); while (matcher.find()) { content = content.replaceFirst(matcher.group(), "\n" + matcher.group().substring(matcher.group().indexOf("@") + 1)); } while (matcherOut.find()) { contentOut = contentOut.replaceFirst(matcherOut.group(), "\n" + matcherOut.group().substring(matcherOut.group().indexOf("@") + 1)); } content = content.replaceAll("@", ""); contentOut = contentOut.replaceAll("@", ""); String[] strs = content.split("\n"); String[] strsOut = contentOut.split("\n"); //定义父权要排序号 String regex1 = "claim [0-9]+"; String regex2 = "claims [0-9]+ or [0-9]+"; String regex3 = "any preceding claim"; for (int i = 0; i < strs.length; i++) { if (strs[i].contains("claim")) { Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]); Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]); Matcher matcher3 = Pattern.compile(regex3).matcher(strs[i]); if (matcher2.find()) { String parentNum = matcher2.group().substring(matcher2.group().indexOf("s") + 2); String[] parentNums; if (parentNum.contains(" or ")) { parentNums = parentNum.split(" or "); } else if (parentNum.contains(" - ")) { parentNums = parentNum.split(" - "); } else if (parentNum.contains(" to ")) { parentNums = parentNum.split(" to "); } else if (parentNum.contains("或")) { parentNums = parentNum.split("或"); } else { parentNums = new String[0]; } StringBuilder builder = new StringBuilder(); for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) { if ((builder + "").equals("")) { builder.append(j); } else { builder.append(",").append(j); } } PatentRight patentRight = new PatentRight() .setPatentId(params.getPatentId()) .setType(0) .setContent(strs[i]) .setSort(i) .setParentSort(builder + ""); if (strsOut.length - 1 >= i) { //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文 if(strsOut.length>strs.length&&strs.length==(i+1)){ patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length))); } else { patentRight.setContentOut(strsOut[i]); } } patentRights.add(patentRight); } else if (matcher1.find()) { String parentNum = matcher1.group().substring(matcher1.group().indexOf("m") + 2); PatentRight patentRight = new PatentRight() .setPatentId(params.getPatentId()) .setType(0) .setContent(strs[i]) .setSort(i) .setParentSort((Integer.parseInt(parentNum) - 1) + ""); //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文 if(strsOut.length>strs.length&&strs.length==(i+1)){ patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length))); } else { patentRight.setContentOut(strsOut[i]); } patentRights.add(patentRight); } else if (matcher3.find()) { StringBuilder builder = new StringBuilder(); for (int sort = 0; sort < strs.length; sort++) { if (sort == i) { continue; } builder.append(i).append(","); } PatentRight patentRight = new PatentRight() .setPatentId(params.getPatentId()) .setType(0) .setContent(strs[i]) .setSort(i) .setParentSort(builder.substring(0, builder.lastIndexOf(","))); //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文 if(strsOut.length>strs.length&&strs.length==(i+1)){ patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length))); } else { patentRight.setContentOut(strsOut[i]); } patentRights.add(patentRight); } } else { PatentRight patentRight = new PatentRight() .setPatentId(params.getPatentId()) .setType(1) .setContent(strs[i]) .setSort(i) .setParentSort("-1"); //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文 if(strsOut.length>strs.length&&strs.length==(i+1)){ patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length))); } else { patentRight.setContentOut(strsOut[i]); } patentRights.add(patentRight); } } } return patentRights; } catch (Exception e) { return new ArrayList<>(); } } }