package cn.cslg.pas.common.utils.ClaimUtils; import cn.cslg.pas.common.utils.StringUtils; import cn.cslg.pas.common.vo.PatentRightParams; import cn.cslg.pas.common.vo.RePatentClaim; import org.springframework.stereotype.Component; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; /** * 拆分权要工具类 * * @Author chenyu * @Date 2023/4/25 */ @Component public class ClaimSplitUtils { private static final List CNRegix = Arrays.asList("[0-9]+\\.[^0-9]+?", "\\[[0-9]+]","[0-9]+\\、[^0-9]+?"); private static final List JPRegix = Arrays.asList("【請求項[0-9]+】"); private static final List KORRegix = Arrays.asList("청구항 [0-9]+", "\\[[0-9]+]"); private static final List ENReginx = Arrays.asList("[0-9]+\\.[^0-9]+?", "\\[[0-9]+]"); public static List formatPatentRight(PatentRightParams params) { try { //国家 String country = params.getCountry(); String patentNo = params.getPatentNo(); if (country == null) { country = params.getPatentNo().substring(0, 2); } //原文 String content = params.getContent(); //创建一个权要集合 ArrayList patentRights = new ArrayList<>(); //若原文为null或空串,则直接结束并返回空集合;若译文为null,则置为空串 if (content == null || content.equals("")) { return patentRights; } if (content.contains("@##")) { patentRights = getSplitedRePatentClaim(content); } else { String regex = ClaimSplitUtils.getRegex(country, content); if (regex == null) { reDefaultPatentClaim(patentRights, content); return patentRights; } patentRights = splitPatentRight(regex, content); } switch (country) { case "CN": loadCNPatentClaim(patentNo, patentRights); break; case "FR": loadFRPatentClaim(patentNo, patentRights); break; default: loadENPatentClaim(patentNo, patentRights); break; } return patentRights; } catch (Exception e) { e.printStackTrace(); ArrayList patentRights = new ArrayList<>(); patentRights.add(new RePatentClaim().setPatentNo(params.getPatentNo()).setContent(params.getContent()).setContentOut(params.getContentOut()).setType(1).setSort(0).setParentSort("-1")); return patentRights; } } private static final ArrayList splitPatentRight(String regex, String content) { //获取拆分信息 List claimSplitVOS = ClaimSplitUtils.getClaimSpitVO(regex, content); String regexNum = "[1-9][0-9]*"; Integer num = 1; ArrayList patentRights = new ArrayList<>(); Boolean flag = true; Integer start = 0; Integer end = 0; for (int i = 0; i < claimSplitVOS.size(); i++) { String cont = claimSplitVOS.get(i).getContent(); Matcher matcherNum = Pattern.compile(regexNum).matcher(cont); //查询当前序号 if (matcherNum.find()) { String a = matcherNum.group(); Integer num2 = Integer.parseInt(a); if (num2 == num) { if (num == 1) { start = claimSplitVOS.get(i).getPosition(); num++; } else { end = claimSplitVOS.get(i).getPosition(); String str = content.substring(start, end); RePatentClaim rePatentClaim = new RePatentClaim(); rePatentClaim.setSort(num - 2); rePatentClaim.setContent(str); patentRights.add(rePatentClaim); start = claimSplitVOS.get(i).getPosition(); num++; //最后一个的处理逻辑 if (i + 1 == claimSplitVOS.size()) { end = content.length(); String str1 = content.substring(start, end); RePatentClaim rePatentClaim1 = new RePatentClaim(); rePatentClaim1.setSort(num - 2); rePatentClaim1.setContent(str1); patentRights.add(rePatentClaim1); } } } else { if (i == 0 && num2.equals(2)) { end = claimSplitVOS.get(i).getPosition(); String str = content.substring(0, end); RePatentClaim rePatentClaim = new RePatentClaim(); rePatentClaim.setSort(num - 1); rePatentClaim.setContent(str); patentRights.add(rePatentClaim); start = claimSplitVOS.get(i).getPosition(); num += 2; } continue; } } else { flag = false; break; } } if (flag.equals(false) || patentRights.size() == 1 || patentRights.size() == 0) { reDefaultPatentClaim(patentRights, content); } return patentRights; } /** * 根据权要文本拆分,获得拆分内容以及起始位置信息 * * @param regex * @param content */ public static List getClaimSpitVO(String regex, String content) { List claimSplitVOS = new ArrayList<>(); Matcher matcher = Pattern.compile(regex).matcher(content); while (matcher.find()) { ClaimSplitVO claimSplitVO = new ClaimSplitVO(); claimSplitVO.setContent(matcher.group()); claimSplitVO.setPosition(matcher.start()); claimSplitVOS.add(claimSplitVO); } return claimSplitVOS; } public static String getRegex(String country, String content) { String reginx = null; List regixs = new ArrayList<>(); switch (country) { case "CN": regixs = ClaimSplitUtils.CNRegix; break; case "JP": regixs = ClaimSplitUtils.JPRegix; break; case "KR": regixs = ClaimSplitUtils.KORRegix; break; default: regixs = ClaimSplitUtils.ENReginx; break; } for (String re : regixs) { Matcher matcher = Pattern.compile(re).matcher(content); if (matcher.find()) { reginx = re; break; } } return reginx; } private static void reDefaultPatentClaim(ArrayList patentRights, String content) { patentRights.clear(); RePatentClaim rePatentClaim = new RePatentClaim(); rePatentClaim.setSort(0); rePatentClaim.setContent(content); rePatentClaim.setParentSort("-1"); patentRights.add(rePatentClaim); } private static void loadCNPatentClaim(String patentNo, ArrayList rePatentClaims) { String regex1 = "权利要求[0-9]+"; String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+"; for (int i = 0; i < rePatentClaims.size(); i++) { RePatentClaim patentRight = rePatentClaims.get(i); String content = rePatentClaims.get(i).getContent(); if ((content.contains(",") && content.substring(0, content.indexOf(",")).contains("权利要求")) || (!content.contains(",") && content.contains("权利要求"))) { Matcher matcher1 = Pattern.compile(regex1).matcher(content); Matcher matcher2 = Pattern.compile(regex2).matcher(content); if (matcher2.find()) { String parentNum = matcher2.group().substring(matcher2.group().indexOf("求") + 1); String[] parentNums; if (parentNum.contains("~")) { parentNums = parentNum.split("~"); } else if (parentNum.contains("-")) { parentNums = parentNum.split("-"); } else if (parentNum.contains("至")) { parentNums = parentNum.split("至"); } else if (parentNum.contains("或")) { parentNums = parentNum.split("或"); } else { parentNums = new String[0]; } StringBuilder builder = new StringBuilder(); for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) { if ((builder + "").equals("")) { builder.append(j); } else { builder.append(",").append(j); } } patentRight.setType(0) .setParentSort(builder + ""); } else if (matcher1.find()) { String parentNum = matcher1.group().substring(matcher1.group().indexOf("求") + 1); patentRight.setType(0) .setParentSort((Integer.parseInt(parentNum) - 1) + ""); } else { patentRight .setType(1) .setParentSort("-1"); } } else { if (!StringUtils.isEmpty(content)) { patentRight .setType(1) .setParentSort("-1"); } } } } private static void loadENPatentClaim(String patentNo, ArrayList patentRights) { //定义父权要排序号 String regex1 = "claim [0-9]+"; String regex2 = "claims [0-9]+ or [0-9]+"; String regex3 = "any preceding claim"; String regex5 = "(Anspruch [0-9]+ oder [0-9]+)|(Ansprüche [0-9]+ bis [0-9]+)"; String regex4 = "Anspruch [0-9]+"; for (int i = 0; i < patentRights.size(); i++) { RePatentClaim patentRight = patentRights.get(i); String content = patentRight.getContent(); Matcher matcher1 = Pattern.compile(regex1).matcher(content); Matcher matcher2 = Pattern.compile(regex2).matcher(content); Matcher matcher3 = Pattern.compile(regex3).matcher(content); Matcher matcher4 = Pattern.compile(regex4).matcher(content); Matcher matcher5 = Pattern.compile(regex5).matcher(content); Boolean flag1 = matcher1.find(); Boolean flag2 = matcher2.find(); Boolean flag3 = matcher3.find(); Boolean flag4 = matcher4.find(); Boolean flag5 = matcher5.find(); if (flag2 || flag5) { List parentNums = new ArrayList<>(); if (flag2) { parentNums = getNums(matcher2.group()); } else if (flag5) { parentNums = getNums(matcher5.group()); } StringBuilder builder = new StringBuilder(); for (int j = Integer.parseInt(parentNums.get(0)) - 1; j < Integer.parseInt(parentNums.get(parentNums.size() - 1)); j++) { if ((builder + "").equals("")) { builder.append(j); } else { builder.append(",").append(j); } } patentRight .setPatentNo(patentNo) .setType(0) .setParentSort(builder + ""); } else if (flag1 || flag4) { String parentNum = null; if (flag1) { parentNum = getNums(matcher1.group()).get(0); } else if (flag4) { parentNum = getNums(matcher4.group()).get(0); } patentRight .setPatentNo(patentNo) .setType(0) .setParentSort((Integer.parseInt(parentNum) - 1) + ""); } else if (flag3) { StringBuilder builder = new StringBuilder(); for (int sort = 0; sort < patentRights.size(); sort++) { if (sort == i) { continue; } builder.append(i).append(","); } patentRight .setPatentNo(patentNo) .setType(0) .setParentSort(builder.substring(0, builder.lastIndexOf(","))); } else { patentRight .setPatentNo(patentNo) .setType(1) .setParentSort("-1"); } } } private static void loadFRPatentClaim(String patentNo, ArrayList patentRights) { //定义父权要排序号 String regex1 = "revendications là [0-9]+"; String regex2 = "(revendication [0-9]+ ou [0-9]+)|(revendication [0-9]+ et [0-9]+)"; String regex3 = "des revendications précédentes"; String regex4 = "revendication précédente"; for (int i = 0; i < patentRights.size(); i++) { RePatentClaim patentRight = patentRights.get(i); String content = patentRight.getContent(); Matcher matcher1 = Pattern.compile(regex1).matcher(content); Matcher matcher2 = Pattern.compile(regex2).matcher(content); Matcher matcher3 = Pattern.compile(regex3).matcher(content); Matcher matcher4 = Pattern.compile(regex4).matcher(content); Boolean flag1 = matcher1.find(); Boolean flag2 = matcher2.find(); Boolean flag3 = matcher3.find(); Boolean flag4 = matcher4.find(); if (flag2) { List parentNums = new ArrayList<>(); if (flag2) { parentNums = getNums(matcher2.group()); } StringBuilder builder = new StringBuilder(); for (int j = Integer.parseInt(parentNums.get(0)) - 1; j < Integer.parseInt(parentNums.get(parentNums.size() - 1)); j++) { if ((builder + "").equals("")) { builder.append(j); } else { builder.append(",").append(j); } } patentRight .setPatentNo(patentNo) .setType(0) .setParentSort(builder + ""); } else if (flag1) { String parentNum = null; if (flag1) { parentNum = getNums(matcher1.group()).get(0); } patentRight .setPatentNo(patentNo) .setType(0) .setParentSort((Integer.parseInt(parentNum) - 1) + ""); } else if (flag3) { StringBuilder builder = new StringBuilder(); for (int sort = 0; sort < patentRights.size(); sort++) { if (sort == i) { continue; } builder.append(i).append(","); } patentRight .setPatentNo(patentNo) .setType(0) .setParentSort(builder.substring(0, builder.lastIndexOf(","))); } else if (flag4) { StringBuilder builder = new StringBuilder(); builder.append(i - 1); patentRight .setPatentNo(patentNo) .setType(0) .setParentSort(builder.toString()); } else { patentRight .setPatentNo(patentNo) .setType(1) .setParentSort("-1"); } } } public static List getNums(String test) { List nums = new ArrayList<>(); String regex1 = "[0-9]+"; Matcher matcher1 = Pattern.compile(regex1).matcher(test); while (matcher1.find()) { nums.add(matcher1.group()); } return nums; } public static final ArrayList getSplitedRePatentClaim(String content) { ArrayList arrayList = new ArrayList<>(); String[] contents = content.split("@##"); for (int i = 0; i < contents.length; i++) { RePatentClaim rePatentClaim = new RePatentClaim(); rePatentClaim.setSort(i); rePatentClaim.setContent(contents[i]); arrayList.add(rePatentClaim); } return arrayList; } }