123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436 |
- package cn.cslg.pas.common.utils.ClaimUtils;
- import cn.cslg.pas.common.utils.StringUtils;
- import cn.cslg.pas.common.vo.PatentRightParams;
- import cn.cslg.pas.common.vo.RePatentClaim;
- import org.springframework.stereotype.Component;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import java.util.stream.Collectors;
- /**
- * 拆分权要工具类
- *
- * @Author chenyu
- * @Date 2023/4/25
- */
- @Component
- public class ClaimSplitUtils {
- private static final List<String> CNRegix = Arrays.asList("[0-9]+\\.[^0-9]+?", "\\[[0-9]+]","[0-9]+\\、[^0-9]+?");
- private static final List<String> JPRegix = Arrays.asList("【請求項[0-9]+】");
- private static final List<String> KORRegix = Arrays.asList("청구항 [0-9]+", "\\[[0-9]+]");
- private static final List<String> ENReginx = Arrays.asList("[0-9]+\\.[^0-9]+?", "\\[[0-9]+]");
- public static List<RePatentClaim> formatPatentRight(PatentRightParams params) {
- try {
- //国家
- String country = params.getCountry();
- String patentNo = params.getPatentNo();
- if (country == null) {
- country = params.getPatentNo().substring(0, 2);
- }
- //原文
- String content = params.getContent();
- //创建一个权要集合
- ArrayList<RePatentClaim> patentRights = new ArrayList<>();
- //若原文为null或空串,则直接结束并返回空集合;若译文为null,则置为空串
- if (content == null || content.equals("")) {
- return patentRights;
- }
- if (content.contains("@##")) {
- patentRights = getSplitedRePatentClaim(content);
- } else {
- String regex = ClaimSplitUtils.getRegex(country, content);
- if (regex == null) {
- reDefaultPatentClaim(patentRights, content);
- return patentRights;
- }
- patentRights = splitPatentRight(regex, content);
- }
- switch (country) {
- case "CN":
- loadCNPatentClaim(patentNo, patentRights);
- break;
- case "FR":
- loadFRPatentClaim(patentNo, patentRights);
- break;
- default:
- loadENPatentClaim(patentNo, patentRights);
- break;
- }
- return patentRights;
- } catch (Exception e) {
- e.printStackTrace();
- ArrayList<RePatentClaim> patentRights = new ArrayList<>();
- patentRights.add(new RePatentClaim().setPatentNo(params.getPatentNo()).setContent(params.getContent()).setContentOut(params.getContentOut()).setType(1).setSort(0).setParentSort("-1"));
- return patentRights;
- }
- }
- private static final ArrayList<RePatentClaim> splitPatentRight(String regex, String content) {
- //获取拆分信息
- List<ClaimSplitVO> claimSplitVOS = ClaimSplitUtils.getClaimSpitVO(regex, content);
- String regexNum = "[1-9][0-9]*";
- Integer num = 1;
- ArrayList<RePatentClaim> patentRights = new ArrayList<>();
- Boolean flag = true;
- Integer start = 0;
- Integer end = 0;
- for (int i = 0; i < claimSplitVOS.size(); i++) {
- String cont = claimSplitVOS.get(i).getContent();
- Matcher matcherNum = Pattern.compile(regexNum).matcher(cont);
- //查询当前序号
- if (matcherNum.find()) {
- String a = matcherNum.group();
- Integer num2 = Integer.parseInt(a);
- if (num2 == num) {
- if (num == 1) {
- start = claimSplitVOS.get(i).getPosition();
- num++;
- } else {
- end = claimSplitVOS.get(i).getPosition();
- String str = content.substring(start, end);
- RePatentClaim rePatentClaim = new RePatentClaim();
- rePatentClaim.setSort(num - 2);
- rePatentClaim.setContent(str);
- patentRights.add(rePatentClaim);
- start = claimSplitVOS.get(i).getPosition();
- num++;
- //最后一个的处理逻辑
- if (i + 1 == claimSplitVOS.size()) {
- end = content.length();
- String str1 = content.substring(start, end);
- RePatentClaim rePatentClaim1 = new RePatentClaim();
- rePatentClaim1.setSort(num - 2);
- rePatentClaim1.setContent(str1);
- patentRights.add(rePatentClaim1);
- }
- }
- } else {
- if (i == 0 && num2.equals(2)) {
- end = claimSplitVOS.get(i).getPosition();
- String str = content.substring(0, end);
- RePatentClaim rePatentClaim = new RePatentClaim();
- rePatentClaim.setSort(num - 1);
- rePatentClaim.setContent(str);
- patentRights.add(rePatentClaim);
- start = claimSplitVOS.get(i).getPosition();
- num += 2;
- }
- continue;
- }
- } else {
- flag = false;
- break;
- }
- }
- if (flag.equals(false) || patentRights.size() == 1 || patentRights.size() == 0) {
- reDefaultPatentClaim(patentRights, content);
- }
- return patentRights;
- }
- /**
- * 根据权要文本拆分,获得拆分内容以及起始位置信息
- *
- * @param regex
- * @param content
- */
- public static List<ClaimSplitVO> getClaimSpitVO(String regex, String content) {
- List<ClaimSplitVO> claimSplitVOS = new ArrayList<>();
- Matcher matcher = Pattern.compile(regex).matcher(content);
- while (matcher.find()) {
- ClaimSplitVO claimSplitVO = new ClaimSplitVO();
- claimSplitVO.setContent(matcher.group());
- claimSplitVO.setPosition(matcher.start());
- claimSplitVOS.add(claimSplitVO);
- }
- return claimSplitVOS;
- }
- public static String getRegex(String country, String content) {
- String reginx = null;
- List<String> regixs = new ArrayList<>();
- switch (country) {
- case "CN":
- regixs = ClaimSplitUtils.CNRegix;
- break;
- case "JP":
- regixs = ClaimSplitUtils.JPRegix;
- break;
- case "KR":
- regixs = ClaimSplitUtils.KORRegix;
- break;
- default:
- regixs = ClaimSplitUtils.ENReginx;
- break;
- }
- for (String re : regixs) {
- Matcher matcher = Pattern.compile(re).matcher(content);
- if (matcher.find()) {
- reginx = re;
- break;
- }
- }
- return reginx;
- }
- private static void reDefaultPatentClaim(ArrayList<RePatentClaim> patentRights, String content) {
- patentRights.clear();
- RePatentClaim rePatentClaim = new RePatentClaim();
- rePatentClaim.setSort(0);
- rePatentClaim.setContent(content);
- rePatentClaim.setParentSort("-1");
- patentRights.add(rePatentClaim);
- }
- private static void loadCNPatentClaim(String patentNo, ArrayList<RePatentClaim> rePatentClaims) {
- String regex1 = "权利要求[0-9]+";
- String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+";
- for (int i = 0; i < rePatentClaims.size(); i++) {
- RePatentClaim patentRight = rePatentClaims.get(i);
- String content = rePatentClaims.get(i).getContent();
- if ((content.contains(",") && content.substring(0, content.indexOf(",")).contains("权利要求")) || (!content.contains(",") && content.contains("权利要求"))) {
- Matcher matcher1 = Pattern.compile(regex1).matcher(content);
- Matcher matcher2 = Pattern.compile(regex2).matcher(content);
- if (matcher2.find()) {
- String parentNum = matcher2.group().substring(matcher2.group().indexOf("求") + 1);
- String[] parentNums;
- if (parentNum.contains("~")) {
- parentNums = parentNum.split("~");
- } else if (parentNum.contains("-")) {
- parentNums = parentNum.split("-");
- } else if (parentNum.contains("至")) {
- parentNums = parentNum.split("至");
- } else if (parentNum.contains("或")) {
- parentNums = parentNum.split("或");
- } else {
- parentNums = new String[0];
- }
- StringBuilder builder = new StringBuilder();
- for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
- if ((builder + "").equals("")) {
- builder.append(j);
- } else {
- builder.append(",").append(j);
- }
- }
- patentRight.setType(0)
- .setParentSort(builder + "");
- } else if (matcher1.find()) {
- String parentNum = matcher1.group().substring(matcher1.group().indexOf("求") + 1);
- patentRight.setType(0)
- .setParentSort((Integer.parseInt(parentNum) - 1) + "");
- } else {
- patentRight
- .setType(1)
- .setParentSort("-1");
- }
- } else {
- if (!StringUtils.isEmpty(content)) {
- patentRight
- .setType(1)
- .setParentSort("-1");
- }
- }
- }
- }
- private static void loadENPatentClaim(String patentNo, ArrayList<RePatentClaim> patentRights) {
- //定义父权要排序号
- String regex1 = "claim [0-9]+";
- String regex2 = "claims [0-9]+ or [0-9]+";
- String regex3 = "any preceding claim";
- String regex5 = "(Anspruch [0-9]+ oder [0-9]+)|(Ansprüche [0-9]+ bis [0-9]+)";
- String regex4 = "Anspruch [0-9]+";
- for (int i = 0; i < patentRights.size(); i++) {
- RePatentClaim patentRight = patentRights.get(i);
- String content = patentRight.getContent();
- Matcher matcher1 = Pattern.compile(regex1).matcher(content);
- Matcher matcher2 = Pattern.compile(regex2).matcher(content);
- Matcher matcher3 = Pattern.compile(regex3).matcher(content);
- Matcher matcher4 = Pattern.compile(regex4).matcher(content);
- Matcher matcher5 = Pattern.compile(regex5).matcher(content);
- Boolean flag1 = matcher1.find();
- Boolean flag2 = matcher2.find();
- Boolean flag3 = matcher3.find();
- Boolean flag4 = matcher4.find();
- Boolean flag5 = matcher5.find();
- if (flag2 || flag5) {
- List<String> parentNums = new ArrayList<>();
- if (flag2) {
- parentNums = getNums(matcher2.group());
- } else if (flag5) {
- parentNums = getNums(matcher5.group());
- }
- StringBuilder builder = new StringBuilder();
- for (int j = Integer.parseInt(parentNums.get(0)) - 1; j < Integer.parseInt(parentNums.get(parentNums.size() - 1)); j++) {
- if ((builder + "").equals("")) {
- builder.append(j);
- } else {
- builder.append(",").append(j);
- }
- }
- patentRight
- .setPatentNo(patentNo)
- .setType(0)
- .setParentSort(builder + "");
- } else if (flag1 || flag4) {
- String parentNum = null;
- if (flag1) {
- parentNum = getNums(matcher1.group()).get(0);
- } else if (flag4) {
- parentNum = getNums(matcher4.group()).get(0);
- }
- patentRight
- .setPatentNo(patentNo)
- .setType(0)
- .setParentSort((Integer.parseInt(parentNum) - 1) + "");
- } else if (flag3) {
- StringBuilder builder = new StringBuilder();
- for (int sort = 0; sort < patentRights.size(); sort++) {
- if (sort == i) {
- continue;
- }
- builder.append(i).append(",");
- }
- patentRight
- .setPatentNo(patentNo)
- .setType(0)
- .setParentSort(builder.substring(0, builder.lastIndexOf(",")));
- } else {
- patentRight
- .setPatentNo(patentNo)
- .setType(1)
- .setParentSort("-1");
- }
- }
- }
- private static void loadFRPatentClaim(String patentNo, ArrayList<RePatentClaim> patentRights) {
- //定义父权要排序号
- String regex1 = "revendications là [0-9]+";
- String regex2 = "(revendication [0-9]+ ou [0-9]+)|(revendication [0-9]+ et [0-9]+)";
- String regex3 = "des revendications précédentes";
- String regex4 = "revendication précédente";
- for (int i = 0; i < patentRights.size(); i++) {
- RePatentClaim patentRight = patentRights.get(i);
- String content = patentRight.getContent();
- Matcher matcher1 = Pattern.compile(regex1).matcher(content);
- Matcher matcher2 = Pattern.compile(regex2).matcher(content);
- Matcher matcher3 = Pattern.compile(regex3).matcher(content);
- Matcher matcher4 = Pattern.compile(regex4).matcher(content);
- Boolean flag1 = matcher1.find();
- Boolean flag2 = matcher2.find();
- Boolean flag3 = matcher3.find();
- Boolean flag4 = matcher4.find();
- if (flag2) {
- List<String> parentNums = new ArrayList<>();
- if (flag2) {
- parentNums = getNums(matcher2.group());
- }
- StringBuilder builder = new StringBuilder();
- for (int j = Integer.parseInt(parentNums.get(0)) - 1; j < Integer.parseInt(parentNums.get(parentNums.size() - 1)); j++) {
- if ((builder + "").equals("")) {
- builder.append(j);
- } else {
- builder.append(",").append(j);
- }
- }
- patentRight
- .setPatentNo(patentNo)
- .setType(0)
- .setParentSort(builder + "");
- } else if (flag1) {
- String parentNum = null;
- if (flag1) {
- parentNum = getNums(matcher1.group()).get(0);
- }
- patentRight
- .setPatentNo(patentNo)
- .setType(0)
- .setParentSort((Integer.parseInt(parentNum) - 1) + "");
- } else if (flag3) {
- StringBuilder builder = new StringBuilder();
- for (int sort = 0; sort < patentRights.size(); sort++) {
- if (sort == i) {
- continue;
- }
- builder.append(i).append(",");
- }
- patentRight
- .setPatentNo(patentNo)
- .setType(0)
- .setParentSort(builder.substring(0, builder.lastIndexOf(",")));
- } else if (flag4) {
- StringBuilder builder = new StringBuilder();
- builder.append(i - 1);
- patentRight
- .setPatentNo(patentNo)
- .setType(0)
- .setParentSort(builder.toString());
- } else {
- patentRight
- .setPatentNo(patentNo)
- .setType(1)
- .setParentSort("-1");
- }
- }
- }
- public static List<String> getNums(String test) {
- List<String> nums = new ArrayList<>();
- String regex1 = "[0-9]+";
- Matcher matcher1 = Pattern.compile(regex1).matcher(test);
- while (matcher1.find()) {
- nums.add(matcher1.group());
- }
- return nums;
- }
- public static final ArrayList<RePatentClaim> getSplitedRePatentClaim(String content) {
- ArrayList<RePatentClaim> arrayList = new ArrayList<>();
- String[] contents = content.split("@##");
- for (int i = 0; i < contents.length; i++) {
- RePatentClaim rePatentClaim = new RePatentClaim();
- rePatentClaim.setSort(i);
- rePatentClaim.setContent(contents[i]);
- arrayList.add(rePatentClaim);
- }
- return arrayList;
- }
- }
|