123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320 |
- package cn.cslg.pas.common.utils;
- import cn.cslg.pas.common.model.params.PatentRightParams;
- import cn.cslg.pas.domain.PatentRight;
- import org.springframework.stereotype.Component;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- /**
- * 拆分权要工具类
- *
- * @Author chenyu
- * @Date 2023/4/25
- */
- @Component
- public class PatentRightUtils {
- public static List<PatentRight> formatPatentRight(PatentRightParams params) {
- try {
- //国家
- String country = params.getPatentNo().substring(0, 2);
- //原文
- String content = params.getContent();
- //译文
- String contentOut = params.getContentOut();
- //创建一个权要集合
- ArrayList<PatentRight> patentRights = new ArrayList<>();
- //若原文为null或空串,则直接结束并返回空集合;若译文为null,则置为空串
- if (content == null || content.equals("")) {
- return patentRights;
- }
- if (contentOut == null) {
- contentOut = "";
- }
- //将原文和译文的所有换行符 "\r\n"或是"\n" 替换成 "@",这样首先可以使得全文连在一起,其次再根据特殊符号@拆分权要
- if (content.contains("\r\n")) {
- content = content.replaceAll("\r\n", "@");
- } else {
- content = content.replaceAll("\n", "@");
- }
- if (contentOut.contains("\r\n")) {
- contentOut = contentOut.replaceAll("\r\n", "@");
- } else {
- contentOut = contentOut.replaceAll("\n", "@");
- }
- //去掉译文的所有空格
- contentOut = contentOut.replaceAll(" +", "");
- //中日韩权要↓
- if (country.equals("CN") || country.equals("JP") || country.equals("KR") || content.contains("权利要求")) {
- String regex;
- if (content.contains("@2")) {
- regex = "@[0-9]+";
- } else if (content.contains("@[00")) {
- regex = "@\\[[0-9]+]";
- } else if (content.contains("@請求項")) {
- regex = "@【請求項[0-9]+】";
- } else if (content.contains("@청구항")) {
- regex = "@청구항 [0-9]+";
- } else {
- regex = "";
- }
- String[] strs; //原文数组
- String[] strsOut; //译文数组
- //若以上没有匹配到,则权要直接以句号 "。" 拆分
- if (regex.equals("")) {
- content = content.replaceAll("@", "");
- contentOut = contentOut.replaceAll("@", "");
- strs = content.split("。");
- strsOut = contentOut.split("。");
- //若匹配到,则根据不同情形拆分
- }
- else {
- Matcher matcher = Pattern.compile(regex).matcher(content);
- Matcher matcherOut = Pattern.compile(regex).matcher(contentOut);
- while (matcher.find()) {
- content = content.replaceFirst(matcher.group(), "\n" + matcher.group().substring(matcher.group().indexOf("@") + 1));
- }
- while (matcherOut.find()) {
- contentOut = contentOut.replaceFirst(matcherOut.group(), "\n" + matcherOut.group().substring(matcherOut.group().indexOf("@") + 1));
- }
- content = content.replaceAll("@", "");
- contentOut = contentOut.replaceAll("@", "");
- strs = content.split("\n");
- strsOut = contentOut.split("\n");
- }
- //定义父权要排序号
- String regex1 = "权利要求[0-9]+";
- String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+";
- for (int i = 0; i < strs.length; i++) {
- //之前:if (strs[i].contains("权利要求")) {
- //现在:if(该权要有逗号,并且它第一句话中有"权利要求"4个字 || 该权要没有逗号,并且它有"权利要求"4个字) {
- if ((strs[i].contains(",") && strs[i].substring(0, strs[i].indexOf(",")).contains("权利要求")) || (!strs[i].contains(",") && strs[i].contains("权利要求"))) {
- Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
- Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
- if (matcher2.find()) {
- String parentNum = matcher2.group().substring(matcher2.group().indexOf("求") + 1);
- String[] parentNums;
- if (parentNum.contains("~")) {
- parentNums = parentNum.split("~");
- } else if (parentNum.contains("-")) {
- parentNums = parentNum.split("-");
- } else if (parentNum.contains("至")) {
- parentNums = parentNum.split("至");
- } else if (parentNum.contains("或")) {
- parentNums = parentNum.split("或");
- } else {
- parentNums = new String[0];
- }
- StringBuilder builder = new StringBuilder();
- for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
- if ((builder + "").equals("")) {
- builder.append(j);
- } else {
- builder.append(",").append(j);
- }
- }
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(0)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort(builder + "");
- if (strsOut.length - 1 >= i) {
- //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
- if(strsOut.length>strs.length&&strs.length==(i+1)){
- patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
- }
- else {
- patentRight.setContentOut(strsOut[i]);
- }
- }
- patentRights.add(patentRight);
- } else if (matcher1.find()) {
- String parentNum = matcher1.group().substring(matcher1.group().indexOf("求") + 1);
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(0)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort((Integer.parseInt(parentNum) - 1) + "");
- if (strsOut.length - 1 >= i) {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- }
- } else {
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(1)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort("-1");
- if (strsOut.length - 1 >= i) {
- //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
- if(strsOut.length>strs.length&&strs.length==(i+1)){
- patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
- }
- else {
- patentRight.setContentOut(strsOut[i]);
- }
- }
- patentRights.add(patentRight);
- }
- }
- //英文专利的权要拆分 ↓
- } else {
- if (!content.equals("") && content.contains("1")) {
- content = content.substring(content.indexOf("1"));
- }
- if (!contentOut.equals("") && contentOut.contains("1")) {
- contentOut = contentOut.substring(contentOut.indexOf("1"));
- }
- String regex;
- if (content.contains("@")) {
- regex = "@[0-9]+";
- } else if (content.contains("[001]")) {
- regex = "\\[[0-9]+]";
- } else {
- regex = "";
- }
- Matcher matcher = Pattern.compile(regex).matcher(content);
- Matcher matcherOut = Pattern.compile("@[0-9]+").matcher(contentOut);
- while (matcher.find()) {
- content = content.replaceFirst(matcher.group(), "\n" + matcher.group().substring(matcher.group().indexOf("@") + 1));
- }
- while (matcherOut.find()) {
- contentOut = contentOut.replaceFirst(matcherOut.group(), "\n" + matcherOut.group().substring(matcherOut.group().indexOf("@") + 1));
- }
- content = content.replaceAll("@", "");
- contentOut = contentOut.replaceAll("@", "");
- String[] strs = content.split("\n");
- String[] strsOut = contentOut.split("\n");
- //定义父权要排序号
- String regex1 = "claim [0-9]+";
- String regex2 = "claims [0-9]+ or [0-9]+";
- String regex3 = "any preceding claim";
- for (int i = 0; i < strs.length; i++) {
- if (strs[i].contains("claim")) {
- Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
- Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
- Matcher matcher3 = Pattern.compile(regex3).matcher(strs[i]);
- if (matcher2.find()) {
- String parentNum = matcher2.group().substring(matcher2.group().indexOf("s") + 2);
- String[] parentNums;
- if (parentNum.contains(" or ")) {
- parentNums = parentNum.split(" or ");
- } else if (parentNum.contains(" - ")) {
- parentNums = parentNum.split(" - ");
- } else if (parentNum.contains(" to ")) {
- parentNums = parentNum.split(" to ");
- } else if (parentNum.contains("或")) {
- parentNums = parentNum.split("或");
- } else {
- parentNums = new String[0];
- }
- StringBuilder builder = new StringBuilder();
- for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
- if ((builder + "").equals("")) {
- builder.append(j);
- } else {
- builder.append(",").append(j);
- }
- }
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(0)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort(builder + "");
- if (strsOut.length - 1 >= i) {
- //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
- if(strsOut.length>strs.length&&strs.length==(i+1)){
- patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
- }
- else {
- patentRight.setContentOut(strsOut[i]);
- }
- }
- patentRights.add(patentRight);
- } else if (matcher1.find()) {
- String parentNum = matcher1.group().substring(matcher1.group().indexOf("m") + 2);
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(0)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort((Integer.parseInt(parentNum) - 1) + "");
- //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
- if(strsOut.length>strs.length&&strs.length==(i+1)){
- patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
- }
- else {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- } else if (matcher3.find()) {
- StringBuilder builder = new StringBuilder();
- for (int sort = 0; sort < strs.length; sort++) {
- if (sort == i) {
- continue;
- }
- builder.append(i).append(",");
- }
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(0)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort(builder.substring(0, builder.lastIndexOf(",")));
- //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
- if(strsOut.length>strs.length&&strs.length==(i+1)){
- patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
- }
- else {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- }
- } else {
- PatentRight patentRight = new PatentRight()
- .setPatentId(params.getPatentId())
- .setType(1)
- .setContent(strs[i])
- .setSort(i)
- .setParentSort("-1");
- //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
- if(strsOut.length>strs.length&&strs.length==(i+1)){
- patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
- }
- else {
- patentRight.setContentOut(strsOut[i]);
- }
- patentRights.add(patentRight);
- }
- }
- }
- return patentRights;
- } catch (Exception e) {
- return new ArrayList<>();
- }
- }
- }
|