瀏覽代碼

拆分权要工厂类方法、对中国专利的拆分、理主附权要、理层级关系

chendayu 2 年之前
父節點
當前提交
5b0e3c5191

+ 7 - 1
PAS/src/main/java/cn/cslg/pas/service/patentRightSplit/PatentRightSplitCNService.java

@@ -26,6 +26,11 @@ public class PatentRightSplitCNService implements PatentRightSplitService {
             //原文
             String content = params.getContent();
 
+            //将原文从"1"截取到末尾(为了去除首行"声称的是:\n")
+            if (!content.equals("") && content.contains("声称的是") && content.contains("1")) {
+                content = content.substring(content.indexOf("1"));
+            }
+
             //第1步:拆分权要原文 ↓
             //将原文和译文的所有换行符 "\r\n"和"\n" 替换成特殊符号 "##@@&&",这样可以使得全文连在一起,其次再根据特殊符号##@@&&拆分权要
             content = content.replace("\r\n", "##@@&&").replace("\n", "##@@&&");
@@ -41,9 +46,10 @@ public class PatentRightSplitCNService implements PatentRightSplitService {
                     String macherGroup = matcher.group();
                     content = content.replaceFirst(macherGroup, "\n" + macherGroup.substring(macherGroup.indexOf("##@@&&") + 6));
                 }
+                //去除所有特殊符号"##@@&&"
                 content = content.replace("##@@&&", "");
             }
-            //最后按照换行符"\n"将权要拆成多个
+            //最后按照换行符"\n"将权要拆成多个
             String[] strs = content.split("\n");
 
 

+ 119 - 0
PAS/src/main/java/cn/cslg/pas/service/patentRightSplit/PatentRightSplitENService.java

@@ -0,0 +1,119 @@
+package cn.cslg.pas.service.patentRightSplit;
+
+import cn.cslg.pas.common.model.params.PatentRightParams;
+import cn.cslg.pas.domain.PatentRight;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Splits the claims text of an English-language (EN) patent into individual claims.
+ *
+ * <p>Every claim is returned as a {@link PatentRight} carrying its zero-based position
+ * ({@code sort}), its type (1 = independent claim, 0 = dependent claim) and the sort(s) of the
+ * claim(s) it refers to ({@code parentSort}, comma separated, "-1" for an independent claim).
+ *
+ * @author chenyu
+ * @date 2023/8/30
+ */
+@Service
+public class PatentRightSplitENService implements PatentRightSplitService {
+
+    /** Placeholder that temporarily stands in for every line break of the claims text. */
+    private static final String LINE_BREAK_MARK = "##@@&&";
+
+    /** A line break directly followed by digits, i.e. the start of a numbered claim. */
+    private static final Pattern CLAIM_START = Pattern.compile(LINE_BREAK_MARK + "([0-9]+)");
+
+    /** Reference to a single claim, e.g. "claim 3". */
+    private static final Pattern SINGLE_REFERENCE = Pattern.compile("claim ([0-9]+)");
+
+    /** Reference to one of two claims, e.g. "claims 2 or 3". */
+    private static final Pattern EITHER_REFERENCE = Pattern.compile("claims ([0-9]+) or ([0-9]+)");
+
+    /** Blanket reference to every earlier claim. */
+    private static final Pattern ANY_PRECEDING_REFERENCE = Pattern.compile("any preceding claim");
+
+    /**
+     * Splits the claims text into one {@link PatentRight} per claim and works out, for each of
+     * them, whether it is independent or dependent and which claim(s) it depends on.
+     *
+     * <p>Parsing is best-effort: if the content is {@code null} or anything goes wrong while
+     * parsing, a single {@link PatentRight} holding the unmodified content is returned instead.
+     *
+     * @param params carries the patent id and the raw claims text
+     * @return the parsed claims in their original order, or a single unsplit entry on failure
+     */
+    @Override
+    public List<PatentRight> formatPatentRight(PatentRightParams params) {
+        try {
+            String content = params.getContent();
+            if (content == null) {
+                //nothing to parse: same result as the failure fallback, without a stack trace
+                return unsplit(params);
+            }
+
+            //step 1: cut the text into one string per claim
+            String[] claims = splitClaims(content);
+
+            //step 2: determine the type and the parent claim(s) of every claim
+            List<PatentRight> patentRights = new ArrayList<>(claims.length);
+            for (int i = 0; i < claims.length; i++) {
+                PatentRight patentRight = new PatentRight()
+                        .setPatentId(params.getPatentId())
+                        .setContent(claims[i])
+                        .setSort(i);
+
+                if (refersToAnotherClaim(claims[i])) {
+                    //dependent claim
+                    patentRight.setType(0);
+                    String parentSort = resolveParentSort(claims[i], i);
+                    if (parentSort != null) {
+                        patentRight.setParentSort(parentSort);
+                    }
+                } else {
+                    //independent claim
+                    patentRight
+                            .setType(1)
+                            .setParentSort("-1");
+                }
+
+                patentRights.add(patentRight);
+            }
+
+            return patentRights;
+        } catch (Exception e) {
+            //deliberate best-effort: hand the text back unsplit rather than fail the caller
+            e.printStackTrace();
+            return unsplit(params);
+        }
+    }
+
+    /**
+     * Cuts the raw claims text into one string per numbered claim.
+     *
+     * @param content the raw claims text, not {@code null}
+     * @return the claims in document order
+     */
+    private static String[] splitClaims(String content) {
+        //drop the leading "What is claimed is:" line by keeping the text from the first "1" on
+        int firstOne = content.indexOf("1");
+        if (firstOne >= 0 && content.contains("What is claimed is")) {
+            content = content.substring(firstOne);
+        }
+
+        //join the whole text into one line, marking every former line break
+        String joined = content.replace("\r\n", LINE_BREAK_MARK).replace("\n", LINE_BREAK_MARK);
+        //only a line break followed by a number starts a new claim: turn it back into "\n"
+        String separated = CLAIM_START.matcher(joined).replaceAll("\n$1");
+        //every remaining mark was a line break inside a claim and is removed
+        return separated.replace(LINE_BREAK_MARK, "").split("\n");
+    }
+
+    /**
+     * Tells whether a claim is a dependent claim: the word "claim" appears before its first
+     * comma, or anywhere in the claim when it contains no comma at all.
+     */
+    private static boolean refersToAnotherClaim(String claim) {
+        int firstComma = claim.indexOf(",");
+        String leadingClause = firstComma >= 0 ? claim.substring(0, firstComma) : claim;
+        return leadingClause.contains("claim");
+    }
+
+    /**
+     * Works out the parent sort(s) of a dependent claim. Claim numbers are one-based while sorts
+     * are zero-based, hence the "- 1".
+     *
+     * @param claim the text of the dependent claim
+     * @param sort  the zero-based position of that claim
+     * @return the comma separated parent sorts, or {@code null} when no parent can be determined
+     */
+    private static String resolveParentSort(String claim, int sort) {
+        //"claims 2 or 3" is checked first, before the single-claim form
+        Matcher either = EITHER_REFERENCE.matcher(claim);
+        if (either.find()) {
+            return (Integer.parseInt(either.group(1)) - 1) + "," + (Integer.parseInt(either.group(2)) - 1);
+        }
+
+        Matcher single = SINGLE_REFERENCE.matcher(claim);
+        if (single.find()) {
+            return String.valueOf(Integer.parseInt(single.group(1)) - 1);
+        }
+
+        //"any preceding claim" means every earlier claim; the very first claim has none, so it
+        //is left without a parent instead of failing the whole split
+        if (sort > 0 && ANY_PRECEDING_REFERENCE.matcher(claim).find()) {
+            StringBuilder sorts = new StringBuilder("0");
+            for (int earlier = 1; earlier < sort; earlier++) {
+                sorts.append(",").append(earlier);
+            }
+            return sorts.toString();
+        }
+
+        return null;
+    }
+
+    /** Builds the fallback result: a single entry holding the unmodified claims text. */
+    private static List<PatentRight> unsplit(PatentRightParams params) {
+        List<PatentRight> patentRights = new ArrayList<>();
+        patentRights.add(new PatentRight().setContent(params.getContent()));
+        return patentRights;
+    }
+
+}

文件差異過大導致無法顯示
+ 1 - 53
PAS/src/test/java/cn/cslg/pas/service/PatentRightSplitENServiceTests.java