Parcourir la source

拆分权要工厂类方法、对中国专利的拆分、理主附权要、理层级关系

chendayu il y a 2 ans
Parent
commit
9457dbadc8

+ 0 - 2
PAS/src/main/java/cn/cslg/pas/common/utils/PatentRightUtils.java

@@ -221,8 +221,6 @@ public class PatentRightUtils {
                                 parentNums = parentNum.split(" - ");
                             } else if (parentNum.contains(" to ")) {
                                 parentNums = parentNum.split(" to ");
-                            } else if (parentNum.contains("或")) {
-                                parentNums = parentNum.split("或");
                             } else {
                                 parentNums = new String[0];
                             }

+ 127 - 0
PAS/src/main/java/cn/cslg/pas/service/patentRightSplit/PatentRightSplitCNService.java

@@ -0,0 +1,127 @@
+package cn.cslg.pas.service.patentRightSplit;
+
+import cn.cslg.pas.common.model.params.PatentRightParams;
+import cn.cslg.pas.domain.PatentRight;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Splits the claim text of a Chinese (CN) patent into individual claims.
+ *
+ * <p>Every resulting {@link PatentRight} receives its position ({@code sort}), its kind
+ * ({@code type}: 1 = independent claim, 0 = dependent claim) and, when a reference can be
+ * parsed, the zero-based position(s) of the claim(s) it depends on ({@code parentSort},
+ * comma separated; {@code "-1"} for an independent claim).
+ *
+ * @author chenyu
+ * @date 2023/8/30
+ */
+@Service
+public class PatentRightSplitCNService implements PatentRightSplitService {
+
+    /** Placeholder that temporarily replaces line breaks while the text is re-segmented. */
+    private static final String LINE_BREAK_MARK = "##@@&&";
+
+    /** A former line break that is directly followed by a number, i.e. the start of a claim. */
+    private static final Pattern NUMBERED_LINE = Pattern.compile(LINE_BREAK_MARK + "([0-9]+)");
+
+    /** Reference to a single claim, e.g. "权利要求2". */
+    private static final Pattern SINGLE_REF = Pattern.compile("权利要求([0-9]+)");
+
+    /** Reference to two claim numbers joined by a separator, e.g. "权利要求2~4" or "权利要求1或2". */
+    private static final Pattern MULTI_REF = Pattern.compile("权利要求([0-9]+)([至或~-]+)([0-9]+)");
+
+    /**
+     * Splits the claim text carried by {@code params} and classifies every claim.
+     *
+     * @param params carrier of the patent id and the raw claim text
+     * @return one {@link PatentRight} per detected claim; if the content is {@code null} or
+     *         processing fails, a single-element list holding only the unsplit content
+     */
+    @Override
+    public List<PatentRight> formatPatentRight(PatentRightParams params) {
+        try {
+            String content = params.getContent();
+            if (content == null) {
+                // Same result the exception path produces, without relying on a caught NPE.
+                return unsplitFallback(null);
+            }
+
+            // Step 1: cut the raw text into one string per claim.
+            String[] claims = splitClaims(content);
+
+            // Step 2: classify each claim and resolve which claim(s) it depends on.
+            List<PatentRight> patentRights = new ArrayList<>();
+            for (int i = 0; i < claims.length; i++) {
+                String claim = claims[i];
+                PatentRight patentRight = new PatentRight()
+                        .setPatentId(params.getPatentId())
+                        .setContent(claim)
+                        .setSort(i);
+
+                if (isDependentClaim(claim)) {
+                    patentRight.setType(0);
+                    String parentSort = resolveParentSort(claim);
+                    // When no claim number can be parsed the parent is left unset.
+                    if (parentSort != null) {
+                        patentRight.setParentSort(parentSort);
+                    }
+                } else {
+                    patentRight
+                            .setType(1)
+                            .setParentSort("-1");
+                }
+                patentRights.add(patentRight);
+            }
+            return patentRights;
+        } catch (Exception e) {
+            // Best effort: report the failure and hand the text back unsplit.
+            e.printStackTrace();
+            return unsplitFallback(params.getContent());
+        }
+    }
+
+    /** Builds the single-element result used when the content cannot be split. */
+    private static List<PatentRight> unsplitFallback(String content) {
+        List<PatentRight> fallback = new ArrayList<>();
+        fallback.add(new PatentRight().setContent(content));
+        return fallback;
+    }
+
+    /**
+     * Cuts the raw claim text into one string per claim.
+     *
+     * <p>Text without any line break is cut after every "。". Otherwise only line breaks
+     * that are directly followed by a digit are kept as claim boundaries; every other
+     * line break is removed so that a claim spanning several lines is joined again.
+     */
+    private static String[] splitClaims(String content) {
+        String joined = content.replace("\r\n", LINE_BREAK_MARK).replace("\n", LINE_BREAK_MARK);
+
+        String segmented;
+        if (!joined.contains(LINE_BREAK_MARK)) {
+            segmented = joined.replace("。", "。\n");
+        } else {
+            segmented = NUMBERED_LINE.matcher(joined).replaceAll("\n$1").replace(LINE_BREAK_MARK, "");
+        }
+        return segmented.split("\n");
+    }
+
+    /**
+     * Tells whether a claim is a dependent claim: the text before its first comma (or the
+     * whole text when there is no comma) mentions "权利要求".
+     */
+    private static boolean isDependentClaim(String claim) {
+        // NOTE(review): this checks the ASCII comma; Chinese claim text often uses the
+        // full-width comma - confirm which character the real input contains.
+        int comma = claim.indexOf(",");
+        String firstClause = comma >= 0 ? claim.substring(0, comma) : claim;
+        return firstClause.contains("权利要求");
+    }
+
+    /**
+     * Extracts the zero-based parent position(s) from the first claim reference in the text.
+     *
+     * <p>"权利要求2~4" gives "1,2,3", "权利要求1或2" gives "0,1" and "权利要求3" gives "2".
+     * The numbers are taken from the regex groups, so a repeated or mixed separator cannot
+     * cause a {@link NumberFormatException}, and a reversed range is normalised.
+     *
+     * @return the comma separated parent positions, or {@code null} if no claim number is found
+     */
+    private static String resolveParentSort(String claim) {
+        Matcher multi = MULTI_REF.matcher(claim);
+        if (multi.find()) {
+            int first = Integer.parseInt(multi.group(1)) - 1;
+            int last = Integer.parseInt(multi.group(3)) - 1;
+            String separator = multi.group(2);
+
+            // Only a separator made purely of "或" lists two alternatives; any range
+            // character ("至", "~", "-") makes it an inclusive range.
+            boolean alternatives = separator.indexOf('至') < 0
+                    && separator.indexOf('~') < 0
+                    && separator.indexOf('-') < 0;
+            if (alternatives) {
+                return first + "," + last;
+            }
+
+            int low = Math.min(first, last);
+            int high = Math.max(first, last);
+            StringBuilder builder = new StringBuilder();
+            for (int sort = low; sort <= high; sort++) {
+                if (builder.length() > 0) {
+                    builder.append(",");
+                }
+                builder.append(sort);
+            }
+            return builder.toString();
+        }
+
+        Matcher single = SINGLE_REF.matcher(claim);
+        if (single.find()) {
+            return String.valueOf(Integer.parseInt(single.group(1)) - 1);
+        }
+        return null;
+    }
+
+}

+ 23 - 0
PAS/src/main/java/cn/cslg/pas/service/patentRightSplit/PatentRightSplitService.java

@@ -0,0 +1,23 @@
+package cn.cslg.pas.service.patentRightSplit;
+
+import cn.cslg.pas.common.model.params.PatentRightParams;
+import cn.cslg.pas.domain.PatentRight;
+
+import java.util.List;
+
+/**
+ * Common contract for the claim-splitting services ("claim-splitting factory" in the
+ * original wording).
+ *
+ * @author chenyu
+ * @date 2023/8/30
+ */
+public interface PatentRightSplitService {
+    /**
+     * Splits the claim text carried by {@code params} into individual claims.
+     *
+     * @param params object carrying the claim data to split
+     * @return the split claims, with independent/dependent type and parent-child
+     *         hierarchy worked out
+     */
+    List<PatentRight> formatPatentRight(PatentRightParams params);
+
+}