Browse Source

修改4-27脚本提出bug

lwhhszx 2 years ago
parent
commit
d81a73fcdb

+ 32 - 0
RMS/src/main/java/cn/cslg/report/common/model/PatentRightParams.java

@@ -0,0 +1,32 @@
+package cn.cslg.report.common.model;
+
+import lombok.Data;
+import lombok.experimental.Accessors;
+
+/**
+ * 权利要求
+ */
+@Accessors(chain = true)
+@Data
+public class PatentRightParams {
+    /**
+     * 专利id
+     */
+    private Integer patentId;
+    /**
+     * 专利号
+     */
+    private String patentNo;
+    /**
+     * 原文
+     */
+    private String content;
+    /**
+     * 译文
+     */
+    private String contentOut;
+    /**
+     * 独立权要
+     */
+    private String selfContent;
+}

+ 272 - 0
RMS/src/main/java/cn/cslg/report/common/utils/PatentRightUtils.java

@@ -0,0 +1,272 @@
+package cn.cslg.report.common.utils;
+
+
+
+import cn.cslg.report.common.model.PatentRightParams;
+import cn.cslg.report.entity.PatentRight;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * 拆分权要工具类
+ *
+ * @Author chenyu
+ * @Date 2023/4/25
+ */
+@Component
+public class PatentRightUtils {
+
+    public List<PatentRight> formatPatentRight(PatentRightParams params) {
+        //国家
+        String country = params.getPatentNo().substring(0, 2);
+        //原文
+        String content = params.getContent();
+        //译文
+        String contentOut = params.getContentOut();
+
+        //创建一个权要集合
+        ArrayList<PatentRight> patentRights = new ArrayList<>();
+
+        //若权要原文为空,则直接返回空集合
+        if (content == null || content.equals("")) {
+            return patentRights;
+        }
+
+        //先去掉所有换行符,使原文连成一个长字符串
+        if (content.contains("\r\n")) {
+            content = content.replaceAll("\r\n", "");
+        } else {
+            content = content.replaceAll("\n", "");
+        }
+
+        if (contentOut == null) {
+            contentOut = "";
+        }
+        if (contentOut.contains("\r\n")) {
+            contentOut = contentOut.replaceAll("\r\n", "");
+        } else {
+            contentOut = contentOut.replaceAll("\n", "");
+        }
+        //去掉译文的所有空格
+        contentOut = contentOut.replaceAll(" +", "");
+
+        //中日韩专利↓
+        if (country.equals("CN") || country.equals("JP") || country.equals("KR")) {
+            String regex;
+            if (content.contains("。2.")) {
+                regex = "。[0-9]+\\.";
+            } else if (content.contains("。2、")) {
+                regex = "。[0-9]+、";
+            } else if (content.contains("[001]")) {
+                regex = "\\[[0-9]+]";
+            } else if (content.contains("請求項")) {
+                regex = "。【請求項[0-9]+】";
+            } else if (content.contains(".청구항 2")) {
+                regex = "\\.청구항 [0-9]+";
+            } else {
+                regex = "";
+            }
+            Matcher matcher = Pattern.compile(regex).matcher(content);
+            Matcher matcherOut = Pattern.compile(regex).matcher(contentOut);
+            while (matcher.find()) {
+                if (matcher.group().contains(".청구항")) {
+                    //".청구항 2" -> "\\.청구항 2"
+                    String matcherGroup = matcher.group().replace(".", "\\.");
+                    //".청구항 2" -> ".\n청구항 2"
+                    content = content.replaceFirst(matcherGroup, ".\n" + matcherGroup.substring(matcherGroup.indexOf(".") + 1));
+                } else if (matcher.group().contains(".")) {
+                    //"。2." -> "。2\\."
+                    String matcherGroup = matcher.group().replace(".", "\\.");
+                    //"。2." -> "。\n2."
+                    content = content.replaceFirst(matcherGroup, "。\n" + matcherGroup.substring(matcherGroup.indexOf("。") + 1, matcherGroup.indexOf("\\")) + ".");
+                } else if (matcher.group().contains("、") || matcher.group().contains("請求項")) {
+                    content = content.replaceFirst(matcher.group(), "。\n" + matcher.group().substring(matcher.group().indexOf("。") + 1));
+                }
+            }
+            String[] strs = content.split("\n");
+            while (matcherOut.find()) {
+                if (matcherOut.group().contains(".")) {
+                    String matcherOutGroup = matcherOut.group().replace(".", "\\.");
+                    contentOut = contentOut.replaceFirst(matcherOutGroup, "。\n" + matcherOutGroup.substring(matcherOutGroup.indexOf("。") + 1, matcherOutGroup.indexOf("\\")) + ".");
+                } else if (matcherOut.group().contains("、")) {
+                    contentOut = contentOut.replaceFirst(matcherOut.group(), "。\n" + matcherOut.group().substring(matcherOut.group().indexOf("。") + 1));
+                }
+            }
+            String[] strsOut = contentOut.split("\n");
+
+            //定义父权要排序号
+            String regex1 = "权利要求[0-9]+";
+            String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+";
+            for (int i = 0; i < strs.length; i++) {
+                //之前:if (strs[i].contains("权利要求")) {
+                //现在:if(该权要有逗号,并且它第一句话中有"权利要求"4个字 || 该权要没有逗号,并且它有"权利要求"4个字) {
+                if ((strs[i].contains(",") && strs[i].substring(0, strs[i].indexOf(",")).contains("权利要求")) || (!strs[i].contains(",") && strs[i].contains("权利要求"))) {
+                    Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
+                    Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
+                    if (matcher2.find()) {
+                        String parentNum = matcher2.group().substring(matcher2.group().indexOf("求") + 1);
+                        String[] parentNums;
+                        if (parentNum.contains("~")) {
+                            parentNums = parentNum.split("~");
+                        } else if (parentNum.contains("-")) {
+                            parentNums = parentNum.split("-");
+                        } else if (parentNum.contains("至")) {
+                            parentNums = parentNum.split("至");
+                        } else if (parentNum.contains("或")) {
+                            parentNums = parentNum.split("或");
+                        } else {
+                            parentNums = new String[0];
+                        }
+                        StringBuilder builder = new StringBuilder();
+                        for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
+                            if ((builder + "").equals("")) {
+                                builder.append(j);
+                            } else {
+                                builder.append(",").append(j);
+                            }
+                        }
+
+                        PatentRight patentRight = new PatentRight()
+                                .setPatentId(params.getPatentId())
+                                .setType(0)
+                                .setContent(strs[i])
+                                .setSort(i)
+                                .setParentSort(builder + "");
+                        if (strsOut.length - 1 >= i) {
+                            patentRight.setContentOut(strsOut[i]);
+                        }
+                        patentRights.add(patentRight);
+                    } else if (matcher1.find()) {
+                        String parentNum = matcher1.group().substring(matcher1.group().indexOf("求") + 1);
+                        PatentRight patentRight = new PatentRight()
+                                .setPatentId(params.getPatentId())
+                                .setType(0)
+                                .setContent(strs[i])
+                                .setSort(i)
+                                .setParentSort((Integer.parseInt(parentNum) - 1) + "");
+                        if (strsOut.length - 1 >= i) {
+                            patentRight.setContentOut(strsOut[i]);
+                        }
+                        patentRights.add(patentRight);
+                    }
+
+                } else {
+                    PatentRight patentRight = new PatentRight()
+                            .setPatentId(params.getPatentId())
+                            .setType(1)
+                            .setContent(strs[i])
+                            .setSort(i)
+                            .setParentSort("-1");
+                    if (strsOut.length - 1 >= i) {
+                        patentRight.setContentOut(strsOut[i]);
+                    }
+                    patentRights.add(patentRight);
+                }
+            }
+
+            //英文专利的权要拆分 ↓
+        } else {
+            content = content.substring(content.indexOf("1."));
+            if (contentOut.contains("1.")) {
+                contentOut = contentOut.substring(contentOut.indexOf("1."));
+            } else {
+                contentOut = contentOut.substring(contentOut.indexOf("1、"));
+            }
+            String regex;
+            if (content.contains("1.")) {
+                regex = "\\.[0-9]+\\. ";
+            } else if (content.contains("1、")) {
+                regex = "\\.[0-9]+\\. ";
+            } else if (content.contains("[001]")) {
+                regex = "\\[[0-9]+]";
+            } else {
+                regex = "";
+            }
+            Matcher matcher = Pattern.compile(regex).matcher(content);
+            Matcher matcherOut = Pattern.compile("。[0-9]+\\.").matcher(contentOut);
+            while (matcher.find()) {
+                String matcherGroup = matcher.group().replace(".", "\\.");
+                content = content.replaceFirst(matcherGroup, ".\n" + matcherGroup.substring(matcherGroup.indexOf(".") + 1, matcherGroup.lastIndexOf("\\")) + ". ");
+            }
+            String[] strs = content.split("\n");
+            while (matcherOut.find()) {
+                String matcherOutGroup = matcherOut.group().replace(".", "\\.");
+                contentOut = contentOut.replaceFirst(matcherOutGroup, "。\n" + matcherOutGroup.substring(matcherOutGroup.indexOf("。") + 1, matcherOutGroup.indexOf("\\")) + ".");
+            }
+            String[] strsOut = contentOut.split("\n");
+            //定义父权要排序号
+            String regex1 = "claim [0-9]+";
+            String regex2 = "claims [0-9]+ or [0-9]+";
+            for (int i = 0; i < strs.length; i++) {
+                if (strs[i].contains("claim")) {
+                    Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
+                    Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
+                    if (matcher2.find()) {
+                        String parentNum = matcher2.group().substring(matcher2.group().indexOf("s") + 2);
+                        String[] parentNums;
+                        if (parentNum.contains(" or ")) {
+                            parentNums = parentNum.split(" or ");
+                        } else if (parentNum.contains(" - ")) {
+                            parentNums = parentNum.split(" - ");
+                        } else if (parentNum.contains(" to ")) {
+                            parentNums = parentNum.split(" to ");
+                        } else if (parentNum.contains("或")) {
+                            parentNums = parentNum.split("或");
+                        } else {
+                            parentNums = new String[0];
+                        }
+                        StringBuilder builder = new StringBuilder();
+                        for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
+                            if ((builder + "").equals("")) {
+                                builder.append(j);
+                            } else {
+                                builder.append(",").append(j);
+                            }
+                        }
+                        PatentRight patentRight = new PatentRight()
+                                .setPatentId(params.getPatentId())
+                                .setType(0)
+                                .setContent(strs[i])
+                                .setSort(i)
+                                .setParentSort(builder + "");
+                        if (strsOut.length - 1 >= i) {
+                            patentRight.setContentOut(strsOut[i]);
+                        }
+                        patentRights.add(patentRight);
+                    } else if (matcher1.find()) {
+                        String parentNum = matcher1.group().substring(matcher1.group().indexOf("m") + 2);
+                        PatentRight patentRight = new PatentRight()
+                                .setPatentId(params.getPatentId())
+                                .setType(0)
+                                .setContent(strs[i])
+                                .setSort(i)
+                                .setParentSort((Integer.parseInt(parentNum) - 1) + "");
+                        if (strsOut.length - 1 >= i) {
+                            patentRight.setContentOut(strsOut[i]);
+                        }
+                        patentRights.add(patentRight);
+                    }
+
+                } else {
+                    PatentRight patentRight = new PatentRight()
+                            .setPatentId(params.getPatentId())
+                            .setType(1)
+                            .setContent(strs[i])
+                            .setSort(i)
+                            .setParentSort("-1");
+                    if (strsOut.length - 1 >= i) {
+                        patentRight.setContentOut(strsOut[i]);
+                    }
+                    patentRights.add(patentRight);
+                }
+            }
+        }
+
+        return patentRights;
+    }
+
+}