123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- package cn.cslg.pas.service.patentRightSplit;
- import cn.cslg.pas.common.PatentRightContent;
- import cn.cslg.pas.common.model.params.PatentRightParams;
- import cn.cslg.pas.domain.PatentRight;
- import cn.cslg.pas.domain.QuestionRecord;
- import cn.cslg.pas.service.upLoadPatent.QuestionRecordService;
- import lombok.RequiredArgsConstructor;
- import org.springframework.stereotype.Service;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- /**
- * 拆分权要类(韩国专利KR)
- *
- * @author chenyu
- * @date 2023/8/30
- */
- @RequiredArgsConstructor
- @Service
- public class PatentRightSplitKRService implements PatentRightSplitService {
- private final QuestionRecordService questionRecordService;
- @Override
- public List<PatentRightContent> formatPatentRight(PatentRightParams params) {
- try {
- //创建一个权要集合,用于返回结果
- ArrayList<PatentRightContent> patentRightContents = new ArrayList<>();
- //原文
- String content = params.getContent();
- //第1步 ↓:拆分权要原文
- //将原文和译文的所有换行符 "\r\n"和"\n" 替换成特殊符号 "##@@&&",这样可以使得全文连在一起,其次再根据特殊符号##@@&&拆分权要
- content = content.replace("\r\n", "##@@&&").replace("\n", "##@@&&");
- //若原文没有换行符则全文按照句号"。"拆分
- if (!content.contains("##@@&&")) {
- if (content.contains("。")) { //若有句号"。"
- content = content.replace("。", "。\n");
- } else { //若没有句号"。",则全文按照权要序号如 "청구항 1" "청구항 2" "청구항 3"进行拆分
- Matcher matcher = Pattern.compile("청구항 [0-9]+").matcher(content);
- while (matcher.find()) {
- String macherGroup = matcher.group(); //청구항 2
- if (!macherGroup.equals("청구항 1")) {
- content = content.replaceFirst(macherGroup, "\n" + macherGroup); //"\n청구항 2"
- }
- }
- }
- } else {
- //定义正则表达式
- String regex = "##@@&&청구항 [0-9]+";
- Matcher matcher = Pattern.compile(regex).matcher(content);
- while (matcher.find()) {
- String macherGroup = matcher.group();
- content = content.replaceFirst(macherGroup, "\n" + macherGroup.substring(macherGroup.indexOf("##@@&&") + 6));
- }
- //去除所有特殊符号"##@@&&"
- content = content.replace("##@@&&", "");
- }
- //最后按照换行符"\n"将权要拆成多个
- String[] strs = content.split("\n");
- //第2步 ↓:理出权要的类型(type为 1主权要还是 0附属权要)、权要之间的层级关系(每个权要的父级权要序号parentSort)
- String regex1 = "제[0-9]+항에 있어서";
- //String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+";
- for (int i = 0; i < strs.length; i++) {
- //创建权要对象装载当前权要
- PatentRightContent patentRightContent = new PatentRightContent()
- .setContent(strs[i])
- .setSort(i);
- //创建父级排序号集合,装载父级序号
- ArrayList<Integer> parentSorts = new ArrayList<>();
- String macherGroup;
- ////判断若该权要有逗号(即超过一句话)并且它第一句话中有"权利要求"4个字 或者 该权要没有逗号(即只有一句话),并且它有"权利要求"4个字,则该权要类型为附属权要
- //if ((strs[i].contains(",") && strs[i].substring(0, strs[i].indexOf(",")).contains("权利要求")) || (!strs[i].contains(",") && strs[i].contains("权利要求"))) {
- //判断若不是权要1,并且含有"항에 있어서"几个字,则为附属权要
- if (i != 0 && strs[i].substring(strs[i].indexOf("청구항")).contains("항에 있어서")) {
- //则该权要为附权要
- patentRightContent.setType(0);
- Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
- //Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
- // if (matcher2.find()) { //"权利要求2~4"、权利要求"2-4"、权利要求"2至4"、权利要求"2或4"
- // macherGroup = matcher2.group();
- // String parentNum = macherGroup.substring(macherGroup.indexOf("权利要求") + 4); //"2~4"、"2-4"、"2至4"、"2或4"
- // String[] parentNumStrs = new String[0]; //[2, 4]
- // if (parentNum.contains("~")) {
- // parentNumStrs = parentNum.split("~");
- // } else if (parentNum.contains("-")) {
- // parentNumStrs = parentNum.split("-");
- // } else if (parentNum.contains("至")) {
- // parentNumStrs = parentNum.split("至");
- // } else if (parentNum.contains("或")) {
- // parentNumStrs = parentNum.split("或");
- // }
- //
- // if (parentNum.contains("或")) { //"2或4"
- // for (String parentNumStr : parentNumStrs) {
- // parentSorts.add(Integer.parseInt(parentNumStr) - 1); //sort:{1, 3}
- // }
- // } else { //"2~4"、"2-4"、"2至4"
- // for (int i1 = Integer.parseInt(parentNumStrs[0]); i1 <= Integer.parseInt(parentNumStrs[parentNumStrs.length - 1]); i1++) {
- // parentSorts.add(i1 - 1); //sort:{1, 2, 3}
- // }
- // }
- // } else
- if (matcher1.find()) { //"제4항에 있어서"
- macherGroup = matcher1.group();
- String parentNum = macherGroup.substring(macherGroup.indexOf("제") + 1, macherGroup.indexOf("항에 있어서")); //"4"
- int parentSort = Integer.parseInt(parentNum) - 1;
- parentSorts.add(parentSort); //sort:{3}
- }
- patentRightContent.setParentSorts(parentSorts);
- } else {
- //否则该权要为主权要
- parentSorts.add(-1); //sort:{-1}
- patentRightContent
- .setType(1)
- .setParentSorts(parentSorts);
- }
- patentRightContents.add(patentRightContent);
- }
- return patentRightContents;
- } catch (Exception e) {
- e.printStackTrace();
- //1.记录当前拆分出错专利,记住问题记录表"question_records"
- QuestionRecord questionRecord = new QuestionRecord()
- .setPatentNo(params.getPatentNo())
- .setRemark("查询权要树时拆分出错")
- .setCell(-1);
- questionRecordService.save(questionRecord);
- //2.返回原文
- ArrayList<PatentRightContent> patentRightContents = new ArrayList<>();
- patentRightContents.add(new PatentRightContent().setSort(-1).setType(-1).setContent(params.getContent()));
- return patentRightContents;
- }
- }
- }
|