ClaimSplitUtils.java 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. package cn.cslg.pas.common.utils.ClaimUtils;
  2. import cn.cslg.pas.common.utils.StringUtils;
  3. import cn.cslg.pas.common.vo.PatentRightParams;
  4. import cn.cslg.pas.common.vo.RePatentClaim;
  5. import org.springframework.stereotype.Component;
  6. import java.util.ArrayList;
  7. import java.util.Arrays;
  8. import java.util.List;
  9. import java.util.regex.Matcher;
  10. import java.util.regex.Pattern;
  11. import java.util.stream.Collectors;
  12. /**
  13. * 拆分权要工具类
  14. *
  15. * @Author chenyu
  16. * @Date 2023/4/25
  17. */
  18. @Component
  19. public class ClaimSplitUtils {
  20. private static final List<String> CNRegix = Arrays.asList("[0-9]+\\.[^0-9]+?", "\\[[0-9]+]");
  21. private static final List<String> JPRegix = Arrays.asList("【請求項[0-9]+】");
  22. private static final List<String> KORRegix = Arrays.asList("청구항 [0-9]+", "\\[[0-9]+]");
  23. private static final List<String> ENReginx = Arrays.asList("[0-9]+\\.[^0-9]+?", "\\[[0-9]+]");
  24. public static List<RePatentClaim> formatPatentRight(PatentRightParams params) {
  25. try {
  26. //国家
  27. String country = params.getCountry();
  28. String patentNo = params.getPatentNo();
  29. if (country == null) {
  30. country = params.getPatentNo().substring(0, 2);
  31. }
  32. //原文
  33. String content = params.getContent();
  34. //创建一个权要集合
  35. ArrayList<RePatentClaim> patentRights = new ArrayList<>();
  36. //若原文为null或空串,则直接结束并返回空集合;若译文为null,则置为空串
  37. if (content == null || content.equals("")) {
  38. return patentRights;
  39. }
  40. if (content.contains("@##")) {
  41. patentRights = getSplitedRePatentClaim(content);
  42. } else {
  43. String regex = ClaimSplitUtils.getRegex(country, content);
  44. if (regex == null) {
  45. reDefaultPatentClaim(patentRights, content);
  46. return patentRights;
  47. }
  48. patentRights = splitPatentRight(regex, content);
  49. }
  50. switch (country) {
  51. case "CN":
  52. loadCNPatentClaim(patentNo, patentRights);
  53. break;
  54. case "FR":
  55. loadFRPatentClaim(patentNo, patentRights);
  56. break;
  57. default:
  58. loadENPatentClaim(patentNo, patentRights);
  59. break;
  60. }
  61. return patentRights;
  62. } catch (Exception e) {
  63. e.printStackTrace();
  64. ArrayList<RePatentClaim> patentRights = new ArrayList<>();
  65. patentRights.add(new RePatentClaim().setPatentNo(params.getPatentNo()).setContent(params.getContent()).setContentOut(params.getContentOut()).setType(1).setSort(0).setParentSort("-1"));
  66. return patentRights;
  67. }
  68. }
  69. private static final ArrayList<RePatentClaim> splitPatentRight(String regex, String content) {
  70. //获取拆分信息
  71. List<ClaimSplitVO> claimSplitVOS = ClaimSplitUtils.getClaimSpitVO(regex, content);
  72. String regexNum = "[1-9][0-9]*";
  73. Integer num = 1;
  74. ArrayList<RePatentClaim> patentRights = new ArrayList<>();
  75. Boolean flag = true;
  76. Integer start = 0;
  77. Integer end = 0;
  78. for (int i = 0; i < claimSplitVOS.size(); i++) {
  79. String cont = claimSplitVOS.get(i).getContent();
  80. Matcher matcherNum = Pattern.compile(regexNum).matcher(cont);
  81. //查询当前序号
  82. if (matcherNum.find()) {
  83. String a = matcherNum.group();
  84. Integer num2 = Integer.parseInt(a);
  85. if (num2 == num) {
  86. if (num == 1) {
  87. start = claimSplitVOS.get(i).getPosition();
  88. num++;
  89. } else {
  90. end = claimSplitVOS.get(i).getPosition();
  91. String str = content.substring(start, end);
  92. RePatentClaim rePatentClaim = new RePatentClaim();
  93. rePatentClaim.setSort(num - 2);
  94. rePatentClaim.setContent(str);
  95. patentRights.add(rePatentClaim);
  96. start = claimSplitVOS.get(i).getPosition();
  97. num++;
  98. //最后一个的处理逻辑
  99. if (i + 1 == claimSplitVOS.size()) {
  100. end = content.length();
  101. String str1 = content.substring(start, end);
  102. RePatentClaim rePatentClaim1 = new RePatentClaim();
  103. rePatentClaim1.setSort(num - 1);
  104. rePatentClaim1.setContent(str1);
  105. patentRights.add(rePatentClaim1);
  106. }
  107. }
  108. } else {
  109. if (i == 0 && num2.equals(2)) {
  110. end = claimSplitVOS.get(i).getPosition();
  111. String str = content.substring(0, end);
  112. RePatentClaim rePatentClaim = new RePatentClaim();
  113. rePatentClaim.setSort(num - 1);
  114. rePatentClaim.setContent(str);
  115. patentRights.add(rePatentClaim);
  116. start = claimSplitVOS.get(i).getPosition();
  117. num += 2;
  118. }
  119. continue;
  120. }
  121. } else {
  122. flag = false;
  123. break;
  124. }
  125. }
  126. if (flag.equals(false) || patentRights.size() == 1) {
  127. reDefaultPatentClaim(patentRights, content);
  128. }
  129. return patentRights;
  130. }
  131. /**
  132. * 根据权要文本拆分,获得拆分内容以及起始位置信息
  133. *
  134. * @param regex
  135. * @param content
  136. */
  137. public static List<ClaimSplitVO> getClaimSpitVO(String regex, String content) {
  138. List<ClaimSplitVO> claimSplitVOS = new ArrayList<>();
  139. Matcher matcher = Pattern.compile(regex).matcher(content);
  140. while (matcher.find()) {
  141. ClaimSplitVO claimSplitVO = new ClaimSplitVO();
  142. claimSplitVO.setContent(matcher.group());
  143. claimSplitVO.setPosition(matcher.start());
  144. claimSplitVOS.add(claimSplitVO);
  145. }
  146. return claimSplitVOS;
  147. }
  148. public static String getRegex(String country, String content) {
  149. String reginx = null;
  150. List<String> regixs = new ArrayList<>();
  151. switch (country) {
  152. case "CN":
  153. regixs = ClaimSplitUtils.CNRegix;
  154. break;
  155. case "JP":
  156. regixs = ClaimSplitUtils.JPRegix;
  157. break;
  158. case "KR":
  159. regixs = ClaimSplitUtils.KORRegix;
  160. break;
  161. default:
  162. regixs = ClaimSplitUtils.ENReginx;
  163. break;
  164. }
  165. for (String re : regixs) {
  166. Matcher matcher = Pattern.compile(re).matcher(content);
  167. if (matcher.find()) {
  168. reginx = re;
  169. break;
  170. }
  171. }
  172. return reginx;
  173. }
  174. private static void reDefaultPatentClaim(ArrayList<RePatentClaim> patentRights, String content) {
  175. patentRights.clear();
  176. RePatentClaim rePatentClaim = new RePatentClaim();
  177. rePatentClaim.setSort(0);
  178. rePatentClaim.setContent(content);
  179. rePatentClaim.setParentSort("-1");
  180. patentRights.add(rePatentClaim);
  181. }
  182. private static void loadCNPatentClaim(String patentNo, ArrayList<RePatentClaim> rePatentClaims) {
  183. String regex1 = "权利要求[0-9]+";
  184. String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+";
  185. for (int i = 0; i < rePatentClaims.size(); i++) {
  186. RePatentClaim patentRight = rePatentClaims.get(i);
  187. String content = rePatentClaims.get(i).getContent();
  188. if ((content.contains(",") && content.substring(0, content.indexOf(",")).contains("权利要求")) || (!content.contains(",") && content.contains("权利要求"))) {
  189. Matcher matcher1 = Pattern.compile(regex1).matcher(content);
  190. Matcher matcher2 = Pattern.compile(regex2).matcher(content);
  191. if (matcher2.find()) {
  192. String parentNum = matcher2.group().substring(matcher2.group().indexOf("求") + 1);
  193. String[] parentNums;
  194. if (parentNum.contains("~")) {
  195. parentNums = parentNum.split("~");
  196. } else if (parentNum.contains("-")) {
  197. parentNums = parentNum.split("-");
  198. } else if (parentNum.contains("至")) {
  199. parentNums = parentNum.split("至");
  200. } else if (parentNum.contains("或")) {
  201. parentNums = parentNum.split("或");
  202. } else {
  203. parentNums = new String[0];
  204. }
  205. StringBuilder builder = new StringBuilder();
  206. for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
  207. if ((builder + "").equals("")) {
  208. builder.append(j);
  209. } else {
  210. builder.append(",").append(j);
  211. }
  212. }
  213. patentRight.setType(0)
  214. .setParentSort(builder + "");
  215. } else if (matcher1.find()) {
  216. String parentNum = matcher1.group().substring(matcher1.group().indexOf("求") + 1);
  217. patentRight.setType(0)
  218. .setParentSort((Integer.parseInt(parentNum) - 1) + "");
  219. } else {
  220. patentRight
  221. .setType(1)
  222. .setParentSort("-1");
  223. }
  224. } else {
  225. if (!StringUtils.isEmpty(content)) {
  226. patentRight
  227. .setType(1)
  228. .setParentSort("-1");
  229. }
  230. }
  231. }
  232. }
  233. private static void loadENPatentClaim(String patentNo, ArrayList<RePatentClaim> patentRights) {
  234. //定义父权要排序号
  235. String regex1 = "claim [0-9]+";
  236. String regex2 = "claims [0-9]+ or [0-9]+";
  237. String regex3 = "any preceding claim";
  238. String regex5 = "(Anspruch [0-9]+ oder [0-9]+)|(Ansprüche [0-9]+ bis [0-9]+)";
  239. String regex4 = "Anspruch [0-9]+";
  240. for (int i = 0; i < patentRights.size(); i++) {
  241. RePatentClaim patentRight = patentRights.get(i);
  242. String content = patentRight.getContent();
  243. Matcher matcher1 = Pattern.compile(regex1).matcher(content);
  244. Matcher matcher2 = Pattern.compile(regex2).matcher(content);
  245. Matcher matcher3 = Pattern.compile(regex3).matcher(content);
  246. Matcher matcher4 = Pattern.compile(regex4).matcher(content);
  247. Matcher matcher5 = Pattern.compile(regex5).matcher(content);
  248. Boolean flag1 = matcher1.find();
  249. Boolean flag2 = matcher2.find();
  250. Boolean flag3 = matcher3.find();
  251. Boolean flag4 = matcher4.find();
  252. Boolean flag5 = matcher5.find();
  253. if (flag2 || flag5) {
  254. List<String> parentNums = new ArrayList<>();
  255. if (flag2) {
  256. parentNums = getNums(matcher2.group());
  257. } else if (flag5) {
  258. parentNums = getNums(matcher5.group());
  259. }
  260. StringBuilder builder = new StringBuilder();
  261. for (int j = Integer.parseInt(parentNums.get(0)) - 1; j < Integer.parseInt(parentNums.get(parentNums.size() - 1)); j++) {
  262. if ((builder + "").equals("")) {
  263. builder.append(j);
  264. } else {
  265. builder.append(",").append(j);
  266. }
  267. }
  268. patentRight
  269. .setPatentNo(patentNo)
  270. .setType(0)
  271. .setParentSort(builder + "");
  272. } else if (flag1 || flag4) {
  273. String parentNum = null;
  274. if (flag1) {
  275. parentNum = getNums(matcher1.group()).get(0);
  276. } else if (flag4) {
  277. parentNum = getNums(matcher4.group()).get(0);
  278. }
  279. patentRight
  280. .setPatentNo(patentNo)
  281. .setType(0)
  282. .setParentSort((Integer.parseInt(parentNum) - 1) + "");
  283. } else if (flag3) {
  284. StringBuilder builder = new StringBuilder();
  285. for (int sort = 0; sort < patentRights.size(); sort++) {
  286. if (sort == i) {
  287. continue;
  288. }
  289. builder.append(i).append(",");
  290. }
  291. patentRight
  292. .setPatentNo(patentNo)
  293. .setType(0)
  294. .setParentSort(builder.substring(0, builder.lastIndexOf(",")));
  295. } else {
  296. patentRight
  297. .setPatentNo(patentNo)
  298. .setType(1)
  299. .setParentSort("-1");
  300. }
  301. }
  302. }
  303. private static void loadFRPatentClaim(String patentNo, ArrayList<RePatentClaim> patentRights) {
  304. //定义父权要排序号
  305. String regex1 = "revendications là [0-9]+";
  306. String regex2 = "(revendication [0-9]+ ou [0-9]+)|(revendication [0-9]+ et [0-9]+)";
  307. String regex3 = "des revendications précédentes";
  308. String regex4 = "revendication précédente";
  309. for (int i = 0; i < patentRights.size(); i++) {
  310. RePatentClaim patentRight = patentRights.get(i);
  311. String content = patentRight.getContent();
  312. Matcher matcher1 = Pattern.compile(regex1).matcher(content);
  313. Matcher matcher2 = Pattern.compile(regex2).matcher(content);
  314. Matcher matcher3 = Pattern.compile(regex3).matcher(content);
  315. Matcher matcher4 = Pattern.compile(regex4).matcher(content);
  316. Boolean flag1 = matcher1.find();
  317. Boolean flag2 = matcher2.find();
  318. Boolean flag3 = matcher3.find();
  319. Boolean flag4 = matcher4.find();
  320. if (flag2) {
  321. List<String> parentNums = new ArrayList<>();
  322. if (flag2) {
  323. parentNums = getNums(matcher2.group());
  324. }
  325. StringBuilder builder = new StringBuilder();
  326. for (int j = Integer.parseInt(parentNums.get(0)) - 1; j < Integer.parseInt(parentNums.get(parentNums.size() - 1)); j++) {
  327. if ((builder + "").equals("")) {
  328. builder.append(j);
  329. } else {
  330. builder.append(",").append(j);
  331. }
  332. }
  333. patentRight
  334. .setPatentNo(patentNo)
  335. .setType(0)
  336. .setParentSort(builder + "");
  337. } else if (flag1) {
  338. String parentNum = null;
  339. if (flag1) {
  340. parentNum = getNums(matcher1.group()).get(0);
  341. }
  342. patentRight
  343. .setPatentNo(patentNo)
  344. .setType(0)
  345. .setParentSort((Integer.parseInt(parentNum) - 1) + "");
  346. } else if (flag3) {
  347. StringBuilder builder = new StringBuilder();
  348. for (int sort = 0; sort < patentRights.size(); sort++) {
  349. if (sort == i) {
  350. continue;
  351. }
  352. builder.append(i).append(",");
  353. }
  354. patentRight
  355. .setPatentNo(patentNo)
  356. .setType(0)
  357. .setParentSort(builder.substring(0, builder.lastIndexOf(",")));
  358. } else if (flag4) {
  359. StringBuilder builder = new StringBuilder();
  360. builder.append(i - 1);
  361. patentRight
  362. .setPatentNo(patentNo)
  363. .setType(0)
  364. .setParentSort(builder.toString());
  365. } else {
  366. patentRight
  367. .setPatentNo(patentNo)
  368. .setType(1)
  369. .setParentSort("-1");
  370. }
  371. }
  372. }
  373. public static List<String> getNums(String test) {
  374. List<String> nums = new ArrayList<>();
  375. String regex1 = "[0-9]+";
  376. Matcher matcher1 = Pattern.compile(regex1).matcher(test);
  377. while (matcher1.find()) {
  378. nums.add(matcher1.group());
  379. }
  380. return nums;
  381. }
  382. public static final ArrayList<RePatentClaim> getSplitedRePatentClaim(String content) {
  383. ArrayList<RePatentClaim> arrayList = new ArrayList<>();
  384. String[] contents = content.split("@##");
  385. for (int i = 0; i < contents.length; i++) {
  386. RePatentClaim rePatentClaim = new RePatentClaim();
  387. rePatentClaim.setSort(i);
  388. rePatentClaim.setContent(contents[i]);
  389. arrayList.add(rePatentClaim);
  390. }
  391. return arrayList;
  392. }
  393. }