PatentRightUtils.java 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. package cn.cslg.pas.common.utils;
  2. import cn.cslg.pas.common.vo.PatentRightParams;
  3. import cn.cslg.pas.common.vo.RePatentClaim;
  4. import org.springframework.stereotype.Component;
  5. import java.util.ArrayList;
  6. import java.util.Arrays;
  7. import java.util.List;
  8. import java.util.regex.Matcher;
  9. import java.util.regex.Pattern;
  10. import java.util.stream.Collectors;
  11. /**
  12. * 拆分权要工具类
  13. *
  14. * @Author chenyu
  15. * @Date 2023/4/25
  16. */
  17. @Component
  18. public class PatentRightUtils {
  19. public static List<RePatentClaim> formatPatentRight(PatentRightParams params) {
  20. try {
  21. //国家
  22. String country = params.getCountry();
  23. if (country == null) {
  24. country = params.getPatentNo().substring(0, 2);
  25. }
  26. //原文
  27. String content = params.getContent();
  28. //译文
  29. String contentOut = params.getContentOut();
  30. //创建一个权要集合
  31. ArrayList<RePatentClaim> patentRights = new ArrayList<>();
  32. //若原文为null或空串,则直接结束并返回空集合;若译文为null,则置为空串
  33. if (content == null || content.equals("")) {
  34. return patentRights;
  35. }
  36. if (contentOut == null) {
  37. contentOut = "";
  38. }
  39. //将原文和译文的所有换行符 "\r\n"或是"\n" 替换成 "@",这样首先可以使得全文连在一起,其次再根据特殊符号@拆分权要
  40. if (content.contains("\r\n")) {
  41. content = content.replaceAll("\r\n", "@");
  42. }
  43. if (content.contains("\n")) {
  44. content = content.replaceAll("\n", "@");
  45. }
  46. if (contentOut.contains("\r\n")) {
  47. contentOut = contentOut.replaceAll("\r\n", "@");
  48. }
  49. if (contentOut.contains("\n")) {
  50. contentOut = contentOut.replaceAll("\n", "@");
  51. }
  52. //去掉译文的所有空格
  53. contentOut = contentOut.replaceAll(" +", "");
  54. //中日韩权要↓
  55. if (country.equals("CN") || country.equals("JP") || country.equals("KR") || content.contains("权利要求")) {
  56. String regex;
  57. if (content.contains("@2")) {
  58. regex = "@[0-9]+";
  59. } else if (content.contains("@[00")) {
  60. regex = "@\\[[0-9]+]";
  61. } else if (content.contains("@請求項")) {
  62. regex = "@【請求項[0-9]+】";
  63. } else if (content.contains("@청구항")) {
  64. regex = "@청구항 [0-9]+";
  65. } else {
  66. regex = "";
  67. }
  68. String[] strs; //原文数组
  69. String[] strsOut; //译文数组
  70. //若以上没有匹配到,则权要直接以句号 "。" 拆分
  71. if (regex.equals("")) {
  72. content = content.replaceAll("@", "");
  73. contentOut = contentOut.replaceAll("@", "");
  74. strs = content.split("(?<=。)");
  75. strsOut = contentOut.split("(?<=。)");
  76. //若匹配到,则根据不同情形拆分
  77. } else {
  78. Matcher matcher = Pattern.compile(regex).matcher(content);
  79. Matcher matcherOut = Pattern.compile(regex).matcher(contentOut);
  80. while (matcher.find()) {
  81. content = content.replaceFirst(matcher.group(), "\n" + matcher.group().substring(matcher.group().indexOf("@") + 1));
  82. }
  83. while (matcherOut.find()) {
  84. contentOut = contentOut.replaceFirst(matcherOut.group(), "\n" + matcherOut.group().substring(matcherOut.group().indexOf("@") + 1));
  85. }
  86. content = content.replaceAll("@", "");
  87. contentOut = contentOut.replaceAll("@", "");
  88. strs = content.split("\n");
  89. strsOut = contentOut.split("\n");
  90. }
  91. //定义父权要排序号
  92. String regex1 = "权利要求[0-9]+";
  93. String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+";
  94. for (int i = 0; i < strs.length; i++) {
  95. //之前:if (strs[i].contains("权利要求")) {
  96. //现在:if(该权要有逗号,并且它第一句话中有"权利要求"4个字 || 该权要没有逗号,并且它有"权利要求"4个字) {
  97. if ((strs[i].contains(",") && strs[i].substring(0, strs[i].indexOf(",")).contains("权利要求")) || (!strs[i].contains(",") && strs[i].contains("权利要求"))) {
  98. Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
  99. Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
  100. if (matcher2.find()) {
  101. String parentNum = matcher2.group().substring(matcher2.group().indexOf("求") + 1);
  102. String[] parentNums;
  103. if (parentNum.contains("~")) {
  104. parentNums = parentNum.split("~");
  105. } else if (parentNum.contains("-")) {
  106. parentNums = parentNum.split("-");
  107. } else if (parentNum.contains("至")) {
  108. parentNums = parentNum.split("至");
  109. } else if (parentNum.contains("或")) {
  110. parentNums = parentNum.split("或");
  111. } else {
  112. parentNums = new String[0];
  113. }
  114. StringBuilder builder = new StringBuilder();
  115. for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
  116. if ((builder + "").equals("")) {
  117. builder.append(j);
  118. } else {
  119. builder.append(",").append(j);
  120. }
  121. }
  122. RePatentClaim patentRight = new RePatentClaim()
  123. .setPatentNo(params.getPatentNo())
  124. .setType(0)
  125. .setContent(strs[i])
  126. .setSort(i)
  127. .setParentSort(builder + "");
  128. if (strsOut.length - 1 >= i) {
  129. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  130. if (strsOut.length > strs.length && strs.length == (i + 1)) {
  131. patentRight.setContentOut(String.join(" ", Arrays.copyOfRange(strsOut, i, strsOut.length)));
  132. } else {
  133. patentRight.setContentOut(strsOut[i]);
  134. }
  135. }
  136. patentRights.add(patentRight);
  137. } else if (matcher1.find()) {
  138. String parentNum = matcher1.group().substring(matcher1.group().indexOf("求") + 1);
  139. RePatentClaim patentRight = new RePatentClaim()
  140. .setPatentNo(params.getPatentNo())
  141. .setType(0)
  142. .setContent(strs[i])
  143. .setSort(i)
  144. .setParentSort((Integer.parseInt(parentNum) - 1) + "");
  145. if (strsOut.length - 1 >= i) {
  146. patentRight.setContentOut(strsOut[i]);
  147. }
  148. patentRights.add(patentRight);
  149. }
  150. } else {
  151. RePatentClaim patentRight = new RePatentClaim()
  152. .setPatentNo(params.getPatentNo())
  153. .setType(1)
  154. .setContent(strs[i])
  155. .setSort(i)
  156. .setParentSort("-1");
  157. if (strsOut.length - 1 >= i) {
  158. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  159. if (strsOut.length > strs.length && strs.length == (i + 1)) {
  160. patentRight.setContentOut(String.join(" ", Arrays.copyOfRange(strsOut, i, strsOut.length)));
  161. } else {
  162. patentRight.setContentOut(strsOut[i]);
  163. }
  164. }
  165. patentRights.add(patentRight);
  166. }
  167. }
  168. //英文专利的权要拆分 ↓
  169. } else {
  170. if (!content.equals("") && content.contains("1")) {
  171. content = content.substring(content.indexOf("1"));
  172. }
  173. if (!contentOut.equals("") && contentOut.contains("1")) {
  174. contentOut = contentOut.substring(contentOut.indexOf("1"));
  175. }
  176. String regex;
  177. if (content.contains("@2")) {
  178. regex = "@[0-9]+";
  179. } else if (content.contains("[001]")) {
  180. regex = "\\[[0-9]+]";
  181. } else {
  182. regex = "";
  183. }
  184. Matcher matcher = Pattern.compile(regex).matcher(content);
  185. Matcher matcherOut = Pattern.compile(regex).matcher(contentOut);
  186. while (matcher.find()) {
  187. content = content.replaceFirst(matcher.group(), "\n" + matcher.group().substring(matcher.group().indexOf("@") + 1));
  188. }
  189. while (matcherOut.find()) {
  190. contentOut = contentOut.replaceFirst(matcherOut.group(), "\n" + matcherOut.group().substring(matcherOut.group().indexOf("@") + 1));
  191. }
  192. content = content.replaceAll("@", "");
  193. contentOut = contentOut.replaceAll("@", "");
  194. String[] strs = content.split("\n");
  195. String[] strsOut = contentOut.split("\n");
  196. //定义父权要排序号
  197. String regex1 = "claim [0-9]+";
  198. String regex2 = "claims [0-9]+ or [0-9]+";
  199. String regex3 = "any preceding claim";
  200. for (int i = 0; i < strs.length; i++) {
  201. if (strs[i].contains("claim")) {
  202. Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
  203. Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
  204. Matcher matcher3 = Pattern.compile(regex3).matcher(strs[i]);
  205. if (matcher2.find()) {
  206. String parentNum = matcher2.group().substring(matcher2.group().indexOf("s") + 2);
  207. String[] parentNums;
  208. if (parentNum.contains(" or ")) {
  209. parentNums = parentNum.split(" or ");
  210. } else if (parentNum.contains(" - ")) {
  211. parentNums = parentNum.split(" - ");
  212. } else if (parentNum.contains(" to ")) {
  213. parentNums = parentNum.split(" to ");
  214. } else if (parentNum.contains("或")) {
  215. parentNums = parentNum.split("或");
  216. } else {
  217. parentNums = new String[0];
  218. }
  219. StringBuilder builder = new StringBuilder();
  220. for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
  221. if ((builder + "").equals("")) {
  222. builder.append(j);
  223. } else {
  224. builder.append(",").append(j);
  225. }
  226. }
  227. RePatentClaim patentRight = new RePatentClaim()
  228. .setPatentNo(params.getPatentNo())
  229. .setType(0)
  230. .setContent(strs[i])
  231. .setSort(i)
  232. .setParentSort(builder + "");
  233. if (strsOut.length - 1 >= i) {
  234. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  235. if (strsOut.length > strs.length && strs.length == (i + 1)) {
  236. patentRight.setContentOut(String.join(" ", Arrays.copyOfRange(strsOut, i, strsOut.length)));
  237. } else {
  238. patentRight.setContentOut(strsOut[i]);
  239. }
  240. }
  241. patentRights.add(patentRight);
  242. } else if (matcher1.find()) {
  243. String parentNum = matcher1.group().substring(matcher1.group().indexOf("m") + 2);
  244. RePatentClaim patentRight = new RePatentClaim()
  245. .setPatentNo(params.getPatentNo())
  246. .setType(0)
  247. .setContent(strs[i])
  248. .setSort(i)
  249. .setParentSort((Integer.parseInt(parentNum) - 1) + "");
  250. if (strsOut.length - 1 >= i) {
  251. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  252. if (strsOut.length > strs.length && strs.length == (i + 1)) {
  253. patentRight.setContentOut(String.join(" ", Arrays.copyOfRange(strsOut, i, strsOut.length)));
  254. } else {
  255. patentRight.setContentOut(strsOut[i]);
  256. }
  257. }
  258. patentRights.add(patentRight);
  259. } else if (matcher3.find()) {
  260. StringBuilder builder = new StringBuilder();
  261. for (int sort = 0; sort < strs.length; sort++) {
  262. if (sort == i) {
  263. continue;
  264. }
  265. builder.append(i).append(",");
  266. }
  267. RePatentClaim patentRight = new RePatentClaim()
  268. .setPatentNo(params.getPatentNo())
  269. .setType(0)
  270. .setContent(strs[i])
  271. .setSort(i)
  272. .setParentSort(builder.substring(0, builder.lastIndexOf(",")));
  273. if (strsOut.length - 1 >= i) {
  274. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  275. if (strsOut.length > strs.length && strs.length == (i + 1)) {
  276. patentRight.setContentOut(String.join(" ", Arrays.copyOfRange(strsOut, i, strsOut.length)));
  277. } else {
  278. patentRight.setContentOut(strsOut[i]);
  279. }
  280. }
  281. patentRights.add(patentRight);
  282. }
  283. } else {
  284. RePatentClaim patentRight = new RePatentClaim()
  285. .setPatentNo(params.getPatentNo())
  286. .setType(1)
  287. .setContent(strs[i])
  288. .setSort(i)
  289. .setParentSort("-1");
  290. if (strsOut.length - 1 >= i) {
  291. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  292. if (strsOut.length > strs.length && strs.length == (i + 1)) {
  293. patentRight.setContentOut(String.join(" ", Arrays.copyOfRange(strsOut, i, strsOut.length)));
  294. } else {
  295. patentRight.setContentOut(strsOut[i]);
  296. }
  297. }
  298. patentRights.add(patentRight);
  299. }
  300. }
  301. }
  302. return patentRights;
  303. } catch (Exception e) {
  304. e.printStackTrace();
  305. ArrayList<RePatentClaim> patentRights = new ArrayList<>();
  306. patentRights.add(new RePatentClaim().setPatentNo(params.getPatentNo()).setContent(params.getContent()).setContentOut(params.getContentOut()).setType(1).setSort(0).setParentSort("-1"));
  307. return patentRights;
  308. }
  309. }
  310. public static String getFormatClaim(String claim, String country) {
  311. String reStr = "";
  312. PatentRightParams params = new PatentRightParams();
  313. if (country == null) {
  314. country = "CN";
  315. }
  316. params.setContent(claim);
  317. params.setCountry(country);
  318. List<RePatentClaim> rePatentClaims = PatentRightUtils.formatPatentRight(params);
  319. if (rePatentClaims.size() > 0) {
  320. List<String> claims = rePatentClaims.stream().map(RePatentClaim::getContent).collect(Collectors.toList());
  321. reStr = StringUtils.join(claims, "。\t");
  322. }
  323. return reStr;
  324. }
  325. }