PatentRightUtils.java 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. package cn.cslg.pas.common.utils;
  2. import cn.cslg.pas.common.model.params.PatentRightParams;
  3. import cn.cslg.pas.domain.PatentRight;
  4. import org.springframework.stereotype.Component;
  5. import java.util.ArrayList;
  6. import java.util.Arrays;
  7. import java.util.List;
  8. import java.util.regex.Matcher;
  9. import java.util.regex.Pattern;
  10. /**
  11. * 拆分权要工具类
  12. *
  13. * @Author chenyu
  14. * @Date 2023/4/25
  15. */
  16. @Component
  17. public class PatentRightUtils {
  18. public static List<PatentRight> formatPatentRight(PatentRightParams params) {
  19. try {
  20. //国家
  21. String country = params.getPatentNo().substring(0, 2);
  22. //原文
  23. String content = params.getContent();
  24. //译文
  25. String contentOut = params.getContentOut();
  26. //创建一个权要集合
  27. ArrayList<PatentRight> patentRights = new ArrayList<>();
  28. //若原文为null或空串,则直接结束并返回空集合;若译文为null,则置为空串
  29. if (content == null || content.equals("")) {
  30. return patentRights;
  31. }
  32. if (contentOut == null) {
  33. contentOut = "";
  34. }
  35. //将原文和译文的所有换行符 "\r\n"或是"\n" 替换成 "@",这样首先可以使得全文连在一起,其次再根据特殊符号@拆分权要
  36. if (content.contains("\r\n")) {
  37. content = content.replaceAll("\r\n", "@");
  38. } else {
  39. content = content.replaceAll("\n", "@");
  40. }
  41. if (contentOut.contains("\r\n")) {
  42. contentOut = contentOut.replaceAll("\r\n", "@");
  43. } else {
  44. contentOut = contentOut.replaceAll("\n", "@");
  45. }
  46. //去掉译文的所有空格
  47. contentOut = contentOut.replaceAll(" +", "");
  48. //中日韩权要↓
  49. if (country.equals("CN") || country.equals("JP") || country.equals("KR") || content.contains("权利要求")) {
  50. String regex;
  51. if (content.contains("@2")) {
  52. regex = "@[0-9]+";
  53. } else if (content.contains("@[00")) {
  54. regex = "@\\[[0-9]+]";
  55. } else if (content.contains("@請求項")) {
  56. regex = "@【請求項[0-9]+】";
  57. } else if (content.contains("@청구항")) {
  58. regex = "@청구항 [0-9]+";
  59. } else {
  60. regex = "";
  61. }
  62. String[] strs; //原文数组
  63. String[] strsOut; //译文数组
  64. //若以上没有匹配到,则权要直接以句号 "。" 拆分
  65. if (regex.equals("")) {
  66. content = content.replaceAll("@", "");
  67. contentOut = contentOut.replaceAll("@", "");
  68. strs = content.split("。");
  69. strsOut = contentOut.split("。");
  70. //若匹配到,则根据不同情形拆分
  71. }
  72. else {
  73. Matcher matcher = Pattern.compile(regex).matcher(content);
  74. Matcher matcherOut = Pattern.compile(regex).matcher(contentOut);
  75. while (matcher.find()) {
  76. content = content.replaceFirst(matcher.group(), "\n" + matcher.group().substring(matcher.group().indexOf("@") + 1));
  77. }
  78. while (matcherOut.find()) {
  79. contentOut = contentOut.replaceFirst(matcherOut.group(), "\n" + matcherOut.group().substring(matcherOut.group().indexOf("@") + 1));
  80. }
  81. content = content.replaceAll("@", "");
  82. contentOut = contentOut.replaceAll("@", "");
  83. strs = content.split("\n");
  84. strsOut = contentOut.split("\n");
  85. }
  86. //定义父权要排序号
  87. String regex1 = "权利要求[0-9]+";
  88. String regex2 = "权利要求[0-9]+[至或~-]+[0-9]+";
  89. for (int i = 0; i < strs.length; i++) {
  90. //之前:if (strs[i].contains("权利要求")) {
  91. //现在:if(该权要有逗号,并且它第一句话中有"权利要求"4个字 || 该权要没有逗号,并且它有"权利要求"4个字) {
  92. if ((strs[i].contains(",") && strs[i].substring(0, strs[i].indexOf(",")).contains("权利要求")) || (!strs[i].contains(",") && strs[i].contains("权利要求"))) {
  93. Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
  94. Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
  95. if (matcher2.find()) {
  96. String parentNum = matcher2.group().substring(matcher2.group().indexOf("求") + 1);
  97. String[] parentNums;
  98. if (parentNum.contains("~")) {
  99. parentNums = parentNum.split("~");
  100. } else if (parentNum.contains("-")) {
  101. parentNums = parentNum.split("-");
  102. } else if (parentNum.contains("至")) {
  103. parentNums = parentNum.split("至");
  104. } else if (parentNum.contains("或")) {
  105. parentNums = parentNum.split("或");
  106. } else {
  107. parentNums = new String[0];
  108. }
  109. StringBuilder builder = new StringBuilder();
  110. for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
  111. if ((builder + "").equals("")) {
  112. builder.append(j);
  113. } else {
  114. builder.append(",").append(j);
  115. }
  116. }
  117. PatentRight patentRight = new PatentRight()
  118. .setPatentId(params.getPatentId())
  119. .setType(0)
  120. .setContent(strs[i])
  121. .setSort(i)
  122. .setParentSort(builder + "");
  123. if (strsOut.length - 1 >= i) {
  124. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  125. if(strsOut.length>strs.length&&strs.length==(i+1)){
  126. patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
  127. }
  128. else {
  129. patentRight.setContentOut(strsOut[i]);
  130. }
  131. }
  132. patentRights.add(patentRight);
  133. } else if (matcher1.find()) {
  134. String parentNum = matcher1.group().substring(matcher1.group().indexOf("求") + 1);
  135. PatentRight patentRight = new PatentRight()
  136. .setPatentId(params.getPatentId())
  137. .setType(0)
  138. .setContent(strs[i])
  139. .setSort(i)
  140. .setParentSort((Integer.parseInt(parentNum) - 1) + "");
  141. if (strsOut.length - 1 >= i) {
  142. patentRight.setContentOut(strsOut[i]);
  143. }
  144. patentRights.add(patentRight);
  145. }
  146. } else {
  147. PatentRight patentRight = new PatentRight()
  148. .setPatentId(params.getPatentId())
  149. .setType(1)
  150. .setContent(strs[i])
  151. .setSort(i)
  152. .setParentSort("-1");
  153. if (strsOut.length - 1 >= i) {
  154. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  155. if(strsOut.length>strs.length&&strs.length==(i+1)){
  156. patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
  157. }
  158. else {
  159. patentRight.setContentOut(strsOut[i]);
  160. }
  161. }
  162. patentRights.add(patentRight);
  163. }
  164. }
  165. //英文专利的权要拆分 ↓
  166. } else {
  167. if (!content.equals("") && content.contains("1")) {
  168. content = content.substring(content.indexOf("1"));
  169. }
  170. if (!contentOut.equals("") && contentOut.contains("1")) {
  171. contentOut = contentOut.substring(contentOut.indexOf("1"));
  172. }
  173. String regex;
  174. if (content.contains("@")) {
  175. regex = "@[0-9]+";
  176. } else if (content.contains("[001]")) {
  177. regex = "\\[[0-9]+]";
  178. } else {
  179. regex = "";
  180. }
  181. Matcher matcher = Pattern.compile(regex).matcher(content);
  182. Matcher matcherOut = Pattern.compile("@[0-9]+").matcher(contentOut);
  183. while (matcher.find()) {
  184. content = content.replaceFirst(matcher.group(), "\n" + matcher.group().substring(matcher.group().indexOf("@") + 1));
  185. }
  186. while (matcherOut.find()) {
  187. contentOut = contentOut.replaceFirst(matcherOut.group(), "\n" + matcherOut.group().substring(matcherOut.group().indexOf("@") + 1));
  188. }
  189. content = content.replaceAll("@", "");
  190. contentOut = contentOut.replaceAll("@", "");
  191. String[] strs = content.split("\n");
  192. String[] strsOut = contentOut.split("\n");
  193. //定义父权要排序号
  194. String regex1 = "claim [0-9]+";
  195. String regex2 = "claims [0-9]+ or [0-9]+";
  196. String regex3 = "any preceding claim";
  197. for (int i = 0; i < strs.length; i++) {
  198. if (strs[i].contains("claim")) {
  199. Matcher matcher1 = Pattern.compile(regex1).matcher(strs[i]);
  200. Matcher matcher2 = Pattern.compile(regex2).matcher(strs[i]);
  201. Matcher matcher3 = Pattern.compile(regex3).matcher(strs[i]);
  202. if (matcher2.find()) {
  203. String parentNum = matcher2.group().substring(matcher2.group().indexOf("s") + 2);
  204. String[] parentNums;
  205. if (parentNum.contains(" or ")) {
  206. parentNums = parentNum.split(" or ");
  207. } else if (parentNum.contains(" - ")) {
  208. parentNums = parentNum.split(" - ");
  209. } else if (parentNum.contains(" to ")) {
  210. parentNums = parentNum.split(" to ");
  211. } else if (parentNum.contains("或")) {
  212. parentNums = parentNum.split("或");
  213. } else {
  214. parentNums = new String[0];
  215. }
  216. StringBuilder builder = new StringBuilder();
  217. for (int j = Integer.parseInt(parentNums[0]) - 1; j < Integer.parseInt(parentNums[parentNums.length - 1]); j++) {
  218. if ((builder + "").equals("")) {
  219. builder.append(j);
  220. } else {
  221. builder.append(",").append(j);
  222. }
  223. }
  224. PatentRight patentRight = new PatentRight()
  225. .setPatentId(params.getPatentId())
  226. .setType(0)
  227. .setContent(strs[i])
  228. .setSort(i)
  229. .setParentSort(builder + "");
  230. if (strsOut.length - 1 >= i) {
  231. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  232. if(strsOut.length>strs.length&&strs.length==(i+1)){
  233. patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
  234. }
  235. else {
  236. patentRight.setContentOut(strsOut[i]);
  237. }
  238. }
  239. patentRights.add(patentRight);
  240. } else if (matcher1.find()) {
  241. String parentNum = matcher1.group().substring(matcher1.group().indexOf("m") + 2);
  242. PatentRight patentRight = new PatentRight()
  243. .setPatentId(params.getPatentId())
  244. .setType(0)
  245. .setContent(strs[i])
  246. .setSort(i)
  247. .setParentSort((Integer.parseInt(parentNum) - 1) + "");
  248. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  249. if(strsOut.length>strs.length&&strs.length==(i+1)){
  250. patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
  251. }
  252. else {
  253. patentRight.setContentOut(strsOut[i]);
  254. }
  255. patentRights.add(patentRight);
  256. } else if (matcher3.find()) {
  257. StringBuilder builder = new StringBuilder();
  258. for (int sort = 0; sort < strs.length; sort++) {
  259. if (sort == i) {
  260. continue;
  261. }
  262. builder.append(i).append(",");
  263. }
  264. PatentRight patentRight = new PatentRight()
  265. .setPatentId(params.getPatentId())
  266. .setType(0)
  267. .setContent(strs[i])
  268. .setSort(i)
  269. .setParentSort(builder.substring(0, builder.lastIndexOf(",")));
  270. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  271. if(strsOut.length>strs.length&&strs.length==(i+1)){
  272. patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
  273. }
  274. else {
  275. patentRight.setContentOut(strsOut[i]);
  276. }
  277. patentRights.add(patentRight);
  278. }
  279. } else {
  280. PatentRight patentRight = new PatentRight()
  281. .setPatentId(params.getPatentId())
  282. .setType(1)
  283. .setContent(strs[i])
  284. .setSort(i)
  285. .setParentSort("-1");
  286. //当翻译的长度大于原文权要的长度时,将剩下所有的译文给最后一条原文
  287. if(strsOut.length>strs.length&&strs.length==(i+1)){
  288. patentRight.setContentOut(String.join(" ",Arrays.copyOfRange(strsOut,i,strsOut.length)));
  289. }
  290. else {
  291. patentRight.setContentOut(strsOut[i]);
  292. }
  293. patentRights.add(patentRight);
  294. }
  295. }
  296. }
  297. return patentRights;
  298. } catch (Exception e) {
  299. return new ArrayList<>();
  300. }
  301. }
  302. }