ExcutePatentDataStar.java 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. package cn.cslg.pas.service.upLoadPatent;
  2. import cn.cslg.pas.common.model.dto.UploadFileDTO;
  3. import cn.cslg.pas.common.model.outApi.PatentStarListDto;
  4. import cn.cslg.pas.common.model.vo.UploadParamsVO;
  5. import cn.cslg.pas.common.model.vo.outApi.StarPatentVO;
  6. import cn.cslg.pas.common.utils.FileUtils;
  7. import cn.cslg.pas.common.utils.RemoveHtmlTagsUtils;
  8. import cn.cslg.pas.domain.*;
  9. import cn.cslg.pas.domain.asso.AssoOsTaskQrtzTask;
  10. import cn.cslg.pas.service.UploadPatentBatchService;
  11. import cn.cslg.pas.service.asso.AssoOsTaskQrtzTaskService;
  12. import cn.cslg.pas.service.outApi.PatentStarApiService;
  13. import com.alibaba.fastjson.JSON;
  14. import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
  15. import lombok.RequiredArgsConstructor;
  16. import lombok.extern.slf4j.Slf4j;
  17. import org.apache.commons.fileupload.FileItem;
  18. import org.apache.commons.fileupload.disk.DiskFileItemFactory;
  19. import org.springframework.stereotype.Service;
  20. import org.springframework.web.multipart.MultipartFile;
  21. import org.springframework.web.multipart.commons.CommonsMultipartFile;
  22. import java.io.*;
  23. import java.net.URL;
  24. import java.util.ArrayList;
  25. import java.util.Arrays;
  26. import java.util.List;
  27. import java.util.Map;
  28. import java.util.regex.Matcher;
  29. import java.util.regex.Pattern;
  30. /**
  31. * 专利之星类型任务解析获取专利类
  32. *
  33. * @Author chenyu
  34. * @Date 2023/6/25
  35. */
  36. @Slf4j
  37. @RequiredArgsConstructor
  38. @Service
  39. public class ExcutePatentDataStar implements IExcutePatentData {
  40. private final AssoOsTaskQrtzTaskService assoOsTaskQrtzTaskService;
  41. private final PatentStarApiService patentStarApiService;
  42. private final FileUtils fileUtils;
  43. private final PantentQueueService pantentQueueService;
  44. private final UploadPatentBatchService uploadPatentBatchService;
  45. /**
  46. * 解析获取专利数据
  47. *
  48. * @param task 任务
  49. */
  50. @Override
  51. public void startExcute(Task task) throws IOException {
  52. //从任务关联网站导入任务表中取出当前任务
  53. List<AssoOsTaskQrtzTask> assoOsTaskQrtzTasks = assoOsTaskQrtzTaskService.list(new LambdaQueryWrapper<AssoOsTaskQrtzTask>().eq(AssoOsTaskQrtzTask::getTaskId, task.getId()));
  54. AssoOsTaskQrtzTask assoOsTaskQrtzTask = assoOsTaskQrtzTasks.get(0);
  55. //从任务数据中获取下载字段、检索式
  56. String cellsStr = assoOsTaskQrtzTask.getConfigCells();
  57. List<String> cells = Arrays.asList(cellsStr.split(","));
  58. String conditions = assoOsTaskQrtzTask.getConditions();
  59. //定义每次检索的专利数量(每次检索50件)
  60. int size = 50;
  61. //获得专利总数量
  62. Integer count = task.getTotal();
  63. //1.根据专利总数量count遍历检索专利
  64. int pageNum = 0; //页码
  65. //int exceptionNum = 0; //中途出错未生产完成的专利数量
  66. for (int i = 1; i <= count; i += size) {
  67. pageNum++;
  68. PatentStarListDto patentStarListDto = new PatentStarListDto()
  69. .setCurrentQuery(conditions)
  70. .setOrderBy("AD")
  71. .setOrderByType("DESC")
  72. .setPageNum(pageNum)
  73. .setRowCount(size)
  74. .setDBType("CN");
  75. //调用一般接口
  76. Map<String, Object> resultMap = patentStarApiService.patentStarSearchApi(patentStarListDto);
  77. if (resultMap == null || (Integer) resultMap.get("total") == 0) {
  78. continue;
  79. }
  80. //调用一般接口返回一批专利著录相关数据
  81. List<StarPatentVO> patents = (List<StarPatentVO>) resultMap.get("records");
  82. //遍历50个专利
  83. for (StarPatentVO starPatent : patents) {
  84. try {
  85. UploadParamsVO uploadParamsVO = new UploadParamsVO();
  86. setPatentZhuLu(starPatent, uploadParamsVO);
  87. //保存专利基础数据(专利表"os_patent")
  88. uploadPatentBatchService.getOneOrInsertOne(uploadParamsVO);
  89. PQueueData pQueueData = new PQueueData()
  90. .setTask(task)
  91. .setStarPatent(starPatent)
  92. .setUploadParamsVO(uploadParamsVO);
  93. //装载专利著录
  94. if (cells.contains("1")) {
  95. pantentQueueService.zhuluToPQueue(pQueueData);
  96. }
  97. //装载权要
  98. if (cells.contains("2")) {
  99. //setPatentClaim(starPatent, uploadParamsVO);
  100. pantentQueueService.rightToPQueue(pQueueData);
  101. }
  102. //装载说明书文本
  103. if (cells.contains("3")) {
  104. //setPatentInstructionText(starPatent, uploadParamsVO);
  105. pantentQueueService.instructionTextToPQueue(pQueueData);
  106. }
  107. //装载说明书pdf
  108. if (cells.contains("4")) {
  109. //setPatentInstructionPDF(starPatent, uploadParamsVO);
  110. pantentQueueService.instructionPDFToPQueue(pQueueData);
  111. }
  112. //装载摘要附图
  113. if (cells.contains("6")) {
  114. //setPatentPicture(starPatent, uploadParamsVO);
  115. pantentQueueService.imageToPQueue(pQueueData);
  116. }
  117. //将该专利存入5个消费者队列
  118. //ProjectImportPatentVO projectImportPatentVO = new ProjectImportPatentVO();
  119. //projectImportPatentVO.setProjectId(task.getProjectId());
  120. //task.setExceptionNum(exceptionNum);
  121. //专利丢入5个消费者队列,并唤醒5个消费者线程
  122. //pantentQueueService.patentToQueue(task, uploadParamsVO, projectImportPatentVO);
  123. } catch (Exception e) {
  124. e.printStackTrace();
  125. //exceptionNum++;
  126. //跳过当前生产出问题的专利,继续生产下一个专利
  127. }
  128. }
  129. }
  130. }
  131. /**
  132. * 装载著录方法
  133. *
  134. * @param starPatent 专利之星著录对象
  135. * @param uploadParamsVO 专利实体类对象
  136. */
  137. public void setPatentZhuLu(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) {
  138. //以下 ↓装载的是调用"一般检索"接口返回的专利相关数据
  139. Patent patent = new Patent();
  140. //装载专利号
  141. patent.setPatentNo(starPatent.getPatentNo());
  142. //装载摘要
  143. patent.setAbstractStr(starPatent.getAbstractStr());
  144. //装载标题
  145. patent.setName(starPatent.getName());
  146. //装载申请号
  147. patent.setApplicationNo(starPatent.getApplicationNo());
  148. //装载申请日
  149. if (starPatent.getApplicationDate() != null && !starPatent.getApplicationDate().equals("")) {
  150. patent.setApplicationDate(Integer.parseInt(starPatent.getApplicationDate()));
  151. }
  152. //装载公开号
  153. patent.setPublicNo(starPatent.getPublicNo());
  154. //装载公开日
  155. if (starPatent.getPublicDate() != null && !starPatent.getPublicDate().equals(""))
  156. patent.setPublicDate(Integer.parseInt(starPatent.getPublicDate()));
  157. //装载申请人
  158. if (starPatent.getAbstractStr() != null && !starPatent.getAbstractStr().equals("")) {
  159. uploadParamsVO.setPatentApplicantOriginalName(Arrays.asList(starPatent.getApplicantStr().split(";")));
  160. }
  161. //装载权利人
  162. if (starPatent.getCurrentApplicantStr() != null && !starPatent.getCurrentApplicantStr().equals("")) {
  163. uploadParamsVO.setPatentApplicantCurrentName(Arrays.asList(starPatent.getCurrentApplicantStr().split(";")));
  164. }
  165. //装载IPC分类号
  166. if (starPatent.getIpcListStr() != null && !starPatent.getIpcListStr().equals("")) {
  167. String[] ipcArr = starPatent.getIpcListStr().split(";");
  168. //装载IPC分类号
  169. uploadParamsVO.setMainIpc(ipcArr[0]);
  170. uploadParamsVO.setIpcList(Arrays.asList(ipcArr));
  171. }
  172. //以下 ↓装载的是调用"获得中国专利著录"接口返回的专利相关数据
  173. String appNo = null;
  174. if (starPatent.getApplicationNo().contains(".")) {
  175. appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf("."));
  176. } else {
  177. appNo = starPatent.getApplicationNo();
  178. }
  179. //调用中国专利著录接口返回的专利相关数据最外层是一个集合"[]",但是集合中只有一个对象"{}",以下方式处理
  180. String chinaPatentZhuLuStr = patentStarApiService.getCnBibApi(appNo);
  181. //chinaPatentZhuLuStr = chinaPatentZhuLuStr.substring(chinaPatentZhuLuStr.indexOf("["), chinaPatentZhuLuStr.lastIndexOf("[")).trim();
  182. //ChinaPatentZhuLu chinaPatentZhuLu = JSONObject.parseObject(chinaPatentZhuLuStr, ChinaPatentZhuLu.class);
  183. //以上暂无需处理 ↑ 以下 ↓有现成的json数组字符串转为集合方法
  184. List<ChinaPatentZhuLu> chinaPatentZhuLus = JSON.parseArray(chinaPatentZhuLuStr, ChinaPatentZhuLu.class);
  185. ChinaPatentZhuLu chinaPatentZhuLu = chinaPatentZhuLus.get(0);
  186. //装载申请人地址
  187. ArrayList<String> patentApplicantOriginalAddresss = new ArrayList<>();
  188. if (chinaPatentZhuLu.getDZ().contains(" ")) {
  189. patentApplicantOriginalAddresss.add(chinaPatentZhuLu.getDZ().substring(chinaPatentZhuLu.getDZ().indexOf(" ") + 1));
  190. } else {
  191. patentApplicantOriginalAddresss.add(chinaPatentZhuLu.getDZ());
  192. }
  193. uploadParamsVO.setPatentApplicantOriginalAddress(patentApplicantOriginalAddresss);
  194. //装载代理人
  195. List<String> patentAgents = Arrays.asList(chinaPatentZhuLu.getAT().split(";"));
  196. ArrayList<PatentAgent> patentAgentList = new ArrayList<>();
  197. for (String n : patentAgents) {
  198. PatentAgent patentAgent = new PatentAgent();
  199. patentAgent.setName(n);
  200. patentAgentList.add(patentAgent);
  201. }
  202. uploadParamsVO.setPatentAgentList(patentAgentList);
  203. //装载代理机构地址
  204. if (chinaPatentZhuLu.getAGN() != null && !chinaPatentZhuLu.getAGN().equals("")) {
  205. String agencyAddress;
  206. if (chinaPatentZhuLu.getAGN().contains(" ")) {
  207. agencyAddress = chinaPatentZhuLu.getAGN().substring(0, chinaPatentZhuLu.getAGN().lastIndexOf(" "));
  208. } else {
  209. agencyAddress = chinaPatentZhuLu.getAGN();
  210. }
  211. patent.setAgencyId(agencyAddress);
  212. }
  213. //装载发明人
  214. List<String> patentInventorNames = Arrays.asList(chinaPatentZhuLu.getIV().split(";"));
  215. ArrayList<PatentInventor> patentInventors = new ArrayList<>();
  216. for (String patentInventorName : patentInventorNames) {
  217. PatentInventor patentInventor = new PatentInventor();
  218. patentInventor.setName(patentInventorName);
  219. patentInventors.add(patentInventor);
  220. }
  221. uploadParamsVO.setPatentInventorList(patentInventors);
  222. //装载优先权号、优先权国家、优先权日
  223. String priorityInfo = chinaPatentZhuLu.getPR();
  224. patent.setPriorityNo(priorityInfo);
  225. //以下 ↓装载的是调用"获得同族专利"接口返回的专利相关数据
  226. String familyPatentNoStr = patentStarApiService.getFamilyByPubNoApi(starPatent.getPatentNo());
  227. FamilyPatentNo familyPatentNo = JSON.parseObject(familyPatentNoStr, FamilyPatentNo.class);
  228. //装载同族号
  229. if (familyPatentNo.getFamilyinfo() != null && !familyPatentNo.getFamilyinfo().equals("")) {
  230. List<String> simpleFamily = Arrays.asList(familyPatentNo.getFamilyinfo().split(";"));
  231. uploadParamsVO.setSimpleFamily(simpleFamily);
  232. }
  233. //以下 ↓装载的是调用"获得中国专利法律状态"接口返回的专利相关数据
  234. String cnLegalApiStr = patentStarApiService.getCnLegalApi(appNo);
  235. List<ChinaLeagalStatus> chinaLeagalStatuses = JSON.parseArray(cnLegalApiStr, ChinaLeagalStatus.class);
  236. ChinaLeagalStatus chinaLeagalStatus = chinaLeagalStatuses.get(0);
  237. //装载法律状态
  238. uploadParamsVO.setPatentSimpleStatus(chinaLeagalStatus.getLegalStatus());
  239. uploadParamsVO.setSimpleStatus(chinaLeagalStatus.getLegalStatus());
  240. //最后将 patent装载到 uploadParamsVO
  241. uploadParamsVO.setPatent(patent);
  242. }
  243. /**
  244. * 装载权要方法
  245. *
  246. * @param starPatent 专利之星著录对象
  247. * @param uploadParamsVO 专利实体类对象
  248. */
  249. public void setPatentClaim(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) throws IOException {
  250. String appNo = null;
  251. if (starPatent.getApplicationNo().contains(".")) {
  252. appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf("."));
  253. } else {
  254. appNo = starPatent.getApplicationNo();
  255. }
  256. //根据申请号调用"获得中国专利全文文本"接口,获得包含各种xml标签的专利全文内容的长字符串 cnFullXmlStr
  257. String cnFullXmlStr = patentStarApiService.getCnFullXmlApi(appNo);
  258. //使用正则表达式拼接出权要原文
  259. String regex = "(?<=<claim-text>)[\\w\\W]+?(?=</claim-text>)";
  260. Pattern compile = Pattern.compile(regex);
  261. Matcher matcher = compile.matcher(cnFullXmlStr);
  262. StringBuilder builder = new StringBuilder();
  263. while (matcher.find()) {
  264. builder.append(matcher.group()).append("\r\n");
  265. }
  266. String patentRightText = builder + "";
  267. //使用工具类去除字符串文本中的所有HTML格式标签
  268. patentRightText = RemoveHtmlTagsUtils.removeHtmlTags(patentRightText);
  269. //装载权利要求原文
  270. PatentRight patentRight = new PatentRight();
  271. patentRight.setContent(patentRightText);
  272. uploadParamsVO.setPatentRight(patentRight);
  273. }
  274. /**
  275. * 装载说明书文本
  276. *
  277. * @param starPatent 专利之星著录对象
  278. * @param uploadParamsVO 专利实体类对象
  279. */
  280. public void setPatentInstructionText(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) throws IOException {
  281. String appNo = null;
  282. if (starPatent.getApplicationNo().contains(".")) {
  283. appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf("."));
  284. } else {
  285. appNo = starPatent.getApplicationNo();
  286. }
  287. //根据申请号调用"获得中国专利全文文本"接口,获得包含各种xml标签的专利全文内容的长字符串 cnFullXmlStr
  288. String cnFullXmlStr = patentStarApiService.getCnFullXmlApi(appNo);
  289. //使用正则表达式拼接出说明书文本全文
  290. String regex = "(?<=<p id=\"p)[\\w\\W]+?(?=</p>)";
  291. Pattern compile = Pattern.compile(regex);
  292. Matcher matcher = compile.matcher(cnFullXmlStr);
  293. StringBuilder builder = new StringBuilder();
  294. while (matcher.find()) {
  295. String oldRow = matcher.group();
  296. if (oldRow.contains("num=\"n")) {
  297. oldRow = oldRow.substring(oldRow.indexOf("num=\"n") + 6);
  298. oldRow = "[" + oldRow;
  299. oldRow = oldRow.replace("\">", "]");
  300. } else if (oldRow.contains("num=")) {
  301. oldRow = oldRow.substring(oldRow.indexOf("num=") + 5);
  302. oldRow = "[" + oldRow;
  303. oldRow = oldRow.replace("\">", "]");
  304. } else {
  305. oldRow = oldRow.substring(oldRow.indexOf("\">") + 2);
  306. }
  307. builder.append(oldRow).append("\r\n");
  308. }
  309. String instructionText = builder + "";
  310. //使用工具类去除字符串文本中的所有HTML格式标签
  311. instructionText = RemoveHtmlTagsUtils.removeHtmlTags(instructionText);
  312. //装载说明书文本全文
  313. PatentInstructionText patentInstructionText = new PatentInstructionText();
  314. patentInstructionText.setManual(instructionText);
  315. uploadParamsVO.setPatentInstructionText(patentInstructionText);
  316. }
  317. /**
  318. * 装载说明书pdf
  319. *
  320. * @param starPatent 专利之星著录对象
  321. * @param uploadParamsVO 专利实体类对象
  322. */
  323. public void setPatentInstructionPDF(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) {
  324. String patentNo = starPatent.getPatentNo();
  325. //根据专利号调用"获得世界专利pdf"接口,获得pdf的url地址
  326. String pdfUrl = patentStarApiService.getEnPdfApi(patentNo);
  327. try {
  328. URL url = new URL(pdfUrl); //想要读取的url地址
  329. InputStream in = url.openStream();
  330. File file = File.createTempFile("new_url", ".pdf"); //创建文件
  331. OutputStream os = new FileOutputStream(file); //创建文件输出流
  332. int bytesRead;
  333. byte[] buffer = new byte[8192];
  334. int len = 8192;
  335. while ((bytesRead = in.read(buffer, 0, len)) != -1) {
  336. os.write(buffer, 0, bytesRead);
  337. }
  338. //关闭释放流
  339. os.close();
  340. in.close();
  341. DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
  342. FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
  343. buffer = new byte[8192];
  344. FileInputStream fis = new FileInputStream(file);
  345. OutputStream fos = item.getOutputStream();
  346. len = 8192;
  347. while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
  348. fos.write(buffer, 0, bytesRead);
  349. }
  350. //关闭释放流
  351. fos.close();
  352. fis.close();
  353. MultipartFile multipartFile = new CommonsMultipartFile(item);
  354. UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
  355. uploadParamsVO.setFileDTO(fileDTO);
  356. } catch (Exception e) {
  357. e.printStackTrace();
  358. }
  359. }
  360. /**
  361. * 装载摘要附图
  362. *
  363. * @param starPatent 专利之星著录对象
  364. * @param uploadParamsVO 专利实体类对象
  365. */
  366. public void setPatentPicture(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) {
  367. String appNo = null;
  368. if (starPatent.getApplicationNo().contains(".")) {
  369. appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf("."));
  370. } else {
  371. appNo = starPatent.getApplicationNo();
  372. }
  373. //根据申请号调用"获得中国专利摘要附图"接口,获得摘要附图的url地址
  374. String pictureUrl = patentStarApiService.getPictureApi(appNo);
  375. try {
  376. URL url = new URL(pictureUrl); //想要读取的url地址
  377. InputStream in = url.openStream();
  378. File file = File.createTempFile("new_url", ".jpg"); //创建文件
  379. OutputStream os = new FileOutputStream(file); //创建文件输出流
  380. int bytesRead;
  381. byte[] buffer = new byte[8192];
  382. int len = 8192;
  383. while ((bytesRead = in.read(buffer, 0, len)) != -1) {
  384. os.write(buffer, 0, bytesRead);
  385. }
  386. //关闭释放流
  387. os.close();
  388. in.close();
  389. DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
  390. FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
  391. buffer = new byte[8192];
  392. FileInputStream fis = new FileInputStream(file);
  393. OutputStream fos = item.getOutputStream();
  394. len = 8192;
  395. while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
  396. fos.write(buffer, 0, bytesRead);
  397. }
  398. //关闭释放流
  399. fos.close();
  400. fis.close();
  401. MultipartFile multipartFile = new CommonsMultipartFile(item);
  402. UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
  403. uploadParamsVO.setFileDTO(fileDTO);
  404. } catch (Exception e) {
  405. e.printStackTrace();
  406. }
  407. }
  408. }