UploadFromWebService.java 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239
  1. package com.example.demo.service;
  2. import com.alibaba.fastjson.JSONObject;
  3. import com.example.demo.domain.dto.*;
  4. import com.example.demo.domain.entity.*;
  5. import com.example.demo.exception.XiaoShiException;
  6. import com.example.demo.model.dto.TaskWebSocketDTO;
  7. import com.example.demo.model.dto.WebQueryDTO;
  8. import com.example.demo.util.*;
  9. import com.spire.pdf.FileFormat;
  10. import com.spire.pdf.PdfDocument;
  11. import com.spire.pdf.PdfDocumentBase;
  12. import lombok.RequiredArgsConstructor;
  13. import lombok.extern.slf4j.Slf4j;
  14. import org.apache.commons.fileupload.FileItem;
  15. import org.apache.commons.fileupload.disk.DiskFileItemFactory;
  16. import org.openqa.selenium.By;
  17. import org.openqa.selenium.WebDriver;
  18. import org.openqa.selenium.WebElement;
  19. import org.openqa.selenium.chrome.ChromeDriver;
  20. import org.openqa.selenium.chrome.ChromeOptions;
  21. import org.openqa.selenium.interactions.Actions;
  22. import org.openqa.selenium.support.ui.ExpectedConditions;
  23. import org.openqa.selenium.support.ui.WebDriverWait;
  24. import org.springframework.beans.BeanUtils;
  25. import org.springframework.beans.factory.annotation.Value;
  26. import org.springframework.context.annotation.Lazy;
  27. import org.springframework.stereotype.Service;
  28. import org.springframework.web.multipart.MultipartFile;
  29. import org.springframework.web.multipart.commons.CommonsMultipartFile;
  30. import java.io.*;
  31. import java.util.*;
  32. import java.util.concurrent.TimeUnit;
  33. import java.util.regex.Matcher;
  34. import java.util.regex.Pattern;
  35. /**
  36. * @author admin
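  37. * @description Downloads patent data from external patent websites (Patent Star via Selenium, EPO) and imports it, reporting task progress over WebSocket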
  38. * @createDate 2023-03-15 13:40:32
  39. */
  40. @Slf4j
  41. @Service
  42. @RequiredArgsConstructor(onConstructor_ = {@Lazy})
  43. public class UploadFromWebService {
  // Collaborators below are final fields, so they are supplied through the Lombok-generated
  // constructor (@RequiredArgsConstructor on the class).
  // Used here via uploadToLocal(url, extension); presumably stores the remote file locally - confirm.
  44. private final FileUtils fileUtils;
  // Receives each scraped patent through importPatents(patentCell); the reply is parsed as JSON with a "code" entry.
  45. private final OutInterfaceService outInterfaceService;
  // Looks up the target website configuration by id (getConfigById).
  46. private final WebConfigService webConfigService;
  // Loads the task definition (QrtzTask) that a task detail belongs to.
  47. private final QrTaskService qrTaskService;
  // Loads the per-run execution record (QrtzTaskDetail), also re-read during a run to detect state changes.
  48. private final QrTaskDetailService qrTaskDetailService;
  // Resolves the login account/password for a website id and tenant id (getLoginConfig).
  49. private final WebLoginConfigService webLoginConfigService;
  // Value of the "driverUrl" property; passed to the "webdriver.chrome.driver" system property before Chrome is started.
  50. @Value("${driverUrl}")
  51. private String url;
  52. // 查询并上传(专利之星)
  53. public List<PatentCell> getPatentStar(WebQueryDTO webQueryDTO) throws Exception {
  54. List<PatentCell> patentCells = new ArrayList<>();
  55. //当前任务执行情况
  56. QrtzTaskDetail qrtzTaskDetail = qrTaskDetailService.getById(webQueryDTO.getTaskId());
  57. Integer successNum = qrtzTaskDetail.getSuccessNum();
  58. int startPage = successNum / 5;
  59. int startNum = successNum % 5;
  60. //当前任务条件
  61. QrtzTask qrtzTask = qrTaskService.getById(qrtzTaskDetail.getTaskId());
  62. //专利成功条数
  63. try {
  64. List<String> cells = webQueryDTO.getConCells();
  65. String conditions = webQueryDTO.getConditions();
  66. Integer id = webQueryDTO.getWebConfigId();
  67. //conditions = this.formatConditions(conditions);
  68. //根据id 获得网站配置
  69. WebConfig webConfig = webConfigService.getConfigById(id);
  70. //根据网站id和用户的租户获得登录信息
  71. WebLoginConfig webLoginConfig = webLoginConfigService.getLoginConfig(webConfig.getId(), qrtzTask.getTenantId());
  72. if (webLoginConfig == null) {
  73. qrtzTaskDetail.setTaskDetailState(3);
  74. qrtzTaskDetail.setSuccessNum(successNum);
  75. qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
  76. qrtzTaskDetail.setFailure("未配置登录信息");
  77. qrtzTaskDetail.setEndTime(new Date());
  78. qrtzTaskDetail.updateById();
  79. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  80. .setId(webQueryDTO.getTaskId())
  81. .setProjectId(webQueryDTO.getProjectId())
  82. .setComplete(true)
  83. .setIndex(successNum)
  84. .setPercentage(0.0)
  85. .setTaskDetailState(3)
  86. .setFileName("")
  87. .setUrl("")
  88. .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
  89. return null;
  90. }
  91. //1.获得驱动
  92. // System.setProperty("webdriver.chrome.driver", "D:\\driver\\chromedriver.exe");
  93. System.setProperty("webdriver.chrome.driver", url);
  94. ChromeOptions chromeOptions = new ChromeOptions();
  95. // 设置无头模式
  96. chromeOptions.setHeadless(Boolean.TRUE);
  97. // 设置无轨 开发时还是不要加,可以看到浏览器效果
  98. chromeOptions.addArguments("-headless");
  99. chromeOptions.addArguments("no-sandbox");
  100. // 3.创建驱动
  101. WebDriver driver = new ChromeDriver(chromeOptions);
  102. // 4.创建动作
  103. Actions action = new Actions(driver);
  104. //设置等待时间
  105. long formSecond1 = 10;
  106. WebDriverWait wait1 = new WebDriverWait(driver, formSecond1);
  107. // 打开专利之星首页
  108. driver.get(webConfig.getWebAddress());
  109. wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("loginname")));
  110. WebElement loginName = driver.findElement(By.id("loginname"));
  111. WebElement password = driver.findElement(By.id("password"));
  112. loginName.sendKeys(webLoginConfig.getLoginAccount());
  113. password.sendKeys(webLoginConfig.getLoginPassword());
  114. WebElement loginButton = driver.findElement(By.id("login"));
  115. loginButton.click();
  116. TimeUnit.MILLISECONDS.sleep(5000);//毫秒
  117. //获得表格搜索按钮并点击
  118. wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("tablepage")));
  119. WebElement tablePage = driver.findElement(By.id("tablepage"));
  120. WebElement tablePagea = tablePage.findElement(By.tagName("a"));
  121. tablePagea.click();
  122. TimeUnit.MILLISECONDS.sleep(5000);//毫秒
  123. //获得搜索框并点击
  124. wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("TxtSearch")));
  125. WebElement TxtSearch = driver.findElement(By.id("TxtSearch"));
  126. TxtSearch.sendKeys(conditions);
  127. WebElement searchBtn = driver.findElement(By.id("searchbtn2"));
  128. searchBtn.click();
  129. wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("tcdNumber")));
  130. List<WebElement> tcdNumbers = driver.findElements(By.className("tcdNumber"));
  131. String pagesText = tcdNumbers.get(tcdNumbers.size() - 1).getText();
  132. int page = Integer.parseInt(pagesText);
  133. //获得总数
  134. WebElement countEle = driver.findElement(By.className("allcountlab"));
  135. String countText = countEle.getText();
  136. Integer count = Integer.parseInt(countText);
  137. //总数赋值给当前任务执行情况的总量属性
  138. qrtzTaskDetail.setAllNum(count);
  139. qrtzTaskDetail.updateById();
  140. String Handle = driver.getWindowHandle();
  141. //创建一个map集合存放浏览器句柄
  142. HashMap<String, String> handleMap = new HashMap<>();
  143. //将智慧芽句柄放到map中
  144. handleMap.put("mainPage", Handle);
  145. WebElement pagNum = driver.findElement(By.className("page_num"));
  146. pagNum.clear();
  147. pagNum.sendKeys(startPage + 1 + "");
  148. WebElement pagBtn = driver.findElement(By.className("page_btn"));
  149. pagBtn.click();
  150. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  151. //计算进度值
  152. double percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D);
  153. percentage = MathUtils.saveTwoDecimal(percentage);
  154. for (int p = startPage; p < page; p++) {
  155. //等待数据加载
  156. wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("patent")));
  157. TimeUnit.MILLISECONDS.sleep(1000);//毫秒
  158. //获得列表
  159. List<WebElement> patentContents = driver.findElements(By.className("patent"));
  160. // 页面元素标签
  161. for (int i = startNum; i < patentContents.size(); i++) {
  162. //查看任务状态
  163. QrtzTaskDetail qrtzTaskDetail1 = qrTaskDetailService.getById(webQueryDTO.getTaskId());
  164. if (qrtzTaskDetail1.getTaskDetailState().equals(7)) {
  165. qrtzTaskDetail1.setTaskDetailState(5);
  166. qrtzTaskDetail1.setSuccessNum(successNum);
  167. qrtzTaskDetail1.setTaskProcess(percentage);
  168. qrtzTaskDetail1.updateById();
  169. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  170. .setId(webQueryDTO.getTaskId())
  171. .setProjectId(webQueryDTO.getProjectId())
  172. .setComplete(false)
  173. .setIndex(successNum)
  174. .setTaskDetailState(5)
  175. .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
  176. return null;
  177. } else if (qrtzTaskDetail1.getTaskDetailState().equals(8)) {
  178. qrtzTaskDetail1.setTaskDetailState(6);
  179. qrtzTaskDetail1.setSuccessNum(successNum);
  180. qrtzTaskDetail1.setTaskProcess(percentage);
  181. qrtzTaskDetail1.updateById();
  182. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  183. .setId(webQueryDTO.getTaskId())
  184. .setProjectId(webQueryDTO.getProjectId())
  185. .setComplete(false)
  186. .setIndex(successNum)
  187. .setTaskDetailState(6)
  188. .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
  189. return null;
  190. }
  191. PatentCell patentCell = new PatentCell();
  192. patentCell.setProjectId(webQueryDTO.getProjectId());
  193. patentCell.setReportId(webQueryDTO.getReportId());
  194. wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("title-color")));
  195. WebElement titleA = patentContents.get(i).findElement(By.className("title-color"));
  196. titleA.click();
  197. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  198. //9、获取到所有的句柄
  199. Set<String> set = driver.getWindowHandles();
  200. //10、循环找到详情窗口句柄
  201. for (String s : set) {
  202. //10.1、将详情窗口的句柄放到map中
  203. if (!s.equals(Handle)) {
  204. handleMap.put("detail", s);
  205. }
  206. }
  207. driver.switchTo().window(handleMap.get("detail"));
  208. wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("item-content")));
  209. WebElement itemContent = driver.findElement(By.className("item-content"));
  210. List<WebElement> divItems = itemContent.findElements(By.tagName("div"));
  211. for (int t = 0; t < divItems.size(); t++) {
  212. WebElement temEle = divItems.get(t);
  213. String text = temEle.getText();
  214. String value = "";
  215. String[] strings = text.split(":");
  216. if (strings.length > 1) {
  217. value = strings[1].trim();
  218. String key = strings[0].trim();
  219. switch (key) {
  220. case "申请号":
  221. patentCell.setApplicationNo(value);
  222. break;
  223. case "申请日":
  224. patentCell.setApplicationDate(value);
  225. break;
  226. case "国家/省市":
  227. patentCell.setCountry(value);
  228. break;
  229. case "公开号":
  230. patentCell.setPublicNo(value);
  231. break;
  232. case "公开日":
  233. patentCell.setPubilcDate(value);
  234. break;
  235. case "授权公告号":
  236. patentCell.setPublicAccreditNo(value);
  237. break;
  238. case "授权公告日":
  239. patentCell.setPublicAccreditDate(value);
  240. break;
  241. case "主分类号":
  242. patentCell.setMainIpc(value);
  243. break;
  244. case "分类号":
  245. patentCell.setIpc(this.StringToList(value, ";"));
  246. break;
  247. case "申请人":
  248. patentCell.setApplicationPersons(this.StringToList(value, " "));
  249. break;
  250. case "申请人地址":
  251. patentCell.setApplicationAddress(this.StringToList(value, " "));
  252. break;
  253. case "发明人":
  254. patentCell.setInventors(this.StringToList(value, " "));
  255. break;
  256. case "当前权利人":
  257. patentCell.setApplicationCurrents(this.StringToList(value, " "));
  258. break;
  259. case "代理人":
  260. patentCell.setAgencyPersons(this.StringToList(value, " "));
  261. break;
  262. case "代理机构":
  263. patentCell.setAgency(value);
  264. break;
  265. case "范畴分类":
  266. patentCell.setClassical(value);
  267. break;
  268. case "当前状态":
  269. patentCell.setStatue(value);
  270. break;
  271. }
  272. }
  273. }
  274. //获得摘要
  275. WebElement abstrElement = driver.findElement(By.className("item-summary"));
  276. List<WebElement> spans = abstrElement.findElements(By.tagName("span"));
  277. if (cells.contains("1")) {
  278. String abstrText = spans.get(0).getText();
  279. patentCell.setAbstrText(abstrText);
  280. }
  281. //获得图片路径
  282. wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("item-img")));
  283. WebElement picElement = driver.findElement(By.className("item-img"));
  284. WebElement imag = picElement.findElement(By.tagName("img"));
  285. String url = imag.getAttribute("src");
  286. url = fileUtils.uploadToLocal(url, ".jpg").getPath();
  287. patentCell.setPicUrl(url);
  288. if (cells.contains("2")) {
  289. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  290. WebElement CLElement = driver.findElement(By.id("itemCL"));
  291. CLElement.click();
  292. //获得主权要
  293. String mainRight = spans.get(1).getText();
  294. patentCell.setMainRignt(mainRight);
  295. //获得权要
  296. driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
  297. List<WebElement> rightElements = driver.findElements(By.tagName("claim-text"));
  298. if (rightElements.size() == 0) {
  299. WebElement suBtn = driver.findElement(By.className("layui-layer-btn0"));
  300. suBtn.click();
  301. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  302. } else {
  303. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  304. List<String> rights = new ArrayList<>();
  305. rightElements.forEach(item -> {
  306. rights.add(item.getText());
  307. });
  308. patentCell.setRights(rights);
  309. }
  310. }
  311. if (cells.contains("3")) {
  312. //获得说明书
  313. WebElement DSElement = driver.findElement(By.id("itemDS"));
  314. DSElement.click();
  315. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  316. driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
  317. List<WebElement> titleElements = driver.findElements(By.tagName("invention-title"));
  318. if (titleElements.size() == 0) {
  319. WebElement suBtn = driver.findElement(By.className("layui-layer-btn0"));
  320. suBtn.click();
  321. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  322. } else {
  323. WebElement titleElement = titleElements.get(0);
  324. patentCell.setTitle(titleElement.getText());
  325. StringBuilder stringBuilder = new StringBuilder();
  326. int flag = 1;
  327. //技术领域
  328. WebElement textElement1 = driver.findElement(By.tagName("description"));
  329. List<WebElement> textP1 = textElement1.findElements(By.tagName("p"));
  330. stringBuilder.append(textP1.get(0).getText());
  331. stringBuilder.append("\r\n");
  332. for (int t = 1; t < textP1.size(); t++) {
  333. String tem = String.format("%04d", flag);
  334. stringBuilder.append("[").append(tem).append("]").append(textP1.get(t).getText());
  335. stringBuilder.append("\r\n");
  336. flag += 1;
  337. }
  338. patentCell.setPatentInstructionText(stringBuilder.toString());
  339. }
  340. }
  341. //获得pdf文档
  342. if (cells.contains("4")) {
  343. WebElement PDFElement = driver.findElement(By.id("itemPdf"));
  344. PDFElement.click();
  345. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  346. driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
  347. List<WebElement> PDFFile = driver.findElements(By.tagName("embed"));
  348. String pdfUrl = "";
  349. if (PDFFile.size() == 0) {
  350. List<WebElement> PDFFiles = driver.findElements(By.className("pdfbtn"));
  351. if (PDFFiles.size() != 0) {
  352. pdfUrl = PDFFiles.get(0).getAttribute("data-url");
  353. } else {
  354. WebElement suBtn = driver.findElement(By.className("layui-layer-btn0"));
  355. suBtn.click();
  356. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  357. }
  358. } else {
  359. pdfUrl = PDFFile.get(0).getAttribute("src");
  360. }
  361. if (pdfUrl != "") {
  362. UploadFileDTO fileDTO = fileUtils.uploadToLocal(pdfUrl, ".PDF");
  363. patentCell.setPDFName(fileDTO.getFileName());
  364. patentCell.setPDFSize(fileDTO.getFileSize());
  365. patentCell.setPDFUrl(fileDTO.getPath());
  366. patentCell.setPdf(fileDTO);
  367. }
  368. }
  369. if (cells.contains("5")) {
  370. //获得法律状态
  371. WebElement flztElement = driver.findElement(By.id("flztbtn"));
  372. flztElement.click();
  373. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  374. wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("legalContainer")));
  375. WebElement tBody = driver.findElement(By.id("legalContainer"));
  376. TimeUnit.MILLISECONDS.sleep(1000);//毫秒
  377. List<WebElement> trs = tBody.findElements(By.tagName("tr"));
  378. List<PatentCell.PatentAffair> affairs = new ArrayList<>();
  379. trs.forEach(item -> {
  380. List<WebElement> tds = item.findElements(By.tagName("td"));
  381. PatentCell.PatentAffair affair = new PatentCell.PatentAffair();
  382. affair.setDateTime(tds.get(0).getText());
  383. affair.setSimpleStatus(tds.get(1).getText());
  384. affair.setStatus(tds.get(2).getText());
  385. affairs.add(affair);
  386. });
  387. patentCell.setPatentAffairs(affairs);
  388. }
  389. successNum++;
  390. if (patentCell.getPublicNo() != null) {
  391. patentCell.setPatentNo(patentCell.getPublicNo());
  392. } else {
  393. patentCell.setPatentNo(patentCell.getPublicAccreditNo());
  394. patentCell.setPublicNo(patentCell.getPublicAccreditNo());
  395. }
  396. if (patentCell.getPatentNo() != null) {
  397. String res = outInterfaceService.importPatents(patentCell);
  398. JSONObject jsonObject = JSONObject.parseObject(res);
  399. if (jsonObject.get("code").toString().equals("500")) {
  400. //任务执行情况状态设为失败,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
  401. qrtzTaskDetail.setTaskDetailState(3);
  402. qrtzTaskDetail.setSuccessNum(successNum);
  403. qrtzTaskDetail.setDefaultNum(count - successNum);
  404. qrtzTaskDetail.setEndTime(new Date());
  405. qrtzTaskDetail.updateById();
  406. if (qrtzTask.getTaskType() == 1) {
  407. qrtzTask.setTaskState(4);
  408. } else {
  409. qrtzTask.setTaskState(3);
  410. }
  411. qrtzTask.updateById();
  412. return null;
  413. }
  414. }
  415. QrtzTaskDetail qrtzTaskDetail11 = qrTaskDetailService.getById(qrtzTaskDetail.getId());
  416. percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D);
  417. percentage = MathUtils.saveTwoDecimal(percentage);
  418. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  419. .setId(webQueryDTO.getTaskId())
  420. .setProjectId(webQueryDTO.getProjectId())
  421. .setComplete(false)
  422. .setIndex(successNum)
  423. .setPercentage(percentage)
  424. .setTaskDetailState(qrtzTaskDetail11.getTaskDetailState())
  425. .setFileName("")
  426. .setUrl("")
  427. .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
  428. patentCells.add(patentCell);
  429. driver.close();
  430. driver.switchTo().window(handleMap.get("mainPage"));
  431. }
  432. wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("tcdPageCode")));
  433. List<WebElement> nexts = driver.findElements(By.className("nextPage"));
  434. if (nexts.size() != 0) {
  435. nexts.get(0).click();
  436. TimeUnit.MILLISECONDS.sleep(10000);//毫秒
  437. }
  438. }
  439. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  440. .setId(webQueryDTO.getTaskId())
  441. .setProjectId(webQueryDTO.getProjectId())
  442. .setComplete(true)
  443. .setIndex(count)
  444. .setPercentage(100D)
  445. .setTaskDetailState(qrtzTaskDetail.getTaskDetailState())
  446. .setFileName("")
  447. .setUrl("")
  448. .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "");
  449. //任务执行情况状态设为成功,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
  450. qrtzTaskDetail.setTaskDetailState(2);
  451. qrtzTaskDetail.setSuccessNum(successNum);
  452. qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
  453. qrtzTaskDetail.setTaskProcess(100);
  454. qrtzTaskDetail.setEndTime(new Date());
  455. qrtzTaskDetail.updateById();
  456. if (qrtzTask.getTaskType() == 1) {
  457. qrtzTask.setTaskState(4);
  458. } else {
  459. qrtzTask.setTaskState(2);
  460. }
  461. qrtzTask.updateById();
  462. driver.quit();
  463. } catch (Exception e) {
  464. log.error(e.toString());
  465. //任务执行情况状态设为失败,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
  466. qrtzTaskDetail.setTaskDetailState(3);
  467. qrtzTaskDetail.setSuccessNum(successNum);
  468. qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
  469. qrtzTaskDetail.setFailure("拉取节点失败");
  470. qrtzTaskDetail.setEndTime(new Date());
  471. qrtzTaskDetail.updateById();
  472. if (qrtzTask.getTaskType() == 1) {
  473. qrtzTask.setTaskState(4);
  474. } else {
  475. qrtzTask.setTaskState(3);
  476. }
  477. qrtzTask.updateById();
  478. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  479. .setId(webQueryDTO.getTaskId())
  480. .setProjectId(webQueryDTO.getProjectId())
  481. .setComplete(true)
  482. .setIndex(successNum)
  483. .setPercentage(100D)
  484. .setTaskDetailState(qrtzTaskDetail.getTaskDetailState())
  485. .setFileName("")
  486. .setUrl("")
  487. .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "");
  488. }
  489. return patentCells;
  490. }
  491. /**
  492. * 下载欧专局专利数据
  493. *
  494. * @param webQueryDTO 前台传来的下载条件等数据
  495. * @return 返回
  496. * @throws IOException 抛出IO异常
  497. */
  498. public List<PatentCell> getEPO(WebQueryDTO webQueryDTO) throws Exception {
  499. log.info("开始处理【下载欧专局专利数据】的业务,参数为:{}", webQueryDTO);
  500. List<PatentCell> patentCells = new ArrayList<>();
  501. QrtzTaskDetail qrtzTaskDetail = qrTaskDetailService.getById(webQueryDTO.getTaskId());
  502. Integer successNum = qrtzTaskDetail.getSuccessNum();
  503. int startPage = successNum / 5;
  504. int startNum = successNum % 5;
  505. //当前任务条件
  506. QrtzTask qrtzTask = qrTaskService.getById(qrtzTaskDetail.getTaskId());
  507. //1.从DTO中取出网站id、检索信息条件、下载字段、专题库id、报告id
  508. Integer webId = webQueryDTO.getWebConfigId();
  509. List<String> cells = webQueryDTO.getConCells();
  510. String conditions = webQueryDTO.getConditions();
  511. Integer projectId = webQueryDTO.getProjectId();
  512. Integer reportId = webQueryDTO.getReportId();
  513. //根据id 获得网站配置
  514. WebConfig webConfig = webConfigService.getConfigById(webId);
  515. //根据网站id和用户的租户获得登录信息
  516. WebLoginConfig webLoginConfig = webLoginConfigService.getLoginConfig(webConfig.getId(), qrtzTask.getTenantId());
  517. if (webLoginConfig == null) {
  518. qrtzTaskDetail.setTaskDetailState(3);
  519. qrtzTaskDetail.setSuccessNum(successNum);
  520. qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
  521. qrtzTaskDetail.setFailure("未配置登录信息");
  522. qrtzTaskDetail.setEndTime(new Date());
  523. qrtzTaskDetail.updateById();
  524. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  525. .setId(webQueryDTO.getTaskId())
  526. .setProjectId(webQueryDTO.getProjectId())
  527. .setComplete(true)
  528. .setIndex(successNum)
  529. .setPercentage(0.0)
  530. .setTaskDetailState(3)
  531. .setFileName("")
  532. .setUrl("")
  533. .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
  534. return null;
  535. }
  536. //定义每次检索的专利数量(每次检索50件)
  537. int size = 50;
  538. //2.根据检索式conditions先检索一件专利著录信息【此操作主要是为了获得专利总数量count】
  539. SerachBiblioData serachBiblioData = getSerachBiblioData(conditions, 1, 1);
  540. if (serachBiblioData == null || serachBiblioData.getTotals() == 0) {
  541. //conditions没有检索到任何相关专利(杰哥处理)
  542. ThrowException.throwXiaoShiException("检索失败,未检索出任何相关专利信息");
  543. }
  544. //获得专利总数量
  545. Integer count = serachBiblioData.getTotals();
  546. //计算进度值
  547. double percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D);
  548. percentage = MathUtils.saveTwoDecimal(percentage);
  549. //3.根据专利总数量count计算检索次数,来循环检索
  550. for (int i = 1; i <= count; i += size) {
  551. //查看任务状态
  552. QrtzTaskDetail qrtzTaskDetail1 = qrTaskDetailService.getById(webQueryDTO.getTaskId());
  553. if (qrtzTaskDetail1.getTaskDetailState().equals(7)) {
  554. qrtzTaskDetail1.setTaskDetailState(5);
  555. qrtzTaskDetail1.setSuccessNum(successNum);
  556. qrtzTaskDetail1.setTaskProcess(percentage);
  557. qrtzTaskDetail1.updateById();
  558. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  559. .setId(webQueryDTO.getTaskId())
  560. .setProjectId(webQueryDTO.getProjectId())
  561. .setComplete(false)
  562. .setIndex(successNum)
  563. .setTaskDetailState(5)
  564. .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
  565. return null;
  566. } else if (qrtzTaskDetail1.getTaskDetailState().equals(8)) {
  567. qrtzTaskDetail1.setTaskDetailState(6);
  568. qrtzTaskDetail1.setSuccessNum(successNum);
  569. qrtzTaskDetail1.setTaskProcess(percentage);
  570. qrtzTaskDetail1.updateById();
  571. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  572. .setId(webQueryDTO.getTaskId())
  573. .setProjectId(webQueryDTO.getProjectId())
  574. .setComplete(false)
  575. .setIndex(successNum)
  576. .setTaskDetailState(6)
  577. .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
  578. return null;
  579. }
  580. //3.1根据检索式conditions、专利开始数i、专利最后数i + size - 1检索专利著录信息
  581. serachBiblioData = getSerachBiblioData(conditions, i, i + size - 1);
  582. //获取检索结果中的所有专利著录信息
  583. if (serachBiblioData == null || serachBiblioData.getTotals() == 0) {
  584. continue;
  585. }
  586. List<PatentZhuLu> patents = serachBiblioData.getPatents();
  587. //3.2遍历专利
  588. for (PatentZhuLu patent : patents) {
  589. PatentCell patentCell = new PatentCell();
  590. patentCell.setProjectId(qrtzTask.getProjectId());
  591. PubNo pubNO = new PubNo();
  592. //装载专利著录
  593. if (cells.contains("1")) {
  594. setPatentZhuLu(patentCell, patent, pubNO);
  595. }
  596. //装载权要
  597. if (cells.contains("2")) {
  598. setPatentClaim(patentCell, pubNO);
  599. }
  600. //装载说明书文本
  601. if (cells.contains("3")) {
  602. setPatentInstructionText(patentCell, pubNO);
  603. }
  604. //以下代码是在准备一会要调用拿取各种附图的接口所需的参数(FullDocument->说明书pdf、Drawing->其他附图、FirstPageClipping->摘要附图)
  605. String fullDocumentLink = "", fullDocumentType = "", drawingLink = "", drawingType = "", firstPageClippingLink = "", firstPageClippingType = "";
  606. Integer fullDocumentPage = 0, drawingPage = 0, firstPageClippingPage = 0;
  607. //根据当前专利号调用接口获取一会调用拿取各种附图的接口的参数
  608. try {
  609. ImageInfo imageInfo = getImage(pubNO);
  610. for (Image image : imageInfo.getImages()) {
  611. //如果附件类型是说明书
  612. if (image.getImageType().equals("FullDocument")) {
  613. fullDocumentLink = image.getUrlLink();
  614. fullDocumentPage = image.getNumberOfPages();
  615. for (String formatOption : image.getFormatOptions()) {
  616. if (formatOption.contains("pdf")) {
  617. fullDocumentType = formatOption;
  618. }
  619. }
  620. }
  621. //如果附件类型是其他附件
  622. if (image.getImageType().equals("Drawing")) {
  623. drawingLink = image.getUrlLink();
  624. drawingPage = image.getNumberOfPages();
  625. for (String formatOption : image.getFormatOptions()) {
  626. if (formatOption.contains("tiff")) {
  627. drawingType = formatOption;
  628. }
  629. }
  630. }
  631. //如果附件类型是摘要附图
  632. if (image.getImageType().equals("FirstPageClipping")) {
  633. firstPageClippingLink = image.getUrlLink();
  634. firstPageClippingPage = image.getNumberOfPages();
  635. for (String formatOption : image.getFormatOptions()) {
  636. if (formatOption.contains("jpeg")) {
  637. firstPageClippingType = formatOption;
  638. }
  639. }
  640. }
  641. }
  642. //装载说明书pdf
  643. if (cells.contains("4")) {
  644. setFuJian(fullDocumentLink, fullDocumentPage, fullDocumentType, patentCell, ".pdf");
  645. }
  646. //装载摘要附图
  647. if (cells.contains("6")) {
  648. setFuJian(firstPageClippingLink, firstPageClippingPage, firstPageClippingType, patentCell, ".jpeg");
  649. }
  650. //装载其他附图
  651. if (cells.contains("7")) {
  652. setFuJian(drawingLink, drawingPage, drawingType, patentCell, ".tiff");
  653. }
  654. if (patentCell.getPatentNo() != null) {
  655. String res = outInterfaceService.importPatents(patentCell);
  656. JSONObject jsonObject = JSONObject.parseObject(res);
  657. if (jsonObject.get("code").toString().equals("500")) {
  658. // //任务执行情况状态设为失败,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
  659. // qrtzTaskDetail.setTaskDetailState(3);
  660. // qrtzTaskDetail.setSuccessNum(successNum);
  661. // qrtzTaskDetail.setDefaultNum(count - successNum);
  662. // qrtzTaskDetail.setEndTime(new Date());
  663. // qrtzTaskDetail.updateById();
  664. // if (qrtzTask.getTaskType() == 1) {
  665. // qrtzTask.setTaskState(4);
  666. // } else {
  667. // qrtzTask.setTaskState(3);
  668. // }
  669. // qrtzTask.updateById();
  670. // return null;
  671. //拿到当前这个下载失败的专利号,保存起来(具体怎么处理失败的专利考虑)
  672. String patentNo = patentCell.getPatentNo();
  673. //然后跳过本次专利,继续下载下一个专利
  674. continue;
  675. }
  676. }
  677. QrtzTaskDetail qrtzTaskDetail11 = qrTaskDetailService.getById(qrtzTaskDetail.getId());
  678. percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D);
  679. percentage = MathUtils.saveTwoDecimal(percentage);
  680. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  681. .setId(webQueryDTO.getTaskId())
  682. .setProjectId(webQueryDTO.getProjectId())
  683. .setComplete(false)
  684. .setIndex(successNum)
  685. .setPercentage(percentage)
  686. .setTaskDetailState(qrtzTaskDetail11.getTaskDetailState())
  687. .setFileName("")
  688. .setUrl("")
  689. .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
  690. } catch (XiaoShiException e) {
  691. //虽然拿不到所有附图,但已经拿到了专利著录,能拿到啥是啥
  692. log.info(e.getMessage());
  693. } catch (Exception e) {
  694. qrtzTaskDetail.setTaskDetailState(3);
  695. qrtzTaskDetail.setSuccessNum(successNum);
  696. qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
  697. qrtzTaskDetail.setFailure("拉取节点失败");
  698. qrtzTaskDetail.setEndTime(new Date());
  699. qrtzTaskDetail.updateById();
  700. if (qrtzTask.getTaskType() == 1) {
  701. qrtzTask.setTaskState(4);
  702. } else {
  703. qrtzTask.setTaskState(3);
  704. }
  705. qrtzTask.updateById();
  706. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  707. .setId(webQueryDTO.getTaskId())
  708. .setProjectId(webQueryDTO.getProjectId())
  709. .setComplete(true)
  710. .setIndex(successNum)
  711. .setPercentage(100D)
  712. .setTaskDetailState(qrtzTaskDetail.getTaskDetailState())
  713. .setFileName("")
  714. .setUrl("")
  715. .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "");
  716. }
  717. //4.保存专利信息(发送给专题库)
  718. patentCells.add(patentCell);
  719. }
  720. }
  721. WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
  722. .setId(webQueryDTO.getTaskId())
  723. .setProjectId(webQueryDTO.getProjectId())
  724. .setComplete(true)
  725. .setIndex(count)
  726. .setPercentage(100D)
  727. .setTaskDetailState(qrtzTaskDetail.getTaskDetailState())
  728. .setFileName("")
  729. .setUrl("")
  730. .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "");
  731. //任务执行情况状态设为成功,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
  732. qrtzTaskDetail.setTaskDetailState(2);
  733. qrtzTaskDetail.setSuccessNum(successNum);
  734. qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
  735. qrtzTaskDetail.setTaskProcess(100);
  736. qrtzTaskDetail.setEndTime(new Date());
  737. qrtzTaskDetail.updateById();
  738. if (qrtzTask.getTaskType() == 1) {
  739. qrtzTask.setTaskState(4);
  740. } else {
  741. qrtzTask.setTaskState(2);
  742. }
  743. qrtzTask.updateById();
  744. return patentCells;
  745. }
  746. /**
  747. * 装载说明书附件pdf方法
  748. *
  749. * @param link 附件链接
  750. * @param page 附件页数
  751. * @param type 附件类型
  752. * @param patentCell 实体类对象
  753. */
  754. private void setFuJian(String link, Integer page, String type, PatentCell patentCell, String FuJianSuffix) throws Exception {
  755. //合并说明书pdf文档时所需的读取流数组
  756. InputStream[] streams = new InputStream[page];
  757. //其他附图url数组
  758. ArrayList<String> otherUrls = new ArrayList<>();
  759. //遍历附件页数
  760. for (int i = 1; i <= page; i++) {
  761. GetFuTuParamsDTO getFuTuParamsDTO = new GetFuTuParamsDTO()
  762. .setLink(link)
  763. .setPage(i)
  764. .setType(type);
  765. byte[] buffer = outInterfaceService.getPatentFile(getFuTuParamsDTO);
  766. InputStream inputStream = new ByteArrayInputStream(buffer);
  767. streams[i - 1] = inputStream;
  768. // File file = File.createTempFile("new_url", FuJianSuffix);
  769. // FileOutputStream out = new FileOutputStream(file);
  770. // out.write(buffer);
  771. // out.close();
  772. // streams[i - 1] = new FileInputStream(file);
  773. // DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
  774. // FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
  775. // int bytesRead = 0;
  776. // buffer = new byte[8192];
  777. // try {
  778. // FileInputStream fis = new FileInputStream(file);
  779. // OutputStream os = item.getOutputStream();
  780. // int len = 8192;
  781. // while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
  782. // os.write(buffer, 0, bytesRead);
  783. // }
  784. // os.close();
  785. // fis.close();
  786. // } catch (IOException e) {
  787. // e.printStackTrace();
  788. // }
  789. // MultipartFile multipartFile = new CommonsMultipartFile(item);
  790. // UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
  791. // switch (FuJianSuffix) {
  792. // case ".pdf":
  793. // streams[i - 1] = new FileInputStream(fileDTO.getPath());
  794. // break;
  795. // case ".jpeg":
  796. // patentCell.setPicUrl(fileDTO.getPath());
  797. // break;
  798. // case ".tiff":
  799. // otherUrls.add(fileDTO.getPath());
  800. // break;
  801. // }
  802. //如果不是说明书pdf
  803. if (!FuJianSuffix.equals(".pdf")) {
  804. File file = File.createTempFile("new_url", FuJianSuffix);
  805. FileOutputStream out = new FileOutputStream(file);
  806. out.write(buffer);
  807. out.close();
  808. streams[i - 1] = new FileInputStream(file);
  809. DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
  810. FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
  811. int bytesRead = 0;
  812. buffer = new byte[8192];
  813. try {
  814. FileInputStream fis = new FileInputStream(file);
  815. OutputStream os = item.getOutputStream();
  816. int len = 8192;
  817. while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
  818. os.write(buffer, 0, bytesRead);
  819. }
  820. os.close();
  821. fis.close();
  822. } catch (IOException e) {
  823. e.printStackTrace();
  824. }
  825. MultipartFile multipartFile = new CommonsMultipartFile(item);
  826. UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
  827. //如果是摘要附图
  828. if (FuJianSuffix.equals(".jpeg")) {
  829. patentCell.setPicUrl(fileDTO.getPath());
  830. }
  831. //如果是其他附图
  832. if (FuJianSuffix.equals(".tiff")) {
  833. otherUrls.add(fileDTO.getPath());
  834. }
  835. }
  836. }
  837. //如果是说明书pdf
  838. if (FuJianSuffix.equals(".pdf")) {
  839. //合并说明书多个pdf文件
  840. PdfDocumentBase doc = PdfDocument.mergeFiles(streams);
  841. //保存结果文件
  842. doc.save("merge.pdf", FileFormat.PDF);
  843. doc.close();
  844. //手动将合并后的pdf文件转成MultipartFile,上传文件并获取path装载到patentCell
  845. File file = new File("merge.pdf");
  846. DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
  847. FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
  848. int bytesRead = 0;
  849. byte[] buffer = new byte[8192];
  850. try {
  851. FileInputStream fis = new FileInputStream(file);
  852. OutputStream os = item.getOutputStream();
  853. int len = 8192;
  854. while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
  855. os.write(buffer, 0, bytesRead);
  856. }
  857. os.close();
  858. fis.close();
  859. } catch (IOException e) {
  860. e.printStackTrace();
  861. }
  862. MultipartFile multipartFile = new CommonsMultipartFile(item);
  863. UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
  864. patentCell.setPdf(fileDTO);
  865. } else if (FuJianSuffix.equals(".tiff")) {
  866. patentCell.setOtherUrls(otherUrls);
  867. }
  868. }
  869. /**
  870. * 获取Image信息方法(从信息中获取三种附图的检索参数)
  871. *
  872. * @param pubNo 公开号对象
  873. */
  874. private ImageInfo getImage(PubNo pubNo) throws IOException {
  875. String res = outInterfaceService.getImagesInfo(pubNo);
  876. JSONObject jsonObject = JSONObject.parseObject(res);
  877. if (!jsonObject.get("status").toString().equals("200")) {
  878. //若发生类似400、500等异常(杰哥处理)
  879. ThrowException.throwXiaoShiException("Image信息接口无法检索该国家专利");
  880. }
  881. //拿到检索结果(未格式化的含有页面等标签的说明书)
  882. String unFormatData = jsonObject.get("data").toString();
  883. String data = unFormatData.substring(unFormatData.indexOf("[") + 1, unFormatData.lastIndexOf("]"));
  884. ImageInfo imageInfo = JSONObject.parseObject(data, ImageInfo.class);
  885. return imageInfo;
  886. }
  887. /**
  888. * 装载说明书方法
  889. *
  890. * @param patentCell 实体类对象
  891. * @param pubNo 公开号对象
  892. */
  893. private void setPatentInstructionText(PatentCell patentCell, PubNo pubNo) throws IOException {
  894. GetDescriptionInfoParamsDTO getDescriptionInfoParamsDTO = new GetDescriptionInfoParamsDTO()
  895. .setCc(pubNo.getCountry())
  896. .setNumber(pubNo.getNumber())
  897. .setKind(pubNo.getKind());
  898. String res = outInterfaceService.getDescriptionInfo(getDescriptionInfoParamsDTO);
  899. JSONObject jsonObject = JSONObject.parseObject(res);
  900. if (!jsonObject.get("status").toString().equals("200")) {
  901. //若发生类似400、500等异常(杰哥处理)
  902. //ThrowException.throwXiaoShiException("说明书接口无法检索该国家专利");
  903. log.info("说明书接口无法检索该国家专利");
  904. return;
  905. }
  906. //拿到检索结果(未格式化的含有页面等标签的说明书)
  907. //String unformatInstructionText = JSONObject.parseObject(jsonObject.get("data").toString(), String.class);
  908. List<String> unformatInstructionText = JSONObject.parseObject(jsonObject.get("data").toString(), List.class);
  909. StringBuilder builder = new StringBuilder();
  910. for (String n : unformatInstructionText) {
  911. builder.append(n);
  912. }
  913. // String regex = "<p>(.+?)</p>";
  914. // Matcher matcher = Pattern.compile(regex).matcher(unformatInstructionText);
  915. // if (matcher.find()) {
  916. // patentCell.setPatentInstructionText(matcher.group());
  917. // }
  918. patentCell.setPatentInstructionText(builder + "");
  919. }
  920. /**
  921. * 装载权要方法
  922. *
  923. * @param patentCell 实体类对象
  924. * @param pubNo 公开号对象
  925. */
  926. private void setPatentClaim(PatentCell patentCell, PubNo pubNo) throws IOException {
  927. GetClaimsInfoParamsDTO getClaimsInfoParamsDTO = new GetClaimsInfoParamsDTO()
  928. .setCc(pubNo.getCountry())
  929. .setNumber(pubNo.getNumber())
  930. .setKind(pubNo.getKind());
  931. String res = outInterfaceService.getClaimsInfo(getClaimsInfoParamsDTO);
  932. JSONObject jsonObject = JSONObject.parseObject(res);
  933. if (!jsonObject.get("status").toString().equals("200")) {
  934. //若发生类似400、500等异常(杰哥处理)
  935. //ThrowException.throwXiaoShiException("权要接口无法检索该国家专利");
  936. log.info("权要接口无法检索该国家专利");
  937. return;
  938. }
  939. //拿到检索结果(未格式化的含有页面等标签的权要)并格式化权要
  940. //String unformatRight = JSONObject.parseObject(jsonObject.get("data").toString(), String.class);
  941. ArrayList<String> unformatRights = JSONObject.parseObject(jsonObject.get("data").toString(), ArrayList.class);
  942. //String[] rightArr = unformatRight.split("\n");
  943. //ArrayList<String> rights = new ArrayList<>(Arrays.asList(rightArr));
  944. patentCell.setRights(unformatRights);
  945. }
  946. /**
  947. * 装载著录方法
  948. *
  949. * @param patentCell 实体类对象
  950. * @param patent 专利对象
  951. * @param pubNo 公开号对象
  952. */
  953. private void setPatentZhuLu(PatentCell patentCell, PatentZhuLu patent, PubNo pubNo) {
  954. //装载申请号
  955. for (AppNo appNo : patent.getAppNos()) {
  956. if (appNo.getType().equals("docdb")) {
  957. patentCell.setApplicationNo(appNo.getCountry() + appNo.getNumber() + appNo.getKind());
  958. }
  959. }
  960. //装载申请日
  961. patentCell.setApplicationDate(patent.getAppDate());
  962. //装载国家/省市
  963. patentCell.setCountry(patent.getAppCountry());
  964. //装载公开号
  965. for (PubNo n : patent.getPubNos()) {
  966. if (n.getType().equals("docdb")) {
  967. BeanUtils.copyProperties(n, pubNo);
  968. patentCell.setPublicNo(n.getCountry() + n.getNumber() + n.getKind());
  969. }
  970. }
  971. //装载专利号
  972. patentCell.setPatentNo(patentCell.getPublicNo());
  973. //装载公开日
  974. patentCell.setPubilcDate(patent.getPubDate());
  975. //装载授权公告号(未找到)
  976. //装载授权公告日(未找到)
  977. //装载主分类号
  978. List<String> ipCs = patent.getIpCs();
  979. List<String> cpCs = patent.getCpCs();
  980. ipCs.addAll(cpCs);
  981. if (ipCs != null && ipCs.size() > 0) {
  982. patentCell.setMainIpc(ipCs.get(0));
  983. }
  984. //装载分类号
  985. if (ipCs != null && ipCs.size() > 0) {
  986. patentCell.setIpc(ipCs);
  987. }
  988. //装载申请人
  989. ArrayList<String> applicationPersons = new ArrayList<>();
  990. for (Application application : patent.getApplications()) {
  991. if (application.getOriginalName().contains("(标:)")) {
  992. application.setOriginalName(application.getOriginalName().substring(0, application.getOriginalName().indexOf("(标:)")));
  993. }
  994. applicationPersons.add(application.getOriginalName());
  995. }
  996. patentCell.setApplicationPersons(applicationPersons);
  997. //装载申请人地址(未找到)
  998. //装载发明人
  999. ArrayList<String> inventors = new ArrayList<>();
  1000. for (Inventor inventor : patent.getInventors()) {
  1001. inventors.add(inventor.getOriginalName());
  1002. }
  1003. patentCell.setInventors(inventors);
  1004. //装载当前权利人(未找到)
  1005. //装载代理人(未找到)
  1006. //装载代理机构(未找到)
  1007. //装载范畴分类(未找到)
  1008. //装载当前状态(未找到)
  1009. //装载同族号
  1010. patentCell.setFamilyId(patent.getFamilyId());
  1011. //装载著录标题
  1012. String olTitle = patent.getOlTitle();
  1013. String enTitle = patent.getEnTitle();
  1014. if (olTitle == null) {
  1015. patentCell.setTitle(enTitle);
  1016. } else {
  1017. patentCell.setTitle(olTitle);
  1018. }
  1019. //装载摘要
  1020. String olAbstract = patent.getOlAbstract();
  1021. String enAbstract = patent.getEnAbstract();
  1022. if (olAbstract == null) {
  1023. patentCell.setAbstrText(enAbstract);
  1024. } else {
  1025. patentCell.setAbstrText(olAbstract);
  1026. }
  1027. //装载优先权号、优先权国家、优先权日
  1028. ArrayList<Priority> priorities = new ArrayList<>();
  1029. List<Priorityy> priorties = patent.getPriorties();
  1030. for (Priorityy priorty : priorties) {
  1031. for (PriorityNumber number : priorty.getNumbers()) {
  1032. if (number.getType().equals("epodoc")) {
  1033. Priority priority = new Priority()
  1034. .setPriorityNo(number.getNumber().substring(2))
  1035. .setPriorityCountry(number.getNumber().substring(0, 2))
  1036. .setPriorityDate(priorty.getDate());
  1037. priorities.add(priority);
  1038. }
  1039. }
  1040. }
  1041. patentCell.setPriorities(priorities);
  1042. }
  1043. //调用接口获取一批专利著录信息
  1044. private SerachBiblioData getSerachBiblioData(String conditions, Integer start, Integer size) throws IOException {
  1045. GetSearchBiblioParamsDTO getSearchBiblioParamsDTO = new GetSearchBiblioParamsDTO()
  1046. .setQuery(conditions)
  1047. .setStart(start)
  1048. .setEnd(size);
  1049. String res = outInterfaceService.getSearchBiblio(getSearchBiblioParamsDTO);
  1050. if (res == null || res.equals("")) {
  1051. return null;
  1052. }
  1053. JSONObject jsonObject = JSONObject.parseObject(res);
  1054. if (!jsonObject.get("status").toString().equals("200")) {
  1055. //若发生类似400、500等异常(杰哥处理)
  1056. return null;
  1057. }
  1058. //返回检索结果data
  1059. return JSONObject.parseObject(jsonObject.get("data").toString(), SerachBiblioData.class);
  1060. }
  1061. /**
  1062. * 下载爬取智慧芽专利数据
  1063. *
  1064. * @param patentVO
  1065. * @return
  1066. * @throws IOException
  1067. * @throws InterruptedException
  1068. */
  1069. public List<PatentCell> getPatentya(String patentVO) throws InterruptedException {
  1070. //1.获得驱动
  1071. System.setProperty("webdriver.chrome.driver", "D:\\driver\\chromedriver.exe");
  1072. // 2.页面最大化
  1073. ChromeOptions chromeOptions = new ChromeOptions();
  1074. chromeOptions.addArguments("--start-maximized");
  1075. // 3.创建驱动
  1076. WebDriver driver = new ChromeDriver(chromeOptions);
  1077. // 4.创建动作
  1078. Actions action = new Actions(driver);
  1079. // 打开智慧芽首页
  1080. driver.get("https://account.zhihuiya.com");
  1081. //设置等待时间
  1082. long formSecond1 = 50;
  1083. WebDriverWait wait1 = new WebDriverWait(driver, formSecond1);
  1084. //等待密码登录界面加载
  1085. wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("tab-password")));
  1086. // 点击密码登录
  1087. driver.findElement(By.id("tab-password")).click();
  1088. // 获取输入框,输入账号密码
  1089. List<WebElement> ret = driver.findElements(By.className("el-input__inner"));
  1090. ret.get(0).sendKeys("liting@china-wispro.com");
  1091. ret.get(1).sendKeys("Lt199299");
  1092. // 点击登录
  1093. driver.findElement(By.className("el-button")).click();
  1094. //等待加载
  1095. TimeUnit.MILLISECONDS.sleep(1000);//毫秒
  1096. List<WebElement> alerts = driver.findElements(By.className("patsnap-el-confirm"));
  1097. if (alerts.size() != 0) {
  1098. alerts.get(0).click();
  1099. }
  1100. wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("master_nav-item__2nd_lv")));
  1101. WebElement searchTag = driver.findElement(By.className("master_nav-item__2nd_lv"));
  1102. searchTag.click();
  1103. //等待搜索框加载
  1104. wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("div-textarea")));
  1105. List<WebElement> ret1 = driver.findElements(By.className("div-textarea"));
  1106. //搜索框赋值
  1107. ret1.get(0).sendKeys(patentVO);
  1108. TimeUnit.MILLISECONDS.sleep(2000);//毫秒
  1109. wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("search-form__btn")));
  1110. WebElement rr = driver.findElement(By.className("search-form__btn"));
  1111. rr.click();
  1112. TimeUnit.MILLISECONDS.sleep(1000);//毫秒
  1113. List<PatentCell> patentCells = new ArrayList<>();
  1114. long formSecond = 50;
  1115. WebDriverWait wait = new WebDriverWait(driver, formSecond);
  1116. wait.until(ExpectedConditions.presenceOfElementLocated(By.className("search-result-standard-view__patent")));
  1117. // String js ="document.getElementsByClassName('sidebar-table__right')[0].scrollBy(0, 8000)";
  1118. // ((JavascriptExecutor) driver).executeScript(js);
  1119. List<WebElement> ret2 = driver.findElements(By.className("search-result-standard-view__patent"));
  1120. ret2.remove(ret2.size() - 1);
  1121. List<WebElement> ret3 = driver.findElements(By.className("pn-cell-popover"));
  1122. for (int i = 0; i < ret2.size(); i++) {
  1123. action.moveToElement(ret3.get(i)).perform();
  1124. wait.until(ExpectedConditions.attributeToBeNotEmpty(ret3.get(i), "href"));
  1125. String patenNo = ret3.get(i).getAttribute("data-link-data");
  1126. System.out.println(patenNo);
  1127. }
  1128. for (WebElement item : ret2) {
  1129. List<WebElement> aLinks = item.findElements(By.tagName("a"));
  1130. PatentCell patentCell = new PatentCell();
  1131. aLinks.forEach(tem -> {
  1132. String type = tem.getAttribute("data-link-type");
  1133. if (type != null && type.equals("TITLE")) {
  1134. String title = tem.getAttribute("data-link-data");
  1135. patentCell.setTitle(title);
  1136. } else if (type != null && type.equals("PN")) {
  1137. String url = tem.getAttribute("href");
  1138. patentCell.setUrl(url);
  1139. String patenNo = tem.getAttribute("data-link-data");
  1140. patentCell.setPatentNo(patenNo);
  1141. } else if (type != null && type.equals("ANC")) {
  1142. String applications = tem.getAttribute("data-link-data");
  1143. patentCell.setApplications(applications);
  1144. }
  1145. });
  1146. String legal = driver.findElement(By.className("legal-tag__LEGAL_STATUS")).getText();
  1147. patentCell.setLegal(legal);
  1148. patentCells.add(patentCell);
  1149. }
  1150. return patentCells;
  1151. }
  1152. //格式化参数
  1153. public String formatConditions(String condition) throws IOException {
  1154. String jsons = FileUtils.readerMethod();
  1155. JSONObject jsonObject = new JSONObject();
  1156. List<JSONObject> sources = JSONObject.parseArray(jsons, JSONObject.class);
  1157. for (JSONObject jsonObject1 : sources) {
  1158. jsonObject = jsonObject1;
  1159. }
  1160. condition = condition.replace(" ", "");
  1161. condition = condition.replace("&&", "*");
  1162. condition = condition.replace("||", "+");
  1163. StringBuilder stringBuilder = new StringBuilder("F XX ");
  1164. String pattern = "\\([^)]*\\)";
  1165. Pattern p = Pattern.compile(pattern);
  1166. Matcher m = p.matcher(condition);
  1167. while (m.find()) { // 当字符串中有匹配到 {} 时
  1168. String param = m.group(0);
  1169. String cell = formatParam(param, jsonObject);
  1170. condition = condition.replace(param, cell);
  1171. }
  1172. return condition;
  1173. }
  1174. public String formatParam(String param, JSONObject jsonObject) {
  1175. param = param.replace("(", "");
  1176. param = param.replace(")", "");
  1177. String[] params = param.split("=");// {} 和里面的内容
  1178. StringBuilder stringBuilder = new StringBuilder("(" + params[1]);
  1179. stringBuilder.append("/");
  1180. String cell = jsonObject.get(params[0]).toString();
  1181. stringBuilder.append(cell + ")");
  1182. return stringBuilder.toString();
  1183. }
  1184. //将值转换为list
  1185. public List<String> StringToList(String value, String split) {
  1186. List<String> list = new ArrayList<>();
  1187. if (value != null && value != "") {
  1188. String[] temValue = value.split(split);
  1189. list = new ArrayList<>(Arrays.asList(temValue));
  1190. }
  1191. return list;
  1192. }
  1193. }