12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239 |
- package com.example.demo.service;
- import com.alibaba.fastjson.JSONObject;
- import com.example.demo.domain.dto.*;
- import com.example.demo.domain.entity.*;
- import com.example.demo.exception.XiaoShiException;
- import com.example.demo.model.dto.TaskWebSocketDTO;
- import com.example.demo.model.dto.WebQueryDTO;
- import com.example.demo.util.*;
- import com.spire.pdf.FileFormat;
- import com.spire.pdf.PdfDocument;
- import com.spire.pdf.PdfDocumentBase;
- import lombok.RequiredArgsConstructor;
- import lombok.extern.slf4j.Slf4j;
- import org.apache.commons.fileupload.FileItem;
- import org.apache.commons.fileupload.disk.DiskFileItemFactory;
- import org.openqa.selenium.By;
- import org.openqa.selenium.WebDriver;
- import org.openqa.selenium.WebElement;
- import org.openqa.selenium.chrome.ChromeDriver;
- import org.openqa.selenium.chrome.ChromeOptions;
- import org.openqa.selenium.interactions.Actions;
- import org.openqa.selenium.support.ui.ExpectedConditions;
- import org.openqa.selenium.support.ui.WebDriverWait;
- import org.springframework.beans.BeanUtils;
- import org.springframework.beans.factory.annotation.Value;
- import org.springframework.context.annotation.Lazy;
- import org.springframework.stereotype.Service;
- import org.springframework.web.multipart.MultipartFile;
- import org.springframework.web.multipart.commons.CommonsMultipartFile;
- import java.io.*;
- import java.util.*;
- import java.util.concurrent.TimeUnit;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
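- /**
- * Downloads patent data from external patent websites (PatentStar, EPO) for an import task
- * and forwards each patent to the import interface.
- * NOTE(review): the generated header originally described this class as the database-operation
- * Service implementation for table qrtz_task (scheduled-task table); that does not match the code below.
- *
- * @author admin
- * @createDate 2023-03-15 13:40:32
- */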
- @Slf4j
- @Service
- @RequiredArgsConstructor(onConstructor_ = {@Lazy})
- public class UploadFromWebService {
- private final FileUtils fileUtils; // used to download remote files (pictures, PDFs) to local storage via uploadToLocal
- private final OutInterfaceService outInterfaceService; // importPatents(...) and getPatentFile(...) calls
- private final WebConfigService webConfigService; // looks up the target website configuration by id
- private final QrTaskService qrTaskService; // loads the QrtzTask that owns a run
- private final QrTaskDetailService qrTaskDetailService; // loads the QrtzTaskDetail (per-run progress record)
- private final WebLoginConfigService webLoginConfigService; // login account/password per website and tenant
- @Value("${driverUrl}")
- private String url; // injected from the "driverUrl" property; set as the webdriver.chrome.driver system property
- // 查询并上传(专利之星)
- public List<PatentCell> getPatentStar(WebQueryDTO webQueryDTO) throws Exception {
- List<PatentCell> patentCells = new ArrayList<>();
- //当前任务执行情况
- QrtzTaskDetail qrtzTaskDetail = qrTaskDetailService.getById(webQueryDTO.getTaskId());
- Integer successNum = qrtzTaskDetail.getSuccessNum();
- int startPage = successNum / 5;
- int startNum = successNum % 5;
- //当前任务条件
- QrtzTask qrtzTask = qrTaskService.getById(qrtzTaskDetail.getTaskId());
- //专利成功条数
- try {
- List<String> cells = webQueryDTO.getConCells();
- String conditions = webQueryDTO.getConditions();
- Integer id = webQueryDTO.getWebConfigId();
- //conditions = this.formatConditions(conditions);
- //根据id 获得网站配置
- WebConfig webConfig = webConfigService.getConfigById(id);
- //根据网站id和用户的租户获得登录信息
- WebLoginConfig webLoginConfig = webLoginConfigService.getLoginConfig(webConfig.getId(), qrtzTask.getTenantId());
- if (webLoginConfig == null) {
- qrtzTaskDetail.setTaskDetailState(3);
- qrtzTaskDetail.setSuccessNum(successNum);
- qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
- qrtzTaskDetail.setFailure("未配置登录信息");
- qrtzTaskDetail.setEndTime(new Date());
- qrtzTaskDetail.updateById();
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(true)
- .setIndex(successNum)
- .setPercentage(0.0)
- .setTaskDetailState(3)
- .setFileName("")
- .setUrl("")
- .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
- return null;
- }
- //1.获得驱动
- // System.setProperty("webdriver.chrome.driver", "D:\\driver\\chromedriver.exe");
- System.setProperty("webdriver.chrome.driver", url);
- ChromeOptions chromeOptions = new ChromeOptions();
- // 设置无头模式
- chromeOptions.setHeadless(Boolean.TRUE);
- // 设置无轨 开发时还是不要加,可以看到浏览器效果
- chromeOptions.addArguments("-headless");
- chromeOptions.addArguments("no-sandbox");
- // 3.创建驱动
- WebDriver driver = new ChromeDriver(chromeOptions);
- // 4.创建动作
- Actions action = new Actions(driver);
- //设置等待时间
- long formSecond1 = 10;
- WebDriverWait wait1 = new WebDriverWait(driver, formSecond1);
- // 打开专利之星首页
- driver.get(webConfig.getWebAddress());
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("loginname")));
- WebElement loginName = driver.findElement(By.id("loginname"));
- WebElement password = driver.findElement(By.id("password"));
- loginName.sendKeys(webLoginConfig.getLoginAccount());
- password.sendKeys(webLoginConfig.getLoginPassword());
- WebElement loginButton = driver.findElement(By.id("login"));
- loginButton.click();
- TimeUnit.MILLISECONDS.sleep(5000);//毫秒
- //获得表格搜索按钮并点击
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("tablepage")));
- WebElement tablePage = driver.findElement(By.id("tablepage"));
- WebElement tablePagea = tablePage.findElement(By.tagName("a"));
- tablePagea.click();
- TimeUnit.MILLISECONDS.sleep(5000);//毫秒
- //获得搜索框并点击
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("TxtSearch")));
- WebElement TxtSearch = driver.findElement(By.id("TxtSearch"));
- TxtSearch.sendKeys(conditions);
- WebElement searchBtn = driver.findElement(By.id("searchbtn2"));
- searchBtn.click();
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("tcdNumber")));
- List<WebElement> tcdNumbers = driver.findElements(By.className("tcdNumber"));
- String pagesText = tcdNumbers.get(tcdNumbers.size() - 1).getText();
- int page = Integer.parseInt(pagesText);
- //获得总数
- WebElement countEle = driver.findElement(By.className("allcountlab"));
- String countText = countEle.getText();
- Integer count = Integer.parseInt(countText);
- //总数赋值给当前任务执行情况的总量属性
- qrtzTaskDetail.setAllNum(count);
- qrtzTaskDetail.updateById();
- String Handle = driver.getWindowHandle();
- //创建一个map集合存放浏览器句柄
- HashMap<String, String> handleMap = new HashMap<>();
- //将智慧芽句柄放到map中
- handleMap.put("mainPage", Handle);
- WebElement pagNum = driver.findElement(By.className("page_num"));
- pagNum.clear();
- pagNum.sendKeys(startPage + 1 + "");
- WebElement pagBtn = driver.findElement(By.className("page_btn"));
- pagBtn.click();
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- //计算进度值
- double percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D);
- percentage = MathUtils.saveTwoDecimal(percentage);
- for (int p = startPage; p < page; p++) {
- //等待数据加载
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("patent")));
- TimeUnit.MILLISECONDS.sleep(1000);//毫秒
- //获得列表
- List<WebElement> patentContents = driver.findElements(By.className("patent"));
- // 页面元素标签
- for (int i = startNum; i < patentContents.size(); i++) {
- //查看任务状态
- QrtzTaskDetail qrtzTaskDetail1 = qrTaskDetailService.getById(webQueryDTO.getTaskId());
- if (qrtzTaskDetail1.getTaskDetailState().equals(7)) {
- qrtzTaskDetail1.setTaskDetailState(5);
- qrtzTaskDetail1.setSuccessNum(successNum);
- qrtzTaskDetail1.setTaskProcess(percentage);
- qrtzTaskDetail1.updateById();
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(false)
- .setIndex(successNum)
- .setTaskDetailState(5)
- .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
- return null;
- } else if (qrtzTaskDetail1.getTaskDetailState().equals(8)) {
- qrtzTaskDetail1.setTaskDetailState(6);
- qrtzTaskDetail1.setSuccessNum(successNum);
- qrtzTaskDetail1.setTaskProcess(percentage);
- qrtzTaskDetail1.updateById();
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(false)
- .setIndex(successNum)
- .setTaskDetailState(6)
- .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
- return null;
- }
- PatentCell patentCell = new PatentCell();
- patentCell.setProjectId(webQueryDTO.getProjectId());
- patentCell.setReportId(webQueryDTO.getReportId());
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("title-color")));
- WebElement titleA = patentContents.get(i).findElement(By.className("title-color"));
- titleA.click();
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- //9、获取到所有的句柄
- Set<String> set = driver.getWindowHandles();
- //10、循环找到详情窗口句柄
- for (String s : set) {
- //10.1、将详情窗口的句柄放到map中
- if (!s.equals(Handle)) {
- handleMap.put("detail", s);
- }
- }
- driver.switchTo().window(handleMap.get("detail"));
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("item-content")));
- WebElement itemContent = driver.findElement(By.className("item-content"));
- List<WebElement> divItems = itemContent.findElements(By.tagName("div"));
- for (int t = 0; t < divItems.size(); t++) {
- WebElement temEle = divItems.get(t);
- String text = temEle.getText();
- String value = "";
- String[] strings = text.split(":");
- if (strings.length > 1) {
- value = strings[1].trim();
- String key = strings[0].trim();
- switch (key) {
- case "申请号":
- patentCell.setApplicationNo(value);
- break;
- case "申请日":
- patentCell.setApplicationDate(value);
- break;
- case "国家/省市":
- patentCell.setCountry(value);
- break;
- case "公开号":
- patentCell.setPublicNo(value);
- break;
- case "公开日":
- patentCell.setPubilcDate(value);
- break;
- case "授权公告号":
- patentCell.setPublicAccreditNo(value);
- break;
- case "授权公告日":
- patentCell.setPublicAccreditDate(value);
- break;
- case "主分类号":
- patentCell.setMainIpc(value);
- break;
- case "分类号":
- patentCell.setIpc(this.StringToList(value, ";"));
- break;
- case "申请人":
- patentCell.setApplicationPersons(this.StringToList(value, " "));
- break;
- case "申请人地址":
- patentCell.setApplicationAddress(this.StringToList(value, " "));
- break;
- case "发明人":
- patentCell.setInventors(this.StringToList(value, " "));
- break;
- case "当前权利人":
- patentCell.setApplicationCurrents(this.StringToList(value, " "));
- break;
- case "代理人":
- patentCell.setAgencyPersons(this.StringToList(value, " "));
- break;
- case "代理机构":
- patentCell.setAgency(value);
- break;
- case "范畴分类":
- patentCell.setClassical(value);
- break;
- case "当前状态":
- patentCell.setStatue(value);
- break;
- }
- }
- }
- //获得摘要
- WebElement abstrElement = driver.findElement(By.className("item-summary"));
- List<WebElement> spans = abstrElement.findElements(By.tagName("span"));
- if (cells.contains("1")) {
- String abstrText = spans.get(0).getText();
- patentCell.setAbstrText(abstrText);
- }
- //获得图片路径
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("item-img")));
- WebElement picElement = driver.findElement(By.className("item-img"));
- WebElement imag = picElement.findElement(By.tagName("img"));
- String url = imag.getAttribute("src");
- url = fileUtils.uploadToLocal(url, ".jpg").getPath();
- patentCell.setPicUrl(url);
- if (cells.contains("2")) {
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- WebElement CLElement = driver.findElement(By.id("itemCL"));
- CLElement.click();
- //获得主权要
- String mainRight = spans.get(1).getText();
- patentCell.setMainRignt(mainRight);
- //获得权要
- driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
- List<WebElement> rightElements = driver.findElements(By.tagName("claim-text"));
- if (rightElements.size() == 0) {
- WebElement suBtn = driver.findElement(By.className("layui-layer-btn0"));
- suBtn.click();
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- } else {
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- List<String> rights = new ArrayList<>();
- rightElements.forEach(item -> {
- rights.add(item.getText());
- });
- patentCell.setRights(rights);
- }
- }
- if (cells.contains("3")) {
- //获得说明书
- WebElement DSElement = driver.findElement(By.id("itemDS"));
- DSElement.click();
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
- List<WebElement> titleElements = driver.findElements(By.tagName("invention-title"));
- if (titleElements.size() == 0) {
- WebElement suBtn = driver.findElement(By.className("layui-layer-btn0"));
- suBtn.click();
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- } else {
- WebElement titleElement = titleElements.get(0);
- patentCell.setTitle(titleElement.getText());
- StringBuilder stringBuilder = new StringBuilder();
- int flag = 1;
- //技术领域
- WebElement textElement1 = driver.findElement(By.tagName("description"));
- List<WebElement> textP1 = textElement1.findElements(By.tagName("p"));
- stringBuilder.append(textP1.get(0).getText());
- stringBuilder.append("\r\n");
- for (int t = 1; t < textP1.size(); t++) {
- String tem = String.format("%04d", flag);
- stringBuilder.append("[").append(tem).append("]").append(textP1.get(t).getText());
- stringBuilder.append("\r\n");
- flag += 1;
- }
- patentCell.setPatentInstructionText(stringBuilder.toString());
- }
- }
- //获得pdf文档
- if (cells.contains("4")) {
- WebElement PDFElement = driver.findElement(By.id("itemPdf"));
- PDFElement.click();
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
- List<WebElement> PDFFile = driver.findElements(By.tagName("embed"));
- String pdfUrl = "";
- if (PDFFile.size() == 0) {
- List<WebElement> PDFFiles = driver.findElements(By.className("pdfbtn"));
- if (PDFFiles.size() != 0) {
- pdfUrl = PDFFiles.get(0).getAttribute("data-url");
- } else {
- WebElement suBtn = driver.findElement(By.className("layui-layer-btn0"));
- suBtn.click();
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- }
- } else {
- pdfUrl = PDFFile.get(0).getAttribute("src");
- }
- if (pdfUrl != "") {
- UploadFileDTO fileDTO = fileUtils.uploadToLocal(pdfUrl, ".PDF");
- patentCell.setPDFName(fileDTO.getFileName());
- patentCell.setPDFSize(fileDTO.getFileSize());
- patentCell.setPDFUrl(fileDTO.getPath());
- patentCell.setPdf(fileDTO);
- }
- }
- if (cells.contains("5")) {
- //获得法律状态
- WebElement flztElement = driver.findElement(By.id("flztbtn"));
- flztElement.click();
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("legalContainer")));
- WebElement tBody = driver.findElement(By.id("legalContainer"));
- TimeUnit.MILLISECONDS.sleep(1000);//毫秒
- List<WebElement> trs = tBody.findElements(By.tagName("tr"));
- List<PatentCell.PatentAffair> affairs = new ArrayList<>();
- trs.forEach(item -> {
- List<WebElement> tds = item.findElements(By.tagName("td"));
- PatentCell.PatentAffair affair = new PatentCell.PatentAffair();
- affair.setDateTime(tds.get(0).getText());
- affair.setSimpleStatus(tds.get(1).getText());
- affair.setStatus(tds.get(2).getText());
- affairs.add(affair);
- });
- patentCell.setPatentAffairs(affairs);
- }
- successNum++;
- if (patentCell.getPublicNo() != null) {
- patentCell.setPatentNo(patentCell.getPublicNo());
- } else {
- patentCell.setPatentNo(patentCell.getPublicAccreditNo());
- patentCell.setPublicNo(patentCell.getPublicAccreditNo());
- }
- if (patentCell.getPatentNo() != null) {
- String res = outInterfaceService.importPatents(patentCell);
- JSONObject jsonObject = JSONObject.parseObject(res);
- if (jsonObject.get("code").toString().equals("500")) {
- //任务执行情况状态设为失败,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
- qrtzTaskDetail.setTaskDetailState(3);
- qrtzTaskDetail.setSuccessNum(successNum);
- qrtzTaskDetail.setDefaultNum(count - successNum);
- qrtzTaskDetail.setEndTime(new Date());
- qrtzTaskDetail.updateById();
- if (qrtzTask.getTaskType() == 1) {
- qrtzTask.setTaskState(4);
- } else {
- qrtzTask.setTaskState(3);
- }
- qrtzTask.updateById();
- return null;
- }
- }
- QrtzTaskDetail qrtzTaskDetail11 = qrTaskDetailService.getById(qrtzTaskDetail.getId());
- percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D);
- percentage = MathUtils.saveTwoDecimal(percentage);
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(false)
- .setIndex(successNum)
- .setPercentage(percentage)
- .setTaskDetailState(qrtzTaskDetail11.getTaskDetailState())
- .setFileName("")
- .setUrl("")
- .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
- patentCells.add(patentCell);
- driver.close();
- driver.switchTo().window(handleMap.get("mainPage"));
- }
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("tcdPageCode")));
- List<WebElement> nexts = driver.findElements(By.className("nextPage"));
- if (nexts.size() != 0) {
- nexts.get(0).click();
- TimeUnit.MILLISECONDS.sleep(10000);//毫秒
- }
- }
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(true)
- .setIndex(count)
- .setPercentage(100D)
- .setTaskDetailState(qrtzTaskDetail.getTaskDetailState())
- .setFileName("")
- .setUrl("")
- .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "");
- //任务执行情况状态设为成功,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
- qrtzTaskDetail.setTaskDetailState(2);
- qrtzTaskDetail.setSuccessNum(successNum);
- qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
- qrtzTaskDetail.setTaskProcess(100);
- qrtzTaskDetail.setEndTime(new Date());
- qrtzTaskDetail.updateById();
- if (qrtzTask.getTaskType() == 1) {
- qrtzTask.setTaskState(4);
- } else {
- qrtzTask.setTaskState(2);
- }
- qrtzTask.updateById();
- driver.quit();
- } catch (Exception e) {
- log.error(e.toString());
- //任务执行情况状态设为失败,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
- qrtzTaskDetail.setTaskDetailState(3);
- qrtzTaskDetail.setSuccessNum(successNum);
- qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
- qrtzTaskDetail.setFailure("拉取节点失败");
- qrtzTaskDetail.setEndTime(new Date());
- qrtzTaskDetail.updateById();
- if (qrtzTask.getTaskType() == 1) {
- qrtzTask.setTaskState(4);
- } else {
- qrtzTask.setTaskState(3);
- }
- qrtzTask.updateById();
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(true)
- .setIndex(successNum)
- .setPercentage(100D)
- .setTaskDetailState(qrtzTaskDetail.getTaskDetailState())
- .setFileName("")
- .setUrl("")
- .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "");
- }
- return patentCells;
- }
- /**
- * 下载欧专局专利数据
- *
- * @param webQueryDTO 前台传来的下载条件等数据
- * @return 返回
- * @throws IOException 抛出IO异常
- */
- public List<PatentCell> getEPO(WebQueryDTO webQueryDTO) throws Exception {
- log.info("开始处理【下载欧专局专利数据】的业务,参数为:{}", webQueryDTO);
- List<PatentCell> patentCells = new ArrayList<>();
- QrtzTaskDetail qrtzTaskDetail = qrTaskDetailService.getById(webQueryDTO.getTaskId());
- Integer successNum = qrtzTaskDetail.getSuccessNum();
- int startPage = successNum / 5;
- int startNum = successNum % 5;
- //当前任务条件
- QrtzTask qrtzTask = qrTaskService.getById(qrtzTaskDetail.getTaskId());
- //1.从DTO中取出网站id、检索信息条件、下载字段、专题库id、报告id
- Integer webId = webQueryDTO.getWebConfigId();
- List<String> cells = webQueryDTO.getConCells();
- String conditions = webQueryDTO.getConditions();
- Integer projectId = webQueryDTO.getProjectId();
- Integer reportId = webQueryDTO.getReportId();
- //根据id 获得网站配置
- WebConfig webConfig = webConfigService.getConfigById(webId);
- //根据网站id和用户的租户获得登录信息
- WebLoginConfig webLoginConfig = webLoginConfigService.getLoginConfig(webConfig.getId(), qrtzTask.getTenantId());
- if (webLoginConfig == null) {
- qrtzTaskDetail.setTaskDetailState(3);
- qrtzTaskDetail.setSuccessNum(successNum);
- qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
- qrtzTaskDetail.setFailure("未配置登录信息");
- qrtzTaskDetail.setEndTime(new Date());
- qrtzTaskDetail.updateById();
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(true)
- .setIndex(successNum)
- .setPercentage(0.0)
- .setTaskDetailState(3)
- .setFileName("")
- .setUrl("")
- .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
- return null;
- }
- //定义每次检索的专利数量(每次检索50件)
- int size = 50;
- //2.根据检索式conditions先检索一件专利著录信息【此操作主要是为了获得专利总数量count】
- SerachBiblioData serachBiblioData = getSerachBiblioData(conditions, 1, 1);
- if (serachBiblioData == null || serachBiblioData.getTotals() == 0) {
- //conditions没有检索到任何相关专利(杰哥处理)
- ThrowException.throwXiaoShiException("检索失败,未检索出任何相关专利信息");
- }
- //获得专利总数量
- Integer count = serachBiblioData.getTotals();
- //计算进度值
- double percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D);
- percentage = MathUtils.saveTwoDecimal(percentage);
- //3.根据专利总数量count计算检索次数,来循环检索
- for (int i = 1; i <= count; i += size) {
- //查看任务状态
- QrtzTaskDetail qrtzTaskDetail1 = qrTaskDetailService.getById(webQueryDTO.getTaskId());
- if (qrtzTaskDetail1.getTaskDetailState().equals(7)) {
- qrtzTaskDetail1.setTaskDetailState(5);
- qrtzTaskDetail1.setSuccessNum(successNum);
- qrtzTaskDetail1.setTaskProcess(percentage);
- qrtzTaskDetail1.updateById();
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(false)
- .setIndex(successNum)
- .setTaskDetailState(5)
- .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
- return null;
- } else if (qrtzTaskDetail1.getTaskDetailState().equals(8)) {
- qrtzTaskDetail1.setTaskDetailState(6);
- qrtzTaskDetail1.setSuccessNum(successNum);
- qrtzTaskDetail1.setTaskProcess(percentage);
- qrtzTaskDetail1.updateById();
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(false)
- .setIndex(successNum)
- .setTaskDetailState(6)
- .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
- return null;
- }
- //3.1根据检索式conditions、专利开始数i、专利最后数i + size - 1检索专利著录信息
- serachBiblioData = getSerachBiblioData(conditions, i, i + size - 1);
- //获取检索结果中的所有专利著录信息
- if (serachBiblioData == null || serachBiblioData.getTotals() == 0) {
- continue;
- }
- List<PatentZhuLu> patents = serachBiblioData.getPatents();
- //3.2遍历专利
- for (PatentZhuLu patent : patents) {
- PatentCell patentCell = new PatentCell();
- patentCell.setProjectId(qrtzTask.getProjectId());
- PubNo pubNO = new PubNo();
- //装载专利著录
- if (cells.contains("1")) {
- setPatentZhuLu(patentCell, patent, pubNO);
- }
- //装载权要
- if (cells.contains("2")) {
- setPatentClaim(patentCell, pubNO);
- }
- //装载说明书文本
- if (cells.contains("3")) {
- setPatentInstructionText(patentCell, pubNO);
- }
- //以下代码是在准备一会要调用拿取各种附图的接口所需的参数(FullDocument->说明书pdf、Drawing->其他附图、FirstPageClipping->摘要附图)
- String fullDocumentLink = "", fullDocumentType = "", drawingLink = "", drawingType = "", firstPageClippingLink = "", firstPageClippingType = "";
- Integer fullDocumentPage = 0, drawingPage = 0, firstPageClippingPage = 0;
- //根据当前专利号调用接口获取一会调用拿取各种附图的接口的参数
- try {
- ImageInfo imageInfo = getImage(pubNO);
- for (Image image : imageInfo.getImages()) {
- //如果附件类型是说明书
- if (image.getImageType().equals("FullDocument")) {
- fullDocumentLink = image.getUrlLink();
- fullDocumentPage = image.getNumberOfPages();
- for (String formatOption : image.getFormatOptions()) {
- if (formatOption.contains("pdf")) {
- fullDocumentType = formatOption;
- }
- }
- }
- //如果附件类型是其他附件
- if (image.getImageType().equals("Drawing")) {
- drawingLink = image.getUrlLink();
- drawingPage = image.getNumberOfPages();
- for (String formatOption : image.getFormatOptions()) {
- if (formatOption.contains("tiff")) {
- drawingType = formatOption;
- }
- }
- }
- //如果附件类型是摘要附图
- if (image.getImageType().equals("FirstPageClipping")) {
- firstPageClippingLink = image.getUrlLink();
- firstPageClippingPage = image.getNumberOfPages();
- for (String formatOption : image.getFormatOptions()) {
- if (formatOption.contains("jpeg")) {
- firstPageClippingType = formatOption;
- }
- }
- }
- }
- //装载说明书pdf
- if (cells.contains("4")) {
- setFuJian(fullDocumentLink, fullDocumentPage, fullDocumentType, patentCell, ".pdf");
- }
- //装载摘要附图
- if (cells.contains("6")) {
- setFuJian(firstPageClippingLink, firstPageClippingPage, firstPageClippingType, patentCell, ".jpeg");
- }
- //装载其他附图
- if (cells.contains("7")) {
- setFuJian(drawingLink, drawingPage, drawingType, patentCell, ".tiff");
- }
- if (patentCell.getPatentNo() != null) {
- String res = outInterfaceService.importPatents(patentCell);
- JSONObject jsonObject = JSONObject.parseObject(res);
- if (jsonObject.get("code").toString().equals("500")) {
- // //任务执行情况状态设为失败,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
- // qrtzTaskDetail.setTaskDetailState(3);
- // qrtzTaskDetail.setSuccessNum(successNum);
- // qrtzTaskDetail.setDefaultNum(count - successNum);
- // qrtzTaskDetail.setEndTime(new Date());
- // qrtzTaskDetail.updateById();
- // if (qrtzTask.getTaskType() == 1) {
- // qrtzTask.setTaskState(4);
- // } else {
- // qrtzTask.setTaskState(3);
- // }
- // qrtzTask.updateById();
- // return null;
- //拿到当前这个下载失败的专利号,保存起来(具体怎么处理失败的专利考虑)
- String patentNo = patentCell.getPatentNo();
- //然后跳过本次专利,继续下载下一个专利
- continue;
- }
- }
- QrtzTaskDetail qrtzTaskDetail11 = qrTaskDetailService.getById(qrtzTaskDetail.getId());
- percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D);
- percentage = MathUtils.saveTwoDecimal(percentage);
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(false)
- .setIndex(successNum)
- .setPercentage(percentage)
- .setTaskDetailState(qrtzTaskDetail11.getTaskDetailState())
- .setFileName("")
- .setUrl("")
- .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null");
- } catch (XiaoShiException e) {
- //虽然拿不到所有附图,但已经拿到了专利著录,能拿到啥是啥
- log.info(e.getMessage());
- } catch (Exception e) {
- qrtzTaskDetail.setTaskDetailState(3);
- qrtzTaskDetail.setSuccessNum(successNum);
- qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
- qrtzTaskDetail.setFailure("拉取节点失败");
- qrtzTaskDetail.setEndTime(new Date());
- qrtzTaskDetail.updateById();
- if (qrtzTask.getTaskType() == 1) {
- qrtzTask.setTaskState(4);
- } else {
- qrtzTask.setTaskState(3);
- }
- qrtzTask.updateById();
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(true)
- .setIndex(successNum)
- .setPercentage(100D)
- .setTaskDetailState(qrtzTaskDetail.getTaskDetailState())
- .setFileName("")
- .setUrl("")
- .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "");
- }
- //4.保存专利信息(发送给专题库)
- patentCells.add(patentCell);
- }
- }
- WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO()
- .setId(webQueryDTO.getTaskId())
- .setProjectId(webQueryDTO.getProjectId())
- .setComplete(true)
- .setIndex(count)
- .setPercentage(100D)
- .setTaskDetailState(qrtzTaskDetail.getTaskDetailState())
- .setFileName("")
- .setUrl("")
- .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "");
- //任务执行情况状态设为成功,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行)
- qrtzTaskDetail.setTaskDetailState(2);
- qrtzTaskDetail.setSuccessNum(successNum);
- qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum);
- qrtzTaskDetail.setTaskProcess(100);
- qrtzTaskDetail.setEndTime(new Date());
- qrtzTaskDetail.updateById();
- if (qrtzTask.getTaskType() == 1) {
- qrtzTask.setTaskState(4);
- } else {
- qrtzTask.setTaskState(2);
- }
- qrtzTask.updateById();
- return patentCells;
- }
- /**
- * 装载说明书附件pdf方法
- *
- * @param link 附件链接
- * @param page 附件页数
- * @param type 附件类型
- * @param patentCell 实体类对象
- */
- private void setFuJian(String link, Integer page, String type, PatentCell patentCell, String FuJianSuffix) throws Exception {
- //合并说明书pdf文档时所需的读取流数组
- InputStream[] streams = new InputStream[page];
- //其他附图url数组
- ArrayList<String> otherUrls = new ArrayList<>();
- //遍历附件页数
- for (int i = 1; i <= page; i++) {
- GetFuTuParamsDTO getFuTuParamsDTO = new GetFuTuParamsDTO()
- .setLink(link)
- .setPage(i)
- .setType(type);
- byte[] buffer = outInterfaceService.getPatentFile(getFuTuParamsDTO);
- InputStream inputStream = new ByteArrayInputStream(buffer);
- streams[i - 1] = inputStream;
- // File file = File.createTempFile("new_url", FuJianSuffix);
- // FileOutputStream out = new FileOutputStream(file);
- // out.write(buffer);
- // out.close();
- // streams[i - 1] = new FileInputStream(file);
- // DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
- // FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
- // int bytesRead = 0;
- // buffer = new byte[8192];
- // try {
- // FileInputStream fis = new FileInputStream(file);
- // OutputStream os = item.getOutputStream();
- // int len = 8192;
- // while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
- // os.write(buffer, 0, bytesRead);
- // }
- // os.close();
- // fis.close();
- // } catch (IOException e) {
- // e.printStackTrace();
- // }
- // MultipartFile multipartFile = new CommonsMultipartFile(item);
- // UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
- // switch (FuJianSuffix) {
- // case ".pdf":
- // streams[i - 1] = new FileInputStream(fileDTO.getPath());
- // break;
- // case ".jpeg":
- // patentCell.setPicUrl(fileDTO.getPath());
- // break;
- // case ".tiff":
- // otherUrls.add(fileDTO.getPath());
- // break;
- // }
- //如果不是说明书pdf
- if (!FuJianSuffix.equals(".pdf")) {
- File file = File.createTempFile("new_url", FuJianSuffix);
- FileOutputStream out = new FileOutputStream(file);
- out.write(buffer);
- out.close();
- streams[i - 1] = new FileInputStream(file);
- DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
- FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
- int bytesRead = 0;
- buffer = new byte[8192];
- try {
- FileInputStream fis = new FileInputStream(file);
- OutputStream os = item.getOutputStream();
- int len = 8192;
- while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
- os.write(buffer, 0, bytesRead);
- }
- os.close();
- fis.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- MultipartFile multipartFile = new CommonsMultipartFile(item);
- UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
- //如果是摘要附图
- if (FuJianSuffix.equals(".jpeg")) {
- patentCell.setPicUrl(fileDTO.getPath());
- }
- //如果是其他附图
- if (FuJianSuffix.equals(".tiff")) {
- otherUrls.add(fileDTO.getPath());
- }
- }
- }
- //如果是说明书pdf
- if (FuJianSuffix.equals(".pdf")) {
- //合并说明书多个pdf文件
- PdfDocumentBase doc = PdfDocument.mergeFiles(streams);
- //保存结果文件
- doc.save("merge.pdf", FileFormat.PDF);
- doc.close();
- //手动将合并后的pdf文件转成MultipartFile,上传文件并获取path装载到patentCell
- File file = new File("merge.pdf");
- DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
- FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
- int bytesRead = 0;
- byte[] buffer = new byte[8192];
- try {
- FileInputStream fis = new FileInputStream(file);
- OutputStream os = item.getOutputStream();
- int len = 8192;
- while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
- os.write(buffer, 0, bytesRead);
- }
- os.close();
- fis.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- MultipartFile multipartFile = new CommonsMultipartFile(item);
- UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
- patentCell.setPdf(fileDTO);
- } else if (FuJianSuffix.equals(".tiff")) {
- patentCell.setOtherUrls(otherUrls);
- }
- }
- /**
- * 获取Image信息方法(从信息中获取三种附图的检索参数)
- *
- * @param pubNo 公开号对象
- */
- private ImageInfo getImage(PubNo pubNo) throws IOException {
- String res = outInterfaceService.getImagesInfo(pubNo);
- JSONObject jsonObject = JSONObject.parseObject(res);
- if (!jsonObject.get("status").toString().equals("200")) {
- //若发生类似400、500等异常(杰哥处理)
- ThrowException.throwXiaoShiException("Image信息接口无法检索该国家专利");
- }
- //拿到检索结果(未格式化的含有页面等标签的说明书)
- String unFormatData = jsonObject.get("data").toString();
- String data = unFormatData.substring(unFormatData.indexOf("[") + 1, unFormatData.lastIndexOf("]"));
- ImageInfo imageInfo = JSONObject.parseObject(data, ImageInfo.class);
- return imageInfo;
- }
- /**
- * 装载说明书方法
- *
- * @param patentCell 实体类对象
- * @param pubNo 公开号对象
- */
- private void setPatentInstructionText(PatentCell patentCell, PubNo pubNo) throws IOException {
- GetDescriptionInfoParamsDTO getDescriptionInfoParamsDTO = new GetDescriptionInfoParamsDTO()
- .setCc(pubNo.getCountry())
- .setNumber(pubNo.getNumber())
- .setKind(pubNo.getKind());
- String res = outInterfaceService.getDescriptionInfo(getDescriptionInfoParamsDTO);
- JSONObject jsonObject = JSONObject.parseObject(res);
- if (!jsonObject.get("status").toString().equals("200")) {
- //若发生类似400、500等异常(杰哥处理)
- //ThrowException.throwXiaoShiException("说明书接口无法检索该国家专利");
- log.info("说明书接口无法检索该国家专利");
- return;
- }
- //拿到检索结果(未格式化的含有页面等标签的说明书)
- //String unformatInstructionText = JSONObject.parseObject(jsonObject.get("data").toString(), String.class);
- List<String> unformatInstructionText = JSONObject.parseObject(jsonObject.get("data").toString(), List.class);
- StringBuilder builder = new StringBuilder();
- for (String n : unformatInstructionText) {
- builder.append(n);
- }
- // String regex = "<p>(.+?)</p>";
- // Matcher matcher = Pattern.compile(regex).matcher(unformatInstructionText);
- // if (matcher.find()) {
- // patentCell.setPatentInstructionText(matcher.group());
- // }
- patentCell.setPatentInstructionText(builder + "");
- }
- /**
- * 装载权要方法
- *
- * @param patentCell 实体类对象
- * @param pubNo 公开号对象
- */
- private void setPatentClaim(PatentCell patentCell, PubNo pubNo) throws IOException {
- GetClaimsInfoParamsDTO getClaimsInfoParamsDTO = new GetClaimsInfoParamsDTO()
- .setCc(pubNo.getCountry())
- .setNumber(pubNo.getNumber())
- .setKind(pubNo.getKind());
- String res = outInterfaceService.getClaimsInfo(getClaimsInfoParamsDTO);
- JSONObject jsonObject = JSONObject.parseObject(res);
- if (!jsonObject.get("status").toString().equals("200")) {
- //若发生类似400、500等异常(杰哥处理)
- //ThrowException.throwXiaoShiException("权要接口无法检索该国家专利");
- log.info("权要接口无法检索该国家专利");
- return;
- }
- //拿到检索结果(未格式化的含有页面等标签的权要)并格式化权要
- //String unformatRight = JSONObject.parseObject(jsonObject.get("data").toString(), String.class);
- ArrayList<String> unformatRights = JSONObject.parseObject(jsonObject.get("data").toString(), ArrayList.class);
- //String[] rightArr = unformatRight.split("\n");
- //ArrayList<String> rights = new ArrayList<>(Arrays.asList(rightArr));
- patentCell.setRights(unformatRights);
- }
- /**
- * 装载著录方法
- *
- * @param patentCell 实体类对象
- * @param patent 专利对象
- * @param pubNo 公开号对象
- */
- private void setPatentZhuLu(PatentCell patentCell, PatentZhuLu patent, PubNo pubNo) {
- //装载申请号
- for (AppNo appNo : patent.getAppNos()) {
- if (appNo.getType().equals("docdb")) {
- patentCell.setApplicationNo(appNo.getCountry() + appNo.getNumber() + appNo.getKind());
- }
- }
- //装载申请日
- patentCell.setApplicationDate(patent.getAppDate());
- //装载国家/省市
- patentCell.setCountry(patent.getAppCountry());
- //装载公开号
- for (PubNo n : patent.getPubNos()) {
- if (n.getType().equals("docdb")) {
- BeanUtils.copyProperties(n, pubNo);
- patentCell.setPublicNo(n.getCountry() + n.getNumber() + n.getKind());
- }
- }
- //装载专利号
- patentCell.setPatentNo(patentCell.getPublicNo());
- //装载公开日
- patentCell.setPubilcDate(patent.getPubDate());
- //装载授权公告号(未找到)
- //装载授权公告日(未找到)
- //装载主分类号
- List<String> ipCs = patent.getIpCs();
- List<String> cpCs = patent.getCpCs();
- ipCs.addAll(cpCs);
- if (ipCs != null && ipCs.size() > 0) {
- patentCell.setMainIpc(ipCs.get(0));
- }
- //装载分类号
- if (ipCs != null && ipCs.size() > 0) {
- patentCell.setIpc(ipCs);
- }
- //装载申请人
- ArrayList<String> applicationPersons = new ArrayList<>();
- for (Application application : patent.getApplications()) {
- if (application.getOriginalName().contains("(标:)")) {
- application.setOriginalName(application.getOriginalName().substring(0, application.getOriginalName().indexOf("(标:)")));
- }
- applicationPersons.add(application.getOriginalName());
- }
- patentCell.setApplicationPersons(applicationPersons);
- //装载申请人地址(未找到)
- //装载发明人
- ArrayList<String> inventors = new ArrayList<>();
- for (Inventor inventor : patent.getInventors()) {
- inventors.add(inventor.getOriginalName());
- }
- patentCell.setInventors(inventors);
- //装载当前权利人(未找到)
- //装载代理人(未找到)
- //装载代理机构(未找到)
- //装载范畴分类(未找到)
- //装载当前状态(未找到)
- //装载同族号
- patentCell.setFamilyId(patent.getFamilyId());
- //装载著录标题
- String olTitle = patent.getOlTitle();
- String enTitle = patent.getEnTitle();
- if (olTitle == null) {
- patentCell.setTitle(enTitle);
- } else {
- patentCell.setTitle(olTitle);
- }
- //装载摘要
- String olAbstract = patent.getOlAbstract();
- String enAbstract = patent.getEnAbstract();
- if (olAbstract == null) {
- patentCell.setAbstrText(enAbstract);
- } else {
- patentCell.setAbstrText(olAbstract);
- }
- //装载优先权号、优先权国家、优先权日
- ArrayList<Priority> priorities = new ArrayList<>();
- List<Priorityy> priorties = patent.getPriorties();
- for (Priorityy priorty : priorties) {
- for (PriorityNumber number : priorty.getNumbers()) {
- if (number.getType().equals("epodoc")) {
- Priority priority = new Priority()
- .setPriorityNo(number.getNumber().substring(2))
- .setPriorityCountry(number.getNumber().substring(0, 2))
- .setPriorityDate(priorty.getDate());
- priorities.add(priority);
- }
- }
- }
- patentCell.setPriorities(priorities);
- }
- //调用接口获取一批专利著录信息
- private SerachBiblioData getSerachBiblioData(String conditions, Integer start, Integer size) throws IOException {
- GetSearchBiblioParamsDTO getSearchBiblioParamsDTO = new GetSearchBiblioParamsDTO()
- .setQuery(conditions)
- .setStart(start)
- .setEnd(size);
- String res = outInterfaceService.getSearchBiblio(getSearchBiblioParamsDTO);
- if (res == null || res.equals("")) {
- return null;
- }
- JSONObject jsonObject = JSONObject.parseObject(res);
- if (!jsonObject.get("status").toString().equals("200")) {
- //若发生类似400、500等异常(杰哥处理)
- return null;
- }
- //返回检索结果data
- return JSONObject.parseObject(jsonObject.get("data").toString(), SerachBiblioData.class);
- }
- /**
- * 下载爬取智慧芽专利数据
- *
- * @param patentVO
- * @return
- * @throws IOException
- * @throws InterruptedException
- */
- public List<PatentCell> getPatentya(String patentVO) throws InterruptedException {
- //1.获得驱动
- System.setProperty("webdriver.chrome.driver", "D:\\driver\\chromedriver.exe");
- // 2.页面最大化
- ChromeOptions chromeOptions = new ChromeOptions();
- chromeOptions.addArguments("--start-maximized");
- // 3.创建驱动
- WebDriver driver = new ChromeDriver(chromeOptions);
- // 4.创建动作
- Actions action = new Actions(driver);
- // 打开智慧芽首页
- driver.get("https://account.zhihuiya.com");
- //设置等待时间
- long formSecond1 = 50;
- WebDriverWait wait1 = new WebDriverWait(driver, formSecond1);
- //等待密码登录界面加载
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("tab-password")));
- // 点击密码登录
- driver.findElement(By.id("tab-password")).click();
- // 获取输入框,输入账号密码
- List<WebElement> ret = driver.findElements(By.className("el-input__inner"));
- ret.get(0).sendKeys("liting@china-wispro.com");
- ret.get(1).sendKeys("Lt199299");
- // 点击登录
- driver.findElement(By.className("el-button")).click();
- //等待加载
- TimeUnit.MILLISECONDS.sleep(1000);//毫秒
- List<WebElement> alerts = driver.findElements(By.className("patsnap-el-confirm"));
- if (alerts.size() != 0) {
- alerts.get(0).click();
- }
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("master_nav-item__2nd_lv")));
- WebElement searchTag = driver.findElement(By.className("master_nav-item__2nd_lv"));
- searchTag.click();
- //等待搜索框加载
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("div-textarea")));
- List<WebElement> ret1 = driver.findElements(By.className("div-textarea"));
- //搜索框赋值
- ret1.get(0).sendKeys(patentVO);
- TimeUnit.MILLISECONDS.sleep(2000);//毫秒
- wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("search-form__btn")));
- WebElement rr = driver.findElement(By.className("search-form__btn"));
- rr.click();
- TimeUnit.MILLISECONDS.sleep(1000);//毫秒
- List<PatentCell> patentCells = new ArrayList<>();
- long formSecond = 50;
- WebDriverWait wait = new WebDriverWait(driver, formSecond);
- wait.until(ExpectedConditions.presenceOfElementLocated(By.className("search-result-standard-view__patent")));
- // String js ="document.getElementsByClassName('sidebar-table__right')[0].scrollBy(0, 8000)";
- // ((JavascriptExecutor) driver).executeScript(js);
- List<WebElement> ret2 = driver.findElements(By.className("search-result-standard-view__patent"));
- ret2.remove(ret2.size() - 1);
- List<WebElement> ret3 = driver.findElements(By.className("pn-cell-popover"));
- for (int i = 0; i < ret2.size(); i++) {
- action.moveToElement(ret3.get(i)).perform();
- wait.until(ExpectedConditions.attributeToBeNotEmpty(ret3.get(i), "href"));
- String patenNo = ret3.get(i).getAttribute("data-link-data");
- System.out.println(patenNo);
- }
- for (WebElement item : ret2) {
- List<WebElement> aLinks = item.findElements(By.tagName("a"));
- PatentCell patentCell = new PatentCell();
- aLinks.forEach(tem -> {
- String type = tem.getAttribute("data-link-type");
- if (type != null && type.equals("TITLE")) {
- String title = tem.getAttribute("data-link-data");
- patentCell.setTitle(title);
- } else if (type != null && type.equals("PN")) {
- String url = tem.getAttribute("href");
- patentCell.setUrl(url);
- String patenNo = tem.getAttribute("data-link-data");
- patentCell.setPatentNo(patenNo);
- } else if (type != null && type.equals("ANC")) {
- String applications = tem.getAttribute("data-link-data");
- patentCell.setApplications(applications);
- }
- });
- String legal = driver.findElement(By.className("legal-tag__LEGAL_STATUS")).getText();
- patentCell.setLegal(legal);
- patentCells.add(patentCell);
- }
- return patentCells;
- }
- //格式化参数
- public String formatConditions(String condition) throws IOException {
- String jsons = FileUtils.readerMethod();
- JSONObject jsonObject = new JSONObject();
- List<JSONObject> sources = JSONObject.parseArray(jsons, JSONObject.class);
- for (JSONObject jsonObject1 : sources) {
- jsonObject = jsonObject1;
- }
- condition = condition.replace(" ", "");
- condition = condition.replace("&&", "*");
- condition = condition.replace("||", "+");
- StringBuilder stringBuilder = new StringBuilder("F XX ");
- String pattern = "\\([^)]*\\)";
- Pattern p = Pattern.compile(pattern);
- Matcher m = p.matcher(condition);
- while (m.find()) { // 当字符串中有匹配到 {} 时
- String param = m.group(0);
- String cell = formatParam(param, jsonObject);
- condition = condition.replace(param, cell);
- }
- return condition;
- }
- public String formatParam(String param, JSONObject jsonObject) {
- param = param.replace("(", "");
- param = param.replace(")", "");
- String[] params = param.split("=");// {} 和里面的内容
- StringBuilder stringBuilder = new StringBuilder("(" + params[1]);
- stringBuilder.append("/");
- String cell = jsonObject.get(params[0]).toString();
- stringBuilder.append(cell + ")");
- return stringBuilder.toString();
- }
- //将值转换为list
- public List<String> StringToList(String value, String split) {
- List<String> list = new ArrayList<>();
- if (value != null && value != "") {
- String[] temValue = value.split(split);
- list = new ArrayList<>(Arrays.asList(temValue));
- }
- return list;
- }
- }
|