package com.example.demo.service; import com.alibaba.fastjson.JSONObject; import com.example.demo.domain.dto.*; import com.example.demo.domain.entity.*; import com.example.demo.exception.XiaoShiException; import com.example.demo.model.dto.TaskWebSocketDTO; import com.example.demo.model.dto.WebQueryDTO; import com.example.demo.util.*; import com.spire.pdf.FileFormat; import com.spire.pdf.PdfDocument; import com.spire.pdf.PdfDocumentBase; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.fileupload.FileItem; import org.apache.commons.fileupload.disk.DiskFileItemFactory; import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeOptions; import org.openqa.selenium.interactions.Actions; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Lazy; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.commons.CommonsMultipartFile; import java.io.*; import java.util.*; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @author admin * @description 针对表【qrtz_task(定时任务表)】的数据库操作Service实现 * @createDate 2023-03-15 13:40:32 */ @Slf4j @Service @RequiredArgsConstructor(onConstructor_ = {@Lazy}) public class UploadFromWebService { private final FileUtils fileUtils; private final OutInterfaceService outInterfaceService; private final WebConfigService webConfigService; private final QrTaskService qrTaskService; private final QrTaskDetailService qrTaskDetailService; private final WebLoginConfigService webLoginConfigService; @Value("${driverUrl}") private 
String url; // 查询并上传(专利之星) public List getPatentStar(WebQueryDTO webQueryDTO) throws Exception { List patentCells = new ArrayList<>(); //当前任务执行情况 QrtzTaskDetail qrtzTaskDetail = qrTaskDetailService.getById(webQueryDTO.getTaskId()); Integer successNum = qrtzTaskDetail.getSuccessNum(); int startPage = successNum / 5; int startNum = successNum % 5; //当前任务条件 QrtzTask qrtzTask = qrTaskService.getById(qrtzTaskDetail.getTaskId()); //专利成功条数 try { List cells = webQueryDTO.getConCells(); String conditions = webQueryDTO.getConditions(); Integer id = webQueryDTO.getWebConfigId(); //conditions = this.formatConditions(conditions); //根据id 获得网站配置 WebConfig webConfig = webConfigService.getConfigById(id); //根据网站id和用户的租户获得登录信息 WebLoginConfig webLoginConfig = webLoginConfigService.getLoginConfig(webConfig.getId(), qrtzTask.getTenantId()); if (webLoginConfig == null) { qrtzTaskDetail.setTaskDetailState(3); qrtzTaskDetail.setSuccessNum(successNum); qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum); qrtzTaskDetail.setFailure("未配置登录信息"); qrtzTaskDetail.setEndTime(new Date()); qrtzTaskDetail.updateById(); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(true) .setIndex(successNum) .setPercentage(0.0) .setTaskDetailState(3) .setFileName("") .setUrl("") .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null"); return null; } //1.获得驱动 // System.setProperty("webdriver.chrome.driver", "D:\\driver\\chromedriver.exe"); System.setProperty("webdriver.chrome.driver", url); ChromeOptions chromeOptions = new ChromeOptions(); // 设置无头模式 chromeOptions.setHeadless(Boolean.TRUE); // 设置无轨 开发时还是不要加,可以看到浏览器效果 chromeOptions.addArguments("-headless"); chromeOptions.addArguments("no-sandbox"); // 3.创建驱动 WebDriver driver = new ChromeDriver(chromeOptions); // 4.创建动作 Actions action = new Actions(driver); //设置等待时间 long formSecond1 = 10; WebDriverWait wait1 = new 
WebDriverWait(driver, formSecond1); // 打开专利之星首页 driver.get(webConfig.getWebAddress()); wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("loginname"))); WebElement loginName = driver.findElement(By.id("loginname")); WebElement password = driver.findElement(By.id("password")); loginName.sendKeys(webLoginConfig.getLoginAccount()); password.sendKeys(webLoginConfig.getLoginPassword()); WebElement loginButton = driver.findElement(By.id("login")); loginButton.click(); TimeUnit.MILLISECONDS.sleep(5000);//毫秒 //获得表格搜索按钮并点击 wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("tablepage"))); WebElement tablePage = driver.findElement(By.id("tablepage")); WebElement tablePagea = tablePage.findElement(By.tagName("a")); tablePagea.click(); TimeUnit.MILLISECONDS.sleep(5000);//毫秒 //获得搜索框并点击 wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("TxtSearch"))); WebElement TxtSearch = driver.findElement(By.id("TxtSearch")); TxtSearch.sendKeys(conditions); WebElement searchBtn = driver.findElement(By.id("searchbtn2")); searchBtn.click(); wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("tcdNumber"))); List tcdNumbers = driver.findElements(By.className("tcdNumber")); String pagesText = tcdNumbers.get(tcdNumbers.size() - 1).getText(); int page = Integer.parseInt(pagesText); //获得总数 WebElement countEle = driver.findElement(By.className("allcountlab")); String countText = countEle.getText(); Integer count = Integer.parseInt(countText); //总数赋值给当前任务执行情况的总量属性 qrtzTaskDetail.setAllNum(count); qrtzTaskDetail.updateById(); String Handle = driver.getWindowHandle(); //创建一个map集合存放浏览器句柄 HashMap handleMap = new HashMap<>(); //将智慧芽句柄放到map中 handleMap.put("mainPage", Handle); WebElement pagNum = driver.findElement(By.className("page_num")); pagNum.clear(); pagNum.sendKeys(startPage + 1 + ""); WebElement pagBtn = driver.findElement(By.className("page_btn")); pagBtn.click(); TimeUnit.MILLISECONDS.sleep(10000);//毫秒 //计算进度值 double percentage = count == 0 ? 
0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D); percentage = MathUtils.saveTwoDecimal(percentage); for (int p = startPage; p < page; p++) { //等待数据加载 wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("patent"))); TimeUnit.MILLISECONDS.sleep(1000);//毫秒 //获得列表 List patentContents = driver.findElements(By.className("patent")); // 页面元素标签 for (int i = startNum; i < patentContents.size(); i++) { //查看任务状态 QrtzTaskDetail qrtzTaskDetail1 = qrTaskDetailService.getById(webQueryDTO.getTaskId()); if (qrtzTaskDetail1.getTaskDetailState().equals(7)) { qrtzTaskDetail1.setTaskDetailState(5); qrtzTaskDetail1.setSuccessNum(successNum); qrtzTaskDetail1.setTaskProcess(percentage); qrtzTaskDetail1.updateById(); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(false) .setIndex(successNum) .setTaskDetailState(5) .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null"); return null; } else if (qrtzTaskDetail1.getTaskDetailState().equals(8)) { qrtzTaskDetail1.setTaskDetailState(6); qrtzTaskDetail1.setSuccessNum(successNum); qrtzTaskDetail1.setTaskProcess(percentage); qrtzTaskDetail1.updateById(); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(false) .setIndex(successNum) .setTaskDetailState(6) .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null"); return null; } PatentCell patentCell = new PatentCell(); patentCell.setProjectId(webQueryDTO.getProjectId()); patentCell.setReportId(webQueryDTO.getReportId()); wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("title-color"))); WebElement titleA = patentContents.get(i).findElement(By.className("title-color")); titleA.click(); TimeUnit.MILLISECONDS.sleep(10000);//毫秒 //9、获取到所有的句柄 Set set = driver.getWindowHandles(); //10、循环找到详情窗口句柄 for 
(String s : set) { //10.1、将详情窗口的句柄放到map中 if (!s.equals(Handle)) { handleMap.put("detail", s); } } driver.switchTo().window(handleMap.get("detail")); wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("item-content"))); WebElement itemContent = driver.findElement(By.className("item-content")); List divItems = itemContent.findElements(By.tagName("div")); for (int t = 0; t < divItems.size(); t++) { WebElement temEle = divItems.get(t); String text = temEle.getText(); String value = ""; String[] strings = text.split(":"); if (strings.length > 1) { value = strings[1].trim(); String key = strings[0].trim(); switch (key) { case "申请号": patentCell.setApplicationNo(value); break; case "申请日": patentCell.setApplicationDate(value); break; case "国家/省市": patentCell.setCountry(value); break; case "公开号": patentCell.setPublicNo(value); break; case "公开日": patentCell.setPubilcDate(value); break; case "授权公告号": patentCell.setPublicAccreditNo(value); break; case "授权公告日": patentCell.setPublicAccreditDate(value); break; case "主分类号": patentCell.setMainIpc(value); break; case "分类号": patentCell.setIpc(this.StringToList(value, ";")); break; case "申请人": patentCell.setApplicationPersons(this.StringToList(value, " ")); break; case "申请人地址": patentCell.setApplicationAddress(this.StringToList(value, " ")); break; case "发明人": patentCell.setInventors(this.StringToList(value, " ")); break; case "当前权利人": patentCell.setApplicationCurrents(this.StringToList(value, " ")); break; case "代理人": patentCell.setAgencyPersons(this.StringToList(value, " ")); break; case "代理机构": patentCell.setAgency(value); break; case "范畴分类": patentCell.setClassical(value); break; case "当前状态": patentCell.setStatue(value); break; } } } //获得摘要 WebElement abstrElement = driver.findElement(By.className("item-summary")); List spans = abstrElement.findElements(By.tagName("span")); if (cells.contains("1")) { String abstrText = spans.get(0).getText(); patentCell.setAbstrText(abstrText); } //获得图片路径 
wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("item-img"))); WebElement picElement = driver.findElement(By.className("item-img")); WebElement imag = picElement.findElement(By.tagName("img")); String url = imag.getAttribute("src"); url = fileUtils.uploadToLocal(url, ".jpg").getPath(); patentCell.setPicUrl(url); if (cells.contains("2")) { TimeUnit.MILLISECONDS.sleep(10000);//毫秒 WebElement CLElement = driver.findElement(By.id("itemCL")); CLElement.click(); //获得主权要 String mainRight = spans.get(1).getText(); patentCell.setMainRignt(mainRight); //获得权要 driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS); List rightElements = driver.findElements(By.tagName("claim-text")); if (rightElements.size() == 0) { WebElement suBtn = driver.findElement(By.className("layui-layer-btn0")); suBtn.click(); TimeUnit.MILLISECONDS.sleep(10000);//毫秒 } else { TimeUnit.MILLISECONDS.sleep(10000);//毫秒 List rights = new ArrayList<>(); rightElements.forEach(item -> { rights.add(item.getText()); }); patentCell.setRights(rights); } } if (cells.contains("3")) { //获得说明书 WebElement DSElement = driver.findElement(By.id("itemDS")); DSElement.click(); TimeUnit.MILLISECONDS.sleep(10000);//毫秒 driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS); List titleElements = driver.findElements(By.tagName("invention-title")); if (titleElements.size() == 0) { WebElement suBtn = driver.findElement(By.className("layui-layer-btn0")); suBtn.click(); TimeUnit.MILLISECONDS.sleep(10000);//毫秒 } else { WebElement titleElement = titleElements.get(0); patentCell.setTitle(titleElement.getText()); StringBuilder stringBuilder = new StringBuilder(); int flag = 1; //技术领域 WebElement textElement1 = driver.findElement(By.tagName("description")); List textP1 = textElement1.findElements(By.tagName("p")); stringBuilder.append(textP1.get(0).getText()); stringBuilder.append("\r\n"); for (int t = 1; t < textP1.size(); t++) { String tem = String.format("%04d", flag); 
stringBuilder.append("[").append(tem).append("]").append(textP1.get(t).getText()); stringBuilder.append("\r\n"); flag += 1; } patentCell.setPatentInstructionText(stringBuilder.toString()); } } //获得pdf文档 if (cells.contains("4")) { WebElement PDFElement = driver.findElement(By.id("itemPdf")); PDFElement.click(); TimeUnit.MILLISECONDS.sleep(10000);//毫秒 driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS); List PDFFile = driver.findElements(By.tagName("embed")); String pdfUrl = ""; if (PDFFile.size() == 0) { List PDFFiles = driver.findElements(By.className("pdfbtn")); if (PDFFiles.size() != 0) { pdfUrl = PDFFiles.get(0).getAttribute("data-url"); } else { WebElement suBtn = driver.findElement(By.className("layui-layer-btn0")); suBtn.click(); TimeUnit.MILLISECONDS.sleep(10000);//毫秒 } } else { pdfUrl = PDFFile.get(0).getAttribute("src"); } if (pdfUrl != "") { UploadFileDTO fileDTO = fileUtils.uploadToLocal(pdfUrl, ".PDF"); patentCell.setPDFName(fileDTO.getFileName()); patentCell.setPDFSize(fileDTO.getFileSize()); patentCell.setPDFUrl(fileDTO.getPath()); patentCell.setPdf(fileDTO); } } if (cells.contains("5")) { //获得法律状态 WebElement flztElement = driver.findElement(By.id("flztbtn")); flztElement.click(); TimeUnit.MILLISECONDS.sleep(10000);//毫秒 wait1.until(ExpectedConditions.presenceOfElementLocated(By.id("legalContainer"))); WebElement tBody = driver.findElement(By.id("legalContainer")); TimeUnit.MILLISECONDS.sleep(1000);//毫秒 List trs = tBody.findElements(By.tagName("tr")); List affairs = new ArrayList<>(); trs.forEach(item -> { List tds = item.findElements(By.tagName("td")); PatentCell.PatentAffair affair = new PatentCell.PatentAffair(); affair.setDateTime(tds.get(0).getText()); affair.setSimpleStatus(tds.get(1).getText()); affair.setStatus(tds.get(2).getText()); affairs.add(affair); }); patentCell.setPatentAffairs(affairs); } successNum++; if (patentCell.getPublicNo() != null) { patentCell.setPatentNo(patentCell.getPublicNo()); } else { 
patentCell.setPatentNo(patentCell.getPublicAccreditNo()); patentCell.setPublicNo(patentCell.getPublicAccreditNo()); } if (patentCell.getPatentNo() != null) { String res = outInterfaceService.importPatents(patentCell); JSONObject jsonObject = JSONObject.parseObject(res); if (jsonObject.get("code").toString().equals("500")) { //任务执行情况状态设为失败,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行) qrtzTaskDetail.setTaskDetailState(3); qrtzTaskDetail.setSuccessNum(successNum); qrtzTaskDetail.setDefaultNum(count - successNum); qrtzTaskDetail.setEndTime(new Date()); qrtzTaskDetail.updateById(); if (qrtzTask.getTaskType() == 1) { qrtzTask.setTaskState(4); } else { qrtzTask.setTaskState(3); } qrtzTask.updateById(); return null; } } QrtzTaskDetail qrtzTaskDetail11 = qrTaskDetailService.getById(qrtzTaskDetail.getId()); percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D); percentage = MathUtils.saveTwoDecimal(percentage); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(false) .setIndex(successNum) .setPercentage(percentage) .setTaskDetailState(qrtzTaskDetail11.getTaskDetailState()) .setFileName("") .setUrl("") .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null"); patentCells.add(patentCell); driver.close(); driver.switchTo().window(handleMap.get("mainPage")); } wait1.until(ExpectedConditions.presenceOfElementLocated(By.className("tcdPageCode"))); List nexts = driver.findElements(By.className("nextPage")); if (nexts.size() != 0) { nexts.get(0).click(); TimeUnit.MILLISECONDS.sleep(10000);//毫秒 } } WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(true) .setIndex(count) .setPercentage(100D) .setTaskDetailState(qrtzTaskDetail.getTaskDetailState()) .setFileName("") .setUrl("") .setTotal(count), 
ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), ""); //任务执行情况状态设为成功,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行) qrtzTaskDetail.setTaskDetailState(2); qrtzTaskDetail.setSuccessNum(successNum); qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum); qrtzTaskDetail.setTaskProcess(100); qrtzTaskDetail.setEndTime(new Date()); qrtzTaskDetail.updateById(); if (qrtzTask.getTaskType() == 1) { qrtzTask.setTaskState(4); } else { qrtzTask.setTaskState(2); } qrtzTask.updateById(); driver.quit(); } catch (Exception e) { log.error(e.toString()); //任务执行情况状态设为失败,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行) qrtzTaskDetail.setTaskDetailState(3); qrtzTaskDetail.setSuccessNum(successNum); qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum); qrtzTaskDetail.setFailure("拉取节点失败"); qrtzTaskDetail.setEndTime(new Date()); qrtzTaskDetail.updateById(); if (qrtzTask.getTaskType() == 1) { qrtzTask.setTaskState(4); } else { qrtzTask.setTaskState(3); } qrtzTask.updateById(); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(true) .setIndex(successNum) .setPercentage(100D) .setTaskDetailState(qrtzTaskDetail.getTaskDetailState()) .setFileName("") .setUrl("") .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), ""); } return patentCells; } /** * 下载欧专局专利数据 * * @param webQueryDTO 前台传来的下载条件等数据 * @return 返回 * @throws IOException 抛出IO异常 */ public List getEPO(WebQueryDTO webQueryDTO) throws Exception { log.info("开始处理【下载欧专局专利数据】的业务,参数为:{}", webQueryDTO); List patentCells = new ArrayList<>(); QrtzTaskDetail qrtzTaskDetail = qrTaskDetailService.getById(webQueryDTO.getTaskId()); Integer successNum = qrtzTaskDetail.getSuccessNum(); int startPage = successNum / 5; int startNum = successNum % 5; //当前任务条件 QrtzTask qrtzTask = qrTaskService.getById(qrtzTaskDetail.getTaskId()); //1.从DTO中取出网站id、检索信息条件、下载字段、专题库id、报告id Integer webId = webQueryDTO.getWebConfigId(); List cells = 
webQueryDTO.getConCells(); String conditions = webQueryDTO.getConditions(); Integer projectId = webQueryDTO.getProjectId(); Integer reportId = webQueryDTO.getReportId(); //根据id 获得网站配置 WebConfig webConfig = webConfigService.getConfigById(webId); //根据网站id和用户的租户获得登录信息 WebLoginConfig webLoginConfig = webLoginConfigService.getLoginConfig(webConfig.getId(), qrtzTask.getTenantId()); if (webLoginConfig == null) { qrtzTaskDetail.setTaskDetailState(3); qrtzTaskDetail.setSuccessNum(successNum); qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum); qrtzTaskDetail.setFailure("未配置登录信息"); qrtzTaskDetail.setEndTime(new Date()); qrtzTaskDetail.updateById(); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(true) .setIndex(successNum) .setPercentage(0.0) .setTaskDetailState(3) .setFileName("") .setUrl("") .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null"); return null; } //定义每次检索的专利数量(每次检索50件) int size = 50; //2.根据检索式conditions先检索一件专利著录信息【此操作主要是为了获得专利总数量count】 SerachBiblioData serachBiblioData = getSerachBiblioData(conditions, 1, 1); if (serachBiblioData == null || serachBiblioData.getTotals() == 0) { //conditions没有检索到任何相关专利(杰哥处理) ThrowException.throwXiaoShiException("检索失败,未检索出任何相关专利信息"); } //获得专利总数量 Integer count = serachBiblioData.getTotals(); //计算进度值 double percentage = count == 0 ? 0 : (count.equals(successNum) ? 
(successNum * 1D) : (successNum + 1D) / count * 100D); percentage = MathUtils.saveTwoDecimal(percentage); //3.根据专利总数量count计算检索次数,来循环检索 for (int i = 1; i <= count; i += size) { //查看任务状态 QrtzTaskDetail qrtzTaskDetail1 = qrTaskDetailService.getById(webQueryDTO.getTaskId()); if (qrtzTaskDetail1.getTaskDetailState().equals(7)) { qrtzTaskDetail1.setTaskDetailState(5); qrtzTaskDetail1.setSuccessNum(successNum); qrtzTaskDetail1.setTaskProcess(percentage); qrtzTaskDetail1.updateById(); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(false) .setIndex(successNum) .setTaskDetailState(5) .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null"); return null; } else if (qrtzTaskDetail1.getTaskDetailState().equals(8)) { qrtzTaskDetail1.setTaskDetailState(6); qrtzTaskDetail1.setSuccessNum(successNum); qrtzTaskDetail1.setTaskProcess(percentage); qrtzTaskDetail1.updateById(); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(false) .setIndex(successNum) .setTaskDetailState(6) .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null"); return null; } //3.1根据检索式conditions、专利开始数i、专利最后数i + size - 1检索专利著录信息 serachBiblioData = getSerachBiblioData(conditions, i, i + size - 1); //获取检索结果中的所有专利著录信息 if (serachBiblioData == null || serachBiblioData.getTotals() == 0) { continue; } List patents = serachBiblioData.getPatents(); //3.2遍历专利 for (PatentZhuLu patent : patents) { PatentCell patentCell = new PatentCell(); patentCell.setProjectId(qrtzTask.getProjectId()); PubNo pubNO = new PubNo(); //装载专利著录 if (cells.contains("1")) { setPatentZhuLu(patentCell, patent, pubNO); } //装载权要 if (cells.contains("2")) { setPatentClaim(patentCell, pubNO); } //装载说明书文本 if (cells.contains("3")) { setPatentInstructionText(patentCell, pubNO); } 
//以下代码是在准备一会要调用拿取各种附图的接口所需的参数(FullDocument->说明书pdf、Drawing->其他附图、FirstPageClipping->摘要附图) String fullDocumentLink = "", fullDocumentType = "", drawingLink = "", drawingType = "", firstPageClippingLink = "", firstPageClippingType = ""; Integer fullDocumentPage = 0, drawingPage = 0, firstPageClippingPage = 0; //根据当前专利号调用接口获取一会调用拿取各种附图的接口的参数 try { ImageInfo imageInfo = getImage(pubNO); for (Image image : imageInfo.getImages()) { //如果附件类型是说明书 if (image.getImageType().equals("FullDocument")) { fullDocumentLink = image.getUrlLink(); fullDocumentPage = image.getNumberOfPages(); for (String formatOption : image.getFormatOptions()) { if (formatOption.contains("pdf")) { fullDocumentType = formatOption; } } } //如果附件类型是其他附件 if (image.getImageType().equals("Drawing")) { drawingLink = image.getUrlLink(); drawingPage = image.getNumberOfPages(); for (String formatOption : image.getFormatOptions()) { if (formatOption.contains("tiff")) { drawingType = formatOption; } } } //如果附件类型是摘要附图 if (image.getImageType().equals("FirstPageClipping")) { firstPageClippingLink = image.getUrlLink(); firstPageClippingPage = image.getNumberOfPages(); for (String formatOption : image.getFormatOptions()) { if (formatOption.contains("jpeg")) { firstPageClippingType = formatOption; } } } } //装载说明书pdf if (cells.contains("4")) { setFuJian(fullDocumentLink, fullDocumentPage, fullDocumentType, patentCell, ".pdf"); } //装载摘要附图 if (cells.contains("6")) { setFuJian(firstPageClippingLink, firstPageClippingPage, firstPageClippingType, patentCell, ".jpeg"); } //装载其他附图 if (cells.contains("7")) { setFuJian(drawingLink, drawingPage, drawingType, patentCell, ".tiff"); } if (patentCell.getPatentNo() != null) { String res = outInterfaceService.importPatents(patentCell); JSONObject jsonObject = JSONObject.parseObject(res); if (jsonObject.get("code").toString().equals("500")) { // //任务执行情况状态设为失败,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行) // qrtzTaskDetail.setTaskDetailState(3); // qrtzTaskDetail.setSuccessNum(successNum); // 
qrtzTaskDetail.setDefaultNum(count - successNum); // qrtzTaskDetail.setEndTime(new Date()); // qrtzTaskDetail.updateById(); // if (qrtzTask.getTaskType() == 1) { // qrtzTask.setTaskState(4); // } else { // qrtzTask.setTaskState(3); // } // qrtzTask.updateById(); // return null; //拿到当前这个下载失败的专利号,保存起来(具体怎么处理失败的专利考虑) String patentNo = patentCell.getPatentNo(); //然后跳过本次专利,继续下载下一个专利 continue; } } QrtzTaskDetail qrtzTaskDetail11 = qrTaskDetailService.getById(qrtzTaskDetail.getId()); percentage = count == 0 ? 0 : (count.equals(successNum) ? (successNum * 1D) : (successNum + 1D) / count * 100D); percentage = MathUtils.saveTwoDecimal(percentage); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(false) .setIndex(successNum) .setPercentage(percentage) .setTaskDetailState(qrtzTaskDetail11.getTaskDetailState()) .setFileName("") .setUrl("") .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), "null"); } catch (XiaoShiException e) { //虽然拿不到所有附图,但已经拿到了专利著录,能拿到啥是啥 log.info(e.getMessage()); } catch (Exception e) { qrtzTaskDetail.setTaskDetailState(3); qrtzTaskDetail.setSuccessNum(successNum); qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum); qrtzTaskDetail.setFailure("拉取节点失败"); qrtzTaskDetail.setEndTime(new Date()); qrtzTaskDetail.updateById(); if (qrtzTask.getTaskType() == 1) { qrtzTask.setTaskState(4); } else { qrtzTask.setTaskState(3); } qrtzTask.updateById(); WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() .setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(true) .setIndex(successNum) .setPercentage(100D) .setTaskDetailState(qrtzTaskDetail.getTaskDetailState()) .setFileName("") .setUrl("") .setTotal(successNum), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), ""); } //4.保存专利信息(发送给专题库) patentCells.add(patentCell); } } WebSocketServer.sendInfo(Response.websocket(new TaskWebSocketDTO() 
.setId(webQueryDTO.getTaskId()) .setProjectId(webQueryDTO.getProjectId()) .setComplete(true) .setIndex(count) .setPercentage(100D) .setTaskDetailState(qrtzTaskDetail.getTaskDetailState()) .setFileName("") .setUrl("") .setTotal(count), ResponseEnum.PATENT_IMPORT_TASK_SUCCESS), ""); //任务执行情况状态设为成功,任务条件状态设为已完成(若是任务条件是定时任务则设为等待下一次执行) qrtzTaskDetail.setTaskDetailState(2); qrtzTaskDetail.setSuccessNum(successNum); qrtzTaskDetail.setDefaultNum(qrtzTaskDetail.getAllNum() - successNum); qrtzTaskDetail.setTaskProcess(100); qrtzTaskDetail.setEndTime(new Date()); qrtzTaskDetail.updateById(); if (qrtzTask.getTaskType() == 1) { qrtzTask.setTaskState(4); } else { qrtzTask.setTaskState(2); } qrtzTask.updateById(); return patentCells; } /** * 装载说明书附件pdf方法 * * @param link 附件链接 * @param page 附件页数 * @param type 附件类型 * @param patentCell 实体类对象 */ private void setFuJian(String link, Integer page, String type, PatentCell patentCell, String FuJianSuffix) throws Exception { //合并说明书pdf文档时所需的读取流数组 InputStream[] streams = new InputStream[page]; //其他附图url数组 ArrayList otherUrls = new ArrayList<>(); //遍历附件页数 for (int i = 1; i <= page; i++) { GetFuTuParamsDTO getFuTuParamsDTO = new GetFuTuParamsDTO() .setLink(link) .setPage(i) .setType(type); byte[] buffer = outInterfaceService.getPatentFile(getFuTuParamsDTO); InputStream inputStream = new ByteArrayInputStream(buffer); streams[i - 1] = inputStream; // File file = File.createTempFile("new_url", FuJianSuffix); // FileOutputStream out = new FileOutputStream(file); // out.write(buffer); // out.close(); // streams[i - 1] = new FileInputStream(file); // DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null); // FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName()); // int bytesRead = 0; // buffer = new byte[8192]; // try { // FileInputStream fis = new FileInputStream(file); // OutputStream os = item.getOutputStream(); // int len = 8192; // while ((bytesRead = fis.read(buffer, 0, len)) != 
-1) { // os.write(buffer, 0, bytesRead); // } // os.close(); // fis.close(); // } catch (IOException e) { // e.printStackTrace(); // } // MultipartFile multipartFile = new CommonsMultipartFile(item); // UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile); // switch (FuJianSuffix) { // case ".pdf": // streams[i - 1] = new FileInputStream(fileDTO.getPath()); // break; // case ".jpeg": // patentCell.setPicUrl(fileDTO.getPath()); // break; // case ".tiff": // otherUrls.add(fileDTO.getPath()); // break; // } //如果不是说明书pdf if (!FuJianSuffix.equals(".pdf")) { File file = File.createTempFile("new_url", FuJianSuffix); FileOutputStream out = new FileOutputStream(file); out.write(buffer); out.close(); streams[i - 1] = new FileInputStream(file); DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null); FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName()); int bytesRead = 0; buffer = new byte[8192]; try { FileInputStream fis = new FileInputStream(file); OutputStream os = item.getOutputStream(); int len = 8192; while ((bytesRead = fis.read(buffer, 0, len)) != -1) { os.write(buffer, 0, bytesRead); } os.close(); fis.close(); } catch (IOException e) { e.printStackTrace(); } MultipartFile multipartFile = new CommonsMultipartFile(item); UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile); //如果是摘要附图 if (FuJianSuffix.equals(".jpeg")) { patentCell.setPicUrl(fileDTO.getPath()); } //如果是其他附图 if (FuJianSuffix.equals(".tiff")) { otherUrls.add(fileDTO.getPath()); } } } //如果是说明书pdf if (FuJianSuffix.equals(".pdf")) { //合并说明书多个pdf文件 PdfDocumentBase doc = PdfDocument.mergeFiles(streams); //保存结果文件 doc.save("merge.pdf", FileFormat.PDF); doc.close(); //手动将合并后的pdf文件转成MultipartFile,上传文件并获取path装载到patentCell File file = new File("merge.pdf"); DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null); FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName()); int 
bytesRead = 0; byte[] buffer = new byte[8192]; try { FileInputStream fis = new FileInputStream(file); OutputStream os = item.getOutputStream(); int len = 8192; while ((bytesRead = fis.read(buffer, 0, len)) != -1) { os.write(buffer, 0, bytesRead); } os.close(); fis.close(); } catch (IOException e) { e.printStackTrace(); } MultipartFile multipartFile = new CommonsMultipartFile(item); UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile); patentCell.setPdf(fileDTO); } else if (FuJianSuffix.equals(".tiff")) { patentCell.setOtherUrls(otherUrls); } } /** * 获取Image信息方法(从信息中获取三种附图的检索参数) * * @param pubNo 公开号对象 */ private ImageInfo getImage(PubNo pubNo) throws IOException { String res = outInterfaceService.getImagesInfo(pubNo); JSONObject jsonObject = JSONObject.parseObject(res); if (!jsonObject.get("status").toString().equals("200")) { //若发生类似400、500等异常(杰哥处理) ThrowException.throwXiaoShiException("Image信息接口无法检索该国家专利"); } //拿到检索结果(未格式化的含有页面等标签的说明书) String unFormatData = jsonObject.get("data").toString(); String data = unFormatData.substring(unFormatData.indexOf("[") + 1, unFormatData.lastIndexOf("]")); ImageInfo imageInfo = JSONObject.parseObject(data, ImageInfo.class); return imageInfo; } /** * 装载说明书方法 * * @param patentCell 实体类对象 * @param pubNo 公开号对象 */ private void setPatentInstructionText(PatentCell patentCell, PubNo pubNo) throws IOException { GetDescriptionInfoParamsDTO getDescriptionInfoParamsDTO = new GetDescriptionInfoParamsDTO() .setCc(pubNo.getCountry()) .setNumber(pubNo.getNumber()) .setKind(pubNo.getKind()); String res = outInterfaceService.getDescriptionInfo(getDescriptionInfoParamsDTO); JSONObject jsonObject = JSONObject.parseObject(res); if (!jsonObject.get("status").toString().equals("200")) { //若发生类似400、500等异常(杰哥处理) //ThrowException.throwXiaoShiException("说明书接口无法检索该国家专利"); log.info("说明书接口无法检索该国家专利"); return; } //拿到检索结果(未格式化的含有页面等标签的说明书) //String unformatInstructionText = JSONObject.parseObject(jsonObject.get("data").toString(), String.class); List 
unformatInstructionText = JSONObject.parseObject(jsonObject.get("data").toString(), List.class);
        StringBuilder builder = new StringBuilder();
        for (String n : unformatInstructionText) {
            builder.append(n);
        }
        // String regex = "
        //
        // (.+?)
        //
        // ";
        // Matcher matcher = Pattern.compile(regex).matcher(unformatInstructionText);
        // if (matcher.find()) {
        //     patentCell.setPatentInstructionText(matcher.group());
        // }
        patentCell.setPatentInstructionText(builder + "");
    }

    /**
     * Loads the claims of a patent into the given cell.
     *
     * <p>Queries the external claims interface with the country, number and kind of
     * {@code pubNo}. When the response is empty, its status is not {@code "200"}, or it carries
     * no {@code data}, the cell is left untouched.
     *
     * @param patentCell cell that receives the claims
     * @param pubNo      publication number identifying the patent
     * @throws IOException declared for the outbound interface call
     */
    private void setPatentClaim(PatentCell patentCell, PubNo pubNo) throws IOException {
        GetClaimsInfoParamsDTO getClaimsInfoParamsDTO = new GetClaimsInfoParamsDTO()
                .setCc(pubNo.getCountry())
                .setNumber(pubNo.getNumber())
                .setKind(pubNo.getKind());
        String res = outInterfaceService.getClaimsInfo(getClaimsInfoParamsDTO);
        JSONObject jsonObject = (res == null || res.isEmpty()) ? null : JSONObject.parseObject(res);
        if (jsonObject == null || !"200".equals(String.valueOf(jsonObject.get("status")))) {
            // The interface could not serve this patent (e.g. a 400/500 style status); skip it.
            log.info("权要接口无法检索该国家专利");
            return;
        }
        Object data = jsonObject.get("data");
        if (data == null) {
            return;
        }
        // fastjson can only be given the raw ArrayList class here, hence the unchecked assignment.
        @SuppressWarnings("unchecked")
        ArrayList<String> unformatRights = JSONObject.parseObject(data.toString(), ArrayList.class);
        patentCell.setRights(unformatRights);
    }

    /**
     * Loads the bibliographic data of a patent into the given cell.
     *
     * <p>As a side effect, the properties of the {@code docdb} publication number are copied
     * into {@code pubNo}, and applicant names are rewritten on {@code patent} with the
     * {@code "(标:)"} suffix removed.
     *
     * @param patentCell cell that receives the bibliographic fields
     * @param patent     bibliographic record to read from
     * @param pubNo      receives a copy of the {@code docdb} publication number
     */
    private void setPatentZhuLu(PatentCell patentCell, PatentZhuLu patent, PubNo pubNo) {
        // Application number: only the docdb-typed entry is used.
        for (AppNo appNo : patent.getAppNos()) {
            if ("docdb".equals(appNo.getType())) {
                patentCell.setApplicationNo(appNo.getCountry() + appNo.getNumber() + appNo.getKind());
            }
        }
        // Application date and country/province.
        patentCell.setApplicationDate(patent.getAppDate());
        patentCell.setCountry(patent.getAppCountry());

        // Publication number: only the docdb-typed entry is used.
        for (PubNo n : patent.getPubNos()) {
            if ("docdb".equals(n.getType())) {
                BeanUtils.copyProperties(n, pubNo);
                patentCell.setPublicNo(n.getCountry() + n.getNumber() + n.getKind());
            }
        }
        // The patent number mirrors the publication number.
        patentCell.setPatentNo(patentCell.getPublicNo());
        // Publication date.
        patentCell.setPubilcDate(patent.getPubDate());

        // Grant publication number and grant publication date are not available from this source.

        // Classification codes: IPCs followed by CPCs. A merged copy is built so that a null
        // list cannot fail and the lists owned by the patent record are not modified.
        List<String> ipCs = patent.getIpCs();
        List<String> cpCs = patent.getCpCs();
        List<String> classifications = new ArrayList<>();
        if (ipCs != null) {
            classifications.addAll(ipCs);
        }
        if (cpCs != null) {
            classifications.addAll(cpCs);
        }
        if (!classifications.isEmpty()) {
            // The first code is the main classification; the full list is stored as well.
            patentCell.setMainIpc(classifications.get(0));
            patentCell.setIpc(classifications);
        }

        // Applicants: drop everything from the "(标:)" marker onwards.
        ArrayList<String> applicationPersons = new ArrayList<>();
        for (Application application : patent.getApplications()) {
            String name = application.getOriginalName();
            int markerIndex = name == null ? -1 : name.indexOf("(标:)");
            if (markerIndex >= 0) {
                name = name.substring(0, markerIndex);
                application.setOriginalName(name);
            }
            applicationPersons.add(name);
        }
        patentCell.setApplicationPersons(applicationPersons);

        // Applicant address is not available from this source.

        // Inventors.
        ArrayList<String> inventors = new ArrayList<>();
        for (Inventor inventor : patent.getInventors()) {
            inventors.add(inventor.getOriginalName());
        }
        patentCell.setInventors(inventors);

        // Current right holder, agent, agency, category classification and current status
        // are not available from this source.

        // Family id.
        patentCell.setFamilyId(patent.getFamilyId());

        // Title and abstract: prefer the original-language text, fall back to English.
        String olTitle = patent.getOlTitle();
        patentCell.setTitle(olTitle == null ? patent.getEnTitle() : olTitle);
        String olAbstract = patent.getOlAbstract();
        patentCell.setAbstrText(olAbstract == null ? patent.getEnAbstract() : olAbstract);

        // Priorities: for epodoc-typed numbers the first two characters are taken as the
        // country and the remainder as the priority number.
        ArrayList<Priority> priorities = new ArrayList<>();
        List<Priorityy> priorties = patent.getPriorties();
        for (Priorityy priorty : priorties) {
            for (PriorityNumber number : priorty.getNumbers()) {
                if ("epodoc".equals(number.getType())) {
                    Priority priority = new Priority()
                            .setPriorityNo(number.getNumber().substring(2))
                            .setPriorityCountry(number.getNumber().substring(0, 2))
                            .setPriorityDate(priorty.getDate());
                    priorities.add(priority);
                }
            }
        }
        patentCell.setPriorities(priorities);
    }

    /**
     * Calls the external interface to fetch one batch of patent bibliographic data.
     *
     * @param conditions search query
     * @param start      value forwarded as the request's start
     * @param size       value forwarded as the request's end
     * @return the parsed {@code data} of the response, or {@code null} when the response is
     *         empty, its status is not {@code "200"}, or it carries no {@code data}
     * @throws IOException declared for the outbound interface call
     */
    private SerachBiblioData getSerachBiblioData(String conditions, Integer start, Integer size) throws IOException {
        GetSearchBiblioParamsDTO getSearchBiblioParamsDTO = new GetSearchBiblioParamsDTO()
                .setQuery(conditions)
                .setStart(start)
                .setEnd(size);
        String res = outInterfaceService.getSearchBiblio(getSearchBiblioParamsDTO);
        if (res == null || res.isEmpty()) {
            return null;
        }
        JSONObject jsonObject = JSONObject.parseObject(res);
        if (jsonObject == null || !"200".equals(String.valueOf(jsonObject.get("status")))) {
            // Any non-200 status (400, 500, ...) is reported to the caller as "no data".
            return null;
        }
        Object data = jsonObject.get("data");
        return data == null ? null : JSONObject.parseObject(data.toString(), SerachBiblioData.class);
    }

    /**
     * Scrapes patent search results from the Zhihuiya (PatSnap) web site.
     *
     * <p>Starts a Chrome session, logs in with a password, submits {@code patentVO} in the
     * search box and reads title, patent number, link, applicants and legal status from the
     * result rows. The browser session is always closed before the method returns.
     *
     * @param patentVO search expression typed into the search box
     * @return one {@code PatentCell} per scraped result row
     * @throws InterruptedException if the thread is interrupted during one of the fixed pauses
     */
    public List<PatentCell> getPatentya(String patentVO) throws InterruptedException {
        // Use the configured driver location, as getPatentStar does, instead of a fixed local path.
        System.setProperty("webdriver.chrome.driver", url);
        ChromeOptions chromeOptions = new ChromeOptions();
        chromeOptions.addArguments("--start-maximized");
        WebDriver driver = new ChromeDriver(chromeOptions);
        try {
            Actions action = new Actions(driver);
            final long timeoutSeconds = 50;
            WebDriverWait wait = new WebDriverWait(driver, timeoutSeconds);

            // Open the account page and switch to password login.
            driver.get("https://account.zhihuiya.com");
            wait.until(ExpectedConditions.presenceOfElementLocated(By.id("tab-password")));
            driver.findElement(By.id("tab-password")).click();

            // NOTE(review): credentials are hard-coded in source; they should come from
            // configuration (e.g. the web login config used elsewhere in this class).
            List<WebElement> loginInputs = driver.findElements(By.className("el-input__inner"));
            loginInputs.get(0).sendKeys("liting@china-wispro.com");
            loginInputs.get(1).sendKeys("Lt199299");
            driver.findElement(By.className("el-button")).click();

            // Give the page a moment, then dismiss the confirmation dialog if one is shown.
            TimeUnit.MILLISECONDS.sleep(1000);
            List<WebElement> alerts = driver.findElements(By.className("patsnap-el-confirm"));
            if (!alerts.isEmpty()) {
                alerts.get(0).click();
            }

            // Navigate to the search page.
            wait.until(ExpectedConditions.presenceOfElementLocated(By.className("master_nav-item__2nd_lv")));
            driver.findElement(By.className("master_nav-item__2nd_lv")).click();

            // Fill the search box and submit.
            wait.until(ExpectedConditions.presenceOfElementLocated(By.className("div-textarea")));
            driver.findElements(By.className("div-textarea")).get(0).sendKeys(patentVO);
            TimeUnit.MILLISECONDS.sleep(2000);
            wait.until(ExpectedConditions.presenceOfElementLocated(By.className("search-form__btn")));
            driver.findElement(By.className("search-form__btn")).click();
            TimeUnit.MILLISECONDS.sleep(1000);

            wait.until(ExpectedConditions.presenceOfElementLocated(By.className("search-result-standard-view__patent")));
            List<WebElement> resultRows =
                    new ArrayList<>(driver.findElements(By.className("search-result-standard-view__patent")));
            // The last matched element is discarded, as before.
            // NOTE(review): presumably it is not a real result row - confirm against the page.
            if (!resultRows.isEmpty()) {
                resultRows.remove(resultRows.size() - 1);
            }

            // Hover each patent-number cell and wait until its link is populated.
            List<WebElement> popovers = driver.findElements(By.className("pn-cell-popover"));
            int hoverCount = Math.min(resultRows.size(), popovers.size());
            for (int i = 0; i < hoverCount; i++) {
                WebElement popover = popovers.get(i);
                action.moveToElement(popover).perform();
                wait.until(ExpectedConditions.attributeToBeNotEmpty(popover, "href"));
                log.info("{}", popover.getAttribute("data-link-data"));
            }

            List<PatentCell> patentCells = new ArrayList<>();
            for (WebElement row : resultRows) {
                PatentCell patentCell = new PatentCell();
                for (WebElement link : row.findElements(By.tagName("a"))) {
                    String type = link.getAttribute("data-link-type");
                    if ("TITLE".equals(type)) {
                        patentCell.setTitle(link.getAttribute("data-link-data"));
                    } else if ("PN".equals(type)) {
                        patentCell.setUrl(link.getAttribute("href"));
                        patentCell.setPatentNo(link.getAttribute("data-link-data"));
                    } else if ("ANC".equals(type)) {
                        patentCell.setApplications(link.getAttribute("data-link-data"));
                    }
                }
                // Read the legal status of this row; a page-wide lookup would give every row
                // the first tag on the page. Fall back to that lookup only when the row has none.
                List<WebElement> rowLegalTags = row.findElements(By.className("legal-tag__LEGAL_STATUS"));
                WebElement legalTag = rowLegalTags.isEmpty()
                        ? driver.findElement(By.className("legal-tag__LEGAL_STATUS"))
                        : rowLegalTags.get(0);
                patentCell.setLegal(legalTag.getText());
                patentCells.add(patentCell);
            }
            return patentCells;
        } finally {
            // Always release the browser process, including when a wait or lookup fails.
            driver.quit();
        }
    }

    /**
     * Rewrites a search condition into the target query syntax.
     *
     * <p>Spaces are removed, {@code &&} becomes {@code *}, {@code ||} becomes {@code +}, and
     * every parenthesised {@code (field=value)} term is rewritten by
     * {@link #formatParam(String, JSONObject)}. The field mapping is the last JSON object of the
     * array returned by {@code FileUtils.readerMethod()}.
     *
     * @param condition condition to rewrite
     * @return the rewritten condition
     * @throws IOException declared for reading the mapping
     */
    public String formatConditions(String condition) throws IOException {
        String jsons = FileUtils.readerMethod();
        List<JSONObject> sources = JSONObject.parseArray(jsons, JSONObject.class);
        JSONObject fieldMapping = new JSONObject();
        if (sources != null && !sources.isEmpty()) {
            // Only the last entry of the array is used.
            fieldMapping = sources.get(sources.size() - 1);
        }

        String normalized = condition.replace(" ", "")
                .replace("&&", "*")
                .replace("||", "+");

        // Single pass over every "( ... )" group; quoteReplacement keeps '$' and '\' literal.
        Matcher matcher = Pattern.compile("\\([^)]*\\)").matcher(normalized);
        StringBuffer rewritten = new StringBuffer();
        while (matcher.find()) {
            String cell = formatParam(matcher.group(0), fieldMapping);
            matcher.appendReplacement(rewritten, Matcher.quoteReplacement(cell));
        }
        matcher.appendTail(rewritten);
        return rewritten.toString();
    }

    /**
     * Rewrites one {@code (field=value)} term as {@code (value/mappedField)}.
     *
     * @param param      the term, with or without its parentheses
     * @param jsonObject mapping from field name to target field code
     * @return the rewritten term, e.g. {@code (value/code)}
     * @throws IllegalArgumentException if the term has no {@code field=value} shape or the
     *                                  field has no entry in {@code jsonObject}
     */
    public String formatParam(String param, JSONObject jsonObject) {
        String stripped = param.replace("(", "").replace(")", "");
        // Limit 2 keeps any '=' that belongs to the value itself.
        String[] params = stripped.split("=", 2);
        if (params.length < 2 || params[1].isEmpty()) {
            throw new IllegalArgumentException("Malformed search term, expected (field=value): " + param);
        }
        Object mappedField = jsonObject.get(params[0]);
        if (mappedField == null) {
            throw new IllegalArgumentException("No mapping configured for search field: " + params[0]);
        }
        return "(" + params[1] + "/" + mappedField + ")";
    }

    /**
     * Splits a string into a mutable list.
     *
     * @param value text to split; {@code null} or empty yields an empty list
     * @param split separator, interpreted as a regular expression by {@link String#split(String)}
     * @return a new mutable list holding the parts
     */
    public List<String> StringToList(String value, String split) {
        // isEmpty() instead of a reference comparison with "", which missed non-interned empty strings.
        if (value == null || value.isEmpty()) {
            return new ArrayList<>();
        }
        return new ArrayList<>(Arrays.asList(value.split(split)));
    }
}