package cn.cslg.pas.service.upLoadPatent;

import cn.cslg.pas.common.model.PatentCell;
import cn.cslg.pas.common.model.dto.*;
import cn.cslg.pas.common.model.vo.UploadParamsVO;
import cn.cslg.pas.common.utils.FileUtils;
import cn.cslg.pas.common.utils.StringUtils;
import cn.cslg.pas.common.utils.ThrowException;
import cn.cslg.pas.domain.*;
import cn.cslg.pas.domain.asso.AssoOsTaskQrtzTask;
import cn.cslg.pas.exception.XiaoShiException;
import cn.cslg.pas.service.*;
import cn.cslg.pas.service.asso.AssoOsTaskQrtzTaskService;
import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.spire.pdf.FileFormat;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.PdfDocumentBase;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.commons.CommonsMultipartFile;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;

/**
 * Retrieves and parses patent data for EPO-type import tasks.
 *
 * <p>For every patent returned by the search interface this service assembles a
 * {@link PatentCell} (bibliographic data, claims, description text and attachments,
 * depending on the fields configured on the task), stores the core patent record and
 * then hands the remaining data to the patent queue.
 *
 * @author chenyu
 * @date 2023/6/16
 */
@Slf4j
@RequiredArgsConstructor
@Service
public class ExcutePatentDataEpo implements IExcutePatentData {

    /** Number of patents requested from the search interface per call. */
    private static final int SEARCH_BATCH_SIZE = 50;

    /** File name reported for the merged description PDF when it is uploaded. */
    private static final String MERGED_PDF_NAME = "merge.pdf";

    private final FileUtils fileUtils;
    private final PantentQueueService pantentQueueService;
    private final AssoOsTaskQrtzTaskService assoOsTaskQrtzTaskService;
    private final OutInterfaceService outInterfaceService;
    private final PatentInstructionService patentInstructionService;
    private final PatentAgencyService patentAgencyService;
    private final PatentAffairService patentAffairService;
    private final ProjectPatentLinkService projectPatentLinkService;
    private final PatentService patentService;
    private final PatentImageService patentImageService;

    /**
     * Retrieves and parses the patent data of a task.
     *
     * <p>The search is paged in windows of {@value #SEARCH_BATCH_SIZE} patents, from 1 up to
     * {@code task.getTotal()}. Windows for which the search interface returns nothing are skipped.
     *
     * @param task the import task
     * @throws IOException if the search, claims or description interface fails with an I/O error
     */
    @Override
    public void startExcute(Task task) throws IOException {
        // Fetch the import configuration that is linked to this task.
        List<AssoOsTaskQrtzTask> assoOsTaskQrtzTasks = assoOsTaskQrtzTaskService.list(
                new LambdaQueryWrapper<AssoOsTaskQrtzTask>().eq(AssoOsTaskQrtzTask::getTaskId, task.getId()));
        AssoOsTaskQrtzTask assoOsTaskQrtzTask = assoOsTaskQrtzTasks.get(0);

        // Fields to download (comma separated ids) and the search expression.
        List<String> cells = Arrays.asList(assoOsTaskQrtzTask.getConfigCells().split(","));
        String conditions = assoOsTaskQrtzTask.getConditions();

        // Total number of patents of the task.
        Integer count = task.getTotal();

        for (int i = 1; i <= count; i += SEARCH_BATCH_SIZE) {
            // Search the bibliographic data of patents i .. i + SEARCH_BATCH_SIZE - 1.
            SerachBiblioData serachBiblioData = getSerachBiblioData(conditions, i, i + SEARCH_BATCH_SIZE - 1);
            if (serachBiblioData == null || serachBiblioData.getTotals() == 0) {
                continue;
            }
            List<PatentZhuLu> patents = serachBiblioData.getPatents();
            if (patents == null) {
                continue;
            }
            for (PatentZhuLu patent : patents) {
                processPatent(task, cells, patent);
            }
        }
    }

    /**
     * Builds the {@link PatentCell} of one search hit, then stores and enqueues it.
     *
     * <p>Attachments are best effort: when they cannot be fetched, the failure is logged and the
     * data collected so far is still stored. A failure while storing is logged and does not stop
     * the remaining patents of the task.
     */
    private void processPatent(Task task, List<String> cells, PatentZhuLu patent) throws IOException {
        PatentCell patentCell = new PatentCell();
        patentCell.setProjectId(task.getProjectId());
        PubNo pubNO = new PubNo();

        // Bibliographic data
        if (cells.contains("1")) {
            setPatentZhuLu(patentCell, patent, pubNO);
        }
        // Claims
        if (cells.contains("2")) {
            setPatentClaim(patentCell, pubNO);
        }
        // Description text
        if (cells.contains("3")) {
            setPatentInstructionText(patentCell, pubNO);
        }

        try {
            loadAttachments(cells, patentCell, pubNO);
        } catch (XiaoShiException e) {
            // Not every attachment is available, but the bibliographic data is: keep what we have.
            log.info(e.getMessage());
        } catch (Exception e) {
            log.warn("Failed to load attachments of patent {}", patentCell.getPublicNo(), e);
        }

        try {
            persistAndEnqueue(task, patentCell);
        } catch (Exception e) {
            log.error("Failed to store patent {}", patentCell.getPatentNo(), e);
        }
    }

    /**
     * Looks up the attachment metadata of a patent and loads the attachments selected by
     * {@code cells}: "4" description PDF, "6" abstract drawing, "7" other drawings.
     */
    private void loadAttachments(List<String> cells, PatentCell patentCell, PubNo pubNO) throws Exception {
        boolean wantsPdf = cells.contains("4");
        boolean wantsAbstractDrawing = cells.contains("6");
        boolean wantsOtherDrawings = cells.contains("7");
        if (!wantsPdf && !wantsAbstractDrawing && !wantsOtherDrawings) {
            // No attachment requested, so the metadata lookup would be wasted.
            return;
        }

        // Parameters for the attachment download interface
        // (FullDocument -> description PDF, Drawing -> other drawings, FirstPageClipping -> abstract drawing).
        String fullDocumentLink = "", fullDocumentType = "";
        String drawingLink = "", drawingType = "";
        String firstPageClippingLink = "", firstPageClippingType = "";
        Integer fullDocumentPage = 0, drawingPage = 0, firstPageClippingPage = 0;

        ImageInfo imageInfo = getImage(pubNO);
        for (Image image : imageInfo.getImages()) {
            String imageType = image.getImageType();
            if ("FullDocument".equals(imageType)) {
                fullDocumentLink = image.getUrlLink();
                fullDocumentPage = image.getNumberOfPages();
                for (String formatOption : image.getFormatOptions()) {
                    if (formatOption.contains("pdf")) {
                        fullDocumentType = formatOption;
                    }
                }
            } else if ("Drawing".equals(imageType)) {
                drawingLink = image.getUrlLink();
                drawingPage = image.getNumberOfPages();
                for (String formatOption : image.getFormatOptions()) {
                    if (formatOption.contains("tiff")) {
                        drawingType = formatOption;
                    }
                }
            } else if ("FirstPageClipping".equals(imageType)) {
                firstPageClippingLink = image.getUrlLink();
                firstPageClippingPage = image.getNumberOfPages();
                for (String formatOption : image.getFormatOptions()) {
                    if (formatOption.contains("jpeg")) {
                        firstPageClippingType = formatOption;
                    }
                }
            }
        }

        // Description PDF
        if (wantsPdf) {
            setFuJian(fullDocumentLink, fullDocumentPage, fullDocumentType, patentCell, ".pdf");
        }
        // Abstract drawing
        if (wantsAbstractDrawing) {
            setFuJian(firstPageClippingLink, firstPageClippingPage, firstPageClippingType, patentCell, ".jpeg");
        }
        // Other drawings
        if (wantsOtherDrawings) {
            setFuJian(drawingLink, drawingPage, drawingType, patentCell, ".tiff");
        }
    }

    /**
     * Stores the core part of a patent directly and passes the rest to the queue consumer
     * via {@code pantentQueueService.patentToQueue}.
     *
     * <p>Does nothing when the cell carries no patent number.
     */
    private void persistAndEnqueue(Task task, PatentCell patentCell) throws Exception {
        if (patentCell.getPatentNo() == null) {
            return;
        }

        UploadParamsVO uploadParamsVO = new UploadParamsVO();
        Patent patent2 = new Patent();
        patent2.setPatentNo(patentCell.getPatentNo());
        patent2.setAbstractStr(patentCell.getAbstrText());
        patent2.setName(patentCell.getTitle());

        // NOTE(review): the Integer date fields are assumed to hold epoch SECONDS. Casting the
        // millisecond value of Date#getTime() straight to int overflows - confirm the unit
        // against the other writers of these columns.
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
        if (patentCell.getPubilcDate() != null) {
            Date date = simpleDateFormat.parse(patentCell.getPubilcDate());
            Integer ts = (int) (date.getTime() / 1000);
            patent2.setPublicDate(ts);
        }
        patent2.setApplicationNo(patentCell.getApplicationNo());
        if (patentCell.getApplicationDate() != null) {
            Date date = simpleDateFormat.parse(patentCell.getApplicationDate());
            Integer ts = (int) (date.getTime() / 1000);
            patent2.setApplicationDate(ts);
        }

        // Abstract drawing
        patent2.setAbstractPath(patentCell.getPicUrl());
        // Description PDF
        if (patentCell.getPdf() != null) {
            patentInstructionService.edit(patentCell.getPatentNo(), patentCell.getPdf());
        }
        patent2.setPublicNo(patentCell.getPublicNo());
        uploadParamsVO.setSimpleStatus(patentCell.getStatue());
        uploadParamsVO.setPatent(patent2);
        getOneOrInsertOne(uploadParamsVO);

        // Replace the abstract drawing row of this patent in "os_patent_img" so it is not duplicated.
        patentImageService.deleteByPatentIdAndStatus(uploadParamsVO.getPatent().getId(), 1);
        PatentImage patentImage = new PatentImage();
        patentImage.setStatus(1);
        patentImage.setPatentId(uploadParamsVO.getPatent().getId());
        patentImage.setUrl(patentCell.getPicUrl());
        patentImage.setFileName(patentCell.getFileName());
        patentImage.insert();

        if (patentCell.getAgency() != null && !patentCell.getAgency().equals("")) {
            uploadParamsVO.getPatent().setAgencyId(patentAgencyService.getAgencyStringIdByName(patentCell.getAgency()));
        }
        uploadParamsVO.setPatentRight(new PatentRight());
        uploadParamsVO.getPatentRight().setContent(StringUtils.join(patentCell.getRights(), ""));
        uploadParamsVO.setSelfContent(patentCell.getMainRignt());
        uploadParamsVO.setPatentInstructionText(new PatentInstructionText());
        uploadParamsVO.getPatentInstructionText().setManual(patentCell.getPatentInstructionText());

        // Inventors
        List<String> inventors = patentCell.getInventors();
        if (inventors != null && inventors.size() > 0) {
            ArrayList<PatentInventor> patentInventors = new ArrayList<>();
            for (String inventor : inventors) {
                PatentInventor patentInventor = new PatentInventor();
                patentInventor.setName(inventor);
                patentInventors.add(patentInventor);
            }
            uploadParamsVO.setPatentInventorList(patentInventors);
        }

        uploadParamsVO.setPatentApplicantCurrentName(patentCell.getApplicationCurrents());
        uploadParamsVO.setPatentApplicantOriginalName(patentCell.getApplicationPersons());
        uploadParamsVO.setPatentApplicantOriginalAddress(patentCell.getApplicationAddress());
        uploadParamsVO.setIpcList(patentCell.getIpc());
        uploadParamsVO.setMainIpc(patentCell.getMainIpc());

        // Agents
        List<String> agencyPersons = patentCell.getAgencyPersons();
        if (agencyPersons != null && agencyPersons.size() > 0) {
            ArrayList<PatentAgent> patentAgents = new ArrayList<>();
            for (String agencyPerson : agencyPersons) {
                PatentAgent patentAgent = new PatentAgent();
                patentAgent.setName(agencyPerson);
                patentAgents.add(patentAgent);
            }
            uploadParamsVO.setPatentAgentList(patentAgents);
        }

        // Legal events (OS_PATENT_AFFAIR)
        if (patentCell.getPatentAffairs() != null && patentCell.getPatentAffairs().size() > 0) {
            patentAffairService.updatePatientAffairs(patentCell.getPatentAffairs(), uploadParamsVO.getPatent().getId());
        }
        // Link the patent to the project when a project id is present.
        if (patentCell.getProjectId() != null) {
            projectPatentLinkService.updateProjectPatent(patentCell.getProjectId(), uploadParamsVO.getPatent().getId());
        }

        pantentQueueService.patentToQueue(task, uploadParamsVO, null);
    }

    /**
     * Calls the external search interface for one window of bibliographic data.
     *
     * @param conditions the search expression
     * @param start      index of the first patent of the window
     * @param size       index of the LAST patent of the window (it is passed on as the end index,
     *                   despite the parameter name)
     * @return the parsed search result, or {@code null} when the response is empty or its status
     *         is not 200
     * @throws IOException if the external interface fails with an I/O error
     */
    public SerachBiblioData getSerachBiblioData(String conditions, Integer start, Integer size) throws IOException {
        GetSearchBiblioParamsDTO getSearchBiblioParamsDTO = new GetSearchBiblioParamsDTO()
                .setQuery(conditions)
                .setStart(start)
                .setEnd(size);
        String res = outInterfaceService.getSearchBiblio(getSearchBiblioParamsDTO);
        if (res == null || res.equals("")) {
            return null;
        }
        JSONObject jsonObject = JSONObject.parseObject(res);
        if (jsonObject == null || !"200".equals(String.valueOf(jsonObject.get("status")))) {
            // Error statuses such as 400 or 500 are treated as "no result".
            return null;
        }
        Object data = jsonObject.get("data");
        if (data == null) {
            return null;
        }
        return JSONObject.parseObject(data.toString(), SerachBiblioData.class);
    }

    /**
     * Copies the bibliographic data of a search hit into the patent cell.
     *
     * @param patentCell the cell to fill
     * @param patent     the search hit
     * @param pubNo      receives the properties of the "docdb" publication number; it is used
     *                   afterwards to query claims, description and images
     */
    private void setPatentZhuLu(PatentCell patentCell, PatentZhuLu patent, PubNo pubNo) {
        // Application number (docdb format)
        for (AppNo appNo : patent.getAppNos()) {
            if ("docdb".equals(appNo.getType())) {
                patentCell.setApplicationNo(appNo.getCountry() + appNo.getNumber() + appNo.getKind());
            }
        }
        // Application date
        patentCell.setApplicationDate(patent.getAppDate());
        // Country
        patentCell.setCountry(patent.getAppCountry());
        // Publication number (docdb format)
        for (PubNo n : patent.getPubNos()) {
            if ("docdb".equals(n.getType())) {
                BeanUtils.copyProperties(n, pubNo);
                patentCell.setPublicNo(n.getCountry() + n.getNumber() + n.getKind());
            }
        }
        // The publication number doubles as the patent number.
        patentCell.setPatentNo(patentCell.getPublicNo());
        // Publication date
        patentCell.setPubilcDate(patent.getPubDate());

        // Classification: IPC codes followed by CPC codes. A fresh list is used so that the
        // source object is not modified and a missing list does not fail.
        List<String> classifications = new ArrayList<>();
        List<String> ipCs = patent.getIpCs();
        List<String> cpCs = patent.getCpCs();
        if (ipCs != null) {
            classifications.addAll(ipCs);
        }
        if (cpCs != null) {
            classifications.addAll(cpCs);
        }
        if (!classifications.isEmpty()) {
            patentCell.setMainIpc(classifications.get(0));
            patentCell.setIpc(classifications);
        }

        // Applicants; the "(标:)" marker and everything after it is cut off.
        ArrayList<String> applicationPersons = new ArrayList<>();
        for (Application application : patent.getApplications()) {
            if (application.getOriginalName().contains("(标:)")) {
                application.setOriginalName(application.getOriginalName().substring(0, application.getOriginalName().indexOf("(标:)")));
            }
            applicationPersons.add(application.getOriginalName());
        }
        patentCell.setApplicationPersons(applicationPersons);

        // Inventors
        ArrayList<String> inventors = new ArrayList<>();
        for (Inventor inventor : patent.getInventors()) {
            inventors.add(inventor.getOriginalName());
        }
        patentCell.setInventors(inventors);

        // Not filled here (marked "not found" by the original author): grant publication
        // number/date, applicant address, current assignee, agent, agency, category, status.

        // Family id
        patentCell.setFamilyId(patent.getFamilyId());

        // Title: original-language title, falling back to the English one.
        String olTitle = patent.getOlTitle();
        patentCell.setTitle(olTitle == null ? patent.getEnTitle() : olTitle);

        // Abstract: original-language abstract, falling back to the English one.
        String olAbstract = patent.getOlAbstract();
        patentCell.setAbstrText(olAbstract == null ? patent.getEnAbstract() : olAbstract);

        // Priorities: the first two characters of an "epodoc" number are the country,
        // the remainder is the priority number.
        ArrayList<Priority> priorities = new ArrayList<>();
        List<Priorityy> priorties = patent.getPriorties();
        for (Priorityy priorty : priorties) {
            for (PriorityNumber number : priorty.getNumbers()) {
                if ("epodoc".equals(number.getType())) {
                    Priority priority = new Priority()
                            .setPriorityNo(number.getNumber().substring(2))
                            .setPriorityCountry(number.getNumber().substring(0, 2))
                            .setPriorityDate(priorty.getDate());
                    priorities.add(priority);
                }
            }
        }
        patentCell.setPriorities(priorities);
    }

    /**
     * Loads the claims of a patent into the patent cell. The cell is left untouched when the
     * claims interface returns nothing usable.
     *
     * @param patentCell the cell to fill
     * @param pubNo      publication number of the patent
     * @throws IOException if the external interface fails with an I/O error
     */
    private void setPatentClaim(PatentCell patentCell, PubNo pubNo) throws IOException {
        GetClaimsInfoParamsDTO getClaimsInfoParamsDTO = new GetClaimsInfoParamsDTO()
                .setCc(pubNo.getCountry())
                .setNumber(pubNo.getNumber())
                .setKind(pubNo.getKind());
        String res = outInterfaceService.getClaimsInfo(getClaimsInfoParamsDTO);
        if (res == null || res.equals("")) {
            log.info("Claims interface returned an empty response");
            return;
        }
        JSONObject jsonObject = JSONObject.parseObject(res);
        if (jsonObject == null || !"200".equals(String.valueOf(jsonObject.get("status")))) {
            // Error statuses such as 400 or 500: the claims of this patent cannot be retrieved.
            log.info("权要接口无法检索该国家专利");
            return;
        }
        Object data = jsonObject.get("data");
        if (data == null) {
            log.info("Claims interface returned no data");
            return;
        }
        // fastjson hands back a raw list; the payload is a JSON array of claim strings.
        @SuppressWarnings("unchecked")
        ArrayList<String> unformatRights = JSONObject.parseObject(data.toString(), ArrayList.class);
        patentCell.setRights(unformatRights);
    }

    /**
     * Loads the description text of a patent into the patent cell. The cell is left untouched
     * when the description interface returns nothing usable.
     *
     * @param patentCell the cell to fill
     * @param pubNo      publication number of the patent
     * @throws IOException if the external interface fails with an I/O error
     */
    private void setPatentInstructionText(PatentCell patentCell, PubNo pubNo) throws IOException {
        GetDescriptionInfoParamsDTO getDescriptionInfoParamsDTO = new GetDescriptionInfoParamsDTO()
                .setCc(pubNo.getCountry())
                .setNumber(pubNo.getNumber())
                .setKind(pubNo.getKind());
        String res = outInterfaceService.getDescriptionInfo(getDescriptionInfoParamsDTO);
        if (res == null || res.equals("")) {
            log.info("Description interface returned an empty response");
            return;
        }
        JSONObject jsonObject = JSONObject.parseObject(res);
        if (jsonObject == null || !"200".equals(String.valueOf(jsonObject.get("status")))) {
            // Error statuses such as 400 or 500: the description of this patent cannot be retrieved.
            log.info("说明书接口无法检索该国家专利");
            return;
        }
        Object data = jsonObject.get("data");
        if (data == null) {
            log.info("Description interface returned no data");
            return;
        }
        // fastjson hands back a raw list; the payload is a JSON array of text fragments.
        @SuppressWarnings("unchecked")
        List<String> unformatInstructionText = JSONObject.parseObject(data.toString(), List.class);
        if (unformatInstructionText == null) {
            return;
        }
        StringBuilder builder = new StringBuilder();
        for (String n : unformatInstructionText) {
            builder.append(n);
        }
        patentCell.setPatentInstructionText(builder.toString());
    }

    /**
     * Fetches the image metadata of a patent; it carries the parameters needed to download
     * the three kinds of attachments.
     *
     * @param pubNo publication number of the patent
     * @return the parsed image metadata
     * @throws IOException if the external interface fails with an I/O error
     */
    private ImageInfo getImage(PubNo pubNo) throws IOException {
        String res = outInterfaceService.getImagesInfo(pubNo);
        JSONObject jsonObject = (res == null || res.equals("")) ? null : JSONObject.parseObject(res);
        if (jsonObject == null || !"200".equals(String.valueOf(jsonObject.get("status")))) {
            // Missing response or an error status such as 400 or 500.
            ThrowException.throwXiaoShiException("Image信息接口无法检索该国家专利");
        }
        // The payload is wrapped in a JSON array: parse what is between the outermost brackets.
        String unFormatData = jsonObject.get("data").toString();
        String data = unFormatData.substring(unFormatData.indexOf("[") + 1, unFormatData.lastIndexOf("]"));
        return JSONObject.parseObject(data, ImageInfo.class);
    }

    /**
     * Downloads one kind of attachment page by page, uploads it and records the result on the
     * patent cell.
     *
     * <ul>
     *   <li>{@code ".pdf"}: all pages are merged into one PDF which is uploaded and stored with
     *       {@code setPdf}; nothing happens when there are no pages.</li>
     *   <li>{@code ".jpeg"}: every page is uploaded; path and file name of the last one are stored
     *       as the abstract drawing.</li>
     *   <li>{@code ".tiff"}: every page is uploaded; the paths are stored with {@code setOtherUrls}.</li>
     * </ul>
     *
     * @param link         attachment link
     * @param page         number of pages of the attachment
     * @param type         format option passed to the download interface
     * @param patentCell   the cell to fill
     * @param FuJianSuffix attachment suffix: {@code ".pdf"}, {@code ".jpeg"} or {@code ".tiff"}
     * @throws Exception if downloading, merging or uploading fails
     */
    private void setFuJian(String link, Integer page, String type, PatentCell patentCell, String FuJianSuffix) throws Exception {
        int pageCount = page == null ? 0 : page;
        boolean isPdf = ".pdf".equals(FuJianSuffix);
        if (isPdf && pageCount <= 0) {
            // No full document available: there is nothing to merge or upload.
            return;
        }

        // Page streams for the PDF merge.
        InputStream[] streams = new InputStream[pageCount];
        // Upload paths of the "other drawings".
        ArrayList<String> otherUrls = new ArrayList<>();

        for (int i = 1; i <= pageCount; i++) {
            GetFuTuParamsDTO getFuTuParamsDTO = new GetFuTuParamsDTO()
                    .setLink(link)
                    .setPage(i)
                    .setType(type);
            byte[] buffer = outInterfaceService.getPatentFile(getFuTuParamsDTO);

            if (isPdf) {
                streams[i - 1] = new ByteArrayInputStream(buffer);
                continue;
            }

            // Drawings are uploaded page by page through a temporary file.
            File file = File.createTempFile("new_url", FuJianSuffix);
            try {
                try (FileOutputStream out = new FileOutputStream(file)) {
                    out.write(buffer);
                }
                UploadFileDTO fileDTO = fileUtils.uploadFile(toMultipartFile(file, file.getName()));
                if (".jpeg".equals(FuJianSuffix)) {
                    patentCell.setPicUrl(fileDTO.getPath());
                    patentCell.setFileName(fileDTO.getFileName());
                }
                if (".tiff".equals(FuJianSuffix)) {
                    otherUrls.add(fileDTO.getPath());
                }
            } finally {
                deleteTempFile(file);
            }
        }

        if (isPdf) {
            // A unique temporary file keeps concurrently running tasks from overwriting each
            // other's merge result; the uploaded item keeps the name "merge.pdf".
            File merged = File.createTempFile("merge", ".pdf");
            try {
                PdfDocumentBase doc = PdfDocument.mergeFiles(streams);
                doc.save(merged.getAbsolutePath(), FileFormat.PDF);
                doc.close();
                UploadFileDTO fileDTO = fileUtils.uploadFile(toMultipartFile(merged, MERGED_PDF_NAME));
                patentCell.setPdf(fileDTO);
            } finally {
                deleteTempFile(merged);
            }
        } else if (".tiff".equals(FuJianSuffix)) {
            patentCell.setOtherUrls(otherUrls);
        }
    }

    /**
     * Copies a local file into a {@link MultipartFile} so it can be passed to the upload utility.
     *
     * @param file     the file to copy
     * @param itemName field name and file name reported by the multipart item
     * @throws IOException if the file cannot be read or copied completely
     */
    private MultipartFile toMultipartFile(File file, String itemName) throws IOException {
        DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
        FileItem item = diskFileItemFactory.createItem(itemName, "text/plain", true, itemName);
        byte[] buffer = new byte[8192];
        try (FileInputStream fis = new FileInputStream(file);
             OutputStream os = item.getOutputStream()) {
            int bytesRead;
            while ((bytesRead = fis.read(buffer, 0, buffer.length)) != -1) {
                os.write(buffer, 0, bytesRead);
            }
        }
        return new CommonsMultipartFile(item);
    }

    /** Deletes a temporary file and logs a warning when that is not possible. */
    private void deleteTempFile(File file) {
        if (file != null && file.exists() && !file.delete()) {
            log.warn("Failed to delete temporary file {}", file.getAbsolutePath());
        }
    }

    /**
     * Looks up the patent of {@code uploadParamsVO} by its (trimmed) patent number. When a record
     * exists its id is copied onto the patent of {@code uploadParamsVO}; otherwise the patent is
     * inserted. Does nothing when {@code uploadParamsVO} carries no patent.
     *
     * @param uploadParamsVO carrier of the patent to look up or insert
     * @date 2022-7-11
     */
    public void getOneOrInsertOne(UploadParamsVO uploadParamsVO) {
        if (uploadParamsVO.getPatent() == null) {
            return;
        }
        uploadParamsVO.getPatent().setPatentNo(uploadParamsVO.getPatent().getPatentNo().trim());
        // Check by patent number whether the record already exists.
        Patent patent = patentService.getByPatentNo(uploadParamsVO.getPatent().getPatentNo());
        if (patent == null) {
            uploadParamsVO.getPatent().insert();
        } else {
            uploadParamsVO.getPatent().setId(patent.getId());
        }
    }
}