package cn.cslg.pas.service.upLoadPatent; import cn.cslg.pas.common.model.dto.UploadFileDTO; import cn.cslg.pas.common.model.outApi.PatentStarListDto; import cn.cslg.pas.common.model.vo.UploadParamsVO; import cn.cslg.pas.common.model.vo.outApi.StarPatentVO; import cn.cslg.pas.common.utils.FileUtils; import cn.cslg.pas.common.utils.RemoveHtmlTagsUtils; import cn.cslg.pas.domain.*; import cn.cslg.pas.domain.asso.AssoOsTaskQrtzTask; import cn.cslg.pas.service.UploadPatentBatchService; import cn.cslg.pas.service.asso.AssoOsTaskQrtzTaskService; import cn.cslg.pas.service.outApi.PatentStarApiService; import com.alibaba.fastjson.JSON; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.fileupload.FileItem; import org.apache.commons.fileupload.disk.DiskFileItemFactory; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.commons.CommonsMultipartFile; import java.io.*; import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * 专利之星类型任务解析获取专利类 * * @Author chenyu * @Date 2023/6/25 */ @Slf4j @RequiredArgsConstructor @Service public class ExcutePatentDataStar implements IExcutePatentData { private final AssoOsTaskQrtzTaskService assoOsTaskQrtzTaskService; private final PatentStarApiService patentStarApiService; private final FileUtils fileUtils; private final PantentQueueService pantentQueueService; private final UploadPatentBatchService uploadPatentBatchService; /** * 解析获取专利数据 * * @param task 任务 */ @Override public void startExcute(Task task) throws IOException { //从任务关联网站导入任务表中取出当前任务 List assoOsTaskQrtzTasks = assoOsTaskQrtzTaskService.list(new LambdaQueryWrapper().eq(AssoOsTaskQrtzTask::getTaskId, task.getId())); AssoOsTaskQrtzTask assoOsTaskQrtzTask = assoOsTaskQrtzTasks.get(0); //从任务数据中获取下载字段、检索式 String cellsStr = assoOsTaskQrtzTask.getConfigCells(); List cells = Arrays.asList(cellsStr.split(",")); String conditions = assoOsTaskQrtzTask.getConditions(); //定义每次检索的专利数量(每次检索50件) int size = 50; //获得专利总数量 Integer count = task.getTotal(); //1.根据专利总数量count遍历检索专利 int pageNum = 0; //页码 //int exceptionNum = 0; //中途出错未生产完成的专利数量 for (int i = 1; i <= count; i += size) { pageNum++; PatentStarListDto patentStarListDto = new PatentStarListDto() .setCurrentQuery(conditions) .setOrderBy("AD") .setOrderByType("DESC") .setPageNum(pageNum) .setRowCount(size) .setDBType("CN"); //调用一般接口 Map resultMap = patentStarApiService.patentStarSearchApi(patentStarListDto); if (resultMap == null || (Integer) resultMap.get("total") == 0) { continue; } //调用一般接口返回一批专利著录相关数据 List patents = (List) resultMap.get("records"); //遍历50个专利 for (StarPatentVO starPatent : patents) { try { UploadParamsVO uploadParamsVO = new UploadParamsVO(); setPatentZhuLu(starPatent, uploadParamsVO); //保存专利基础数据(专利表"os_patent") uploadPatentBatchService.getOneOrInsertOne(uploadParamsVO); PQueueData pQueueData = new PQueueData() .setTask(task) .setStarPatent(starPatent) .setUploadParamsVO(uploadParamsVO); //装载专利著录 if (cells.contains("1")) { pantentQueueService.zhuluToPQueue(pQueueData); } //装载权要 if (cells.contains("2")) { //setPatentClaim(starPatent, uploadParamsVO); pantentQueueService.rightToPQueue(pQueueData); } //装载说明书文本 if (cells.contains("3")) { //setPatentInstructionText(starPatent, uploadParamsVO); pantentQueueService.instructionTextToPQueue(pQueueData); } //装载说明书pdf if (cells.contains("4")) { //setPatentInstructionPDF(starPatent, uploadParamsVO); pantentQueueService.instructionPDFToPQueue(pQueueData); } //装载摘要附图 if (cells.contains("6")) { //setPatentPicture(starPatent, uploadParamsVO); pantentQueueService.imageToPQueue(pQueueData); } //将该专利存入5个消费者队列 //ProjectImportPatentVO projectImportPatentVO = new ProjectImportPatentVO(); //projectImportPatentVO.setProjectId(task.getProjectId()); //task.setExceptionNum(exceptionNum); //专利丢入5个消费者队列,并唤醒5个消费者线程 //pantentQueueService.patentToQueue(task, uploadParamsVO, projectImportPatentVO); } catch (Exception e) { e.printStackTrace(); //exceptionNum++; //跳过当前生产出问题的专利,继续生产下一个专利 } } } } /** * 装载著录方法 * * @param starPatent 专利之星著录对象 * @param uploadParamsVO 专利实体类对象 */ public void setPatentZhuLu(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) { //以下 ↓装载的是调用"一般检索"接口返回的专利相关数据 Patent patent = new Patent(); //装载专利号 patent.setPatentNo(starPatent.getPatentNo()); //装载摘要 patent.setAbstractStr(starPatent.getAbstractStr()); //装载标题 patent.setName(starPatent.getName()); //装载申请号 patent.setApplicationNo(starPatent.getApplicationNo()); //装载申请日 if (starPatent.getApplicationDate() != null && !starPatent.getApplicationDate().equals("")) { patent.setApplicationDate(Integer.parseInt(starPatent.getApplicationDate())); } //装载公开号 patent.setPublicNo(starPatent.getPublicNo()); //装载公开日 if (starPatent.getPublicDate() != null && !starPatent.getPublicDate().equals("")) patent.setPublicDate(Integer.parseInt(starPatent.getPublicDate())); //装载申请人 if (starPatent.getAbstractStr() != null && !starPatent.getAbstractStr().equals("")) { uploadParamsVO.setPatentApplicantOriginalName(Arrays.asList(starPatent.getApplicantStr().split(";"))); } //装载权利人 if (starPatent.getCurrentApplicantStr() != null && !starPatent.getCurrentApplicantStr().equals("")) { uploadParamsVO.setPatentApplicantCurrentName(Arrays.asList(starPatent.getCurrentApplicantStr().split(";"))); } //装载IPC分类号 if (starPatent.getIpcListStr() != null && !starPatent.getIpcListStr().equals("")) { String[] ipcArr = starPatent.getIpcListStr().split(";"); //装载IPC分类号 uploadParamsVO.setMainIpc(ipcArr[0]); uploadParamsVO.setIpcList(Arrays.asList(ipcArr)); } //以下 ↓装载的是调用"获得中国专利著录"接口返回的专利相关数据 String appNo = null; if (starPatent.getApplicationNo().contains(".")) { appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf(".")); } else { appNo = starPatent.getApplicationNo(); } //调用中国专利著录接口返回的专利相关数据最外层是一个集合"[]",但是集合中只有一个对象"{}",以下方式处理 String chinaPatentZhuLuStr = patentStarApiService.getCnBibApi(appNo); //chinaPatentZhuLuStr = chinaPatentZhuLuStr.substring(chinaPatentZhuLuStr.indexOf("["), chinaPatentZhuLuStr.lastIndexOf("[")).trim(); //ChinaPatentZhuLu chinaPatentZhuLu = JSONObject.parseObject(chinaPatentZhuLuStr, ChinaPatentZhuLu.class); //以上暂无需处理 ↑ 以下 ↓有现成的json数组字符串转为集合方法 List chinaPatentZhuLus = JSON.parseArray(chinaPatentZhuLuStr, ChinaPatentZhuLu.class); ChinaPatentZhuLu chinaPatentZhuLu = chinaPatentZhuLus.get(0); //装载申请人地址 ArrayList patentApplicantOriginalAddresss = new ArrayList<>(); if (chinaPatentZhuLu.getDZ().contains(" ")) { patentApplicantOriginalAddresss.add(chinaPatentZhuLu.getDZ().substring(chinaPatentZhuLu.getDZ().indexOf(" ") + 1)); } else { patentApplicantOriginalAddresss.add(chinaPatentZhuLu.getDZ()); } uploadParamsVO.setPatentApplicantOriginalAddress(patentApplicantOriginalAddresss); //装载代理人 List patentAgents = Arrays.asList(chinaPatentZhuLu.getAT().split(";")); ArrayList patentAgentList = new ArrayList<>(); for (String n : patentAgents) { PatentAgent patentAgent = new PatentAgent(); patentAgent.setName(n); patentAgentList.add(patentAgent); } uploadParamsVO.setPatentAgentList(patentAgentList); //装载代理机构地址 if (chinaPatentZhuLu.getAGN() != null && !chinaPatentZhuLu.getAGN().equals("")) { String agencyAddress; if (chinaPatentZhuLu.getAGN().contains(" ")) { agencyAddress = chinaPatentZhuLu.getAGN().substring(0, chinaPatentZhuLu.getAGN().lastIndexOf(" ")); } else { agencyAddress = chinaPatentZhuLu.getAGN(); } patent.setAgencyId(agencyAddress); } //装载发明人 List patentInventorNames = Arrays.asList(chinaPatentZhuLu.getIV().split(";")); ArrayList patentInventors = new ArrayList<>(); for (String patentInventorName : patentInventorNames) { PatentInventor patentInventor = new PatentInventor(); patentInventor.setName(patentInventorName); patentInventors.add(patentInventor); } uploadParamsVO.setPatentInventorList(patentInventors); //装载优先权号、优先权国家、优先权日 String priorityInfo = chinaPatentZhuLu.getPR(); patent.setPriorityNo(priorityInfo); //以下 ↓装载的是调用"获得同族专利"接口返回的专利相关数据 String familyPatentNoStr = patentStarApiService.getFamilyByPubNoApi(starPatent.getPatentNo()); FamilyPatentNo familyPatentNo = JSON.parseObject(familyPatentNoStr, FamilyPatentNo.class); //装载同族号 if (familyPatentNo.getFamilyinfo() != null && !familyPatentNo.getFamilyinfo().equals("")) { List simpleFamily = Arrays.asList(familyPatentNo.getFamilyinfo().split(";")); uploadParamsVO.setSimpleFamily(simpleFamily); } //以下 ↓装载的是调用"获得中国专利法律状态"接口返回的专利相关数据 String cnLegalApiStr = patentStarApiService.getCnLegalApi(appNo); List chinaLeagalStatuses = JSON.parseArray(cnLegalApiStr, ChinaLeagalStatus.class); ChinaLeagalStatus chinaLeagalStatus = chinaLeagalStatuses.get(0); //装载法律状态 uploadParamsVO.setPatentSimpleStatus(chinaLeagalStatus.getLegalStatus()); uploadParamsVO.setSimpleStatus(chinaLeagalStatus.getLegalStatus()); //最后将 patent装载到 uploadParamsVO uploadParamsVO.setPatent(patent); } /** * 装载权要方法 * * @param starPatent 专利之星著录对象 * @param uploadParamsVO 专利实体类对象 */ public void setPatentClaim(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) throws IOException { String appNo = null; if (starPatent.getApplicationNo().contains(".")) { appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf(".")); } else { appNo = starPatent.getApplicationNo(); } //根据申请号调用"获得中国专利全文文本"接口,获得包含各种xml标签的专利全文内容的长字符串 cnFullXmlStr String cnFullXmlStr = patentStarApiService.getCnFullXmlApi(appNo); //使用正则表达式拼接出权要原文 String regex = "(?<=)[\\w\\W]+?(?=)"; Pattern compile = Pattern.compile(regex); Matcher matcher = compile.matcher(cnFullXmlStr); StringBuilder builder = new StringBuilder(); while (matcher.find()) { builder.append(matcher.group()).append("\r\n"); } String patentRightText = builder + ""; //使用工具类去除字符串文本中的所有HTML格式标签 patentRightText = RemoveHtmlTagsUtils.removeHtmlTags(patentRightText); //装载权利要求原文 PatentRight patentRight = new PatentRight(); patentRight.setContent(patentRightText); uploadParamsVO.setPatentRight(patentRight); } /** * 装载说明书文本 * * @param starPatent 专利之星著录对象 * @param uploadParamsVO 专利实体类对象 */ public void setPatentInstructionText(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) throws IOException { String appNo = null; if (starPatent.getApplicationNo().contains(".")) { appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf(".")); } else { appNo = starPatent.getApplicationNo(); } //根据申请号调用"获得中国专利全文文本"接口,获得包含各种xml标签的专利全文内容的长字符串 cnFullXmlStr String cnFullXmlStr = patentStarApiService.getCnFullXmlApi(appNo); //使用正则表达式拼接出说明书文本全文 String regex = "(?<=

)"; Pattern compile = Pattern.compile(regex); Matcher matcher = compile.matcher(cnFullXmlStr); StringBuilder builder = new StringBuilder(); while (matcher.find()) { String oldRow = matcher.group(); if (oldRow.contains("num=\"n")) { oldRow = oldRow.substring(oldRow.indexOf("num=\"n") + 6); oldRow = "[" + oldRow; oldRow = oldRow.replace("\">", "]"); } else if (oldRow.contains("num=")) { oldRow = oldRow.substring(oldRow.indexOf("num=") + 5); oldRow = "[" + oldRow; oldRow = oldRow.replace("\">", "]"); } else { oldRow = oldRow.substring(oldRow.indexOf("\">") + 2); } builder.append(oldRow).append("\r\n"); } String instructionText = builder + ""; //使用工具类去除字符串文本中的所有HTML格式标签 instructionText = RemoveHtmlTagsUtils.removeHtmlTags(instructionText); //装载说明书文本全文 PatentInstructionText patentInstructionText = new PatentInstructionText(); patentInstructionText.setManual(instructionText); uploadParamsVO.setPatentInstructionText(patentInstructionText); } /** * 装载说明书pdf * * @param starPatent 专利之星著录对象 * @param uploadParamsVO 专利实体类对象 */ public void setPatentInstructionPDF(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) { String patentNo = starPatent.getPatentNo(); //根据专利号调用"获得世界专利pdf"接口,获得pdf的url地址 String pdfUrl = patentStarApiService.getEnPdfApi(patentNo); try { URL url = new URL(pdfUrl); //想要读取的url地址 InputStream in = url.openStream(); File file = File.createTempFile("new_url", ".pdf"); //创建文件 OutputStream os = new FileOutputStream(file); //创建文件输出流 int bytesRead; byte[] buffer = new byte[8192]; int len = 8192; while ((bytesRead = in.read(buffer, 0, len)) != -1) { os.write(buffer, 0, bytesRead); } //关闭释放流 os.close(); in.close(); DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null); FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName()); buffer = new byte[8192]; FileInputStream fis = new FileInputStream(file); OutputStream fos = item.getOutputStream(); len = 8192; while ((bytesRead = fis.read(buffer, 0, len)) != -1) { fos.write(buffer, 0, bytesRead); } //关闭释放流 fos.close(); fis.close(); MultipartFile multipartFile = new CommonsMultipartFile(item); UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile); uploadParamsVO.setFileDTO(fileDTO); } catch (Exception e) { e.printStackTrace(); } } /** * 装载摘要附图 * * @param starPatent 专利之星著录对象 * @param uploadParamsVO 专利实体类对象 */ public void setPatentPicture(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) { String appNo = null; if (starPatent.getApplicationNo().contains(".")) { appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf(".")); } else { appNo = starPatent.getApplicationNo(); } //根据申请号调用"获得中国专利摘要附图"接口,获得摘要附图的url地址 String pictureUrl = patentStarApiService.getPictureApi(appNo); try { URL url = new URL(pictureUrl); //想要读取的url地址 InputStream in = url.openStream(); File file = File.createTempFile("new_url", ".jpg"); //创建文件 OutputStream os = new FileOutputStream(file); //创建文件输出流 int bytesRead; byte[] buffer = new byte[8192]; int len = 8192; while ((bytesRead = in.read(buffer, 0, len)) != -1) { os.write(buffer, 0, bytesRead); } //关闭释放流 os.close(); in.close(); DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null); FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName()); buffer = new byte[8192]; FileInputStream fis = new FileInputStream(file); OutputStream fos = item.getOutputStream(); len = 8192; while ((bytesRead = fis.read(buffer, 0, len)) != -1) { fos.write(buffer, 0, bytesRead); } //关闭释放流 fos.close(); fis.close(); MultipartFile multipartFile = new CommonsMultipartFile(item); UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile); uploadParamsVO.setFileDTO(fileDTO); } catch (Exception e) { e.printStackTrace(); } } }