123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462 |
- package cn.cslg.pas.service.upLoadPatent;
- import cn.cslg.pas.common.model.dto.UploadFileDTO;
- import cn.cslg.pas.common.model.outApi.PatentStarListDto;
- import cn.cslg.pas.common.model.vo.UploadParamsVO;
- import cn.cslg.pas.common.model.vo.outApi.StarPatentVO;
- import cn.cslg.pas.common.utils.FileUtils;
- import cn.cslg.pas.common.utils.RemoveHtmlTagsUtils;
- import cn.cslg.pas.domain.*;
- import cn.cslg.pas.domain.asso.AssoOsTaskQrtzTask;
- import cn.cslg.pas.service.UploadPatentBatchService;
- import cn.cslg.pas.service.asso.AssoOsTaskQrtzTaskService;
- import cn.cslg.pas.service.outApi.PatentStarApiService;
- import com.alibaba.fastjson.JSON;
- import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
- import lombok.RequiredArgsConstructor;
- import lombok.extern.slf4j.Slf4j;
- import org.apache.commons.fileupload.FileItem;
- import org.apache.commons.fileupload.disk.DiskFileItemFactory;
- import org.springframework.stereotype.Service;
- import org.springframework.web.multipart.MultipartFile;
- import org.springframework.web.multipart.commons.CommonsMultipartFile;
- import java.io.*;
- import java.net.URL;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.List;
- import java.util.Map;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- /**
- * 专利之星类型任务解析获取专利类
- *
- * @Author chenyu
- * @Date 2023/6/25
- */
- @Slf4j
- @RequiredArgsConstructor
- @Service
- public class ExcutePatentDataStar implements IExcutePatentData {
- private final AssoOsTaskQrtzTaskService assoOsTaskQrtzTaskService;
- private final PatentStarApiService patentStarApiService;
- private final FileUtils fileUtils;
- private final PantentQueueService pantentQueueService;
- private final UploadPatentBatchService uploadPatentBatchService;
- /**
- * 解析获取专利数据
- *
- * @param task 任务
- */
- @Override
- public void startExcute(Task task) throws IOException {
- //从任务关联网站导入任务表中取出当前任务
- List<AssoOsTaskQrtzTask> assoOsTaskQrtzTasks = assoOsTaskQrtzTaskService.list(new LambdaQueryWrapper<AssoOsTaskQrtzTask>().eq(AssoOsTaskQrtzTask::getTaskId, task.getId()));
- AssoOsTaskQrtzTask assoOsTaskQrtzTask = assoOsTaskQrtzTasks.get(0);
- //从任务数据中获取下载字段、检索式
- String cellsStr = assoOsTaskQrtzTask.getConfigCells();
- List<String> cells = Arrays.asList(cellsStr.split(","));
- String conditions = assoOsTaskQrtzTask.getConditions();
- //定义每次检索的专利数量(每次检索50件)
- int size = 50;
- //获得专利总数量
- Integer count = task.getTotal();
- //1.根据专利总数量count遍历检索专利
- int pageNum = 0; //页码
- //int exceptionNum = 0; //中途出错未生产完成的专利数量
- for (int i = 1; i <= count; i += size) {
- pageNum++;
- PatentStarListDto patentStarListDto = new PatentStarListDto()
- .setCurrentQuery(conditions)
- .setOrderBy("AD")
- .setOrderByType("DESC")
- .setPageNum(pageNum)
- .setRowCount(size)
- .setDBType("CN");
- //调用一般接口
- Map<String, Object> resultMap = patentStarApiService.patentStarSearchApi(patentStarListDto);
- if (resultMap == null || (Integer) resultMap.get("total") == 0) {
- continue;
- }
- //调用一般接口返回一批专利著录相关数据
- List<StarPatentVO> patents = (List<StarPatentVO>) resultMap.get("records");
- //遍历50个专利
- for (StarPatentVO starPatent : patents) {
- try {
- UploadParamsVO uploadParamsVO = new UploadParamsVO();
- setPatentZhuLu(starPatent, uploadParamsVO);
- //保存专利基础数据(专利表"os_patent")
- uploadPatentBatchService.getOneOrInsertOne(uploadParamsVO);
- PQueueData pQueueData = new PQueueData()
- .setTask(task)
- .setStarPatent(starPatent)
- .setUploadParamsVO(uploadParamsVO);
- //装载专利著录
- if (cells.contains("1")) {
- pantentQueueService.zhuluToPQueue(pQueueData);
- }
- //装载权要
- if (cells.contains("2")) {
- //setPatentClaim(starPatent, uploadParamsVO);
- pantentQueueService.rightToPQueue(pQueueData);
- }
- //装载说明书文本
- if (cells.contains("3")) {
- //setPatentInstructionText(starPatent, uploadParamsVO);
- pantentQueueService.instructionTextToPQueue(pQueueData);
- }
- //装载说明书pdf
- if (cells.contains("4")) {
- //setPatentInstructionPDF(starPatent, uploadParamsVO);
- pantentQueueService.instructionPDFToPQueue(pQueueData);
- }
- //装载摘要附图
- if (cells.contains("6")) {
- //setPatentPicture(starPatent, uploadParamsVO);
- pantentQueueService.imageToPQueue(pQueueData);
- }
- //将该专利存入5个消费者队列
- //ProjectImportPatentVO projectImportPatentVO = new ProjectImportPatentVO();
- //projectImportPatentVO.setProjectId(task.getProjectId());
- //task.setExceptionNum(exceptionNum);
- //专利丢入5个消费者队列,并唤醒5个消费者线程
- //pantentQueueService.patentToQueue(task, uploadParamsVO, projectImportPatentVO);
- } catch (Exception e) {
- e.printStackTrace();
- //exceptionNum++;
- //跳过当前生产出问题的专利,继续生产下一个专利
- }
- }
- }
- }
- /**
- * 装载著录方法
- *
- * @param starPatent 专利之星著录对象
- * @param uploadParamsVO 专利实体类对象
- */
- public void setPatentZhuLu(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) {
- //以下 ↓装载的是调用"一般检索"接口返回的专利相关数据
- Patent patent = new Patent();
- //装载专利号
- patent.setPatentNo(starPatent.getPatentNo());
- //装载摘要
- patent.setAbstractStr(starPatent.getAbstractStr());
- //装载标题
- patent.setName(starPatent.getName());
- //装载申请号
- patent.setApplicationNo(starPatent.getApplicationNo());
- //装载申请日
- if (starPatent.getApplicationDate() != null && !starPatent.getApplicationDate().equals("")) {
- patent.setApplicationDate(Integer.parseInt(starPatent.getApplicationDate()));
- }
- //装载公开号
- patent.setPublicNo(starPatent.getPublicNo());
- //装载公开日
- if (starPatent.getPublicDate() != null && !starPatent.getPublicDate().equals(""))
- patent.setPublicDate(Integer.parseInt(starPatent.getPublicDate()));
- //装载申请人
- if (starPatent.getAbstractStr() != null && !starPatent.getAbstractStr().equals("")) {
- uploadParamsVO.setPatentApplicantOriginalName(Arrays.asList(starPatent.getApplicantStr().split(";")));
- }
- //装载权利人
- if (starPatent.getCurrentApplicantStr() != null && !starPatent.getCurrentApplicantStr().equals("")) {
- uploadParamsVO.setPatentApplicantCurrentName(Arrays.asList(starPatent.getCurrentApplicantStr().split(";")));
- }
- //装载IPC分类号
- if (starPatent.getIpcListStr() != null && !starPatent.getIpcListStr().equals("")) {
- String[] ipcArr = starPatent.getIpcListStr().split(";");
- //装载IPC分类号
- uploadParamsVO.setMainIpc(ipcArr[0]);
- uploadParamsVO.setIpcList(Arrays.asList(ipcArr));
- }
- //以下 ↓装载的是调用"获得中国专利著录"接口返回的专利相关数据
- String appNo = null;
- if (starPatent.getApplicationNo().contains(".")) {
- appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf("."));
- } else {
- appNo = starPatent.getApplicationNo();
- }
- //调用中国专利著录接口返回的专利相关数据最外层是一个集合"[]",但是集合中只有一个对象"{}",以下方式处理
- String chinaPatentZhuLuStr = patentStarApiService.getCnBibApi(appNo);
- //chinaPatentZhuLuStr = chinaPatentZhuLuStr.substring(chinaPatentZhuLuStr.indexOf("["), chinaPatentZhuLuStr.lastIndexOf("[")).trim();
- //ChinaPatentZhuLu chinaPatentZhuLu = JSONObject.parseObject(chinaPatentZhuLuStr, ChinaPatentZhuLu.class);
- //以上暂无需处理 ↑ 以下 ↓有现成的json数组字符串转为集合方法
- List<ChinaPatentZhuLu> chinaPatentZhuLus = JSON.parseArray(chinaPatentZhuLuStr, ChinaPatentZhuLu.class);
- ChinaPatentZhuLu chinaPatentZhuLu = chinaPatentZhuLus.get(0);
- //装载申请人地址
- ArrayList<String> patentApplicantOriginalAddresss = new ArrayList<>();
- if (chinaPatentZhuLu.getDZ().contains(" ")) {
- patentApplicantOriginalAddresss.add(chinaPatentZhuLu.getDZ().substring(chinaPatentZhuLu.getDZ().indexOf(" ") + 1));
- } else {
- patentApplicantOriginalAddresss.add(chinaPatentZhuLu.getDZ());
- }
- uploadParamsVO.setPatentApplicantOriginalAddress(patentApplicantOriginalAddresss);
- //装载代理人
- List<String> patentAgents = Arrays.asList(chinaPatentZhuLu.getAT().split(";"));
- ArrayList<PatentAgent> patentAgentList = new ArrayList<>();
- for (String n : patentAgents) {
- PatentAgent patentAgent = new PatentAgent();
- patentAgent.setName(n);
- patentAgentList.add(patentAgent);
- }
- uploadParamsVO.setPatentAgentList(patentAgentList);
- //装载代理机构地址
- if (chinaPatentZhuLu.getAGN() != null && !chinaPatentZhuLu.getAGN().equals("")) {
- String agencyAddress;
- if (chinaPatentZhuLu.getAGN().contains(" ")) {
- agencyAddress = chinaPatentZhuLu.getAGN().substring(0, chinaPatentZhuLu.getAGN().lastIndexOf(" "));
- } else {
- agencyAddress = chinaPatentZhuLu.getAGN();
- }
- patent.setAgencyId(agencyAddress);
- }
- //装载发明人
- List<String> patentInventorNames = Arrays.asList(chinaPatentZhuLu.getIV().split(";"));
- ArrayList<PatentInventor> patentInventors = new ArrayList<>();
- for (String patentInventorName : patentInventorNames) {
- PatentInventor patentInventor = new PatentInventor();
- patentInventor.setName(patentInventorName);
- patentInventors.add(patentInventor);
- }
- uploadParamsVO.setPatentInventorList(patentInventors);
- //装载优先权号、优先权国家、优先权日
- String priorityInfo = chinaPatentZhuLu.getPR();
- patent.setPriorityNo(priorityInfo);
- //以下 ↓装载的是调用"获得同族专利"接口返回的专利相关数据
- String familyPatentNoStr = patentStarApiService.getFamilyByPubNoApi(starPatent.getPatentNo());
- FamilyPatentNo familyPatentNo = JSON.parseObject(familyPatentNoStr, FamilyPatentNo.class);
- //装载同族号
- if (familyPatentNo.getFamilyinfo() != null && !familyPatentNo.getFamilyinfo().equals("")) {
- List<String> simpleFamily = Arrays.asList(familyPatentNo.getFamilyinfo().split(";"));
- uploadParamsVO.setSimpleFamily(simpleFamily);
- }
- //以下 ↓装载的是调用"获得中国专利法律状态"接口返回的专利相关数据
- String cnLegalApiStr = patentStarApiService.getCnLegalApi(appNo);
- List<ChinaLeagalStatus> chinaLeagalStatuses = JSON.parseArray(cnLegalApiStr, ChinaLeagalStatus.class);
- ChinaLeagalStatus chinaLeagalStatus = chinaLeagalStatuses.get(0);
- //装载法律状态
- uploadParamsVO.setPatentSimpleStatus(chinaLeagalStatus.getLegalStatus());
- uploadParamsVO.setSimpleStatus(chinaLeagalStatus.getLegalStatus());
- //最后将 patent装载到 uploadParamsVO
- uploadParamsVO.setPatent(patent);
- }
- /**
- * 装载权要方法
- *
- * @param starPatent 专利之星著录对象
- * @param uploadParamsVO 专利实体类对象
- */
- public void setPatentClaim(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) throws IOException {
- String appNo = null;
- if (starPatent.getApplicationNo().contains(".")) {
- appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf("."));
- } else {
- appNo = starPatent.getApplicationNo();
- }
- //根据申请号调用"获得中国专利全文文本"接口,获得包含各种xml标签的专利全文内容的长字符串 cnFullXmlStr
- String cnFullXmlStr = patentStarApiService.getCnFullXmlApi(appNo);
- //使用正则表达式拼接出权要原文
- String regex = "(?<=<claim-text>)[\\w\\W]+?(?=</claim-text>)";
- Pattern compile = Pattern.compile(regex);
- Matcher matcher = compile.matcher(cnFullXmlStr);
- StringBuilder builder = new StringBuilder();
- while (matcher.find()) {
- builder.append(matcher.group()).append("\r\n");
- }
- String patentRightText = builder + "";
- //使用工具类去除字符串文本中的所有HTML格式标签
- patentRightText = RemoveHtmlTagsUtils.removeHtmlTags(patentRightText);
- //装载权利要求原文
- PatentRight patentRight = new PatentRight();
- patentRight.setContent(patentRightText);
- uploadParamsVO.setPatentRight(patentRight);
- }
- /**
- * 装载说明书文本
- *
- * @param starPatent 专利之星著录对象
- * @param uploadParamsVO 专利实体类对象
- */
- public void setPatentInstructionText(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) throws IOException {
- String appNo = null;
- if (starPatent.getApplicationNo().contains(".")) {
- appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf("."));
- } else {
- appNo = starPatent.getApplicationNo();
- }
- //根据申请号调用"获得中国专利全文文本"接口,获得包含各种xml标签的专利全文内容的长字符串 cnFullXmlStr
- String cnFullXmlStr = patentStarApiService.getCnFullXmlApi(appNo);
- //使用正则表达式拼接出说明书文本全文
- String regex = "(?<=<p id=\"p)[\\w\\W]+?(?=</p>)";
- Pattern compile = Pattern.compile(regex);
- Matcher matcher = compile.matcher(cnFullXmlStr);
- StringBuilder builder = new StringBuilder();
- while (matcher.find()) {
- String oldRow = matcher.group();
- if (oldRow.contains("num=\"n")) {
- oldRow = oldRow.substring(oldRow.indexOf("num=\"n") + 6);
- oldRow = "[" + oldRow;
- oldRow = oldRow.replace("\">", "]");
- } else if (oldRow.contains("num=")) {
- oldRow = oldRow.substring(oldRow.indexOf("num=") + 5);
- oldRow = "[" + oldRow;
- oldRow = oldRow.replace("\">", "]");
- } else {
- oldRow = oldRow.substring(oldRow.indexOf("\">") + 2);
- }
- builder.append(oldRow).append("\r\n");
- }
- String instructionText = builder + "";
- //使用工具类去除字符串文本中的所有HTML格式标签
- instructionText = RemoveHtmlTagsUtils.removeHtmlTags(instructionText);
- //装载说明书文本全文
- PatentInstructionText patentInstructionText = new PatentInstructionText();
- patentInstructionText.setManual(instructionText);
- uploadParamsVO.setPatentInstructionText(patentInstructionText);
- }
- /**
- * 装载说明书pdf
- *
- * @param starPatent 专利之星著录对象
- * @param uploadParamsVO 专利实体类对象
- */
- public void setPatentInstructionPDF(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) {
- String patentNo = starPatent.getPatentNo();
- //根据专利号调用"获得世界专利pdf"接口,获得pdf的url地址
- String pdfUrl = patentStarApiService.getEnPdfApi(patentNo);
- try {
- URL url = new URL(pdfUrl); //想要读取的url地址
- InputStream in = url.openStream();
- File file = File.createTempFile("new_url", ".pdf"); //创建文件
- OutputStream os = new FileOutputStream(file); //创建文件输出流
- int bytesRead;
- byte[] buffer = new byte[8192];
- int len = 8192;
- while ((bytesRead = in.read(buffer, 0, len)) != -1) {
- os.write(buffer, 0, bytesRead);
- }
- //关闭释放流
- os.close();
- in.close();
- DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
- FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
- buffer = new byte[8192];
- FileInputStream fis = new FileInputStream(file);
- OutputStream fos = item.getOutputStream();
- len = 8192;
- while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
- fos.write(buffer, 0, bytesRead);
- }
- //关闭释放流
- fos.close();
- fis.close();
- MultipartFile multipartFile = new CommonsMultipartFile(item);
- UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
- uploadParamsVO.setFileDTO(fileDTO);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- /**
- * 装载摘要附图
- *
- * @param starPatent 专利之星著录对象
- * @param uploadParamsVO 专利实体类对象
- */
- public void setPatentPicture(StarPatentVO starPatent, UploadParamsVO uploadParamsVO) {
- String appNo = null;
- if (starPatent.getApplicationNo().contains(".")) {
- appNo = starPatent.getApplicationNo().substring(0, starPatent.getApplicationNo().lastIndexOf("."));
- } else {
- appNo = starPatent.getApplicationNo();
- }
- //根据申请号调用"获得中国专利摘要附图"接口,获得摘要附图的url地址
- String pictureUrl = patentStarApiService.getPictureApi(appNo);
- try {
- URL url = new URL(pictureUrl); //想要读取的url地址
- InputStream in = url.openStream();
- File file = File.createTempFile("new_url", ".jpg"); //创建文件
- OutputStream os = new FileOutputStream(file); //创建文件输出流
- int bytesRead;
- byte[] buffer = new byte[8192];
- int len = 8192;
- while ((bytesRead = in.read(buffer, 0, len)) != -1) {
- os.write(buffer, 0, bytesRead);
- }
- //关闭释放流
- os.close();
- in.close();
- DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(16, null);
- FileItem item = diskFileItemFactory.createItem(file.getName(), "text/plain", true, file.getName());
- buffer = new byte[8192];
- FileInputStream fis = new FileInputStream(file);
- OutputStream fos = item.getOutputStream();
- len = 8192;
- while ((bytesRead = fis.read(buffer, 0, len)) != -1) {
- fos.write(buffer, 0, bytesRead);
- }
- //关闭释放流
- fos.close();
- fis.close();
- MultipartFile multipartFile = new CommonsMultipartFile(item);
- UploadFileDTO fileDTO = fileUtils.uploadFile(multipartFile);
- uploadParamsVO.setFileDTO(fileDTO);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- }
|