package cn.cslg.pas.service.common; import cn.cslg.pas.common.dto.PatentColumnDTO; import cn.cslg.pas.common.dto.TranslateDTO; import cn.cslg.pas.common.dto.business.SelectClaimDTO; import cn.cslg.pas.common.dto.es.EsTranslateDTO; import cn.cslg.pas.common.utils.ClaimUtils.ClaimSplitUtils; import cn.cslg.pas.common.vo.EsExplainTextVO; import cn.cslg.pas.common.vo.PatentRightParams; import cn.cslg.pas.common.vo.RePatentClaim; import cn.cslg.pas.common.vo.TranslateVO; import cn.cslg.pas.domain.es.Content; import cn.cslg.pas.domain.es.Patent; import cn.cslg.pas.domain.es.PatentTranslate; import cn.cslg.pas.domain.es.Text; import cn.cslg.pas.exception.XiaoShiException; import co.elastic.clients.elasticsearch.ElasticsearchClient; import co.elastic.clients.elasticsearch._types.InlineScript; import co.elastic.clients.elasticsearch._types.Refresh; import co.elastic.clients.elasticsearch._types.Script; import co.elastic.clients.elasticsearch._types.query_dsl.Query; import co.elastic.clients.elasticsearch._types.query_dsl.QueryBuilders; import co.elastic.clients.elasticsearch.core.IndexResponse; import co.elastic.clients.elasticsearch.core.SearchRequest; import co.elastic.clients.elasticsearch.core.SearchResponse; import co.elastic.clients.elasticsearch.core.UpdateByQueryRequest; import co.elastic.clients.elasticsearch.core.search.Hit; import com.aliyun.alimt20181012.models.TranslateGeneralResponse; import com.aliyun.alimt20181012.models.TranslateGeneralResponseBody; import com.aliyun.tea.TeaException; import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.StringUtils; import org.springframework.beans.BeanUtils; import org.springframework.context.annotation.Lazy; import org.springframework.stereotype.Service; import org.springframework.util.CollectionUtils; import java.io.IOException; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @Service @RequiredArgsConstructor(onConstructor_ = {@Lazy}) public class TranslateService { private final ElasticsearchClient client; //---------------------外部翻译接口------------------------ public static com.aliyun.alimt20181012.Client createClient() throws Exception { String key = "LTAI5tGyG1Q7fKprgg1nWhXj"; String sercet = "Y6Erboh5lEFiRPR4XK8oCPMvUzYGLN"; // 工程代码泄露可能会导致 AccessKey 泄露,并威胁账号下所有资源的安全性。以下代码示例仅供参考。 // 建议使用更安全的 STS 方式,更多鉴权访问方式请参见:https://help.aliyun.com/document_detail/378657.html。 com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config() // 必填,请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_ID。 .setAccessKeyId(key) // 必填,请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。 .setAccessKeySecret(sercet); // Endpoint 请参考 https://api.aliyun.com/product/alimt config.endpoint = "mt.aliyuncs.com"; return new com.aliyun.alimt20181012.Client(config); } public String getTranslateContent(String content) throws Exception { com.aliyun.alimt20181012.Client client = TranslateService.createClient(); com.aliyun.alimt20181012.models.TranslateGeneralRequest translateGeneralRequest = new com.aliyun.alimt20181012.models.TranslateGeneralRequest() .setFormatType("text") .setSourceLanguage("auto") .setTargetLanguage("zh") .setSourceText(content) .setScene("general"); com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); try { // 复制代码运行请自行打印 API 的返回值 TranslateGeneralResponse translateGeneralResponse = client.translateGeneralWithOptions(translateGeneralRequest, runtime); String re = translateGeneralResponse.getBody().getData().translated; return re; } catch (TeaException error) { // 此处仅做打印展示,请谨慎对待异常处理,在工程项目中切勿直接忽略异常。 // 错误 message System.out.println(error.getMessage()); // 诊断地址 System.out.println(error.getData().get("Recommend")); com.aliyun.teautil.Common.assertAsString(error.message); } catch (Exception _error) { TeaException error = new TeaException(_error.getMessage(), _error); // 此处仅做打印展示,请谨慎对待异常处理,在工程项目中切勿直接忽略异常。 // 错误 message System.out.println(error.getMessage()); // 诊断地址 System.out.println(error.getData().get("Recommend")); com.aliyun.teautil.Common.assertAsString(error.message); } return ""; } //根据专利号获取专利信息 public PatentColumnDTO getPatentByPatentNo(String patentNo) throws IOException { PatentColumnDTO patentColumnDTO = new PatentColumnDTO(); SearchRequest.Builder builder = new SearchRequest.Builder(); //设置查询索引 builder.index("patent"); Query query = QueryBuilders.term(t -> t.field("patent_no.keyword").value(patentNo)); builder.query(query); SearchResponse response = client.search(builder.build(), Patent.class); List> hits = response.hits().hits(); for (Hit hit : hits) { Patent patent = hit.source(); BeanUtils.copyProperties(patent, patentColumnDTO); } return patentColumnDTO; } //根据专利号、字段栏位、序号获取专利翻译信息 public List getPatentTranslateByPatentNo(TranslateDTO vo) throws IOException { String patentNo = vo.getPatentNo(); String patentField = vo.getPatentField(); Integer order = vo.getOrder(); SearchRequest.Builder builder = new SearchRequest.Builder(); //设置查询索引 builder.index("translate"); Query q1 = QueryBuilders.term(t -> t.field("patent_no").value(patentNo)); Query q2 = QueryBuilders.term(t -> t.field("patent_field").value(patentField)); Query bool = null; if (order != null && (patentField.equals("3") || patentField.equals("4"))) { Query q3 = QueryBuilders.term(t -> t.field("content.order").value(order)); bool = QueryBuilders.bool(i -> i.must(q1, q2, q3)); } else { bool = QueryBuilders.bool(i -> i.must(q1, q2)); } builder.query(bool); builder.size(1000); builder.trackTotalHits(i -> i.enabled(true)); SearchResponse response = client.search(builder.build(), PatentTranslate.class); List> hits = response.hits().hits(); List translateDTOS = new ArrayList<>(); for (Hit hit : hits) { PatentTranslate translate = hit.source(); EsTranslateDTO translateDTO = new EsTranslateDTO(); BeanUtils.copyProperties(translate, translateDTO); translateDTOS.add(translateDTO); } return translateDTOS; } //添加 public String addPatentTranslate(PatentTranslate translate) throws Exception { IndexResponse indexResponse = client.index(i -> i .index("translate") //传入user对象 .document(translate).refresh(Refresh.True) ); return indexResponse.id(); } //封装添加参数 public String loadingTranslate(String patentNo, String patentField,String language,Boolean ifOrigin,Integer order,String parentSort,String textContent) throws Exception { PatentTranslate translate = new PatentTranslate(); translate.setLanguage(language); translate.setPatentNo(patentNo); translate.setPatentField(patentField); translate.setIfOrigin(ifOrigin); Content content = new Content(); content.setOrder(order); content.setParentSort(parentSort); content.setTextContent(textContent); translate.setContent(content); return this.addPatentTranslate(translate); } //添加标题、摘要原/译文 public Integer addTranslate(Text text, String patentNo,String patentField) throws Exception { int i = 0; String textContent = text.getTextContent(); //原文 String s = this.loadingTranslate(patentNo, patentField, text.getLanguage(), true, 0, "-1", textContent); if (StringUtils.isNotEmpty(s)) { i += 1; } //译文 String translateContent = this.getLimitTranslateContent(textContent); String s1 = this.loadingTranslate(patentNo, patentField, "CN", false, 0, "-1", translateContent); if (StringUtils.isNotEmpty(s1)) { i += 1; } return i; } //------------------------------------返回翻译内容-------------------------- /** * 根据标题和摘要获取翻译内容 * * @param vo * @return */ public TranslateVO getTranslateByTIAndAb(TranslateDTO vo) throws Exception { String patentNo = vo.getPatentNo(); String patentField = vo.getPatentField(); TranslateVO translateVO = new TranslateVO(); List translateDTOS = this.getPatentTranslateByPatentNo(vo); if (CollectionUtils.isEmpty(translateDTOS)) { PatentColumnDTO columnDTO = this.getPatentByPatentNo(patentNo); if (patentField.equals("1")) { List title = columnDTO.getTitle(); if (!CollectionUtils.isEmpty(title)) { Text titleText = title.get(0); Integer translateNum = this.addTranslate(titleText, patentNo,patentField); if (translateNum < 1) { throw new XiaoShiException("添加标题翻译失败"); } } } else if (patentField.equals("2")) { List abstractStr = columnDTO.getAbstractStr(); if (!CollectionUtils.isEmpty(abstractStr)) { Text abstractText = abstractStr.get(0); Integer translateNum = this.addTranslate(abstractText, patentNo,patentField); if (translateNum < 1) { throw new XiaoShiException("添加摘要翻译失败"); } } } try { Thread.sleep(800); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } translateDTOS = this.getPatentTranslateByPatentNo(vo); translateVO = this.loadTIAndAbTranslate(translateDTOS); } else { translateVO = this.loadTIAndAbTranslate(translateDTOS); } return translateVO; } //加载标题、摘要的翻译返回内容 public TranslateVO loadTIAndAbTranslate(List translateDTOS) { TranslateVO translateVO = new TranslateVO(); List tList = translateDTOS.stream().filter(EsTranslateDTO::getIfOrigin).collect(Collectors.toList()); if (!CollectionUtils.isEmpty(tList)) { EsTranslateDTO translateDTO = tList.get(0); translateVO.setOriginalContent(translateDTO.getContent().getTextContent()); } List fList = translateDTOS.stream().filter(i -> !i.getIfOrigin()).collect(Collectors.toList()); if (!CollectionUtils.isEmpty(fList)) { EsTranslateDTO translateDTO = fList.get(0); translateVO.setPatentNo(translateDTO.getPatentNo()); translateVO.setPatentField(translateDTO.getPatentField()); translateVO.setLanguage(translateDTO.getLanguage()); translateVO.setTranslationContent(translateDTO.getContent().getTextContent()); } return translateVO; } /** * 根据权利要求获取翻译内容 * * @param vo * @return */ public List getTranslate(TranslateDTO vo) throws Exception { List translateVOS = new ArrayList<>(); String patentNo = vo.getPatentNo(); String patentField = vo.getPatentField(); List translateDTOS = this.getPatentTranslateByPatentNo(vo); if (CollectionUtils.isEmpty(translateDTOS)) { PatentColumnDTO columnDTO = this.getPatentByPatentNo(patentNo); if (patentField.equals("3")) { List claim = columnDTO.getClaim(); this.addOldClaimTranslate(claim, patentNo, patentField); } else if (patentField.equals("4")) { List explainText = columnDTO.getPublicFullText(); this.addOldPFTTranslate(explainText, patentNo, patentField); } try { Thread.sleep(800); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } translateDTOS = this.getPatentTranslateByPatentNo(vo); translateVOS = this.loadTranslate(translateDTOS); } else { translateVOS = this.loadTranslate(translateDTOS); } return translateVOS; } /** * 添加权利要求原文 * @param claim * @param patentNo * @param patentField * @throws Exception */ public void addOldClaimTranslate(List claim, String patentNo, String patentField) throws Exception { if (!CollectionUtils.isEmpty(claim)) { int i = 0; List tList = claim.stream().filter(Text::getIfOrigin).collect(Collectors.toList()); if (!CollectionUtils.isEmpty(tList)) { Text text = tList.get(0); PatentRightParams params = new PatentRightParams(); params.setContent(text.getTextContent()); params.setCountry(text.getLanguage()); List rePatentClaims = ClaimSplitUtils.formatPatentRight(params); for (RePatentClaim rePatentClaim : rePatentClaims) { if (rePatentClaim.getContent().contains(" ")) { String replace = rePatentClaim.getContent().replace(" ", " "); rePatentClaim.setContent(replace); } //原文 String s = this.loadingTranslate(patentNo, patentField, text.getLanguage(), true, rePatentClaim.getSort(), rePatentClaim.getParentSort(), rePatentClaim.getContent()); if (StringUtils.isNotEmpty(s)) { i += 1; } } if (i < 1) { throw new XiaoShiException("添加权利要求原文失败"); } } } } /** * 添加原文说明书 * @param text * @param patentNo * @param patentField * @throws Exception */ public void addOldPFTTranslate(List text, String patentNo, String patentField) throws Exception { if (!CollectionUtils.isEmpty(text)) { int i = 0; List explainTextVOS = this.splitPatentExplainText(text); for (EsExplainTextVO explainTextVO : explainTextVOS) { //原文 String s = this.loadingTranslate(patentNo, patentField, explainTextVO.getLanguage(), explainTextVO.getIfOrigin(), explainTextVO.getSort(), "-1", explainTextVO.getContent()); if (StringUtils.isNotEmpty(s)) { i += 1; } } if (i < 1) { throw new XiaoShiException("添加说明书原文失败"); } } } //封装说明书、权利要求数据 public List loadTranslate(List translateDTOS) { List VOS = new ArrayList<>(); if (!CollectionUtils.isEmpty(translateDTOS)) { List list = new ArrayList<>(); List tVOS = new ArrayList<>(); List tList = translateDTOS.stream().filter(EsTranslateDTO::getIfOrigin).collect(Collectors.toList()); for (EsTranslateDTO translateDTO : tList) { Content content = translateDTO.getContent(); TranslateVO translateVO = new TranslateVO(); translateVO.setPatentNo(translateDTO.getPatentNo()); translateVO.setLanguage(translateDTO.getLanguage()); translateVO.setPatentField(translateDTO.getPatentField()); translateVO.setOrder(content.getOrder()); translateVO.setOriginalContent(content.getTextContent()); translateVO.setParentSort(content.getParentSort()); tVOS.add(translateVO); } List fList = translateDTOS.stream().filter(i -> !i.getIfOrigin()).collect(Collectors.toList()); if (!CollectionUtils.isEmpty(fList)) { List fVOS = new ArrayList<>(); for (EsTranslateDTO translateDTO : fList) { Content content = translateDTO.getContent(); TranslateVO translateVO = new TranslateVO(); translateVO.setPatentNo(translateDTO.getPatentNo()); translateVO.setLanguage(translateDTO.getLanguage()); translateVO.setPatentField(translateDTO.getPatentField()); translateVO.setOrder(content.getOrder()); translateVO.setTranslationContent(content.getTextContent()); translateVO.setParentSort(content.getParentSort()); fVOS.add(translateVO); } Map fVOMap = new HashMap<>(); for (TranslateVO fVO : fVOS) { fVOMap.put(fVO.getOrder(), fVO); } for (TranslateVO tVO : tVOS) { TranslateVO fVO = fVOMap.get(tVO.getOrder()); if (fVO != null) { TranslateVO translateVO = new TranslateVO(); translateVO.setPatentNo(tVO.getPatentNo()); translateVO.setLanguage(tVO.getLanguage()); translateVO.setPatentField(tVO.getPatentField()); translateVO.setOriginalContent(tVO.getOriginalContent()); translateVO.setParentSort(tVO.getParentSort()); translateVO.setOrder(tVO.getOrder()); translateVO.setTranslationContent(fVO.getTranslationContent()); list.add(translateVO); } else { list.add(tVO); } } } else { list.addAll(tVOS); } List translateVOS = list.stream().sorted(Comparator.comparing(TranslateVO::getOrder)).collect(Collectors.toList()); VOS.addAll(translateVOS); } return VOS; } public List getTranslateOrder(TranslateDTO vo) throws Exception { List translateVOS = new ArrayList<>(); List translateDTOS = this.getPatentTranslateByPatentNo(vo); List fList = translateDTOS.stream().filter(i -> !i.getIfOrigin()).collect(Collectors.toList()); if (!CollectionUtils.isEmpty(translateDTOS) && CollectionUtils.isEmpty(fList)) { int i = 0; for (EsTranslateDTO translateDTO : translateDTOS) { Content content = translateDTO.getContent(); String textContent = content.getTextContent(); String translateContent = this.getLimitTranslateContent(textContent); String s = this.loadingTranslate(translateDTO.getPatentNo(), translateDTO.getPatentField(), "CN", false, content.getOrder(), content.getParentSort(), translateContent); if (StringUtils.isNotEmpty(s)) { i++; } } if (i < 1) { throw new XiaoShiException("添加译文失败"); } try { Thread.sleep(1000); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } translateDTOS = this.getPatentTranslateByPatentNo(vo); translateVOS = this.loadTranslate(translateDTOS); } else { translateVOS = this.loadTranslate(translateDTOS); } return translateVOS; } //判断文本翻译是否超出限制5000 public String getLimitTranslateContent(String content) throws Exception { /*if (content.length() > 5000) { String s = content.substring(0, 5000); int i = s.lastIndexOf(",") + 1; String s1 = s.substring(0, i); str = str + this.getTranslateContent(s1); String s2 = s.substring(i); String s3 = this.getLimitTranslateContent(s2); str = str + s3; } else { str = this.getTranslateContent(content); }*/ String str = ""; String regex = "(? 5000) { String s2 = content.substring(0, 5000); Matcher matcher = pattern.matcher(s2); Matcher matcher1 = pattern1.matcher(s2); int lastIndex = -1; while (matcher.find()) { lastIndex = matcher.end() - 1; // matcher.end() 返回的是匹配项之后的索引,所以减1得到最后一个字符的索引 } if (lastIndex == -1) { while (matcher1.find()) { lastIndex = matcher1.end() - 1; // matcher.end() 返回的是匹配项之后的索引,所以减1得到最后一个字符的索引 } } String s = content.substring(0, lastIndex + 1); String translateContent = this.getTranslateContent(s); str = str + translateContent; content = content.substring(lastIndex + 1); } String translateContent = this.getTranslateContent(content); str = str + translateContent; return str; } /** * 根据文本获取翻译内容 * * @param vo * @return */ public TranslateVO getTranslateByText(TranslateDTO vo) throws Exception { String translateContent = this.getLimitTranslateContent(vo.getContent()); TranslateVO translateVO = new TranslateVO(); translateVO.setOriginalContent(vo.getContent()); translateVO.setTranslationContent(translateContent); return translateVO; } //拆分说明书 public List splitPatentExplainText(List list) { List explainTextVOS = new ArrayList<>(); List collect = list.stream().filter(Text::getIfOrigin).collect(Collectors.toList()); if (!CollectionUtils.isEmpty(collect)) { Text text = collect.get(0); int j = 0; String content = text.getTextContent(); String[] split = content.split("(\r\n){1,}"); for (String s1 : split) { if (StringUtils.isNotEmpty(s1) && !s1.equals(" ")) { EsExplainTextVO textVO = new EsExplainTextVO(); textVO.setSort(j); textVO.setContent(s1); textVO.setLanguage(text.getLanguage()); textVO.setIfOrigin(text.getIfOrigin()); explainTextVOS.add(textVO); j++; } } } return explainTextVOS; } //根据专利号和序号查询是否已有翻译 public List getTranslateOrder(String patentNo, Integer sort) throws IOException { List list = new ArrayList<>(); SearchRequest.Builder builder = new SearchRequest.Builder(); //设置查询索引 builder.index("translate"); Query q1 = QueryBuilders.term(t -> t.field("patent_no").value(patentNo)); Query q2 = QueryBuilders.term(t -> t.field("patent_field").value("3")); Query q3 = QueryBuilders.term(t -> t.field("content.order").value(sort)); Query q4 = QueryBuilders.term(i -> i.field("language").value("CN")); Query bool = QueryBuilders.bool(i -> i.must(q1, q2, q3,q4)); builder.query(bool); builder.size(1000); builder.trackTotalHits(i -> i.enabled(true)); SearchResponse response = client.search(builder.build(), PatentTranslate.class); List> hits = response.hits().hits(); for (Hit hit : hits) { PatentTranslate source = hit.source(); String content = source.getContent().getTextContent(); if (StringUtils.isNotEmpty(content)) { list.add(content); } } return list; } //根据专利号和序号查询是否已有翻译 public Long getTranslateByPatentNo(String patentNo) throws IOException { List list = new ArrayList<>(); SearchRequest.Builder builder = new SearchRequest.Builder(); //设置查询索引 builder.index("translate"); Query q1 = QueryBuilders.term(t -> t.field("patent_no").value(patentNo)); Query q2 = QueryBuilders.term(t -> t.field("patent_field").value("3")); Query q3 = QueryBuilders.term(t -> t.field("if_origin").value(true)); Query bool = QueryBuilders.bool(i -> i.must(q1, q2, q3)); builder.query(bool); builder.size(1000); builder.trackTotalHits(i -> i.enabled(true)); SearchResponse response = client.search(builder.build(), PatentTranslate.class); // List> hits = response.hits().hits(); // for (Hit hit : hits) { // PatentTranslate source = hit.source(); // String content = source.getContent().getTextContent(); // if (StringUtils.isNotEmpty(content)) { // list.add(content); // } // } return response.hits().total().value(); } }