123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529 |
- package cn.cslg.pas.service.business.es;
- import cn.cslg.pas.common.dto.business.EsCountDTO;
- import cn.cslg.pas.common.dto.business.EsCountDetailDTO;
- import cn.cslg.pas.common.utils.parseQueryToTree.expressManager;
- import cn.cslg.pas.common.utils.parseQueryToTree.operateNode;
- import cn.cslg.pas.common.utils.parseQueryToTree.treeNode;
- import cn.cslg.pas.common.vo.EsConfigVO;
- import cn.cslg.pas.common.vo.business.EsCountVO;
- import cn.cslg.pas.domain.es.Patent;
- import cn.cslg.pas.factorys.EsAnalysisBuilderFactory.EsAnalysisBuilderFactory;
- import cn.cslg.pas.factorys.EsAnalysisBuilderFactory.IEsAnalysisBuilder;
- import cn.cslg.pas.factorys.EsCountBuilderFactory.EsCountBuilderFactory;
- import cn.cslg.pas.factorys.EsCountBuilderFactory.IEsCountBuilder;
- import cn.cslg.pas.service.business.CommonService;
- import cn.cslg.pas.service.query.FormatQueryService;
- import cn.hutool.json.JSONUtil;
- import co.elastic.clients.elasticsearch.ElasticsearchClient;
- import co.elastic.clients.elasticsearch._types.aggregations.*;
- import co.elastic.clients.elasticsearch._types.query_dsl.Query;
- import co.elastic.clients.elasticsearch._types.query_dsl.QueryBuilders;
- import co.elastic.clients.elasticsearch.core.SearchRequest;
- import co.elastic.clients.elasticsearch.core.SearchResponse;
- import com.alibaba.fastjson.JSON;
- import lombok.RequiredArgsConstructor;
- import org.apache.commons.lang3.StringUtils;
- import org.springframework.beans.factory.annotation.Autowired;
- import org.springframework.context.annotation.Lazy;
- import org.springframework.stereotype.Service;
- import org.springframework.util.CollectionUtils;
- import java.util.*;
- import java.util.stream.Collectors;
- @Service
- @RequiredArgsConstructor(onConstructor_ = {@Lazy})
- public class EsCountService {
// Field codes whose aggregation response is read as a "children" aggregate
// (see getChildCountDTOS / getChildAnalysisDTOS).
- private final List<String> childList = Arrays.asList("field");
// Field codes whose aggregation response is read as a "nested" aggregate (see getNestedCountDTOS).
- private final List<String> nestedList = Arrays.asList("PA", "IN", "PE", "SAT", "MAT", "SRH", "MRH");
// Field codes handled as dates: read as dateHistogram buckets by getDateCountDTOS
// and as dateRange buckets by getDateAnalysisDTOS.
- private final List<String> dateList = Arrays.asList("PD", "AD", "GD");
// Field codes read as range buckets by getNumberAnalysisDTOS; only consulted by the analysis methods.
- private final List<String> numberList = Arrays.asList("QPN", "QDPN", "SFN", "IFN", "PFN");
// Elasticsearch client used for every search issued by this service.
- private final ElasticsearchClient client;
// Resolves the IEsCountBuilder implementation named in esCount.json (used by selectAggregation).
- @Autowired
- private EsCountBuilderFactory esCountBuilderFactory;
// Resolves the IEsAnalysisBuilder implementation named in esAnalysis.json (used by the analysis methods).
- @Autowired
- private EsAnalysisBuilderFactory esAnalysisBuilderFactory;
// Converts a parsed search-condition tree into an Elasticsearch Query (used by esCountSearch).
- @Autowired
- private FormatQueryService formatQueryService;
- /**
- * 查询专利库中的专利分组聚合统计
- *
- * @param countVOS
- * @return
- * @throws Exception
- */
- public EsCountDTO esCountSearch(List<EsCountVO> countVOS) throws Exception {
- EsCountDTO esCountDTO = new EsCountDTO();
- List<EsCountDetailDTO> detailDTOS = new ArrayList<>();
- for (EsCountVO vo : countVOS) {
- String field = vo.getField();
- Integer topN = vo.getTopN();
- String condition = vo.getCondition();
- //查询es返回数据
- SearchRequest.Builder builder = new SearchRequest.Builder();
- Aggregation aggregation = this.selectAggregation(builder, vo);
- if (StringUtils.isNotEmpty(condition)) {
- //解析检索条件
- treeNode tree = expressManager.getInstance().Parse(condition, false);
- //从es中检索数据
- Query query = formatQueryService.EsQueryToQuery((operateNode) tree, "patent");
- Aggregation filtersAgg = null;
- if (aggregation != null) {
- filtersAgg = new Aggregation.Builder().filters(new FiltersAggregation.Builder()
- .filters(i -> i.array(Arrays.asList(query))).build())
- .aggregations(new HashMap() {{
- put("filters_agg", aggregation);
- }}).build();
- } else {
- filtersAgg = AggregationBuilders.filter(i -> i.bool(j -> j.must(query)));
- }
- builder.aggregations("Agg", filtersAgg);
- } else {
- builder.aggregations("Agg", aggregation);
- }
- SearchResponse<Patent> response = client.search(builder.build(), Patent.class);
- Aggregate agg = response.aggregations().get("Agg");
- if (StringUtils.isNotEmpty(condition)) {
- if (StringUtils.isNotEmpty(field)) {
- List<FiltersBucket> filtersBuckets = agg.filters().buckets().array();
- if (dateList.contains(field)) {
- filtersBuckets.forEach(filtersBucket -> {
- this.getFiltersCountDTO(filtersBucket, condition, detailDTOS);
- Aggregate filtersAgg = filtersBucket.aggregations().get("filters_agg");
- this.getDateCountDTOS(filtersAgg, field, topN, detailDTOS);
- });
- } else if (nestedList.contains(field)) {
- filtersBuckets.forEach(filtersBucket -> {
- this.getFiltersCountDTO(filtersBucket, condition, detailDTOS);
- Aggregate filtersAgg = filtersBucket.aggregations().get("filters_agg");
- this.getNestedCountDTOS(filtersAgg, field, detailDTOS);
- });
- } else if (childList.contains(field)) {
- filtersBuckets.forEach(filtersBucket -> {
- this.getFiltersCountDTO(filtersBucket, condition, detailDTOS);
- Aggregate filtersAgg = filtersBucket.aggregations().get("filters_agg");
- this.getChildCountDTOS(filtersAgg, field, detailDTOS);
- });
- } else {
- filtersBuckets.forEach(filtersBucket -> {
- this.getFiltersCountDTO(filtersBucket, condition, detailDTOS);
- Aggregate filtersAgg = filtersBucket.aggregations().get("filters_agg");
- this.getTermCountDTOS(filtersAgg, field, detailDTOS);
- });
- }
- } else {
- this.getFilterCountDTO(agg, condition, detailDTOS);
- }
- } else {
- if (dateList.contains(field)) {
- this.getDateCountDTOS(agg, field, topN, detailDTOS);
- } else if (nestedList.contains(field)) {
- this.getNestedCountDTOS(agg, field, detailDTOS);
- } else if (childList.contains(field)) {
- this.getChildCountDTOS(agg, field, detailDTOS);
- } else {
- this.getTermCountDTOS(agg, field, detailDTOS);
- }
- }
- }
- esCountDTO.setDetailDTOS(detailDTOS);
- return esCountDTO;
- }
- /**
- * 专利的聚合分析
- *
- * @param countVOS
- * @return
- * @throws Exception
- */
- public EsCountDTO esAnalysisSearch(List<EsCountVO> countVOS) throws Exception {
- EsCountDTO esCountDTO = new EsCountDTO();
- List<EsCountDetailDTO> detailDTOS = new ArrayList<>();
- for (EsCountVO vo : countVOS) {
- String field = vo.getField();
- Integer topN = vo.getTopN();
- Boolean ifHaveChild = vo.getIfHaveChild();
- String fieldId = vo.getFieldId();
- List<String> values = vo.getValues();
- //查询es返回数据
- SearchRequest.Builder builder = new SearchRequest.Builder();
- builder.index("patent");
- IEsAnalysisBuilder iEsAnalysisBuilder = null;
- String json = CommonService.readJsonFile("esAnalysis.json");
- List<EsConfigVO> esConfigVOS = JSON.parseArray(json, EsConfigVO.class);
- EsConfigVO esConfigVO = esConfigVOS.stream().filter(item -> item.getField().equals(field))
- .findFirst().orElse(null);
- if (esConfigVO != null) {
- iEsAnalysisBuilder = esAnalysisBuilderFactory.getClass(esConfigVO.getEsClass());
- iEsAnalysisBuilder.setField(esConfigVO.getEsField());
- iEsAnalysisBuilder.setTopN(topN);
- iEsAnalysisBuilder.setIfHaveChild(ifHaveChild);
- iEsAnalysisBuilder.setFieldValue(fieldId);
- if (iEsAnalysisBuilder.getField().contains(".")) {
- String path = iEsAnalysisBuilder.getField().substring(0, iEsAnalysisBuilder.getField().indexOf("."));
- iEsAnalysisBuilder.setPath(path);
- }
- iEsAnalysisBuilder.setValues(values);
- Aggregation aggregation = iEsAnalysisBuilder.createAnalyseAgg();
- builder.aggregations("Agg", aggregation);
- SearchResponse<Patent> response = client.search(builder.build(), Patent.class);
- Aggregate agg = response.aggregations().get("Agg");
- if (dateList.contains(field)) {
- this.getDateAnalysisDTOS(agg, field, topN, detailDTOS);
- } else if (nestedList.contains(field)) {
- this.getNestedCountDTOS(agg, field, detailDTOS);
- } else if (childList.contains(field)) {
- this.getChildAnalysisDTOS(agg, field, detailDTOS);
- } else if (numberList.contains(field)) {
- this.getNumberAnalysisDTOS(agg, field,detailDTOS);
- } else {
- this.getTermCountDTOS(agg, field, detailDTOS);
- }
- }
- }
- esCountDTO.setDetailDTOS(detailDTOS);
- return esCountDTO;
- }
- //测试用的
- public EsCountDTO esAnalysis(EsCountVO vo) throws Exception {
- EsCountDTO esCountDTO = new EsCountDTO();
- List<EsCountDetailDTO> detailDTOS = new ArrayList<>();
- String field = vo.getField();
- Integer topN = vo.getTopN();
- Boolean ifHaveChild = vo.getIfHaveChild();
- String fieldId = vo.getFieldId();
- List<String> values = vo.getValues();
- //查询es返回数据
- SearchRequest.Builder builder = new SearchRequest.Builder();
- builder.index("patent");
- IEsAnalysisBuilder iEsAnalysisBuilder = null;
- String json = CommonService.readJsonFile("esAnalysis.json");
- List<EsConfigVO> esConfigVOS = JSON.parseArray(json, EsConfigVO.class);
- EsConfigVO esConfigVO = esConfigVOS.stream().filter(item -> item.getField().equals(field))
- .findFirst().orElse(null);
- if (esConfigVO != null) {
- iEsAnalysisBuilder = esAnalysisBuilderFactory.getClass(esConfigVO.getEsClass());
- iEsAnalysisBuilder.setField(esConfigVO.getEsField());
- iEsAnalysisBuilder.setTopN(topN);
- iEsAnalysisBuilder.setIfHaveChild(ifHaveChild);
- iEsAnalysisBuilder.setFieldValue(fieldId);
- if (iEsAnalysisBuilder.getField().contains(".")) {
- String path = iEsAnalysisBuilder.getField().substring(0, iEsAnalysisBuilder.getField().indexOf("."));
- iEsAnalysisBuilder.setPath(path);
- }
- iEsAnalysisBuilder.setValues(values);
- Aggregation aggregation = iEsAnalysisBuilder.createAnalyseAgg();
- builder.aggregations("Agg", aggregation);
- SearchResponse<Patent> response = client.search(builder.build(), Patent.class);
- Aggregate agg = response.aggregations().get("Agg");
- if (dateList.contains(field)) {
- this.getDateAnalysisDTOS(agg, field, topN, detailDTOS);
- } else if (nestedList.contains(field)) {
- this.getNestedCountDTOS(agg, field, detailDTOS);
- } else if (childList.contains(field)) {
- this.getChildAnalysisDTOS(agg, field, detailDTOS);
- } else if (numberList.contains(field)) {
- this.getNumberAnalysisDTOS(agg, field,detailDTOS);
- } else {
- this.getTermCountDTOS(agg, field, detailDTOS);
- }
- }
- esCountDTO.setDetailDTOS(detailDTOS);
- return esCountDTO;
- }
- /**
- * 获取聚合后的aggregation
- *
- * @param builder
- * @param vo
- * @return
- * @throws Exception
- */
- public Aggregation selectAggregation(SearchRequest.Builder builder, EsCountVO vo) throws Exception {
- String valueOne = vo.getValueOne();
- String valueTwo = vo.getValueTwo();
- Integer topN = vo.getTopN();
- Boolean ifHaveChild = vo.getIfHaveChild();
- String field = vo.getField();
- Integer fieldType = vo.getFieldType();
- String fieldId = vo.getFieldId();
- Aggregation aggregation = null;
- builder.index("patent");
- IEsCountBuilder iEsCountBuilder = null;
- String json = CommonService.readJsonFile("esCount.json");
- List<EsConfigVO> esConfigVOS = JSON.parseArray(json, EsConfigVO.class);
- EsConfigVO esConfigVO = esConfigVOS.stream().filter(item -> item.getField().equals(field)).findFirst().orElse(null);
- if (esConfigVO != null) {
- iEsCountBuilder = esCountBuilderFactory.getClass(esConfigVO.getEsClass());
- iEsCountBuilder.setField(esConfigVO.getEsField());
- iEsCountBuilder.setFieldId(fieldId);
- iEsCountBuilder.setValueOne(valueOne);
- iEsCountBuilder.setValueTwo(valueTwo);
- iEsCountBuilder.setTopN(topN);
- iEsCountBuilder.setIfHaveChild(ifHaveChild);
- iEsCountBuilder.setFieldType(String.valueOf(fieldType));
- if (iEsCountBuilder.getField().contains(".")) {
- String path = iEsCountBuilder.getField().substring(0, iEsCountBuilder.getField().indexOf("."));
- iEsCountBuilder.setPath(path);
- }
- aggregation = iEsCountBuilder.createAggregation();
- }
- return aggregation;
- }
- /**
- * 获取Filter聚合返回数据
- *
- * @param agg
- * @param condition
- * @return
- */
- public void getFilterCountDTO(Aggregate agg, String condition, List<EsCountDetailDTO> detailDTOS) {
- EsCountDetailDTO filterDTO = new EsCountDetailDTO();
- filterDTO.setField("condition");
- filterDTO.setName(condition);
- filterDTO.setNumber(agg.filter().docCount());
- if (filterDTO.getNumber() > 0) {
- detailDTOS.add(filterDTO);
- }
- }
- /**
- * 获取Filters聚合返回数据
- *
- * @param filtersBucket
- * @param condition
- * @return
- */
- public void getFiltersCountDTO(FiltersBucket filtersBucket, String condition, List<EsCountDetailDTO> detailDTOS) {
- EsCountDetailDTO filtersDTO = new EsCountDetailDTO();
- filtersDTO.setField("condition");
- filtersDTO.setName(condition);
- filtersDTO.setNumber(filtersBucket.docCount());
- if (filtersDTO.getNumber() > 0) {
- detailDTOS.add(filtersDTO);
- }
- }
- /**
- * 获取Terms聚合后数据
- *
- * @param agg
- * @param field
- * @param detailDTOS
- */
- public void getTermCountDTOS(Aggregate agg, String field, List<EsCountDetailDTO> detailDTOS) {
- List<StringTermsBucket> list = agg.sterms().buckets().array();
- list.forEach(bucket -> {
- EsCountDetailDTO dto = new EsCountDetailDTO();
- dto.setField(field);
- Aggregate aggregate = bucket.aggregations().get("filter_agg");
- dto.setName(bucket.key().stringValue());
- dto.setNumber(bucket.docCount());
- if (aggregate != null) {
- dto.setNumber(aggregate.filter().docCount());
- }
- if (dto.getNumber() > 0) {
- detailDTOS.add(dto);
- }
- });
- }
- /**
- * 获取children聚合后数据
- *
- * @param agg
- * @param field
- * @param detailDTOS
- */
- public void getChildCountDTOS(Aggregate agg, String field, List<EsCountDetailDTO> detailDTOS) {
- Aggregate childAgg = agg.children().aggregations().get("child_agg");
- List<StringTermsBucket> list = childAgg.sterms().buckets().array();
- for (StringTermsBucket bucket : list) {
- Aggregate aggregate = bucket.aggregations().get("filter_agg");
- List<FiltersBucket> list1 = aggregate.filters().buckets().array();
- for (FiltersBucket filtersBucket : list1) {
- Aggregate filtersAgg = filtersBucket.aggregations().get("filters_agg");
- List<StringTermsBucket> list2 = filtersAgg.sterms().buckets().array();
- if (!CollectionUtils.isEmpty(list2)) {
- for (StringTermsBucket termsBucket : list2) {
- EsCountDetailDTO dto = new EsCountDetailDTO();
- dto.setField(field);
- dto.setName(termsBucket.key().stringValue());
- dto.setNumber(termsBucket.docCount());
- if (dto.getNumber() > 0) {
- detailDTOS.add(dto);
- }
- }
- }
- }
- }
- // list.forEach(bucket -> {
- // EsCountDetailDTO dto = new EsCountDetailDTO();
- // dto.setField(field);
- // Aggregate aggregate = bucket.aggregations().get("filter_agg");
- // List<StringTermsBucket> buckets = aggregate.sterms().buckets().array();
- // for (StringTermsBucket termsBucket : buckets) {
- // dto.setName(termsBucket.key().stringValue());
- // dto.setNumber(termsBucket.docCount());
- // if (dto.getNumber() > 0) {
- // detailDTOS.add(dto);
- // }
- // }
- // });
- }
- /**
- * 获取children分析后数据
- *
- * @param agg
- * @param field
- * @param detailDTOS
- */
- public void getChildAnalysisDTOS(Aggregate agg, String field, List<EsCountDetailDTO> detailDTOS) {
- Aggregate childAgg = agg.children().aggregations().get("child_agg");
- List<StringTermsBucket> list = childAgg.sterms().buckets().array();
- for (StringTermsBucket bucket : list) {
- Aggregate termAgg = bucket.aggregations().get("term_agg");
- List<StringTermsBucket> bucketList = termAgg.sterms().buckets().array();
- if (!CollectionUtils.isEmpty(bucketList)) {
- for (StringTermsBucket termsBucket : bucketList) {
- EsCountDetailDTO dto = new EsCountDetailDTO();
- dto.setField(field);
- dto.setName(termsBucket.key().stringValue());
- Aggregate aggregate = termsBucket.aggregations().get("filterAgg");
- List<FiltersBucket> filtersBuckets = aggregate.filters().buckets().array();
- for (int i = 0; i < filtersBuckets.size() - 1; i++) {
- FiltersBucket filtersBucket = filtersBuckets.get(i);
- if (filtersBucket.docCount() > 0) {
- dto.setNumber(filtersBucket.docCount());
- if (dto.getNumber() > 0) {
- detailDTOS.add(dto);
- }
- break;
- }
- }
- }
- }
- }
- }
- /**
- * 获取range分析后数据
- *
- * @param agg
- * @param field
- * @param detailDTOS
- */
- public void getNumberAnalysisDTOS(Aggregate agg, String field,List<EsCountDetailDTO> detailDTOS) {
- List<RangeBucket> list = agg.range().buckets().array();
- for (RangeBucket bucket : list) {
- EsCountDetailDTO dto = new EsCountDetailDTO();
- dto.setField(field);
- dto.setName(bucket.key());
- dto.setNumber(bucket.docCount());
- if (dto.getNumber() > 0) {
- detailDTOS.add(dto);
- }
- }
- }
- /**
- * 获取dateHistogram聚合后数据
- *
- * @param agg
- * @param field
- * @param detailDTOS
- */
- public void getDateCountDTOS(Aggregate agg, String field, Integer topN, List<EsCountDetailDTO> detailDTOS) {
- List<DateHistogramBucket> list = agg.dateHistogram().buckets().array();
- List<EsCountDetailDTO> esCountDetailDTOS = new ArrayList<>();
- list.forEach(bucket -> {
- EsCountDetailDTO dto = new EsCountDetailDTO();
- dto.setField(field);
- Aggregate aggregate = bucket.aggregations().get("filter_agg");
- dto.setName(bucket.keyAsString());
- dto.setNumber(bucket.docCount());
- if (aggregate != null) {
- dto.setNumber(aggregate.filter().docCount());
- }
- if (dto.getNumber() > 0) {
- esCountDetailDTOS.add(dto);
- }
- });
- if (!CollectionUtils.isEmpty(esCountDetailDTOS)) {
- List<EsCountDetailDTO> collect = esCountDetailDTOS.stream()
- .sorted(Comparator.comparing(EsCountDetailDTO::getName).reversed()).limit(topN).collect(Collectors.toList());
- detailDTOS.addAll(collect);
- }
- }
- /**
- * 获取dateHistogram分析后数据
- *
- * @param agg
- * @param field
- * @param topN
- * @param detailDTOS
- */
- public void getDateAnalysisDTOS(Aggregate agg, String field, Integer topN, List<EsCountDetailDTO> detailDTOS) {
- List<RangeBucket> list1 = agg.dateRange().buckets().array();
- List<EsCountDetailDTO> esCountDetailDTOS = new ArrayList<>();
- for (RangeBucket bucket : list1) {
- EsCountDetailDTO dto = new EsCountDetailDTO();
- dto.setField(field);
- dto.setName(bucket.key());
- dto.setNumber(bucket.docCount());
- if (dto.getNumber() > 0) {
- esCountDetailDTOS.add(dto);
- }
- }
- if (!CollectionUtils.isEmpty(esCountDetailDTOS)) {
- List<EsCountDetailDTO> collect = esCountDetailDTOS.stream()
- .sorted(Comparator.comparing(EsCountDetailDTO::getName).reversed()).limit(topN).collect(Collectors.toList());
- detailDTOS.addAll(collect);
- }
- }
- /**
- * 获取nested聚合后数据
- *
- * @param agg
- * @param field
- * @param detailDTOS
- */
- public void getNestedCountDTOS(Aggregate agg, String field, List<EsCountDetailDTO> detailDTOS) {
- Aggregate termsAgg = agg.nested().aggregations().get("terms_agg");
- List<StringTermsBucket> list = termsAgg.sterms().buckets().array();
- list.forEach(bucket -> {
- EsCountDetailDTO dto = new EsCountDetailDTO();
- dto.setField(field);
- Aggregate aggregate = bucket.aggregations().get("filter_agg");
- dto.setName(bucket.key().stringValue());
- dto.setNumber(bucket.docCount());
- if (aggregate != null) {
- dto.setNumber(aggregate.filter().docCount());
- }
- if (dto.getNumber() > 0) {
- detailDTOS.add(dto);
- }
- });
- }
- }
|