EsCountService.java 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. package cn.cslg.pas.service.business.es;
  2. import cn.cslg.pas.common.dto.business.EsCountDTO;
  3. import cn.cslg.pas.common.dto.business.EsCountDetailDTO;
  4. import cn.cslg.pas.common.utils.parseQueryToTree.expressManager;
  5. import cn.cslg.pas.common.utils.parseQueryToTree.operateNode;
  6. import cn.cslg.pas.common.utils.parseQueryToTree.treeNode;
  7. import cn.cslg.pas.common.vo.EsConfigVO;
  8. import cn.cslg.pas.common.vo.business.EsCountVO;
  9. import cn.cslg.pas.domain.es.Patent;
  10. import cn.cslg.pas.factorys.EsAnalysisBuilderFactory.EsAnalysisBuilderFactory;
  11. import cn.cslg.pas.factorys.EsAnalysisBuilderFactory.IEsAnalysisBuilder;
  12. import cn.cslg.pas.factorys.EsCountBuilderFactory.EsCountBuilderFactory;
  13. import cn.cslg.pas.factorys.EsCountBuilderFactory.IEsCountBuilder;
  14. import cn.cslg.pas.service.business.CommonService;
  15. import cn.cslg.pas.service.query.FormatQueryService;
  16. import co.elastic.clients.elasticsearch.ElasticsearchClient;
  17. import co.elastic.clients.elasticsearch._types.aggregations.*;
  18. import co.elastic.clients.elasticsearch._types.query_dsl.Query;
  19. import co.elastic.clients.elasticsearch._types.query_dsl.QueryBuilders;
  20. import co.elastic.clients.elasticsearch.core.SearchRequest;
  21. import co.elastic.clients.elasticsearch.core.SearchResponse;
  22. import com.alibaba.fastjson.JSON;
  23. import lombok.RequiredArgsConstructor;
  24. import org.apache.commons.lang3.StringUtils;
  25. import org.springframework.beans.factory.annotation.Autowired;
  26. import org.springframework.context.annotation.Lazy;
  27. import org.springframework.stereotype.Service;
  28. import org.springframework.util.CollectionUtils;
  29. import java.util.*;
  30. import java.util.stream.Collectors;
  31. @Service
  32. @RequiredArgsConstructor(onConstructor_ = {@Lazy})
  33. public class EsCountService {
  34. private final List<String> childList = Arrays.asList("field");
  35. private final List<String> nestedList = Arrays.asList("PA", "IN", "PE", "SAT", "MAT", "SRH", "MRH");
  36. private final List<String> dateList = Arrays.asList("PD", "AD", "GD");
  37. private final List<String> numberList = Arrays.asList("QPN", "QDPN", "SFN", "IFN", "PFN");
  38. private final ElasticsearchClient client;
  39. @Autowired
  40. private EsCountBuilderFactory esCountBuilderFactory;
  41. @Autowired
  42. private EsAnalysisBuilderFactory esAnalysisBuilderFactory;
  43. @Autowired
  44. private FormatQueryService formatQueryService;
  45. /**
  46. * 查询专利库中的专利分组聚合统计
  47. *
  48. * @param countVOS
  49. * @return
  50. * @throws Exception
  51. */
  52. public EsCountDTO esCountSearch(List<EsCountVO> countVOS) throws Exception {
  53. EsCountDTO esCountDTO = new EsCountDTO();
  54. List<EsCountDetailDTO> detailDTOS = new ArrayList<>();
  55. for (EsCountVO vo : countVOS) {
  56. String field = vo.getField();
  57. Integer topN = vo.getTopN();
  58. String condition = vo.getCondition();
  59. //查询es返回数据
  60. SearchRequest.Builder builder = new SearchRequest.Builder();
  61. Aggregation aggregation = this.selectAggregation(builder, vo);
  62. if (StringUtils.isNotEmpty(condition)) {
  63. //解析检索条件
  64. treeNode tree = expressManager.getInstance().Parse(condition, false);
  65. //从es中检索数据
  66. Query query = formatQueryService.EsQueryToQuery((operateNode) tree, "patent");
  67. Aggregation filtersAgg = null;
  68. if (aggregation != null) {
  69. filtersAgg = new Aggregation.Builder().filters(new FiltersAggregation.Builder()
  70. .filters(i -> i.array(Arrays.asList(query))).build())
  71. .aggregations(new HashMap() {{
  72. put("filters_agg", aggregation);
  73. }}).build();
  74. } else {
  75. filtersAgg = AggregationBuilders.filter(i -> i.bool(j -> j.must(query)));
  76. }
  77. builder.aggregations("Agg", filtersAgg);
  78. } else {
  79. builder.aggregations("Agg", aggregation);
  80. }
  81. SearchResponse<Patent> response = client.search(builder.build(), Patent.class);
  82. Aggregate agg = response.aggregations().get("Agg");
  83. if (StringUtils.isNotEmpty(condition)) {
  84. if (StringUtils.isNotEmpty(field)) {
  85. List<FiltersBucket> filtersBuckets = agg.filters().buckets().array();
  86. if (dateList.contains(field)) {
  87. filtersBuckets.forEach(filtersBucket -> {
  88. this.getFiltersCountDTO(filtersBucket, condition, detailDTOS);
  89. Aggregate filtersAgg = filtersBucket.aggregations().get("filters_agg");
  90. this.getDateCountDTOS(filtersAgg, field, topN, detailDTOS);
  91. });
  92. } else if (nestedList.contains(field)) {
  93. filtersBuckets.forEach(filtersBucket -> {
  94. this.getFiltersCountDTO(filtersBucket, condition, detailDTOS);
  95. Aggregate filtersAgg = filtersBucket.aggregations().get("filters_agg");
  96. this.getNestedCountDTOS(filtersAgg, field, detailDTOS);
  97. });
  98. } else if (childList.contains(field)) {
  99. filtersBuckets.forEach(filtersBucket -> {
  100. this.getFiltersCountDTO(filtersBucket, condition, detailDTOS);
  101. Aggregate filtersAgg = filtersBucket.aggregations().get("filters_agg");
  102. this.getChildCountDTOS(filtersAgg, field, detailDTOS);
  103. });
  104. } else {
  105. filtersBuckets.forEach(filtersBucket -> {
  106. this.getFiltersCountDTO(filtersBucket, condition, detailDTOS);
  107. Aggregate filtersAgg = filtersBucket.aggregations().get("filters_agg");
  108. this.getTermCountDTOS(filtersAgg, field, detailDTOS);
  109. });
  110. }
  111. } else {
  112. this.getFilterCountDTO(agg, condition, detailDTOS);
  113. }
  114. } else {
  115. if (dateList.contains(field)) {
  116. this.getDateCountDTOS(agg, field, topN, detailDTOS);
  117. } else if (nestedList.contains(field)) {
  118. this.getNestedCountDTOS(agg, field, detailDTOS);
  119. } else if (childList.contains(field)) {
  120. this.getChildCountDTOS(agg, field, detailDTOS);
  121. } else {
  122. this.getTermCountDTOS(agg, field, detailDTOS);
  123. }
  124. }
  125. }
  126. esCountDTO.setDetailDTOS(detailDTOS);
  127. return esCountDTO;
  128. }
  129. /**
  130. * 专利的聚合分析
  131. *
  132. * @param countVOS
  133. * @return
  134. * @throws Exception
  135. */
  136. public EsCountDTO esAnalysisSearch(List<EsCountVO> countVOS) throws Exception {
  137. EsCountDTO esCountDTO = new EsCountDTO();
  138. List<EsCountDetailDTO> detailDTOS = new ArrayList<>();
  139. for (EsCountVO vo : countVOS) {
  140. String field = vo.getField();
  141. Integer topN = vo.getTopN();
  142. Boolean ifHaveChild = vo.getIfHaveChild();
  143. List<String> values = vo.getValues();
  144. //查询es返回数据
  145. SearchRequest.Builder builder = new SearchRequest.Builder();
  146. builder.index("patent");
  147. IEsAnalysisBuilder iEsAnalysisBuilder = null;
  148. String json = CommonService.readJsonFile("esAnalysis.json");
  149. List<EsConfigVO> esConfigVOS = JSON.parseArray(json, EsConfigVO.class);
  150. EsConfigVO esConfigVO = esConfigVOS.stream().filter(item -> item.getField().equals(field))
  151. .findFirst().orElse(null);
  152. if (esConfigVO != null) {
  153. iEsAnalysisBuilder = esAnalysisBuilderFactory.getClass(esConfigVO.getEsClass());
  154. iEsAnalysisBuilder.setField(esConfigVO.getEsField());
  155. iEsAnalysisBuilder.setTopN(topN);
  156. iEsAnalysisBuilder.setIfHaveChild(ifHaveChild);
  157. for (String value : values) {
  158. if (iEsAnalysisBuilder.getField().contains(".")) {
  159. String path = iEsAnalysisBuilder.getField().substring(0, iEsAnalysisBuilder.getField().indexOf("."));
  160. iEsAnalysisBuilder.setPath(path);
  161. }
  162. iEsAnalysisBuilder.setFieldValue(value);
  163. Aggregation aggregation = iEsAnalysisBuilder.createAnalyseAgg();
  164. builder.aggregations("Agg", aggregation);
  165. SearchResponse<Patent> response = client.search(builder.build(), Patent.class);
  166. Aggregate agg = response.aggregations().get("Agg");
  167. if (dateList.contains(field)) {
  168. this.getDateAnalysisDTOS(agg, field, value, topN, detailDTOS);
  169. } else if (nestedList.contains(field)) {
  170. this.getNestedCountDTOS(agg, field, detailDTOS);
  171. } else if (childList.contains(field)) {
  172. this.getChildAnalysisDTOS(agg, field, value, detailDTOS);
  173. } else if (numberList.contains(field)) {
  174. this.getNumberAnalysisDTOS(agg, field, value, detailDTOS);
  175. } else {
  176. this.getTermCountDTOS(agg, field, detailDTOS);
  177. }
  178. }
  179. }
  180. }
  181. esCountDTO.setDetailDTOS(detailDTOS);
  182. return esCountDTO;
  183. }
  184. public EsCountDTO esAnalysis(EsCountVO vo) throws Exception {
  185. EsCountDTO esCountDTO = new EsCountDTO();
  186. List<EsCountDetailDTO> detailDTOS = new ArrayList<>();
  187. String field = vo.getField();
  188. Integer topN = vo.getTopN();
  189. Boolean ifHaveChild = vo.getIfHaveChild();
  190. List<String> values = vo.getValues();
  191. //查询es返回数据
  192. SearchRequest.Builder builder = new SearchRequest.Builder();
  193. builder.index("patent");
  194. IEsAnalysisBuilder iEsAnalysisBuilder = null;
  195. String json = CommonService.readJsonFile("esAnalysis.json");
  196. List<EsConfigVO> esConfigVOS = JSON.parseArray(json, EsConfigVO.class);
  197. EsConfigVO esConfigVO = esConfigVOS.stream().filter(item -> item.getField().equals(field))
  198. .findFirst().orElse(null);
  199. if (esConfigVO != null) {
  200. iEsAnalysisBuilder = esAnalysisBuilderFactory.getClass(esConfigVO.getEsClass());
  201. iEsAnalysisBuilder.setField(esConfigVO.getEsField());
  202. iEsAnalysisBuilder.setTopN(topN);
  203. iEsAnalysisBuilder.setIfHaveChild(ifHaveChild);
  204. for (String value : values) {
  205. if (iEsAnalysisBuilder.getField().contains(".")) {
  206. String path = iEsAnalysisBuilder.getField().substring(0, iEsAnalysisBuilder.getField().indexOf("."));
  207. iEsAnalysisBuilder.setPath(path);
  208. }
  209. iEsAnalysisBuilder.setFieldValue(value);
  210. Aggregation aggregation = iEsAnalysisBuilder.createAnalyseAgg();
  211. builder.aggregations("Agg", aggregation);
  212. SearchResponse<Patent> response = client.search(builder.build(), Patent.class);
  213. Aggregate agg = response.aggregations().get("Agg");
  214. if (dateList.contains(field)) {
  215. this.getDateAnalysisDTOS(agg, field, value, topN, detailDTOS);
  216. } else if (nestedList.contains(field)) {
  217. this.getNestedCountDTOS(agg, field, detailDTOS);
  218. } else if (childList.contains(field)) {
  219. this.getChildAnalysisDTOS(agg, field, value, detailDTOS);
  220. } else if (numberList.contains(field)) {
  221. this.getNumberAnalysisDTOS(agg, field, value, detailDTOS);
  222. } else {
  223. this.getTermCountDTOS(agg, field, detailDTOS);
  224. }
  225. }
  226. }
  227. esCountDTO.setDetailDTOS(detailDTOS);
  228. return esCountDTO;
  229. }
  230. /**
  231. * 获取聚合后的aggregation
  232. *
  233. * @param builder
  234. * @param vo
  235. * @return
  236. * @throws Exception
  237. */
  238. public Aggregation selectAggregation(SearchRequest.Builder builder, EsCountVO vo) throws Exception {
  239. String valueOne = vo.getValueOne();
  240. String valueTwo = vo.getValueTwo();
  241. Integer topN = vo.getTopN();
  242. Boolean ifHaveChild = vo.getIfHaveChild();
  243. String field = vo.getField();
  244. Aggregation aggregation = null;
  245. builder.index("patent");
  246. IEsCountBuilder iEsCountBuilder = null;
  247. String json = CommonService.readJsonFile("esCount.json");
  248. List<EsConfigVO> esConfigVOS = JSON.parseArray(json, EsConfigVO.class);
  249. EsConfigVO esConfigVO = esConfigVOS.stream().filter(item -> item.getField().equals(field)).findFirst().orElse(null);
  250. if (esConfigVO != null) {
  251. iEsCountBuilder = esCountBuilderFactory.getClass(esConfigVO.getEsClass());
  252. iEsCountBuilder.setField(esConfigVO.getEsField());
  253. iEsCountBuilder.setValueOne(valueOne);
  254. iEsCountBuilder.setValueTwo(valueTwo);
  255. iEsCountBuilder.setTopN(topN);
  256. iEsCountBuilder.setIfHaveChild(ifHaveChild);
  257. if (iEsCountBuilder.getField().contains(".")) {
  258. String path = iEsCountBuilder.getField().substring(0, iEsCountBuilder.getField().indexOf("."));
  259. iEsCountBuilder.setPath(path);
  260. }
  261. aggregation = iEsCountBuilder.createAggregation();
  262. }
  263. return aggregation;
  264. }
  265. /**
  266. * 获取Filter聚合返回数据
  267. *
  268. * @param agg
  269. * @param condition
  270. * @return
  271. */
  272. public void getFilterCountDTO(Aggregate agg, String condition, List<EsCountDetailDTO> detailDTOS) {
  273. EsCountDetailDTO filterDTO = new EsCountDetailDTO();
  274. filterDTO.setField("condition");
  275. filterDTO.setName(condition);
  276. filterDTO.setNumber(agg.filter().docCount());
  277. if (filterDTO.getNumber() > 0) {
  278. detailDTOS.add(filterDTO);
  279. }
  280. }
  281. /**
  282. * 获取Filters聚合返回数据
  283. *
  284. * @param filtersBucket
  285. * @param condition
  286. * @return
  287. */
  288. public void getFiltersCountDTO(FiltersBucket filtersBucket, String condition, List<EsCountDetailDTO> detailDTOS) {
  289. EsCountDetailDTO filtersDTO = new EsCountDetailDTO();
  290. filtersDTO.setField("condition");
  291. filtersDTO.setName(condition);
  292. filtersDTO.setNumber(filtersBucket.docCount());
  293. if (filtersDTO.getNumber() > 0) {
  294. detailDTOS.add(filtersDTO);
  295. }
  296. }
  297. /**
  298. * 获取Terms聚合后数据
  299. *
  300. * @param agg
  301. * @param field
  302. * @param detailDTOS
  303. */
  304. public void getTermCountDTOS(Aggregate agg, String field, List<EsCountDetailDTO> detailDTOS) {
  305. List<StringTermsBucket> list = agg.sterms().buckets().array();
  306. list.forEach(bucket -> {
  307. EsCountDetailDTO dto = new EsCountDetailDTO();
  308. dto.setField(field);
  309. Aggregate aggregate = bucket.aggregations().get("filter_agg");
  310. dto.setName(bucket.key().stringValue());
  311. dto.setNumber(bucket.docCount());
  312. if (aggregate != null) {
  313. dto.setNumber(aggregate.filter().docCount());
  314. }
  315. if (dto.getNumber() > 0) {
  316. detailDTOS.add(dto);
  317. }
  318. });
  319. }
  320. /**
  321. * 获取children聚合后数据
  322. *
  323. * @param agg
  324. * @param field
  325. * @param detailDTOS
  326. */
  327. public void getChildCountDTOS(Aggregate agg, String field, List<EsCountDetailDTO> detailDTOS) {
  328. Aggregate childAgg = agg.children().aggregations().get("child_agg");
  329. List<StringTermsBucket> list = childAgg.sterms().buckets().array();
  330. list.forEach(bucket -> {
  331. EsCountDetailDTO dto = new EsCountDetailDTO();
  332. dto.setField(field);
  333. Aggregate aggregate = bucket.aggregations().get("filter_agg");
  334. List<StringTermsBucket> buckets = aggregate.sterms().buckets().array();
  335. for (StringTermsBucket termsBucket : buckets) {
  336. dto.setName(termsBucket.key().stringValue());
  337. dto.setNumber(termsBucket.docCount());
  338. if (dto.getNumber() > 0) {
  339. detailDTOS.add(dto);
  340. }
  341. }
  342. });
  343. }
  344. /**
  345. * 获取children分析后数据
  346. *
  347. * @param agg
  348. * @param field
  349. * @param value
  350. * @param detailDTOS
  351. */
  352. public void getChildAnalysisDTOS(Aggregate agg, String field, String value, List<EsCountDetailDTO> detailDTOS) {
  353. Aggregate childAgg = agg.children().aggregations().get("child_agg");
  354. List<StringTermsBucket> list = childAgg.sterms().buckets().array();
  355. list.forEach(bucket -> {
  356. EsCountDetailDTO dto = new EsCountDetailDTO();
  357. dto.setField(field);
  358. Aggregate aggregate = bucket.aggregations().get("filter_agg");
  359. dto.setName(value);
  360. dto.setNumber(aggregate.filter().docCount());
  361. if (dto.getNumber() > 0) {
  362. detailDTOS.add(dto);
  363. }
  364. });
  365. }
  366. /**
  367. * 获取range分析后数据
  368. *
  369. * @param agg
  370. * @param field
  371. * @param value
  372. * @param detailDTOS
  373. */
  374. public void getNumberAnalysisDTOS(Aggregate agg, String field,String value, List<EsCountDetailDTO> detailDTOS) {
  375. List<RangeBucket> list = agg.range().buckets().array();
  376. list.forEach(bucket -> {
  377. EsCountDetailDTO dto = new EsCountDetailDTO();
  378. dto.setField(field);
  379. dto.setName(value);
  380. dto.setNumber(bucket.docCount());
  381. if (dto.getNumber() > 0) {
  382. detailDTOS.add(dto);
  383. }
  384. });
  385. }
  386. /**
  387. * 获取dateHistogram聚合后数据
  388. *
  389. * @param agg
  390. * @param field
  391. * @param detailDTOS
  392. */
  393. public void getDateCountDTOS(Aggregate agg, String field, Integer topN, List<EsCountDetailDTO> detailDTOS) {
  394. List<DateHistogramBucket> list = agg.dateHistogram().buckets().array();
  395. List<EsCountDetailDTO> esCountDetailDTOS = new ArrayList<>();
  396. list.forEach(bucket -> {
  397. EsCountDetailDTO dto = new EsCountDetailDTO();
  398. dto.setField(field);
  399. Aggregate aggregate = bucket.aggregations().get("filter_agg");
  400. dto.setName(bucket.keyAsString());
  401. dto.setNumber(bucket.docCount());
  402. if (aggregate != null) {
  403. dto.setNumber(aggregate.filter().docCount());
  404. }
  405. if (dto.getNumber() > 0) {
  406. esCountDetailDTOS.add(dto);
  407. }
  408. });
  409. if (!CollectionUtils.isEmpty(esCountDetailDTOS)) {
  410. List<EsCountDetailDTO> collect = esCountDetailDTOS.stream()
  411. .sorted(Comparator.comparing(EsCountDetailDTO::getName).reversed()).limit(topN).collect(Collectors.toList());
  412. detailDTOS.addAll(collect);
  413. }
  414. }
  415. /**
  416. * 获取dateHistogram分析后数据
  417. *
  418. * @param agg
  419. * @param field
  420. * @param value
  421. * @param topN
  422. * @param detailDTOS
  423. */
  424. public void getDateAnalysisDTOS(Aggregate agg, String field, String value, Integer topN, List<EsCountDetailDTO> detailDTOS) {
  425. List<DateHistogramBucket> list = agg.dateHistogram().buckets().array();
  426. List<EsCountDetailDTO> esCountDetailDTOS = new ArrayList<>();
  427. if (value.contains("H")) {
  428. long sum = 0L;
  429. for (DateHistogramBucket bucket : list) {
  430. Aggregate aggregate = bucket.aggregations().get("filter_agg");
  431. long docCount = aggregate.filter().docCount();
  432. sum += docCount;
  433. }
  434. EsCountDetailDTO dto = new EsCountDetailDTO();
  435. dto.setField(field);
  436. dto.setName(value);
  437. dto.setNumber(sum);
  438. if (dto.getNumber() > 0) {
  439. esCountDetailDTOS.add(dto);
  440. }
  441. } else {
  442. list.forEach(bucket -> {
  443. EsCountDetailDTO dto = new EsCountDetailDTO();
  444. dto.setField(field);
  445. Aggregate aggregate = bucket.aggregations().get("filter_agg");
  446. dto.setName(value);
  447. dto.setNumber(bucket.docCount());
  448. if (aggregate != null) {
  449. dto.setNumber(aggregate.filter().docCount());
  450. }
  451. if (dto.getNumber() > 0) {
  452. esCountDetailDTOS.add(dto);
  453. }
  454. });
  455. }
  456. if (!CollectionUtils.isEmpty(esCountDetailDTOS)) {
  457. List<EsCountDetailDTO> collect = esCountDetailDTOS.stream()
  458. .sorted(Comparator.comparing(EsCountDetailDTO::getName).reversed()).limit(topN).collect(Collectors.toList());
  459. detailDTOS.addAll(collect);
  460. }
  461. }
  462. /**
  463. * 获取nested聚合后数据
  464. *
  465. * @param agg
  466. * @param field
  467. * @param detailDTOS
  468. */
  469. public void getNestedCountDTOS(Aggregate agg, String field, List<EsCountDetailDTO> detailDTOS) {
  470. Aggregate termsAgg = agg.nested().aggregations().get("terms_agg");
  471. List<StringTermsBucket> list = termsAgg.sterms().buckets().array();
  472. list.forEach(bucket -> {
  473. EsCountDetailDTO dto = new EsCountDetailDTO();
  474. dto.setField(field);
  475. Aggregate aggregate = bucket.aggregations().get("filter_agg");
  476. dto.setName(bucket.key().stringValue());
  477. dto.setNumber(bucket.docCount());
  478. if (aggregate != null) {
  479. dto.setNumber(aggregate.filter().docCount());
  480. }
  481. if (dto.getNumber() > 0) {
  482. detailDTOS.add(dto);
  483. }
  484. });
  485. }
  486. }