Jelajahi Sumber

fixed es search

zero 1 tahun lalu
induk
melakukan
1efedbd045

+ 19 - 7
src/main/java/com/example/xiaoshiweixinback/controller/PatentController.java

@@ -1,10 +1,13 @@
 package com.example.xiaoshiweixinback.controller;
 
+import com.alibaba.fastjson2.JSONObject;
 import com.example.xiaoshiweixinback.business.common.Constants;
 import com.example.xiaoshiweixinback.business.common.Response;
-import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPatentSearchDTO;
+import com.example.xiaoshiweixinback.business.common.base.Records;
+import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPictureNoDTO;
+import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPatentVectorDTO;
 import com.example.xiaoshiweixinback.entity.dto.patent.ImportTaskAMVO;
-import com.example.xiaoshiweixinback.entity.vo.esPicture.EsPictureVectorVo;
+import com.example.xiaoshiweixinback.entity.vo.esPicture.EsPictureNoVo;
 import com.example.xiaoshiweixinback.service.common.EsDenseVectorService;
 import com.example.xiaoshiweixinback.service.common.FileManagerService;
 import com.example.xiaoshiweixinback.service.importPatent.ImportFromWebToEsService;
@@ -12,6 +15,7 @@ import io.swagger.v3.oas.annotations.Operation;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.web.bind.annotation.*;
 
+import java.io.File;
 import java.util.List;
 
 
@@ -39,10 +43,18 @@ public class PatentController {
         return Response.success("");
     }
 
-    @Operation(summary = "获取检索专利列表--")
-    @PostMapping(value = "/getPatentList")
-    public Response getPatentList(@RequestBody EsPatentSearchDTO searchDTO) throws Exception {
-        List<EsPictureVectorVo> patentList = esDenseVectorService.getPatentList(searchDTO);
-        return Response.success(patentList);
+    @Operation(summary = "根据关键词获取列表(图片用于排序)--zero")
+    @PostMapping(value = "/getPatentVectors")
+    public Response getPatentVectors(String vectorDTO, File file) throws Exception {
+        EsPatentVectorDTO esPatentVectorDTO = JSONObject.parseObject(vectorDTO, EsPatentVectorDTO.class);
+        Records records = esDenseVectorService.getPatentVectors(esPatentVectorDTO, file);
+        return Response.success(records);
+    }
+
+    @Operation(summary = "根据专利号获取相关图片--zero")
+    @PostMapping(value = "/getPictureByNo")
+    public Response getPictureByNo(@RequestBody EsPictureNoDTO pictureNoDTO) throws Exception {
+        List<EsPictureNoVo> pictureByNo = esDenseVectorService.getPictureByNo(pictureNoDTO);
+        return Response.success(pictureByNo);
     }
 }

+ 21 - 8
src/main/java/com/example/xiaoshiweixinback/domain/es/PatentVector.java

@@ -21,20 +21,35 @@ public class PatentVector {
     private String appNo;
 
     /**
+     * 申请日
+     */
+    @JsonProperty("app_date")
+    private Date appDate;
+
+    /**
      * 权利人
      */
     @JsonProperty("right_holder")
     private List<PatentPerson> rightHolder;
 
-    @JsonProperty("app_date")
-    private Date appDate;
-
     /**
      * 标题
      */
     @JsonProperty("title")
     private List<Text> title;
 
+    /**
+     * 摘要
+     */
+    @JsonProperty("abstract_str")
+    private List<Text> abstractStr;
+
+    /**
+     * 申请国家
+     */
+    @JsonProperty("app_country")
+    private String appCountry;
+
     @JsonProperty("image_index")
     private Integer imageIndex;
 
@@ -44,12 +59,10 @@ public class PatentVector {
     @JsonProperty("my_vector")
     private List<Float> myVector;
 
+    /**
+     * LOC分类号(主)
+     */
     @JsonProperty("loc")
     private List<PatentClassify> loc;
 
-    @JsonProperty("app_country")
-    private String appCountry;
-
-    @JsonProperty("abstract_str")
-    private List<Text> abstractStr;
 }

+ 7 - 3
src/main/java/com/example/xiaoshiweixinback/entity/dto/esPicture/EsPatentSearchDTO.java

@@ -2,12 +2,16 @@ package com.example.xiaoshiweixinback.entity.dto.esPicture;
 
 import lombok.Data;
 
+import java.io.File;
+
 @Data
-public class EsPatentSearchDTO {
+public class EsPatentVectorDTO {
+
+    private String key;
+
+    private String description;
 
     private Long pageNum;
 
     private Long pageSize;
-
-    private String key;
 }

+ 0 - 13
src/main/java/com/example/xiaoshiweixinback/entity/dto/esPicture/EsPictureVectorDTO.java

@@ -1,13 +0,0 @@
-package com.example.xiaoshiweixinback.entity.dto.esPicture;
-
-import lombok.Data;
-
-import java.io.File;
-
-@Data
-public class EsPictureVectorDTO {
-
-    private File file;
-
-
-}

+ 9 - 2
src/main/java/com/example/xiaoshiweixinback/entity/vo/esPicture/EsPictureVectorVo.java

@@ -1,5 +1,6 @@
 package com.example.xiaoshiweixinback.entity.vo.esPicture;
 
+import com.example.xiaoshiweixinback.domain.es.PatentClassify;
 import com.example.xiaoshiweixinback.domain.es.PatentPerson;
 import com.example.xiaoshiweixinback.domain.es.Text;
 import lombok.Data;
@@ -8,17 +9,23 @@ import java.util.Date;
 import java.util.List;
 
 @Data
-public class EsPictureVectorVo {
+public class EsPatentVectorVo {
 
     private String appNo;
 
+    private Date appDate;
+
     private List<PatentPerson> rightHolder;
 
-    private Date publicDate;
+    private List<Text> abstractStr;
+
+    private String appCountry;
 
     private List<Text> title;
 
     private Integer imageIndex;
 
     private String guid;
+
+    private List<PatentClassify> loc;
 }

+ 83 - 53
src/main/java/com/example/xiaoshiweixinback/service/common/EsDenseVectorService.java

@@ -7,28 +7,34 @@ import co.elastic.clients.elasticsearch._types.query_dsl.Query;
 import co.elastic.clients.elasticsearch._types.query_dsl.QueryBuilders;
 import co.elastic.clients.elasticsearch.core.SearchRequest;
 import co.elastic.clients.elasticsearch.core.SearchResponse;
+import co.elastic.clients.elasticsearch.core.search.FieldCollapse;
 import co.elastic.clients.elasticsearch.core.search.Hit;
 import co.elastic.clients.json.JsonData;
+import com.example.xiaoshiweixinback.business.common.base.Records;
 import com.example.xiaoshiweixinback.business.utils.BeanUtil;
+import com.example.xiaoshiweixinback.business.utils.ToolUtil;
 import com.example.xiaoshiweixinback.business.utils.parseQueryToTree.expressManager;
 import com.example.xiaoshiweixinback.business.utils.parseQueryToTree.operateNode;
 import com.example.xiaoshiweixinback.business.utils.parseQueryToTree.treeNode;
 import com.example.xiaoshiweixinback.domain.es.PatentVector;
-import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPatentSearchDTO;
 import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPictureNoDTO;
+import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPatentVectorDTO;
 import com.example.xiaoshiweixinback.entity.vo.esPicture.EsPictureNoVo;
-import com.example.xiaoshiweixinback.entity.vo.esPicture.EsPictureVectorVo;
+import com.example.xiaoshiweixinback.entity.vo.esPicture.EsPatentVectorVo;
 import com.example.xiaoshiweixinback.service.importPatent.FormatQueryService;
 import lombok.RequiredArgsConstructor;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.context.annotation.Lazy;
 import org.springframework.stereotype.Service;
+import org.springframework.util.CollectionUtils;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.List;
+import java.util.stream.Collectors;
 
 @Service
 @RequiredArgsConstructor(onConstructor_ = {@Lazy})
@@ -42,85 +48,109 @@ public class EsDenseVectorService {
     @Autowired
     private GetVectorService getVectorService;
 
-    public List<EsPictureVectorVo> getPatentList(EsPatentSearchDTO dto) throws Exception {
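+    /**
+     * Searches the patent_vector index by keyword and/or vector similarity, collapsed on app_no.
+     * @param dto  keyword, text description and paging parameters
+     * @param file optional query image; when null, missing or empty the description is vectorized instead
+     * @return the current page of hits scoring above the fixed threshold, plus the total reported by ES
+     * @throws Exception propagated from keyword parsing, vector extraction or the Elasticsearch search
+     */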
+    public Records getPatentVectors(EsPatentVectorDTO dto,File file) throws Exception {
         Long pageNum = dto.getPageNum();
         Long pageSize = dto.getPageSize();
-//        String key = dto.getKey().replaceAll("[,。、;,./;\\s]"," OR ");
-        String key = dto.getKey().replaceAll("[,。、;,./;]"," OR ");
-        String s = "TI = " + "(" + key + ")";
-        System.out.println(s);
 
         SearchRequest.Builder builder = new SearchRequest.Builder();
         //设置查询索引
         builder.index("patent_vector");
-
-        //1. 解析检索条件
-        treeNode tree = expressManager.getInstance().Parse(s, false);
-        //3. 从es中检索数据
-        Query query = formatQueryService.EsQueryToQuery((operateNode) tree, "patentVector", null);
-        builder.query(query);
-
-        //分页
-        if (pageNum != null && pageSize != null && pageNum > 0 && pageSize > 0) {
-            builder.from((pageNum.intValue() - 1) * pageSize.intValue()).size(pageSize.intValue());
-        }
-//        else {
-//            builder.from(0).size(9);
-//        }
-
-        SearchResponse<PatentVector> response = client.search(builder.build(), PatentVector.class);
-        List<EsPictureVectorVo> vectorVos = new ArrayList<>();
-        List<Hit<PatentVector>> hits = response.hits().hits();
-        for (Hit<PatentVector> hit : hits) {
-            PatentVector vector = hit.source();
-            EsPictureVectorVo vectorVo = new EsPictureVectorVo();
-            BeanUtil.copy(vector,vectorVo);
-            vectorVos.add(vectorVo);
+        Query q = null;
+        //获取关键词
+        if (StringUtils.isNotEmpty(dto.getKey())) {
+//        String key = dto.getKey().replaceAll("[,。、;,./;\\s]"," OR ");
+            String key = dto.getKey().replaceAll("[,。、;,./;]"," OR ");
+            String condition = "TI = " + "(" + key + ")";
+            //1. 解析检索条件
+            treeNode tree = expressManager.getInstance().Parse(condition, false);
+            //2. Convert the parsed expression tree into an ES query (the search itself runs later)
+            q = formatQueryService.EsQueryToQuery((operateNode) tree, "patentVector", null);
         }
 
-        return vectorVos;
-    }
 
-    public List<EsPictureVectorVo> getPatentVectorSort(File file, String description) throws IOException {
+        //获取图片向量
         List<Float> imageList = new ArrayList<>();
         List<String> stringList = new ArrayList<>();
-        if (file != null) {
+        if (file != null && file.exists() && file.length() != 0) {
             stringList = getVectorService.getVectorByFile(file);
-        } else if (StringUtils.isNotEmpty(description)) {
-            stringList = getVectorService.getVectorByText(description);
+        } else if (StringUtils.isNotEmpty(dto.getDescription())) {
+            stringList = getVectorService.getVectorByText(dto.getDescription());
         }
         stringList.forEach(item -> {
             Float a = Float.parseFloat(item);
             imageList.add(a);
         });
 
-        List<EsPictureVectorVo> list = new ArrayList<>();
-        SearchRequest.Builder builder = new SearchRequest.Builder();
-        //设置查询索引
-        builder.index("patent_vector");
-        String source = "cosineSimilarity(params.queryVector, 'my_vector') + 1.0";
-        InlineScript inlineScript = InlineScript.of(i -> i.lang("painless").params("queryVector", JsonData.of(imageList)).source(source));
-        Script script = Script.of(i -> i.inline(inlineScript));
-        Query query = QueryBuilders.scriptScore(i -> i.script(script).query(org.springframework.data.elasticsearch.client.elc.QueryBuilders.matchAllQueryAsQuery()));
-        builder.query(query);
-        builder.size(100);
+        if (!CollectionUtils.isEmpty(imageList)) {
+            String source = "cosineSimilarity(params.queryVector, 'my_vector') + 1.0";
+            InlineScript inlineScript = InlineScript.of(i -> i.lang("painless").params("queryVector", JsonData.of(imageList)).source(source));
+            Script script = Script.of(i -> i.inline(inlineScript));
+            Query query = null;
+            if (q != null) {
+                Query finalQ = q;
+                query = QueryBuilders.scriptScore(i -> i.script(script)
+                        .query(finalQ));
+            } else {
+                query = QueryBuilders.scriptScore(i -> i.script(script)
+                        .query(org.springframework.data.elasticsearch.client.elc.QueryBuilders.matchAllQueryAsQuery()));
+            }
+            builder.query(query);
+        } else {
+            builder.query(q);
+        }
+        //根据申请号去重
+        FieldCollapse collapse = FieldCollapse.of(i -> i.field("app_no"));
+        builder.collapse(collapse);
+
+        //分页
+        if (pageNum != null && pageSize != null && pageNum > 0 && pageSize > 0) {
+            builder.from((pageNum.intValue() - 1) * pageSize.intValue()).size(pageSize.intValue());
+        }
+
+        //解除最大条数限制
+        builder.trackTotalHits(i -> i.enabled(true));
         SearchResponse<PatentVector> response = client.search(builder.build(), PatentVector.class);
         List<Hit<PatentVector>> hits = response.hits().hits();
+        List<EsPatentVectorVo> vectorVos = new ArrayList<>();
+        long total = response.hits().total().value();
+        Double fixedScore = 1.8d;
         for (Hit<PatentVector> hit : hits) {
-            PatentVector vector = hit.source();
-            EsPictureVectorVo vectorVo = new EsPictureVectorVo();
-            BeanUtil.copy(vector,vectorVo);
-            list.add(vectorVo);
+            Double score = hit.score();
+            if (score > fixedScore) {
+                PatentVector vector = hit.source();
+                EsPatentVectorVo vectorVo = new EsPatentVectorVo();
+                BeanUtil.copy(vector, vectorVo);
+                vectorVos.add(vectorVo);
+            }
         }
-        return list;
+
+        Records records = new Records();
+        records.setCurrent(pageNum);
+        records.setSize(pageSize);
+        records.setData(vectorVos);
+        records.setTotal(total);
+        return records;
     }
 
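+    /**
+     * Fetches the pictures indexed under a single patent application number.
+     * @param noDTO request whose appNo is matched exactly against the app_no field
+     * @return the matching pictures (at most 100 are requested), sorted by ascending image index
+     * @throws IOException declared by this method; raised by the Elasticsearch client call
+     */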
     public List<EsPictureNoVo> getPictureByNo(EsPictureNoDTO noDTO) throws IOException {
         List<EsPictureNoVo> pictureNoVos = new ArrayList<>();
         SearchRequest.Builder builder = new SearchRequest.Builder();
         //设置查询索引
         builder.index("patent_vector");
-        Query query = QueryBuilders.term(i -> i.field("app_no.keyword").value(noDTO.getAppNo()));
+        Query query = QueryBuilders.term(i -> i.field("app_no").value(noDTO.getAppNo()));
         builder.query(query);
         builder.size(100);
         SearchResponse<PatentVector> response = client.search(builder.build(), PatentVector.class);
@@ -132,6 +162,6 @@ public class EsDenseVectorService {
             noVo.setImageIndex(vector.getImageIndex());
             pictureNoVos.add(noVo);
         }
-        return pictureNoVos;
+        return pictureNoVos.stream().sorted(Comparator.comparing(EsPictureNoVo::getImageIndex)).collect(Collectors.toList());
     }
 }

+ 5 - 8
src/test/java/com/example/xiaoshiweixinback/XiaoshiWeixinbackApplicationTests.java

@@ -1,9 +1,5 @@
 package com.example.xiaoshiweixinback;
 
-import cn.dev33.satoken.stp.StpUtil;
-import com.example.xiaoshiweixinback.business.utils.RegexUtil;
-import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPatentSearchDTO;
-import com.example.xiaoshiweixinback.entity.vo.esPicture.EsPictureVectorVo;
 import com.example.xiaoshiweixinback.service.common.EsDenseVectorService;
 import org.junit.jupiter.api.Test;
 import org.springframework.beans.factory.annotation.Autowired;
@@ -27,10 +23,11 @@ class XiaoshiWeixinbackApplicationTests {
     @Test
     public void test() throws Exception {
 //        List<EsPictureVectorVo> list = denseVectorService.getPatentVectorSort(null, "大大的原型");
-        EsPatentSearchDTO dto = new EsPatentSearchDTO();
-        dto.setKey("手机 OR 产品");
-        List<EsPictureVectorVo> list = denseVectorService.getPatentList(dto);
-        System.out.println(list);
+//        EsPatentSearchDTO dto = new EsPatentSearchDTO();
+//        dto.setKey("手机 OR 产品");
+//        Records records = denseVectorService.getPatentList(dto);
+//        List<EsPictureVectorVo> list = JSON.parseArray(records.getData().toString(), EsPictureVectorVo.class);
+//        System.out.println(list);
 
 //        String s = "a , b , c , d";
 //        boolean regExpReplace = RegexUtil.isRegExpReplace(s);