|
@@ -7,28 +7,34 @@ import co.elastic.clients.elasticsearch._types.query_dsl.Query;
|
|
|
import co.elastic.clients.elasticsearch._types.query_dsl.QueryBuilders;
|
|
|
import co.elastic.clients.elasticsearch.core.SearchRequest;
|
|
|
import co.elastic.clients.elasticsearch.core.SearchResponse;
|
|
|
+import co.elastic.clients.elasticsearch.core.search.FieldCollapse;
|
|
|
import co.elastic.clients.elasticsearch.core.search.Hit;
|
|
|
import co.elastic.clients.json.JsonData;
|
|
|
+import com.example.xiaoshiweixinback.business.common.base.Records;
|
|
|
import com.example.xiaoshiweixinback.business.utils.BeanUtil;
|
|
|
+import com.example.xiaoshiweixinback.business.utils.ToolUtil;
|
|
|
import com.example.xiaoshiweixinback.business.utils.parseQueryToTree.expressManager;
|
|
|
import com.example.xiaoshiweixinback.business.utils.parseQueryToTree.operateNode;
|
|
|
import com.example.xiaoshiweixinback.business.utils.parseQueryToTree.treeNode;
|
|
|
import com.example.xiaoshiweixinback.domain.es.PatentVector;
|
|
|
-import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPatentSearchDTO;
|
|
|
import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPictureNoDTO;
|
|
|
+import com.example.xiaoshiweixinback.entity.dto.esPicture.EsPatentVectorDTO;
|
|
|
import com.example.xiaoshiweixinback.entity.vo.esPicture.EsPictureNoVo;
|
|
|
-import com.example.xiaoshiweixinback.entity.vo.esPicture.EsPictureVectorVo;
|
|
|
+import com.example.xiaoshiweixinback.entity.vo.esPicture.EsPatentVectorVo;
|
|
|
import com.example.xiaoshiweixinback.service.importPatent.FormatQueryService;
|
|
|
import lombok.RequiredArgsConstructor;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.context.annotation.Lazy;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
|
|
|
import java.io.File;
|
|
|
import java.io.IOException;
|
|
|
import java.util.ArrayList;
|
|
|
+import java.util.Comparator;
|
|
|
import java.util.List;
|
|
|
+import java.util.stream.Collectors;
|
|
|
|
|
|
@Service
|
|
|
@RequiredArgsConstructor(onConstructor_ = {@Lazy})
|
|
@@ -42,85 +48,109 @@ public class EsDenseVectorService {
|
|
|
@Autowired
|
|
|
private GetVectorService getVectorService;
|
|
|
|
|
|
- public List<EsPictureVectorVo> getPatentList(EsPatentSearchDTO dto) throws Exception {
|
|
|
+ /**
|
|
|
+ * 根据图片排序获取列表
|
|
|
+ *
|
|
|
+ * @param dto
|
|
|
+ * @return
|
|
|
+ * @throws IOException
|
|
|
+ */
|
|
|
+ public Records getPatentVectors(EsPatentVectorDTO dto,File file) throws Exception {
|
|
|
Long pageNum = dto.getPageNum();
|
|
|
Long pageSize = dto.getPageSize();
|
|
|
-// String key = dto.getKey().replaceAll("[,。、;,./;\\s]"," OR ");
|
|
|
- String key = dto.getKey().replaceAll("[,。、;,./;]"," OR ");
|
|
|
- String s = "TI = " + "(" + key + ")";
|
|
|
- System.out.println(s);
|
|
|
|
|
|
SearchRequest.Builder builder = new SearchRequest.Builder();
|
|
|
//设置查询索引
|
|
|
builder.index("patent_vector");
|
|
|
-
|
|
|
- //1. 解析检索条件
|
|
|
- treeNode tree = expressManager.getInstance().Parse(s, false);
|
|
|
- //3. 从es中检索数据
|
|
|
- Query query = formatQueryService.EsQueryToQuery((operateNode) tree, "patentVector", null);
|
|
|
- builder.query(query);
|
|
|
-
|
|
|
- //分页
|
|
|
- if (pageNum != null && pageSize != null && pageNum > 0 && pageSize > 0) {
|
|
|
- builder.from((pageNum.intValue() - 1) * pageSize.intValue()).size(pageSize.intValue());
|
|
|
- }
|
|
|
-// else {
|
|
|
-// builder.from(0).size(9);
|
|
|
-// }
|
|
|
-
|
|
|
- SearchResponse<PatentVector> response = client.search(builder.build(), PatentVector.class);
|
|
|
- List<EsPictureVectorVo> vectorVos = new ArrayList<>();
|
|
|
- List<Hit<PatentVector>> hits = response.hits().hits();
|
|
|
- for (Hit<PatentVector> hit : hits) {
|
|
|
- PatentVector vector = hit.source();
|
|
|
- EsPictureVectorVo vectorVo = new EsPictureVectorVo();
|
|
|
- BeanUtil.copy(vector,vectorVo);
|
|
|
- vectorVos.add(vectorVo);
|
|
|
+ Query q = null;
|
|
|
+ //获取关键词
|
|
|
+ if (StringUtils.isNotEmpty(dto.getKey())) {
|
|
|
+// String key = dto.getKey().replaceAll("[,。、;,./;\\s]"," OR ");
|
|
|
+ String key = dto.getKey().replaceAll("[,。、;,./;]"," OR ");
|
|
|
+ String condition = "TI = " + "(" + key + ")";
|
|
|
+ //1. 解析检索条件
|
|
|
+ treeNode tree = expressManager.getInstance().Parse(condition, false);
|
|
|
+ //3. 从es中检索数据
|
|
|
+ q = formatQueryService.EsQueryToQuery((operateNode) tree, "patentVector", null);
|
|
|
}
|
|
|
|
|
|
- return vectorVos;
|
|
|
- }
|
|
|
|
|
|
- public List<EsPictureVectorVo> getPatentVectorSort(File file, String description) throws IOException {
|
|
|
+ //获取图片向量
|
|
|
List<Float> imageList = new ArrayList<>();
|
|
|
List<String> stringList = new ArrayList<>();
|
|
|
- if (file != null) {
|
|
|
+ if (file != null && file.exists() && file.length() != 0) {
|
|
|
stringList = getVectorService.getVectorByFile(file);
|
|
|
- } else if (StringUtils.isNotEmpty(description)) {
|
|
|
- stringList = getVectorService.getVectorByText(description);
|
|
|
+ } else if (StringUtils.isNotEmpty(dto.getDescription())) {
|
|
|
+ stringList = getVectorService.getVectorByText(dto.getDescription());
|
|
|
}
|
|
|
stringList.forEach(item -> {
|
|
|
Float a = Float.parseFloat(item);
|
|
|
imageList.add(a);
|
|
|
});
|
|
|
|
|
|
- List<EsPictureVectorVo> list = new ArrayList<>();
|
|
|
- SearchRequest.Builder builder = new SearchRequest.Builder();
|
|
|
- //设置查询索引
|
|
|
- builder.index("patent_vector");
|
|
|
- String source = "cosineSimilarity(params.queryVector, 'my_vector') + 1.0";
|
|
|
- InlineScript inlineScript = InlineScript.of(i -> i.lang("painless").params("queryVector", JsonData.of(imageList)).source(source));
|
|
|
- Script script = Script.of(i -> i.inline(inlineScript));
|
|
|
- Query query = QueryBuilders.scriptScore(i -> i.script(script).query(org.springframework.data.elasticsearch.client.elc.QueryBuilders.matchAllQueryAsQuery()));
|
|
|
- builder.query(query);
|
|
|
- builder.size(100);
|
|
|
+ if (!CollectionUtils.isEmpty(imageList)) {
|
|
|
+ String source = "cosineSimilarity(params.queryVector, 'my_vector') + 1.0";
|
|
|
+ InlineScript inlineScript = InlineScript.of(i -> i.lang("painless").params("queryVector", JsonData.of(imageList)).source(source));
|
|
|
+ Script script = Script.of(i -> i.inline(inlineScript));
|
|
|
+ Query query = null;
|
|
|
+ if (q != null) {
|
|
|
+ Query finalQ = q;
|
|
|
+ query = QueryBuilders.scriptScore(i -> i.script(script)
|
|
|
+ .query(finalQ));
|
|
|
+ } else {
|
|
|
+ query = QueryBuilders.scriptScore(i -> i.script(script)
|
|
|
+ .query(org.springframework.data.elasticsearch.client.elc.QueryBuilders.matchAllQueryAsQuery()));
|
|
|
+ }
|
|
|
+ builder.query(query);
|
|
|
+ } else {
|
|
|
+ builder.query(q);
|
|
|
+ }
|
|
|
+ //根据申请号去重
|
|
|
+ FieldCollapse collapse = FieldCollapse.of(i -> i.field("app_no"));
|
|
|
+ builder.collapse(collapse);
|
|
|
+
|
|
|
+ //分页
|
|
|
+ if (pageNum != null && pageSize != null && pageNum > 0 && pageSize > 0) {
|
|
|
+ builder.from((pageNum.intValue() - 1) * pageSize.intValue()).size(pageSize.intValue());
|
|
|
+ }
|
|
|
+
|
|
|
+ //解除最大条数限制
|
|
|
+ builder.trackTotalHits(i -> i.enabled(true));
|
|
|
SearchResponse<PatentVector> response = client.search(builder.build(), PatentVector.class);
|
|
|
List<Hit<PatentVector>> hits = response.hits().hits();
|
|
|
+ List<EsPatentVectorVo> vectorVos = new ArrayList<>();
|
|
|
+ long total = response.hits().total().value();
|
|
|
+ Double fixedScore = 1.8d;
|
|
|
for (Hit<PatentVector> hit : hits) {
|
|
|
- PatentVector vector = hit.source();
|
|
|
- EsPictureVectorVo vectorVo = new EsPictureVectorVo();
|
|
|
- BeanUtil.copy(vector,vectorVo);
|
|
|
- list.add(vectorVo);
|
|
|
+ Double score = hit.score();
|
|
|
+ if (score > fixedScore) {
|
|
|
+ PatentVector vector = hit.source();
|
|
|
+ EsPatentVectorVo vectorVo = new EsPatentVectorVo();
|
|
|
+ BeanUtil.copy(vector, vectorVo);
|
|
|
+ vectorVos.add(vectorVo);
|
|
|
+ }
|
|
|
}
|
|
|
- return list;
|
|
|
+
|
|
|
+ Records records = new Records();
|
|
|
+ records.setCurrent(pageNum);
|
|
|
+ records.setSize(pageSize);
|
|
|
+ records.setData(vectorVos);
|
|
|
+ records.setTotal(total);
|
|
|
+ return records;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * 根据专利号获取相关图片
|
|
|
+ * @param noDTO
|
|
|
+ * @return
|
|
|
+ * @throws IOException
|
|
|
+ */
|
|
|
public List<EsPictureNoVo> getPictureByNo(EsPictureNoDTO noDTO) throws IOException {
|
|
|
List<EsPictureNoVo> pictureNoVos = new ArrayList<>();
|
|
|
SearchRequest.Builder builder = new SearchRequest.Builder();
|
|
|
//设置查询索引
|
|
|
builder.index("patent_vector");
|
|
|
- Query query = QueryBuilders.term(i -> i.field("app_no.keyword").value(noDTO.getAppNo()));
|
|
|
+ Query query = QueryBuilders.term(i -> i.field("app_no").value(noDTO.getAppNo()));
|
|
|
builder.query(query);
|
|
|
builder.size(100);
|
|
|
SearchResponse<PatentVector> response = client.search(builder.build(), PatentVector.class);
|
|
@@ -132,6 +162,6 @@ public class EsDenseVectorService {
|
|
|
noVo.setImageIndex(vector.getImageIndex());
|
|
|
pictureNoVos.add(noVo);
|
|
|
}
|
|
|
- return pictureNoVos;
|
|
|
+ return pictureNoVos.stream().sorted(Comparator.comparing(EsPictureNoVo::getImageIndex)).collect(Collectors.toList());
|
|
|
}
|
|
|
}
|