فهرست منبع

fixed essearch

zero 1 سال پیش
والد
کامیت
0cdd3e762e

+ 25 - 0
src/main/java/com/example/xiaoshiweixinback/business/utils/RegexUtil.java

@@ -1,5 +1,7 @@
 package com.example.xiaoshiweixinback.business.utils;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
@@ -52,4 +54,27 @@ public class RegexUtil {
         return m.matches();
     }
 
+    /**
+     * Splits text into tokens on , 。 、 ; , . / ; and whitespace; text inside a matching quote pair
+     * ("..", ’..’, ‘..’ or “..”) is kept as a single token with the quotes removed.
+     *
+     * @param input text to split; null yields an empty list
+     * @return tokens in order of appearance
+     */
+    public static List<String> splitByDelimiters(String input) {
+        List<String> result = new ArrayList<>();
+        if (input == null) {
+            return result;
+        }
+        // Groups 1-4: quoted text, each ending at its own closing quote; group 5: unquoted run ('+' is ordinary text).
+        Pattern pattern = Pattern.compile("\"([^\"]*)\"|’([^’]*)’|‘([^’]*)’|“([^”]*)”|([^,。、;,./;\\s]+)");
+        Matcher matcher = pattern.matcher(input);
+        while (matcher.find()) {
+            int group = 1; // every alternative owns exactly one group, so exactly one is non-null
+            while (matcher.group(group) == null) {
+                group++;
+            }
+            result.add(matcher.group(group));
+        }
+        return result;
+    }
 }

+ 13 - 29
src/main/java/com/example/xiaoshiweixinback/service/importPatent/EsDenseVectorService.java

@@ -17,6 +17,7 @@ import com.example.xiaoshiweixinback.business.common.base.Records;
 import com.example.xiaoshiweixinback.business.common.base.RedisConf;
 import com.example.xiaoshiweixinback.business.redis.RedisService;
 import com.example.xiaoshiweixinback.business.utils.BeanUtil;
+import com.example.xiaoshiweixinback.business.utils.RegexUtil;
 import com.example.xiaoshiweixinback.business.utils.parseQueryToTree.expressManager;
 import com.example.xiaoshiweixinback.business.utils.parseQueryToTree.operateNode;
 import com.example.xiaoshiweixinback.business.utils.parseQueryToTree.treeNode;
@@ -223,7 +224,7 @@ public class EsDenseVectorService {
         builder.index("patent_vector");
         Query query = QueryBuilders.term(i -> i.field("app_no").value(noDTO.getAppNo()));
         builder.query(query);
-        builder.size(100);
+        builder.size(30);
         SearchResponse<PatentVector> response = client.search(builder.build(), PatentVector.class);
         List<Hit<PatentVector>> hits = response.hits().hits();
         for (Hit<PatentVector> hit : hits) {
@@ -290,42 +291,25 @@ public class EsDenseVectorService {
         } else {
             condition = searchCondition;
         }
-//        if (StringUtils.isNotEmpty(searchCondition)) {
-//            if (StringUtils.isNotEmpty(keyword)) {
-//                String key = keyword.replaceAll("[,。、;,./;]", " OR ");
-//                condition = "TI = " + "(" + "\"" + key + "\"" + ")" + " AND " + searchCondition;
-//            } else {
-//                condition = searchCondition;
-//            }
-//        } else {
-//            //获取关键词
-//            if (StringUtils.isNotEmpty(keyword)) {
-////        String key = dto.getKey().replaceAll("[,。、;,./;\\s]"," OR ");
-//                String key = keyword.replaceAll("[,。、;,./;]", " OR ");
-//                condition = "TI = " + "(" + "\"" + key + "\"" + ")";
-//            }
-//        }
         return condition;
     }
 
     public String appendStr(String str) {
-        String needStr = "";
-        String cName = str.replaceAll("[,。、;,./;]+|\\s+", "OR");
-        if (cName.contains("OR")) {
-            String[] split = cName.split("OR");
-            String ccName = "";
-            for (int i = 0; i < split.length; i++) {
-                String s = split[i];
-                if (split.length - 1 != i) {
-                    ccName = ccName + "\"" + s + "\"" + " OR ";
+        // Builds a search term list: str is tokenized with RegexUtil.splitByDelimiters, each token is
+        // wrapped in double quotes and the tokens are joined with " OR ".
+        String s = "";
+        List<String> delimiters = RegexUtil.splitByDelimiters(str);
+        // Any non-empty token list goes through the loop, so a single quoted or delimiter-padded
+        // keyword is emitted as its parsed token rather than as the raw text wrapped in a second
+        // pair of quotes.
+        if (!delimiters.isEmpty()) {
+            for (int i = 0; i < delimiters.size(); i++) {
+                String value = delimiters.get(i);
+                if (delimiters.size() - 1 != i) {
+                    s = s + "\"" + value + "\"" + " OR ";
                 } else {
-                    ccName = ccName + "\"" + s + "\"";
+                    s = s + "\"" + value + "\"";
                 }
             }
-            needStr = ccName;
         } else {
-            needStr = "\"" + str + "\"";
+            // No token found (empty or delimiter-only text): fall back to quoting the raw text.
+            s = "\"" + str + "\"";
         }
-        return needStr;
+        return s;
     }
+
 }

+ 16 - 1
src/test/java/com/example/xiaoshiweixinback/XiaoshiWeixinbackApplicationTests.java

@@ -1,5 +1,6 @@
 package com.example.xiaoshiweixinback;
 
+import com.example.xiaoshiweixinback.business.utils.RegexUtil;
 import com.example.xiaoshiweixinback.service.importPatent.EsDenseVectorService;
 import org.apache.commons.lang3.StringUtils;
 import org.junit.jupiter.api.Test;
@@ -15,6 +16,8 @@ import java.time.format.DateTimeFormatter;
 import java.time.temporal.ChronoUnit;
 import java.util.*;
 import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 
 @SpringBootTest
@@ -100,7 +103,7 @@ class XiaoshiWeixinbackApplicationTests {
     @Test
     public void test2() {
 //        String companyName = "CN";
-        String keyword = "CN";
+        String keyword = "CN ZN KM,IU";
         String searchCondition = "";
 //        String cName = companyName.replaceAll("[,。、;,./;]+|\\s+", "OR");
 //        if (cName.contains("OR")) {
@@ -126,4 +129,16 @@ class XiaoshiWeixinbackApplicationTests {
         }
         System.out.println(searchCondition);
     }
+
+    /** Prints the token list, then each token on its own line, for a sample mixing plain and quoted fields. */
+    @Test
+    public void test4() {
+        String sample = "field1,field2,field3;‘field4,with,commas’, field5 with quotes,“field6,another field”,    ac";
+        List<String> tokens = RegexUtil.splitByDelimiters(sample);
+        // First the list's own toString form, then one token per line.
+        System.out.println(tokens);
+        tokens.forEach(System.out::println);
+    }
+
 }