Ver Fonte

add bayes公众号

zero há 1 mês atrás
pai
commit
770a71d469

+ 2 - 0
src/main/java/com/cslg/ppa/dto/GetArticleInfoDTO.java

@@ -27,4 +27,6 @@ public class GetArticleInfoDTO {
     //公众号资讯链接
     private String wxArticleIcon;
 
+    //摘要
+    private String pctDigest;
 }

+ 4 - 0
src/main/java/com/cslg/ppa/entity/ArticleInfo.java

@@ -41,4 +41,8 @@ public class ArticleInfo extends BaseEntity<ArticleInfo> {
     //公众号资讯图标
     @TableField(value = "wx_article_icon")
     private String wxArticleIcon;
+
+    //pct摘要
+    @TableField(value = "pct_digest")
+    private String pctDigest;
 }

+ 1 - 0
src/main/java/com/cslg/ppa/service/ArticleInfoService.java

@@ -51,6 +51,7 @@ public class ArticleInfoService extends ServiceImpl<ArticleInfoMapper, ArticleIn
         ArticleInfo articleInfo = new ArticleInfo();
         articleInfo.setTitle(infoDTO.getTitle());
         articleInfo.setDigest(infoDTO.getDigest());
+        articleInfo.setPctDigest(infoDTO.getPctDigest());
         articleInfo.setCategoryId(infoDTO.getCategoryId());
         articleInfo.setPublicTime(infoDTO.getPublicTime());
         articleInfo.setArticleUrl(infoDTO.getArticleUrl());

+ 3 - 0
src/main/java/com/cslg/ppa/service/GetWebArticle/GetCNIPAArticleService.java

@@ -121,8 +121,10 @@ public class GetCNIPAArticleService {
                     continue;
                 }
                 String condensedAbstract = null;
+                String pctCondensedAbstract = null;
                 try {
                     condensedAbstract = difyService.getCondensedAbstract(digest);
+                    pctCondensedAbstract = difyService.getPctCondensedAbstract(digest);
                 } catch (Exception e) {
 
                 }
@@ -136,6 +138,7 @@ public class GetCNIPAArticleService {
                 articleInfoDTO.setTitle(title);
                 articleInfoDTO.setPublicTime(date);
                 articleInfoDTO.setDigest(condensedAbstract);
+                articleInfoDTO.setPctDigest(pctCondensedAbstract);
                 articleInfoDTOS.add(articleInfoDTO);
             }
             articleInfoService.batchAddArticleInfo(articleInfoDTOS);

+ 49 - 0
src/main/java/com/cslg/ppa/service/GetWebArticle/GetEcigaretteService.java

@@ -0,0 +1,49 @@
+package com.cslg.ppa.service.GetWebArticle;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.springframework.stereotype.Service;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Fetches a web page with Jsoup and gathers the absolute URLs of its anchors.
+ *
+ * <p>NOTE(review): the gathered links are only counted and logged; nothing is
+ * returned or persisted yet, so this looks like work in progress — confirm the
+ * intended follow-up (e.g. fetching each article) with the author.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class GetEcigaretteService {
+    /** CSS selector matching every anchor that carries an {@code href} attribute. */
+    private static final String LINK_SELECTOR = "a[href]";
+    /** CSS selector of the container whose anchors are gathered separately. */
+    private static final String CONTAINER_SELECTOR = "div.app-container";
+    /** Connect/read timeout for the page request, in milliseconds. */
+    private static final int TIMEOUT_MILLIS = 15000;
+    /** Browser-like user agent sent with the page request. */
+    private static final String USER_AGENT =
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36";
+
+    /**
+     * Downloads {@code baseUrl} (following redirects) and gathers two lists of
+     * absolute link URLs: the anchors inside {@code div.app-container} and all
+     * anchors of the page. Only the sizes of both lists are logged.
+     *
+     * @param baseUrl URL of the page to crawl
+     * @throws IOException if the page cannot be fetched or parsed by Jsoup
+     */
+    public void crawlEcigaretteArticles(String baseUrl) throws IOException {
+        Document doc = Jsoup.connect(baseUrl)
+                .timeout(TIMEOUT_MILLIS)
+                .userAgent(USER_AGENT)
+                .followRedirects(true)
+                .get();
+
+        List<String> containerLinks =
+                collectAbsoluteLinks(doc.select(CONTAINER_SELECTOR).select(LINK_SELECTOR));
+        List<String> pageLinks = collectAbsoluteLinks(doc.select(LINK_SELECTOR));
+
+        // Report through the logger instead of printing a bare separator to stdout.
+        log.info("Crawled {}: {} container links, {} page links",
+                baseUrl, containerLinks.size(), pageLinks.size());
+    }
+
+    /**
+     * Resolves the {@code href} of every given anchor to an absolute URL.
+     *
+     * <p>Each element was already matched by {@link #LINK_SELECTOR}, so it is
+     * read directly; no nested lookup or exception handling is needed.
+     *
+     * @param anchors anchor elements carrying an {@code href} attribute
+     * @return one entry per anchor, in document order
+     */
+    private static List<String> collectAbsoluteLinks(Elements anchors) {
+        List<String> links = new ArrayList<>(anchors.size());
+        for (Element anchor : anchors) {
+            links.add(anchor.absUrl("href"));
+        }
+        return links;
+    }
+}

+ 0 - 6
src/main/java/com/cslg/ppa/service/GetWebArticle/GetProvinceNewsService.java

@@ -92,12 +92,6 @@ public class GetProvinceNewsService {
 
         // 改进的新闻列表抓取逻辑
         Element container = doc.selectFirst(NEWS_CONTAINER_SELECTOR);
-        //todo 如果找不到该怎么做
-//        if (container == null) {
-//            String contentText = doc.html();
-//            List<String> reStrs = xmlParseService.xmlParse(contentText, "record");
-//            System.out.println(reStrs);
-//        }
         Element scriptElement = container.select("script").first();
         List<String> reStrs = new ArrayList<>();
         if (scriptElement == null) {

+ 7 - 0
src/main/java/com/cslg/ppa/service/GetWebArticle/GetWeChatArticleService.java

@@ -105,8 +105,10 @@ public class GetWeChatArticleService {
                     continue;
                 }
                 String condensedAbstract = null;
+                String pctCondensedAbstract = null;
                 try {
                     condensedAbstract = difyService.getCondensedAbstract(weChatArticleContent);
+                    pctCondensedAbstract = difyService.getPctCondensedAbstract(weChatArticleContent);
                 } catch (Exception e) {
                     continue;
                 }
@@ -121,6 +123,8 @@ public class GetWeChatArticleService {
                     articleInfoDTO.setCategoryId(3);
                 } else if (sourceInfo.getSourceName().equals("知识产权界")) {
                     articleInfoDTO.setCategoryId(5);
+                } else if (sourceInfo.getSourceName().equals("Bayes美国知识产权")) {
+                    articleInfoDTO.setCategoryId(9);
                 } else {
                     articleInfoDTO.setCategoryId(6);
                 }
@@ -130,6 +134,7 @@ public class GetWeChatArticleService {
                 articleInfoDTO.setPublicTime(createTime);
                 articleInfoDTO.setWxArticleIcon(cover);
                 articleInfoDTO.setDigest(condensedAbstract);
+                articleInfoDTO.setPctDigest(pctCondensedAbstract);
                 articleInfoDTOS.add(articleInfoDTO);
             }
             articleInfoService.batchAddArticleInfo(articleInfoDTOS);
@@ -248,9 +253,11 @@ public class GetWeChatArticleService {
                     }
                 }
                 String condensedAbstract = difyService.getCondensedAbstract(content);
+                String pctCondensedAbstract = difyService.getPctCondensedAbstract(content);
                 ArticleInfo articleInfo = new ArticleInfo();
                 articleInfo.setTitle(title);
                 articleInfo.setDigest(condensedAbstract);
+                articleInfo.setPctDigest(pctCondensedAbstract);
                 articleInfo.setCategoryId(categoryId);
                 articleInfo.setSourceId(sourceId);
                 articleInfo.setPublicTime(createTime);

+ 32 - 0
src/main/java/com/cslg/ppa/service/commom/DifyService.java

@@ -23,6 +23,8 @@ public class DifyService {
     private String url;
     @Value("${DIFY.getAbstractKey}")
     private String getAbstractKey;
+    @Value("${DIFY.getPctAbstractKey}")
+    private String getPctAbstractKey;
 
     public String getCondensedAbstract(String text) throws Exception {
         Map<String, Object> map = new HashMap<>();
@@ -53,4 +55,34 @@ public class DifyService {
         String content = jsonObject1.get("text").toString();
         return DataUtils.unicodeDecode(content);
     }
+
+    /**
+     * HTTP client shared by all PCT abstract requests. Building a client per
+     * call would create a fresh connection pool and dispatcher every time.
+     * Connect, write and read timeouts are 600 seconds each.
+     */
+    private static final OkHttpClient PCT_ABSTRACT_CLIENT = new OkHttpClient.Builder()
+            .connectTimeout(600, TimeUnit.SECONDS)
+            .writeTimeout(600, TimeUnit.SECONDS)
+            .readTimeout(600, TimeUnit.SECONDS)
+            .build();
+
+    /**
+     * Sends {@code text} to the {@code workflows/run} endpoint, authenticated
+     * with the PCT abstract key, and returns the decoded {@code data.outputs.text}
+     * field of the JSON reply.
+     *
+     * <p>The text is passed as the {@code content} input, in {@code blocking}
+     * response mode, with user id {@code "1"}.
+     *
+     * @param text article content to condense
+     * @return the {@code text} output of the workflow after {@code DataUtils.unicodeDecode}
+     * @throws Exception if the request fails, the server answers with a
+     *                   non-2xx status, or the reply lacks {@code data.outputs.text}
+     */
+    public String getPctCondensedAbstract(String text) throws Exception {
+        Map<String, Object> inputs = new HashMap<>();
+        inputs.put("content", text);
+        OAMessageDTO oaMessageDTO = new OAMessageDTO();
+        oaMessageDTO.setInputs(inputs);
+        oaMessageDTO.setResponseMode("blocking");
+        oaMessageDTO.setUser("1");
+
+        String param = new Gson().toJson(oaMessageDTO);
+        RequestBody requestBody = RequestBody.create(MediaType.parse("application/json"), param);
+        Request request = new Request.Builder()
+                .url(url + "workflows/run")
+                .addHeader("Authorization", "Bearer " + getPctAbstractKey)
+                .post(requestBody)
+                .build();
+
+        String res;
+        // try-with-resources releases the connection even when reading the body fails.
+        try (okhttp3.Response response = PCT_ABSTRACT_CLIENT.newCall(request).execute()) {
+            if (!response.isSuccessful()) {
+                throw new java.io.IOException(
+                        "PCT abstract workflow request failed with HTTP " + response.code());
+            }
+            res = Objects.requireNonNull(response.body()).string();
+        }
+
+        // Walk data -> outputs -> text with null checks so that a malformed or
+        // error reply produces a descriptive exception instead of a bare NPE.
+        JSONObject root = JSONObject.parseObject(res);
+        JSONObject data = root == null ? null : root.getJSONObject("data");
+        JSONObject outputs = data == null ? null : data.getJSONObject("outputs");
+        Object content = outputs == null ? null : outputs.get("text");
+        if (content == null) {
+            throw new IllegalStateException(
+                    "PCT abstract workflow response has no data.outputs.text field");
+        }
+        return DataUtils.unicodeDecode(content.toString());
+    }
 }

Diff do ficheiro suprimido por ser muito extenso
+ 3 - 2
src/main/resources/application-dev.yml