zero 1 hafta önce
ebeveyn
işleme
5471607163

+ 12 - 32
src/main/java/com/cslg/ppa/service/GetWebArticle/GetProvinceNewsService.java

@@ -34,6 +34,7 @@ import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Propagation;
 import org.springframework.transaction.annotation.Transactional;
@@ -71,7 +72,7 @@ public class GetProvinceNewsService {
      *
      * @throws Exception
      */
-//    @Scheduled(cron = "0 0 5 * * ?")
+    @Scheduled(cron = "0 0 5 * * ?")
     @Transactional(propagation = Propagation.REQUIRED,rollbackFor = Throwable.class)
     public void addArticleFromWebSource() throws Exception {
         System.out.println(new Date() + "Province-Begin");
@@ -115,8 +116,8 @@ public class GetProvinceNewsService {
                     }
                 }
                 sum++;
+                System.out.println("Finished Num" + sum + "Time");
             }
-            System.out.println("Finished Num" + sum + "Time");
         }
         System.out.println(new Date() + "Province-Begin");
     }
@@ -427,19 +428,13 @@ public class GetProvinceNewsService {
                     JSONObject object = JSONObject.parseObject(res);
                     List<HubeiArticleDTO> list = JSONObject.parseArray(object.getString("data"), HubeiArticleDTO.class);
                     List<GetArticleInfoDTO> articleInfoDTOS = new ArrayList<>();
-                    int count = 0;
                     for (HubeiArticleDTO articleDTO : list) {
-                        // 限制处理的新闻项数量,避免处理过多数据
-                        if (count > 20) {
-                            log.info("已达到处理上限(20条),停止处理更多新闻项");
-                            break;
-                        }
                         String docreltime = articleDTO.getDOCRELTIME();
                         String dateStr = RegexUtil.extractDate(docreltime);
                         String yesterdayDateStr = DateUtil.getYesterdayDateStr();
-//                        if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
-//                            break;
-//                        }
+                        if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
+                            break;
+                        }
                         Date date = new Date();
                         SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
                         try {
@@ -484,7 +479,6 @@ public class GetProvinceNewsService {
                         articleInfoDTO.setPublicTime(date);
                         articleInfoDTO.setDigest(condensedAbstract);
                         articleInfoDTOS.add(articleInfoDTO);
-                        count++;
                     }
                     articleInfoService.batchAddArticleInfo(articleInfoDTOS);
                 }
@@ -550,20 +544,14 @@ public class GetProvinceNewsService {
         // 改进的新闻列表抓取逻辑
         Element container = doc.selectFirst(NEWS_CONTAINER_SELECTOR);
         Elements linkElements = container.select("a.zkmmr_tl1_item_a");
-        int count = 0;
         List<GetArticleInfoDTO> articleInfoDTOS = new ArrayList<>();
         for (Element linkElement : linkElements) {
-            // 限制处理的新闻项数量,避免处理过多数据
-            if (count > 20) {
-                log.info("已达到处理上限(20条),停止处理更多新闻项");
-                break;
-            }
             Element element = linkElement.select("p.zkmmr_tl1_item_date").first();
             String dateStr = element.text().trim();
             String yesterdayDateStr = DateUtil.getYesterdayDateStr();
-//            if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
-//                break;
-//            }
+            if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
+                break;
+            }
             Date date = new Date();
             SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
             try {
@@ -610,7 +598,6 @@ public class GetProvinceNewsService {
             articleInfoDTO.setPublicTime(date);
             articleInfoDTO.setDigest(condensedAbstract);
             articleInfoDTOS.add(articleInfoDTO);
-            count++;
         }
         articleInfoService.batchAddArticleInfo(articleInfoDTOS);
     }
@@ -640,22 +627,16 @@ public class GetProvinceNewsService {
             JSONObject object = jsonObject.getJSONObject("data");
             results = object.getJSONArray("results");
             List<GetArticleInfoDTO> articleInfoDTOS = new ArrayList<>();
-            int count = 0;
             for (int i = 0; i < results.size(); i++) {
-                // 限制处理的新闻项数量,避免处理过多数据
-                if (count > 20) {
-                    log.info("已达到处理上限(20条),停止处理更多新闻项");
-                    break;
-                }
                 JSONObject item = results.getJSONObject(i);
                 JSONObject source = item.getJSONObject("source");
 
                 String pubDate = source.get("pubDate").toString();
                 String dateStr = RegexUtil.extractDate(pubDate);
                 String yesterdayDateStr = DateUtil.getYesterdayDateStr();
-//                if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
-//                    break;
-//                }
+                if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
+                    break;
+                }
                 Date date = new Date();
                 SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
                 try {
@@ -702,7 +683,6 @@ public class GetProvinceNewsService {
                 articleInfoDTO.setPublicTime(date);
                 articleInfoDTO.setDigest(condensedAbstract);
                 articleInfoDTOS.add(articleInfoDTO);
-                count++;
             }
             articleInfoService.batchAddArticleInfo(articleInfoDTOS);
         } catch (Exception ignored) {

+ 1 - 1
src/main/resources/mapper/ArticleInfoMapper.xml

@@ -26,7 +26,7 @@
                 and ai.category_id = #{vo.categoryId}
             </if>
             <if test="vo.sourceName != null and vo.sourceName != ''">
-                and si.source_name = #{vo.sourceName}
+                and si.source_name like concat('%',#{vo.sourceName},'%')
             </if>
             <if test="vo.beginTime != null">
                 and ai.public_time &gt;= #{vo.beginTime}

+ 3 - 3
src/test/java/com/cslg/ppa/PpaApplicationTests.java

@@ -274,7 +274,7 @@ class PpaApplicationTests {
         //-------------天津知识产权局
 //        String url = "https://zscq.tj.gov.cn/";
         //-------------河南知识产权局
-        String url = "https://scjg.henan.gov.cn/hnzscqj/";
+//        String url = "https://scjg.henan.gov.cn/hnzscqj/";
         //-------------广东省知识产权保护中心
 //        String url = "https://www.gippc.com.cn/ippc/index.shtml";
         //-------------海南知识产权局
@@ -313,7 +313,7 @@ class PpaApplicationTests {
         //-------------山东省市场监督管理局
 //        String url = "http://amr.shandong.gov.cn/";
         //-------------贵州省市场监督管理局(贵州知识产权局)
-//        String url = "https://amr.guizhou.gov.cn/";
+        String url = "https://amr.guizhou.gov.cn/";
         //-------------甘肃市场监督管理局
 //        String url = "https://scjg.gansu.gov.cn/scjg/index.shtml";
         //-------------广西知识产权公告服务平台
@@ -322,7 +322,7 @@ class PpaApplicationTests {
 //        String url = "http://scjg.nx.gov.cn/";
         //-------------新疆市场监督管理局(新疆知识产权局)
 //        String url = "https://scjgj.xinjiang.gov.cn/";
-        getProvinceNewsService.crawlArticles(url,"",1,2);
+        getProvinceNewsService.crawlArticles(url,"贵州省市场监督管理局(知识产权局)",34,2);
 //        String url = "https://www.ahippc.cn/news.html?categoryId=a5e96b641ade4fc9b50b4f9504ba0f62";
 //        final List<GetArticleInfoDTO> articleInfoDTOS = getProvinceNewsService.crawlArticlesDetail(url);
 //        System.out.println(articleInfoDTOS);