1 tydzień temu · 8760aad0e3
--- a/src/main/java/com/cslg/ppa/service/GetWebArticle/GetCNIPAArticleService.java
+++ b/src/main/java/com/cslg/ppa/service/GetWebArticle/GetCNIPAArticleService.java
@@ -1,6 +1,7 @@
 
				 package com.cslg.ppa.service.GetWebArticle;
			
 
				 
			
 
				 
			
 
				+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
			
 
				 import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
			
 
				 import com.cslg.ppa.common.utils.DateUtil;
			
 
				 import com.cslg.ppa.dto.GetArticleInfoDTO;
			
@@ -46,7 +47,7 @@ public class GetCNIPAArticleService {
 
				     /**
			
 
				      * 国家知识产权局
			
 
				      */
			
 
				-    @Scheduled(cron = "0 0 2 * * ?")
			
 
				+    @Scheduled(cron = "0 0 1 * * ?")
			
 
				     @Transactional(propagation = Propagation.REQUIRED,rollbackFor = Throwable.class)
			
 
				     public void getCNIPA(){
			
 
				         System.out.println(new Date() + "CNIPA-Begin");
			
@@ -95,7 +96,7 @@ public class GetCNIPAArticleService {
 
				                 String dateStr = document.getElementsByTag("span").text().trim();
			
 
				                 String yesterdayDateStr = DateUtil.getYesterdayDateStr();
			
 
				                 if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
			
 
				-                    continue;
			
 
				+                    break;
			
 
				                 }
			
 
				                 Date date = new Date();
			
 
				                 SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
			
@@ -109,8 +110,9 @@ public class GetCNIPAArticleService {
 
				                 String title = linkElement.text();
			
 
				 
			
 
				                 // 检查文章是否已存在
			
 
				-                ArticleInfo articleInfo = articleInfoService.getOne(new QueryWrapper<ArticleInfo>().lambda().eq(ArticleInfo::getTitle, title));
			
 
				-                if (ObjectUtils.isNotEmpty(articleInfo)) {
			
 
				+                long count = articleInfoService.count(new LambdaQueryWrapper<ArticleInfo>()
			
 
				+                        .eq(ArticleInfo::getTitle, title));
			
 
				+                if (count > 0) {
			
 
				                     continue; // 文章已存在，跳过
			
 
				                 }
			
 
				 
			
@@ -124,11 +126,9 @@ public class GetCNIPAArticleService {
 
				                     continue;
			
 
				                 }
			
 
				                 String condensedAbstract = null;
			
 
				-//                String pctCondensedAbstract = null;
			
 
				                 try {
			
 
				                     condensedAbstract = difyService.getCondensedAbstract(digest);
			
 
				-//                    pctCondensedAbstract = difyService.getPctCondensedAbstract(digest);
			
 
				-                } catch (Exception e) {
			
 
				+                } catch (Exception ignored) {
			
 
				 
			
 
				                 }
			
 
				                 if (StringUtils.isEmpty(condensedAbstract)) {
			
--- a/src/main/java/com/cslg/ppa/service/GetWebArticle/GetCNIPRArticleService.java
+++ b/src/main/java/com/cslg/ppa/service/GetWebArticle/GetCNIPRArticleService.java
@@ -1,10 +1,13 @@
 
				 package com.cslg.ppa.service.GetWebArticle;
			
 
				 
			
 
				+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
			
 
				 import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
			
 
				 import com.cslg.ppa.common.utils.DateUtil;
			
 
				 import com.cslg.ppa.common.utils.RegexUtil;
			
 
				 import com.cslg.ppa.dto.GetArticleInfoDTO;
			
 
				 import com.cslg.ppa.entity.ArticleInfo;
			
 
				+import com.cslg.ppa.entity.SourceInfo;
			
 
				+import com.cslg.ppa.mapper.SourceInfoMapper;
			
 
				 import com.cslg.ppa.service.ArticleInfoService;
			
 
				 import com.cslg.ppa.service.commom.DifyService;
			
 
				 import lombok.RequiredArgsConstructor;
			
@@ -16,6 +19,7 @@ import org.jsoup.nodes.Document;
 
				 import org.jsoup.nodes.Element;
			
 
				 import org.jsoup.select.Elements;
			
 
				 import org.springframework.beans.factory.annotation.Autowired;
			
 
				+import org.springframework.scheduling.annotation.Scheduled;
			
 
				 import org.springframework.stereotype.Service;
			
 
				 import org.springframework.transaction.annotation.Propagation;
			
 
				 import org.springframework.transaction.annotation.Transactional;
			
@@ -26,6 +30,7 @@ import java.text.SimpleDateFormat;
 
				 import java.util.ArrayList;
			
 
				 import java.util.Date;
			
 
				 import java.util.List;
			
 
				+import java.util.stream.Collectors;
			
 
				 
			
 
				 @Slf4j
			
 
				 @Service
			
@@ -34,16 +39,72 @@ public class GetCNIPRArticleService {
 
				     private final ArticleInfoService articleInfoService;
			
 
				     @Autowired
			
 
				     private DifyService difyService;
			
 
				+    @Autowired
			
 
				+    private SourceInfoMapper sourceInfoMapper;
			
 
				+
			
 
				+    private static final String LINK_SELECTOR = "a[href]";
			
 
				+    private static final String TITLE_SELECTOR = "a";
			
 
				 
			
 
				     /***
			
 
				      * 中国知识产权网
			
 
				-     * @param baseUrl
			
 
				      * @throws IOException
			
 
				      */
			
 
				-    //    @Scheduled(cron = "0 0 2 * * ?")
			
 
				-    @Transactional(propagation = Propagation.REQUIRED,rollbackFor = Throwable.class)
			
 
				-    public void crawCniprArticle(String baseUrl) throws IOException {
			
 
				+    /**
				+     * Scheduled crawl of the CNIPR site.
				+     * <p>
				+     * Loads the source row whose web type is 3, fetches its landing page, collects the
				+     * absolute URLs of the links whose text is exactly "通知公告" or "新闻资讯", and hands each
				+     * distinct URL to {@link #crawCniprArticleDetail(String, Integer, Integer)} with
				+     * category id 6. A failure while fetching one page is logged and does not stop the
				+     * remaining pages.
				+     *
				+     * @throws IOException declared for backward compatibility; network failures are
				+     *                     logged inside this method rather than propagated
				+     */
				+    @Scheduled(cron = "0 0 4 * * ?")
				+    @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Throwable.class)
				+    public void crawCniprArticle() throws IOException {
				         System.out.println(new Date() + "CNIPR-Begin");
				+        SourceInfo sourceInfo = sourceInfoMapper.selectOne(new LambdaQueryWrapper<SourceInfo>()
				+                .eq(SourceInfo::getWebType, 3));
				+        if (ObjectUtils.isNotEmpty(sourceInfo)) {
				+            String baseUrl = sourceInfo.getSourceUrl();
				+            Integer sourceInfoId = sourceInfo.getId();
				+            List<String> list = new ArrayList<>();
				+            try {
				+                Thread.sleep(1000);
				+                // Fetch and parse the landing page with Jsoup
				+                Document doc = Jsoup.connect(baseUrl)
				+                        .timeout(20000) // generous timeout for a slow site
				+                        .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36")
				+                        .followRedirects(true)
				+                        .get();
				+
				+                // Every candidate is already an <a href>, so it is both the title and the link element
				+                Elements newsItems = doc.select(LINK_SELECTOR);
				+                for (Element item : newsItems) {
				+                    String title = item.text().trim();
				+                    if (StringUtils.equals("通知公告", title) || StringUtils.equals("新闻资讯", title)) {
				+                        String link = item.absUrl("href"); // absolute URL, "" when it cannot be resolved
				+                        if (StringUtils.isNotEmpty(link)) {
				+                            list.add(link);
				+                        } else {
				+                            log.warn("CNIPR section link could not be resolved, title: {}", title);
				+                        }
				+                    }
				+                }
				+            } catch (InterruptedException e) {
				+                // Restore the flag so the scheduler thread can observe the interruption
				+                Thread.currentThread().interrupt();
				+                log.warn("CNIPR crawl interrupted before fetching {}", baseUrl, e);
				+            } catch (Exception e) {
				+                log.warn("Failed to fetch CNIPR landing page {}", baseUrl, e);
				+            }
				+
				+            if (!CollectionUtils.isEmpty(list)) {
				+                List<String> collect = list.stream().distinct().collect(Collectors.toList());
				+                for (String url : collect) {
				+                    try {
				+                        this.crawCniprArticleDetail(url, sourceInfoId, 6);
				+                    } catch (IOException e) {
				+                        // Best effort: one unreachable section must not stop the others
				+                        log.warn("Failed to crawl CNIPR section {}", url, e);
				+                    }
				+                }
				+            }
				+        } else {
				+            log.warn("No source configured with web type 3, CNIPR crawl skipped");
				+        }
				+        System.out.println(new Date() + "CNIPR-End");
				+    }
			
 
				+
			
 
				+    public void crawCniprArticleDetail(String baseUrl,Integer sourceId, Integer type) throws IOException {
			
 
				         // 使用Jsoup连接并解析网页
			
 
				         Document doc = Jsoup.connect(baseUrl)
			
 
				                 .timeout(20000) // 增加超时时间
			
@@ -57,6 +118,9 @@ public class GetCNIPRArticleService {
 
				         List<GetArticleInfoDTO> articleInfoDTOS = new ArrayList<>();
			
 
				         for (Element linkElement : linkElements) {
			
 
				             Element element = linkElement.getElementsByTag("a").first();
			
 
				+            if (element == null) {
			
 
				+                continue;
			
 
				+            }
			
 
				             String link = element.absUrl("href");
			
 
				             // 如果absUrl没有返回有效链接，尝试其他方式
			
 
				             if (StringUtils.isEmpty(link)) {
			
@@ -75,7 +139,7 @@ public class GetCNIPRArticleService {
 
				             }
			
 
				 
			
 
				             try {
			
 
				-                GetArticleInfoDTO articleInfoDTO = this.addCniprArticle(link);
			
 
				+                GetArticleInfoDTO articleInfoDTO = this.addCniprArticle(link, sourceId, type);
			
 
				                 if (ObjectUtils.isNotEmpty(articleInfoDTO)) {
			
 
				                     articleInfoDTOS.add(articleInfoDTO);
			
 
				                 }
			
@@ -83,10 +147,9 @@ public class GetCNIPRArticleService {
 
				             }
			
 
				         }
			
 
				         articleInfoService.batchAddArticleInfo(articleInfoDTOS);
			
 
				-        System.out.println(new Date() + "CNIPR-Begin");
			
 
				     }
			
 
				 
			
 
				-    public GetArticleInfoDTO addCniprArticle(String baseUrl) throws IOException {
			
 
				+    public GetArticleInfoDTO addCniprArticle(String baseUrl, Integer sourceId, Integer type) throws IOException {
			
 
				         // 使用Jsoup连接并解析网页
			
 
				         Document doc = Jsoup.connect(baseUrl)
			
 
				                 .timeout(15000) // 增加超时时间
			
@@ -113,8 +176,9 @@ public class GetCNIPRArticleService {
 
				         Element titleElement = initElement.selectFirst("div.xq_cont_title");
			
 
				         String title = titleElement.select("p").first().text();
			
 
				         // 检查文章是否已存在
			
 
				-        ArticleInfo articleInfo = articleInfoService.getOne(new QueryWrapper<ArticleInfo>().lambda().eq(ArticleInfo::getTitle, title));
			
 
				-        if (ObjectUtils.isNotEmpty(articleInfo)) {
			
 
				+        long count = articleInfoService.count(new LambdaQueryWrapper<ArticleInfo>()
			
 
				+                .eq(ArticleInfo::getTitle, title));
			
 
				+        if (count > 0) {
			
 
				             return null;
			
 
				         }
			
 
				 
			
@@ -142,8 +206,8 @@ public class GetCNIPRArticleService {
 
				             condensedAbstract = "";
			
 
				         }
			
 
				         GetArticleInfoDTO articleInfoDTO = new GetArticleInfoDTO();
			
 
				-        articleInfoDTO.setCategoryId(6);
			
 
				-        articleInfoDTO.setSourceId(38);
			
 
				+        articleInfoDTO.setCategoryId(type);
			
 
				+        articleInfoDTO.setSourceId(sourceId);
			
 
				         articleInfoDTO.setArticleUrl(baseUrl);
			
 
				         articleInfoDTO.setTitle(title);
			
 
				         articleInfoDTO.setPublicTime(date);
			
--- a/src/main/java/com/cslg/ppa/service/GetWebArticle/GetIprDailyArticleService.java
+++ b/src/main/java/com/cslg/ppa/service/GetWebArticle/GetIprDailyArticleService.java
@@ -0,0 +1,211 @@
 
				+package com.cslg.ppa.service.GetWebArticle;
			
 
				+
			
 
				+import com.alibaba.fastjson.JSONObject;
			
 
				+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
			
 
				+import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
			
 
				+import com.cslg.ppa.common.utils.DateUtil;
			
 
				+import com.cslg.ppa.dto.GetArticleInfoDTO;
			
 
				+import com.cslg.ppa.entity.ArticleInfo;
			
 
				+import com.cslg.ppa.entity.SourceInfo;
			
 
				+import com.cslg.ppa.mapper.SourceInfoMapper;
			
 
				+import com.cslg.ppa.service.ArticleInfoService;
			
 
				+import com.cslg.ppa.service.commom.DifyService;
			
 
				+import lombok.RequiredArgsConstructor;
			
 
				+import lombok.extern.slf4j.Slf4j;
			
 
				+import okhttp3.OkHttpClient;
			
 
				+import okhttp3.Request;
			
 
				+import org.apache.commons.lang3.ObjectUtils;
			
 
				+import org.apache.commons.lang3.StringUtils;
			
 
				+import org.apache.http.HttpResponse;
			
 
				+import org.apache.http.client.methods.HttpGet;
			
 
				+import org.apache.http.impl.client.CloseableHttpClient;
			
 
				+import org.apache.http.impl.client.HttpClients;
			
 
				+import org.apache.http.util.EntityUtils;
			
 
				+import org.jsoup.Jsoup;
			
 
				+import org.jsoup.nodes.Document;
			
 
				+import org.jsoup.nodes.Element;
			
 
				+import org.jsoup.select.Elements;
			
 
				+import org.springframework.beans.factory.annotation.Autowired;
			
 
				+import org.springframework.scheduling.annotation.Scheduled;
			
 
				+import org.springframework.stereotype.Service;
			
 
				+import org.springframework.transaction.annotation.Propagation;
			
 
				+import org.springframework.transaction.annotation.Transactional;
			
 
				+import org.springframework.util.CollectionUtils;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+import java.text.SimpleDateFormat;
			
 
				+import java.util.*;
			
 
				+import java.util.concurrent.TimeUnit;
			
 
				+
			
 
				+/**
				+ * Crawler for the IPRDaily Chinese site (www.iprdaily.cn).
				+ * <p>
				+ * For each configured category it downloads the list fragment, keeps the entries dated
				+ * yesterday whose title is not stored yet, downloads each article page, condenses the
				+ * article text through {@link DifyService} and saves the results in one batch per category.
				+ */
				+@Slf4j
				+@Service
				+@RequiredArgsConstructor
				+public class GetIprDailyArticleService {
				+    private final ArticleInfoService articleInfoService;
				+    @Autowired
				+    private DifyService difyService;
				+    @Autowired
				+    private SourceInfoMapper sourceInfoMapper;
				+
				+    /** Site root; relative article links are resolved against it. */
				+    private static final String BASE_URL = "http://www.iprdaily.cn/";
				+    /** Source id written to every article produced by this crawler. */
				+    private static final Integer BASE_ID = 40;
				+    /** List endpoint; the site's category id (catid) is appended to it. */
				+    private static final String LIST_API_URL =
				+            "http://www.iprdaily.cn/index.php?s=api&c=api&m=template&name=get_news.html&catid=";
				+
				+    /**
				+     * Crawls every configured IPRDaily category.
				+     * <p>
				+     * A category whose list cannot be downloaded or parsed is logged and skipped so the
				+     * remaining categories are still processed. Failures raised while the articles of a
				+     * category are being stored are not caught here and propagate to the caller.
				+     *
				+     * @throws Exception if processing or storing the articles of a category fails
				+     */
				+    // Scheduling is currently disabled; re-enable with: @Scheduled(cron = "0 15 0 * * ?")
				+    @Transactional(propagation = Propagation.REQUIRED,rollbackFor = Throwable.class)
				+    public void crawIprDailyArticle() throws Exception {
				+        System.out.println(new Date() + "IprDaily-Begin");
				+        // list URL -> category id stored on the article; insertion order is kept
				+        Map<String, Integer> urlMap = new LinkedHashMap<>();
				+        // cases
				+        urlMap.put(LIST_API_URL + "9", 3);
				+        // industry - litigation
				+        urlMap.put(LIST_API_URL + "11", 5);
				+        // industry - international perspective
				+        urlMap.put(LIST_API_URL + "15", 5);
				+        // industry - industry
				+        urlMap.put(LIST_API_URL + "8", 5);
				+
				+        // One client is enough for all requests of this run
				+        OkHttpClient okHttpClient = new OkHttpClient.Builder()
				+                .connectTimeout(60, TimeUnit.SECONDS)
				+                .writeTimeout(60, TimeUnit.SECONDS)
				+                .readTimeout(60, TimeUnit.SECONDS)
				+                .build();
				+        for (Map.Entry<String, Integer> entry : urlMap.entrySet()) {
				+            String listUrl = entry.getKey();
				+            String htmlContent;
				+            try {
				+                Request request = new Request.Builder()
				+                        .url(listUrl)
				+                        .get()
				+                        .build();
				+                // string() reads the whole body and closes it
				+                String res = Objects.requireNonNull(okHttpClient.newCall(request).execute().body()).string();
				+                JSONObject parseObject = JSONObject.parseObject(res);
				+                // The HTML fragment of the list is carried in the "msg" field
				+                htmlContent = parseObject == null ? null : parseObject.getString("msg");
				+            } catch (IOException | RuntimeException e) {
				+                log.error("Get IPRDaily Web Article Error: {}", listUrl, e);
				+                continue;
				+            }
				+            if (StringUtils.isEmpty(htmlContent)) {
				+                log.warn("IPRDaily list response has no msg content: {}", listUrl);
				+                continue;
				+            }
				+            this.addIprDailyArticle(htmlContent, BASE_URL, BASE_ID, entry.getValue());
				+        }
				+        System.out.println(new Date() + "IprDaily-End");
				+    }
				+
				+    /**
				+     * Parses one list fragment and stores the new articles it contains.
				+     * <p>
				+     * Iteration stops at the first entry whose date is not yesterday, so entries after it
				+     * are never looked at. NOTE(review): this presumes the list is ordered newest first and
				+     * holds no entry dated today when the job runs; confirm against the site.
				+     *
				+     * @param htmlContent HTML fragment containing the {@code li.box-list} entries
				+     * @param baseUrl     URL used to turn relative article links into absolute ones
				+     * @param sourceId    source id stored on every created article
				+     * @param type        category id stored on every created article
				+     * @throws IOException declared for backward compatibility; download failures of single
				+     *                     articles are logged and the article is skipped
				+     */
				+    public void addIprDailyArticle(String htmlContent, String baseUrl,Integer sourceId, Integer type) throws IOException {
				+        Document doc = Jsoup.parse(htmlContent);
				+        Elements liElements = doc.select("li.box-list");
				+        // Loop invariants: computed once instead of once per entry
				+        String yesterdayDateStr = DateUtil.getYesterdayDateStr();
				+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
				+        List<GetArticleInfoDTO> articleInfoDTOS = new ArrayList<>();
				+        for (Element liElement : liElements) {
				+            Element timeElement = liElement.selectFirst("dd.time");
				+            Element titleElement = liElement.selectFirst("dt.title");
				+            Element imgBox = liElement.selectFirst("a.img-box");
				+            if (timeElement == null || titleElement == null || imgBox == null) {
				+                // A malformed entry must not abort the batch collected so far
				+                log.warn("IPRDaily list entry is missing date, title or link, skipped");
				+                continue;
				+            }
				+
				+            String dateStr = timeElement.text();
				+            if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
				+                break;
				+            }
				+            Date date;
				+            try {
				+                date = dateFormat.parse(dateStr);
				+            } catch (Exception e) {
				+                continue;
				+            }
				+
				+            String title = titleElement.text();
				+            // Skip articles whose title is already stored
				+            long count = articleInfoService.count(new LambdaQueryWrapper<ArticleInfo>()
				+                    .eq(ArticleInfo::getTitle, title));
				+            if (count > 0) {
				+                continue;
				+            }
				+
				+            String link = imgBox.attr("href");
				+            if (StringUtils.isEmpty(link)) {
				+                // Without an href the link would resolve to the site root, not to the article
				+                log.warn("IPRDaily article has no link, title: {}", title);
				+                continue;
				+            }
				+            link = resolveLink(baseUrl, link);
				+
				+            String digest;
				+            try {
				+                digest = this.getDigest(link);
				+            } catch (Exception e) {
				+                log.warn("Failed to download IPRDaily article {}", link, e);
				+                digest = "";
				+            }
				+            if (StringUtils.isEmpty(digest)) {
				+                continue;
				+            }
				+
				+            String condensedAbstract = null;
				+            try {
				+                condensedAbstract = difyService.getCondensedAbstract(digest);
				+            } catch (Exception e) {
				+                // Best effort: an article without a condensed abstract is skipped below
				+                log.warn("Failed to condense IPRDaily article {}", link, e);
				+            }
				+            if (StringUtils.isEmpty(condensedAbstract)) {
				+                continue;
				+            }
				+
				+            GetArticleInfoDTO articleInfoDTO = new GetArticleInfoDTO();
				+            articleInfoDTO.setCategoryId(type);
				+            articleInfoDTO.setSourceId(sourceId);
				+            articleInfoDTO.setArticleUrl(link);
				+            articleInfoDTO.setTitle(title);
				+            articleInfoDTO.setPublicTime(date);
				+            articleInfoDTO.setDigest(condensedAbstract);
				+            articleInfoDTOS.add(articleInfoDTO);
				+        }
				+        articleInfoService.batchAddArticleInfo(articleInfoDTOS);
				+    }
				+
				+    /**
				+     * Turns a link taken from the list into an absolute URL.
				+     *
				+     * @param baseUrl URL the list belongs to
				+     * @param link    non-empty href value
				+     * @return the link unchanged when it starts with "http", otherwise the link prefixed
				+     *         with the scheme and host of {@code baseUrl} (link starts with "/") or with
				+     *         the directory of {@code baseUrl}
				+     */
				+    private static String resolveLink(String baseUrl, String link) {
				+        if (link.startsWith("/")) {
				+            // Searching from index 8 skips the "//" of http:// and https://
				+            int endIndex = baseUrl.indexOf("/", 8);
				+            String baseDomain = endIndex > 0 ? baseUrl.substring(0, endIndex) : baseUrl;
				+            return baseDomain + link;
				+        }
				+        if (!link.startsWith("http")) {
				+            int lastSlash = baseUrl.lastIndexOf("/");
				+            String basePath = lastSlash > 0 ? baseUrl.substring(0, lastSlash + 1) : baseUrl + "/";
				+            return basePath + link;
				+        }
				+        return link;
				+    }
				+
				+    /**
				+     * Downloads an article page and returns its paragraph text.
				+     *
				+     * @param url absolute URL of the article page
				+     * @return the extracted text, or an empty string when the response has no body or
				+     *         no article text can be found
				+     * @throws IOException if the request fails
				+     */
				+    public String getDigest(String url) throws IOException {
				+        // try-with-resources releases the client and its connection even when the request throws
				+        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
				+            HttpResponse response = httpClient.execute(new HttpGet(url));
				+            if (response.getEntity() == null) {
				+                return "";
				+            }
				+            // UTF-8 is only the fallback used when the response declares no charset
				+            String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8");
				+            return responseBody == null ? "" : this.readJson(responseBody);
				+        }
				+    }
				+
				+    /**
				+     * Extracts the article text from an article page. Despite its name, the input is HTML.
				+     *
				+     * @param responseBody HTML of the article page
				+     * @return the non-empty {@code <p>} texts inside {@code dl.article-con} joined by line
				+     *         breaks; the whole text of that container when it has no non-empty
				+     *         paragraph; an empty string when the container is missing or parsing fails
				+     */
				+    public String readJson(String responseBody) {
				+        if (StringUtils.isEmpty(responseBody)) {
				+            return "";
				+        }
				+        try {
				+            Document doc = Jsoup.parse(responseBody);
				+            Element articleBody = doc.selectFirst("dl.article-con");
				+            if (articleBody == null) {
				+                return "";
				+            }
				+            List<String> paragraphs = new ArrayList<>();
				+            for (Element paragraph : articleBody.select("p")) {
				+                String text = paragraph.text();
				+                if (StringUtils.isNotEmpty(text)) {
				+                    paragraphs.add(text);
				+                }
				+            }
				+            if (CollectionUtils.isEmpty(paragraphs)) {
				+                // The paragraphs yielded nothing, so fall back to the container's own text
				+                return articleBody.text();
				+            }
				+            return StringUtils.join(paragraphs, "\n");
				+        } catch (Exception e) {
				+            log.warn("Failed to extract IPRDaily article text", e);
				+            return "";
				+        }
				+    }
				+}
			
--- a/src/main/java/com/cslg/ppa/service/GetWebArticle/GetLocalInformationService.java
+++ b/src/main/java/com/cslg/ppa/service/GetWebArticle/GetLocalInformationService.java
@@ -22,7 +22,7 @@ import java.util.*;
 
				 public class GetLocalInformationService {
			
 
				     private final GetCNIPAArticleService getCNIPAArticleService;
			
 
				 
			
 
				-    @Scheduled(cron = "0 0 3 * * ?")
			
 
				+    @Scheduled(cron = "0 0 2 * * ?")
			
 
				     @Transactional(propagation = Propagation.REQUIRED,rollbackFor = Throwable.class)
			
 
				     public void getLocalInformation(){
			
 
				         System.out.println(new Date() + "Local-Begin");
			
--- a/src/main/java/com/cslg/ppa/service/GetWebArticle/GetPCTArticleService.java
+++ b/src/main/java/com/cslg/ppa/service/GetWebArticle/GetPCTArticleService.java
@@ -23,7 +23,7 @@ import java.util.Map;
 
				 public class GetPCTArticleService {
			
 
				     private final GetCNIPAArticleService getCNIPAArticleService;
			
 
				 
			
 
				-    @Scheduled(cron = "0 30 2 * * ?")
			
 
				+    @Scheduled(cron = "0 30 1 * * ?")
			
 
				     @Transactional(propagation = Propagation.REQUIRED,rollbackFor = Throwable.class)
			
 
				     public void getPCTArticle(){
			
 
				         System.out.println(new Date() + "PCT-Begin");
			
--- a/src/main/java/com/cslg/ppa/service/GetWebArticle/GetProvinceNewsService.java
+++ b/src/main/java/com/cslg/ppa/service/GetWebArticle/GetProvinceNewsService.java
@@ -58,7 +58,6 @@ public class GetProvinceNewsService {
 
				     @Autowired
			
 
				     private SourceInfoMapper sourceInfoMapper;
			
 
				 
			
 
				-
			
 
				     private static List<String> TitleList = Arrays.asList("要闻动态", "通知公告", "公示公告", "公告");
			
 
				     // 新闻动态项的CSS选择器 (通用配置，适用于大部分政府网站)
			
 
				     private static final String NEWS_CONTAINER_SELECTOR = "div.con-right-list,div.main-content-right,div.tab-content,div.subpageCon-con,div.gl-main,div.nymain," +
			
@@ -80,6 +79,7 @@ public class GetProvinceNewsService {
 
				                 .eq(SourceInfo::getSourceType, 1)
			
 
				                 .eq(SourceInfo::getWebType, 2));
			
 
				         if (!CollectionUtils.isEmpty(sourceInfos)) {
			
 
				+            int sum = 0;
			
 
				             for (SourceInfo sourceInfo : sourceInfos) {
			
 
				                 try {
			
 
				                     Thread.sleep(1000);
			
@@ -114,7 +114,9 @@ public class GetProvinceNewsService {
 
				                         log.warn("解析" + sourceName + "地区新闻项时出错: ", e);
			
 
				                     }
			
 
				                 }
			
 
				+                sum++;
			
 
				             }
			
 
				+            System.out.println("Finished Num" + sum + "Time");
			
 
				         }
			
 
				         System.out.println(new Date() + "Province-Begin");
			
 
				     }
			
@@ -262,9 +264,9 @@ public class GetProvinceNewsService {
 
				                 dateStr = getHainanDate(dateStr);
			
 
				             }
			
 
				             String yesterdayDateStr = DateUtil.getYesterdayDateStr();
			
 
				-            if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
			
 
				-                break;
			
 
				-            }
			
 
				+//            if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
			
 
				+//                break;
			
 
				+//            }
			
 
				             Date date = new Date();
			
 
				             SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
			
 
				             try {
			
@@ -435,9 +437,9 @@ public class GetProvinceNewsService {
 
				                         String docreltime = articleDTO.getDOCRELTIME();
			
 
				                         String dateStr = RegexUtil.extractDate(docreltime);
			
 
				                         String yesterdayDateStr = DateUtil.getYesterdayDateStr();
			
 
				-                        if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
			
 
				-                            break;
			
 
				-                        }
			
 
				+//                        if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
			
 
				+//                            break;
			
 
				+//                        }
			
 
				                         Date date = new Date();
			
 
				                         SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
			
 
				                         try {
			
@@ -559,9 +561,9 @@ public class GetProvinceNewsService {
 
				             Element element = linkElement.select("p.zkmmr_tl1_item_date").first();
			
 
				             String dateStr = element.text().trim();
			
 
				             String yesterdayDateStr = DateUtil.getYesterdayDateStr();
			
 
				-            if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
			
 
				-                break;
			
 
				-            }
			
 
				+//            if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
			
 
				+//                break;
			
 
				+//            }
			
 
				             Date date = new Date();
			
 
				             SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
			
 
				             try {
			
@@ -581,7 +583,6 @@ public class GetProvinceNewsService {
 
				                 continue; // 文章已存在，跳过
			
 
				             }
			
 
				 
			
 
				-
			
 
				             String link = linkElement.absUrl("href");
			
 
				             String digest = null;
			
 
				             try {
			
@@ -652,9 +653,9 @@ public class GetProvinceNewsService {
 
				                 String pubDate = source.get("pubDate").toString();
			
 
				                 String dateStr = RegexUtil.extractDate(pubDate);
			
 
				                 String yesterdayDateStr = DateUtil.getYesterdayDateStr();
			
 
				-                if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
			
 
				-                    break;
			
 
				-                }
			
 
				+//                if (!StringUtils.equals(dateStr, yesterdayDateStr)) {
			
 
				+//                    break;
			
 
				+//                }
			
 
				                 Date date = new Date();
			
 
				                 SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
			
 
				                 try {
			
--- a/src/main/java/com/cslg/ppa/service/GetWebArticle/GetWeChatArticleService.java
+++ b/src/main/java/com/cslg/ppa/service/GetWebArticle/GetWeChatArticleService.java
@@ -70,7 +70,7 @@ public class GetWeChatArticleService {
 
				     @Autowired
			
 
				     private FileManagerService fileManagerService;
			
 
				 
			
 
				-    @Scheduled(cron = "0 0 4 * * ?")
			
 
				+    @Scheduled(cron = "0 0 3 * * ?")
			
 
				     @Transactional(propagation = Propagation.REQUIRED,rollbackFor = Throwable.class)
			
 
				     public void getWeChatArticle() throws Exception {
			
 
				         System.out.println(new Date() + "Wechat-Begin");
			
@@ -88,7 +88,7 @@ public class GetWeChatArticleService {
 
				                 String createTimeStr = DateUtil.convertTimestamp(secondCreateTime);
			
 
				                 String yesterdayDateStr = DateUtil.getYesterdayDateStr();
			
 
				                 if (!StringUtils.equals(createTimeStr, yesterdayDateStr)) {
			
 
				-                    continue;
			
 
				+                    break;
			
 
				                 }
			
 
				                 Date createTime = new Date();
			
 
				                 SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
			
@@ -103,8 +103,9 @@ public class GetWeChatArticleService {
 
				 
			
 
				 
			
 
				                 // 检查文章是否已存在
			
 
				-                ArticleInfo articleInfo = articleInfoService.getOne(new QueryWrapper<ArticleInfo>().lambda().eq(ArticleInfo::getTitle, title));
			
 
				-                if (ObjectUtils.isNotEmpty(articleInfo)) {
			
 
				+                long sum = articleInfoService.count(new LambdaQueryWrapper<ArticleInfo>()
			
 
				+                        .eq(ArticleInfo::getTitle, title));
			
 
				+                if (sum > 0) {
			
 
				                     continue; // 文章已存在，跳过
			
 
				                 }
			
 
				 
			
--- a/src/main/java/com/cslg/ppa/service/GetWebArticle/GetZhiNanZhenArticleService.java
+++ b/src/main/java/com/cslg/ppa/service/GetWebArticle/GetZhiNanZhenArticleService.java
@@ -1,10 +1,13 @@
 
				 package com.cslg.ppa.service.GetWebArticle;
			
 
				 
			
 
				+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
			
 
				 import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
			
 
				 import com.cslg.ppa.common.utils.DateUtil;
			
 
				 import com.cslg.ppa.common.utils.RegexUtil;
			
 
				 import com.cslg.ppa.dto.GetArticleInfoDTO;
			
 
				 import com.cslg.ppa.entity.ArticleInfo;
			
 
				+import com.cslg.ppa.entity.SourceInfo;
			
 
				+import com.cslg.ppa.mapper.SourceInfoMapper;
			
 
				 import com.cslg.ppa.service.ArticleInfoService;
			
 
				 import com.cslg.ppa.service.commom.DifyService;
			
 
				 import lombok.RequiredArgsConstructor;
			
@@ -16,6 +19,7 @@ import org.jsoup.nodes.Document;
 
				 import org.jsoup.nodes.Element;
			
 
				 import org.jsoup.select.Elements;
			
 
				 import org.springframework.beans.factory.annotation.Autowired;
			
 
				+import org.springframework.scheduling.annotation.Scheduled;
			
 
				 import org.springframework.stereotype.Service;
			
 
				 import org.springframework.transaction.annotation.Propagation;
			
 
				 import org.springframework.transaction.annotation.Transactional;
			
@@ -36,32 +40,39 @@ public class GetZhiNanZhenArticleService {
 
				     private final ArticleInfoService articleInfoService;
			
 
				     @Autowired
			
 
				     private DifyService difyService;
			
 
				+    @Autowired
			
 
				+    private SourceInfoMapper sourceInfoMapper;
			
 
				 
			
 
				     /**
			
 
				      * 智南针
			
 
				      *
			
 
				-     * @param baseUrl
			
 
				      * @throws Exception
			
 
				      */
			
 
				-    //    @Scheduled(cron = "0 0 2 * * ?")
			
 
				-    @Transactional(propagation = Propagation.REQUIRED,rollbackFor = Throwable.class)
			
 
				-    public void crawZhiNanZhenArticle(String baseUrl) throws Exception {
			
 
				+    @Scheduled(cron = "0 30 4 * * ?")
			
 
				+    @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Throwable.class)
			
 
				+    public void crawZhiNanZhenArticle() throws Exception {
			
 
				         System.out.println(new Date() + "ZhiNanZhen-Begin");
			
 
				-        // 使用Jsoup连接并解析网页
			
 
				-        Document doc = Jsoup.connect(baseUrl)
			
 
				-                .timeout(20000) // 增加超时时间
			
 
				-                .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36")
			
 
				-                .followRedirects(true) // 跟随重定向
			
 
				-                .get();
			
 
				-        List<String> list = Arrays.asList("海外知识产权动态信息", "海外知识产权实务指引", "海外知识产权官费专栏");
			
 
				-        // 遍历每个栏目并抓取资讯链接
			
 
				-        for (String sectionTitle : list) {
			
 
				-            extractSectionLinks(doc, sectionTitle);
			
 
				+        SourceInfo sourceInfo = sourceInfoMapper.selectOne(new LambdaQueryWrapper<SourceInfo>()
			
 
				+                .eq(SourceInfo::getWebType, 4));
			
 
				+        if (ObjectUtils.isNotEmpty(sourceInfo)) {
			
 
				+            String sourceUrl = sourceInfo.getSourceUrl();
			
 
				+            Integer sourceInfoId = sourceInfo.getId();
			
 
				+            // 使用Jsoup连接并解析网页
			
 
				+            Document doc = Jsoup.connect(sourceUrl)
			
 
				+                    .timeout(20000) // 增加超时时间
			
 
				+                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36")
			
 
				+                    .followRedirects(true) // 跟随重定向
			
 
				+                    .get();
			
 
				+            List<String> list = Arrays.asList("海外知识产权动态信息", "海外知识产权实务指引", "海外知识产权官费专栏");
			
 
				+            // 遍历每个栏目并抓取资讯链接
			
 
				+            for (String sectionTitle : list) {
			
 
				+                this.extractSectionLinks(doc, sectionTitle, sourceInfoId, 11);
			
 
				+            }
			
 
				         }
			
 
				         System.out.println(new Date() + "ZhiNanZhen-End");
			
 
				     }
			
 
				 
			
 
				-    private void extractSectionLinks(Document doc, String sectionTitle) throws Exception {
			
 
				+    private void extractSectionLinks(Document doc, String sectionTitle, Integer sourceId, Integer type) throws Exception {
			
 
				         List<String> links = new ArrayList<>();
			
 
				         // 选择所有section元素
			
 
				         Elements sections = doc.select("section.section");
			
@@ -74,7 +85,7 @@ public class GetZhiNanZhenArticleService {
 
				                 for (Element item : newsItems) {
			
 
				                     String href = item.attr("href");
			
 
				                     // 确保href属性存在且不是空值，并且不是非资讯链接
			
 
				-                    if (!href.isEmpty() && !isValidNewsLink(item,href,item.text())) {
			
 
				+                    if (!href.isEmpty() && !isValidNewsLink(item, href, item.text())) {
			
 
				                         links.add(href);
			
 
				                     }
			
 
				                 }
			
@@ -85,7 +96,7 @@ public class GetZhiNanZhenArticleService {
 
				         if (!CollectionUtils.isEmpty(links)) {
			
 
				             for (String link : links) {
			
 
				                 try {
			
 
				-                    GetArticleInfoDTO articleInfoDTO = this.addZhiNanZhenArticle(link);
			
 
				+                    GetArticleInfoDTO articleInfoDTO = this.addZhiNanZhenArticle(link, sourceId, type);
			
 
				                     if (ObjectUtils.isNotEmpty(articleInfoDTO)) {
			
 
				                         articleInfoDTOS.add(articleInfoDTO);
			
 
				                     }
			
@@ -98,11 +109,12 @@ public class GetZhiNanZhenArticleService {
 
				 
			
 
				     /**
			
 
				      * 添加智南针网资讯内容到资讯表中
			
 
				+     *
			
 
				      * @param baseUrl
			
 
				      * @return
			
 
				      * @throws IOException
			
 
				      */
			
 
				-    public GetArticleInfoDTO addZhiNanZhenArticle(String baseUrl) throws IOException {
			
 
				+    public GetArticleInfoDTO addZhiNanZhenArticle(String baseUrl, Integer sourceId, Integer type) throws IOException {
			
 
				         // 使用Jsoup连接并解析网页
			
 
				         Document doc = Jsoup.connect(baseUrl)
			
 
				                 .timeout(15000) // 增加超时时间
			
@@ -130,8 +142,9 @@ public class GetZhiNanZhenArticleService {
 
				         String titleContent = titleElement.text();
			
 
				         String title = titleContent.substring(0, (titleContent.length() - 23)).trim();
			
 
				         // 检查文章是否已存在
			
 
				-        ArticleInfo articleInfo = articleInfoService.getOne(new QueryWrapper<ArticleInfo>().lambda().eq(ArticleInfo::getTitle, title));
			
 
				-        if (ObjectUtils.isNotEmpty(articleInfo)) {
			
 
				+        long sum = articleInfoService.count(new LambdaQueryWrapper<ArticleInfo>()
			
 
				+                .eq(ArticleInfo::getTitle, title));
			
 
				+        if (sum > 0) {
			
 
				             return null;
			
 
				         }
			
 
				 
			
@@ -160,8 +173,8 @@ public class GetZhiNanZhenArticleService {
 
				             condensedAbstract = "";
			
 
				         }
			
 
				         GetArticleInfoDTO articleInfoDTO = new GetArticleInfoDTO();
			
 
				-        articleInfoDTO.setCategoryId(6);
			
 
				-        articleInfoDTO.setSourceId(39);
			
 
				+        articleInfoDTO.setCategoryId(type);
			
 
				+        articleInfoDTO.setSourceId(sourceId);
			
 
				         articleInfoDTO.setArticleUrl(baseUrl);
			
 
				         articleInfoDTO.setTitle(title);
			
 
				         articleInfoDTO.setPublicTime(date);
			
--- a/src/main/resources/application-dev.yml
+++ b/src/main/resources/application-dev.yml
--- a/src/test/java/com/cslg/ppa/PpaApplicationTests.java
+++ b/src/test/java/com/cslg/ppa/PpaApplicationTests.java
@@ -89,6 +89,12 @@ class PpaApplicationTests {
 
				     private FileManagerService fileManagerService;
			
 
				     @Autowired
			
 
				     private GetPCTArticleService getPCTArticleService;
			
 
				+    @Autowired
			
 
				+    private GetIprDailyArticleService getIprDailyArticleService;
			
 
				+    @Autowired
			
 
				+    private GetZhiNanZhenArticleService getZhiNanZhenArticleService;
			
 
				+    @Autowired
			
 
				+    private GetCNIPRArticleService getCNIPRArticleService;
			
 
				 
			
 
				     @Test
			
 
				     void contextLoads() {
			
@@ -268,7 +274,7 @@ class PpaApplicationTests {
 
				         //-------------天津知识产权局
			
 
				 //        String url = "https://zscq.tj.gov.cn/";
			
 
				         //-------------河南知识产权局
			
 
				-//        String url = "https://scjg.henan.gov.cn/hnzscqj/";
			
 
				+        String url = "https://scjg.henan.gov.cn/hnzscqj/";
			
 
				         //-------------广东省知识产权保护中心
			
 
				 //        String url = "https://www.gippc.com.cn/ippc/index.shtml";
			
 
				         //-------------海南知识产权局
			
@@ -315,7 +321,7 @@ class PpaApplicationTests {
 
				         //-------------宁夏市场监督管理局
			
 
				 //        String url = "http://scjg.nx.gov.cn/";
			
 
				         //-------------新疆市场监督管理局（新疆知识产权局）
			
 
				-        String url = "https://scjgj.xinjiang.gov.cn/";
			
 
				+//        String url = "https://scjgj.xinjiang.gov.cn/";
			
 
				         getProvinceNewsService.crawlArticles(url,"",1,2);
			
 
				 //        String url = "https://www.ahippc.cn/news.html?categoryId=a5e96b641ade4fc9b50b4f9504ba0f62";
			
 
				 //        final List<GetArticleInfoDTO> articleInfoDTOS = getProvinceNewsService.crawlArticlesDetail(url);
			
@@ -327,13 +333,6 @@ class PpaApplicationTests {
 
				 
			
 
				     @Test
			
 
				     void addArticleFromWebSource() throws Exception {
			
 
				-//        String url = "https://scjg.hebei.gov.cn/";
			
 
				-//        getProvinceNewsService.crawlHebeiArticles(url);
			
 
				-//        String url = "https://scjg.hebei.gov.cn/node/919";
			
 
				-//        getProvinceNewsService.addHebeiArticle(url);
			
 
				-//        String url = "https://zscqj.hubei.gov.cn/";
			
 
				-//        getProvinceNewsService.crawlHubeiArticles(url);
			
 
				-//        getProvinceNewsService.crawlJiangxiArticles("https://amr.jiangxi.gov.cn/");
			
 
				         getProvinceNewsService.addArticleFromWebSource();
			
 
				 
			
 
				     }
			
@@ -383,7 +382,17 @@ class PpaApplicationTests {
 
				     }
			
 
				 
			
 
				     @Test
			
 
				-    void test111() throws Exception {
			
 
				+    void crawZhiNanZhenArticle() throws Exception {
			
 
				+        getZhiNanZhenArticleService.crawZhiNanZhenArticle();
			
 
				+    }
			
 
				+
			
 
				+    @Test
			
 
				+    void crawCniprArticle() throws Exception {
			
 
				+        getCNIPRArticleService.crawCniprArticle();
			
 
				+    }
			
 
				+
			
 
				+    @Test
			
 
				+    void crawIprDailyArticle() throws Exception {
			
 
				         //-------------中国知识产权网
			
 
				 //        String url = "http://www.cnipr.com/";
			
 
				 //        getProvinceNewsService.test(url);
			
@@ -394,5 +403,8 @@ class PpaApplicationTests {
 
				 //        String url = "https://www.worldip.cn/index.php?m=content&c=index&a=show&catid=64&id=2996";
			
 
				 //        getProvinceNewsService.getDigest2(url);
			
 
				 
			
 
				+        getIprDailyArticleService.crawIprDailyArticle();
			
 
				+
			
 
				+//        getIprDailyArticleService.getDigest("http://www.iprdaily.cn/news_40675.html");
			
 
				     }
			
 
				 }