package com.cslg.ppa; import com.alibaba.fastjson2.JSON; import com.alibaba.fastjson2.JSONArray; import com.alibaba.fastjson2.JSONObject; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.cslg.ppa.common.okhttp.MyCookieStore; import com.cslg.ppa.common.utils.DateUtil; import com.cslg.ppa.dto.GetArticleInfoDTO; import com.cslg.ppa.dto.ReportDTO; import com.cslg.ppa.dto.SelectArticleInfoDTO; import com.cslg.ppa.dto.SelectCategoryListDTO; import com.cslg.ppa.entity.ArticleInfo; import com.cslg.ppa.entity.Category; import com.cslg.ppa.entity.SourceInfo; import com.cslg.ppa.entity.commom.Article; import com.cslg.ppa.entity.commom.WxResultBody; import com.cslg.ppa.mapper.CategoryMapper; import com.cslg.ppa.mapper.ReportMapper; import com.cslg.ppa.mapper.SourceInfoMapper; import com.cslg.ppa.service.ArticleInfoService; import com.cslg.ppa.service.CategoryService; import com.cslg.ppa.service.GetWebArticle.*; import com.cslg.ppa.service.ReportService; import com.cslg.ppa.service.commom.DifyService; import com.cslg.ppa.service.commom.FileManagerService; import com.cslg.ppa.service.commom.WeiXinApi; import okhttp3.Cookie; import okhttp3.HttpUrl; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hc.client5.http.classic.methods.HttpGet; import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; import org.apache.hc.client5.http.impl.classic.HttpClients; import org.apache.hc.core5.http.io.entity.EntityUtils; import org.apache.hc.core5.net.URIBuilder; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.util.CollectionUtils; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.nio.file.Files; import java.text.SimpleDateFormat; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; @SpringBootTest class PpaApplicationTests { @Autowired private SourceInfoMapper sourceInfoMapper; @Autowired private GetCNIPAArticleService getCNIPAArticleService; @Autowired private GetLocalInformationService getLocalInformationService; @Autowired private GetWeChatArticleService getWeChatArticleService; @Autowired private DifyService difyService; @Autowired private ArticleInfoService articleInfoService; @Autowired private CategoryService categoryService; @Autowired private CategoryMapper categoryMapper; @Autowired private ReportMapper reportMapper; @Autowired private ReportService reportService; @Autowired private GetProvinceNewsService getProvinceNewsService; @Autowired private GetEcigaretteService getEcigaretteService; @Autowired private FileManagerService fileManagerService; @Autowired private GetPCTArticleService getPCTArticleService; @Test void contextLoads() { SourceInfo sourceInfo = new SourceInfo(); sourceInfo.setSourceName("国家知识产权局"); sourceInfo.setSourceUrl("https://www.cnipa.gov.cn/"); sourceInfo.setSourceType(1); sourceInfo.insert(); } @Test void getCNIPA() throws Exception { // String url = "https://www.cnipa.gov.cn/art/2025/8/15/art_74_201044.html"; // String url = "https://www.cnipa.gov.cn/art/2025/8/13/art_57_201010.html"; // String url = "https://www.cnipa.gov.cn/art/2025/8/22/art_75_201148.html"; // getCNIPAArticleService.getDigest(url); // getCNIPAArticleService.getCNIPA(); // getLocalInformationService.getLocalInformation(); // getPCTArticleService.getPCTArticle(); getWeChatArticleService.getWeChatArticle(); // final String digest = getCNIPAArticleService.getDigest(""); // final Date yesterdayDate = DateUtil.getYesterdayDate(); // System.out.println(yesterdayDate); // // final String yesterdayDateStr = DateUtil.getYesterdayDateStr(); // System.out.println(yesterdayDateStr); // String url = "https://mp.weixin.qq.com/s/pywZJeUcJ_Z3wIrZRmRZvg"; // String url = "https://mp.weixin.qq.com/s/cd3vapzw-2c6HdfrGs4r7g"; // final String content = getWeChatArticleService.getWeChatArticleContent(url); // System.out.println(content); } @Test void getWeChatArticle() { String token = "267426578"; String cookie = "_ga=GA1.1.1951344733.1721120989; _ga_TPFW0KPXC1=GS1.1.1721122799.2.0.1721122799.0.0.0; ua_id=lYVPBmbUB2Br9v5GAAAAANySKCt88vpelcEXZ-NfgB4=; wxuin=54469110938579; mm_lang=zh_CN; cert=DwPjsGtL0kB9wndBHqxn27g_A5ieSRxR; rewardsn=; wxtokenkey=777; _clck=3932387393|1|fyk|0; uuid=1e63a83ba14fbc73a99a9a1851cb9309; rand_info=CAESICH2biM3G9816Xru3QvU9bI9LHQMkRKQwz4Me/mrQ/1d; slave_bizuin=3932387393; data_bizuin=3932387393; bizuin=3932387393; data_ticket=rhwtJ0MaVJn3YjGE5d5mPPBdxpoJWkopkoeW3K862z/dkVogSUuhqCra7NisadlF; slave_sid=cER3VlROczAyRm80Z3lHcmZIT29mNGZTZ2JKTzlwQlJBSGEzVTZhOTV3TlZRZ1VOM2RTUUZLTmhmanI0bE1YWHVnXzlLZUdIVmJFNENnUko4YzNFRkpwWUQ1Tm5MMjg5djBuejU0SG9UYm1sRGcwNE0ydXBjNzVNMVFVMm5OZE9xQVE5VVpkUWhnbE1xcHU3; slave_user=gh_292dda35ab06; xid=204a3bd25e3d0930a849421eb42a83ea; _clsk=wu7q1f|1755495257688|2|1|mp.weixin.qq.com/weheat-agent/payload/record"; WxResultBody> findExList = WeiXinApi.findExList("MzA4NDAzMjcyOA==", token, cookie); List
exList = findExList.getApp_msg_list(); System.out.println(exList); } @Test void getCondensedAbstract() throws Exception { // difyService.getCondensedAbstract("近日,最高人民法院知识产权法庭审结一起侵害发明专利权纠纷,驳回专利权人。" + // "专利权人无正当理由隐瞒不报,构成“故意作虚假陈述妨碍人民法院审理”的,人民法院可以依法处罚。"); final String s = difyService.getPctCondensedAbstract("新加坡知识产权局自2025年9月1日起上调发明专利年费。"); System.out.println(s); } @Test void getWeChatArticleContent() { try (CloseableHttpClient httpClient = HttpClients.createDefault()) { // 1. 使用目标文章的直接链接(需确保Cookie有效) String articleUrl = "https://mp.weixin.qq.com/s?__biz=Mzg4MDY4NTIyMw==&mid=2247496191&idx=1&sn=72c307f8652a70209314fdfc6a8a3226&chksm=cf73c837f80441212095d786f66b549d97d1d670ce65692bedb89b7b9e729f5312b03a7cdd2d#rd"; // String articleUrl = "https://mp.weixin.qq.com/s?__biz=MzA4NDAzMjcyOA==&mid=2686975557&idx=8&sn=eff5ef5dd05ea0eb836181b6266159cc&chksm=ba3e33f28d49bae4bd598fed075d4115e7a4b6a6778d4bc648f8dd4883f193c1c6f767627ebf#rd"; // String articleUrl = "https://mp.weixin.qq.com/s?__biz=MzA4NDAzMjcyOA==&mid=2686975557&idx=6&sn=bb85e5bddefcc78c7f4c8d25ded202c9&chksm=ba3e33f28d49bae4f80e1667182400632889da418568033a4faaeeef989099424b51db4bf3ec#rd"; HttpGet request = new HttpGet(articleUrl); // 2. 设置完整的请求头(关键步骤!) request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"); request.setHeader("Referer", "https://mp.weixin.qq.com/"); request.setHeader("Cookie", "_ga=GA1.1.1951344733.1721120989; _ga_TPFW0KPXC1=GS1.1.1721122799.2.0.1721122799.0.0.0; ua_id=lYVPBmbUB2Br9v5GAAAAANySKCt88vpelcEXZ-NfgB4=; wxuin=54469110938579; mm_lang=zh_CN; cert=DwPjsGtL0kB9wndBHqxn27g_A5ieSRxR; uuid=319a730f894e4db1d1ef0bc81f39a062; rand_info=CAESIDLoBwjXI/3LC0I9nd7JfKtXh56j+87f7ZscLgnKB6Pt; slave_bizuin=3932387393; data_bizuin=3932387393; bizuin=3932387393; data_ticket=YYzYTHhGCnGFvgiXfO2BSeDzN/jWRTMdSZ//2DKDOuD1zNhewApnIwX5ruHTNtvs; slave_sid=MldNcXc0QnRfYVdkWWVKbHZBdEx1WUlHbkh1UWN0N3lTOVpmZUhoMEhjR3lJaUVGVkR1U0ZBa2dVSmVGRG0xeFV6R0tHU1d0NU1MOWQ3djc1MjRpeVM1S2xNRE1yQktIeTNmdmptSkF5aEQyQV9ZYnYyc2tRSkh4MHZIcnkxSjlPSXByZUphS1VTWDdGOUY4; slave_user=gh_292dda35ab06; xid=f4e6941e8d1019c71e230139c5bae238; _clck=3932387393|1|fy9|0; _clsk=1ly0zby|1754532579853|5|1|mp.weixin.qq.com/weheat-agent/payload/record"); // 必须包含session凭证 // 3. 执行请求并处理响应 try (CloseableHttpResponse response = httpClient.execute(request)) { final int code = response.getCode(); // 检查响应状态码 if (code != 200) { System.err.println("请求失败,状态码: " + code); return; } // 4. 解析HTML内容 String htmlContent = EntityUtils.toString(response.getEntity(), "UTF-8"); Document doc = Jsoup.parse(htmlContent); // 5. 提取文章正文(微信使用特定class) Element contentElement = doc.selectFirst("#js_content"); if (contentElement != null) { String articleContent = contentElement.text(); final Elements elements1 = contentElement.select("p"); Elements elements = contentElement.select("p"); for (Element element : elements) { final String text1 = element.text(); } } else { System.err.println("未找到文章内容,可能Cookie已失效"); } } } catch (Exception e) { e.printStackTrace(); } } @Test public void selectArticleInfoList() { SelectArticleInfoDTO vo = new SelectArticleInfoDTO(); vo.setPageNum(1L); vo.setPageSize(10L); final String list = articleInfoService.selectArticleInfoList(vo); System.out.println(list); } @Test public void selectCategoryList() { SelectCategoryListDTO vo = new SelectCategoryListDTO(); // vo.setPageNum(1L); // vo.setPageSize(10L); String list = categoryService.selectCategoryList(vo); System.out.println(list); } @Test public void addReport() { ReportDTO reportDTO = new ReportDTO(); reportDTO.setReportName("0815测试"); final Integer id = reportService.addReport(reportDTO); System.out.println(id); } @Test void singleTestAdd() throws IOException { List list = new ArrayList<>(); list.add("https://mp.weixin.qq.com/s?__biz=Mzg5MjA1MTkyNA==&mid=2247485509&idx=1&sn=8d9551806c9a6d2dd1cb8f0a5351144e&chksm=cfc540fef8b2c9e857a9142d5c433922a33c0f82cb25600582fcac91a00dd5fbc21b2eac9e24#rd"); // list.add("https://mp.weixin.qq.com/s/LCnC8UICiG7wxdvMqfE5qQ"); // list.add("https://www.cnipa.gov.cn/art/2025/8/21/art_74_201139.html"); for (String url : list) { getWeChatArticleService.getWeChatArticleContent1(url,3,2); // getCNIPAArticleService.getDigest1(url); } System.out.println("-------------------"); } @Test public void test() { // final long mistime = Long.parseLong("1754563895"); // final String s = DateUtil.convertTimestamp(mistime); // System.out.println(s); String createTimeSecondStr = "1755817699"; long secondCreateTime = Long.parseLong(createTimeSecondStr); String createTimeStr = DateUtil.convertTimestamp(secondCreateTime); String yesterdayDateStr = DateUtil.getYesterdayDateStr(); Date createTime = new Date(); SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); try { createTime = dateFormat.parse(createTimeStr); } catch (Exception e) { } System.out.println(createTime); } @Test void crawlArticles() throws Exception { //-------------江苏知识产权局 // String url = "https://jsip.jiangsu.gov.cn/"; // String url = "https://jsip.jiangsu.gov.cn/col/col75877/index.html"; // String url = "https://jsip.jiangsu.gov.cn/col/col85036/index.html"; //-------------上海知识产权局 // String url = "https://sipa.sh.gov.cn/index.html"; // String url = "https://sipa.sh.gov.cn/2021gzdt/index.html"; //-------------浙江知识产权局(浙江省知识产权保护中心) // String url = "https://zjippc.org.cn/"; //-------------北京知识产权局 // String url = "https://zscqj.beijing.gov.cn/zscqj/index/index.html"; // String url = "https://zscqj.beijing.gov.cn/zscqj/zwgk/tzgg/index.html"; //-------------重庆知识产权局 // String url = "https://zscqj.cq.gov.cn/"; //-------------天津知识产权局 // String url = "https://zscq.tj.gov.cn/"; //-------------河南知识产权局 // String url = "https://scjg.henan.gov.cn/hnzscqj/"; //-------------广东省知识产权保护中心 // String url = "https://www.gippc.com.cn/ippc/index.shtml"; //-------------海南知识产权局 // String url = "https://amr.hainan.gov.cn/szscqj/"; //-------------湖南市场监督管理局 // String url = "https://amr.hunan.gov.cn/"; //-------------辽宁省知识产权局 // String url = "https://zscq.ln.gov.cn/zscq/index/index.shtml"; //-------------青海省知识产权局(青海省市场监督管理局) // String url = "https://scjgj.qinghai.gov.cn/"; //-------------陕西省知识产权局 // String url = "https://snipa.shaanxi.gov.cn/"; //-------------山西省知识产权保护中心 // String url = "https://www.sxippc.com/"; //-------------云南知识产权局(云南省市场监督管理局) // String url = "https://amr.yn.gov.cn/zscqj/index.htm"; //-------------内蒙古市场监督管理局 // String url = "https://amr.nmg.gov.cn/"; //-------------安徽省知识产权保护中心 // String url = "https://www.ahippc.cn/"; //-------------湖北知识产权局 // String url = "https://zscqj.hubei.gov.cn/"; // getProvinceNewsService.crawlHubeiArticles(url); //-------------江西省市场监督管理局(知识产权局) // String url = "https://amr.jiangxi.gov.cn/"; //-------------黑龙江知识产权局 // String url = "https://hlipa.hlj.gov.cn/hlipa/index.shtml"; //-------------福建省知识产权局(福建省市场监督管理局) // String url = "https://scjgj.fujian.gov.cn/"; //-------------四川省知识产权局(四川省市场监督管理局) // String url = "https://scjgj.sc.gov.cn/"; //-------------河北知识产权局(河北市场监督管理局) // String url = "https://scjg.hebei.gov.cn/"; //-------------吉林省市场监督管理厅 // String url = "http://scjg.jl.gov.cn/"; //-------------山东省市场监督管理局 // String url = "http://amr.shandong.gov.cn/"; //-------------贵州省市场监督管理局(贵州知识产权局) // String url = "https://amr.guizhou.gov.cn/"; //-------------甘肃市场监督管理局 // String url = "https://scjg.gansu.gov.cn/scjg/index.shtml"; //-------------广西知识产权公告服务平台 // String url = "http://www.gxipo.net/"; //-------------宁夏市场监督管理局 // String url = "http://scjg.nx.gov.cn/"; //-------------新疆市场监督管理局(新疆知识产权局) String url = "https://scjgj.xinjiang.gov.cn/"; getProvinceNewsService.crawlArticles(url,"",1,2); // String url = "https://www.ahippc.cn/news.html?categoryId=a5e96b641ade4fc9b50b4f9504ba0f62"; // final List articleInfoDTOS = getProvinceNewsService.crawlArticlesDetail(url); // System.out.println(articleInfoDTOS); // String url = "https://jsip.jiangsu.gov.cn/art/2025/8/28/art_75877_11630402.html"; // final String digest = getProvinceNewsService.getDigest(url); // System.out.println(digest); } @Test void addArticleFromWebSource() throws Exception { // String url = "https://scjg.hebei.gov.cn/"; // getProvinceNewsService.crawlHebeiArticles(url); // String url = "https://scjg.hebei.gov.cn/node/919"; // getProvinceNewsService.addHebeiArticle(url); // String url = "https://zscqj.hubei.gov.cn/"; // getProvinceNewsService.crawlHubeiArticles(url); // getProvinceNewsService.crawlJiangxiArticles("https://amr.jiangxi.gov.cn/"); getProvinceNewsService.addArticleFromWebSource(); } @Test void crawlEcigaretteArticles() throws IOException { String url = "https://www.2firsts.cn/"; getEcigaretteService.crawlEcigaretteArticles(url); } @Test void getGuid() throws Exception { String url = "https://mmbiz.qlogo.cn/mmbiz_jpg/N7S9NTpmPL8yibib3EwG7zxia25Ij9zS0A2XTlWWxpUomE7S9aDxNe8ibJYX7n8KjF8xf90n9UUSwaC2nTLh2D8a7Q/0?wx_fmt=jpeg"; URL fileUrl = new URL(url); HttpURLConnection connection = (HttpURLConnection) fileUrl.openConnection(); File tempFile = File.createTempFile("tem-", ".jpeg"); try (InputStream in = connection.getInputStream(); FileOutputStream out = new FileOutputStream(tempFile)) { IOUtils.copy(in, out); } final List list = fileManagerService.uploadFileGetGuid2(Collections.singletonList(tempFile)); System.out.println(list.get(0)); Files.delete(tempFile.toPath()); } @Test void test11() throws Exception { List list = articleInfoService.list(new LambdaQueryWrapper() .in(ArticleInfo::getCategoryId,Arrays.asList(3,4,5,6)) .isNotNull(ArticleInfo::getWxArticleIcon)); System.out.println(list.size()); if (!CollectionUtils.isEmpty(list)) { for (ArticleInfo articleInfo : list) { if (StringUtils.isNotEmpty(articleInfo.getWxArticleIcon())) { String guid = getWeChatArticleService.getGuid(articleInfo.getWxArticleIcon()); articleInfo.setWxArticleIcon(guid); articleInfo.setId(articleInfo.getId()); articleInfo.updateById(); } } } System.out.println("----------------"); } @Test void getPCTArticle() { getPCTArticleService.getPCTArticle(); } @Test void test111() throws Exception { //-------------中国知识产权网 // String url = "http://www.cnipr.com/"; // getProvinceNewsService.test(url); //-------------智南针 // String url = "https://www.worldip.cn/"; // getProvinceNewsService.test1(url); // String url = "https://www.worldip.cn/index.php?m=content&c=index&a=show&catid=64&id=2996"; // getProvinceNewsService.getDigest2(url); } }