123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398 |
- package com.cslg.ppa;
- import com.alibaba.fastjson2.JSON;
- import com.alibaba.fastjson2.JSONArray;
- import com.alibaba.fastjson2.JSONObject;
- import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
- import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
- import com.cslg.ppa.common.okhttp.MyCookieStore;
- import com.cslg.ppa.common.utils.DateUtil;
- import com.cslg.ppa.dto.GetArticleInfoDTO;
- import com.cslg.ppa.dto.ReportDTO;
- import com.cslg.ppa.dto.SelectArticleInfoDTO;
- import com.cslg.ppa.dto.SelectCategoryListDTO;
- import com.cslg.ppa.entity.ArticleInfo;
- import com.cslg.ppa.entity.Category;
- import com.cslg.ppa.entity.SourceInfo;
- import com.cslg.ppa.entity.commom.Article;
- import com.cslg.ppa.entity.commom.WxResultBody;
- import com.cslg.ppa.mapper.CategoryMapper;
- import com.cslg.ppa.mapper.ReportMapper;
- import com.cslg.ppa.mapper.SourceInfoMapper;
- import com.cslg.ppa.service.ArticleInfoService;
- import com.cslg.ppa.service.CategoryService;
- import com.cslg.ppa.service.GetWebArticle.*;
- import com.cslg.ppa.service.ReportService;
- import com.cslg.ppa.service.commom.DifyService;
- import com.cslg.ppa.service.commom.FileManagerService;
- import com.cslg.ppa.service.commom.WeiXinApi;
- import okhttp3.Cookie;
- import okhttp3.HttpUrl;
- import org.apache.commons.io.IOUtils;
- import org.apache.commons.lang3.StringUtils;
- import org.apache.hc.client5.http.classic.methods.HttpGet;
- import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
- import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
- import org.apache.hc.client5.http.impl.classic.HttpClients;
- import org.apache.hc.core5.http.io.entity.EntityUtils;
- import org.apache.hc.core5.net.URIBuilder;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- import org.junit.jupiter.api.Test;
- import org.springframework.beans.factory.annotation.Autowired;
- import org.springframework.boot.test.context.SpringBootTest;
- import org.springframework.util.CollectionUtils;
- import java.io.File;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.net.HttpURLConnection;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.nio.file.Files;
- import java.text.SimpleDateFormat;
- import java.util.*;
- import java.util.concurrent.ConcurrentHashMap;
- import java.util.stream.Collectors;
- @SpringBootTest
- class PpaApplicationTests {
- @Autowired
- private SourceInfoMapper sourceInfoMapper;
- @Autowired
- private GetCNIPAArticleService getCNIPAArticleService;
- @Autowired
- private GetLocalInformationService getLocalInformationService;
- @Autowired
- private GetWeChatArticleService getWeChatArticleService;
- @Autowired
- private DifyService difyService;
- @Autowired
- private ArticleInfoService articleInfoService;
- @Autowired
- private CategoryService categoryService;
- @Autowired
- private CategoryMapper categoryMapper;
- @Autowired
- private ReportMapper reportMapper;
- @Autowired
- private ReportService reportService;
- @Autowired
- private GetProvinceNewsService getProvinceNewsService;
- @Autowired
- private GetEcigaretteService getEcigaretteService;
- @Autowired
- private FileManagerService fileManagerService;
- @Autowired
- private GetPCTArticleService getPCTArticleService;
- @Test
- void contextLoads() {
- SourceInfo sourceInfo = new SourceInfo();
- sourceInfo.setSourceName("国家知识产权局");
- sourceInfo.setSourceUrl("https://www.cnipa.gov.cn/");
- sourceInfo.setSourceType(1);
- sourceInfo.insert();
- }
- @Test
- void getCNIPA() throws Exception {
- // String url = "https://www.cnipa.gov.cn/art/2025/8/15/art_74_201044.html";
- // String url = "https://www.cnipa.gov.cn/art/2025/8/13/art_57_201010.html";
- // String url = "https://www.cnipa.gov.cn/art/2025/8/22/art_75_201148.html";
- // getCNIPAArticleService.getDigest(url);
- // getCNIPAArticleService.getCNIPA();
- // getLocalInformationService.getLocalInformation();
- // getPCTArticleService.getPCTArticle();
- getWeChatArticleService.getWeChatArticle();
- // final String digest = getCNIPAArticleService.getDigest("");
- // final Date yesterdayDate = DateUtil.getYesterdayDate();
- // System.out.println(yesterdayDate);
- //
- // final String yesterdayDateStr = DateUtil.getYesterdayDateStr();
- // System.out.println(yesterdayDateStr);
- // String url = "https://mp.weixin.qq.com/s/pywZJeUcJ_Z3wIrZRmRZvg";
- // String url = "https://mp.weixin.qq.com/s/cd3vapzw-2c6HdfrGs4r7g";
- // final String content = getWeChatArticleService.getWeChatArticleContent(url);
- // System.out.println(content);
- }
- @Test
- void getWeChatArticle() {
- String token = "267426578";
- String cookie = "_ga=GA1.1.1951344733.1721120989; _ga_TPFW0KPXC1=GS1.1.1721122799.2.0.1721122799.0.0.0; ua_id=lYVPBmbUB2Br9v5GAAAAANySKCt88vpelcEXZ-NfgB4=; wxuin=54469110938579; mm_lang=zh_CN; cert=DwPjsGtL0kB9wndBHqxn27g_A5ieSRxR; rewardsn=; wxtokenkey=777; _clck=3932387393|1|fyk|0; uuid=1e63a83ba14fbc73a99a9a1851cb9309; rand_info=CAESICH2biM3G9816Xru3QvU9bI9LHQMkRKQwz4Me/mrQ/1d; slave_bizuin=3932387393; data_bizuin=3932387393; bizuin=3932387393; data_ticket=rhwtJ0MaVJn3YjGE5d5mPPBdxpoJWkopkoeW3K862z/dkVogSUuhqCra7NisadlF; slave_sid=cER3VlROczAyRm80Z3lHcmZIT29mNGZTZ2JKTzlwQlJBSGEzVTZhOTV3TlZRZ1VOM2RTUUZLTmhmanI0bE1YWHVnXzlLZUdIVmJFNENnUko4YzNFRkpwWUQ1Tm5MMjg5djBuejU0SG9UYm1sRGcwNE0ydXBjNzVNMVFVMm5OZE9xQVE5VVpkUWhnbE1xcHU3; slave_user=gh_292dda35ab06; xid=204a3bd25e3d0930a849421eb42a83ea; _clsk=wu7q1f|1755495257688|2|1|mp.weixin.qq.com/weheat-agent/payload/record";
- WxResultBody<List<Article>> findExList = WeiXinApi.findExList("MzA4NDAzMjcyOA==", token, cookie);
- List<Article> exList = findExList.getApp_msg_list();
- System.out.println(exList);
- }
- @Test
- void getCondensedAbstract() throws Exception {
- // difyService.getCondensedAbstract("近日,最高人民法院知识产权法庭审结一起侵害发明专利权纠纷,驳回专利权人。" +
- // "专利权人无正当理由隐瞒不报,构成“故意作虚假陈述妨碍人民法院审理”的,人民法院可以依法处罚。");
- final String s = difyService.getPctCondensedAbstract("新加坡知识产权局自2025年9月1日起上调发明专利年费。");
- System.out.println(s);
- }
- @Test
- void getWeChatArticleContent() {
- try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
- // 1. 使用目标文章的直接链接(需确保Cookie有效)
- String articleUrl = "https://mp.weixin.qq.com/s?__biz=Mzg4MDY4NTIyMw==&mid=2247496191&idx=1&sn=72c307f8652a70209314fdfc6a8a3226&chksm=cf73c837f80441212095d786f66b549d97d1d670ce65692bedb89b7b9e729f5312b03a7cdd2d#rd";
- // String articleUrl = "https://mp.weixin.qq.com/s?__biz=MzA4NDAzMjcyOA==&mid=2686975557&idx=8&sn=eff5ef5dd05ea0eb836181b6266159cc&chksm=ba3e33f28d49bae4bd598fed075d4115e7a4b6a6778d4bc648f8dd4883f193c1c6f767627ebf#rd";
- // String articleUrl = "https://mp.weixin.qq.com/s?__biz=MzA4NDAzMjcyOA==&mid=2686975557&idx=6&sn=bb85e5bddefcc78c7f4c8d25ded202c9&chksm=ba3e33f28d49bae4f80e1667182400632889da418568033a4faaeeef989099424b51db4bf3ec#rd";
- HttpGet request = new HttpGet(articleUrl);
- // 2. 设置完整的请求头(关键步骤!)
- request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36");
- request.setHeader("Referer", "https://mp.weixin.qq.com/");
- request.setHeader("Cookie", "_ga=GA1.1.1951344733.1721120989; _ga_TPFW0KPXC1=GS1.1.1721122799.2.0.1721122799.0.0.0; ua_id=lYVPBmbUB2Br9v5GAAAAANySKCt88vpelcEXZ-NfgB4=; wxuin=54469110938579; mm_lang=zh_CN; cert=DwPjsGtL0kB9wndBHqxn27g_A5ieSRxR; uuid=319a730f894e4db1d1ef0bc81f39a062; rand_info=CAESIDLoBwjXI/3LC0I9nd7JfKtXh56j+87f7ZscLgnKB6Pt; slave_bizuin=3932387393; data_bizuin=3932387393; bizuin=3932387393; data_ticket=YYzYTHhGCnGFvgiXfO2BSeDzN/jWRTMdSZ//2DKDOuD1zNhewApnIwX5ruHTNtvs; slave_sid=MldNcXc0QnRfYVdkWWVKbHZBdEx1WUlHbkh1UWN0N3lTOVpmZUhoMEhjR3lJaUVGVkR1U0ZBa2dVSmVGRG0xeFV6R0tHU1d0NU1MOWQ3djc1MjRpeVM1S2xNRE1yQktIeTNmdmptSkF5aEQyQV9ZYnYyc2tRSkh4MHZIcnkxSjlPSXByZUphS1VTWDdGOUY4; slave_user=gh_292dda35ab06; xid=f4e6941e8d1019c71e230139c5bae238; _clck=3932387393|1|fy9|0; _clsk=1ly0zby|1754532579853|5|1|mp.weixin.qq.com/weheat-agent/payload/record"); // 必须包含session凭证
- // 3. 执行请求并处理响应
- try (CloseableHttpResponse response = httpClient.execute(request)) {
- final int code = response.getCode();
- // 检查响应状态码
- if (code != 200) {
- System.err.println("请求失败,状态码: " + code);
- return;
- }
- // 4. 解析HTML内容
- String htmlContent = EntityUtils.toString(response.getEntity(), "UTF-8");
- Document doc = Jsoup.parse(htmlContent);
- // 5. 提取文章正文(微信使用特定class)
- Element contentElement = doc.selectFirst("#js_content");
- if (contentElement != null) {
- String articleContent = contentElement.text();
- final Elements elements1 = contentElement.select("p");
- Elements elements = contentElement.select("p");
- for (Element element : elements) {
- final String text1 = element.text();
- }
- } else {
- System.err.println("未找到文章内容,可能Cookie已失效");
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- @Test
- public void selectArticleInfoList() {
- SelectArticleInfoDTO vo = new SelectArticleInfoDTO();
- vo.setPageNum(1L);
- vo.setPageSize(10L);
- final String list = articleInfoService.selectArticleInfoList(vo);
- System.out.println(list);
- }
- @Test
- public void selectCategoryList() {
- SelectCategoryListDTO vo = new SelectCategoryListDTO();
- // vo.setPageNum(1L);
- // vo.setPageSize(10L);
- String list = categoryService.selectCategoryList(vo);
- System.out.println(list);
- }
- @Test
- public void addReport() {
- ReportDTO reportDTO = new ReportDTO();
- reportDTO.setReportName("0815测试");
- final Integer id = reportService.addReport(reportDTO);
- System.out.println(id);
- }
- @Test
- void singleTestAdd() throws IOException {
- List<String> list = new ArrayList<>();
- list.add("https://mp.weixin.qq.com/s?__biz=Mzg5MjA1MTkyNA==&mid=2247485509&idx=1&sn=8d9551806c9a6d2dd1cb8f0a5351144e&chksm=cfc540fef8b2c9e857a9142d5c433922a33c0f82cb25600582fcac91a00dd5fbc21b2eac9e24#rd");
- // list.add("https://mp.weixin.qq.com/s/LCnC8UICiG7wxdvMqfE5qQ");
- // list.add("https://www.cnipa.gov.cn/art/2025/8/21/art_74_201139.html");
- for (String url : list) {
- getWeChatArticleService.getWeChatArticleContent1(url,3,2);
- // getCNIPAArticleService.getDigest1(url);
- }
- System.out.println("-------------------");
- }
- @Test
- public void test() {
- // final long mistime = Long.parseLong("1754563895");
- // final String s = DateUtil.convertTimestamp(mistime);
- // System.out.println(s);
- String createTimeSecondStr = "1755817699";
- long secondCreateTime = Long.parseLong(createTimeSecondStr);
- String createTimeStr = DateUtil.convertTimestamp(secondCreateTime);
- String yesterdayDateStr = DateUtil.getYesterdayDateStr();
- Date createTime = new Date();
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- try {
- createTime = dateFormat.parse(createTimeStr);
- } catch (Exception e) {
- }
- System.out.println(createTime);
- }
- @Test
- void crawlArticles() throws Exception {
- //-------------江苏知识产权局
- // String url = "https://jsip.jiangsu.gov.cn/";
- // String url = "https://jsip.jiangsu.gov.cn/col/col75877/index.html";
- // String url = "https://jsip.jiangsu.gov.cn/col/col85036/index.html";
- //-------------上海知识产权局
- // String url = "https://sipa.sh.gov.cn/index.html";
- // String url = "https://sipa.sh.gov.cn/2021gzdt/index.html";
- //-------------浙江知识产权局(浙江省知识产权保护中心)
- // String url = "https://zjippc.org.cn/";
- //-------------北京知识产权局
- // String url = "https://zscqj.beijing.gov.cn/zscqj/index/index.html";
- // String url = "https://zscqj.beijing.gov.cn/zscqj/zwgk/tzgg/index.html";
- //-------------重庆知识产权局
- // String url = "https://zscqj.cq.gov.cn/";
- //-------------天津知识产权局
- // String url = "https://zscq.tj.gov.cn/";
- //-------------河南知识产权局
- // String url = "https://scjg.henan.gov.cn/hnzscqj/";
- //-------------广东省知识产权保护中心
- // String url = "https://www.gippc.com.cn/ippc/index.shtml";
- //-------------海南知识产权局
- // String url = "https://amr.hainan.gov.cn/szscqj/";
- //-------------湖南市场监督管理局
- // String url = "https://amr.hunan.gov.cn/";
- //-------------辽宁省知识产权局
- // String url = "https://zscq.ln.gov.cn/zscq/index/index.shtml";
- //-------------青海省知识产权局(青海省市场监督管理局)
- // String url = "https://scjgj.qinghai.gov.cn/";
- //-------------陕西省知识产权局
- // String url = "https://snipa.shaanxi.gov.cn/";
- //-------------山西省知识产权保护中心
- // String url = "https://www.sxippc.com/";
- //-------------云南知识产权局(云南省市场监督管理局)
- // String url = "https://amr.yn.gov.cn/zscqj/index.htm";
- //-------------内蒙古市场监督管理局
- // String url = "https://amr.nmg.gov.cn/";
- //-------------安徽省知识产权保护中心
- // String url = "https://www.ahippc.cn/";
- //-------------湖北知识产权局
- // String url = "https://zscqj.hubei.gov.cn/";
- // getProvinceNewsService.crawlHubeiArticles(url);
- //-------------江西省市场监督管理局(知识产权局)
- // String url = "https://amr.jiangxi.gov.cn/";
- //-------------黑龙江知识产权局
- // String url = "https://hlipa.hlj.gov.cn/hlipa/index.shtml";
- //-------------福建省知识产权局(福建省市场监督管理局)
- // String url = "https://scjgj.fujian.gov.cn/";
- //-------------四川省知识产权局(四川省市场监督管理局)
- // String url = "https://scjgj.sc.gov.cn/";
- //-------------河北知识产权局(河北市场监督管理局)
- // String url = "https://scjg.hebei.gov.cn/";
- //-------------吉林省市场监督管理厅
- // String url = "http://scjg.jl.gov.cn/";
- //-------------山东省市场监督管理局
- // String url = "http://amr.shandong.gov.cn/";
- //-------------贵州省市场监督管理局(贵州知识产权局)
- // String url = "https://amr.guizhou.gov.cn/";
- //-------------甘肃市场监督管理局
- // String url = "https://scjg.gansu.gov.cn/scjg/index.shtml";
- //-------------广西知识产权公告服务平台
- // String url = "http://www.gxipo.net/";
- //-------------宁夏市场监督管理局
- // String url = "http://scjg.nx.gov.cn/";
- //-------------新疆市场监督管理局(新疆知识产权局)
- String url = "https://scjgj.xinjiang.gov.cn/";
- getProvinceNewsService.crawlArticles(url,"",1,2);
- // String url = "https://www.ahippc.cn/news.html?categoryId=a5e96b641ade4fc9b50b4f9504ba0f62";
- // final List<GetArticleInfoDTO> articleInfoDTOS = getProvinceNewsService.crawlArticlesDetail(url);
- // System.out.println(articleInfoDTOS);
- // String url = "https://jsip.jiangsu.gov.cn/art/2025/8/28/art_75877_11630402.html";
- // final String digest = getProvinceNewsService.getDigest(url);
- // System.out.println(digest);
- }
- @Test
- void addArticleFromWebSource() throws Exception {
- // String url = "https://scjg.hebei.gov.cn/";
- // getProvinceNewsService.crawlHebeiArticles(url);
- // String url = "https://scjg.hebei.gov.cn/node/919";
- // getProvinceNewsService.addHebeiArticle(url);
- // String url = "https://zscqj.hubei.gov.cn/";
- // getProvinceNewsService.crawlHubeiArticles(url);
- // getProvinceNewsService.crawlJiangxiArticles("https://amr.jiangxi.gov.cn/");
- getProvinceNewsService.addArticleFromWebSource();
- }
- @Test
- void crawlEcigaretteArticles() throws IOException {
- String url = "https://www.2firsts.cn/";
- getEcigaretteService.crawlEcigaretteArticles(url);
- }
- @Test
- void getGuid() throws Exception {
- String url = "https://mmbiz.qlogo.cn/mmbiz_jpg/N7S9NTpmPL8yibib3EwG7zxia25Ij9zS0A2XTlWWxpUomE7S9aDxNe8ibJYX7n8KjF8xf90n9UUSwaC2nTLh2D8a7Q/0?wx_fmt=jpeg";
- URL fileUrl = new URL(url);
- HttpURLConnection connection = (HttpURLConnection) fileUrl.openConnection();
- File tempFile = File.createTempFile("tem-", ".jpeg");
- try (InputStream in = connection.getInputStream(); FileOutputStream out = new FileOutputStream(tempFile)) {
- IOUtils.copy(in, out);
- }
- final List<String> list = fileManagerService.uploadFileGetGuid2(Collections.singletonList(tempFile));
- System.out.println(list.get(0));
- Files.delete(tempFile.toPath());
- }
- @Test
- void test11() throws Exception {
- List<ArticleInfo> list = articleInfoService.list(new LambdaQueryWrapper<ArticleInfo>()
- .in(ArticleInfo::getCategoryId,Arrays.asList(3,4,5,6))
- .isNotNull(ArticleInfo::getWxArticleIcon));
- System.out.println(list.size());
- if (!CollectionUtils.isEmpty(list)) {
- for (ArticleInfo articleInfo : list) {
- if (StringUtils.isNotEmpty(articleInfo.getWxArticleIcon())) {
- String guid = getWeChatArticleService.getGuid(articleInfo.getWxArticleIcon());
- articleInfo.setWxArticleIcon(guid);
- articleInfo.setId(articleInfo.getId());
- articleInfo.updateById();
- }
- }
- }
- System.out.println("----------------");
- }
- @Test
- void getPCTArticle() {
- getPCTArticleService.getPCTArticle();
- }
- @Test
- void test111() throws Exception {
- //-------------中国知识产权网
- // String url = "http://www.cnipr.com/";
- // getProvinceNewsService.test(url);
- //-------------智南针
- // String url = "https://www.worldip.cn/";
- // getProvinceNewsService.test1(url);
- // String url = "https://www.worldip.cn/index.php?m=content&c=index&a=show&catid=64&id=2996";
- // getProvinceNewsService.getDigest2(url);
- }
- }
|