// Source file: PpaApplicationTests.java (19 KB as originally exported)
  package com.cslg.ppa;

  import com.alibaba.fastjson2.JSON;
  import com.alibaba.fastjson2.JSONArray;
  import com.alibaba.fastjson2.JSONObject;
  import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
  import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
  import com.cslg.ppa.common.okhttp.MyCookieStore;
  import com.cslg.ppa.common.utils.DateUtil;
  import com.cslg.ppa.dto.GetArticleInfoDTO;
  import com.cslg.ppa.dto.ReportDTO;
  import com.cslg.ppa.dto.SelectArticleInfoDTO;
  import com.cslg.ppa.dto.SelectCategoryListDTO;
  import com.cslg.ppa.entity.ArticleInfo;
  import com.cslg.ppa.entity.Category;
  import com.cslg.ppa.entity.SourceInfo;
  import com.cslg.ppa.entity.commom.Article;
  import com.cslg.ppa.entity.commom.WxResultBody;
  import com.cslg.ppa.mapper.CategoryMapper;
  import com.cslg.ppa.mapper.ReportMapper;
  import com.cslg.ppa.mapper.SourceInfoMapper;
  import com.cslg.ppa.service.ArticleInfoService;
  import com.cslg.ppa.service.CategoryService;
  import com.cslg.ppa.service.GetWebArticle.*;
  import com.cslg.ppa.service.ReportService;
  import com.cslg.ppa.service.commom.DifyService;
  import com.cslg.ppa.service.commom.FileManagerService;
  import com.cslg.ppa.service.commom.WeiXinApi;
  import okhttp3.Cookie;
  import okhttp3.HttpUrl;
  import org.apache.commons.io.IOUtils;
  import org.apache.commons.lang3.StringUtils;
  import org.apache.hc.client5.http.classic.methods.HttpGet;
  import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
  import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
  import org.apache.hc.client5.http.impl.classic.HttpClients;
  import org.apache.hc.core5.http.io.entity.EntityUtils;
  import org.apache.hc.core5.net.URIBuilder;
  import org.jsoup.Jsoup;
  import org.jsoup.nodes.Document;
  import org.jsoup.nodes.Element;
  import org.jsoup.select.Elements;
  import org.junit.jupiter.api.Test;
  import org.springframework.beans.factory.annotation.Autowired;
  import org.springframework.boot.test.context.SpringBootTest;
  import org.springframework.util.CollectionUtils;

  import java.io.File;
  import java.io.FileOutputStream;
  import java.io.IOException;
  import java.io.InputStream;
  import java.net.HttpURLConnection;
  import java.net.MalformedURLException;
  import java.net.URL;
  import java.nio.file.Files;
  import java.text.ParseException;
  import java.text.SimpleDateFormat;
  import java.util.*;
  import java.util.concurrent.ConcurrentHashMap;
  import java.util.stream.Collectors;

  58. @SpringBootTest
  59. class PpaApplicationTests {
  60. @Autowired
  61. private SourceInfoMapper sourceInfoMapper;
  62. @Autowired
  63. private GetCNIPAArticleService getCNIPAArticleService;
  64. @Autowired
  65. private GetLocalInformationService getLocalInformationService;
  66. @Autowired
  67. private GetWeChatArticleService getWeChatArticleService;
  68. @Autowired
  69. private DifyService difyService;
  70. @Autowired
  71. private ArticleInfoService articleInfoService;
  72. @Autowired
  73. private CategoryService categoryService;
  74. @Autowired
  75. private CategoryMapper categoryMapper;
  76. @Autowired
  77. private ReportMapper reportMapper;
  78. @Autowired
  79. private ReportService reportService;
  80. @Autowired
  81. private GetProvinceNewsService getProvinceNewsService;
  82. @Autowired
  83. private GetEcigaretteService getEcigaretteService;
  84. @Autowired
  85. private FileManagerService fileManagerService;
  86. @Autowired
  87. private GetPCTArticleService getPCTArticleService;
  88. @Test
  89. void contextLoads() {
  90. SourceInfo sourceInfo = new SourceInfo();
  91. sourceInfo.setSourceName("国家知识产权局");
  92. sourceInfo.setSourceUrl("https://www.cnipa.gov.cn/");
  93. sourceInfo.setSourceType(1);
  94. sourceInfo.insert();
  95. }
  96. @Test
  97. void getCNIPA() throws Exception {
  98. // String url = "https://www.cnipa.gov.cn/art/2025/8/15/art_74_201044.html";
  99. // String url = "https://www.cnipa.gov.cn/art/2025/8/13/art_57_201010.html";
  100. // String url = "https://www.cnipa.gov.cn/art/2025/8/22/art_75_201148.html";
  101. // getCNIPAArticleService.getDigest(url);
  102. // getCNIPAArticleService.getCNIPA();
  103. // getLocalInformationService.getLocalInformation();
  104. // getPCTArticleService.getPCTArticle();
  105. getWeChatArticleService.getWeChatArticle();
  106. // final String digest = getCNIPAArticleService.getDigest("");
  107. // final Date yesterdayDate = DateUtil.getYesterdayDate();
  108. // System.out.println(yesterdayDate);
  109. //
  110. // final String yesterdayDateStr = DateUtil.getYesterdayDateStr();
  111. // System.out.println(yesterdayDateStr);
  112. // String url = "https://mp.weixin.qq.com/s/pywZJeUcJ_Z3wIrZRmRZvg";
  113. // String url = "https://mp.weixin.qq.com/s/cd3vapzw-2c6HdfrGs4r7g";
  114. // final String content = getWeChatArticleService.getWeChatArticleContent(url);
  115. // System.out.println(content);
  116. }
  117. @Test
  118. void getWeChatArticle() {
  119. String token = "267426578";
  120. String cookie = "_ga=GA1.1.1951344733.1721120989; _ga_TPFW0KPXC1=GS1.1.1721122799.2.0.1721122799.0.0.0; ua_id=lYVPBmbUB2Br9v5GAAAAANySKCt88vpelcEXZ-NfgB4=; wxuin=54469110938579; mm_lang=zh_CN; cert=DwPjsGtL0kB9wndBHqxn27g_A5ieSRxR; rewardsn=; wxtokenkey=777; _clck=3932387393|1|fyk|0; uuid=1e63a83ba14fbc73a99a9a1851cb9309; rand_info=CAESICH2biM3G9816Xru3QvU9bI9LHQMkRKQwz4Me/mrQ/1d; slave_bizuin=3932387393; data_bizuin=3932387393; bizuin=3932387393; data_ticket=rhwtJ0MaVJn3YjGE5d5mPPBdxpoJWkopkoeW3K862z/dkVogSUuhqCra7NisadlF; slave_sid=cER3VlROczAyRm80Z3lHcmZIT29mNGZTZ2JKTzlwQlJBSGEzVTZhOTV3TlZRZ1VOM2RTUUZLTmhmanI0bE1YWHVnXzlLZUdIVmJFNENnUko4YzNFRkpwWUQ1Tm5MMjg5djBuejU0SG9UYm1sRGcwNE0ydXBjNzVNMVFVMm5OZE9xQVE5VVpkUWhnbE1xcHU3; slave_user=gh_292dda35ab06; xid=204a3bd25e3d0930a849421eb42a83ea; _clsk=wu7q1f|1755495257688|2|1|mp.weixin.qq.com/weheat-agent/payload/record";
  121. WxResultBody<List<Article>> findExList = WeiXinApi.findExList("MzA4NDAzMjcyOA==", token, cookie);
  122. List<Article> exList = findExList.getApp_msg_list();
  123. System.out.println(exList);
  124. }
  125. @Test
  126. void getCondensedAbstract() throws Exception {
  127. // difyService.getCondensedAbstract("近日,最高人民法院知识产权法庭审结一起侵害发明专利权纠纷,驳回专利权人。" +
  128. // "专利权人无正当理由隐瞒不报,构成“故意作虚假陈述妨碍人民法院审理”的,人民法院可以依法处罚。");
  129. final String s = difyService.getPctCondensedAbstract("新加坡知识产权局自2025年9月1日起上调发明专利年费。");
  130. System.out.println(s);
  131. }
  132. @Test
  133. void getWeChatArticleContent() {
  134. try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
  135. // 1. 使用目标文章的直接链接(需确保Cookie有效)
  136. String articleUrl = "https://mp.weixin.qq.com/s?__biz=Mzg4MDY4NTIyMw==&mid=2247496191&idx=1&sn=72c307f8652a70209314fdfc6a8a3226&chksm=cf73c837f80441212095d786f66b549d97d1d670ce65692bedb89b7b9e729f5312b03a7cdd2d#rd";
  137. // String articleUrl = "https://mp.weixin.qq.com/s?__biz=MzA4NDAzMjcyOA==&mid=2686975557&idx=8&sn=eff5ef5dd05ea0eb836181b6266159cc&chksm=ba3e33f28d49bae4bd598fed075d4115e7a4b6a6778d4bc648f8dd4883f193c1c6f767627ebf#rd";
  138. // String articleUrl = "https://mp.weixin.qq.com/s?__biz=MzA4NDAzMjcyOA==&mid=2686975557&idx=6&sn=bb85e5bddefcc78c7f4c8d25ded202c9&chksm=ba3e33f28d49bae4f80e1667182400632889da418568033a4faaeeef989099424b51db4bf3ec#rd";
  139. HttpGet request = new HttpGet(articleUrl);
  140. // 2. 设置完整的请求头(关键步骤!)
  141. request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36");
  142. request.setHeader("Referer", "https://mp.weixin.qq.com/");
  143. request.setHeader("Cookie", "_ga=GA1.1.1951344733.1721120989; _ga_TPFW0KPXC1=GS1.1.1721122799.2.0.1721122799.0.0.0; ua_id=lYVPBmbUB2Br9v5GAAAAANySKCt88vpelcEXZ-NfgB4=; wxuin=54469110938579; mm_lang=zh_CN; cert=DwPjsGtL0kB9wndBHqxn27g_A5ieSRxR; uuid=319a730f894e4db1d1ef0bc81f39a062; rand_info=CAESIDLoBwjXI/3LC0I9nd7JfKtXh56j+87f7ZscLgnKB6Pt; slave_bizuin=3932387393; data_bizuin=3932387393; bizuin=3932387393; data_ticket=YYzYTHhGCnGFvgiXfO2BSeDzN/jWRTMdSZ//2DKDOuD1zNhewApnIwX5ruHTNtvs; slave_sid=MldNcXc0QnRfYVdkWWVKbHZBdEx1WUlHbkh1UWN0N3lTOVpmZUhoMEhjR3lJaUVGVkR1U0ZBa2dVSmVGRG0xeFV6R0tHU1d0NU1MOWQ3djc1MjRpeVM1S2xNRE1yQktIeTNmdmptSkF5aEQyQV9ZYnYyc2tRSkh4MHZIcnkxSjlPSXByZUphS1VTWDdGOUY4; slave_user=gh_292dda35ab06; xid=f4e6941e8d1019c71e230139c5bae238; _clck=3932387393|1|fy9|0; _clsk=1ly0zby|1754532579853|5|1|mp.weixin.qq.com/weheat-agent/payload/record"); // 必须包含session凭证
  144. // 3. 执行请求并处理响应
  145. try (CloseableHttpResponse response = httpClient.execute(request)) {
  146. final int code = response.getCode();
  147. // 检查响应状态码
  148. if (code != 200) {
  149. System.err.println("请求失败,状态码: " + code);
  150. return;
  151. }
  152. // 4. 解析HTML内容
  153. String htmlContent = EntityUtils.toString(response.getEntity(), "UTF-8");
  154. Document doc = Jsoup.parse(htmlContent);
  155. // 5. 提取文章正文(微信使用特定class)
  156. Element contentElement = doc.selectFirst("#js_content");
  157. if (contentElement != null) {
  158. String articleContent = contentElement.text();
  159. final Elements elements1 = contentElement.select("p");
  160. Elements elements = contentElement.select("p");
  161. for (Element element : elements) {
  162. final String text1 = element.text();
  163. }
  164. } else {
  165. System.err.println("未找到文章内容,可能Cookie已失效");
  166. }
  167. }
  168. } catch (Exception e) {
  169. e.printStackTrace();
  170. }
  171. }
  172. @Test
  173. public void selectArticleInfoList() {
  174. SelectArticleInfoDTO vo = new SelectArticleInfoDTO();
  175. vo.setPageNum(1L);
  176. vo.setPageSize(10L);
  177. final String list = articleInfoService.selectArticleInfoList(vo);
  178. System.out.println(list);
  179. }
  180. @Test
  181. public void selectCategoryList() {
  182. SelectCategoryListDTO vo = new SelectCategoryListDTO();
  183. // vo.setPageNum(1L);
  184. // vo.setPageSize(10L);
  185. String list = categoryService.selectCategoryList(vo);
  186. System.out.println(list);
  187. }
  188. @Test
  189. public void addReport() {
  190. ReportDTO reportDTO = new ReportDTO();
  191. reportDTO.setReportName("0815测试");
  192. final Integer id = reportService.addReport(reportDTO);
  193. System.out.println(id);
  194. }
  195. @Test
  196. void singleTestAdd() throws IOException {
  197. List<String> list = new ArrayList<>();
  198. list.add("https://mp.weixin.qq.com/s?__biz=Mzg5MjA1MTkyNA==&mid=2247485509&idx=1&sn=8d9551806c9a6d2dd1cb8f0a5351144e&chksm=cfc540fef8b2c9e857a9142d5c433922a33c0f82cb25600582fcac91a00dd5fbc21b2eac9e24#rd");
  199. // list.add("https://mp.weixin.qq.com/s/LCnC8UICiG7wxdvMqfE5qQ");
  200. // list.add("https://www.cnipa.gov.cn/art/2025/8/21/art_74_201139.html");
  201. for (String url : list) {
  202. getWeChatArticleService.getWeChatArticleContent1(url,3,2);
  203. // getCNIPAArticleService.getDigest1(url);
  204. }
  205. System.out.println("-------------------");
  206. }
  207. @Test
  208. public void test() {
  209. // final long mistime = Long.parseLong("1754563895");
  210. // final String s = DateUtil.convertTimestamp(mistime);
  211. // System.out.println(s);
  212. String createTimeSecondStr = "1755817699";
  213. long secondCreateTime = Long.parseLong(createTimeSecondStr);
  214. String createTimeStr = DateUtil.convertTimestamp(secondCreateTime);
  215. String yesterdayDateStr = DateUtil.getYesterdayDateStr();
  216. Date createTime = new Date();
  217. SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
  218. try {
  219. createTime = dateFormat.parse(createTimeStr);
  220. } catch (Exception e) {
  221. }
  222. System.out.println(createTime);
  223. }
  224. @Test
  225. void crawlArticles() throws Exception {
  226. //-------------江苏知识产权局
  227. // String url = "https://jsip.jiangsu.gov.cn/";
  228. // String url = "https://jsip.jiangsu.gov.cn/col/col75877/index.html";
  229. // String url = "https://jsip.jiangsu.gov.cn/col/col85036/index.html";
  230. //-------------上海知识产权局
  231. // String url = "https://sipa.sh.gov.cn/index.html";
  232. // String url = "https://sipa.sh.gov.cn/2021gzdt/index.html";
  233. //-------------浙江知识产权局(浙江省知识产权保护中心)
  234. // String url = "https://zjippc.org.cn/";
  235. //-------------北京知识产权局
  236. // String url = "https://zscqj.beijing.gov.cn/zscqj/index/index.html";
  237. // String url = "https://zscqj.beijing.gov.cn/zscqj/zwgk/tzgg/index.html";
  238. //-------------重庆知识产权局
  239. // String url = "https://zscqj.cq.gov.cn/";
  240. //-------------天津知识产权局
  241. // String url = "https://zscq.tj.gov.cn/";
  242. //-------------河南知识产权局
  243. // String url = "https://scjg.henan.gov.cn/hnzscqj/";
  244. //-------------广东省知识产权保护中心
  245. // String url = "https://www.gippc.com.cn/ippc/index.shtml";
  246. //-------------海南知识产权局
  247. // String url = "https://amr.hainan.gov.cn/szscqj/";
  248. //-------------湖南市场监督管理局
  249. // String url = "https://amr.hunan.gov.cn/";
  250. //-------------辽宁省知识产权局
  251. // String url = "https://zscq.ln.gov.cn/zscq/index/index.shtml";
  252. //-------------青海省知识产权局(青海省市场监督管理局)
  253. // String url = "https://scjgj.qinghai.gov.cn/";
  254. //-------------陕西省知识产权局
  255. // String url = "https://snipa.shaanxi.gov.cn/";
  256. //-------------山西省知识产权保护中心
  257. // String url = "https://www.sxippc.com/";
  258. //-------------云南知识产权局(云南省市场监督管理局)
  259. // String url = "https://amr.yn.gov.cn/zscqj/index.htm";
  260. //-------------内蒙古市场监督管理局
  261. // String url = "https://amr.nmg.gov.cn/";
  262. //-------------安徽省知识产权保护中心
  263. // String url = "https://www.ahippc.cn/";
  264. //-------------湖北知识产权局
  265. // String url = "https://zscqj.hubei.gov.cn/";
  266. // getProvinceNewsService.crawlHubeiArticles(url);
  267. //-------------江西省市场监督管理局(知识产权局)
  268. // String url = "https://amr.jiangxi.gov.cn/";
  269. //-------------黑龙江知识产权局
  270. // String url = "https://hlipa.hlj.gov.cn/hlipa/index.shtml";
  271. //-------------福建省知识产权局(福建省市场监督管理局)
  272. // String url = "https://scjgj.fujian.gov.cn/";
  273. //-------------四川省知识产权局(四川省市场监督管理局)
  274. // String url = "https://scjgj.sc.gov.cn/";
  275. //-------------河北知识产权局(河北市场监督管理局)
  276. // String url = "https://scjg.hebei.gov.cn/";
  277. //-------------吉林省市场监督管理厅
  278. // String url = "http://scjg.jl.gov.cn/";
  279. //-------------山东省市场监督管理局
  280. // String url = "http://amr.shandong.gov.cn/";
  281. //-------------贵州省市场监督管理局(贵州知识产权局)
  282. // String url = "https://amr.guizhou.gov.cn/";
  283. //-------------甘肃市场监督管理局
  284. // String url = "https://scjg.gansu.gov.cn/scjg/index.shtml";
  285. //-------------广西知识产权公告服务平台
  286. // String url = "http://www.gxipo.net/";
  287. //-------------宁夏市场监督管理局
  288. // String url = "http://scjg.nx.gov.cn/";
  289. //-------------新疆市场监督管理局(新疆知识产权局)
  290. String url = "https://scjgj.xinjiang.gov.cn/";
  291. getProvinceNewsService.crawlArticles(url,"",1,2);
  292. // String url = "https://www.ahippc.cn/news.html?categoryId=a5e96b641ade4fc9b50b4f9504ba0f62";
  293. // final List<GetArticleInfoDTO> articleInfoDTOS = getProvinceNewsService.crawlArticlesDetail(url);
  294. // System.out.println(articleInfoDTOS);
  295. // String url = "https://jsip.jiangsu.gov.cn/art/2025/8/28/art_75877_11630402.html";
  296. // final String digest = getProvinceNewsService.getDigest(url);
  297. // System.out.println(digest);
  298. }
  299. @Test
  300. void addArticleFromWebSource() throws Exception {
  301. // String url = "https://scjg.hebei.gov.cn/";
  302. // getProvinceNewsService.crawlHebeiArticles(url);
  303. // String url = "https://scjg.hebei.gov.cn/node/919";
  304. // getProvinceNewsService.addHebeiArticle(url);
  305. // String url = "https://zscqj.hubei.gov.cn/";
  306. // getProvinceNewsService.crawlHubeiArticles(url);
  307. // getProvinceNewsService.crawlJiangxiArticles("https://amr.jiangxi.gov.cn/");
  308. getProvinceNewsService.addArticleFromWebSource();
  309. }
  310. @Test
  311. void crawlEcigaretteArticles() throws IOException {
  312. String url = "https://www.2firsts.cn/";
  313. getEcigaretteService.crawlEcigaretteArticles(url);
  314. }
  315. @Test
  316. void getGuid() throws Exception {
  317. String url = "https://mmbiz.qlogo.cn/mmbiz_jpg/N7S9NTpmPL8yibib3EwG7zxia25Ij9zS0A2XTlWWxpUomE7S9aDxNe8ibJYX7n8KjF8xf90n9UUSwaC2nTLh2D8a7Q/0?wx_fmt=jpeg";
  318. URL fileUrl = new URL(url);
  319. HttpURLConnection connection = (HttpURLConnection) fileUrl.openConnection();
  320. File tempFile = File.createTempFile("tem-", ".jpeg");
  321. try (InputStream in = connection.getInputStream(); FileOutputStream out = new FileOutputStream(tempFile)) {
  322. IOUtils.copy(in, out);
  323. }
  324. final List<String> list = fileManagerService.uploadFileGetGuid2(Collections.singletonList(tempFile));
  325. System.out.println(list.get(0));
  326. Files.delete(tempFile.toPath());
  327. }
  328. @Test
  329. void test11() throws Exception {
  330. List<ArticleInfo> list = articleInfoService.list(new LambdaQueryWrapper<ArticleInfo>()
  331. .in(ArticleInfo::getCategoryId,Arrays.asList(3,4,5,6))
  332. .isNotNull(ArticleInfo::getWxArticleIcon));
  333. System.out.println(list.size());
  334. if (!CollectionUtils.isEmpty(list)) {
  335. for (ArticleInfo articleInfo : list) {
  336. if (StringUtils.isNotEmpty(articleInfo.getWxArticleIcon())) {
  337. String guid = getWeChatArticleService.getGuid(articleInfo.getWxArticleIcon());
  338. articleInfo.setWxArticleIcon(guid);
  339. articleInfo.setId(articleInfo.getId());
  340. articleInfo.updateById();
  341. }
  342. }
  343. }
  344. System.out.println("----------------");
  345. }
  346. @Test
  347. void getPCTArticle() {
  348. getPCTArticleService.getPCTArticle();
  349. }
  350. @Test
  351. void test111() throws Exception {
  352. //-------------中国知识产权网
  353. // String url = "http://www.cnipr.com/";
  354. // getProvinceNewsService.test(url);
  355. //-------------智南针
  356. // String url = "https://www.worldip.cn/";
  357. // getProvinceNewsService.test1(url);
  358. // String url = "https://www.worldip.cn/index.php?m=content&c=index&a=show&catid=64&id=2996";
  359. // getProvinceNewsService.getDigest2(url);
  360. }
  361. }