Program.cs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. using IPRS.xiaoshi.sz.com;
  2. using Microsoft.VisualBasic;
  3. using System;
  4. using System.IO;
  5. using System.Reflection.Metadata;
  6. using System.Runtime.CompilerServices;
  7. using System.Text.Json;
  8. using testtrieTree;
  9. using trieTree.xiaoshi.sz.com;
  10. using static System.Runtime.InteropServices.JavaScript.JSType;
  11. // See https://aka.ms/new-console-template for more information
  12. string? key = null;
  13. if (args.Length > 0)
  14. {
  15. key = args[0];
  16. }
  17. else
  18. {
  19. Console.WriteLine("请选择:");
  20. Console.WriteLine("1、手动输入地址解析;");
  21. Console.WriteLine("2、从文件中读取地址批量解析;");
  22. Console.WriteLine("3、测试IPRS检索;");
  23. Console.WriteLine("4、测试IPRS获取中国专利全文文本;");
  24. Console.WriteLine("5、测试IPRS获取中国专利PDF;");
  25. Console.WriteLine("6、测试IPRS获取中国专利著录项目;");
  26. key = Console.ReadLine();
  27. }
  28. switch (key)
  29. {
  30. case "4":
  31. Console.Write("请输入专利申请号:");
  32. string strAppNo = Console.ReadLine();
  33. IPRSSearcher searcher = new IPRSSearcher();
  34. var ret =searcher.GetPatentFullTxtInfo(strAppNo);
  35. Console.WriteLine(ret.ToString());
  36. Parser paser = new Parser();
  37. var p = paser.ParserFulltext(ret.ToString());
  38. break;
  39. case "5":
  40. Console.Write("请输入专利申请号:");
  41. strAppNo = Console.ReadLine();
  42. searcher = new IPRSSearcher();
  43. ret = searcher.GetPatentCNPdf(strAppNo);
  44. Console.WriteLine(ret.ToString());
  45. break;
  46. case "6":
  47. Console.Write("请输入专利申请号:");
  48. strAppNo = Console.ReadLine();
  49. searcher = new IPRSSearcher();
  50. ret = searcher.GetPatentCNBiblio(strAppNo);
  51. Console.WriteLine(ret.ToString());
  52. break;
  53. case "1":
  54. Console.WriteLine(DateTime.Now.ToString());
  55. AddressUtility parser = new AddressUtility();
  56. Console.WriteLine(DateTime.Now.ToString());
  57. while (true)
  58. {
  59. Console.Write("请输入解析的地址:");
  60. string strAddress = Console.ReadLine();
  61. if (strAddress != "exit")
  62. {
  63. List<string>? result = parser.Paser(strAddress);
  64. if (result != null)
  65. {
  66. foreach (string s in result)
  67. {
  68. Console.Write($"{s}\t");
  69. }
  70. Console.WriteLine();
  71. }
  72. Console.WriteLine(DateTime.Now);
  73. }
  74. else
  75. {
  76. Console.WriteLine("bye!");
  77. break;
  78. }
  79. }
  80. break;
  81. case "2":
  82. Console.WriteLine(DateTime.Now.ToString());
  83. parser = new AddressUtility();
  84. Console.WriteLine(DateTime.Now.ToString());
  85. string csvFile = "c:/temp/地址.txt";
  86. List<string> strings = new List<string>();
  87. using (StreamReader sr = new StreamReader(csvFile))
  88. {
  89. while (!sr.EndOfStream)
  90. {
  91. string line = sr.ReadLine();
  92. List<string> retList = parser.Paser(line);
  93. string str = "\"" + line.Trim() + "\"";
  94. string ssqx = "";
  95. string sszq = "";
  96. int i = 0;
  97. foreach (var s in retList)
  98. {
  99. if (i < 2)
  100. {
  101. if (s != "市辖区" && s != "省直辖县级行政区划")
  102. sszq += s;
  103. }
  104. if (i < 3)
  105. {
  106. if (s != "市辖区" && s != "省直辖县级行政区划")
  107. ssqx += s;
  108. }
  109. str += "," + s;
  110. i++;
  111. }
  112. for (int j = i; j < 5; j++)
  113. {
  114. str += ",";
  115. }
  116. str = str + "," + line.Contains(ssqx).ToString() + "," + line.Contains(sszq).ToString();
  117. Console.WriteLine(str);
  118. strings.Add(str);
  119. }
  120. }
  121. string strPath = $"c:/temp/address_{DateTime.Now.ToString("hhmmss")}.csv";
  122. using (StreamWriter streamWriter = new StreamWriter(strPath, true, System.Text.Encoding.UTF8))
  123. {
  124. streamWriter.WriteLine("地址,省,市,区县,乡镇,村级,区县正确,市正确,省正确");
  125. foreach (var s in strings)
  126. {
  127. streamWriter.WriteLine(s);
  128. }
  129. }
  130. Console.WriteLine(DateTime.Now);
  131. break;
  132. case "3":
  133. string savePath = "e:/CNPatents/";
  134. Console.Write("请输入开始日期(yyyy-mm-dd):");
  135. string? strStart = Console.ReadLine();
  136. DateTime start;
  137. if(!DateTime.TryParse(strStart, out start))
  138. {
  139. Console.WriteLine("输入日期格式错误,系统退出!");
  140. } //DateTime.Parse("2024-11-01");
  141. Console.Write("请输入结束日期(yyyy-mm-dd):");
  142. string? strEnd = Console.ReadLine();
  143. DateTime end;
  144. if (!DateTime.TryParse(strEnd, out end))
  145. {
  146. Console.WriteLine("输入日期格式错误,系统退出!");
  147. } //DateTime.Parse("2024-11-01");
  148. //DateTime end = DateTime.Parse("2024-11-30");
  149. while(start.CompareTo(end) <= 0)
  150. {
  151. //Console.WriteLine("请输入下载的日期(yyyymmdd):");
  152. string strPubDate = start.ToString("yyyyMMdd");
  153. string strCondition = $"F XX (({strPubDate}/GD*3/PT)+({strPubDate}/GD*2/PT)+({strPubDate}/GD*9/PT)+({strPubDate}/PD*1/PT)+({strPubDate}/PD*8/PT))";
  154. string strRet = new IPRSSearcher().GetPatents(strCondition, 1, 1);
  155. int TotalPatents = 0;
  156. int RetiredPatents =0;
  157. using (JsonDocument document = JsonDocument.Parse(strRet))
  158. {
  159. var retCode = document.RootElement.GetProperty("Ret").GetInt32();
  160. TotalPatents = document.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
  161. }
  162. List<string> list = new List<string>();
  163. string temSavePath = $"{savePath}{strPubDate}";
  164. if (TotalPatents > 0)
  165. {
  166. if (!System.IO.Directory.Exists(temSavePath))
  167. {
  168. System.IO.Directory.CreateDirectory(temSavePath);
  169. }
  170. if (TotalPatents > 10000)
  171. {
  172. list = new IPRSSearcher().splitCondition(strCondition,DateTime.Now.AddYears(-15),DateTime.Now);
  173. }
  174. else
  175. {
  176. list.Add(strCondition);
  177. }
  178. }
  179. foreach (string c in list)
  180. {
  181. string strRet1 = new IPRSSearcher().GetPatents(c, 1, 1);
  182. int TotalPatents1 = 0;
  183. using (JsonDocument document = JsonDocument.Parse(strRet1))
  184. {
  185. var retCode1 = document.RootElement.GetProperty("Ret").GetInt32();
  186. TotalPatents1 = document.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
  187. }
  188. if (TotalPatents1 > 0)
  189. {
  190. int totalPages = (TotalPatents1 % 50 == 0) ? TotalPatents1 / 50 : TotalPatents1 / 50 + 1;
  191. for (int page = 1; page <= totalPages; page++)
  192. {
  193. int iListTry = 0;
  194. tryListAgain:
  195. strRet1 = new IPRSSearcher().GetPatents(c, page, 50);
  196. if (string.IsNullOrEmpty(strRet1))
  197. {
  198. iListTry++;
  199. if (iListTry < 3)
  200. {
  201. System.Threading.Thread.Sleep(1000);
  202. goto tryListAgain;
  203. }
  204. }
  205. using (JsonDocument document = JsonDocument.Parse(strRet1))
  206. {
  207. var retCode = document.RootElement.GetProperty("Ret").GetInt32();
  208. if(retCode == 500)
  209. {
  210. iListTry++;
  211. if (iListTry < 3)
  212. {
  213. goto tryListAgain;
  214. }
  215. }
  216. //TotalPatents = document.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
  217. var Datas = document.RootElement.GetProperty("Data").GetProperty("List");
  218. for (int i = 0; i < Datas.GetArrayLength(); i++)
  219. {
  220. string appNo = Datas[i].GetProperty("AN").GetString().Replace(".", "");
  221. string strType = appNo.Length > 9 ? appNo.Substring(6, 1) : appNo.Substring(4, 1);
  222. if (!System.IO.File.Exists($"{temSavePath}/{appNo}.json"))
  223. {
  224. RetiredPatents++;
  225. using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}.json"))
  226. {
  227. writer.WriteLine(Datas[i].ToString());
  228. }
  229. var searcher1 = new IPRSSearcher();
  230. int iGetMPic = 0;
  231. TryGetMainPic:
  232. try
  233. {
  234. string mpicUrl = searcher1.GetPatentCnMainPic(appNo);
  235. byte[] data = new HttpClient().GetByteArrayAsync(mpicUrl).Result;
  236. using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_abs.jpeg", FileMode.Create))
  237. {
  238. fileStream.Write(data, 0, data.Length);
  239. }
  240. }
  241. catch (Exception ex)
  242. {
  243. iGetMPic++;
  244. if (iGetMPic < 3)
  245. {
  246. goto TryGetMainPic;
  247. }
  248. }
  249. if (strType == "3")
  250. {
  251. Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents1}-{RetiredPatents}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
  252. continue;
  253. string strWGImagesUrl = searcher1.GetPatentCNWGImage(appNo);
  254. if (!string.IsNullOrEmpty(strWGImagesUrl))
  255. {
  256. using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_ImageUrls.txt"))
  257. {
  258. writer.WriteLine(strWGImagesUrl);
  259. }
  260. #region
  261. //break;
  262. //string[] img_urls = strWGImagesUrl.Split('|');
  263. //for (int idx = 0; idx < img_urls.Length; idx++)
  264. //{
  265. // int iTryGetPic = 0;
  266. //TryGetPic:
  267. // try
  268. // {
  269. // byte[] data = new HttpClient().GetByteArrayAsync(img_urls[idx]).Result;
  270. // using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_img{idx + 1}.jpeg", FileMode.Create))
  271. // {
  272. // fileStream.Write(data, 0, data.Length);
  273. // }
  274. // }
  275. // catch
  276. // {
  277. // iTryGetPic++;
  278. // if (iTryGetPic < 3)
  279. // {
  280. // goto TryGetPic;
  281. // }
  282. // else
  283. // {
  284. // Console.WriteLine($"{appNo}图{idx}获取错误!");
  285. // }
  286. // }
  287. //}
  288. #endregion
  289. }
  290. }
  291. else
  292. {
  293. string strFullText = searcher1.GetPatentFullTxtInfo(appNo);
  294. using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_fulltext.xml"))
  295. {
  296. writer.WriteLine(strFullText);
  297. }
  298. }
  299. }
  300. Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents1}-{RetiredPatents}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
  301. }
  302. }
  303. }
  304. }
  305. }
  306. start = start.AddDays(1);
  307. while ((start.DayOfWeek != DayOfWeek.Friday ) && (start.DayOfWeek != DayOfWeek.Tuesday))
  308. {
  309. start = start.AddDays(1);
  310. }
  311. }
  312. //return;
  313. break;
  314. default:
  315. break;
  316. }