// Program.cs
  1. using IPRS.xiaoshi.sz.com;
  2. using Microsoft.VisualBasic;
  3. using System;
  4. using System.IO;
  5. using System.Reflection.Metadata;
  6. using System.Runtime.CompilerServices;
  7. using System.Text.Json;
  8. using trieTree.xiaoshi.sz.com;
  9. using static System.Runtime.InteropServices.JavaScript.JSType;
  10. // See https://aka.ms/new-console-template for more information
  11. string? key = null;
  12. if (args.Length > 0)
  13. {
  14. key = args[0];
  15. }
  16. else
  17. {
  18. Console.WriteLine("请选择:");
  19. Console.WriteLine("1、手动输入地址解析;");
  20. Console.WriteLine("2、从文件中读取地址批量解析;");
  21. Console.WriteLine("3、测试IPRS检索;");
  22. Console.WriteLine("4、测试IPRS获取中国专利全文文本;");
  23. Console.WriteLine("5、测试IPRS获取中国专利PDF;");
  24. Console.WriteLine("6、测试IPRS获取中国专利著录项目;");
  25. key = Console.ReadLine();
  26. }
  27. switch (key)
  28. {
  29. case "4":
  30. Console.Write("请输入专利申请号:");
  31. string strAppNo = Console.ReadLine();
  32. IPRSSearcher searcher = new IPRSSearcher();
  33. var ret =searcher.GetPatentFullTxtInfo(strAppNo);
  34. Console.WriteLine(ret.ToString());
  35. Parser paser = new Parser();
  36. var p = paser.ParserFulltext(ret.ToString());
  37. break;
  38. case "5":
  39. Console.Write("请输入专利申请号:");
  40. strAppNo = Console.ReadLine();
  41. searcher = new IPRSSearcher();
  42. ret = searcher.GetPatentCNPdf(strAppNo);
  43. Console.WriteLine(ret.ToString());
  44. break;
  45. case "6":
  46. Console.Write("请输入专利申请号:");
  47. strAppNo = Console.ReadLine();
  48. searcher = new IPRSSearcher();
  49. ret = searcher.GetPatentCNBiblio(strAppNo);
  50. Console.WriteLine(ret.ToString());
  51. break;
  52. case "1":
  53. Console.WriteLine(DateTime.Now.ToString());
  54. AddressUtility parser = new AddressUtility();
  55. Console.WriteLine(DateTime.Now.ToString());
  56. while (true)
  57. {
  58. Console.Write("请输入解析的地址:");
  59. string strAddress = Console.ReadLine();
  60. if (strAddress != "exit")
  61. {
  62. List<string>? result = parser.Paser(strAddress);
  63. if (result != null)
  64. {
  65. foreach (string s in result)
  66. {
  67. Console.Write($"{s}\t");
  68. }
  69. Console.WriteLine();
  70. }
  71. Console.WriteLine(DateTime.Now);
  72. }
  73. else
  74. {
  75. Console.WriteLine("bye!");
  76. break;
  77. }
  78. }
  79. break;
  80. case "2":
  81. Console.WriteLine(DateTime.Now.ToString());
  82. parser = new AddressUtility();
  83. Console.WriteLine(DateTime.Now.ToString());
  84. string csvFile = "c:/temp/地址.txt";
  85. List<string> strings = new List<string>();
  86. using (StreamReader sr = new StreamReader(csvFile))
  87. {
  88. while (!sr.EndOfStream)
  89. {
  90. string line = sr.ReadLine();
  91. List<string> retList = parser.Paser(line);
  92. string str = "\"" + line.Trim() + "\"";
  93. string ssqx = "";
  94. string sszq = "";
  95. int i = 0;
  96. foreach (var s in retList)
  97. {
  98. if (i < 2)
  99. {
  100. if (s != "市辖区" && s != "省直辖县级行政区划")
  101. sszq += s;
  102. }
  103. if (i < 3)
  104. {
  105. if (s != "市辖区" && s != "省直辖县级行政区划")
  106. ssqx += s;
  107. }
  108. str += "," + s;
  109. i++;
  110. }
  111. for (int j = i; j < 5; j++)
  112. {
  113. str += ",";
  114. }
  115. str = str + "," + line.Contains(ssqx).ToString() + "," + line.Contains(sszq).ToString();
  116. Console.WriteLine(str);
  117. strings.Add(str);
  118. }
  119. }
  120. string strPath = $"c:/temp/address_{DateTime.Now.ToString("hhmmss")}.csv";
  121. using (StreamWriter streamWriter = new StreamWriter(strPath, true, System.Text.Encoding.UTF8))
  122. {
  123. streamWriter.WriteLine("地址,省,市,区县,乡镇,村级,区县正确,市正确,省正确");
  124. foreach (var s in strings)
  125. {
  126. streamWriter.WriteLine(s);
  127. }
  128. }
  129. Console.WriteLine(DateTime.Now);
  130. break;
  131. case "3":
  132. string savePath = "e:/CNPatents/";
  133. Console.Write("请输入开始日期(yyyy-mm-dd):");
  134. string? strStart = Console.ReadLine();
  135. DateTime start;
  136. if(!DateTime.TryParse(strStart, out start))
  137. {
  138. Console.WriteLine("输入日期格式错误,系统退出!");
  139. } //DateTime.Parse("2024-11-01");
  140. Console.Write("请输入结束日期(yyyy-mm-dd):");
  141. string? strEnd = Console.ReadLine();
  142. DateTime end;
  143. if (!DateTime.TryParse(strEnd, out end))
  144. {
  145. Console.WriteLine("输入日期格式错误,系统退出!");
  146. } //DateTime.Parse("2024-11-01");
  147. //DateTime end = DateTime.Parse("2024-11-30");
  148. while(start.CompareTo(end) <= 0)
  149. {
  150. //Console.WriteLine("请输入下载的日期(yyyymmdd):");
  151. string strPubDate = start.ToString("yyyyMMdd");
  152. string strCondition = $"F XX (({strPubDate}/GD*3/PT)+({strPubDate}/GD*2/PT)+({strPubDate}/GD*9/PT)+({strPubDate}/PD*1/PT)+({strPubDate}/PD*8/PT))";
  153. string strRet = new IPRSSearcher().GetPatents(strCondition, 1, 1);
  154. int TotalPatents = 0;
  155. int RetiredPatents =0;
  156. using (JsonDocument document = JsonDocument.Parse(strRet))
  157. {
  158. var retCode = document.RootElement.GetProperty("Ret").GetInt32();
  159. TotalPatents = document.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
  160. }
  161. List<string> list = new List<string>();
  162. string temSavePath = Path.Combine(savePath,start.Year.ToString(),strPubDate); //$"{savePath}{strPubDate}";
  163. if (TotalPatents > 0)
  164. {
  165. if (!System.IO.Directory.Exists(temSavePath))
  166. {
  167. System.IO.Directory.CreateDirectory(temSavePath);
  168. }
  169. if (TotalPatents > 10000)
  170. {
  171. list = new IPRSSearcher().splitCondition(strCondition,DateTime.Parse("1985-01-01"),start.AddDays(1));
  172. }
  173. else
  174. {
  175. list.Add(strCondition);
  176. }
  177. }
  178. foreach (string c in list)
  179. {
  180. string strRet1 = new IPRSSearcher().GetPatents(c, 1, 1);
  181. int TotalPatents1 = 0;
  182. using (JsonDocument document = JsonDocument.Parse(strRet1))
  183. {
  184. var retCode1 = document.RootElement.GetProperty("Ret").GetInt32();
  185. TotalPatents1 = document.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
  186. }
  187. if (TotalPatents1 > 0)
  188. {
  189. int totalPages = (TotalPatents1 % 50 == 0) ? TotalPatents1 / 50 : TotalPatents1 / 50 + 1;
  190. for (int page = 1; page <= totalPages; page++)
  191. {
  192. int iListTry = 0;
  193. tryListAgain:
  194. strRet1 = new IPRSSearcher().GetPatents(c, page, 50);
  195. if (string.IsNullOrEmpty(strRet1))
  196. {
  197. iListTry++;
  198. if (iListTry < 3)
  199. {
  200. System.Threading.Thread.Sleep(1000);
  201. goto tryListAgain;
  202. }
  203. }
  204. using (JsonDocument document = JsonDocument.Parse(strRet1))
  205. {
  206. var retCode = document.RootElement.GetProperty("Ret").GetInt32();
  207. if(retCode == 500)
  208. {
  209. iListTry++;
  210. if (iListTry < 3)
  211. {
  212. goto tryListAgain;
  213. }
  214. }
  215. //TotalPatents = document.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
  216. var Datas = document.RootElement.GetProperty("Data").GetProperty("List");
  217. for (int i = 0; i < Datas.GetArrayLength(); i++)
  218. {
  219. string appNo = Datas[i].GetProperty("AN").GetString().Replace(".", "");
  220. string strType = appNo.Length > 9 ? appNo.Substring(6, 1) : appNo.Substring(4, 1);
  221. if (!System.IO.File.Exists($"{temSavePath}/{appNo}.json"))
  222. {
  223. using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}.json"))
  224. {
  225. writer.WriteLine(Datas[i].ToString());
  226. }
  227. var searcher1 = new IPRSSearcher();
  228. int iGetMPic = 0;
  229. TryGetMainPic:
  230. try
  231. {
  232. //string mpicUrl = searcher1.GetPatentCnMainPic(appNo);
  233. //byte[] data = new HttpClient().GetByteArrayAsync(mpicUrl).Result;
  234. //using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_abs.jpeg", FileMode.Create))
  235. //{
  236. // fileStream.Write(data, 0, data.Length);
  237. //}
  238. }
  239. catch (Exception ex)
  240. {
  241. iGetMPic++;
  242. if (iGetMPic < 3)
  243. {
  244. goto TryGetMainPic;
  245. }
  246. }
  247. if (strType == "3")
  248. {
  249. //Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents1}-{RetiredPatents}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
  250. //string strWGImagesUrl = searcher1.GetPatentCNWGImage(appNo);
  251. //if (!string.IsNullOrEmpty(strWGImagesUrl))
  252. //{
  253. // using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_ImageUrls.txt"))
  254. // {
  255. // writer.WriteLine(strWGImagesUrl);
  256. // }
  257. // #region
  258. // //break;
  259. // //string[] img_urls = strWGImagesUrl.Split('|');
  260. // //for (int idx = 0; idx < img_urls.Length; idx++)
  261. // //{
  262. // // int iTryGetPic = 0;
  263. // //TryGetPic:
  264. // // try
  265. // // {
  266. // // byte[] data = new HttpClient().GetByteArrayAsync(img_urls[idx]).Result;
  267. // // using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_img{idx + 1}.jpeg", FileMode.Create))
  268. // // {
  269. // // fileStream.Write(data, 0, data.Length);
  270. // // }
  271. // // }
  272. // // catch
  273. // // {
  274. // // iTryGetPic++;
  275. // // if (iTryGetPic < 3)
  276. // // {
  277. // // goto TryGetPic;
  278. // // }
  279. // // else
  280. // // {
  281. // // Console.WriteLine($"{appNo}图{idx}获取错误!");
  282. // // }
  283. // // }
  284. // //}
  285. // #endregion
  286. //}
  287. }
  288. else
  289. {
  290. string strFullText = searcher1.GetPatentFullTxtInfo(appNo);
  291. if (!string.IsNullOrEmpty(strFullText))
  292. {
  293. using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_fulltext.xml"))
  294. {
  295. writer.WriteLine(strFullText);
  296. }
  297. }
  298. }
  299. RetiredPatents++;
  300. }
  301. else
  302. {
  303. RetiredPatents++;
  304. }
  305. Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents1}-{RetiredPatents}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
  306. }
  307. }
  308. }
  309. }
  310. }
  311. start = start.AddDays(1);
  312. //while ((start.DayOfWeek != DayOfWeek.Friday ) && (start.DayOfWeek != DayOfWeek.Tuesday))
  313. //{
  314. // start = start.AddDays(1);
  315. //}
  316. }
  317. //return;
  318. break;
  319. default:
  320. break;
  321. }