// Program.cs (11 KB)
  1. using IPRS.xiaoshi.sz.com;
  2. using Microsoft.VisualBasic;
  3. using System;
  4. using System.IO;
  5. using System.Reflection.Metadata;
  6. using System.Runtime.CompilerServices;
  7. using System.Text.Json;
  8. using testtrieTree;
  9. using trieTree.xiaoshi.sz.com;
  10. using static System.Runtime.InteropServices.JavaScript.JSType;
  11. // See https://aka.ms/new-console-template for more information
  // Show the mode menu (one entry per line), then read the user's choice.
  // `key` is consumed by the switch statement that follows.
  foreach (string menuLine in new[]
  {
      "请选择:",
      "1、手动输入地址解析;",
      "2、从文件中读取地址批量解析;",
      "3、测试IPRS检索;",
      "4、测试IPRS获取中国专利全文文本;",
      "5、测试IPRS获取中国专利PDF;",
      "6、测试IPRS获取中国专利著录项目;",
  })
  {
      Console.WriteLine(menuLine);
  }

  string? key = Console.ReadLine();
  20. switch (key)
  21. {
  case "4":
      {
          // Full text: prompt for an application number, print what
          // IPRSSearcher.GetPatentFullTxtInfo returns, then feed the same
          // text to Parser.ParserFulltext (its result is not used here).
          Console.Write("请输入专利申请号:");
          string applicationNumber = Console.ReadLine();
          var fullTextResult = new IPRSSearcher().GetPatentFullTxtInfo(applicationNumber);
          string fullTextDump = fullTextResult.ToString();
          Console.WriteLine(fullTextDump);
          _ = new Parser().ParserFulltext(fullTextDump);
          break;
      }
  case "5":
      {
          // PDF: prompt for an application number and print what
          // IPRSSearcher.GetPatentCNPdf returns.
          Console.Write("请输入专利申请号:");
          string applicationNumber = Console.ReadLine();
          var pdfResult = new IPRSSearcher().GetPatentCNPdf(applicationNumber);
          Console.WriteLine(pdfResult.ToString());
          break;
      }
  case "6":
      {
          // Bibliographic data: prompt for an application number and print
          // what IPRSSearcher.GetPatentCNBiblio returns.
          Console.Write("请输入专利申请号:");
          string applicationNumber = Console.ReadLine();
          var biblioResult = new IPRSSearcher().GetPatentCNBiblio(applicationNumber);
          Console.WriteLine(biblioResult.ToString());
          break;
      }
  case "1":
      // Interactive mode: parse addresses typed on the console until the user
      // enters "exit" or the input stream ends.
      // The two timestamps bracket the AddressUtility construction
      // (presumably to show how long it takes to build - confirm).
      Console.WriteLine(DateTime.Now.ToString());
      AddressUtility parser = new AddressUtility();
      Console.WriteLine(DateTime.Now.ToString());
      while (true)
      {
          Console.Write("请输入解析的地址:");
          string? strAddress = Console.ReadLine();

          // Console.ReadLine returns null once stdin is exhausted (Ctrl+Z/Ctrl+D
          // or redirected input). Without this check the loop would never end,
          // because null is never equal to "exit".
          if (strAddress is null || strAddress == "exit")
          {
              Console.WriteLine("bye!");
              break;
          }

          List<string>? result = parser.Paser(strAddress);
          if (result != null)
          {
              // One tab-terminated field per parsed component, all on one line.
              foreach (string part in result)
              {
                  Console.Write($"{part}\t");
              }
              Console.WriteLine();
          }
          Console.WriteLine(DateTime.Now);
      }
      break;
  case "2":
      // Batch mode: parse every line of c:/temp/地址.txt, echo one CSV row per
      // address to the console, then write all rows to a timestamped CSV file.
      // Row layout: quoted address, up to five parsed levels (padded with empty
      // fields), then two flags telling whether the raw line contains the
      // concatenated first three levels and the concatenated first two levels.
      Console.WriteLine(DateTime.Now.ToString());
      parser = new AddressUtility();
      Console.WriteLine(DateTime.Now.ToString());
      string csvFile = "c:/temp/地址.txt";
      List<string> strings = new List<string>();
      using (StreamReader sr = new StreamReader(csvFile))
      {
          string? line;
          while ((line = sr.ReadLine()) != null)
          {
              // Paser can return null (the interactive mode checks for it);
              // treat that as "nothing parsed" instead of crashing the batch.
              List<string> retList = parser.Paser(line) ?? new List<string>();
              string str = "\"" + line.Trim() + "\"";
              string ssqx = ""; // first three levels, placeholder names skipped
              string sszq = ""; // first two levels, placeholder names skipped
              int i = 0;
              foreach (var s in retList)
              {
                  // These two names are excluded from the containment check;
                  // presumably they are placeholder levels that never appear
                  // in a written address - confirm.
                  bool isPlaceholder = s == "市辖区" || s == "省直辖县级行政区划";
                  if (!isPlaceholder)
                  {
                      if (i < 2)
                      {
                          sszq += s;
                      }
                      if (i < 3)
                      {
                          ssqx += s;
                      }
                  }
                  str += "," + s;
                  i++;
              }
              // Pad to five level columns so the flag columns line up.
              for (int j = i; j < 5; j++)
              {
                  str += ",";
              }
              // string.Contains("") is always true, so an address for which
              // nothing was parsed must not be reported as a match.
              bool districtMatches = ssqx.Length > 0 && line.Contains(ssqx);
              bool cityMatches = sszq.Length > 0 && line.Contains(sszq);
              str = str + "," + districtMatches.ToString() + "," + cityMatches.ToString();
              Console.WriteLine(str);
              strings.Add(str);
          }
      }
      // "HH" is the 24-hour clock. The previous "hh" made a morning and an
      // afternoon run share a file name, and because the writer appends, the
      // second run would add another header and its rows to the first file.
      string strPath = $"c:/temp/address_{DateTime.Now.ToString("HHmmss")}.csv";
      using (StreamWriter streamWriter = new StreamWriter(strPath, true, System.Text.Encoding.UTF8))
      {
          // NOTE(review): the header names nine columns but each row carries
          // eight (no value is computed for the last column, "省正确") - confirm
          // whether a province-level flag is missing or the header is wrong.
          streamWriter.WriteLine("地址,省,市,区县,乡镇,村级,区县正确,市正确,省正确");
          foreach (var row in strings)
          {
              streamWriter.WriteLine(row);
          }
      }
      Console.WriteLine(DateTime.Now);
      break;
  case "3":
      {
          // Crawl mode: for each Tuesday/Friday between the start and end dates,
          // query IPRS with the condition "F XX <date>/GD*3/PT", store every hit
          // as <AN>.json under e:/CNWGPatents/<date>/ and download its main
          // picture (<AN>_abs.jpeg) and its image list (<AN>_img<k>.jpeg).
          // Patents whose .json file already exists are skipped.

          // Downloads the URL produced by resolveUrl to targetPath, trying up to
          // maxAttempts times. resolveUrl runs inside the retry so a failing URL
          // lookup is retried as well. Returns false (after reporting the last
          // error) when every attempt failed; never throws for a failed attempt.
          static async Task<bool> TryDownloadAsync(HttpClient client, Func<string> resolveUrl, string targetPath, int maxAttempts)
          {
              for (int attempt = 1; attempt <= maxAttempts; attempt++)
              {
                  try
                  {
                      byte[] payload = await client.GetByteArrayAsync(resolveUrl());
                      using (FileStream output = new FileStream(targetPath, FileMode.Create))
                      {
                          output.Write(payload, 0, payload.Length);
                      }
                      return true;
                  }
                  catch (Exception error)
                  {
                      // Best effort: the crawl continues, but the failure is no
                      // longer silent once the attempts are used up.
                      if (attempt == maxAttempts)
                      {
                          Console.WriteLine($"download failed after {maxAttempts} attempts: {targetPath} ({error.Message})");
                      }
                  }
              }
              return false;
          }

          // Returns Data.HitCount from an IPRS search response. "Ret" is read
          // too so a response without it still fails fast, as before.
          static int ReadHitCount(string searchResponse)
          {
              using (JsonDocument parsed = JsonDocument.Parse(searchResponse))
              {
                  _ = parsed.RootElement.GetProperty("Ret").GetInt32();
                  return parsed.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
              }
          }

          const int pageSize = 50;
          const int maxDownloadAttempts = 3;
          const int splitThreshold = 10000;

          string savePath = "e:/CNWGPatents/";
          // Constructed directly rather than parsed, so the values do not depend
          // on the current culture's date parsing.
          DateTime start = new DateTime(2021, 6, 8);
          DateTime end = new DateTime(2021, 12, 31);

          // One HttpClient for the whole crawl: creating a client per download
          // can exhaust sockets under load.
          using (HttpClient httpClient = new HttpClient())
          {
              while (start.CompareTo(end) < 0)
              {
                  string strPubDate = start.ToString("yyyyMMdd");
                  string strCondition = $"F XX {strPubDate}/GD*3/PT";
                  int dayHitCount = ReadHitCount(new IPRSSearcher().GetPatents(strCondition, 1, 1));

                  List<string> conditions = new List<string>();
                  string temSavePath = $"{savePath}{strPubDate}";
                  if (dayHitCount > 0)
                  {
                      // CreateDirectory is a no-op when the directory exists.
                      Directory.CreateDirectory(temSavePath);
                      if (dayHitCount > splitThreshold)
                      {
                          // Large result sets are split into several narrower
                          // conditions by IPRSSearcher.splitCondition.
                          conditions = new IPRSSearcher().splitCondition(strCondition, DateTime.Now.AddYears(-15), DateTime.Now);
                      }
                      else
                      {
                          conditions.Add(strCondition);
                      }
                  }

                  foreach (string condition in conditions)
                  {
                      int conditionHitCount = ReadHitCount(new IPRSSearcher().GetPatents(condition, 1, 1));
                      // Ceiling division; zero hits yields zero pages.
                      int totalPages = (conditionHitCount + pageSize - 1) / pageSize;
                      for (int page = 1; page <= totalPages; page++)
                      {
                          string pageResponse = new IPRSSearcher().GetPatents(condition, page, pageSize);
                          using (JsonDocument document = JsonDocument.Parse(pageResponse))
                          {
                              _ = document.RootElement.GetProperty("Ret").GetInt32();
                              var hits = document.RootElement.GetProperty("Data").GetProperty("List");
                              for (int i = 0; i < hits.GetArrayLength(); i++)
                              {
                                  string appNo = hits[i].GetProperty("AN").GetString().Replace(".", "");
                                  string jsonPath = $"{temSavePath}/{appNo}.json";
                                  if (!File.Exists(jsonPath))
                                  {
                                      using (StreamWriter writer = new StreamWriter(jsonPath))
                                      {
                                          writer.WriteLine(hits[i].ToString());
                                      }

                                      var searcher1 = new IPRSSearcher();
                                      await TryDownloadAsync(
                                          httpClient,
                                          () => searcher1.GetPatentCnMainPic(appNo),
                                          $"{temSavePath}/{appNo}_abs.jpeg",
                                          maxDownloadAttempts);

                                      // GetPatentCNWGImage returns the image URLs
                                      // joined with '|', or nothing.
                                      string strWGImagesUrl = searcher1.GetPatentCNWGImage(appNo);
                                      if (!string.IsNullOrEmpty(strWGImagesUrl))
                                      {
                                          string[] imageUrls = strWGImagesUrl.Split('|');
                                          for (int idx = 0; idx < imageUrls.Length; idx++)
                                          {
                                              string imageUrl = imageUrls[idx];
                                              await TryDownloadAsync(
                                                  httpClient,
                                                  () => imageUrl,
                                                  $"{temSavePath}/{appNo}_img{idx + 1}.jpeg",
                                                  maxDownloadAttempts);
                                          }
                                      }
                                  }
                                  // Progress is counted within the current condition,
                                  // so it is shown against that condition's hit count.
                                  Console.WriteLine($"{(page - 1) * pageSize + i + 1}/{conditionHitCount}\t{appNo}\t{DateTime.Now}\t{strPubDate}");
                              }
                          }
                      }
                  }

                  // Move on to the next Tuesday or Friday.
                  do
                  {
                      start = start.AddDays(1);
                  }
                  while (start.DayOfWeek != DayOfWeek.Friday && start.DayOfWeek != DayOfWeek.Tuesday);
              }
          }
          break;
      }
  247. default:
  248. break;
  249. }