using IPRS.xiaoshi.sz.com;
using Microsoft.VisualBasic;
using System;
using System.IO;
using System.Reflection.Metadata;
using System.Runtime.CompilerServices;
using System.Text.Json;
using testtrieTree;
using trieTree.xiaoshi.sz.com;
using static System.Runtime.InteropServices.JavaScript.JSType;

// See https://aka.ms/new-console-template for more information

// Console test harness. The mode is taken from the first command-line argument or,
// when no argument is given, from an interactive menu:
//   1 - parse addresses typed on the console
//   2 - parse every line of c:/temp/地址.txt and write the results to a CSV file
//   3 - download patents published in a date range into e:/CNPatents/
//   4 - print (and parse) the full text of one patent
//   5 - print the PDF lookup result of one patent
//   6 - print the bibliographic lookup result of one patent

// Number of records requested per result page in mode 3.
const int PageSize = 50;
// Maximum number of attempts for a page query or a picture download.
const int MaxAttempts = 3;

string? key = null;
if (args.Length > 0)
{
    key = args[0];
}
else
{
    Console.WriteLine("请选择:");
    Console.WriteLine("1、手动输入地址解析;");
    Console.WriteLine("2、从文件中读取地址批量解析;");
    Console.WriteLine("3、测试IPRS检索;");
    Console.WriteLine("4、测试IPRS获取中国专利全文文本;");
    Console.WriteLine("5、测试IPRS获取中国专利PDF;");
    Console.WriteLine("6、测试IPRS获取中国专利著录项目;");
    key = Console.ReadLine();
}

switch (key)
{
    case "1":
        RunInteractiveAddressParsing();
        break;

    case "2":
        RunBatchAddressParsing();
        break;

    case "3":
        DownloadPatentsByPublicationDate();
        break;

    case "4":
    {
        var ret = new IPRSSearcher().GetPatentFullTxtInfo(PromptForApplicationNumber());
        Console.WriteLine(ret.ToString());
        // The parse result is not used; the call is kept so the parser is still exercised.
        _ = new Parser().ParserFulltext(ret.ToString());
        break;
    }

    case "5":
    {
        var ret = new IPRSSearcher().GetPatentCNPdf(PromptForApplicationNumber());
        Console.WriteLine(ret.ToString());
        break;
    }

    case "6":
    {
        var ret = new IPRSSearcher().GetPatentCNBiblio(PromptForApplicationNumber());
        Console.WriteLine(ret.ToString());
        break;
    }

    default:
        break;
}

// Prompts for a patent application number and returns the raw console input
// (null when the input stream has ended).
string? PromptForApplicationNumber()
{
    Console.Write("请输入专利申请号:");
    return Console.ReadLine();
}

// Mode 1: reads addresses from the console until "exit" (or end of input) and prints
// the parsed components tab-separated, followed by a timestamp.
void RunInteractiveAddressParsing()
{
    // Timestamps before and after construction show how long AddressUtility takes to set up.
    Console.WriteLine(DateTime.Now.ToString());
    AddressUtility parser = new AddressUtility();
    Console.WriteLine(DateTime.Now.ToString());

    while (true)
    {
        Console.Write("请输入解析的地址:");
        string? strAddress = Console.ReadLine();

        // ReadLine returns null at end of input; treat it like "exit" instead of looping forever.
        if (strAddress is null || strAddress == "exit")
        {
            Console.WriteLine("bye!");
            break;
        }

        List<string>? result = parser.Paser(strAddress);
        if (result != null)
        {
            foreach (string s in result)
            {
                Console.Write($"{s}\t");
            }
            Console.WriteLine();
        }
        Console.WriteLine(DateTime.Now);
    }
}

// Mode 2: parses every line of the input file, echoes one CSV row per line to the
// console and writes all rows to a time-stamped CSV file.
void RunBatchAddressParsing()
{
    Console.WriteLine(DateTime.Now.ToString());
    AddressUtility parser = new AddressUtility();
    Console.WriteLine(DateTime.Now.ToString());

    string csvFile = "c:/temp/地址.txt";
    List<string> rows = new List<string>();
    using (StreamReader sr = new StreamReader(csvFile))
    {
        string? line;
        while ((line = sr.ReadLine()) != null)
        {
            string row = BuildCsvRow(line, parser.Paser(line));
            Console.WriteLine(row);
            rows.Add(row);
        }
    }

    // "HH" (24-hour clock) keeps a morning run and an afternoon run from sharing a file name;
    // the writer opens in append mode, so a shared name would mix two runs in one file.
    string strPath = $"c:/temp/address_{DateTime.Now:HHmmss}.csv";
    using (StreamWriter streamWriter = new StreamWriter(strPath, true, System.Text.Encoding.UTF8))
    {
        // NOTE(review): the header names 9 columns but BuildCsvRow emits 8 (there is no
        // value for the last one, "省正确") - confirm which side is intended.
        streamWriter.WriteLine("地址,省,市,区县,乡镇,村级,区县正确,市正确,省正确");
        foreach (string row in rows)
        {
            streamWriter.WriteLine(row);
        }
    }
    Console.WriteLine(DateTime.Now);
}

// Builds one CSV row: the quoted, trimmed address, five level columns (padded with empty
// fields), then two booleans telling whether the original line contains the concatenation
// of the first three levels and of the first two levels.
// A null parse result is treated as "no levels found".
string BuildCsvRow(string line, List<string>? levels)
{
    const int LevelColumns = 5;

    var row = new System.Text.StringBuilder();
    row.Append('"').Append(line.Trim()).Append('"');

    string firstTwoLevels = "";
    string firstThreeLevels = "";
    int count = 0;
    foreach (string level in levels ?? new List<string>())
    {
        // These two names are left out of the containment check
        // (presumably placeholder levels that never appear in a written address).
        bool excluded = level == "市辖区" || level == "省直辖县级行政区划";
        if (!excluded)
        {
            if (count < 2)
            {
                firstTwoLevels += level;
            }
            if (count < 3)
            {
                firstThreeLevels += level;
            }
        }

        row.Append(',').Append(level);
        count++;
    }

    for (int pad = count; pad < LevelColumns; pad++)
    {
        row.Append(',');
    }

    row.Append(',').Append(line.Contains(firstThreeLevels));
    row.Append(',').Append(line.Contains(firstTwoLevels));
    return row.ToString();
}

// Mode 3: for every publication day in the requested range, queries the patents published
// that day and stores, per patent, the list record (JSON), the main picture and - except
// for type "3" - the full text under e:/CNPatents/<yyyyMMdd>/.
void DownloadPatentsByPublicationDate()
{
    string savePath = "e:/CNPatents/";

    Console.Write("请输入开始日期(yyyy-mm-dd):");
    if (!DateTime.TryParse(Console.ReadLine(), out DateTime start))
    {
        // Actually stop here; continuing would start the download at DateTime.MinValue.
        Console.WriteLine("输入日期格式错误,系统退出!");
        return;
    }

    Console.Write("请输入结束日期(yyyy-mm-dd):");
    if (!DateTime.TryParse(Console.ReadLine(), out DateTime end))
    {
        Console.WriteLine("输入日期格式错误,系统退出!");
        return;
    }

    // One client for the whole run: creating a client per download leaks sockets.
    using HttpClient httpClient = new HttpClient();

    while (start.CompareTo(end) <= 0)
    {
        string strPubDate = start.ToString("yyyyMMdd");
        string strCondition = $"F XX (({strPubDate}/GD*3/PT)+({strPubDate}/GD*2/PT)+({strPubDate}/GD*9/PT)+({strPubDate}/PD*1/PT)+({strPubDate}/PD*8/PT))";

        // A one-record query is enough to learn the total hit count for the day.
        int totalPatents = ReadHitCount(new IPRSSearcher().GetPatents(strCondition, 1, 1));
        int retrievedPatents = 0;

        List<string> conditions = new List<string>();
        string temSavePath = $"{savePath}{strPubDate}";
        if (totalPatents > 0)
        {
            // CreateDirectory does nothing when the directory already exists.
            System.IO.Directory.CreateDirectory(temSavePath);

            if (totalPatents > 10000)
            {
                // More than 10000 hits: let the searcher split the query into several
                // conditions over the last 15 years.
                conditions = new IPRSSearcher().splitCondition(strCondition, DateTime.Now.AddYears(-15), DateTime.Now);
            }
            else
            {
                conditions.Add(strCondition);
            }
        }

        foreach (string condition in conditions)
        {
            int conditionTotal = ReadHitCount(new IPRSSearcher().GetPatents(condition, 1, 1));
            int totalPages = (conditionTotal + PageSize - 1) / PageSize;

            for (int page = 1; page <= totalPages; page++)
            {
                using JsonDocument? document = FetchPage(condition, page);
                if (document is null)
                {
                    // Every attempt returned nothing or a server error; skip the page
                    // instead of crashing on a payload that has no result list.
                    Console.WriteLine($"第{page}页检索结果获取失败,已跳过!");
                    continue;
                }

                JsonElement patents = document.RootElement.GetProperty("Data").GetProperty("List");
                int position = (page - 1) * PageSize;
                foreach (JsonElement patent in patents.EnumerateArray())
                {
                    position++;
                    string appNo = patent.GetProperty("AN").GetString().Replace(".", "");
                    if (SavePatentIfNew(patent, appNo, temSavePath, httpClient))
                    {
                        retrievedPatents++;
                    }

                    // Progress: position within this condition, then new downloads vs. day total.
                    Console.WriteLine($"{position}/{conditionTotal}-{retrievedPatents}/{totalPatents}\t{appNo}\t{DateTime.Now}\t{strPubDate}");
                }
            }
        }

        start = NextPublicationDay(start);
    }
}

// Reads Data.HitCount from a search response. Throws when the response is not valid JSON
// or lacks that property.
int ReadHitCount(string json)
{
    using JsonDocument document = JsonDocument.Parse(json);
    return document.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
}

// Requests one result page, retrying while the response is empty or reports Ret == 500.
// Returns the parsed response (the caller disposes it), or null when all attempts failed.
JsonDocument? FetchPage(string condition, int page)
{
    for (int attempt = 1; attempt <= MaxAttempts; attempt++)
    {
        string response = new IPRSSearcher().GetPatents(condition, page, PageSize);
        if (string.IsNullOrEmpty(response))
        {
            // Pause before asking again, but not after the last attempt.
            if (attempt < MaxAttempts)
            {
                System.Threading.Thread.Sleep(1000);
            }
            continue;
        }

        JsonDocument document = JsonDocument.Parse(response);
        if (document.RootElement.GetProperty("Ret").GetInt32() != 500)
        {
            return document;
        }

        // Ret == 500: discard this response and try again immediately.
        document.Dispose();
    }

    return null;
}

// Stores one patent unless its JSON record already exists in the directory.
// Returns true when the patent was new and its files were written.
// NOTE(review): the JSON record is written first and doubles as the "already downloaded"
// marker, so a run interrupted right after it will not fetch the remaining files on a rerun.
bool SavePatentIfNew(JsonElement patent, string appNo, string directory, HttpClient httpClient)
{
    // The type digit sits at index 6 for numbers longer than 9 characters, otherwise at index 4.
    string strType = appNo.Length > 9 ? appNo.Substring(6, 1) : appNo.Substring(4, 1);

    string recordPath = $"{directory}/{appNo}.json";
    if (System.IO.File.Exists(recordPath))
    {
        return false;
    }

    using (StreamWriter writer = new StreamWriter(recordPath))
    {
        writer.WriteLine(patent.ToString());
    }

    IPRSSearcher searcher = new IPRSSearcher();
    SaveMainPicture(searcher, httpClient, appNo, $"{directory}/{appNo}_abs.jpeg");

    // Type "3" gets no full text. The export of its image URL list
    // (IPRSSearcher.GetPatentCNWGImage written to "<appNo>_ImageUrls.txt") was already
    // disabled in this program and stays disabled.
    if (strType != "3")
    {
        string strFullText = searcher.GetPatentFullTxtInfo(appNo);
        using (StreamWriter writer = new StreamWriter($"{directory}/{appNo}_fulltext.xml"))
        {
            writer.WriteLine(strFullText);
        }
    }

    return true;
}

// Best-effort download of a patent's main picture to targetPath. Any failure is retried;
// the last failure is reported on the console and otherwise ignored.
void SaveMainPicture(IPRSSearcher searcher, HttpClient httpClient, string appNo, string targetPath)
{
    for (int attempt = 1; attempt <= MaxAttempts; attempt++)
    {
        try
        {
            string mpicUrl = searcher.GetPatentCnMainPic(appNo);
            // GetAwaiter().GetResult() surfaces the real exception rather than an AggregateException.
            byte[] data = httpClient.GetByteArrayAsync(mpicUrl).GetAwaiter().GetResult();
            using (FileStream fileStream = new FileStream(targetPath, FileMode.Create))
            {
                fileStream.Write(data, 0, data.Length);
            }
            return;
        }
        catch (Exception ex)
        {
            if (attempt == MaxAttempts)
            {
                Console.WriteLine($"{appNo}摘要附图获取错误:{ex.Message}");
            }
        }
    }
}

// Returns the first Tuesday or Friday strictly after the given day
// (presumably the days on which publications are issued - confirm).
DateTime NextPublicationDay(DateTime day)
{
    do
    {
        day = day.AddDays(1);
    }
    while (day.DayOfWeek != DayOfWeek.Friday && day.DayOfWeek != DayOfWeek.Tuesday);

    return day;
}