123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374 |
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Reflection.Metadata;
using System.Runtime.CompilerServices;
using System.Text.Json;
using System.Threading.Tasks;
using IPRS.xiaoshi.sz.com;
using Microsoft.VisualBasic;
using trieTree.xiaoshi.sz.com;
using static System.Runtime.InteropServices.JavaScript.JSType;
// See https://aka.ms/new-console-template for more information
// Decide which action to run. The first command-line argument is used when
// one is supplied; otherwise the menu is printed and the choice is read from
// standard input (which may yield null at end of input).
string? key;
if (args.Length == 0)
{
    string[] menuLines =
    {
        "请选择:",
        "1、手动输入地址解析;",
        "2、从文件中读取地址批量解析;",
        "3、测试IPRS检索;",
        "4、测试IPRS获取中国专利全文文本;",
        "5、测试IPRS获取中国专利PDF;",
        "6、测试IPRS获取中国专利著录项目;",
    };
    foreach (string menuLine in menuLines)
    {
        Console.WriteLine(menuLine);
    }

    key = Console.ReadLine();
}
else
{
    key = args[0];
}
// Runs the action selected by `key`:
//   "1" interactive address parsing, "2" batch address parsing from a file,
//   "3" bulk patent download by publication date,
//   "4" / "5" / "6" single-patent lookups (full text, PDF, bibliographic data).
// Any other value (including null) does nothing.
switch (key)
{
    case "1":
        RunInteractiveAddressParsing();
        break;
    case "2":
        RunBatchAddressParsing();
        break;
    case "3":
        await RunPatentDownloadAsync();
        break;
    case "4":
        ShowPatentFullText();
        break;
    case "5":
        ShowPatentPdf();
        break;
    case "6":
        ShowPatentBiblio();
        break;
    default:
        break;
}

// Prompts for a patent application number. End of input is mapped to an
// empty string so callers never receive null.
static string PromptForApplicationNumber()
{
    Console.Write("请输入专利申请号:");
    return Console.ReadLine() ?? string.Empty;
}

// Menu entry "4": fetches the full text of one patent, prints it, and feeds
// it to Parser.ParserFulltext.
static void ShowPatentFullText()
{
    string appNo = PromptForApplicationNumber();
    string? fullText = new IPRSSearcher().GetPatentFullTxtInfo(appNo);

    // Console.WriteLine prints an empty line for null instead of throwing.
    Console.WriteLine(fullText);

    if (fullText is not null)
    {
        // The parse result is not used here; the call is kept so the parser
        // is still exercised on the retrieved text.
        _ = new Parser().ParserFulltext(fullText);
    }
}

// Menu entry "5": prints whatever GetPatentCNPdf returns for one patent.
static void ShowPatentPdf()
{
    string appNo = PromptForApplicationNumber();
    string? result = new IPRSSearcher().GetPatentCNPdf(appNo);
    Console.WriteLine(result);
}

// Menu entry "6": prints whatever GetPatentCNBiblio returns for one patent.
static void ShowPatentBiblio()
{
    string appNo = PromptForApplicationNumber();
    string? result = new IPRSSearcher().GetPatentCNBiblio(appNo);
    Console.WriteLine(result);
}

// Menu entry "1": reads addresses from the console and prints the parsed
// levels tab-separated until the user types "exit" or input ends.
static void RunInteractiveAddressParsing()
{
    // The two timestamps bracket the AddressUtility construction.
    Console.WriteLine(DateTime.Now.ToString());
    AddressUtility addressParser = new AddressUtility();
    Console.WriteLine(DateTime.Now.ToString());

    while (true)
    {
        Console.Write("请输入解析的地址:");
        string? address = Console.ReadLine();

        // ReadLine returns null forever once input is exhausted; treating it
        // like "exit" prevents an endless loop on redirected input.
        if (address is null || address == "exit")
        {
            Console.WriteLine("bye!");
            return;
        }

        List<string>? levels = addressParser.Paser(address);
        if (levels is not null)
        {
            foreach (string level in levels)
            {
                Console.Write($"{level}\t");
            }

            Console.WriteLine();
        }

        Console.WriteLine(DateTime.Now);
    }
}

// Menu entry "2": parses every line of c:/temp/地址.txt and writes the result
// rows to a time-stamped CSV file in c:/temp.
static void RunBatchAddressParsing()
{
    const string inputPath = "c:/temp/地址.txt";

    // The two timestamps bracket the AddressUtility construction.
    Console.WriteLine(DateTime.Now.ToString());
    AddressUtility addressParser = new AddressUtility();
    Console.WriteLine(DateTime.Now.ToString());

    List<string> rows = new List<string>();
    using (StreamReader reader = new StreamReader(inputPath))
    {
        string? line;
        while ((line = reader.ReadLine()) is not null)
        {
            // Paser may return null (the interactive path guards for it too);
            // treat that as "no levels recognised".
            List<string> levels = addressParser.Paser(line) ?? new List<string>();
            string row = BuildAddressCsvRow(line, levels);
            Console.WriteLine(row);
            rows.Add(row);
        }
    }

    // "HH" (24-hour clock) keeps morning and afternoon runs from colliding on
    // the same file name, which matters because the writer appends.
    string outputPath = $"c:/temp/address_{DateTime.Now:HHmmss}.csv";
    using (StreamWriter writer = new StreamWriter(outputPath, true, System.Text.Encoding.UTF8))
    {
        // NOTE(review): the header names nine columns but each row carries
        // eight (address, five levels, two flags) - confirm the intended layout.
        writer.WriteLine("地址,省,市,区县,乡镇,村级,区县正确,市正确,省正确");
        foreach (string row in rows)
        {
            writer.WriteLine(row);
        }
    }

    Console.WriteLine(DateTime.Now);
}

// Builds one CSV row: the quoted address, the parsed levels padded to five
// columns, then two flags telling whether the raw line contains the
// concatenation of the first three and of the first two parsed levels.
static string BuildAddressCsvRow(string line, IReadOnlyList<string> levels)
{
    const int levelColumns = 5;

    string firstThreeLevels = ConcatLevels(levels, 3);
    string firstTwoLevels = ConcatLevels(levels, 2);

    // Double embedded quotes so the quoted address stays a single CSV field.
    List<string> fields = new List<string> { "\"" + line.Trim().Replace("\"", "\"\"") + "\"" };
    fields.AddRange(levels);
    while (fields.Count < levelColumns + 1)
    {
        fields.Add(string.Empty);
    }

    fields.Add(line.Contains(firstThreeLevels).ToString());
    fields.Add(line.Contains(firstTwoLevels).ToString());
    return string.Join(",", fields);
}

// Concatenates the first `count` levels, leaving out the two placeholder
// names that are not expected to appear literally in an address.
static string ConcatLevels(IEnumerable<string> levels, int count)
{
    return string.Concat(levels
        .Take(count)
        .Where(level => level != "市辖区" && level != "省直辖县级行政区划"));
}

// Prompts for a date and parses it with the current culture.
static bool TryPromptForDate(string prompt, out DateTime value)
{
    Console.Write(prompt);
    return DateTime.TryParse(Console.ReadLine(), out value);
}

// Moves to the next Tuesday or Friday strictly after `day`
// (presumably the publication days - TODO confirm).
static DateTime NextPublicationDay(DateTime day)
{
    do
    {
        day = day.AddDays(1);
    }
    while (day.DayOfWeek != DayOfWeek.Tuesday && day.DayOfWeek != DayOfWeek.Friday);

    return day;
}

// True when the response root is an object whose "Ret" number equals 500.
static bool IsServerError(JsonDocument document)
{
    JsonElement root = document.RootElement;
    return root.ValueKind == JsonValueKind.Object
        && root.TryGetProperty("Ret", out JsonElement ret)
        && ret.ValueKind == JsonValueKind.Number
        && ret.TryGetInt32(out int code)
        && code == 500;
}

// Reads the "Data" object of a search response without throwing.
static bool TryGetData(JsonDocument document, out JsonElement data)
{
    data = default;
    JsonElement root = document.RootElement;
    return root.ValueKind == JsonValueKind.Object
        && root.TryGetProperty("Data", out data)
        && data.ValueKind == JsonValueKind.Object;
}

// Reads Data.HitCount of a search response without throwing.
static bool TryGetHitCount(JsonDocument document, out int hitCount)
{
    hitCount = 0;
    return TryGetData(document, out JsonElement data)
        && data.TryGetProperty("HitCount", out JsonElement hits)
        && hits.ValueKind == JsonValueKind.Number
        && hits.TryGetInt32(out hitCount);
}

// Reads the Data.List array of a search response without throwing.
static bool TryGetResultList(JsonDocument document, out JsonElement list)
{
    list = default;
    return TryGetData(document, out JsonElement data)
        && data.TryGetProperty("List", out list)
        && list.ValueKind == JsonValueKind.Array;
}

// Runs one search with up to three attempts. An attempt fails when the
// response is empty, is not valid JSON, or reports Ret == 500. Returns the
// parsed document (the caller disposes it) or null when every attempt failed.
static async Task<JsonDocument?> SearchWithRetryAsync(string condition, int page, int pageSize)
{
    const int maxAttempts = 3;

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        if (attempt > 1)
        {
            // Short pause before retrying so a struggling service is not hammered.
            await Task.Delay(1000);
        }

        string? response = new IPRSSearcher().GetPatents(condition, page, pageSize);
        if (string.IsNullOrEmpty(response))
        {
            continue;
        }

        JsonDocument document;
        try
        {
            document = JsonDocument.Parse(response);
        }
        catch (JsonException)
        {
            continue;
        }

        if (!IsServerError(document))
        {
            return document;
        }

        document.Dispose();
    }

    return null;
}

// Extracts the application number ("AN" with dots removed) from a result
// item. Returns null when it is missing, too short to carry the type digit,
// or contains characters that are not valid in a file name.
static string? ReadApplicationNumber(JsonElement item)
{
    if (item.ValueKind != JsonValueKind.Object
        || !item.TryGetProperty("AN", out JsonElement an)
        || an.ValueKind != JsonValueKind.String)
    {
        return null;
    }

    string? raw = an.GetString();
    if (raw is null)
    {
        return null;
    }

    string appNo = raw.Replace(".", "");
    if (appNo.Length < 5 || appNo.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return null;
    }

    return appNo;
}

// Best-effort download of the main picture with up to three attempts.
// Failures never abort the batch; the last one is reported on the console.
static async Task TryDownloadMainPictureAsync(IPRSSearcher searcher, HttpClient httpClient, string appNo, string targetPath)
{
    const int maxAttempts = 3;

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            string url = searcher.GetPatentCnMainPic(appNo);
            byte[] data = await httpClient.GetByteArrayAsync(url);
            await File.WriteAllBytesAsync(targetPath, data);
            return;
        }
        catch (Exception ex)
        {
            // Deliberately broad: any failure here only costs the picture.
            if (attempt == maxAttempts)
            {
                Console.WriteLine($"{appNo}主图获取错误:{ex.Message}");
            }
        }
    }
}

// Saves one search hit unless "<appNo>.json" already exists in `folder`.
// Writes the raw JSON, the main picture and - except for type digit "3" - the
// full text. Returns true when the patent was newly saved.
static async Task<bool> SavePatentIfNewAsync(HttpClient httpClient, JsonElement item, string appNo, string folder)
{
    string jsonPath = $"{folder}/{appNo}.json";
    if (File.Exists(jsonPath))
    {
        return false;
    }

    // NOTE(review): this file doubles as the "already downloaded" marker and
    // is written before the other downloads - confirm that is intended.
    using (StreamWriter jsonWriter = new StreamWriter(jsonPath))
    {
        jsonWriter.WriteLine(item.ToString());
    }

    IPRSSearcher searcher = new IPRSSearcher();
    await TryDownloadMainPictureAsync(searcher, httpClient, appNo, $"{folder}/{appNo}_abs.jpeg");

    // The type digit sits at index 6 for numbers longer than nine characters
    // and at index 4 otherwise. For type "3" nothing further is downloaded
    // (the GetPatentCNWGImage image-URL export for it is switched off).
    string typeDigit = appNo.Length > 9 ? appNo.Substring(6, 1) : appNo.Substring(4, 1);
    if (typeDigit != "3")
    {
        string fullText = searcher.GetPatentFullTxtInfo(appNo);
        using (StreamWriter textWriter = new StreamWriter($"{folder}/{appNo}_fulltext.xml"))
        {
            textWriter.WriteLine(fullText);
        }
    }

    return true;
}

// Menu entry "3": for every publication day between the two prompted dates,
// searches the patents of that day and stores each new hit under
// e:/CNPatents/<yyyyMMdd>/.
static async Task RunPatentDownloadAsync()
{
    const string saveRoot = "e:/CNPatents/";
    const int pageSize = 50;
    const int splitThreshold = 10000;

    // Stop on invalid input; continuing would start from DateTime's default
    // value (year 0001) and walk through two thousand years of dates.
    if (!TryPromptForDate("请输入开始日期(yyyy-mm-dd):", out DateTime start))
    {
        Console.WriteLine("输入日期格式错误,系统退出!");
        return;
    }

    if (!TryPromptForDate("请输入结束日期(yyyy-mm-dd):", out DateTime end))
    {
        Console.WriteLine("输入日期格式错误,系统退出!");
        return;
    }

    // One client for the whole run instead of one per picture.
    using HttpClient httpClient = new HttpClient();

    for (DateTime day = start; day <= end; day = NextPublicationDay(day))
    {
        string pubDate = day.ToString("yyyyMMdd", CultureInfo.InvariantCulture);
        string condition = $"F XX (({pubDate}/GD*3/PT)+({pubDate}/GD*2/PT)+({pubDate}/GD*9/PT)+({pubDate}/PD*1/PT)+({pubDate}/PD*8/PT))";

        int totalHits;
        using (JsonDocument? dayCountDoc = await SearchWithRetryAsync(condition, 1, 1))
        {
            if (dayCountDoc is null || !TryGetHitCount(dayCountDoc, out totalHits))
            {
                Console.WriteLine($"{pubDate}\t检索失败,已跳过");
                continue;
            }
        }

        if (totalHits <= 0)
        {
            continue;
        }

        string dayFolder = $"{saveRoot}{pubDate}";
        Directory.CreateDirectory(dayFolder);

        // Large result sets are split into several narrower conditions
        // (presumably because the service caps a result set - TODO confirm).
        List<string> conditions;
        if (totalHits > splitThreshold)
        {
            DateTime now = DateTime.Now;
            conditions = new IPRSSearcher().splitCondition(condition, now.AddYears(-15), now) ?? new List<string>();
        }
        else
        {
            conditions = new List<string> { condition };
        }

        int savedCount = 0;
        foreach (string subCondition in conditions)
        {
            int subHits;
            using (JsonDocument? subCountDoc = await SearchWithRetryAsync(subCondition, 1, 1))
            {
                if (subCountDoc is null || !TryGetHitCount(subCountDoc, out subHits))
                {
                    Console.WriteLine($"{pubDate}\t检索失败,已跳过:{subCondition}");
                    continue;
                }
            }

            int totalPages = (subHits + pageSize - 1) / pageSize;
            for (int page = 1; page <= totalPages; page++)
            {
                using JsonDocument? pageDoc = await SearchWithRetryAsync(subCondition, page, pageSize);
                if (pageDoc is null || !TryGetResultList(pageDoc, out JsonElement items))
                {
                    Console.WriteLine($"{pubDate}\t第{page}/{totalPages}页获取失败,已跳过");
                    continue;
                }

                int indexInPage = 0;
                foreach (JsonElement item in items.EnumerateArray())
                {
                    indexInPage++;
                    int ordinal = (page - 1) * pageSize + indexInPage;

                    string? appNo = ReadApplicationNumber(item);
                    if (appNo is null)
                    {
                        Console.WriteLine($"{ordinal}/{subHits}\t申请号缺失或无效,已跳过");
                        continue;
                    }

                    if (await SavePatentIfNewAsync(httpClient, item, appNo, dayFolder))
                    {
                        savedCount++;
                    }

                    Console.WriteLine($"{ordinal}/{subHits}-{savedCount}/{totalHits}\t{appNo}\t{DateTime.Now}\t{pubDate}");
                }
            }
        }
    }
}
|