luocaiyang 8 месяцев назад
Родитель
Commit
48a9af2d7b
3 изменённых файла: 116 добавлений и 45 удалений
  1. 2 2
      IPRS.xiaoshi.sz.com/Patent.cs
  2. 2 2
      IPRS.xiaoshi.sz.com/Searcher.cs
  3. 112 41
      testtrieTree/Program.cs

+ 2 - 2
IPRS.xiaoshi.sz.com/Patent.cs

@@ -16,8 +16,8 @@ namespace IPRS.xiaoshi.sz.com
         public string IssueNo {  get; set; }
         public DateTime IssueDate { get; set; }
         public string Abstract {  get; set; }
-        public string Claim {  get; set; }  
-        public string FullText { get; set; }
+        public List<string> Claim {  get; set; }  
+        public List<string> FullText { get; set; }
         public List<string> IPCs { get; set; }
         public List<string> Applicants { get; set; }
         public List<string> Inventors { get; set; }

+ 2 - 2
IPRS.xiaoshi.sz.com/Searcher.cs

@@ -35,7 +35,7 @@ namespace IPRS.xiaoshi.sz.com
 
 
 
-            using (HttpClient client = new HttpClient())
+            using (HttpClient client = new HttpClient() { Timeout=TimeSpan.FromSeconds(5)})
             {
                 try
                 {
@@ -46,7 +46,7 @@ namespace IPRS.xiaoshi.sz.com
                 catch(Exception ex)
                 {
                     tryCount ++;
-                    if(tryCount < 3)
+                    if(tryCount < 5)
                     {
                         System.Threading.Thread.Sleep(1000);
                         goto begin;

+ 112 - 41
testtrieTree/Program.cs

@@ -11,16 +11,24 @@ using static System.Runtime.InteropServices.JavaScript.JSType;
 
 
 // See https://aka.ms/new-console-template for more information
+string? key = null;
+if (args.Length > 0)
+{
+    key = args[0];
+}
+else
+{
 
-Console.WriteLine("请选择:");
-Console.WriteLine("1、手动输入地址解析;");
-Console.WriteLine("2、从文件中读取地址批量解析;");
-Console.WriteLine("3、测试IPRS检索;");
-Console.WriteLine("4、测试IPRS获取中国专利全文文本;");
-Console.WriteLine("5、测试IPRS获取中国专利PDF;");
-Console.WriteLine("6、测试IPRS获取中国专利著录项目;");
+    Console.WriteLine("请选择:");
+    Console.WriteLine("1、手动输入地址解析;");
+    Console.WriteLine("2、从文件中读取地址批量解析;");
+    Console.WriteLine("3、测试IPRS检索;");
+    Console.WriteLine("4、测试IPRS获取中国专利全文文本;");
+    Console.WriteLine("5、测试IPRS获取中国专利PDF;");
+    Console.WriteLine("6、测试IPRS获取中国专利著录项目;");
 
-string? key = Console.ReadLine();
+    key = Console.ReadLine();
+}
 
 switch (key)
 {
@@ -142,23 +150,41 @@ switch (key)
         Console.WriteLine(DateTime.Now);
         break;
     case "3":
-        string savePath = "e:/CNWGPatents/";
-        DateTime start = DateTime.Parse("2021-06-08");
-        DateTime end = DateTime.Parse("2021-12-31");
-        while(start.CompareTo(end) < 0)
+        string savePath = "e:/CNPatents/";
+        Console.Write("请输入开始日期(yyyy-mm-dd):");
+        string? strStart = Console.ReadLine();
+
+        DateTime start;
+        if(!DateTime.TryParse(strStart, out start))
+        {
+            Console.WriteLine("输入日期格式错误,系统退出!");
+        } //DateTime.Parse("2024-11-01");
+
+        Console.Write("请输入结束日期(yyyy-mm-dd):");
+        string? strEnd = Console.ReadLine();
+
+        DateTime end;
+        if (!DateTime.TryParse(strEnd, out end))
+        {
+            Console.WriteLine("输入日期格式错误,系统退出!");
+        } //DateTime.Parse("2024-11-01");
+        //DateTime end = DateTime.Parse("2024-11-30");
+        while(start.CompareTo(end) <= 0)
         {
             //Console.WriteLine("请输入下载的日期(yyyymmdd):");
             string strPubDate = start.ToString("yyyyMMdd");
-            string strCondition = $"F XX {strPubDate}/GD*3/PT";
+            string strCondition = $"F XX (({strPubDate}/GD*3/PT)+({strPubDate}/GD*2/PT)+({strPubDate}/GD*9/PT)+({strPubDate}/PD*1/PT)+({strPubDate}/PD*8/PT))";
 
             string strRet = new IPRSSearcher().GetPatents(strCondition, 1, 1);
             int TotalPatents = 0;
+            int RetiredPatents =0;
             using (JsonDocument document = JsonDocument.Parse(strRet))
             {
                 var retCode = document.RootElement.GetProperty("Ret").GetInt32();
                 TotalPatents = document.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
             }
 
+
             List<string> list = new List<string>();
             string temSavePath = $"{savePath}{strPubDate}";
             if (TotalPatents > 0)
@@ -179,7 +205,6 @@ switch (key)
                 }
             }
             
-
             foreach (string c in list)
             {
                 string strRet1 = new IPRSSearcher().GetPatents(c, 1, 1);
@@ -192,16 +217,34 @@ switch (key)
 
                 if (TotalPatents1 > 0)
                 {
-
-                    
                     int totalPages = (TotalPatents1 % 50 == 0) ? TotalPatents1 / 50 : TotalPatents1 / 50 + 1;
 
                     for (int page = 1; page <= totalPages; page++)
                     {
+                        int iListTry = 0;
+                        tryListAgain:
                         strRet1 = new IPRSSearcher().GetPatents(c, page, 50);
+
+                        if (string.IsNullOrEmpty(strRet1))
+                        {
+                            iListTry++;
+                            if (iListTry < 3)
+                            {
+                                System.Threading.Thread.Sleep(1000);
+                                goto tryListAgain;
+                            }
+                        }
                         using (JsonDocument document = JsonDocument.Parse(strRet1))
                         {
                             var retCode = document.RootElement.GetProperty("Ret").GetInt32();
+                            if(retCode == 500)
+                            {
+                                iListTry++;
+                                if (iListTry < 3)
+                                {
+                                    goto tryListAgain;
+                                }
+                            }
                             //TotalPatents = document.RootElement.GetProperty("Data").GetProperty("HitCount").GetInt32();
                             var Datas = document.RootElement.GetProperty("Data").GetProperty("List");
 
@@ -209,9 +252,11 @@ switch (key)
                             {
                                 string appNo = Datas[i].GetProperty("AN").GetString().Replace(".", "");
 
+                                string strType = appNo.Length > 9 ? appNo.Substring(6, 1) : appNo.Substring(4, 1);
+
                                 if (!System.IO.File.Exists($"{temSavePath}/{appNo}.json"))
                                 {
-
+                                    RetiredPatents++;
                                     using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}.json"))
                                     {
                                         writer.WriteLine(Datas[i].ToString());
@@ -237,40 +282,66 @@ switch (key)
                                         }
                                     }
 
-                                    string strWGImagesUrl = searcher1.GetPatentCNWGImage(appNo);
-
-                                    if (!string.IsNullOrEmpty(strWGImagesUrl))
+                                    if (strType == "3")
                                     {
-                                        string[] img_urls = strWGImagesUrl.Split('|');
+                                        Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents1}-{RetiredPatents}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
+                                        continue;
+                                        string strWGImagesUrl = searcher1.GetPatentCNWGImage(appNo);
 
-
-                                        for (int idx = 0; idx < img_urls.Length; idx++)
+                                        if (!string.IsNullOrEmpty(strWGImagesUrl))
                                         {
-                                            int iTryGetPic = 0;
-                                        TryGetPic:
-                                            try
-                                            {
-                                                byte[] data = new HttpClient().GetByteArrayAsync(img_urls[idx]).Result;
-                                                using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_img{idx + 1}.jpeg", FileMode.Create))
-                                                {
-                                                    fileStream.Write(data, 0, data.Length);
-                                                }
-                                            }
-                                            catch
+                                            using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_ImageUrls.txt"))
                                             {
-                                                iTryGetPic++;
-
-                                                if (iTryGetPic < 3)
-                                                {
-                                                    goto TryGetPic;
-                                                }
+                                                writer.WriteLine(strWGImagesUrl);
                                             }
+                                            #region 
+                                            //break;
+
+                                            //string[] img_urls = strWGImagesUrl.Split('|');
+
+                                            //for (int idx = 0; idx < img_urls.Length; idx++)
+                                            //{
+                                            //    int iTryGetPic = 0;
+                                            //TryGetPic:
+                                            //    try
+                                            //    {
+                                            //        byte[] data = new HttpClient().GetByteArrayAsync(img_urls[idx]).Result;
+                                            //        using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_img{idx + 1}.jpeg", FileMode.Create))
+                                            //        {
+                                            //            fileStream.Write(data, 0, data.Length);
+                                            //        }
+                                            //    }
+                                            //    catch
+                                            //    {
+                                            //        iTryGetPic++;
+
+                                            //        if (iTryGetPic < 3)
+                                            //        {
+                                            //            goto TryGetPic;
+                                            //        }
+                                            //        else
+                                            //        {
+                                            //            Console.WriteLine($"{appNo}图{idx}获取错误!");
+                                            //        }
+                                            //    }
+
+                                            //}
+                                            #endregion
 
                                         }
                                     }
+                                    else
+                                    {
+                                        string strFullText = searcher1.GetPatentFullTxtInfo(appNo);
+                                        using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_fulltext.xml"))
+                                        {
+                                            writer.WriteLine(strFullText);
+                                        }
+                                    }
 
                                 }
-                                Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
+                                
+                                Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents1}-{RetiredPatents}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
 
                             }