Pārlūkot izejas kodu

修正拆分条件方法的bug

luocaiyang 5 mēneši atpakaļ
vecāks
revīzija
58d6fb48ae

+ 1 - 0
AddressPaser/AddressPaser.csproj

@@ -13,6 +13,7 @@
   </ItemGroup>
 
   <ItemGroup>
+    <ProjectReference Include="..\IPRS.xiaoshi.sz.com\IPRS.xiaoshi.sz.com.csproj" />
     <ProjectReference Include="..\trieTree\trieTree.csproj" />
   </ItemGroup>
 

+ 113 - 0
AddressPaser/Controllers/PatentGetterController.cs

@@ -0,0 +1,113 @@
+using Microsoft.AspNetCore.Mvc;
+using System.Reflection.Emit;
+using System.Xml;
+using System.Xml.Linq;
+
+
+namespace AddressPaser.Controllers
+{
+    [Route("api/[controller]/[action]")]
+    [ApiController]
+    public class PatentGetterController : ControllerBase
+    {
+        // Common XPath prefix of the un-namespaced cn-patent-document layout.
+        private const string DescriptionPath = "//cn-patent-document/application-body/description";
+
+        // Namespaced layout queried when <technical-field> yields no paragraphs.
+        private const string NamespacedParagraphsPath = "//business:Description/base:Paragraphs";
+
+        // Sections appended after the technical field, in output order.
+        private static readonly string[] FollowingSections =
+        {
+            "background-art",
+            "disclosure",
+            "mode-for-invention",
+        };
+
+        /// <summary>
+        /// Fetches the full-text XML of a patent application and returns the paragraph
+        /// texts of its description.
+        /// </summary>
+        /// <remarks>
+        /// Paragraphs are collected in this order: technical-field (or, when that section
+        /// has no paragraphs, every business:Description/base:Paragraphs node), then
+        /// background-art, disclosure and mode-for-invention. If nothing at all was
+        /// collected, the direct &lt;p&gt; children of &lt;description&gt; are returned.
+        /// </remarks>
+        /// <param name="appNo">Application number passed to the full-text lookup.</param>
+        /// <returns>
+        /// The collected paragraph texts; an empty list when <paramref name="appNo"/> is
+        /// blank or the lookup returns no content.
+        /// </returns>
+        /// <exception cref="XmlException">The returned content is not well-formed XML.</exception>
+        [HttpGet(Name = "GetPatentbyAppNo")]
+        public List<string> GetPatentbyAppNo(string appNo)
+        {
+            List<string> paragraphs = new List<string>();
+
+            // A blank number cannot identify a patent, so skip the remote call entirely.
+            if (string.IsNullOrWhiteSpace(appNo))
+            {
+                return paragraphs;
+            }
+
+            // Remote lookup; the result is expected to be the XML text of the document.
+            IPRS.xiaoshi.sz.com.IPRSSearcher searcher = new IPRS.xiaoshi.sz.com.IPRSSearcher();
+            string retXml = searcher.GetPatentFullTxtInfo(appNo);
+
+            // XmlDocument.LoadXml throws on null/empty input, so "no content" means "no paragraphs".
+            if (string.IsNullOrWhiteSpace(retXml))
+            {
+                return paragraphs;
+            }
+
+            XmlDocument xmlDoc = new XmlDocument();
+            xmlDoc.LoadXml(retXml);
+
+            // Prefixes used only by the namespaced fallback query.
+            XmlNamespaceManager nsManager = new XmlNamespaceManager(xmlDoc.NameTable);
+            nsManager.AddNamespace("business", "http://www.sipo.gov.cn/XMLSchema/business");
+            nsManager.AddNamespace("base", "http://www.sipo.gov.cn/XMLSchema/base");
+
+            // Primary source: <p> elements inside <technical-field>.
+            AppendParagraphs(xmlDoc.SelectNodes($"{DescriptionPath}/technical-field/p"), paragraphs, true);
+
+            if (paragraphs.Count == 0)
+            {
+                // Namespaced layout: the text of these nodes is deliberately left untrimmed.
+                AppendParagraphs(xmlDoc.SelectNodes(NamespacedParagraphsPath, nsManager), paragraphs, false);
+            }
+
+            // Remaining sections; each query result is null-checked on its own node list.
+            foreach (string section in FollowingSections)
+            {
+                AppendParagraphs(xmlDoc.SelectNodes($"{DescriptionPath}/{section}/p"), paragraphs, true);
+            }
+
+            if (paragraphs.Count == 0)
+            {
+                // No section matched: use the <p> elements placed directly under <description>.
+                AppendParagraphs(xmlDoc.SelectNodes($"{DescriptionPath}/p"), paragraphs, true);
+            }
+
+            return paragraphs;
+        }
+
+        /// <summary>
+        /// Appends the inner text of every node in <paramref name="nodes"/> to
+        /// <paramref name="target"/>, optionally trimmed; a null node list is ignored.
+        /// </summary>
+        private static void AppendParagraphs(XmlNodeList nodes, List<string> target, bool trim)
+        {
+            if (nodes == null)
+            {
+                return;
+            }
+
+            foreach (XmlNode node in nodes)
+            {
+                target.Add(trim ? node.InnerText.Trim() : node.InnerText);
+            }
+        }
+    }
+}

+ 1 - 10
AddressPaser/Properties/launchSettings.json

@@ -19,16 +19,7 @@
         "ASPNETCORE_ENVIRONMENT": "Development"
       }
     },
-    "https": {
-      "commandName": "Project",
-      "dotnetRunMessages": true,
-      "launchBrowser": true,
-      "launchUrl": "swagger",
-      "applicationUrl": "https://*:7198;http://*:5241",
-      "environmentVariables": {
-        "ASPNETCORE_ENVIRONMENT": "Development"
-      }
-    },
+    
     "IIS Express": {
       "commandName": "IISExpress",
       "launchBrowser": true,

+ 10 - 10
IPRS.xiaoshi.sz.com/Searcher.cs

@@ -35,7 +35,7 @@ namespace IPRS.xiaoshi.sz.com
 
 
 
-            using (HttpClient client = new HttpClient() { Timeout=TimeSpan.FromSeconds(5)})
+            using (HttpClient client = new HttpClient() { Timeout=TimeSpan.FromSeconds(600)})
             {
                 try
                 {
@@ -68,7 +68,7 @@ namespace IPRS.xiaoshi.sz.com
 
             try
             {
-                using (HttpClient client = new HttpClient())
+                using (HttpClient client = new HttpClient() { Timeout = TimeSpan.FromSeconds(600) })
                 {
                     client.DefaultRequestHeaders.Add("_appid", appId);
                     client.DefaultRequestHeaders.Add("_timestamp", currentTimeMillis.ToString());
@@ -98,45 +98,45 @@ namespace IPRS.xiaoshi.sz.com
 
         public string GetPatentCNBiblio(string appNo)
         {
-            string url = $"https://api.patentstar.com.cn/api/Patent/CnBibo/{appNo}";
+            string url = $"https://api.patentstar.com.cn/api/Patent/CnBibo/{appNo.Trim()}";
             return GetIPRSData(url);
         }
 
         public string GetPatentFullTxtInfo(string appNo)
         {
             //string url = $"https://api.patentstar.com.cn/api/Patent/CnBibo/{appNo}";
-            string url = $"https://api.patentstar.com.cn/api/Patent/CnFullXml/{appNo}";
+            string url = $"https://api.patentstar.com.cn/api/Patent/CnFullXml/{appNo.Trim()}";
 
             return GetIPRSData (url) ;
         }
 
         public string GetPatentCnMainPic(string appNo)
         {
-            string url = $"https://api.patentstar.com.cn/api/Patent/CnMainImage/{appNo}";
+            string url = $"https://api.patentstar.com.cn/api/Patent/CnMainImage/{appNo.Trim()}";
             return GetIPRSData(url);
         }
 
         public string GetPatentCNWGImage(string appNo)
         {
-            string url = $"https://api.patentstar.com.cn/api/Patent/CnWGImage/{appNo}";
+            string url = $"https://api.patentstar.com.cn/api/Patent/CnWGImage/{appNo.Trim()}";
             return GetIPRSData(url);
         }
 
         public string GetPatentCNLegal(string appNo)
         {
-            string url = $"https://api.patentstar.com.cn/api/Patent/CnLegal/{appNo}";
+            string url = $"https://api.patentstar.com.cn/api/Patent/CnLegal/{appNo.Trim()}";
             return GetIPRSData(url);
         }
 
         public string GetPatentCNPdf(string appNo)
         {
-            string url = $"https://api.patentstar.com.cn/api/Patent/CnPdf/{appNo}";
+            string url = $"https://api.patentstar.com.cn/api/Patent/CnPdf/{appNo.Trim()}";
             return GetIPRSData(url);
         }
 
         public string GetPatentENPdf(string pubNo)
         {
-            string url = $"https://api.patentstar.com.cn/api/Patent/EnPdf/{pubNo}";
+            string url = $"https://api.patentstar.com.cn/api/Patent/EnPdf/{pubNo.Trim()}";
             return GetIPRSData(url);
         }
 
@@ -154,7 +154,7 @@ namespace IPRS.xiaoshi.sz.com
 
         public string GetPatentCitedNum(string pubNo)
         {
-            string url = $"https://api.patentstar.com.cn/api/Patent/CitedNumByPubNo/{pubNo}";
+            string url = $"https://api.patentstar.com.cn/api/Patent/CitedNumByPubNo/{pubNo.Trim()}";
             return GetIPRSData(url);
         }
 

+ 69 - 61
testtrieTree/Program.cs

@@ -185,7 +185,7 @@ switch (key)
 
 
             List<string> list = new List<string>();
-            string temSavePath = $"{savePath}{strPubDate}";
+            string temSavePath = Path.Combine(savePath,start.Year.ToString(),strPubDate); //$"{savePath}{strPubDate}";
             if (TotalPatents > 0)
             {
                 
@@ -196,7 +196,7 @@ switch (key)
 
                 if (TotalPatents > 10000)
                 {
-                    list = new IPRSSearcher().splitCondition(strCondition,DateTime.Now.AddYears(-15),DateTime.Now);
+                    list = new IPRSSearcher().splitCondition(strCondition,DateTime.Parse("1985-01-01"),start.AddDays(1));
                 }
                 else
                 {
@@ -255,7 +255,7 @@ switch (key)
 
                                 if (!System.IO.File.Exists($"{temSavePath}/{appNo}.json"))
                                 {
-                                    RetiredPatents++;
+                                    
                                     using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}.json"))
                                     {
                                         writer.WriteLine(Datas[i].ToString());
@@ -265,12 +265,12 @@ switch (key)
                                 TryGetMainPic:
                                     try
                                     {
-                                        string mpicUrl = searcher1.GetPatentCnMainPic(appNo);
-                                        byte[] data = new HttpClient().GetByteArrayAsync(mpicUrl).Result;
-                                        using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_abs.jpeg", FileMode.Create))
-                                        {
-                                            fileStream.Write(data, 0, data.Length);
-                                        }
+                                        //string mpicUrl = searcher1.GetPatentCnMainPic(appNo);
+                                        //byte[] data = new HttpClient().GetByteArrayAsync(mpicUrl).Result;
+                                        //using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_abs.jpeg", FileMode.Create))
+                                        //{
+                                        //    fileStream.Write(data, 0, data.Length);
+                                        //}
                                     }
                                     catch (Exception ex)
                                     {
@@ -283,63 +283,71 @@ switch (key)
 
                                     if (strType == "3")
                                     {
-                                        Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents1}-{RetiredPatents}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
-                                        continue;
-                                        string strWGImagesUrl = searcher1.GetPatentCNWGImage(appNo);
-
-                                        if (!string.IsNullOrEmpty(strWGImagesUrl))
-                                        {
-                                            using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_ImageUrls.txt"))
-                                            {
-                                                writer.WriteLine(strWGImagesUrl);
-                                            }
-                                            #region 
-                                            //break;
-
-                                            //string[] img_urls = strWGImagesUrl.Split('|');
-
-                                            //for (int idx = 0; idx < img_urls.Length; idx++)
-                                            //{
-                                            //    int iTryGetPic = 0;
-                                            //TryGetPic:
-                                            //    try
-                                            //    {
-                                            //        byte[] data = new HttpClient().GetByteArrayAsync(img_urls[idx]).Result;
-                                            //        using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_img{idx + 1}.jpeg", FileMode.Create))
-                                            //        {
-                                            //            fileStream.Write(data, 0, data.Length);
-                                            //        }
-                                            //    }
-                                            //    catch
-                                            //    {
-                                            //        iTryGetPic++;
-
-                                            //        if (iTryGetPic < 3)
-                                            //        {
-                                            //            goto TryGetPic;
-                                            //        }
-                                            //        else
-                                            //        {
-                                            //            Console.WriteLine($"{appNo}图{idx}获取错误!");
-                                            //        }
-                                            //    }
-
-                                            //}
-                                            #endregion
-
-                                        }
+                                        //Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents1}-{RetiredPatents}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
+                                        
+                                        //string strWGImagesUrl = searcher1.GetPatentCNWGImage(appNo);
+
+                                        //if (!string.IsNullOrEmpty(strWGImagesUrl))
+                                        //{
+                                        //    using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_ImageUrls.txt"))
+                                        //    {
+                                        //        writer.WriteLine(strWGImagesUrl);
+                                        //    }
+                                        //    #region 
+                                        //    //break;
+
+                                        //    //string[] img_urls = strWGImagesUrl.Split('|');
+
+                                        //    //for (int idx = 0; idx < img_urls.Length; idx++)
+                                        //    //{
+                                        //    //    int iTryGetPic = 0;
+                                        //    //TryGetPic:
+                                        //    //    try
+                                        //    //    {
+                                        //    //        byte[] data = new HttpClient().GetByteArrayAsync(img_urls[idx]).Result;
+                                        //    //        using (FileStream fileStream = new FileStream($"{temSavePath}/{appNo}_img{idx + 1}.jpeg", FileMode.Create))
+                                        //    //        {
+                                        //    //            fileStream.Write(data, 0, data.Length);
+                                        //    //        }
+                                        //    //    }
+                                        //    //    catch
+                                        //    //    {
+                                        //    //        iTryGetPic++;
+
+                                        //    //        if (iTryGetPic < 3)
+                                        //    //        {
+                                        //    //            goto TryGetPic;
+                                        //    //        }
+                                        //    //        else
+                                        //    //        {
+                                        //    //            Console.WriteLine($"{appNo}图{idx}获取错误!");
+                                        //    //        }
+                                        //    //    }
+
+                                        //    //}
+                                        //    #endregion
+
+                                        //}
                                     }
                                     else
                                     {
                                         string strFullText = searcher1.GetPatentFullTxtInfo(appNo);
-                                        using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_fulltext.xml"))
+                                        if (!string.IsNullOrEmpty(strFullText))
                                         {
-                                            writer.WriteLine(strFullText);
+                                            using (StreamWriter writer = new StreamWriter($"{temSavePath}/{appNo}_fulltext.xml"))
+                                            {
+                                                writer.WriteLine(strFullText);
+                                            }
                                         }
                                     }
 
+                                    RetiredPatents++;
                                 }
-                                
+                                else
+                                {
+                                    RetiredPatents++;
+                                }
+
                                 Console.WriteLine($"{(page - 1) * 50 + i + 1}/{TotalPatents1}-{RetiredPatents}/{TotalPatents}\t{appNo}\t{DateTime.Now}\t{start.ToString("yyyMMdd")}");
 
                             }
@@ -351,10 +359,10 @@ switch (key)
                 }
             }
             start = start.AddDays(1);
-            while ((start.DayOfWeek != DayOfWeek.Friday ) && (start.DayOfWeek != DayOfWeek.Tuesday))
-            {
-                start = start.AddDays(1);
-            }
+            //while ((start.DayOfWeek != DayOfWeek.Friday ) && (start.DayOfWeek != DayOfWeek.Tuesday))
+            //{
+            //    start = start.AddDays(1);
+            //}
                 
         }