Parcourir la source

添加按照不同方式计算异常案件的评分

luocaiyang il y a 9 mois
Parent
commit
4d0b0162cb

+ 329 - 2
wispro.sp.api/Controllers/CaseFileCompareController.cs

@@ -4,8 +4,12 @@ using Microsoft.AspNetCore.Authorization;
 using Microsoft.AspNetCore.Http;
 using Microsoft.AspNetCore.Mvc;
 using Microsoft.EntityFrameworkCore;
+using Microsoft.EntityFrameworkCore.Query.SqlExpressions;
 using System;
 using System.Collections.Generic;
+using System.Data;
+using System.Diagnostics;
+using System.Dynamic;
 using System.Linq;
 using System.Linq.Expressions;
 using System.Text.RegularExpressions;
@@ -252,9 +256,332 @@ namespace wispro.sp.api.Controllers
             return Context.CaseInfos.Where(p => p.CaseNo == caseNo.Trim()).Count() > 0;
         }
 
-        public IList<Object> CalMean_Std(DateTime start,DateTime end)
+        /// <summary>
+        /// Computes, for every case whose final-version date falls inside the window,
+        /// a z-score of its stage-0 ("内部核稿") value and of its stage-1 ("外部核稿")
+        /// value relative to the other cases of the same customer, writes the result
+        /// to an Excel file and returns it.
+        /// </summary>
+        /// <param name="start">Inclusive lower bound of the final-version date.</param>
+        /// <param name="end">Inclusive upper bound of the final-version date.</param>
+        /// <param name="type">
+        /// Metric forwarded to getCalValue: 0 = text similarity, 1 = text diff rate,
+        /// 2 = claims similarity * 0.7 + full-text similarity * 0.3.
+        /// </param>
+        /// <returns>
+        /// One anonymous object per case (Id, CaseNo, CaseName, Customer, Reviewer,
+        /// Handlers, zScoreA, zScoreB).
+        /// </returns>
+        public IList<Object> CalCustomer_mean(DateTime start,DateTime end,int type=0)
+        {
+            // Materialize once: the result is counted and iterated twice below, and an
+            // un-materialized IQueryable would hit the database on every enumeration.
+            var caseList = Context.CaseInfos.Where<CaseInfo>(
+                p => p.FinalVersionDate >= start && p.FinalVersionDate <= end)
+                .Include(p => p.Customer)
+                .Include(p => p.Reviewer)
+                .Include(p => p.DRRAbstract)
+                .Include(p => p.DRRCalim)
+                .Include(p => p.DRRFulltext)
+                .Include(p => p.DRRAll)
+                .Include(p => p.RFRAbstract)
+                .Include(p => p.RFRCalim)
+                .Include(p => p.RFRFulltext)
+                .Include(p => p.RFRAll)
+                .ToList();
+
+            int iTotals = caseList.Count;
+            int iIndex = 0;
+
+            // Cases without a customer (or with an unnamed one) share the "未知" group.
+            string CustomerKey(CaseInfo caseRow) => caseRow.Customer?.Name ?? "未知";
+
+            // Returns the statistics bucket of a customer, creating it on first use.
+            CustomerAvg_Std StatsFor(Dictionary<string, CustomerAvg_Std> map, string key)
+            {
+                if (!map.TryGetValue(key, out var bucket))
+                {
+                    bucket = new CustomerAvg_Std() { Name = key };
+                    map.Add(key, bucket);
+                }
+                return bucket;
+            }
+
+            // Adds one sample to a bucket; cases without a value are not counted.
+            void Accumulate(CustomerAvg_Std bucket, double? sample)
+            {
+                if (sample == null)
+                {
+                    return;
+                }
+                bucket.Sum += sample.Value;
+                bucket.SquareSum += sample.Value * sample.Value;
+                bucket.count += 1;
+            }
+
+            // z-score of a sample inside its bucket. A bucket with no samples or no
+            // spread has no defined z-score; 0 ("not an outlier") is returned instead
+            // of letting NaN/Infinity leak into the Excel sheet and the API response.
+            double ZScore(double sample, CustomerAvg_Std bucket)
+            {
+                if (bucket.count == 0)
+                {
+                    return 0;
+                }
+                double deviation = bucket.Std_Deviation;
+                if (double.IsNaN(deviation) || deviation <= 0)
+                {
+                    return 0;
+                }
+                return (sample - bucket.Average) / deviation;
+            }
+
+            List<Object> retList = new List<Object>();
+
+            #region Per-customer sum / square sum / count for both stages
+            var avg_std1 = new Dictionary<string, CustomerAvg_Std>();
+            var avg_std2 = new Dictionary<string, CustomerAvg_Std>();
+            foreach (var caseInfo in caseList)
+            {
+                iIndex++;
+                Debug.WriteLine($"{iIndex}/{iTotals}\t{caseInfo.CaseNo}\t{caseInfo.DRRAbstractId},{caseInfo.DRRCalimId},{caseInfo.DRRFulltextId},{caseInfo.DRRAllId},{caseInfo.RFRAbstractId},{caseInfo.RFRCalimId},{caseInfo.RFRFulltextId},{caseInfo.RFRAllId}");
+
+                string groupName = CustomerKey(caseInfo);
+                Accumulate(StatsFor(avg_std1, groupName), getCalValue(caseInfo, type, 0));
+                Accumulate(StatsFor(avg_std2, groupName), getCalValue(caseInfo, type, 1));
+            }
+            #endregion
+
+            System.Data.DataTable dt = new System.Data.DataTable();
+            dt.Columns.Add("我方文号");
+            dt.Columns.Add("案件名称");
+            dt.Columns.Add("客户");
+            dt.Columns.Add("处理人");
+            dt.Columns.Add("核稿人");
+            dt.Columns.Add("内部核稿分数");
+            dt.Columns.Add("外部核稿分数");
+            dt.Columns.Add("备注");
+
+            // For similarity metrics (type 0 and 2) a high value means few edits; for
+            // every other type a high value means many edits, so the sign is flipped
+            // before the quadrant is classified.
+            bool highMeansFewEdits = type == 0 || type == 2;
+
+            foreach (var item in caseList)
+            {
+                string groupName = CustomerKey(item);
+                var one = avg_std1[groupName];
+                var two = avg_std2[groupName];
+
+                // A case without a value for a stage is scored as 1.
+                double oneSim = getCalValue(item, type, 0) ?? 1;
+                double twoSim = getCalValue(item, type, 1) ?? 1;
+
+                double zScoreA = ZScore(oneSim, one);
+                double zScoreB = ZScore(twoSim, two);
+
+                DataRow row = dt.NewRow();
+                row["我方文号"] = item.CaseNo;
+                row["案件名称"] = item.CaseName;
+                row["客户"] = item.Customer?.Name;
+                row["核稿人"] = item.Reviewer?.Name;
+                row["处理人"] = item.Handlers;
+                row["内部核稿分数"] = zScoreA;
+                row["外部核稿分数"] = zScoreB;
+
+                // Only cases more than 3 units away from the origin of the
+                // (zScoreA, zScoreB) plane get a remark.
+                var distince = Math.Sqrt(zScoreB * zScoreB + zScoreA * zScoreA);
+                if (distince > 3)
+                {
+                    double orientedA = highMeansFewEdits ? zScoreA : -zScoreA;
+                    double orientedB = highMeansFewEdits ? zScoreB : -zScoreB;
+
+                    if (orientedA > 0 && orientedB > 0)
+                    {
+                        row["备注"] = "内部核稿、外部核稿修改都较少![撰稿人给力或客户友好]";
+                    }
+                    else if (orientedA > 0 && orientedB < 0)
+                    {
+                        row["备注"] = "内部核稿较少、外部核稿修改较多![核稿人没有尽责!]";
+                    }
+                    else if (orientedA < 0 && orientedB > 0)
+                    {
+                        row["备注"] = "内部核稿较多、外部核稿修改都较少![核稿人给力]";
+                    }
+                    else if (orientedA < 0 && orientedB < 0)
+                    {
+                        row["备注"] = "内部核稿和外部核稿修改都较多![案件沟通问题?]";
+                    }
+                }
+
+                dt.Rows.Add(row);
+
+                retList.Add(
+                    new
+                    {
+                        Id = item.Id,
+                        CaseNo = item.CaseNo,
+                        CaseName = item.CaseName,
+                        Customer = item.Customer,
+                        Reviewer = item.Reviewer,
+                        Handlers = item.Handlers,
+                        zScoreA = zScoreA,
+                        zScoreB = zScoreB
+                    }
+                 );
+            }
+
+            // NOTE(review): the export directory is hard-coded; confirm it exists on the host.
+            wispro.sp.utility.NPOIExcel.DataTableToExcel(dt,$"c:\\temp\\{DateTime.Now.ToString("yyyyMMdd")}-内部核稿外部核稿情况案件清单_{typeName(type)}.xlsx");
+            return retList;
+        }
+
+        /// <summary>
+        /// Picks the comparison value of one case for the requested metric and stage.
+        /// </summary>
+        /// <param name="caseInfo">Case whose comparison results are read.</param>
+        /// <param name="type">
+        /// 0 = TextSimilarity, 1 = diffRate, 2 = claims TextSimilarity * 0.7 +
+        /// full-text TextSimilarity * 0.3. Any other value yields null.
+        /// </param>
+        /// <param name="stage">0 reads the DRR* results; any other value reads the RFR* results.</param>
+        /// <returns>The selected value, or null when no usable comparison result exists.</returns>
+        private double? getCalValue(CaseInfo caseInfo,int type,int stage)
+        {
+            double? calValue = null;
+            switch (type)
+            {
+                case 0:
+                    if (stage == 0)
+                    {
+                        // The whole-document result is used only when full text and abstract
+                        // were both compared. It is null-checked too, because it is the object
+                        // actually dereferenced; otherwise fall back to the claims result.
+                        if (caseInfo.DRRFulltext != null && caseInfo.DRRAbstract != null && caseInfo.DRRAll != null)
+                        {
+                            calValue = caseInfo.DRRAll.TextSimilarity;
+                        }
+                        else if (caseInfo.DRRCalim != null)
+                        {
+                            calValue = caseInfo.DRRCalim.TextSimilarity;
+                        }
+                    }
+                    else
+                    {
+                        if (caseInfo.RFRFulltext != null && caseInfo.RFRAbstract != null && caseInfo.RFRAll != null)
+                        {
+                            calValue = caseInfo.RFRAll.TextSimilarity;
+                        }
+                        else if (caseInfo.RFRCalim != null)
+                        {
+                            calValue = caseInfo.RFRCalim.TextSimilarity;
+                        }
+                    }
+                    break;
+                case 1:
+                    if (stage == 0)
+                    {
+                        if (caseInfo.DRRFulltext != null && caseInfo.DRRAbstract != null && caseInfo.DRRAll != null)
+                        {
+                            calValue = caseInfo.DRRAll.diffRate;
+                        }
+                        else if (caseInfo.DRRCalim != null)
+                        {
+                            calValue = caseInfo.DRRCalim.diffRate;
+                        }
+                    }
+                    else
+                    {
+                        if (caseInfo.RFRFulltext != null && caseInfo.RFRAbstract != null && caseInfo.RFRAll != null)
+                        {
+                            calValue = caseInfo.RFRAll.diffRate;
+                        }
+                        else if (caseInfo.RFRCalim != null)
+                        {
+                            calValue = caseInfo.RFRCalim.diffRate;
+                        }
+                    }
+                    break;
+                case 2:
+                    if (stage == 0)
+                    {
+                        // Weighted mix when both parts exist, else claims only, else whole document.
+                        if (caseInfo.DRRFulltext != null && caseInfo.DRRCalim != null)
+                        {
+                            calValue = caseInfo.DRRCalim.TextSimilarity * 0.7 + caseInfo.DRRFulltext.TextSimilarity * 0.3;
+                        }
+                        else if (caseInfo.DRRCalim != null)
+                        {
+                            calValue = caseInfo.DRRCalim.TextSimilarity;
+                        }
+                        else if (caseInfo.DRRAll != null)
+                        {
+                            calValue = caseInfo.DRRAll.TextSimilarity;
+                        }
+                    }
+                    else
+                    {
+                        if (caseInfo.RFRFulltext != null && caseInfo.RFRCalim != null)
+                        {
+                            calValue = caseInfo.RFRCalim.TextSimilarity * 0.7 + caseInfo.RFRFulltext.TextSimilarity * 0.3;
+                        }
+                        else if (caseInfo.RFRCalim != null)
+                        {
+                            calValue = caseInfo.RFRCalim.TextSimilarity;
+                        }
+                        else if (caseInfo.RFRAll != null)
+                        {
+                            calValue = caseInfo.RFRAll.TextSimilarity;
+                        }
+                    }
+                    break;
+            }
+
+            return calValue;
+        }
+
+        private string typeName(int type)
         {
+            // Label of the calculation mode, embedded in the exported file name.
+            // Modes other than 0, 1 and 2 have no label and yield an empty string.
+            if (type == 0)
+            {
+                return "文本相似度计算";
+            }
+
+            if (type == 1)
+            {
+                return "字符修改计算";
+            }
+
+            return type == 2 ? "权要权重70说明书30" : "";
+        }
+
 
+        public IList<Object> CalMean_Std(DateTime start,DateTime end)
+        {
             double AverageA = Context.CaseInfos.Where<CaseInfo>(
                 p => p.FinalVersionDate >= start && p.FinalVersionDate <= end && p.DRRCalim!= null)
                 .Average(x=>x.DRRCalim.TextSimilarity);
@@ -289,7 +616,7 @@ namespace wispro.sp.api.Controllers
                         Reviewer = item.Reviewer,
                         Handlers = item.Handlers,
                         zScoreA = (item.DRRCalim?.TextSimilarity -AverageA)/stdA,
-                        zScoreB = (item.RFRAll?.TextSimilarity - AverageB) / stdB
+                        zScoreB = (item.RFRAll?.TextSimilarity - AverageB) /stdB
                     }    
                 );
             }

+ 31 - 0
wispro.sp.share/Customer_Std.cs

@@ -0,0 +1,31 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace wispro.sp.share
+{
+    /// <summary>
+    /// Running statistics (sum, sum of squares, sample count) of one named group,
+    /// from which the mean and the population standard deviation are derived.
+    /// </summary>
+    public class CustomerAvg_Std
+    {
+        /// <summary>Name of the group the samples belong to.</summary>
+        public string Name { get; set; }
+
+        /// <summary>Sum of all samples.</summary>
+        public double Sum { get; set; }
+
+        /// <summary>Sum of the squares of all samples.</summary>
+        public double SquareSum { get; set; }
+
+        /// <summary>Number of samples accumulated.</summary>
+        public int count { get; set; }
+
+        /// <summary>
+        /// Arithmetic mean of the samples; 0 when no sample has been accumulated
+        /// (instead of the NaN that 0 / 0 would produce).
+        /// </summary>
+        public double Average
+        {
+            get { return count == 0 ? 0 : Sum / count; }
+        }
+
+        /// <summary>
+        /// Population standard deviation, computed as sqrt(E[x^2] - E[x]^2);
+        /// 0 when no sample has been accumulated.
+        /// </summary>
+        public double Std_Deviation
+        {
+            get
+            {
+                if (count == 0)
+                {
+                    return 0;
+                }
+
+                // Rounding can push the variance of (nearly) constant data a hair
+                // below zero, and Math.Sqrt of a negative number is NaN; clamp it.
+                double variance = SquareSum / count - Average * Average;
+                return Math.Sqrt(Math.Max(0, variance));
+            }
+        }
+    }
+}

+ 6 - 2
wispro.sp.utility/CompareDocx.cs

@@ -66,7 +66,7 @@ namespace wispro.sp.utility
                     // 读取文档内容
                     content = doc.Content.Text;
 
-                    List<string> lines = content.Split("\r").ToList();
+                    List<string> lines = content.Split(new string[] { "\f", "\r" }, StringSplitOptions.None).ToList();
                     return List2String(lines);
 
                 }
@@ -108,7 +108,7 @@ namespace wispro.sp.utility
 
             private string List2String(List<string> lines)
             {
-                string[] array = { "权利要求书", "说明书摘要", "说明书" , "摘要附图", "说明书附图" };
+                string[] array = { "权利要求书", "说明书摘要", "说明书" ,"背景技术","发明内容", "技术领域", "具体实施方式", "摘要附图", "说明书附图" };
 
                 StringBuilder sb = new StringBuilder();
                 string lastBlock = string.Empty;
@@ -133,6 +133,10 @@ namespace wispro.sp.utility
                                     this.Abstract = this.Abstract + "\r\n" + line;
                                     break;
                                 case "说明书":
+                                case "背景技术":
+                                case "发明内容":
+                                case "技术领域":
+                                case "具体实施方式":
                                     this.FullText = this.FullText + "\r\n" + line;
                                     break;
 

+ 20 - 2
wispro.sp.utility/CosineSimilarity.cs

@@ -32,9 +32,27 @@ namespace wispro.sp.utility
             return dotProduct / (Math.Sqrt(norm1) * Math.Sqrt(norm2));
         }
 
-        private static Dictionary<string, double> GetTermFrequencyVector(string text)
+        /// <summary>
+        /// Returns a copy of the vector scaled to unit Euclidean length.
+        /// </summary>
+        /// <param name="vector">Term-to-weight map; it is not modified.</param>
+        /// <returns>
+        /// A new dictionary with the same keys and each value divided by the vector's
+        /// Euclidean norm. A vector whose norm is 0 is copied unchanged, because
+        /// dividing by 0 would turn every component into NaN.
+        /// </returns>
+        private static Dictionary<string, double> Normalize(Dictionary<string, double> vector)
         {
+            double squaredLength = 0;
+            foreach (var entry in vector)
+            {
+                squaredLength += entry.Value * entry.Value;
+            }
+            double length = Math.Sqrt(squaredLength);
+
+            Dictionary<string, double> result = new Dictionary<string, double>(vector.Count);
+            foreach (var entry in vector)
+            {
+                result.Add(entry.Key, length > 0 ? entry.Value / length : entry.Value);
+            }
+
+            return result;
+        }
+
 
+        private static Dictionary<string, double> GetTermFrequencyVector(string text)
+        {
             var terms = new Jieba_Segmenter().Cut(text.Replace("\r\n","").Replace("\r","").Replace("\n", ""));
             var vector = new Dictionary<string, double>();
 
@@ -45,7 +63,7 @@ namespace wispro.sp.utility
                 vector[term]++;
             }
 
-            return vector;
+            return Normalize(vector);
         }
     }
 }

+ 4 - 4
wispro.sp.web/Pages/CompareFile/List.razor

@@ -200,14 +200,14 @@
                                 <td> @($"{(_currentCase.DRRCalim?.TextSimilarity * 100)?.ToString("0.00")}")</td>
                                 <td>@($"{(_currentCase.DRRAbstract?.TextSimilarity * 100)?.ToString("0.00")}")</td>
                                 <td>@($"{(_currentCase.DRRFulltext?.TextSimilarity * 100)?.ToString("0.00")}")</td>
-                                <td>@($"{(_currentCase.DRRFulltext?.TextSimilarity * 100)?.ToString("0.00")}")</td>
+                                <td>@($"{(_currentCase.DRRAll?.TextSimilarity * 100)?.ToString("0.00")}")</td>
                             }
                             else
                             {
                                 <td>@($"{(_currentCase.RFRCalim?.TextSimilarity * 100)?.ToString("0.00")}")</td>
                                 <td>@($"{(_currentCase.RFRAbstract?.TextSimilarity * 100)?.ToString("0.00")}")</td>
                                 <td>@($"{(_currentCase.RFRFulltext?.TextSimilarity * 100)?.ToString("0.00")}")</td>
-                                <td>@($"{(_currentCase.RFRFulltext?.TextSimilarity * 100)?.ToString("0.00")}")</td>
+                                <td>@($"{(_currentCase.RFRAll?.TextSimilarity * 100)?.ToString("0.00")}")</td>
                             }
                         </tr>
                         <tr>
@@ -271,14 +271,14 @@
                                 <td> @($"{(_currentCase.DRRCalim?.diffRate*100)?.ToString("0.00")}")</td>
                                 <td>@($"{(_currentCase.DRRAbstract?.diffRate*100)?.ToString("0.00")}")</td>
                                 <td>@($"{(_currentCase.DRRFulltext?.diffRate*100)?.ToString("0.00")}")</td>
-                                <td>@($"{(_currentCase.DRRFulltext?.diffRate*100)?.ToString("0.00")}")</td>
+                                <td>@($"{(_currentCase.DRRAll?.diffRate*100)?.ToString("0.00")}")</td>
                             }
                             else
                             {
                                 <td> @($"{(_currentCase.RFRCalim?.diffRate*100)?.ToString("0.00")}")</td>
                                 <td>@($"{(_currentCase.RFRAbstract?.diffRate*100)?.ToString("0.00")}")</td>
                                 <td>@($"{(_currentCase.RFRFulltext?.diffRate*100)?.ToString("0.00")}")</td>
-                                <td>@($"{(_currentCase.RFRFulltext?.diffRate*100)?.ToString("0.00")}")</td>
+                                <td>@($"{(_currentCase.RFRAll?.diffRate*100)?.ToString("0.00")}")</td>
                             }
                             
                         </tr>

+ 1 - 1
wispro.sp.winClient/APIService.cs

@@ -105,7 +105,7 @@ namespace wispro.sp.winClient
                 http.DefaultRequestHeaders.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("bearer", Token?.Token);
                 try
                 {
-                    var data = await http.GetAsync($"{strAPIBaseUri}/api/CaseFileCompare/CalMean_Std?start=2024-10-01&end=2024-12-01");
+                    var data = await http.GetAsync($"{strAPIBaseUri}/api/CaseFileCompare/CalCustomer_mean?start=2024-10-01&end=2024-12-27&type=1");
 
                     if (data.IsSuccessStatusCode)
                     {

+ 19 - 4
wispro.sp.winClient/frmCaseFileCompare.cs

@@ -25,6 +25,7 @@ using System.Dynamic;
 using System.Diagnostics;
 using NPOI.Util;
 using NPOI.SS.Formula.Functions;
+using static wispro.sp.utility.CompareDocx;
 
 namespace wispro.sp.winClient
 {
@@ -140,7 +141,7 @@ namespace wispro.sp.winClient
                     string CaseNo = row["我方文号"].ToString();
                     if (row["申请类型"].ToString().Trim() == "外观设计")
                     {
-                        string str = $"{DateTime.Now}\t{iIndex}/{CaseNo}\t外观设计";
+                        string str = $"{DateTime.Now}\t{iIndex}/{table.Rows.Count}\t{CaseNo}\t外观设计";
                         DebugLog(str);
                         continue;
                     }
@@ -175,12 +176,18 @@ namespace wispro.sp.winClient
                             (string.IsNullOrEmpty(returnFile) ? 0 : 1) +
                             (string.IsNullOrEmpty(finalFile) ? 0 : 1);
 
-                        DebugLog($"{DateTime.Now}\t{iIndex}/{table.Rows.Count}\t{CaseNo},{(string.IsNullOrEmpty(draftFile) ? 0 : 1)},{(string.IsNullOrEmpty(returnFile) ? 0 : 1)},{(string.IsNullOrEmpty(finalFile) ? 0 : 1)}");
+                        DebugLog($"{DateTime.Now}\t{iIndex}/{table.Rows.Count}\t{CaseNo}\t{(string.IsNullOrEmpty(draftFile) ? 0 : 1)}\t{(string.IsNullOrEmpty(returnFile) ? 0 : 1)}\t{(string.IsNullOrEmpty(finalFile) ? 0 : 1)}");
                         
                         if (i > 1 && returnFile != null)
                         {
                             CompareFiles(draftFile, returnFile, finalFile, caseInfo);
-                            await new APIService().SaveCompareResult(caseInfo);
+
+                            if (!(caseInfo.DRRAll == null && caseInfo.RFRAll == null))
+                            {
+                                await new APIService().SaveCompareResult(caseInfo);
+                            }
+
+                            
                         }
 
                         try
@@ -380,6 +387,10 @@ namespace wispro.sp.winClient
             if (string.IsNullOrEmpty(strFile)) { return false; }
 
             string strfileName = new FileInfo(strFile).Name;
+            if(!new FileInfo(strfileName).Extension.Contains("doc"))
+            {
+                return false;
+            }
             switch (type)
             {
                 case 0:
@@ -866,7 +877,11 @@ namespace wispro.sp.winClient
 
         private void button3_Click(object sender, EventArgs e)
         {
-            emlFileReader reader = new emlFileReader(@"D:\Users\luowen\Downloads\2f7e92c1-e009-4b19-ba8d-332e8f6f2ffc.eml");
+            //new PatentDocument(@"C:\temp\PI2023CN1303-优先权分析(1).docx");
+            var ret = new APIService().CalZScore();
+            //ret.Wait();
+
+            //emlFileReader reader = new emlFileReader(@"D:\Users\luowen\Downloads\2f7e92c1-e009-4b19-ba8d-332e8f6f2ffc.eml");
         }
 
         private void txtCaseNo_TextChanged(object sender, EventArgs e)