Browse Source

获取Word文档文本模块,添加了对.doc文档的支持(使用Microsoft.Office.Interop.Word)

luocaiyang 11 tháng trước
mục cha
commit
6f7a9c1eaf
2 tập tin đã thay đổi với 43 bổ sung và 17 xóa
  1. 31 17
      wispro.sp.utility/CompareDocx.cs
  2. 12 0
      wispro.sp.utility/wispro.sp.utility.csproj

+ 31 - 17
wispro.sp.utility/CompareDocx.cs

@@ -6,6 +6,8 @@ using DiffPlex;
 using System.IO.Packaging;
 using System.Text.RegularExpressions;
 using NPOI.XWPF.UserModel;
+using System.Linq;
+using Microsoft.Office.Interop.Word;
 
 namespace wispro.sp.utility
 {
@@ -129,11 +131,27 @@ namespace wispro.sp.utility
             }
 
             var differ = new Differ();
+            string oldtext = "";
+            if (this.oldDocument.EndsWith(".doc"))
+            {
+                oldtext = GetDocTxt(this.oldDocument);
+            }
+            else
+            {
+                oldtext = GetDocxTxt(this.oldDocument);
+            }
 
-            var oldtext = GetDocTxt(this.oldDocument);
             _oldDocCount = oldtext.Length;
 
-            var newtext = GetDocxTxt(this.newDocument);
+            string newtext = "";
+            if (this.newDocument.EndsWith(".doc"))
+            {
+                newtext = GetDocTxt(this.newDocument);
+            }
+            else
+            {
+                newtext = GetDocxTxt(this.newDocument);
+            }
             _newDocCount = newtext.Length;
 
             var diff = differ.CreateCharacterDiffs(oldtext, newtext, true);
@@ -170,27 +188,23 @@ namespace wispro.sp.utility
             lastResult += oldtext.Substring(lastPos);
             _CompareResultString += oldtext.Substring(lastPos);
             _CompareResultString = _CompareResultString.Replace("\r\n", "</p>\r\n<p>") + "</p>";
-
-            //_diffRate = (double)iDeff / (double)oldtext.Length;
-
-            //Console.WriteLine($"修改字数:{iDeff}\r\n原文档字数:{oldtext.Length}\r\n修改比率:{(_diffRate * 100.00).ToString("0.0000")}%");
+            
         }
 
         private string GetDocTxt(string filepath)
         {
-            using (var stream = File.OpenRead(filepath))
-            {
+            Application wordApp = new Application();
+            System.IO.FileInfo fileInfo = new System.IO.FileInfo(filepath);
+            Microsoft.Office.Interop.Word.Document doc = wordApp.Documents.Open(fileInfo.FullName);
+            string text = doc.Content.Text;
 
-                XWPFDocument doc = new XWPFDocument(stream);
-                string text = "";
+            List<string> lines = text.Split("\r").ToList();
+            text = List2String(lines);
 
-                foreach (var para in doc.Paragraphs)
-                {
-                    text += "\r\n" + para.Text;
-                }
+            doc.Close();
+            wordApp.Quit();
+            return text;
 
-                return text.Trim();
-            }
         }
 
         private string GetDocxTxt(string filepath)
@@ -209,7 +223,7 @@ namespace wispro.sp.utility
             {
                 if (!string.IsNullOrEmpty(line))
                 {
-                    sb.Append(line + "\r\n");
+                    sb.Append(line.Trim() + "\r\n");
                 }
             }
 

+ 12 - 0
wispro.sp.utility/wispro.sp.utility.csproj

@@ -10,6 +10,18 @@
   </ItemGroup>
 
   <ItemGroup>
+    <COMReference Include="Microsoft.Office.Interop.Word">
+      <WrapperTool>tlbimp</WrapperTool>
+      <VersionMinor>7</VersionMinor>
+      <VersionMajor>8</VersionMajor>
+      <Guid>00020905-0000-0000-c000-000000000046</Guid>
+      <Lcid>0</Lcid>
+      <Isolated>false</Isolated>
+      <EmbedInteropTypes>true</EmbedInteropTypes>
+    </COMReference>
+  </ItemGroup>
+
+  <ItemGroup>
     <PackageReference Include="DiffPlex" Version="1.7.2" />
     <PackageReference Include="DocumentFormat.OpenXml" Version="3.1.1" />
     <PackageReference Include="MailKit" Version="2.15.0" />