|
@@ -0,0 +1,268 @@
|
|
|
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Packaging;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using DiffPlex;
using NPOI.XWPF.UserModel;
|
|
|
+
|
|
|
namespace wispro.sp.utility
{
    /// <summary>
    /// Compares the text of two .docx documents character by character and
    /// exposes the resulting statistics together with an HTML rendering of
    /// the revisions.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the original document is read through NPOI
    /// (<c>GetDocTxt</c>) while the revised document is read by scanning the
    /// main document XML (<c>GetDocxTxt</c>). The two routes normalise text
    /// differently: <c>GetDocTxt</c> keeps empty paragraphs and trims the
    /// result, whereas <c>GetDocxTxt</c> drops empty paragraphs and ends every
    /// paragraph with CRLF. Confirm that this asymmetry is intended.
    /// </remarks>
    public class CompareDocx
    {
        /// <summary>Content-type prefix that identifies the main document part of a package.</summary>
        private const string MainDocumentContentType =
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main";

        // Singleline lets '.' cross line breaks, so XML that is pretty-printed
        // (one element per line) is matched the same way as single-line XML.
        private static readonly Regex ParagraphPattern =
            new Regex("(<w:p\\s.*?>|<w:p>)(.*?)</w:p>", RegexOptions.Singleline);

        private static readonly Regex TextRunPattern =
            new Regex("(<w:t\\s.*?>|<w:t>)(.*?)</w:t>", RegexOptions.Singleline);

        private int _oldDocCount;
        private int _newDocCount;
        private int _DeleteCount;
        private int _InsertCount;
        private double _EditCount;
        private string _CompareResultString;

        /// <summary>
        /// Path of the original document.
        /// </summary>
        public string oldDocument { get; set; }

        /// <summary>
        /// Path of the revised document.
        /// </summary>
        public string newDocument { get; set; }

        /// <summary>
        /// Overall change ratio: (deleted + inserted characters) divided by the
        /// character count of the original document.
        /// </summary>
        /// <remarks>
        /// Returns 0 when nothing was changed (including before any comparison
        /// has run) instead of the NaN that 0/0 would produce. When characters
        /// were inserted into an empty original the ratio is positive infinity.
        /// </remarks>
        public double diffRate
        {
            get
            {
                int changed = _DeleteCount + _InsertCount;
                if (changed == 0)
                {
                    return 0.0;
                }

                return (double)changed / (double)_oldDocCount;
            }
        }

        /// <summary>
        /// Character count of the original document text.
        /// </summary>
        public int oldDocumentCount
        {
            get { return _oldDocCount; }
        }

        /// <summary>
        /// Character count of the revised document text.
        /// </summary>
        public int newDocumentCount
        {
            get { return _newDocCount; }
        }

        /// <summary>
        /// Number of characters of the original document that were deleted.
        /// </summary>
        public int DeleteCount
        {
            get { return _DeleteCount; }
        }

        /// <summary>
        /// Number of characters that were inserted in the revised document.
        /// </summary>
        public int InsertCount
        {
            get { return _InsertCount; }
        }

        /// <summary>
        /// Number of diff blocks (separate edited places) found.
        /// </summary>
        public double EditCount
        {
            get { return _EditCount; }
        }

        /// <summary>
        /// HTML rendering of the comparison: unchanged text as is, deletions in
        /// red strike-through, insertions in blue underline, one
        /// <c>&lt;p&gt;</c> element per CRLF-separated line.
        /// </summary>
        public string CompareResultString
        {
            get { return _CompareResultString; }
        }

        /// <summary>
        /// Stores the two paths and compares the documents.
        /// </summary>
        /// <param name="oldFile">Path of the original document.</param>
        /// <param name="newFile">Path of the revised document.</param>
        /// <exception cref="ApplicationException">Either file does not exist.</exception>
        public void Compare(string oldFile, string newFile)
        {
            this.oldDocument = oldFile;
            this.newDocument = newFile;

            Compare();
        }

        /// <summary>
        /// Compares <see cref="oldDocument"/> with <see cref="newDocument"/> and
        /// refreshes every result property of this instance.
        /// </summary>
        /// <exception cref="ApplicationException">Either file does not exist.</exception>
        public void Compare()
        {
            if (!File.Exists(this.oldDocument) || !File.Exists(this.newDocument))
            {
                throw new ApplicationException("指定的文件不存在!");
            }

            string oldText = GetDocTxt(this.oldDocument);
            string newText = GetDocxTxt(this.newDocument);

            var diff = new Differ().CreateCharacterDiffs(oldText, newText, true);

            // Everything is accumulated in locals and published at the end, so
            // that (a) a repeated Compare() starts from zero instead of adding
            // to the previous run's counts and (b) a failure midway leaves the
            // previous results untouched.
            int deleted = 0;
            int inserted = 0;
            int cursor = 0; // first position in oldText not yet rendered
            var html = new StringBuilder("<p>");

            foreach (var block in diff.DiffBlocks)
            {
                deleted += block.DeleteCountA;
                inserted += block.InsertCountB;

                // Unchanged text between the previous block and this one.
                html.Append(WebUtility.HtmlEncode(
                    oldText.Substring(cursor, block.DeleteStartA - cursor)));
                cursor = block.DeleteStartA + block.DeleteCountA;

                if (block.DeleteCountA > 0)
                {
                    html.Append("<strike style=\"text-decoration: line-through; color: red;\">")
                        .Append(WebUtility.HtmlEncode(
                            oldText.Substring(block.DeleteStartA, block.DeleteCountA)))
                        .Append("</strike>");
                }

                if (block.InsertCountB > 0)
                {
                    html.Append("<u style=\"text-decoration: underline; color: blue;\">")
                        .Append(WebUtility.HtmlEncode(
                            newText.Substring(block.InsertStartB, block.InsertCountB)))
                        .Append("</u>");
                }
            }

            // Unchanged tail after the last block.
            html.Append(WebUtility.HtmlEncode(oldText.Substring(cursor)));

            _oldDocCount = oldText.Length;
            _newDocCount = newText.Length;
            _EditCount = diff.DiffBlocks.Count;
            _DeleteCount = deleted;
            _InsertCount = inserted;
            _CompareResultString = html.ToString().Replace("\r\n", "</p>\r\n<p>") + "</p>";
        }

        /// <summary>
        /// Reads a document with NPOI and returns the text of its paragraphs
        /// joined by CRLF, with leading and trailing whitespace trimmed.
        /// </summary>
        /// <param name="filepath">Path of the .docx file.</param>
        /// <returns>The trimmed paragraph text.</returns>
        private string GetDocTxt(string filepath)
        {
            using (var stream = File.OpenRead(filepath))
            {
                XWPFDocument doc = new XWPFDocument(stream);
                StringBuilder sb = new StringBuilder();

                foreach (var para in doc.Paragraphs)
                {
                    sb.Append("\r\n").Append(para.Text);
                }

                return sb.ToString().Trim();
            }
        }

        /// <summary>
        /// Reads a document by scanning its main document XML and returns the
        /// text of every non-empty paragraph, each followed by CRLF.
        /// </summary>
        /// <param name="filepath">Path of the .docx file.</param>
        /// <returns>The extracted text.</returns>
        private string GetDocxTxt(string filepath)
        {
            string xml = getDocxMainXml(filepath);
            List<string> paragraphs = ExtractWPTextFromXml(xml);

            return List2String(paragraphs);
        }

        /// <summary>
        /// Concatenates the non-empty entries of <paramref name="lines"/>,
        /// appending CRLF after each one.
        /// </summary>
        /// <param name="lines">Lines to join; null or empty entries are skipped.</param>
        /// <returns>The joined text.</returns>
        private string List2String(List<string> lines)
        {
            StringBuilder sb = new StringBuilder();
            foreach (var line in lines)
            {
                if (!string.IsNullOrEmpty(line))
                {
                    sb.Append(line).Append("\r\n");
                }
            }

            return sb.ToString();
        }

        /// <summary>
        /// Returns the raw XML of the first package part whose content type
        /// marks it as the main word-processing document.
        /// </summary>
        /// <param name="filePath">Path of the .docx package.</param>
        /// <returns>The part's XML, or an empty string when no such part exists.</returns>
        private string getDocxMainXml(string filePath)
        {
            // The package is only read, so open it read-only and shareable.
            // Package.Open(path, FileMode.Open) would request read-write access
            // with exclusive sharing and fail on read-only or in-use files.
            using (Package package = Package.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                foreach (var part in package.GetParts())
                {
                    if (!part.ContentType.StartsWith(MainDocumentContentType, StringComparison.Ordinal))
                    {
                        continue;
                    }

                    using (Stream stream = part.GetStream())
                    using (StreamReader reader = new StreamReader(stream))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }

            return string.Empty;
        }

        /// <summary>
        /// Splits document XML into paragraphs (<c>&lt;w:p&gt;</c> elements) and
        /// returns the text of each one, in document order.
        /// </summary>
        /// <param name="xmlText">Raw main-document XML.</param>
        /// <returns>One entry per matched paragraph; entries may be empty.</returns>
        private List<string> ExtractWPTextFromXml(string xmlText)
        {
            List<string> lines = new List<string>();

            foreach (Match match in ParagraphPattern.Matches(xmlText))
            {
                lines.Add(ExtractWtTextFromXml(match.Groups[2].Value));
            }

            return lines;
        }

        /// <summary>
        /// Concatenates the content of every <c>&lt;w:t&gt;</c> element found in
        /// <paramref name="xmlText"/>.
        /// </summary>
        /// <param name="xmlText">XML fragment, typically the inside of one paragraph.</param>
        /// <returns>The text with XML character entities decoded.</returns>
        private string ExtractWtTextFromXml(string xmlText)
        {
            StringBuilder sb = new StringBuilder();
            foreach (Match match in TextRunPattern.Matches(xmlText))
            {
                sb.Append(match.Groups[2].Value);
            }

            // The captured content is still XML-escaped (&amp;, &lt;, ...).
            // Decode it so it is comparable with the unescaped text NPOI
            // returns for the other document.
            return WebUtility.HtmlDecode(sb.ToString());
        }
    }
}
|