using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using DiffPlex;
using System.IO.Packaging;
using System.Text.RegularExpressions;
using System.Linq;
using Microsoft.Office.Interop.Word;

namespace wispro.sp.utility
{
    /// <summary>
    /// Compares the text of two Word documents (.doc or .docx) character by character
    /// and reports how much text was deleted and inserted.
    /// </summary>
    public class CompareDocx
    {
        // NOTE(review): the markup constants and the two regex patterns below were
        // reconstructed. The reviewed copy of this file had everything between angle
        // brackets stripped out, so the exact tags are an assumption -- confirm them
        // against the version-controlled original before merging.
        private const string ParagraphOpenTag = "<p>";
        private const string ParagraphCloseTag = "</p>";
        private const string DeletedOpenTag = "<del>";
        private const string DeletedCloseTag = "</del>";
        private const string InsertedOpenTag = "<ins>";
        private const string InsertedCloseTag = "</ins>";

        // Group 2 captures the inner XML of a w:p (paragraph) element.
        private static readonly Regex ParagraphRegex = new Regex("(<w:p>|<w:p .*?>)(.*?)</w:p>");

        // Group 2 captures the content of a w:t (text run) element.
        private static readonly Regex TextRunRegex = new Regex("(<w:t>|<w:t .*?>)(.*?)</w:t>");

        private const string MainDocumentContentType =
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main";

        private int _oldDocCount;
        private int _newDocCount;
        private int _DeleteCount;
        private int _InsertCount;
        private double _EditCount;
        private string _CompareResultString;

        /// <summary>Path of the original document.</summary>
        public string oldDocument { get; set; }

        /// <summary>Path of the revised document.</summary>
        public string newDocument { get; set; }

        /// <summary>
        /// Overall change ratio: (deleted characters + inserted characters) divided by
        /// the character count of the original text.
        /// </summary>
        /// <remarks>
        /// Returns 0 when the original text is empty and nothing was changed (this includes
        /// the state before any comparison has run), instead of the NaN that 0/0 would yield.
        /// </remarks>
        public double diffRate
        {
            get
            {
                double changed = (double)_DeleteCount + _InsertCount;
                if (_oldDocCount == 0 && changed == 0)
                {
                    return 0d;
                }

                return changed / _oldDocCount;
            }
        }

        /// <summary>Number of characters in the extracted text of the original document.</summary>
        public int oldDocumentCount
        {
            get { return _oldDocCount; }
        }

        /// <summary>Number of characters in the extracted text of the revised document.</summary>
        public int newDocumentCount
        {
            get { return _newDocCount; }
        }

        /// <summary>Number of characters removed from the original text by the last comparison.</summary>
        public int DeleteCount
        {
            get { return _DeleteCount; }
        }

        /// <summary>Number of characters added to the revised text by the last comparison.</summary>
        public int InsertCount
        {
            get { return _InsertCount; }
        }

        /// <summary>Number of diff blocks (separate edited locations) found by the last comparison.</summary>
        public double EditCount
        {
            get { return _EditCount; }
        }

        /// <summary>
        /// The original text with deletions and insertions marked up, one paragraph element
        /// per extracted line.
        /// </summary>
        public string CompareResultString
        {
            get { return _CompareResultString; }
        }

        /// <summary>
        /// Stores both paths in <see cref="oldDocument"/> and <see cref="newDocument"/>
        /// and runs the comparison.
        /// </summary>
        /// <param name="oldFile">Path of the original document.</param>
        /// <param name="newFile">Path of the revised document.</param>
        /// <exception cref="ApplicationException">Either file does not exist.</exception>
        public void Compare(string oldFile, string newFile)
        {
            this.oldDocument = oldFile;
            this.newDocument = newFile;
            Compare();
        }

        /// <summary>
        /// Compares <see cref="oldDocument"/> with <see cref="newDocument"/> and refreshes
        /// every statistic exposed by this instance.
        /// </summary>
        /// <exception cref="ApplicationException">Either file does not exist.</exception>
        public void Compare()
        {
            if (!System.IO.File.Exists(this.oldDocument) || !System.IO.File.Exists(this.newDocument))
            {
                throw new ApplicationException("指定的文件不存在!");
            }

            string oldtext = ReadDocumentText(this.oldDocument);
            string newtext = ReadDocumentText(this.newDocument);

            var differ = new Differ();
            var diff = differ.CreateCharacterDiffs(oldtext, newtext, true);

            // Accumulate into locals so a repeated call starts from zero rather than
            // adding to the totals of the previous comparison.
            int deleted = 0;
            int inserted = 0;
            int lastPos = 0;
            var markup = new StringBuilder(ParagraphOpenTag);

            foreach (var change in diff.DiffBlocks)
            {
                deleted += change.DeleteCountA;
                inserted += change.InsertCountB;

                // Unchanged text between the previous block and this one.
                markup.Append(oldtext, lastPos, change.DeleteStartA - lastPos);
                lastPos = change.DeleteStartA + change.DeleteCountA;

                if (change.DeleteCountA > 0)
                {
                    markup.Append(DeletedOpenTag)
                          .Append(oldtext, change.DeleteStartA, change.DeleteCountA)
                          .Append(DeletedCloseTag);
                }

                if (change.InsertCountB > 0)
                {
                    markup.Append(InsertedOpenTag)
                          .Append(newtext, change.InsertStartB, change.InsertCountB)
                          .Append(InsertedCloseTag);
                }
            }

            // Unchanged tail after the last block.
            markup.Append(oldtext, lastPos, oldtext.Length - lastPos);

            // Every line break closes the current paragraph element and opens the next one.
            markup.Replace("\r\n", ParagraphCloseTag + "\r\n" + ParagraphOpenTag);
            markup.Append(ParagraphCloseTag);

            _oldDocCount = oldtext.Length;
            _newDocCount = newtext.Length;
            _EditCount = diff.DiffBlocks.Count;
            _DeleteCount = deleted;
            _InsertCount = inserted;
            _CompareResultString = markup.ToString();
        }

        /// <summary>
        /// Extracts the text of a document, using Word automation for ".doc" files and
        /// the package reader for everything else.
        /// </summary>
        private string ReadDocumentText(string path)
        {
            // Ordinal, case-insensitive so that ".DOC" is not sent to the package reader.
            return path.EndsWith(".doc", StringComparison.OrdinalIgnoreCase)
                ? GetDocTxt(path)
                : GetDocxTxt(path);
        }

        /// <summary>
        /// Reads the text of a document through Word COM automation.
        /// </summary>
        /// <param name="filePath">Path of the document to open.</param>
        /// <returns>The document text, one trimmed non-empty line per "\r"-separated segment.</returns>
        /// <exception cref="ApplicationException">Word could not open or read the document.</exception>
        private string GetDocTxt(string filePath)
        {
            Application word = null;
            Document doc = null;

            try
            {
                word = new Application();

                System.IO.FileInfo fileInfo = new System.IO.FileInfo(filePath);
                doc = word.Documents.Open(fileInfo.FullName);

                string content = doc.Content.Text;
                List<string> lines = content.Split("\r").ToList();
                return List2String(lines);
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so its stack trace survives.
                throw new ApplicationException($"读取Word文档时发生错误: {ex.Message}", ex);
            }
            finally
            {
                // Nested try/finally: Word must still be shut down if closing the document fails.
                try
                {
                    if (doc != null)
                    {
                        doc.Close();
#pragma warning disable CA1416 // Validate platform compatibility
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
#pragma warning restore CA1416 // Validate platform compatibility
                    }
                }
                finally
                {
                    if (word != null)
                    {
                        word.Quit();
#pragma warning disable CA1416 // Validate platform compatibility
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
#pragma warning restore CA1416 // Validate platform compatibility
                    }
                }
            }
        }

        /// <summary>
        /// Reads the text of a .docx package by scanning its main document XML.
        /// </summary>
        /// <param name="filepath">Path of the .docx file.</param>
        /// <returns>The document text, one trimmed non-empty line per paragraph element.</returns>
        private string GetDocxTxt(string filepath)
        {
            string mainXml = getDocxMainXml(filepath);
            List<string> paragraphs = ExtractWPTextFromXml(mainXml);
            return List2String(paragraphs);
        }

        /// <summary>
        /// Joins the non-empty entries of <paramref name="lines"/>, trimming each one and
        /// terminating it with "\r\n".
        /// </summary>
        private string List2String(List<string> lines)
        {
            StringBuilder sb = new StringBuilder();
            foreach (var line in lines)
            {
                // Only null/empty entries are skipped; a whitespace-only entry still
                // contributes an empty line after trimming.
                if (!string.IsNullOrEmpty(line))
                {
                    sb.Append(line.Trim()).Append("\r\n");
                }
            }

            return sb.ToString();
        }

        /// <summary>
        /// Returns the raw XML of the first package part whose content type is the
        /// WordprocessingML main document, or an empty string when there is none.
        /// </summary>
        /// <param name="filePath">Path of the .docx package.</param>
        private string getDocxMainXml(string filePath)
        {
            // Read-only access: the package is never modified, and the default
            // (read/write) fails on read-only files.
            using (Package package = Package.Open(filePath, FileMode.Open, FileAccess.Read))
            {
                foreach (PackagePart part in package.GetParts())
                {
                    if (!part.ContentType.StartsWith(MainDocumentContentType, StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    using (Stream stream = part.GetStream(FileMode.Open, FileAccess.Read))
                    using (StreamReader reader = new StreamReader(stream))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }

            return string.Empty;
        }

        /// <summary>
        /// Extracts the text of every paragraph element found in <paramref name="xmlText"/>.
        /// </summary>
        /// <returns>One entry per matched paragraph; entries may be empty.</returns>
        private List<string> ExtractWPTextFromXml(string xmlText)
        {
            List<string> lines = new List<string>();
            foreach (Match match in ParagraphRegex.Matches(xmlText))
            {
                lines.Add(ExtractWtTextFromXml(match.Groups[2].Value));
            }

            return lines;
        }

        /// <summary>
        /// Concatenates the content of every text-run element found in <paramref name="xmlText"/>.
        /// </summary>
        /// <remarks>
        /// The captured text is returned verbatim; XML entity references are not decoded.
        /// </remarks>
        private string ExtractWtTextFromXml(string xmlText)
        {
            StringBuilder sb = new StringBuilder();
            foreach (Match match in TextRunRegex.Matches(xmlText))
            {
                sb.Append(match.Groups[2].Value);
            }

            return sb.ToString();
        }
    }
}