using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using DiffPlex;
using System.IO.Packaging;
using System.Text.RegularExpressions;
using System.Linq;
using Microsoft.Office.Interop.Word;
namespace wispro.sp.utility
{
/// <summary>
/// Compares the text of two Word documents and reports how they differ.
/// </summary>
public class CompareDocx
{
/// <summary>
/// Path of the original document.
/// </summary>
public string oldDocument { get; set; }
/// <summary>
/// Path of the revised document.
/// </summary>
public string newDocument { get; set; }
/// <summary>
/// Overall change ratio: deleted plus inserted characters divided by the
/// length of the original text.
/// </summary>
/// <remarks>
/// The division is performed on doubles, so an original length of zero
/// yields NaN or infinity instead of throwing.
/// </remarks>
public double diffRate
{
    get
    {
        double changed = (double)_DeleteCount + _InsertCount;
        return changed / _oldDocCount;
    }
}
private int _oldDocCount;

/// <summary>
/// Length, in characters, of the text extracted from the original document.
/// </summary>
public int oldDocumentCount => _oldDocCount;
private int _newDocCount;

/// <summary>
/// Length, in characters, of the text extracted from the revised document.
/// </summary>
public int newDocumentCount => _newDocCount;
private int _DeleteCount;

/// <summary>
/// Number of characters of the original text that the revised text removed.
/// </summary>
public int DeleteCount => _DeleteCount;
private int _InsertCount;

/// <summary>
/// Number of characters the revised text added relative to the original text.
/// </summary>
public int InsertCount => _InsertCount;
private double _EditCount;

/// <summary>
/// Number of separate edited places (diff blocks) found by the comparison.
/// </summary>
public double EditCount => _EditCount;
private string _CompareResultString;

/// <summary>
/// Merged text built by the comparison: the unchanged text with the deleted
/// and inserted runs interleaved at the places where they occur.
/// </summary>
public string CompareResultString => _CompareResultString;
/// <summary>
/// Stores both paths on this instance and then runs the comparison.
/// </summary>
/// <param name="oldFile">Path of the original document.</param>
/// <param name="newFile">Path of the revised document.</param>
public void Compare(string oldFile, string newFile)
{
    oldDocument = oldFile;
    newDocument = newFile;

    Compare();
}
/// <summary>
/// Compares <see cref="oldDocument"/> with <see cref="newDocument"/> character by
/// character and refreshes the result properties of this instance.
/// </summary>
/// <remarks>
/// A path whose extension is <c>.doc</c> (in any letter case) is read through Word
/// automation; every other path is read as a .docx package.
/// </remarks>
/// <exception cref="ApplicationException">Either path does not name an existing file.</exception>
public void Compare()
{
    if (!System.IO.File.Exists(this.oldDocument) || !System.IO.File.Exists(this.newDocument))
    {
        throw new ApplicationException("指定的文件不存在!");
    }

    string oldtext = ReadDocumentText(this.oldDocument);
    _oldDocCount = oldtext.Length;

    string newtext = ReadDocumentText(this.newDocument);
    _newDocCount = newtext.Length;

    var diff = new Differ().CreateCharacterDiffs(oldtext, newtext, true);
    _EditCount = diff.DiffBlocks.Count;

    // Start the tallies from zero: they are accumulated with += below, so without
    // this a second Compare() on the same instance would add to the previous totals.
    _DeleteCount = 0;
    _InsertCount = 0;

    // NOTE(review): the string literals in this method are reproduced exactly as
    // found, including the line breaks inside the quotes. The interpolated strings
    // add nothing around the deleted/inserted text, so markup may have been lost
    // from these literals - confirm against the intended output format.
    var merged = new StringBuilder();
    merged.Append("
");
    int lastPos = 0;
    foreach (var change in diff.DiffBlocks)
    {
        _DeleteCount += change.DeleteCountA;
        _InsertCount += change.InsertCountB;

        // Unchanged text between the previous block and this one.
        merged.Append(oldtext, lastPos, change.DeleteStartA - lastPos);
        lastPos = change.DeleteStartA + change.DeleteCountA;

        if (change.DeleteCountA > 0)
        {
            merged.Append($"{oldtext.Substring(change.DeleteStartA, change.DeleteCountA)}");
        }
        if (change.InsertCountB > 0)
        {
            merged.Append($"{newtext.Substring(change.InsertStartB, change.InsertCountB)}");
        }
    }

    // Unchanged tail after the last block.
    merged.Append(oldtext, lastPos, oldtext.Length - lastPos);

    _CompareResultString = merged.ToString().Replace("\r\n", "
\r\n") + "
";
}

// Reads the text of one document, picking the reader from the file extension.
private string ReadDocumentText(string path)
{
    // Ordinal, case-insensitive extension test: "X.DOC" is a .doc file too, and the
    // result must not depend on the current culture.
    bool isLegacyDoc = string.Equals(
        System.IO.Path.GetExtension(path), ".doc", StringComparison.OrdinalIgnoreCase);

    return isLegacyDoc ? GetDocTxt(path) : GetDocxTxt(path);
}
/// <summary>
/// Reads the text of a document by opening it in Word through COM automation.
/// </summary>
/// <param name="filePath">Path of the document; it is resolved to a full path before Word opens it.</param>
/// <returns>The document text, split on carriage returns and re-joined by <c>List2String</c>.</returns>
/// <exception cref="Exception">
/// Opening or reading the document failed; the original failure is attached as the inner exception.
/// </exception>
private string GetDocTxt(string filePath)
{
    Application word = null;
    Document doc = null;
    try
    {
        // Start a Word instance and open the document.
        word = new Application();
        System.IO.FileInfo fileInfo = new System.IO.FileInfo(filePath);
        doc = word.Documents.Open(fileInfo.FullName);

        // The text is split on bare carriage returns.
        string content = doc.Content.Text;
        List<string> lines = content.Split("\r").ToList();
        return List2String(lines);
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace survive.
        throw new Exception($"读取Word文档时发生错误: {ex.Message}", ex);
    }
    finally
    {
        // Nested try/finally: Word must still be shut down when closing the document throws.
        try
        {
            if (doc != null)
            {
                doc.Close();
#pragma warning disable CA1416 // Validate platform compatibility
                System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
#pragma warning restore CA1416 // Validate platform compatibility
            }
        }
        finally
        {
            if (word != null)
            {
                word.Quit();
#pragma warning disable CA1416 // Validate platform compatibility
                System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
#pragma warning restore CA1416 // Validate platform compatibility
            }
        }
    }
}
/// <summary>
/// Produces the text of a .docx file: reads the package's main document XML,
/// extracts its lines and joins them into one string.
/// </summary>
/// <param name="filepath">Path of the .docx file.</param>
/// <returns>The joined text of the document.</returns>
private string GetDocxTxt(string filepath)
{
    string mainXml = getDocxMainXml(filepath);
    return List2String(ExtractWPTextFromXml(mainXml));
}
/// <summary>
/// Joins lines into one string, trimming each kept line and ending it with CR LF.
/// </summary>
/// <param name="lines">Lines to join; null, empty and whitespace-only entries are skipped.</param>
/// <returns>The joined text, or an empty string when no line is kept.</returns>
private string List2String(List<string> lines)
{
    var joined = new StringBuilder();
    foreach (string line in lines)
    {
        // Test for whitespace rather than emptiness: a whitespace-only line trims to
        // nothing and would otherwise be emitted as a blank line.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        joined.Append(line.Trim()).Append("\r\n");
    }
    return joined.ToString();
}
/// <summary>
/// Returns the raw XML of the main document part of a .docx package.
/// </summary>
/// <param name="filePath">Path of the .docx file.</param>
/// <returns>
/// The content of the first part whose content type starts with the WordprocessingML
/// main-document type, or an empty string when the package has no such part.
/// </returns>
private string getDocxMainXml(string filePath)
{
    const string mainContentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main";

    // Open read-only: the two-argument overload asks for read/write access, which
    // fails on read-only files even though nothing is written here.
    using (Package package = Package.Open(filePath, FileMode.Open, FileAccess.Read))
    {
        foreach (PackagePart part in package.GetParts())
        {
            if (!part.ContentType.StartsWith(mainContentType, StringComparison.Ordinal))
            {
                continue;
            }

            using (Stream stream = part.GetStream(FileMode.Open, FileAccess.Read))
            using (var reader = new StreamReader(stream))
            {
                return reader.ReadToEnd();
            }
        }
    }

    return string.Empty;
}
/// <summary>
/// Runs a regular expression over the XML and returns one entry per match: the text
/// that <c>ExtractWtTextFromXml</c> extracts from the match's second capture group.
/// </summary>
/// <param name="xmlText">XML to scan.</param>
/// <returns>The extracted strings, in match order.</returns>
private List<string> ExtractWPTextFromXml(string xmlText)
{
    // NOTE(review): the pattern is reproduced exactly as found. It contains no element
    // names, although this method presumably targets paragraph elements - confirm the
    // intended pattern.
    return Regex.Matches(xmlText, "(|)(.*?)")
        .Cast<Match>()
        .Select(match => ExtractWtTextFromXml(match.Groups[2].Value))
        .ToList();
}
/// <summary>
/// Runs a regular expression over the XML and concatenates the second capture
/// group of every match.
/// </summary>
/// <param name="xmlText">XML fragment to scan.</param>
/// <returns>The concatenated captures; an empty string when nothing is captured.</returns>
private string ExtractWtTextFromXml(string xmlText)
{
    // NOTE(review): the pattern is reproduced exactly as found. It contains no element
    // names, although this method presumably targets text-run elements - confirm the
    // intended pattern.
    IEnumerable<string> pieces = Regex.Matches(xmlText, "(|)(.*?)")
        .Cast<Match>()
        .Select(match => match.Groups[2].Value);

    return string.Concat(pieces);
}
}
}