|
@@ -16,95 +16,257 @@ namespace wispro.sp.utility
|
|
/// </summary>
|
|
/// </summary>
|
|
public class CompareDocx
|
|
public class CompareDocx
|
|
{
|
|
{
|
|
- /// <summary>
|
|
|
|
- /// 原文档路径
|
|
|
|
- /// </summary>
|
|
|
|
- public string oldDocument { get; set; }
|
|
|
|
|
|
+ public class PatentDocument
|
|
|
|
+ {
|
|
|
|
+ public string FilePath { get; set; }
|
|
|
|
|
|
- /// <summary>
|
|
|
|
- /// 修订后文档路径
|
|
|
|
- /// </summary>
|
|
|
|
- public string newDocument { get; set; }
|
|
|
|
|
|
+ public string Abstract { get; set; }
|
|
|
|
|
|
- /// <summary>
|
|
|
|
- /// 总的修改比率
|
|
|
|
- /// </summary>
|
|
|
|
- public double diffRate
|
|
|
|
- {
|
|
|
|
- get
|
|
|
|
|
|
+ public string Claim { get; set; }
|
|
|
|
+
|
|
|
|
+ public string FullText { get; set; }
|
|
|
|
+
|
|
|
|
+ public string DocumentString { get; set; }
|
|
|
|
+
|
|
|
|
+ public PatentDocument(string filePath) {
|
|
|
|
+ this.FilePath = filePath;
|
|
|
|
+
|
|
|
|
+ if (!System.IO.File.Exists(this.FilePath) )
|
|
|
|
+ {
|
|
|
|
+ throw new ApplicationException("指定的文件不存在!");
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (this.FilePath.EndsWith(".doc"))
|
|
|
|
+ {
|
|
|
|
+ DocumentString = GetDocTxt(this.FilePath);
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ DocumentString = GetDocxTxt(this.FilePath);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private string GetDocTxt(string filePath)
|
|
{
|
|
{
|
|
- return ((double)_DeleteCount + (double)_InsertCount) / (double)_oldDocCount;
|
|
|
|
|
|
+ Application word = null;
|
|
|
|
+ Document doc = null;
|
|
|
|
+ string content = string.Empty;
|
|
|
|
+
|
|
|
|
+ try
|
|
|
|
+ {
|
|
|
|
+ // 创建Word应用实例
|
|
|
|
+ word = new Application();
|
|
|
|
+ // 打开Word文档
|
|
|
|
+ System.IO.FileInfo fileInfo = new System.IO.FileInfo(filePath);
|
|
|
|
+ doc = word.Documents.Open(fileInfo.FullName);
|
|
|
|
+ // 读取文档内容
|
|
|
|
+ content = doc.Content.Text;
|
|
|
|
+
|
|
|
|
+ List<string> lines = content.Split("\r").ToList();
|
|
|
|
+ return List2String(lines);
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+ catch (Exception ex)
|
|
|
|
+ {
|
|
|
|
+ throw new Exception($"读取Word文档时发生错误: {ex.Message}");
|
|
|
|
+ }
|
|
|
|
+ finally
|
|
|
|
+ {
|
|
|
|
+ // 关闭文档
|
|
|
|
+ if (doc != null)
|
|
|
|
+ {
|
|
|
|
+ doc.Close();
|
|
|
|
+#pragma warning disable CA1416 // 验证平台兼容性
|
|
|
|
+ System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
|
|
|
|
+#pragma warning restore CA1416 // 验证平台兼容性
|
|
|
|
+ }
|
|
|
|
+ // 退出Word应用
|
|
|
|
+ if (word != null)
|
|
|
|
+ {
|
|
|
|
+ word.Quit();
|
|
|
|
+#pragma warning disable CA1416 // 验证平台兼容性
|
|
|
|
+ System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
|
|
|
|
+#pragma warning restore CA1416 // 验证平台兼容性
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
}
|
|
}
|
|
- }
|
|
|
|
|
|
|
|
- private int _oldDocCount;
|
|
|
|
- /// <summary>
|
|
|
|
- /// 原文档字数
|
|
|
|
- /// </summary>
|
|
|
|
- public int oldDocumentCount
|
|
|
|
- {
|
|
|
|
- get { return _oldDocCount; }
|
|
|
|
- }
|
|
|
|
|
|
+ private string GetDocxTxt(string filepath)
|
|
|
|
+ {
|
|
|
|
+ var oldtext = getDocxMainXml(filepath);
|
|
|
|
+ var oldlines = ExtractWPTextFromXml(oldtext);
|
|
|
|
+ oldtext = List2String(oldlines);
|
|
|
|
|
|
- private int _newDocCount;
|
|
|
|
- /// <summary>
|
|
|
|
- /// 修订后文档字数
|
|
|
|
- /// </summary>
|
|
|
|
- public int newDocumentCount
|
|
|
|
- {
|
|
|
|
- get { return _newDocCount; }
|
|
|
|
- }
|
|
|
|
|
|
+ return oldtext;
|
|
|
|
+ }
|
|
|
|
|
|
- private int _DeleteCount;
|
|
|
|
- /// <summary>
|
|
|
|
- /// 修订后文档相比原文档删除的字数
|
|
|
|
- /// </summary>
|
|
|
|
- public int DeleteCount
|
|
|
|
- {
|
|
|
|
- get
|
|
|
|
|
|
+ private string List2String(List<string> lines)
|
|
{
|
|
{
|
|
- return _DeleteCount;
|
|
|
|
|
|
+ string[] array = { "权 利 要 求 书", "说 明 书 摘 要", "说 明 书" , "摘 要 附 图", "说 明 书 附 图" };
|
|
|
|
+
|
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
|
+ string lastBlock = string.Empty;
|
|
|
|
+ foreach (var line in lines)
|
|
|
|
+ {
|
|
|
|
+ if (!string.IsNullOrEmpty(line))
|
|
|
|
+ {
|
|
|
|
+ sb.Append(line.Trim() + "\r\n");
|
|
|
|
+
|
|
|
|
+ if(Array.Exists(array, element => element == line.Trim()))
|
|
|
|
+ {
|
|
|
|
+ lastBlock = line.Trim();
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ switch(lastBlock) {
|
|
|
|
+ case "权 利 要 求 书":
|
|
|
|
+ this.Claim = this.Claim + "\r\n" + line;
|
|
|
|
+ break;
|
|
|
|
+ case "说 明 书 摘 要":
|
|
|
|
+ this.Abstract = this.Abstract + "\r\n" + line;
|
|
|
|
+ break;
|
|
|
|
+ case "说 明 书":
|
|
|
|
+ this.FullText = this.FullText + "\r\n" + line;
|
|
|
|
+ break;
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ this.Abstract = string.IsNullOrEmpty(this.Abstract)?string.Empty: this.Abstract.Trim();
|
|
|
|
+ this.Claim = string.IsNullOrEmpty(this.Claim) ? string.Empty : this.Claim.Trim();
|
|
|
|
+ this.FullText = string.IsNullOrEmpty(this.FullText) ? string.Empty : this.FullText.Trim();
|
|
|
|
+
|
|
|
|
+ return sb.ToString();
|
|
}
|
|
}
|
|
- }
|
|
|
|
|
|
|
|
- private int _InsertCount;
|
|
|
|
|
|
+ private string getDocxMainXml(string filePath)
|
|
|
|
+ {
|
|
|
|
+ string text = string.Empty;
|
|
|
|
+ using (Package package = Package.Open(filePath, FileMode.Open))
|
|
|
|
+ {
|
|
|
|
+ var Parts = package.GetParts();
|
|
|
|
+
|
|
|
|
+ foreach (var part in Parts)
|
|
|
|
+ {
|
|
|
|
+ if (part.ContentType.StartsWith("application/vnd.openxmlformats-officedocument.wordprocessingml.document.main"))
|
|
|
|
+ {
|
|
|
|
+ using (Stream stream = part.GetStream())
|
|
|
|
+ {
|
|
|
|
+ StreamReader reader = new StreamReader(stream);
|
|
|
|
+ text = reader.ReadToEnd();
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
|
|
- /// <summary>
|
|
|
|
- /// 修改后文档相比原文档插入的字数
|
|
|
|
- /// </summary>
|
|
|
|
- public int InsertCount
|
|
|
|
- {
|
|
|
|
- get
|
|
|
|
|
|
+ return text;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private List<string> ExtractWPTextFromXml(string xmlText)
|
|
{
|
|
{
|
|
- return _InsertCount;
|
|
|
|
|
|
+ List<string> lines = new List<string>();
|
|
|
|
+ // 使用正则表达式匹配 <w:p> 标签的内容
|
|
|
|
+ MatchCollection matches = Regex.Matches(xmlText, "(<w:p\\s.*?>|<w:p>)(.*?)</w:p>");
|
|
|
|
+
|
|
|
|
+ foreach (Match match in matches)
|
|
|
|
+ {
|
|
|
|
+ lines.Add(ExtractWtTextFromXml(match.Groups[2].Value));
|
|
|
|
+ }
|
|
|
|
+ return lines;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private string ExtractWtTextFromXml(string xmlText)
|
|
|
|
+ {
|
|
|
|
+ // 使用正则表达式匹配 <w:t> 标签的内容
|
|
|
|
+ MatchCollection matches = Regex.Matches(xmlText, "(<w:t\\s.*?>|<w:t>)(.*?)</w:t>");
|
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
|
+ foreach (Match match in matches)
|
|
|
|
+ {
|
|
|
|
+ sb.Append(match.Groups[2].Value);
|
|
|
|
+ }
|
|
|
|
+ return sb.ToString();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- private double _EditCount;
|
|
|
|
- /// <summary>
|
|
|
|
- /// 修订处数量
|
|
|
|
- /// </summary>
|
|
|
|
- public double EditCount
|
|
|
|
|
|
+ public class StringCompareResult
|
|
{
|
|
{
|
|
- get
|
|
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 源字符串字数
|
|
|
|
+ /// </summary>
|
|
|
|
+ public int oldWordCount { get; set;}
|
|
|
|
+
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 新字符串字数
|
|
|
|
+ /// </summary>
|
|
|
|
+ public int newWordCount { get; set;}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 修订后文档相比原文档删除的字数
|
|
|
|
+ /// </summary>
|
|
|
|
+ public int DeleteCount { get; set; }
|
|
|
|
+
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 修改后文档相比原文档插入的字数
|
|
|
|
+ /// </summary>
|
|
|
|
+ public int InsertCount { get; set; }
|
|
|
|
+
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 修订处数量
|
|
|
|
+ /// </summary>
|
|
|
|
+ public double EditCount{ get;set; }
|
|
|
|
+
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 包括修订文字版本的文档
|
|
|
|
+ /// </summary>
|
|
|
|
+ public string CompareResultString { get; set; }
|
|
|
|
+
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 总的修改比率
|
|
|
|
+ /// </summary>
|
|
|
|
+ public double diffRate
|
|
{
|
|
{
|
|
- return _EditCount;
|
|
|
|
|
|
+ get
|
|
|
|
+ {
|
|
|
|
+ return ((double)DeleteCount + (double)InsertCount) / (double)oldWordCount;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
+
|
|
}
|
|
}
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 原文档
|
|
|
|
+ /// </summary>
|
|
|
|
+ public PatentDocument oldDocument { get; set; }
|
|
|
|
+
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 修订后文档
|
|
|
|
+ /// </summary>
|
|
|
|
+ public PatentDocument newDocument { get; set; }
|
|
|
|
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 权利要求比较结果
|
|
|
|
+ /// </summary>
|
|
|
|
+ public StringCompareResult ClaimResult { get; set; }
|
|
|
|
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 摘要比较结果
|
|
|
|
+ /// </summary>
|
|
|
|
+ public StringCompareResult AbstractResult { get; set; }
|
|
|
|
|
|
- private string _CompareResultString;
|
|
|
|
/// <summary>
|
|
/// <summary>
|
|
- /// 包括修订文字版本的文档
|
|
|
|
|
|
+ /// 说明书比较结果
|
|
/// </summary>
|
|
/// </summary>
|
|
- public string CompareResultString
|
|
|
|
- {
|
|
|
|
- get
|
|
|
|
- {
|
|
|
|
- return _CompareResultString;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ public StringCompareResult FulltextResult { get; set; }
|
|
|
|
+
|
|
|
|
+ /// <summary>
|
|
|
|
+ /// 所有文字比较结果
|
|
|
|
+ /// </summary>
|
|
|
|
+ public StringCompareResult AllStringResult { get; set; }
|
|
|
|
|
|
/// <summary>
|
|
/// <summary>
|
|
/// 比较两个文档
|
|
/// 比较两个文档
|
|
@@ -113,60 +275,44 @@ namespace wispro.sp.utility
|
|
/// <param name="newFile"></param>
|
|
/// <param name="newFile"></param>
|
|
public void Compare(string oldFile, string newFile)
|
|
public void Compare(string oldFile, string newFile)
|
|
{
|
|
{
|
|
- this.oldDocument = oldFile;
|
|
|
|
- this.newDocument = newFile;
|
|
|
|
|
|
+ this.oldDocument =new PatentDocument(oldFile);
|
|
|
|
+ this.newDocument =new PatentDocument(newFile);
|
|
|
|
+
|
|
|
|
+ this.ClaimResult = StringCompare(this.oldDocument.Claim,this.newDocument.Claim);
|
|
|
|
+ this.AbstractResult = StringCompare(this.oldDocument.Abstract, this.newDocument.Abstract);
|
|
|
|
+ this.FulltextResult = StringCompare(this.oldDocument.FullText, this.newDocument.FullText);
|
|
|
|
+ this.AllStringResult = StringCompare(this.oldDocument.DocumentString, this.newDocument.DocumentString);
|
|
|
|
|
|
- Compare();
|
|
|
|
}
|
|
}
|
|
|
|
|
|
/// <summary>
|
|
/// <summary>
|
|
/// 比较两个文档
|
|
/// 比较两个文档
|
|
/// </summary>
|
|
/// </summary>
|
|
/// <exception cref="ApplicationException"></exception>
|
|
/// <exception cref="ApplicationException"></exception>
|
|
- public void Compare()
|
|
|
|
|
|
+ public StringCompareResult StringCompare(string oldtext,string newtext)
|
|
{
|
|
{
|
|
- if (!System.IO.File.Exists(this.oldDocument) || !System.IO.File.Exists(this.newDocument))
|
|
|
|
- {
|
|
|
|
- throw new ApplicationException("指定的文件不存在!");
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
|
|
+ StringCompareResult result = new StringCompareResult();
|
|
var differ = new Differ();
|
|
var differ = new Differ();
|
|
- string oldtext = "";
|
|
|
|
- if (this.oldDocument.EndsWith(".doc"))
|
|
|
|
- {
|
|
|
|
- oldtext = GetDocTxt(this.oldDocument);
|
|
|
|
- }
|
|
|
|
- else
|
|
|
|
- {
|
|
|
|
- oldtext = GetDocxTxt(this.oldDocument);
|
|
|
|
- }
|
|
|
|
|
|
|
|
- _oldDocCount = oldtext.Length;
|
|
|
|
-
|
|
|
|
- string newtext = "";
|
|
|
|
- if (this.newDocument.EndsWith(".doc"))
|
|
|
|
- {
|
|
|
|
- newtext = GetDocTxt(this.newDocument);
|
|
|
|
- }
|
|
|
|
- else
|
|
|
|
- {
|
|
|
|
- newtext = GetDocxTxt(this.newDocument);
|
|
|
|
- }
|
|
|
|
- _newDocCount = newtext.Length;
|
|
|
|
|
|
+ if(oldtext == null) { oldtext = ""; }
|
|
|
|
+ if(newtext == null) { newtext = ""; }
|
|
|
|
|
|
|
|
+ result.oldWordCount = oldtext.Length;
|
|
|
|
+ result.newWordCount = newtext.Length;
|
|
|
|
+
|
|
var diff = differ.CreateCharacterDiffs(oldtext, newtext, true);
|
|
var diff = differ.CreateCharacterDiffs(oldtext, newtext, true);
|
|
- _EditCount = diff.DiffBlocks.Count;
|
|
|
|
|
|
+ result.EditCount = diff.DiffBlocks.Count;
|
|
|
|
|
|
int iDeff = 0;
|
|
int iDeff = 0;
|
|
int lastPos = 0;
|
|
int lastPos = 0;
|
|
|
|
|
|
- _CompareResultString = "<p>";
|
|
|
|
|
|
+ string _CompareResultString = "";
|
|
string lastResult = "";
|
|
string lastResult = "";
|
|
foreach (var change in diff.DiffBlocks)
|
|
foreach (var change in diff.DiffBlocks)
|
|
{
|
|
{
|
|
iDeff += change.DeleteCountA + change.InsertCountB;
|
|
iDeff += change.DeleteCountA + change.InsertCountB;
|
|
- _DeleteCount += change.DeleteCountA;
|
|
|
|
- _InsertCount += change.InsertCountB;
|
|
|
|
|
|
+ result.DeleteCount += change.DeleteCountA;
|
|
|
|
+ result.InsertCount += change.InsertCountB;
|
|
|
|
|
|
lastResult += oldtext.Substring(lastPos, change.DeleteStartA - lastPos);
|
|
lastResult += oldtext.Substring(lastPos, change.DeleteStartA - lastPos);
|
|
_CompareResultString += oldtext.Substring(lastPos, change.DeleteStartA - lastPos);
|
|
_CompareResultString += oldtext.Substring(lastPos, change.DeleteStartA - lastPos);
|
|
@@ -188,126 +334,13 @@ namespace wispro.sp.utility
|
|
lastResult += oldtext.Substring(lastPos);
|
|
lastResult += oldtext.Substring(lastPos);
|
|
_CompareResultString += oldtext.Substring(lastPos);
|
|
_CompareResultString += oldtext.Substring(lastPos);
|
|
_CompareResultString = _CompareResultString.Replace("\r\n", "<br/>");
|
|
_CompareResultString = _CompareResultString.Replace("\r\n", "<br/>");
|
|
-
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private string GetDocTxt(string filePath)
|
|
|
|
- {
|
|
|
|
- Application word = null;
|
|
|
|
- Document doc = null;
|
|
|
|
- string content = string.Empty;
|
|
|
|
-
|
|
|
|
- try
|
|
|
|
- {
|
|
|
|
- // 创建Word应用实例
|
|
|
|
- word = new Application();
|
|
|
|
- // 打开Word文档
|
|
|
|
- System.IO.FileInfo fileInfo = new System.IO.FileInfo(filePath);
|
|
|
|
- doc = word.Documents.Open(fileInfo.FullName);
|
|
|
|
- // 读取文档内容
|
|
|
|
- content = doc.Content.Text;
|
|
|
|
-
|
|
|
|
- List<string> lines = content.Split("\r").ToList();
|
|
|
|
- return List2String(lines);
|
|
|
|
|
|
|
|
- }
|
|
|
|
- catch (Exception ex)
|
|
|
|
- {
|
|
|
|
- throw new Exception($"读取Word文档时发生错误: {ex.Message}");
|
|
|
|
- }
|
|
|
|
- finally
|
|
|
|
- {
|
|
|
|
- // 关闭文档
|
|
|
|
- if (doc != null)
|
|
|
|
- {
|
|
|
|
- doc.Close();
|
|
|
|
- #pragma warning disable CA1416 // 验证平台兼容性
|
|
|
|
- System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
|
|
|
|
- #pragma warning restore CA1416 // 验证平台兼容性
|
|
|
|
- }
|
|
|
|
- // 退出Word应用
|
|
|
|
- if (word != null)
|
|
|
|
- {
|
|
|
|
- word.Quit();
|
|
|
|
- #pragma warning disable CA1416 // 验证平台兼容性
|
|
|
|
- System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
|
|
|
|
- #pragma warning restore CA1416 // 验证平台兼容性
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private string GetDocxTxt(string filepath)
|
|
|
|
- {
|
|
|
|
- var oldtext = getDocxMainXml(filepath);
|
|
|
|
- var oldlines = ExtractWPTextFromXml(oldtext);
|
|
|
|
- oldtext = List2String(oldlines);
|
|
|
|
-
|
|
|
|
- return oldtext;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private string List2String(List<string> lines)
|
|
|
|
- {
|
|
|
|
- StringBuilder sb = new StringBuilder();
|
|
|
|
- foreach (var line in lines)
|
|
|
|
- {
|
|
|
|
- if (!string.IsNullOrEmpty(line))
|
|
|
|
- {
|
|
|
|
- sb.Append(line.Trim() + "\r\n");
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ result.CompareResultString = _CompareResultString;
|
|
|
|
|
|
- return sb.ToString();
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private string getDocxMainXml(string filePath)
|
|
|
|
- {
|
|
|
|
- string text = string.Empty;
|
|
|
|
- using (Package package = Package.Open(filePath, FileMode.Open))
|
|
|
|
- {
|
|
|
|
- var Parts = package.GetParts();
|
|
|
|
-
|
|
|
|
- foreach (var part in Parts)
|
|
|
|
- {
|
|
|
|
- if (part.ContentType.StartsWith("application/vnd.openxmlformats-officedocument.wordprocessingml.document.main"))
|
|
|
|
- {
|
|
|
|
- using (Stream stream = part.GetStream())
|
|
|
|
- {
|
|
|
|
- StreamReader reader = new StreamReader(stream);
|
|
|
|
- text = reader.ReadToEnd();
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ return result;
|
|
|
|
|
|
- return text;
|
|
|
|
- }
|
|
|
|
}
|
|
}
|
|
|
|
|
|
- private List<string> ExtractWPTextFromXml(string xmlText)
|
|
|
|
- {
|
|
|
|
- List<string> lines = new List<string>();
|
|
|
|
- // 使用正则表达式匹配 <w:t> 标签的内容
|
|
|
|
- MatchCollection matches = Regex.Matches(xmlText, "(<w:p\\s.*?>|<w:p>)(.*?)</w:p>");
|
|
|
|
-
|
|
|
|
- foreach (Match match in matches)
|
|
|
|
- {
|
|
|
|
- lines.Add(ExtractWtTextFromXml(match.Groups[2].Value));
|
|
|
|
- }
|
|
|
|
- return lines;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private string ExtractWtTextFromXml(string xmlText)
|
|
|
|
- {
|
|
|
|
- // 使用正则表达式匹配 <w:t> 标签的内容
|
|
|
|
- MatchCollection matches = Regex.Matches(xmlText, "(<w:t\\s.*?>|<w:t>)(.*?)</w:t>");
|
|
|
|
- StringBuilder sb = new StringBuilder();
|
|
|
|
- foreach (Match match in matches)
|
|
|
|
- {
|
|
|
|
- sb.Append(match.Groups[2].Value);
|
|
|
|
- }
|
|
|
|
- return sb.ToString();
|
|
|
|
- }
|
|
|
|
|
|
+
|
|
}
|
|
}
|
|
}
|
|
}
|