HtmlToRtfConverter.cs 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. using System.Windows.Forms;
  2. using System.Text.RegularExpressions;
  3. using System.Web;
  4. using System.Drawing;
  5. using System;
  6. using System.Linq;
  7. namespace wispro.sp.winClient
  8. {
  /// <summary>
  /// Converts a small subset of HTML (paragraphs, line breaks, and
  /// <c>strike</c>/<c>u</c> elements with optional inline style) into RTF by
  /// replaying the content into an off-screen <see cref="RichTextBox"/> and
  /// reading back its <see cref="RichTextBox.Rtf"/> property.
  /// Markup that is not recognised is appended to the output as literal text.
  /// </summary>
  public class HtmlToRtfConverter
  {
      // Paragraph delimiters: <p>, </p>, and <p ...attributes...>, in any letter case.
      private static readonly Regex ParagraphTagRegex = new Regex(
          @"</?p(?:\s[^>]*)?>",
          RegexOptions.IgnoreCase);

      // One inline token per match:
      //   1. <strike>/<u> element with only text inside; the style attribute is
      //      optional and may appear anywhere in the attribute list,
      //   2. <br>, <br/> or <br />,
      //   3. a run of plain text up to the next '<'.
      private static readonly Regex InlineElementRegex = new Regex(
          @"<(?<tag>strike|u)(?:\s[^>]*?(?:(?<![\w-])style\s*=\s*(?<q>['""])(?<style>[^>]*?)\k<q>[^>]*)?)?>(?<text>[^<]*)</\k<tag>\s*>" +
          @"|(?<br><br\s*/?>)" +
          @"|(?<text>[^<]+)",
          RegexOptions.IgnoreCase);

      // The CSS "color" property only; the look-behind rejects "background-color",
      // "border-color" and similar properties that merely end in "color".
      private static readonly Regex ColorPropertyRegex = new Regex(
          @"(?<![\w-])color\s*:\s*(?<value>[^;]+)",
          RegexOptions.IgnoreCase);

      /// <summary>
      /// Converts an HTML fragment to an RTF document string.
      /// </summary>
      /// <param name="html">
      /// HTML fragment to convert. It is HTML-decoded before parsing.
      /// <c>null</c> is treated as an empty fragment.
      /// </param>
      /// <returns>The RTF text produced by the rich text box.</returns>
      /// <exception cref="Exception">
      /// Thrown when conversion fails; the original failure is available through
      /// <see cref="Exception.InnerException"/>.
      /// </exception>
      public string ConvertHtmlToRtf(string html)
      {
          try
          {
              using (RichTextBox rtBox = new RichTextBox())
              {
                  string decodedHtml = HttpUtility.HtmlDecode(html ?? string.Empty);
                  ProcessHtmlContent(rtBox, decodedHtml);
                  return rtBox.Rtf;
              }
          }
          catch (Exception ex)
          {
              // Keep the original exception as InnerException so the real stack
              // trace is not lost; the message text is unchanged for callers.
              throw new Exception($"HTML转RTF时发生错误: {ex.Message}", ex);
          }
      }

      /// <summary>
      /// Splits the HTML on paragraph tags and appends every non-blank paragraph
      /// to the rich text box, making sure each one ends with a line break.
      /// </summary>
      private void ProcessHtmlContent(RichTextBox rtBox, string html)
      {
          foreach (string paragraph in ParagraphTagRegex.Split(html))
          {
              if (string.IsNullOrWhiteSpace(paragraph))
              {
                  continue;
              }

              // Handle the inline elements inside the paragraph.
              ProcessInlineElements(rtBox, paragraph);

              // Terminate the paragraph unless it already ends with a line break
              // (for example because its last element was a <br>).
              if (!EndsWithLineBreak(rtBox))
              {
                  rtBox.AppendText(Environment.NewLine);
              }
          }
      }

      /// <summary>
      /// Reports whether the text currently in the box ends with a line break.
      /// The last character is compared against both '\n' and '\r' rather than
      /// against <see cref="Environment.NewLine"/>, because the rich text box
      /// does not necessarily report line breaks as "\r\n".
      /// </summary>
      private static bool EndsWithLineBreak(RichTextBox rtBox)
      {
          string current = rtBox.Text;
          if (current.Length == 0)
          {
              return false;
          }

          char last = current[current.Length - 1];
          return last == '\n' || last == '\r';
      }

      /// <summary>
      /// Appends the inline content of one paragraph: formatted
      /// <c>strike</c>/<c>u</c> runs, line breaks for <c>br</c>, and plain text.
      /// Input that no alternative of the pattern matches is appended verbatim
      /// unless it is blank.
      /// </summary>
      private void ProcessInlineElements(RichTextBox rtBox, string html)
      {
          int lastIndex = 0;

          foreach (Match match in InlineElementRegex.Matches(html))
          {
              // Text between the previous match and this one was not recognised
              // by the pattern; emit it as-is.
              if (match.Index > lastIndex)
              {
                  AppendIfNotBlank(rtBox, html.Substring(lastIndex, match.Index - lastIndex));
              }

              string text = match.Groups["text"].Value;

              if (match.Groups["br"].Success)
              {
                  // Line-break tag.
                  rtBox.AppendText(Environment.NewLine);
              }
              else if (match.Groups["tag"].Success)
              {
                  // Formatting tag.
                  ApplyFormatting(rtBox, match.Groups["tag"].Value, match.Groups["style"].Value, text);
              }
              else if (text.Length > 0)
              {
                  // Plain text run.
                  rtBox.AppendText(text);
              }

              lastIndex = match.Index + match.Length;
          }

          // Whatever is left after the last match.
          if (lastIndex < html.Length)
          {
              AppendIfNotBlank(rtBox, html.Substring(lastIndex));
          }
      }

      /// <summary>Appends <paramref name="text"/> unless it is empty or whitespace only.</summary>
      private static void AppendIfNotBlank(RichTextBox rtBox, string text)
      {
          if (!string.IsNullOrWhiteSpace(text))
          {
              rtBox.AppendText(text);
          }
      }

      /// <summary>
      /// Appends <paramref name="text"/> at the end of the box with strike-out
      /// and/or underline and an optional text colour, then restores the
      /// selection font and colour that were active before the call.
      /// </summary>
      /// <param name="rtBox">Target rich text box.</param>
      /// <param name="tag">Element name, <c>strike</c> or <c>u</c> (case-insensitive).</param>
      /// <param name="style">Content of the element's style attribute; may be empty.</param>
      /// <param name="text">Text to append with the formatting applied.</param>
      private void ApplyFormatting(RichTextBox rtBox, string tag, string style, string text)
      {
          // Remember the current formatting so it can be restored afterwards.
          Font originalFont = rtBox.SelectionFont ?? rtBox.Font;
          Color originalColor = rtBox.SelectionColor;

          FontStyle fontStyle = FontStyle.Regular;
          Color textColor = originalColor;

          // Derive the font style from the tag name and the inline style.
          if (string.Equals(tag, "strike", StringComparison.OrdinalIgnoreCase)
              || style.IndexOf("line-through", StringComparison.OrdinalIgnoreCase) >= 0)
          {
              fontStyle |= FontStyle.Strikeout;
          }

          if (string.Equals(tag, "u", StringComparison.OrdinalIgnoreCase)
              || style.IndexOf("underline", StringComparison.OrdinalIgnoreCase) >= 0)
          {
              fontStyle |= FontStyle.Underline;
          }

          // Text colour, if the style declares one.
          Match colorMatch = ColorPropertyRegex.Match(style);
          if (colorMatch.Success)
          {
              try
              {
                  textColor = ColorTranslator.FromHtml(colorMatch.Groups["value"].Value.Trim());
              }
              catch (Exception)
              {
                  // Deliberate best effort: an unparseable colour value keeps the
                  // current colour instead of failing the whole conversion.
              }
          }

          // Move the caret to the end so the formatting applies to the new text only.
          rtBox.SelectionStart = rtBox.TextLength;
          rtBox.SelectionLength = 0;

          // The temporary font is only needed while the text is appended;
          // dispose it afterwards so its GDI handle is not leaked.
          using (Font styledFont = new Font(originalFont, fontStyle))
          {
              rtBox.SelectionFont = styledFont;
              rtBox.SelectionColor = textColor;
              rtBox.AppendText(text);
          }

          // Restore the previous formatting for whatever is appended next.
          rtBox.SelectionFont = originalFont;
          rtBox.SelectionColor = originalColor;
      }
  }
  130. }