CosineSimilarity.cs 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Threading.Tasks;
  6. namespace wispro.sp.utility
  7. {
  /// <summary>
  /// Computes the cosine similarity of two strings from their term-frequency vectors.
  /// Terms are produced by <c>Jieba_Segmenter.Cut</c> after line breaks are stripped.
  /// </summary>
  public class CosineSimilarity
  {
      /// <summary>
      /// Calculates the cosine similarity between the term-frequency vectors of two strings.
      /// </summary>
      /// <param name="s1">First text. A null value is treated as containing no terms.</param>
      /// <param name="s2">Second text. A null value is treated as containing no terms.</param>
      /// <returns>
      /// The dot product of the two vectors divided by the product of their Euclidean norms.
      /// Returns 0 when either text yields no terms, instead of the NaN that 0/0 would produce.
      /// </returns>
      public static double Calculate(string s1, string s2)
      {
          // Convert both strings into term-frequency vectors.
          var vector1 = GetTermFrequencyVector(s1);
          var vector2 = GetTermFrequencyVector(s2);

          double dotProduct = 0;
          double norm1 = 0;
          double norm2 = 0;

          // Terms that appear only in one vector contribute nothing to the dot product,
          // so walking the first vector is enough for it; norm1 is accumulated on the way.
          foreach (var entry in vector1)
          {
              double v1 = entry.Value;
              double v2;
              vector2.TryGetValue(entry.Key, out v2); // v2 stays 0 when the term is absent

              dotProduct += v1 * v2;
              norm1 += v1 * v1;
          }

          foreach (var v2 in vector2.Values)
          {
              norm2 += v2 * v2;
          }

          // Every stored frequency is at least 1, so a zero norm means the vector is empty.
          // Guard it explicitly to avoid dividing zero by zero.
          if (norm1 == 0 || norm2 == 0)
          {
              return 0;
          }

          return dotProduct / (Math.Sqrt(norm1) * Math.Sqrt(norm2));
      }

      /// <summary>
      /// Builds a map from each term of <paramref name="text"/> to its occurrence count.
      /// </summary>
      /// <param name="text">Text to segment. A null value yields an empty vector.</param>
      /// <returns>A dictionary keyed by term whose value is the number of occurrences.</returns>
      private static Dictionary<string, double> GetTermFrequencyVector(string text)
      {
          var vector = new Dictionary<string, double>();
          if (text == null)
          {
              return vector;
          }

          // Removing every '\r' and every '\n' also removes each "\r\n" pair,
          // so a separate "\r\n" replacement is unnecessary.
          var normalized = text.Replace("\r", "").Replace("\n", "");

          // NOTE(review): assumes Cut returns a non-null sequence of string terms - confirm against Jieba_Segmenter.
          var terms = new Jieba_Segmenter().Cut(normalized);

          foreach (var term in terms)
          {
              double count;
              vector.TryGetValue(term, out count); // count stays 0 for a term not seen before
              vector[term] = count + 1;
          }

          return vector;
      }
  }
  42. }