using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace wispro.sp.utility
{
    /// <summary>
    /// Computes the cosine similarity between two texts, using the occurrence
    /// counts of the terms produced by <c>Jieba_Segmenter</c> as the vectors.
    /// </summary>
    public class CosineSimilarity
    {
        /// <summary>
        /// Calculates the cosine similarity of the term-frequency vectors of two strings.
        /// </summary>
        /// <param name="s1">First text; <c>null</c> is treated as a text with no terms.</param>
        /// <param name="s2">Second text; <c>null</c> is treated as a text with no terms.</param>
        /// <returns>
        /// The dot product of the two term-frequency vectors divided by the product
        /// of their Euclidean norms. Returns 0 when either text yields no terms,
        /// because the similarity is undefined for a zero vector and the plain
        /// formula would otherwise produce <see cref="double.NaN"/>.
        /// </returns>
        public static double Calculate(string s1, string s2)
        {
            // Convert both strings into term-frequency vectors.
            var vector1 = GetTermFrequencyVector(s1);
            var vector2 = GetTermFrequencyVector(s2);

            // Only terms present in both vectors contribute to the dot product,
            // so a single pass over the first vector is enough for it.
            double dotProduct = 0;
            double norm1 = 0;
            foreach (var entry in vector1)
            {
                double v1 = entry.Value;
                norm1 += v1 * v1;

                if (vector2.TryGetValue(entry.Key, out int otherCount))
                {
                    dotProduct += v1 * otherCount;
                }
            }

            double norm2 = 0;
            foreach (int count in vector2.Values)
            {
                double v2 = count;
                norm2 += v2 * v2;
            }

            // Guard against 0/0: a text without terms has no direction to compare.
            if (norm1 == 0 || norm2 == 0)
            {
                return 0;
            }

            return dotProduct / (Math.Sqrt(norm1) * Math.Sqrt(norm2));
        }

        /// <summary>
        /// Builds a term -> occurrence-count map for the given text.
        /// </summary>
        /// <param name="text">Text to segment; <c>null</c> yields an empty map.</param>
        /// <returns>A dictionary mapping each segmented term to how often it occurs.</returns>
        private static Dictionary<string, int> GetTermFrequencyVector(string text)
        {
            var vector = new Dictionary<string, int>();
            if (text == null)
            {
                return vector;
            }

            // Line breaks are removed before segmentation so they never reach the segmenter.
            // NOTE(review): assumes Jieba_Segmenter.Cut yields string terms - confirm against its declaration.
            var terms = new Jieba_Segmenter().Cut(text.Replace("\r\n", "").Replace("\r", "").Replace("\n", ""));

            foreach (var term in terms)
            {
                // TryGetValue leaves count at 0 for a term that has not been seen yet.
                vector.TryGetValue(term, out int count);
                vector[term] = count + 1;
            }

            return vector;
        }
    }
}