有人熟悉文本分类KNN吗?帮我看下代码是什么意思
strayly
2009-12-24
在网上下载了一个文本分类的实例程序,对里面KNN.java里的LazyLearning方法不是很明白,
有熟悉的能帮我看一下,主要是它里面的两个参数是做什么用的?

package text_category;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import edu.udo.cs.wvtool.main.WVTWordVector;

/**
 * Nearest-neighbour text classifier: a query document is compared against a set of
 * labelled documents, the most similar ones vote for their class, and the best-scoring
 * classes are reported.
 */
public class KNN {

    /** Upper bound on the number of nearest neighbours that vote. */
    private final int k = 15;

    /** Maximum number of classes reported by {@link #LazyLearning}. */
    private static final int TOP_CLASSES = 3;

    /** Display name for each numeric class value. */
    private static final Map<Integer, String> indexmap = new HashMap<Integer, String>();

    static {
        indexmap.put(0, "汽车");
        indexmap.put(1, "教育");
        indexmap.put(2, "娱乐");
        indexmap.put(3, "财经");
        indexmap.put(4, "房产");
        indexmap.put(5, "军事");
        indexmap.put(6, "奥运");
        indexmap.put(7, "时政");
        indexmap.put(8, "体育");
        indexmap.put(9, "科技");
    }

    public KNN() {
    }

    /**
     * Classifies {@code v} by similarity to the labelled documents in {@code vectors}.
     *
     * <p>Steps:
     * <ol>
     *   <li>For every element of {@code vectors}, compute the dot product of its word
     *       weights with those of {@code v} over the words they share.</li>
     *   <li>Pick the {@code min(k, vectors.length)} most similar documents.</li>
     *   <li>Add each picked document's similarity to the score of its class (taken from
     *       {@code getDocumentInfo().getClassValue()}).</li>
     *   <li>Report up to three classes with the highest score, each score divided by the
     *       summed similarity of the picked documents.</li>
     * </ol>
     *
     * <p>Diagnostic output (document count, class scores, neighbour source names) is
     * written to {@code System.out}. The {@code vectors} array is not modified.
     *
     * @param v          word vector of the document to classify
     * @param vectors    word vectors of the already-labelled documents; every class value
     *                   must lie in {@code [0, numClasses)}, otherwise an
     *                   {@link ArrayIndexOutOfBoundsException} is thrown
     * @param numClasses number of distinct class values
     * @return the best-scoring classes in descending score order, omitting classes whose
     *         score is not positive; {@code null} if {@code v} or {@code vectors} is
     *         {@code null}
     */
    public List<CategoryResult> LazyLearning(WVTWordVector v, WVTWordVector[] vectors, int numClasses) {
        if (v == null || vectors == null) {
            return null;
        }
        System.out.println("number of documents : " + vectors.length);
        System.out.println("number of classes: " + numClasses);

        // Per-call neighbour count: a small training set must not shrink k for later calls.
        int neighbours = Math.min(k, vectors.length);

        // Rank a private copy so the caller's array keeps its order.
        WVTWordVector[] ranked = vectors.clone();
        double[] sim = new double[ranked.length];
        Map<?, ?> queryWords = v.getWordMap();
        for (int i = 0; i < ranked.length; i++) {
            sim[i] = dotProduct(queryWords, ranked[i].getWordMap());
        }

        // Partial selection sort: only the first `neighbours` slots need to be in order.
        for (int i = 0; i < neighbours; i++) {
            for (int j = i + 1; j < sim.length; j++) {
                if (sim[j] > sim[i]) {
                    double simTmp = sim[i];
                    sim[i] = sim[j];
                    sim[j] = simTmp;
                    WVTWordVector vecTmp = ranked[i];
                    ranked[i] = ranked[j];
                    ranked[j] = vecTmp;
                }
            }
        }

        // Similarity-weighted vote of the nearest neighbours.
        double[] classSim = new double[numClasses];
        double totalSim = 0;
        for (int i = 0; i < neighbours; i++) {
            int classValue = ranked[i].getDocumentInfo().getClassValue();
            classSim[classValue] += sim[i];
            totalSim += sim[i];
        }

        // index[i] tracks which class ends up in slot i while the scores are reordered.
        int[] index = new int[classSim.length];
        for (int i = 0; i < index.length; i++) {
            index[i] = i;
        }
        // Never look past the end of the score array when there are fewer than three classes.
        int top = Math.min(TOP_CLASSES, classSim.length);
        for (int i = 0; i < top; i++) {
            for (int j = i + 1; j < classSim.length; j++) {
                if (classSim[j] > classSim[i]) {
                    double scoreTmp = classSim[i];
                    classSim[i] = classSim[j];
                    classSim[j] = scoreTmp;
                    int indexTmp = index[i];
                    index[i] = index[j];
                    index[j] = indexTmp;
                }
            }
        }

        List<CategoryResult> result = new ArrayList<CategoryResult>();
        for (int i = 0; i < top; i++) {
            if (classSim[i] > 0) {
                result.add(new CategoryResult(categoryName(index[i]), classSim[i] / totalSim));
            }
        }

        for (int i = 0; i < index.length; i++) {
            System.out.println(index[i] + ": " + classSim[i]);
        }
        for (int i = 0; i < neighbours; i++) {
            // The source name is printed as-is; no directory prefix is stripped.
            String id = ranked[i].getDocumentInfo().getSourceName();
            System.out.println(id + ": " + sim[i]);
        }
        return result;
    }

    /** Sums the products of the weights of every word present in both maps. */
    private static double dotProduct(Map<?, ?> a, Map<?, ?> b) {
        double sum = 0;
        for (Map.Entry<?, ?> entry : a.entrySet()) {
            Object word = entry.getKey();
            if (b.containsKey(word)) {
                sum += weight(entry.getValue()) * weight(b.get(word));
            }
        }
        return sum;
    }

    /** Reads a word weight through its string form, whatever its runtime type. */
    private static double weight(Object value) {
        return Double.parseDouble(value.toString());
    }

    /** Returns the display name of a class, or its number when no name is registered. */
    private static String categoryName(int classIndex) {
        String name = indexmap.get(classIndex);
        return (name != null) ? name : String.valueOf(classIndex);
    }
}

/** One classification outcome: a category name and its normalised similarity score. */
class CategoryResult {

    private String categoryName;
    private double similarity;

    /**
     * @param categoryname display name of the category
     * @param similarity   score assigned to the category
     */
    public CategoryResult(String categoryname, double similarity) {
        this.categoryName = categoryname;
        this.similarity = similarity;
    }

    public String getCategoryName() {
        return categoryName;
    }

    public void setCategoryName(String categoryName) {
        this.categoryName = categoryName;
    }

    public double getSimilarity() {
        return similarity;
    }

    public void setSimilarity(double similarity) {
        this.similarity = similarity;
    }
}

有熟悉的朋友能帮我看一下public List LazyLearning(WVTWordVector v, WVTWordVector[] vectors, int numClasses)这个方法主要是做什么的,它里面的v和vectors是做什么用的? |