有人熟悉文本分类中的 KNN 算法吗?请帮我看一下这段代码是什么意思

strayly 2009-12-24
我在网上下载了一个文本分类的示例程序,对其中 KNN.java 里的 LazyLearning 方法不太明白。
有熟悉的朋友能帮我看一下吗?主要想知道它的两个参数 v 和 vectors 分别是做什么用的。
package text_category;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import edu.udo.cs.wvtool.main.WVTWordVector;

/**
 * k-nearest-neighbour (KNN) text classifier working on WVTool word vectors.
 *
 * <p>KNN is a "lazy" learner: there is no training step. To classify a
 * document, its word vector is compared with every labelled training vector,
 * the {@code k} most similar training documents are selected, and each of
 * them votes for its own class with a weight equal to its similarity.
 */
public class KNN
{
    /** Number of nearest neighbours that take part in the vote. */
    private static final int DEFAULT_K = 15;

    /** Maximum number of best-scoring classes reported to the caller. */
    private static final int TOP_CLASSES = 3;

    /** Display label for each class index used by the training corpus. */
    private static final Map<Integer, String> CLASS_NAMES = new HashMap<Integer, String>();

    static
    {
        CLASS_NAMES.put(0, "汽车"); // automobiles
        CLASS_NAMES.put(1, "教育"); // education
        CLASS_NAMES.put(2, "娱乐"); // entertainment
        CLASS_NAMES.put(3, "财经"); // finance
        CLASS_NAMES.put(4, "房产"); // real estate
        CLASS_NAMES.put(5, "军事"); // military
        CLASS_NAMES.put(6, "奥运"); // Olympics
        CLASS_NAMES.put(7, "时政"); // current affairs
        CLASS_NAMES.put(8, "体育"); // sports
        CLASS_NAMES.put(9, "科技"); // technology
    }

    /** Upper bound on the neighbourhood size; never modified after construction. */
    private final int k = DEFAULT_K;

    public KNN()
    {
    }

    /**
     * Classifies one document against a labelled training set.
     *
     * <p>Steps:
     * <ol>
     *   <li>For every training vector, compute the dot product of its term
     *       weights with those of {@code v} over the terms both contain.
     *       (This equals cosine similarity only if the vectors are already
     *       length-normalised, which this method does not check.)</li>
     *   <li>Pick the {@code min(k, vectors.length)} most similar training
     *       documents.</li>
     *   <li>Add each selected document's similarity to the score of its class
     *       ({@code getDocumentInfo().getClassValue()}).</li>
     *   <li>Report up to three best classes, each with its share of the total
     *       similarity of the selected neighbours.</li>
     * </ol>
     *
     * <p>The ranking and the neighbour list are also printed to
     * {@code System.out}. The caller's {@code vectors} array is not reordered.
     *
     * @param v          word vector of the document to classify (the query)
     * @param vectors    word vectors of the labelled training documents; each
     *                   must carry a class value in {@code [0, numClasses)}
     * @param numClasses number of distinct classes in the training set
     * @return list of {@link CategoryResult}, best class first, containing only
     *         classes with a positive score (at most three); {@code null} if
     *         {@code v} or {@code vectors} is {@code null}
     * @throws ArrayIndexOutOfBoundsException if a selected neighbour's class
     *         value is outside {@code [0, numClasses)}
     */
    public List LazyLearning(WVTWordVector v, WVTWordVector[] vectors, int numClasses)
    {
        if (v == null || vectors == null)
        {
            return null;
        }

        System.out.println("number of documents : " + vectors.length);
        System.out.println("number of classes: " + numClasses);

        // Use a local neighbourhood size: shrinking the field would leak a
        // small training set's limit into every later call.
        int neighbours = Math.min(k, vectors.length);

        // Rank a private copy so the caller's training array keeps its order.
        WVTWordVector[] ranked = vectors.clone();
        double[] sim = new double[ranked.length];
        Map<?, ?> queryWeights = v.getWordMap();
        for (int i = 0; i < ranked.length; i++)
        {
            sim[i] = dotProduct(queryWeights, ranked[i].getWordMap());
        }

        // Partial selection sort: only the first `neighbours` slots need to
        // hold the largest similarities, in descending order.
        for (int i = 0; i < neighbours; i++)
        {
            for (int j = i + 1; j < sim.length; j++)
            {
                if (sim[j] > sim[i])
                {
                    double simTemp = sim[i];
                    sim[i] = sim[j];
                    sim[j] = simTemp;

                    WVTWordVector vectorTemp = ranked[i];
                    ranked[i] = ranked[j];
                    ranked[j] = vectorTemp;
                }
            }
        }

        // Similarity-weighted vote of the nearest neighbours.
        double[] classSim = new double[numClasses];
        double totalSim = 0;
        for (int i = 0; i < neighbours; i++)
        {
            int classValue = ranked[i].getDocumentInfo().getClassValue();
            classSim[classValue] += sim[i];
            totalSim += sim[i];
        }

        // index[i] tracks which class the score now stored in classSim[i]
        // belongs to once the scores start moving around.
        int[] index = new int[numClasses];
        for (int i = 0; i < numClasses; i++)
        {
            index[i] = i;
        }

        // Bring the best classes to the front; clamp so that fewer than
        // three classes does not read past the end of the arrays.
        int top = Math.min(TOP_CLASSES, numClasses);
        for (int i = 0; i < top; i++)
        {
            for (int j = i + 1; j < numClasses; j++)
            {
                if (classSim[j] > classSim[i])
                {
                    double simTemp = classSim[i];
                    classSim[i] = classSim[j];
                    classSim[j] = simTemp;

                    int indexTemp = index[i];
                    index[i] = index[j];
                    index[j] = indexTemp;
                }
            }
        }

        List<CategoryResult> result = new ArrayList<CategoryResult>();
        for (int i = 0; i < top; i++)
        {
            if (classSim[i] > 0)
            {
                result.add(new CategoryResult(className(index[i]), classSim[i] / totalSim));
            }
        }

        for (int i = 0; i < index.length; i++)
        {
            System.out.println(index[i] + ": " + classSim[i]);
        }

        for (int i = 0; i < neighbours; i++)
        {
            String id = ranked[i].getDocumentInfo().getSourceName();
            System.out.println(id + ": " + sim[i]);
        }

        return result;
    }

    /** Sums the products of the weights of all terms present in both maps. */
    private static double dotProduct(Map<?, ?> left, Map<?, ?> right)
    {
        double sum = 0;
        for (Map.Entry<?, ?> entry : left.entrySet())
        {
            Object term = entry.getKey();
            if (right.containsKey(term))
            {
                sum += toDouble(entry.getValue()) * toDouble(right.get(term));
            }
        }
        return sum;
    }

    /** Reads a term weight as a double; non-Double values are parsed from their string form. */
    private static double toDouble(Object weight)
    {
        if (weight instanceof Double)
        {
            return ((Double) weight).doubleValue();
        }
        return Double.parseDouble(weight.toString());
    }

    /** Returns the label of a class index, or the index itself as text when no label is registered. */
    private static String className(int classIndex)
    {
        String name = CLASS_NAMES.get(classIndex);
        return (name != null) ? name : String.valueOf(classIndex);
    }
}

/**
 * One entry of a classification result: a category label together with the
 * score assigned to that category.
 */
class CategoryResult {

    /** Label of the category. */
    private String categoryName;

    /** Score associated with the category. */
    private double similarity;

    /**
     * Creates a result entry.
     *
     * @param categoryname label of the category
     * @param similarity   score associated with the category
     */
    public CategoryResult(String categoryname, double similarity) {
        this.similarity = similarity;
        this.categoryName = categoryname;
    }

    /** @return the category label */
    public String getCategoryName() {
        return this.categoryName;
    }

    /** @param categoryName the new category label */
    public void setCategoryName(String categoryName) {
        this.categoryName = categoryName;
    }

    /** @return the score associated with the category */
    public double getSimilarity() {
        return this.similarity;
    }

    /** @param similarity the new score */
    public void setSimilarity(double similarity) {
        this.similarity = similarity;
    }
}




有熟悉的朋友能帮我看一下吗?public List LazyLearning(WVTWordVector v, WVTWordVector[] vectors, int numClasses) 这个方法主要是做什么的?它的参数 v 和 vectors 分别是做什么用的?
Global site tag (gtag.js) - Google Analytics