Example usage for org.apache.mahout.classifier.naivebayes NaiveBayesModel weight

List of usage examples for org.apache.mahout.classifier.naivebayes NaiveBayesModel weight

Introduction

On this page you can find example usage of the method org.apache.mahout.classifier.naivebayes.NaiveBayesModel.weight(int label, int feature).

Prototype

public double weight(int label, int feature) 

Source Link

Usage

From source file: com/chimpler/example/bayes/TopCategoryWords.java

License:Apache License

/**
 * Prints the most frequent words of the training set and, for every label of
 * a trained naive Bayes model, the first ten words in {@code WordWeight} order.
 *
 * <p>Expected arguments, in order:
 * <ol>
 *   <li>path of the serialized model</li>
 *   <li>path of the label index</li>
 *   <li>path of the dictionary</li>
 *   <li>path of the document frequency file</li>
 * </ol>
 * When fewer than four arguments are given, a usage line is printed and the
 * method returns without doing anything else.
 *
 * @param args command-line arguments as described above
 * @throws IllegalStateException if the document frequency data has no entry
 *         under key {@code -1}, which this method reads as the document count
 * @throws Exception if reading the model or any of the auxiliary files fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 4) {
        System.out.println("Arguments: [model] [label index] [dictionnary] [document frequency]");
        return;
    }
    String modelPath = args[0];
    String labelIndexPath = args[1];
    String dictionaryPath = args[2];
    String documentFrequencyPath = args[3];

    Configuration configuration = new Configuration();

    // The model is queried below as weight(labelId, wordId).
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration);

    // labels maps labelId => label name (it is looked up by labelId below).
    Map<Integer, String> labels = BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath));
    // inverseDictionary maps wordId => word.
    Map<Integer, String> inverseDictionary = readInverseDictionnary(configuration, new Path(dictionaryPath));
    Map<Integer, Long> documentFrequency = readDocumentFrequency(configuration,
            new Path(documentFrequencyPath));

    Map<Integer, Long> topWords = getTopWords(documentFrequency, 10);
    System.out.println("Top words");
    for (Map.Entry<Integer, Long> entry : topWords.entrySet()) {
        System.out.println(" - " + inverseDictionary.get(entry.getKey()) + ": " + entry.getValue());
    }

    int labelCount = labels.size();

    // The entry under key -1 is read as the number of training documents.
    // Fail with an explicit message instead of a bare NullPointerException
    // when that entry is missing, and keep the value as a long so a large
    // count is not truncated.
    Long documentCountEntry = documentFrequency.get(-1);
    if (documentCountEntry == null) {
        throw new IllegalStateException("Document frequency data at " + documentFrequencyPath
                + " has no entry for key -1 (document count)");
    }
    long documentCount = documentCountEntry.longValue();

    System.out.println("Number of labels: " + labelCount);
    System.out.println("Number of documents in training set: " + documentCount);

    for (int labelId = 0; labelId < model.numLabels(); labelId++) {
        // TreeSet without a comparator: iteration follows WordWeight's natural
        // ordering. NOTE(review): presumably that ordering puts the heaviest
        // words first, and entries that compare as equal are collapsed by the
        // set -- confirm against WordWeight.compareTo.
        SortedSet<WordWeight> wordWeights = new TreeSet<WordWeight>();
        for (int wordId = 0; wordId < model.numFeatures(); wordId++) {
            WordWeight w = new WordWeight(wordId, model.weight(labelId, wordId));
            wordWeights.add(w);
        }
        System.out.println("Top 10 words for label " + labels.get(labelId));
        int printed = 0;
        for (WordWeight w : wordWeights) {
            System.out.println(" - " + inverseDictionary.get(w.getWordId()) + ": " + w.getWeight());
            printed++;
            // Only the first ten entries in set order are reported.
            if (printed >= 10) {
                break;
            }
        }
    }
}