Example usage for org.apache.mahout.classifier.naivebayes NaiveBayesModel numLabels

List of usage examples for org.apache.mahout.classifier.naivebayes NaiveBayesModel numLabels

Introduction

On this page you can find example usage of the numLabels method of org.apache.mahout.classifier.naivebayes.NaiveBayesModel.

Prototype

public int numLabels() 

Source Link

Usage

From source file: com.chimpler.example.bayes.TopCategoryWords.java

License:Apache License

/**
 * Prints summary statistics for a trained naive Bayes model.
 *
 * <p>Expects four arguments, in order: the model path, the label index path,
 * the dictionary path and the document frequency path. With fewer than four
 * arguments a usage line is printed and the method returns.
 *
 * <p>Otherwise it prints, in this order: the entries returned by
 * {@code getTopWords(documentFrequency, 10)}, the number of labels, the number
 * of training documents (read from key {@code -1} of the document frequency
 * map), and for every label up to ten words with their model weights.
 *
 * @param args command-line arguments as described above
 * @throws Exception if any of the inputs cannot be loaded
 */
public static void main(String[] args) throws Exception {
    // All four paths are mandatory; print the usage line and stop otherwise.
    if (args.length < 4) {
        System.out.println("Arguments: [model] [label index] [dictionnary] [document frequency]");
        return;
    }

    Configuration conf = new Configuration();

    // The model exposes one weight per (labelId, wordId) pair via weight(...).
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path(args[0]), conf);

    // Lookup tables: label id => label name, word id => word, word id => count.
    Map<Integer, String> labelNames = BayesUtils.readLabelIndex(conf, new Path(args[1]));
    Map<Integer, String> wordsById = readInverseDictionnary(conf, new Path(args[2]));
    Map<Integer, Long> docFrequencies = readDocumentFrequency(conf, new Path(args[3]));

    Map<Integer, Long> frequentWords = getTopWords(docFrequencies, 10);
    System.out.println("Top words");
    for (Map.Entry<Integer, Long> frequent : frequentWords.entrySet()) {
        String word = wordsById.get(frequent.getKey());
        Long count = frequent.getValue();
        System.out.println(" - " + word + ": " + count);
    }

    int totalLabels = labelNames.size();
    // Key -1 is read as the training-set document count (see the message below).
    int totalDocuments = docFrequencies.get(-1).intValue();

    System.out.println("Number of labels: " + totalLabels);
    System.out.println("Number of documents in training set: " + totalDocuments);

    int labelId = 0;
    while (labelId < model.numLabels()) {
        // Iteration order follows WordWeight's natural ordering
        // (presumably highest weight first -- TODO confirm against WordWeight).
        // NOTE(review): a TreeSet keeps only one of any elements that compare as equal.
        SortedSet<WordWeight> ranked = new TreeSet<WordWeight>();
        int wordId = 0;
        while (wordId < model.numFeatures()) {
            ranked.add(new WordWeight(wordId, model.weight(labelId, wordId)));
            wordId++;
        }

        System.out.println("Top 10 words for label " + labelNames.get(labelId));

        // Print at most the first ten entries of the sorted set.
        int remaining = 10;
        for (WordWeight candidate : ranked) {
            System.out.println(" - " + wordsById.get(candidate.getWordId()) + ": " + candidate.getWeight());
            remaining--;
            if (remaining <= 0) {
                break;
            }
        }
        labelId++;
    }
}