Example usage for org.apache.mahout.classifier.naivebayes NaiveBayesModel numFeatures

List of usage examples for org.apache.mahout.classifier.naivebayes NaiveBayesModel numFeatures

Introduction

In this page you can find the example usage for org.apache.mahout.classifier.naivebayes NaiveBayesModel numFeatures.

Prototype

double numFeatures

To view the source code for org.apache.mahout.classifier.naivebayes NaiveBayesModel numFeatures, click the Source Link.

Usage

From source file:com.chimpler.example.bayes.TopCategoryWords.java

License:Apache License

/**
 * Prints summary statistics for a trained naive Bayes model: the most frequent
 * words of the training set, the number of labels and documents, and, for each
 * label, the first ten words in {@code WordWeight} ordering.
 *
 * <p>If fewer than four arguments are given, a usage line is printed and the
 * method returns without doing anything else.
 *
 * @param args {@code [0]} model path, {@code [1]} label index path,
 *             {@code [2]} dictionary path, {@code [3]} document frequency path
 * @throws IllegalStateException if the document frequency data has no entry for
 *                               key {@code -1}, which is read as the document count
 * @throws Exception             if loading any of the input files fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 4) {
        System.out.println("Arguments: [model] [label index] [dictionnary] [document frequency]");
        return;
    }
    String modelPath = args[0];
    String labelIndexPath = args[1];
    String dictionaryPath = args[2];
    String documentFrequencyPath = args[3];

    Configuration configuration = new Configuration();

    // The model is queried below as weight(labelId, wordId).
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration);

    // labels maps a label id to its label name.
    Map<Integer, String> labels = BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath));
    // inverseDictionary maps a word id back to the word itself.
    Map<Integer, String> inverseDictionary = readInverseDictionnary(configuration, new Path(dictionaryPath));
    Map<Integer, Long> documentFrequency = readDocumentFrequency(configuration,
            new Path(documentFrequencyPath));

    Map<Integer, Long> topWords = getTopWords(documentFrequency, 10);
    System.out.println("Top words");
    for (Map.Entry<Integer, Long> entry : topWords.entrySet()) {
        System.out.println(" - " + inverseDictionary.get(entry.getKey()) + ": " + entry.getValue());
    }

    int labelCount = labels.size();

    // The document count is stored under the special key -1. Fail with a clear
    // message instead of a bare NullPointerException when that entry is absent.
    Long documentCountEntry = documentFrequency.get(-1);
    if (documentCountEntry == null) {
        throw new IllegalStateException("Document frequency data at " + documentFrequencyPath
                + " has no entry for key -1 (document count)");
    }
    int documentCount = documentCountEntry.intValue();

    System.out.println("Number of labels: " + labelCount);
    System.out.println("Number of documents in training set: " + documentCount);

    // Loop-invariant; kept as a double so the loop bound comparison is unchanged.
    double featureCount = model.numFeatures();

    for (int labelId = 0; labelId < model.numLabels(); labelId++) {
        // Iteration order is the natural ordering of WordWeight
        // (presumably highest weight first -- verify against WordWeight.compareTo).
        SortedSet<WordWeight> wordWeights = new TreeSet<WordWeight>();
        for (int wordId = 0; wordId < featureCount; wordId++) {
            WordWeight w = new WordWeight(wordId, model.weight(labelId, wordId));
            wordWeights.add(w);
        }
        System.out.println("Top 10 words for label " + labels.get(labelId));
        int i = 0;
        for (WordWeight w : wordWeights) {
            System.out.println(" - " + inverseDictionary.get(w.getWordId()) + ": " + w.getWeight());
            i++;
            // Stop after the first ten entries.
            if (i >= 10) {
                break;
            }
        }
    }
}