List of usage examples for org.apache.mahout.classifier.naivebayes NaiveBayesModel numFeatures
double numFeatures
To view the source code for org.apache.mahout.classifier.naivebayes NaiveBayesModel numFeatures, click the Source Link.
From source file: com.chimpler.example.bayes.TopCategoryWords.java
License: Apache License
public static void main(String[] args) throws Exception { if (args.length < 4) { System.out.println("Arguments: [model] [label index] [dictionnary] [document frequency]"); return;//from w ww. j a v a2s .c o m } String modelPath = args[0]; String labelIndexPath = args[1]; String dictionaryPath = args[2]; String documentFrequencyPath = args[3]; Configuration configuration = new Configuration(); // model is a matrix (wordId, labelId) => probability score NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration); // labels is a map label => classId Map<Integer, String> labels = BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath)); Map<Integer, String> inverseDictionary = readInverseDictionnary(configuration, new Path(dictionaryPath)); Map<Integer, Long> documentFrequency = readDocumentFrequency(configuration, new Path(documentFrequencyPath)); Map<Integer, Long> topWords = getTopWords(documentFrequency, 10); System.out.println("Top words"); for (Map.Entry<Integer, Long> entry : topWords.entrySet()) { System.out.println(" - " + inverseDictionary.get(entry.getKey()) + ": " + entry.getValue()); } int labelCount = labels.size(); int documentCount = documentFrequency.get(-1).intValue(); System.out.println("Number of labels: " + labelCount); System.out.println("Number of documents in training set: " + documentCount); for (int labelId = 0; labelId < model.numLabels(); labelId++) { SortedSet<WordWeight> wordWeights = new TreeSet<WordWeight>(); for (int wordId = 0; wordId < model.numFeatures(); wordId++) { WordWeight w = new WordWeight(wordId, model.weight(labelId, wordId)); wordWeights.add(w); } System.out.println("Top 10 words for label " + labels.get(labelId)); int i = 0; for (WordWeight w : wordWeights) { System.out.println(" - " + inverseDictionary.get(w.getWordId()) + ": " + w.getWeight()); i++; if (i >= 10) { break; } } } }