List of usage examples for org.apache.hadoop.io.DoubleWritable.get()
public double get()
From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java
License:Apache License
public static void loadWeightMatrix(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) { // the key is label,feature for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>( pathPattern, PathType.GLOB, null, null, true, conf)) { StringTuple key = record.getFirst(); DoubleWritable value = record.getSecond(); datastore.loadFeatureWeight(key.stringAt(2), key.stringAt(1), value.get()); }/*from w ww. jav a2 s . c o m*/ }
From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java
License:Apache License
public static void loadFeatureWeights(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) { // the key is either _label_ or label,feature long count = 0; for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>( pathPattern, PathType.GLOB, null, null, true, conf)) { // Sum of weights for a Feature StringTuple key = record.getFirst(); DoubleWritable value = record.getSecond(); if (key.stringAt(0).equals(BayesConstants.FEATURE_SUM)) { datastore.setSumFeatureWeight(key.stringAt(1), value.get()); if (++count % 50000 == 0) { log.info("Read {} feature weights", count); }//w w w . j av a2s.c o m } } }
From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java
License:Apache License
public static void loadLabelWeights(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) { long count = 0; for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>( pathPattern, PathType.GLOB, null, null, true, conf)) { // Sum of weights in a Label StringTuple key = record.getFirst(); DoubleWritable value = record.getSecond(); if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) { datastore.setSumLabelWeight(key.stringAt(1), value.get()); if (++count % 10000 == 0) { log.info("Read {} label weights", count); }// www .java2s. c om } } }
From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java
License:Apache License
public static void loadThetaNormalizer(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) { long count = 0; for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>( pathPattern, PathType.GLOB, null, null, true, conf)) { StringTuple key = record.getFirst(); DoubleWritable value = record.getSecond(); // Sum of weights in a Label if (key.stringAt(0).equals(BayesConstants.LABEL_THETA_NORMALIZER)) { datastore.setThetaNormalizer(key.stringAt(1), value.get()); if (++count % 50000 == 0) { log.info("Read {} theta norms", count); }/* w w w . j a v a2 s . c o m*/ } } }
From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java
License:Apache License
public static void loadSumWeight(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) { // the key is _label for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>( pathPattern, PathType.GLOB, null, null, true, conf)) { StringTuple key = record.getFirst(); DoubleWritable value = record.getSecond(); if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) { // Sum of weights for all Features and all Labels datastore.setSigmaJSigmaK(value.get()); log.info("{}", value.get()); }/* w ww.java 2 s .c om*/ } }
From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java
License:Apache License
public static Map<String, Double> readLabelSums(Path pathPattern, Configuration conf) { Map<String, Double> labelSum = Maps.newHashMap(); // the key is either _label_ or label,feature for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>( pathPattern, PathType.GLOB, null, null, true, conf)) { StringTuple key = record.getFirst(); DoubleWritable value = record.getSecond(); if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) { // Sum of counts of labels labelSum.put(key.stringAt(1), value.get()); }//from ww w . ja v a 2 s .c o m } return labelSum; }
From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java
License:Apache License
public static Map<String, Double> readLabelDocumentCounts(Path pathPattern, Configuration conf) { Map<String, Double> labelDocumentCounts = Maps.newHashMap(); // the key is either _label_ or label,feature for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>( pathPattern, PathType.GLOB, null, null, true, conf)) { StringTuple key = record.getFirst(); DoubleWritable value = record.getSecond(); // Count of Documents in a Label if (key.stringAt(0).equals(BayesConstants.LABEL_COUNT)) { labelDocumentCounts.put(key.stringAt(1), value.get()); }//from ww w . j a va 2 s. c om } return labelDocumentCounts; }
From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java
License:Apache License
public static double readSigmaJSigmaK(Path pathPattern, Configuration conf) { Map<String, Double> weightSum = Maps.newHashMap(); for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>( pathPattern, PathType.GLOB, null, null, true, conf)) { StringTuple key = record.getFirst(); DoubleWritable value = record.getSecond(); if (weightSum.size() > 1) { throw new IllegalStateException("Incorrect Sum File"); } else if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) { weightSum.put(BayesConstants.TOTAL_SUM, value.get()); }// ww w . j av a 2 s. c o m } return weightSum.get(BayesConstants.TOTAL_SUM); }
From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java
License:Apache License
/**
 * Reads the value of the record whose first key component equals
 * {@code BayesConstants.FEATURE_SET_SIZE} from the sequence files under {@code pathPattern}.
 *
 * <p>If several such records exist, the last one read wins.
 *
 * @param pathPattern location of the sequence files, enumerated with {@code PathType.GLOB}
 * @param conf        Hadoop configuration used to open the files
 * @return the value stored with the {@code FEATURE_SET_SIZE} record
 * @throws IllegalStateException if no {@code FEATURE_SET_SIZE} record is found
 */
public static double readVocabCount(Path pathPattern, Configuration conf) {
  // Boxed so that "never seen" can be told apart from a legitimate 0.0.
  Double vocabCount = null;

  for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>(
      pathPattern, PathType.GLOB, null, null, true, conf)) {
    StringTuple key = record.getFirst();
    DoubleWritable value = record.getSecond();
    // NOTE(review): the previous version guarded on "map size > 1", but only one key was ever
    // stored, so that guard could never fire. It has been dropped; if duplicate
    // FEATURE_SET_SIZE records should be rejected, that needs an explicit decision.
    if (key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE)) {
      vocabCount = value.get();
    }
  }

  // Previously a missing record surfaced as a bare NullPointerException from auto-unboxing.
  if (vocabCount == null) {
    throw new IllegalStateException(
        "Incorrect vocabCount File: no " + BayesConstants.FEATURE_SET_SIZE
            + " record found under " + pathPattern);
  }
  return vocabCount;
}
From source file:org.apache.mahout.classifier.cbayes.CBayesNormalizedWeightMapper.java
License:Apache License
/** * We need to calculate the idf of each feature in each label * * @param key The label,feature pair (can either be the freq Count or the term Document count *//*from w w w . j a v a 2s . com*/ @Override public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException { String labelFeaturePair = key.toString(); String label = labelFeaturePair.split(",")[0]; output.collect(key, new DoubleWritable(-Math.log(value.get()) / thetaNormalizer.get(label)));// output -D_ij }