Example usage for org.apache.hadoop.io DoubleWritable get

List of usage examples for org.apache.hadoop.io DoubleWritable get

Introduction

On this page you can find example usages of the get() method of org.apache.hadoop.io.DoubleWritable.

Prototype

public double get() 

Source Link

Usage

From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java

License:Apache License

/**
 * Feeds every record matched by {@code pathPattern} (interpreted as a glob) into the datastore
 * via {@code loadFeatureWeight}.
 *
 * <p>The key tuple is read positionally: the string at index 2 is passed first, the string at
 * index 1 second, and the record's double value third.
 *
 * @param datastore   destination that receives one {@code loadFeatureWeight} call per record
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        Hadoop configuration handed to the sequence-file iterator
 */
public static void loadWeightMatrix(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) {
    SequenceFileDirIterable<StringTuple, DoubleWritable> records =
            new SequenceFileDirIterable<StringTuple, DoubleWritable>(
                    pathPattern, PathType.GLOB, null, null, true, conf);
    for (Pair<StringTuple, DoubleWritable> entry : records) {
        StringTuple tuple = entry.getFirst();
        DoubleWritable weight = entry.getSecond();
        // Read the tuple fields in the same order the datastore call consumes them.
        String fieldAtTwo = tuple.stringAt(2);
        String fieldAtOne = tuple.stringAt(1);
        datastore.loadFeatureWeight(fieldAtTwo, fieldAtOne, weight.get());
    }
}

From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java

License:Apache License

/**
 * Loads the per-feature weight sums from the sequence files matched by {@code pathPattern}.
 *
 * <p>Only records whose key starts with {@code BayesConstants.FEATURE_SUM} are used; for those,
 * the string at index 1 of the key and the record value are stored through
 * {@code setSumFeatureWeight}. Progress is logged every 50000 stored records.
 *
 * @param datastore   destination for the feature weight sums
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        Hadoop configuration handed to the sequence-file iterator
 */
public static void loadFeatureWeights(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) {
    SequenceFileDirIterable<StringTuple, DoubleWritable> records =
            new SequenceFileDirIterable<StringTuple, DoubleWritable>(
                    pathPattern, PathType.GLOB, null, null, true, conf);
    long loaded = 0;
    for (Pair<StringTuple, DoubleWritable> entry : records) {
        StringTuple tuple = entry.getFirst();
        DoubleWritable weight = entry.getSecond();
        if (!tuple.stringAt(0).equals(BayesConstants.FEATURE_SUM)) {
            // Not a feature-sum record; nothing to store.
            continue;
        }
        datastore.setSumFeatureWeight(tuple.stringAt(1), weight.get());
        loaded++;
        if (loaded % 50000 == 0) {
            log.info("Read {} feature weights", loaded);
        }
    }
}

From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java

License:Apache License

/**
 * Loads the per-label weight sums from the sequence files matched by {@code pathPattern}.
 *
 * <p>Only records whose key starts with {@code BayesConstants.LABEL_SUM} are used; for those,
 * the string at index 1 of the key and the record value are stored through
 * {@code setSumLabelWeight}. Progress is logged every 10000 stored records.
 *
 * @param datastore   destination for the label weight sums
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        Hadoop configuration handed to the sequence-file iterator
 */
public static void loadLabelWeights(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) {
    SequenceFileDirIterable<StringTuple, DoubleWritable> records =
            new SequenceFileDirIterable<StringTuple, DoubleWritable>(
                    pathPattern, PathType.GLOB, null, null, true, conf);
    long loaded = 0;
    for (Pair<StringTuple, DoubleWritable> entry : records) {
        StringTuple tuple = entry.getFirst();
        DoubleWritable weight = entry.getSecond();
        if (!tuple.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
            // Not a label-sum record; nothing to store.
            continue;
        }
        datastore.setSumLabelWeight(tuple.stringAt(1), weight.get());
        loaded++;
        if (loaded % 10000 == 0) {
            log.info("Read {} label weights", loaded);
        }
    }
}

From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java

License:Apache License

/**
 * Loads theta normalizer values from the sequence files matched by {@code pathPattern}.
 *
 * <p>Only records whose key starts with {@code BayesConstants.LABEL_THETA_NORMALIZER} are used;
 * for those, the string at index 1 of the key and the record value are stored through
 * {@code setThetaNormalizer}. Progress is logged every 50000 stored records.
 *
 * @param datastore   destination for the theta normalizers
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        Hadoop configuration handed to the sequence-file iterator
 */
public static void loadThetaNormalizer(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) {
    SequenceFileDirIterable<StringTuple, DoubleWritable> records =
            new SequenceFileDirIterable<StringTuple, DoubleWritable>(
                    pathPattern, PathType.GLOB, null, null, true, conf);
    long loaded = 0;
    for (Pair<StringTuple, DoubleWritable> entry : records) {
        StringTuple tuple = entry.getFirst();
        DoubleWritable normalizer = entry.getSecond();
        if (!tuple.stringAt(0).equals(BayesConstants.LABEL_THETA_NORMALIZER)) {
            // Not a theta-normalizer record; nothing to store.
            continue;
        }
        datastore.setThetaNormalizer(tuple.stringAt(1), normalizer.get());
        loaded++;
        if (loaded % 50000 == 0) {
            log.info("Read {} theta norms", loaded);
        }
    }
}

From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java

License:Apache License

/**
 * Loads the overall weight total from the sequence files matched by {@code pathPattern}.
 *
 * <p>For every record whose key starts with {@code BayesConstants.TOTAL_SUM}, the record value
 * is stored through {@code setSigmaJSigmaK} and then logged.
 *
 * @param datastore   destination for the total
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        Hadoop configuration handed to the sequence-file iterator
 */
public static void loadSumWeight(InMemoryBayesDatastore datastore, Path pathPattern, Configuration conf) {
    SequenceFileDirIterable<StringTuple, DoubleWritable> records =
            new SequenceFileDirIterable<StringTuple, DoubleWritable>(
                    pathPattern, PathType.GLOB, null, null, true, conf);
    for (Pair<StringTuple, DoubleWritable> entry : records) {
        StringTuple tuple = entry.getFirst();
        DoubleWritable total = entry.getSecond();
        if (!tuple.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
            // Not the total-sum record; skip it.
            continue;
        }
        datastore.setSigmaJSigmaK(total.get());
        log.info("{}", total.get());
    }
}

From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java

License:Apache License

/**
 * Collects the label sums found in the sequence files matched by {@code pathPattern}.
 *
 * <p>Records whose key does not start with {@code BayesConstants.LABEL_SUM} are ignored. For
 * the others, the string at index 1 of the key is mapped to the record value; a later record
 * with the same name replaces an earlier one.
 *
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        Hadoop configuration handed to the sequence-file iterator
 * @return a mutable map from the key's index-1 string to its value (empty if nothing matched)
 */
public static Map<String, Double> readLabelSums(Path pathPattern, Configuration conf) {
    Map<String, Double> sumsByLabel = Maps.newHashMap();
    SequenceFileDirIterable<StringTuple, DoubleWritable> records =
            new SequenceFileDirIterable<StringTuple, DoubleWritable>(
                    pathPattern, PathType.GLOB, null, null, true, conf);
    for (Pair<StringTuple, DoubleWritable> entry : records) {
        StringTuple tuple = entry.getFirst();
        DoubleWritable sum = entry.getSecond();
        if (!tuple.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
            continue;
        }
        sumsByLabel.put(tuple.stringAt(1), sum.get());
    }
    return sumsByLabel;
}

From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java

License:Apache License

/**
 * Collects the label document counts found in the sequence files matched by
 * {@code pathPattern}.
 *
 * <p>Records whose key does not start with {@code BayesConstants.LABEL_COUNT} are ignored. For
 * the others, the string at index 1 of the key is mapped to the record value; a later record
 * with the same name replaces an earlier one.
 *
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        Hadoop configuration handed to the sequence-file iterator
 * @return a mutable map from the key's index-1 string to its value (empty if nothing matched)
 */
public static Map<String, Double> readLabelDocumentCounts(Path pathPattern, Configuration conf) {
    Map<String, Double> countsByLabel = Maps.newHashMap();
    SequenceFileDirIterable<StringTuple, DoubleWritable> records =
            new SequenceFileDirIterable<StringTuple, DoubleWritable>(
                    pathPattern, PathType.GLOB, null, null, true, conf);
    for (Pair<StringTuple, DoubleWritable> entry : records) {
        StringTuple tuple = entry.getFirst();
        DoubleWritable count = entry.getSecond();
        if (!tuple.stringAt(0).equals(BayesConstants.LABEL_COUNT)) {
            continue;
        }
        countsByLabel.put(tuple.stringAt(1), count.get());
    }
    return countsByLabel;
}

From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java

License:Apache License

/**
 * Reads the total weight sum from the sequence files matched by {@code pathPattern}.
 *
 * <p>Every record whose key starts with {@code BayesConstants.TOTAL_SUM} contributes its value;
 * if several such records exist, the last one read wins (unchanged from the previous
 * behaviour).
 *
 * <p>The former {@code weightSum.size() > 1} guard has been removed: the map only ever received
 * a single key, so the guard could never fire. A missing record used to surface as a bare
 * {@link NullPointerException} from auto-unboxing the absent map entry; it is now reported
 * explicitly.
 *
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        Hadoop configuration handed to the sequence-file iterator
 * @return the value of the last {@code TOTAL_SUM} record read
 * @throws IllegalStateException if no {@code TOTAL_SUM} record is present in the input
 */
public static double readSigmaJSigmaK(Path pathPattern, Configuration conf) {
    // Boxed so that "no record seen" stays distinguishable from any legitimate double value.
    Double totalSum = null;
    for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>(
            pathPattern, PathType.GLOB, null, null, true, conf)) {
        StringTuple key = record.getFirst();
        DoubleWritable value = record.getSecond();
        if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
            totalSum = value.get();
        }
    }
    if (totalSum == null) {
        throw new IllegalStateException(
                "Incorrect Sum File: no " + BayesConstants.TOTAL_SUM + " record found under " + pathPattern);
    }
    return totalSum;
}

From source file:org.apache.mahout.classifier.bayes.SequenceFileModelReader.java

License:Apache License

/**
 * Reads the vocabulary (feature set) size from the sequence files matched by
 * {@code pathPattern}.
 *
 * <p>Every record whose key starts with {@code BayesConstants.FEATURE_SET_SIZE} contributes its
 * value; if several such records exist, the last one read wins (unchanged from the previous
 * behaviour).
 *
 * <p>The former {@code weightSum.size() > 1} guard has been removed: the map only ever received
 * a single key, so the guard could never fire. A missing record used to surface as a bare
 * {@link NullPointerException} from auto-unboxing the absent map entry; it is now reported
 * explicitly.
 *
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        Hadoop configuration handed to the sequence-file iterator
 * @return the value of the last {@code FEATURE_SET_SIZE} record read
 * @throws IllegalStateException if no {@code FEATURE_SET_SIZE} record is present in the input
 */
public static double readVocabCount(Path pathPattern, Configuration conf) {
    // Boxed so that "no record seen" stays distinguishable from any legitimate double value.
    Double vocabCount = null;
    for (Pair<StringTuple, DoubleWritable> record : new SequenceFileDirIterable<StringTuple, DoubleWritable>(
            pathPattern, PathType.GLOB, null, null, true, conf)) {
        StringTuple key = record.getFirst();
        DoubleWritable value = record.getSecond();
        if (key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE)) {
            vocabCount = value.get();
        }
    }
    if (vocabCount == null) {
        throw new IllegalStateException("Incorrect vocabCount File: no " + BayesConstants.FEATURE_SET_SIZE
                + " record found under " + pathPattern);
    }
    return vocabCount;
}

From source file:org.apache.mahout.classifier.cbayes.CBayesNormalizedWeightMapper.java

License:Apache License

/**
 * Re-emits the incoming key with a normalized weight: the negated natural logarithm of
 * {@code value}, divided by the theta normalizer looked up for the key's label.
 *
 * @param key      the "label,feature" pair; the label is the text before the first comma
 * @param value    the weight to normalize
 * @param output   collector that receives the unchanged key with the normalized weight
 * @param reporter not used by this method
 * @throws IOException declared by the mapper contract; presumably raised by
 *                     {@code output.collect} (confirm against the collector in use)
 */
@Override
public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output, Reporter reporter)
        throws IOException {
    // Everything before the first comma identifies the label.
    String label = key.toString().split(",")[0];
    double normalizedWeight = -Math.log(value.get()) / thetaNormalizer.get(label);
    output.collect(key, new DoubleWritable(normalizedWeight)); // output -D_ij
}