Example usage for org.apache.hadoop.io.DoubleWritable.get()

List of usage examples for org.apache.hadoop.io.DoubleWritable.get()

Introduction

On this page you can find example usages of org.apache.hadoop.io.DoubleWritable.get().

Prototype

public double get() 

Source Link

Usage

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Loads the per-label theta normalizers from every sequence file matching
 * {@code pathPattern} into {@code datastore}.
 *
 * <p>Only records whose key starts with {@link BayesConstants#LABEL_THETA_NORMALIZER}
 * are used; the second key element is passed as the label and the record value as
 * the normalizer.
 *
 * @param datastore   receives each normalizer via {@code setThetaNormalizer}
 * @param fs          file system used to expand the pattern and open the files
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        configuration handed to each {@code SequenceFile.Reader}
 * @throws IOException if a file cannot be opened, read or closed
 */
public static void loadThetaNormalizer(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern,
        Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        log.info("{}", path);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // Progress counter; it restarts for every file.
            long count = 0;
            while (reader.next(key, value)) {
                // Sum of weights in a Label
                if (key.stringAt(0).equals(BayesConstants.LABEL_THETA_NORMALIZER)) {
                    datastore.setThetaNormalizer(key.stringAt(1), value.get());
                    count++;
                    if (count % 50000 == 0) {
                        log.info("Read {} theta norms", count);
                    }
                }
            }
        } finally {
            // Release the underlying stream even when reading fails part-way.
            reader.close();
        }
    }
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Loads the total weight sum (over all features and all labels) from every sequence
 * file matching {@code pathPattern} into {@code datastore}.
 *
 * <p>Each record whose key starts with {@link BayesConstants#TOTAL_SUM} overwrites the
 * value previously stored through {@code setSigmaJSigmaK}, so the last such record wins.
 *
 * @param datastore   receives the sum via {@code setSigmaJSigmaK}
 * @param fs          file system used to expand the pattern and open the files
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        configuration handed to each {@code SequenceFile.Reader}
 * @throws IOException if a file cannot be opened, read or closed
 */
public static void loadSumWeight(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern,
        Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        log.info("{}", path);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // the key is _label
            while (reader.next(key, value)) {
                // Sum of weights for all Features and all Labels
                if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
                    datastore.setSigmaJSigmaK(value.get());
                    log.info("{}", value.get());
                }
            }
        } finally {
            // Release the underlying stream even when reading fails part-way.
            reader.close();
        }
    }
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Reads the per-label sums from every sequence file matching {@code pathPattern}.
 *
 * <p>Only records whose key starts with {@link BayesConstants#LABEL_SUM} are used; the
 * second key element becomes the map key. A label seen more than once keeps the value
 * of the last record read.
 *
 * @param fs          file system used to expand the pattern and open the files
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        configuration handed to each {@code SequenceFile.Reader}
 * @return a mutable map from label to its sum; empty when no matching record exists
 * @throws IOException if a file cannot be opened, read or closed
 */
public static Map<String, Double> readLabelSums(FileSystem fs, Path pathPattern, Configuration conf)
        throws IOException {
    Map<String, Double> labelSum = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);

    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // the key is either _label_ or label,feature
            while (reader.next(key, value)) {
                // Sum of counts of labels
                if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
                    labelSum.put(key.stringAt(1), value.get());
                }
            }
        } finally {
            // Release the underlying stream even when reading fails part-way.
            reader.close();
        }
    }

    return labelSum;
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Reads the per-label document counts from every sequence file matching
 * {@code pathPattern}.
 *
 * <p>Only records whose key starts with {@link BayesConstants#LABEL_COUNT} are used; the
 * second key element becomes the map key. A label seen more than once keeps the value
 * of the last record read.
 *
 * @param fs          file system used to expand the pattern and open the files
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        configuration handed to each {@code SequenceFile.Reader}
 * @return a mutable map from label to its document count; empty when no matching
 *         record exists
 * @throws IOException if a file cannot be opened, read or closed
 */
public static Map<String, Double> readLabelDocumentCounts(FileSystem fs, Path pathPattern, Configuration conf)
        throws IOException {
    Map<String, Double> labelDocumentCounts = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // the key is either _label_ or label,feature
            while (reader.next(key, value)) {
                // Count of Documents in a Label
                if (key.stringAt(0).equals(BayesConstants.LABEL_COUNT)) {
                    labelDocumentCounts.put(key.stringAt(1), value.get());
                }
            }
        } finally {
            // Release the underlying stream even when reading fails part-way.
            reader.close();
        }
    }

    return labelDocumentCounts;
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Reads the total weight sum from the sequence files matching {@code pathPattern}.
 *
 * <p>Records whose key starts with {@link BayesConstants#TOTAL_SUM} are stored under
 * that single map key, so the value of the last such record is the one returned.
 *
 * @param fs          file system used to expand the pattern and open the files
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        configuration handed to each {@code SequenceFile.Reader}
 * @return the value stored for {@link BayesConstants#TOTAL_SUM}
 * @throws IOException          if a file cannot be opened, read or closed, or the
 *                              collected map holds more than one entry
 * @throws NullPointerException if no {@code TOTAL_SUM} record was read (the absent
 *                              map entry is unboxed on return)
 */
public static double readSigmaJSigmaK(FileSystem fs, Path pathPattern, Configuration conf) throws IOException {
    Map<String, Double> weightSum = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            while (reader.next(key, value)) {
                if (weightSum.size() > 1) {
                    throw new IOException("Incorrect Sum File");
                } else if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
                    weightSum.put(BayesConstants.TOTAL_SUM, value.get());
                }
            }
        } finally {
            // Release the underlying stream on normal exit and on the throw above.
            reader.close();
        }
    }

    return weightSum.get(BayesConstants.TOTAL_SUM);
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Reads the vocabulary (feature set) size from the sequence files matching
 * {@code pathPattern}.
 *
 * <p>Records whose key starts with {@link BayesConstants#FEATURE_SET_SIZE} are stored
 * under that single map key, so the value of the last such record is the one returned.
 *
 * @param fs          file system used to expand the pattern and open the files
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        configuration handed to each {@code SequenceFile.Reader}
 * @return the value stored for {@link BayesConstants#FEATURE_SET_SIZE}
 * @throws IOException          if a file cannot be opened, read or closed, or the
 *                              collected map holds more than one entry
 * @throws NullPointerException if no {@code FEATURE_SET_SIZE} record was read (the
 *                              absent map entry is unboxed on return)
 */
public static double readVocabCount(FileSystem fs, Path pathPattern, Configuration conf) throws IOException {
    Map<String, Double> weightSum = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            while (reader.next(key, value)) {
                if (weightSum.size() > 1) {
                    throw new IOException("Incorrect vocabCount File");
                }
                if (key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE)) {
                    weightSum.put(BayesConstants.FEATURE_SET_SIZE, value.get());
                }
            }
        } finally {
            // Release the underlying stream on normal exit and on the throw above.
            reader.close();
        }
    }

    return weightSum.get(BayesConstants.FEATURE_SET_SIZE);
}

From source file:org.apache.mahout.classifier.bayes.mapreduce.bayes.BayesClassifierDriver.java

License:Apache License

/**
 * Builds a {@link ConfusionMatrix} from the classification results stored in the
 * sequence files matching {@code pathPattern}.
 *
 * <p>For each record, key element 1 is taken as the correct label, key element 2 as the
 * classified label, and the value (truncated to {@code int}) as the count for that pair.
 * A pair seen more than once keeps the count of the last record read.
 *
 * @param fs          file system used to expand the pattern and open the files
 * @param pathPattern glob pattern selecting the sequence files to read
 * @param conf        configuration handed to each {@code SequenceFile.Reader}
 * @param params      supplies the default label under the key {@code "defaultCat"}
 * @return the confusion matrix populated from all records read
 * @throws IOException if a file cannot be opened, read or closed
 */
public static ConfusionMatrix readResult(FileSystem fs, Path pathPattern, Configuration conf, Parameters params)
        throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
    String defaultLabel = params.get("defaultCat");
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    // correct label -> (classified label -> count)
    Map<String, Map<String, Integer>> confusionMatrix = new HashMap<String, Map<String, Integer>>();

    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            while (reader.next(key, value)) {
                String correctLabel = key.stringAt(1);
                String classifiedLabel = key.stringAt(2);
                Map<String, Integer> rowMatrix = confusionMatrix.get(correctLabel);
                if (rowMatrix == null) {
                    rowMatrix = new HashMap<String, Integer>();
                }
                Integer count = Double.valueOf(value.get()).intValue();
                rowMatrix.put(classifiedLabel, count);
                confusionMatrix.put(correctLabel, rowMatrix);
            }
        } finally {
            // Release the underlying stream even when reading fails part-way.
            reader.close();
        }
    }

    // Copy the collected counts into the ConfusionMatrix instance.
    ConfusionMatrix matrix = new ConfusionMatrix(confusionMatrix.keySet(), defaultLabel);
    for (Map.Entry<String, Map<String, Integer>> correctLabelSet : confusionMatrix.entrySet()) {
        Map<String, Integer> rowMatrix = correctLabelSet.getValue();
        for (Map.Entry<String, Integer> classifiedLabelSet : rowMatrix.entrySet()) {
            matrix.addInstance(correctLabelSet.getKey(), classifiedLabelSet.getKey());
            matrix.putCount(correctLabelSet.getKey(), classifiedLabelSet.getKey(),
                    classifiedLabelSet.getValue());
        }
    }
    return matrix;

}

From source file:org.apache.mahout.classifier.bayes.mapreduce.bayes.BayesThetaNormalizerMapper.java

License:Apache License

/**
 * We need to calculate the thetaNormalization factor of each label
 * /*from   www .jav a  2s  .  co  m*/
 * @param key
 *          The label,feature pair
 * @param value
 *          The tfIdf of the pair
 */
@Override
public void map(StringTuple key, DoubleWritable value, OutputCollector<StringTuple, DoubleWritable> output,
        Reporter reporter) throws IOException {

    String label = key.stringAt(1);

    reporter.setStatus("Bayes Theta Normalizer Mapper: " + label);

    double weight = Math.log((value.get() + alphaI) / (labelWeightSum.get(label) + vocabCount));
    StringTuple thetaNormalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
    thetaNormalizerTuple.add(label);
    output.collect(thetaNormalizerTuple, new DoubleWritable(weight));
}

From source file:org.apache.mahout.classifier.bayes.mapreduce.cbayes.CBayesThetaNormalizerMapper.java

License:Apache License

/**
 * We need to calculate the idf of each feature in each label
 * /*  w  w  w. j a va2  s.c o m*/
 * @param key
 *          The label,feature pair (can either be the freq Count or the term Document count
 */
@Override
public void map(StringTuple key, final DoubleWritable value,
        final OutputCollector<StringTuple, DoubleWritable> output, final Reporter reporter) throws IOException {

    if (key.stringAt(0).equals(BayesConstants.FEATURE_SUM)) { // if it is from
        // the Sigma_j
        // folder
        labelWeightSum.forEachPair(new ObjectDoubleProcedure<String>() {

            @Override
            public boolean apply(String label, double sigmaJ) {
                double weight = Math.log((value.get() + alphaI) / (sigmaJSigmaK - sigmaJ + vocabCount));

                reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + label + " => " + weight);
                StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
                normalizerTuple.add(label);
                try {
                    output.collect(normalizerTuple, new DoubleWritable(weight));
                } catch (IOException e) {
                    throw new IllegalStateException(e);
                } // output Sigma_j
                return true;
            }
        });

    } else {
        String label = key.stringAt(1);

        double dIJ = value.get();
        double denominator = 0.5 * (sigmaJSigmaK / vocabCount + dIJ * this.labelWeightSum.size());
        double weight = Math.log(1.0 - dIJ / denominator);

        reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + label + " => " + weight);

        StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
        normalizerTuple.add(label);

        // output -D_ij
        output.collect(normalizerTuple, new DoubleWritable(weight));

    }

}

From source file:org.apache.mahout.classifier.bayes.mapreduce.common.BayesTfIdfMapper.java

License:Apache License

/**
 * We need to calculate the Tf-Idf of each feature in each label
 * /*  www  . j  av a 2s.  com*/
 * @param key
 *          The label,feature pair (can either be the freq Count or the term Document count
 */
@Override
public void map(StringTuple key, DoubleWritable value, OutputCollector<StringTuple, DoubleWritable> output,
        Reporter reporter) throws IOException {

    if (key.length() == 3) {
        if (key.stringAt(0).equals(BayesConstants.WEIGHT)) {
            reporter.setStatus("Bayes TfIdf Mapper: Tf: " + key);
            output.collect(key, value);
        } else if (key.stringAt(0).equals(BayesConstants.DOCUMENT_FREQUENCY)) {
            String label = key.stringAt(1);
            Double labelDocumentCount = labelDocumentCounts.get(label);
            double logIdf = Math.log(labelDocumentCount / value.get());
            key.replaceAt(0, BayesConstants.WEIGHT);
            output.collect(key, new DoubleWritable(logIdf));
            reporter.setStatus("Bayes TfIdf Mapper: log(Idf): " + key);
        } else {
            throw new IllegalArgumentException("Unrecognized Tuple: " + key);
        }
    } else if (key.length() == 2) {
        if (key.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
            output.collect(VOCAB_COUNT, ONE);
            reporter.setStatus("Bayes TfIdf Mapper: vocabCount");
        } else {
            throw new IllegalArgumentException("Unexpected Tuple: " + key);
        }
    }

}