Example usage for org.apache.hadoop.io.DoubleWritable.get()

Introduction

This page collects example usages of the get() method of org.apache.hadoop.io.DoubleWritable.

Prototype

public double get()

Source Link

Usage

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Reads every sequence file matching {@code pathPattern} and stores the theta normalizer of each
 * label in {@code datastore}.
 *
 * <p>Only records whose first key element equals {@code BayesConstants.LABEL_THETA_NORMALIZER}
 * are used; the second key element is passed as the label and the record value as its normalizer.
 * Each reader is closed once its file has been consumed, even if reading fails.
 *
 * @param datastore
 *          receives one {@code setThetaNormalizer(label, value)} call per matching record
 * @param fs
 *          file system used to expand the pattern and open the files
 * @param pathPattern
 *          glob pattern selecting the sequence files to read
 * @param conf
 *          configuration handed to each {@code SequenceFile.Reader}
 * @throws IOException
 *           if a file cannot be opened, read or closed
 */
public static void loadThetaNormalizer(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern,
        Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        log.info("{}", path);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // The progress counter restarts for every file.
            long count = 0;
            while (reader.next(key, value)) {
                // Sum of weights in a Label
                if (key.stringAt(0).equals(BayesConstants.LABEL_THETA_NORMALIZER)) {
                    datastore.setThetaNormalizer(key.stringAt(1), value.get());
                    count++;
                    if (count % 50000 == 0) {
                        log.info("Read {} theta norms", count);
                    }
                }
            }
        } finally {
            // Release the underlying stream; previously the reader was never closed.
            reader.close();
        }
    }
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Reads every sequence file matching {@code pathPattern} and stores the total weight sum in
 * {@code datastore}.
 *
 * <p>Only records whose first key element equals {@code BayesConstants.TOTAL_SUM} are used. If
 * several such records exist, each one overwrites the previous value through
 * {@code setSigmaJSigmaK}. Each reader is closed once its file has been consumed, even if reading
 * fails.
 *
 * @param datastore
 *          receives the value of each matching record via {@code setSigmaJSigmaK}
 * @param fs
 *          file system used to expand the pattern and open the files
 * @param pathPattern
 *          glob pattern selecting the sequence files to read
 * @param conf
 *          configuration handed to each {@code SequenceFile.Reader}
 * @throws IOException
 *           if a file cannot be opened, read or closed
 */
public static void loadSumWeight(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern,
        Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        log.info("{}", path);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // the key is _label
            while (reader.next(key, value)) {
                // Sum of weights for all Features and all Labels
                if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
                    datastore.setSigmaJSigmaK(value.get());
                    log.info("{}", value.get());
                }
            }
        } finally {
            // Release the underlying stream; previously the reader was never closed.
            reader.close();
        }
    }
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Collects the per-label sums found in every sequence file matching {@code pathPattern}.
 *
 * <p>Only records whose first key element equals {@code BayesConstants.LABEL_SUM} are used; the
 * second key element becomes the map key. A label seen more than once keeps the last value read.
 * Each reader is closed once its file has been consumed, even if reading fails.
 *
 * @param fs
 *          file system used to expand the pattern and open the files
 * @param pathPattern
 *          glob pattern selecting the sequence files to read
 * @param conf
 *          configuration handed to each {@code SequenceFile.Reader}
 * @return a mutable map from label to its sum; empty if no matching record was found
 * @throws IOException
 *           if a file cannot be opened, read or closed
 */
public static Map<String, Double> readLabelSums(FileSystem fs, Path pathPattern, Configuration conf)
        throws IOException {
    Map<String, Double> labelSum = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);

    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // the key is either _label_ or label,feature
            while (reader.next(key, value)) {
                // Sum of counts of labels
                if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
                    labelSum.put(key.stringAt(1), value.get());
                }
            }
        } finally {
            // Release the underlying stream; previously the reader was never closed.
            reader.close();
        }
    }

    return labelSum;
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Collects the per-label document counts found in every sequence file matching
 * {@code pathPattern}.
 *
 * <p>Only records whose first key element equals {@code BayesConstants.LABEL_COUNT} are used; the
 * second key element becomes the map key. A label seen more than once keeps the last value read.
 * Each reader is closed once its file has been consumed, even if reading fails.
 *
 * @param fs
 *          file system used to expand the pattern and open the files
 * @param pathPattern
 *          glob pattern selecting the sequence files to read
 * @param conf
 *          configuration handed to each {@code SequenceFile.Reader}
 * @return a mutable map from label to its document count; empty if no matching record was found
 * @throws IOException
 *           if a file cannot be opened, read or closed
 */
public static Map<String, Double> readLabelDocumentCounts(FileSystem fs, Path pathPattern, Configuration conf)
        throws IOException {
    Map<String, Double> labelDocumentCounts = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // the key is either _label_ or label,feature
            while (reader.next(key, value)) {
                // Count of Documents in a Label
                if (key.stringAt(0).equals(BayesConstants.LABEL_COUNT)) {
                    labelDocumentCounts.put(key.stringAt(1), value.get());
                }
            }
        } finally {
            // Release the underlying stream; previously the reader was never closed.
            reader.close();
        }
    }

    return labelDocumentCounts;
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Reads the total weight sum from the sequence files matching {@code pathPattern}.
 *
 * <p>Only records whose first key element equals {@code BayesConstants.TOTAL_SUM} are used; if
 * several exist, the last one read wins. Each reader is closed once its file has been consumed,
 * even if reading fails.
 *
 * <p>NOTE(review): if no matching record is found, the final lookup yields {@code null} and the
 * unboxing to {@code double} throws a {@link NullPointerException}. That behaviour is kept
 * unchanged here.
 *
 * @param fs
 *          file system used to expand the pattern and open the files
 * @param pathPattern
 *          glob pattern selecting the sequence files to read
 * @param conf
 *          configuration handed to each {@code SequenceFile.Reader}
 * @return the value stored under {@code BayesConstants.TOTAL_SUM}
 * @throws IOException
 *           if a file cannot be opened, read or closed
 */
public static double readSigmaJSigmaK(FileSystem fs, Path pathPattern, Configuration conf) throws IOException {
    Map<String, Double> weightSum = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            while (reader.next(key, value)) {
                // Sanity guard kept from the original. Only one key is ever inserted below, so
                // the map cannot grow past one entry and this branch is not expected to fire.
                if (weightSum.size() > 1) {
                    throw new IOException("Incorrect Sum File");
                } else if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
                    weightSum.put(BayesConstants.TOTAL_SUM, value.get());
                }
            }
        } finally {
            // Release the underlying stream; previously the reader was never closed.
            reader.close();
        }
    }

    return weightSum.get(BayesConstants.TOTAL_SUM);
}

From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License:Apache License

/**
 * Reads the vocabulary (feature set) size from the sequence files matching {@code pathPattern}.
 *
 * <p>Only records whose first key element equals {@code BayesConstants.FEATURE_SET_SIZE} are
 * used; if several exist, the last one read wins. Each reader is closed once its file has been
 * consumed, even if reading fails.
 *
 * <p>NOTE(review): if no matching record is found, the final lookup yields {@code null} and the
 * unboxing to {@code double} throws a {@link NullPointerException}. That behaviour is kept
 * unchanged here.
 *
 * @param fs
 *          file system used to expand the pattern and open the files
 * @param pathPattern
 *          glob pattern selecting the sequence files to read
 * @param conf
 *          configuration handed to each {@code SequenceFile.Reader}
 * @return the value stored under {@code BayesConstants.FEATURE_SET_SIZE}
 * @throws IOException
 *           if a file cannot be opened, read or closed
 */
public static double readVocabCount(FileSystem fs, Path pathPattern, Configuration conf) throws IOException {
    Map<String, Double> weightSum = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            while (reader.next(key, value)) {
                // Sanity guard kept from the original. Only one key is ever inserted below, so
                // the map cannot grow past one entry and this branch is not expected to fire.
                if (weightSum.size() > 1) {
                    throw new IOException("Incorrect vocabCount File");
                }
                if (key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE)) {
                    weightSum.put(BayesConstants.FEATURE_SET_SIZE, value.get());
                }
            }
        } finally {
            // Release the underlying stream; previously the reader was never closed.
            reader.close();
        }
    }

    return weightSum.get(BayesConstants.FEATURE_SET_SIZE);
}

From source file:org.apache.mahout.classifier.bayes.mapreduce.bayes.BayesClassifierDriver.java

License:Apache License

/**
 * Builds a {@code ConfusionMatrix} from the classification results stored in the sequence files
 * matching {@code pathPattern}.
 *
 * <p>For every record, key element 1 is taken as the correct label, key element 2 as the
 * classified label, and the value (truncated to an {@code int}) as the count for that pair. A
 * pair seen more than once keeps the last count read. Each reader is closed once its file has
 * been consumed, even if reading fails.
 *
 * @param fs
 *          file system used to expand the pattern and open the files
 * @param pathPattern
 *          glob pattern selecting the sequence files to read
 * @param conf
 *          configuration handed to each {@code SequenceFile.Reader}
 * @param params
 *          supplies the default label under the key {@code "defaultCat"}
 * @return a confusion matrix over all correct labels encountered, populated with the counts read
 * @throws IOException
 *           if a file cannot be opened, read or closed
 */
public static ConfusionMatrix readResult(FileSystem fs, Path pathPattern, Configuration conf, Parameters params)
        throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
    String defaultLabel = params.get("defaultCat");
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    Map<String, Map<String, Integer>> confusionMatrix = new HashMap<String, Map<String, Integer>>();

    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            while (reader.next(key, value)) {
                String correctLabel = key.stringAt(1);
                String classifiedLabel = key.stringAt(2);
                Map<String, Integer> rowMatrix = confusionMatrix.get(correctLabel);
                if (rowMatrix == null) {
                    rowMatrix = new HashMap<String, Integer>();
                }
                Integer count = Double.valueOf(value.get()).intValue();
                rowMatrix.put(classifiedLabel, count);
                confusionMatrix.put(correctLabel, rowMatrix);
            }
        } finally {
            // Release the underlying stream; previously the reader was never closed.
            reader.close();
        }
    }

    // Copy the collected rows into the ConfusionMatrix, which is keyed by the correct labels seen.
    ConfusionMatrix matrix = new ConfusionMatrix(confusionMatrix.keySet(), defaultLabel);
    for (Map.Entry<String, Map<String, Integer>> correctLabelSet : confusionMatrix.entrySet()) {
        Map<String, Integer> rowMatrix = correctLabelSet.getValue();
        for (Map.Entry<String, Integer> classifiedLabelSet : rowMatrix.entrySet()) {
            matrix.addInstance(correctLabelSet.getKey(), classifiedLabelSet.getKey());
            matrix.putCount(correctLabelSet.getKey(), classifiedLabelSet.getKey(),
                    classifiedLabelSet.getValue());
        }
    }
    return matrix;

}

From source file:org.apache.mahout.classifier.bayes.mapreduce.bayes.BayesThetaNormalizerMapper.java

License:Apache License

/**
 * We need to calculate the thetaNormalization factor of each label
 * /*from   www .jav a  2s  .  co  m*/
 * @param key
 *          The label,feature pair
 * @param value
 *          The tfIdf of the pair
 */
@Override
public void map(StringTuple key, DoubleWritable value, OutputCollector<StringTuple, DoubleWritable> output,
        Reporter reporter) throws IOException {

    String label = key.stringAt(1);

    reporter.setStatus("Bayes Theta Normalizer Mapper: " + label);

    double weight = Math.log((value.get() + alphaI) / (labelWeightSum.get(label) + vocabCount));
    StringTuple thetaNormalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
    thetaNormalizerTuple.add(label);
    output.collect(thetaNormalizerTuple, new DoubleWritable(weight));
}

From source file:org.apache.mahout.classifier.bayes.mapreduce.cbayes.CBayesThetaNormalizerMapper.java

License:Apache License

/**
 * We need to calculate the idf of each feature in each label
 * /*  w  w  w. j a va2  s.c o m*/
 * @param key
 *          The label,feature pair (can either be the freq Count or the term Document count
 */
@Override
public void map(StringTuple key, final DoubleWritable value,
        final OutputCollector<StringTuple, DoubleWritable> output, final Reporter reporter) throws IOException {

    if (key.stringAt(0).equals(BayesConstants.FEATURE_SUM)) { // if it is from
        // the Sigma_j
        // folder
        labelWeightSum.forEachPair(new ObjectDoubleProcedure<String>() {

            @Override
            public boolean apply(String label, double sigmaJ) {
                double weight = Math.log((value.get() + alphaI) / (sigmaJSigmaK - sigmaJ + vocabCount));

                reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + label + " => " + weight);
                StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
                normalizerTuple.add(label);
                try {
                    output.collect(normalizerTuple, new DoubleWritable(weight));
                } catch (IOException e) {
                    throw new IllegalStateException(e);
                } // output Sigma_j
                return true;
            }
        });

    } else {
        String label = key.stringAt(1);

        double dIJ = value.get();
        double denominator = 0.5 * (sigmaJSigmaK / vocabCount + dIJ * this.labelWeightSum.size());
        double weight = Math.log(1.0 - dIJ / denominator);

        reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + label + " => " + weight);

        StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
        normalizerTuple.add(label);

        // output -D_ij
        output.collect(normalizerTuple, new DoubleWritable(weight));

    }

}

From source file:org.apache.mahout.classifier.bayes.mapreduce.common.BayesTfIdfMapper.java

License:Apache License

/**
 * We need to calculate the Tf-Idf of each feature in each label
 * /*  www  . j  av a 2s.  com*/
 * @param key
 *          The label,feature pair (can either be the freq Count or the term Document count
 */
@Override
public void map(StringTuple key, DoubleWritable value, OutputCollector<StringTuple, DoubleWritable> output,
        Reporter reporter) throws IOException {

    if (key.length() == 3) {
        if (key.stringAt(0).equals(BayesConstants.WEIGHT)) {
            reporter.setStatus("Bayes TfIdf Mapper: Tf: " + key);
            output.collect(key, value);
        } else if (key.stringAt(0).equals(BayesConstants.DOCUMENT_FREQUENCY)) {
            String label = key.stringAt(1);
            Double labelDocumentCount = labelDocumentCounts.get(label);
            double logIdf = Math.log(labelDocumentCount / value.get());
            key.replaceAt(0, BayesConstants.WEIGHT);
            output.collect(key, new DoubleWritable(logIdf));
            reporter.setStatus("Bayes TfIdf Mapper: log(Idf): " + key);
        } else {
            throw new IllegalArgumentException("Unrecognized Tuple: " + key);
        }
    } else if (key.length() == 2) {
        if (key.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
            output.collect(VOCAB_COUNT, ONE);
            reporter.setStatus("Bayes TfIdf Mapper: vocabCount");
        } else {
            throw new IllegalArgumentException("Unexpected Tuple: " + key);
        }
    }

}