Example usage for org.apache.hadoop.io DoubleWritable get

List of usage examples for org.apache.hadoop.io DoubleWritable get

Introduction

On this page you can find example usages of org.apache.hadoop.io.DoubleWritable.get().

Prototype

public double get() 
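
A minimal, self-contained sketch of the round trip (class and variable names are illustrative only): get() returns the primitive double currently wrapped by the DoubleWritable, and set(double) replaces it.

import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableGetExample {
    public static void main(String[] args) {
        DoubleWritable value = new DoubleWritable(); // wraps 0.0 by default
        value.set(3.14);                             // store a primitive double
        double unwrapped = value.get();              // unwrap it again
        System.out.println(unwrapped);               // prints 3.14
    }
}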

Usage

From source file: org.apache.hama.pipes.TestPipes.java

License: Apache License

static void verifyOutput(HamaConfiguration conf, Path outputPath, double expectedResult, double delta)
        throws IOException {
    FileStatus[] listStatus = fs.listStatus(outputPath);
    for (FileStatus status : listStatus) {
        if (!status.isDir()) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf);
            NullWritable key = NullWritable.get();
            DoubleWritable value = new DoubleWritable();
            if (reader.next(key, value)) {
                LOG.info("Output File: " + status.getPath());
                LOG.info("key: '" + key + "' value: '" + value + "' expected: '" + expectedResult + "'");
                assertEquals("Expected value: '" + expectedResult + "' != '" + value + "'", expectedResult,
                        value.get(), delta);
            }
            reader.close();
        }
    }
}
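
Note how value.get() unwraps the primitive double so that JUnit's assertEquals(String, double, double, double) can compare the result against expectedResult within the given delta.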

From source file: org.apache.jena.grande.giraph.pagerank.PageRankVertex.java

License: Apache License

@Override
public void compute(Iterable<DoubleWritable> msgIterator) {
    log.debug("{}#{} compute() vertexValue={}", new Object[] { getId(), getSuperstep(), getValue() });

    if (getConf() != null) {
        numIterations = getConf().getInt("giraph.pagerank.iterations", DEFAULT_NUM_ITERATIONS);
        tolerance = getConf().getFloat("giraph.pagerank.tolerance", DEFAULT_TOLERANCE);
    } else {
        log.warn("{}#{} compute() getConf() is null!", getId(), getSuperstep());
    }

    if (getSuperstep() == 0) {
        log.debug(
                "{}#{} compute(): sending fake messages to count vertices, including 'implicit' dangling ones",
                getId(), getSuperstep());
        sendMessageToAllEdges(new DoubleWritable());
    } else if (getSuperstep() == 1) {
        log.debug("{}#{} compute(): counting vertices including 'implicit' dangling ones", getId(),
                getSuperstep());
        aggregate("vertices-count", new LongWritable(1L));
        aggregate("error-current", new DoubleWritable(Double.MAX_VALUE));
    } else if (getSuperstep() == 2) {
        long numVertices = ((LongWritable) getAggregatedValue("vertices-count")).get();
        aggregate("error-current", new DoubleWritable(Double.MAX_VALUE));
        log.debug("{}#{} compute(): initializing pagerank scores to 1/N, N={}",
                new Object[] { getId(), getSuperstep(), numVertices });
        DoubleWritable vertexValue = new DoubleWritable(1.0 / numVertices);
        setValue(vertexValue);
        log.debug("{}#{} compute() vertexValue <-- {}", new Object[] { getId(), getSuperstep(), getValue() });
        sendMessages();
    } else if (getSuperstep() > 2) {
        long numVertices = ((LongWritable) getAggregatedValue("vertices-count")).get();
        double sum = 0;
        for (DoubleWritable msgValue : msgIterator) {
            log.debug("{}#{} compute() <-- {}", new Object[] { getId(), getSuperstep(), msgValue });
            sum += msgValue.get();
        }
        double danglingNodesContribution = ((DoubleWritable) getAggregatedValue("dangling-previous")).get();
        DoubleWritable vertexValue = new DoubleWritable(
                (0.15f / numVertices) + 0.85f * (sum + danglingNodesContribution / numVertices));
        aggregate("error-current", new DoubleWritable(Math.abs(vertexValue.get() - getValue().get())));
        setValue(vertexValue);
        log.debug("{}#{} compute() vertexValue <-- {}", new Object[] { getId(), getSuperstep(), getValue() });
        sendMessages();
    }
}
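
The update applied at supersteps beyond 2 is the standard damped PageRank formula, PR(v) = 0.15/N + 0.85 * (sum of incoming messages + danglingNodesContribution/N); get() unwraps both the incoming message values and the aggregated dangling-node mass.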

From source file: org.apache.jena.grande.giraph.pagerank.SimplePageRankVertex.java

License: Apache License

@Override
public void compute(Iterable<DoubleWritable> msgIterator) {
    log.debug("{}#{} - compute(...) vertexValue={}", new Object[] { getId(), getSuperstep(), getValue() });

    if (getSuperstep() >= 1) {
        double sum = 0;
        for (DoubleWritable msg : msgIterator) {
            sum += msg.get();
        }
        DoubleWritable vertexValue = new DoubleWritable((0.15f / getTotalNumVertices()) + 0.85f * sum);
        setValue(vertexValue);
    }

    if (getSuperstep() < NUM_ITERATIONS) {
        long edges = getNumEdges();
        sendMessageToAllEdges(new DoubleWritable(getValue().get() / edges));
    } else {
        voteToHalt();
    }
}

From source file: org.apache.mahout.cf.taste.hadoop.als.eval.ParallelFactorizationEvaluator.java

License: Apache License

protected double computeRmse(Path errors) {
    RunningAverage average = new FullRunningAverage();
    for (Pair<DoubleWritable, NullWritable> entry : new SequenceFileDirIterable<DoubleWritable, NullWritable>(
            errors, PathType.LIST, PathFilters.logsCRCFilter(), getConf())) {
        DoubleWritable error = entry.getFirst();
        average.addDatum(error.get() * error.get());
    }

    return Math.sqrt(average.getAverage());
}
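
Each stored error is unwrapped with get() and squared before being averaged, so the method returns the root mean squared error (RMSE) over all records.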

From source file: org.apache.mahout.cf.taste.hadoop.als.FactorizationEvaluator.java

License: Apache License

double computeRmse(Path errors) {
    RunningAverage average = new FullRunningAverage();
    for (Pair<DoubleWritable, NullWritable> entry : new SequenceFileDirIterable<DoubleWritable, NullWritable>(
            errors, PathType.LIST, PathFilters.logsCRCFilter(), getConf())) {
        DoubleWritable error = entry.getFirst();
        average.addDatum(error.get() * error.get());
    }

    return Math.sqrt(average.getAverage());
}

From source file: org.apache.mahout.classifier.bayes.BayesThetaNormalizerMapper.java

License: Apache License

/**
 * We need to calculate the thetaNormalization factor of each label
 *
 * @param key   The label,feature pair
 * @param value The tfIdf of the pair
 */
@Override
public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output, Reporter reporter)
        throws IOException {

    String labelFeaturePair = key.toString();
    double alpha_i = 1.0;

    String label = labelFeaturePair.split(",")[0];
    double weight = Math.log((value.get() + alpha_i) / (labelWeightSum.get(label) + vocabCount));
    output.collect(new Text(('_' + label).trim()), new DoubleWritable(weight));
}
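
Here value.get() supplies the TF-IDF weight of the label,feature pair, and alpha_i = 1.0 acts as a smoothing term, so the emitted weight is the smoothed log of the pair's normalized weight for its label.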

From source file: org.apache.mahout.classifier.bayes.common.BayesTfIdfMapper.java

License: Apache License

/**
 * We need to calculate the Tf-Idf of each feature in each label
 *
 * @param key The label,feature pair (can be either the frequency count or the term document count)
 */
@Override
public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output, Reporter reporter)
        throws IOException {

    String labelFeaturePair = key.toString();

    char firstChar = labelFeaturePair.charAt(0);
    switch (firstChar) {
    case '-': {// if it is the termDocumentCount
        labelFeaturePair = labelFeaturePair.substring(1);
        //-17th_century_mathematicians_anderson__alexander,1582
        int idx = labelFeaturePair.indexOf(",");
        if (idx != -1) {
            String label = labelFeaturePair.substring(0, idx);

            Double labelDocumentCount = labelDocumentCounts.get(label);
            if (labelDocumentCount == null) {
                throw new IOException("Invalid label: " + label);
            }
            double logIdf = Math.log(labelDocumentCount / value.get());
            output.collect(new Text(labelFeaturePair), new DoubleWritable(logIdf));
        } else {
            throw new IOException("Invalid ");
        }
        break;
    }
    case ',': {
        output.collect(new Text("*vocabCount"), new DoubleWritable(1.0));
        break;
    }
    default: {
        output.collect(key, value);
        break;
    }
    }
}
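
The leading character of the key distinguishes the record type: '-' marks a term-document count, which is converted into a log-IDF using value.get(); ',' marks a vocabulary-count marker; anything else is passed through unchanged.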

From source file: org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License: Apache License

public static void loadWeightMatrix(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern,
        Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        log.info("{}", path);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

        // the key is label,feature
        while (reader.next(key, value)) {

            datastore.loadFeatureWeight(key.stringAt(2), key.stringAt(1), value.get());

        }
    }
}
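
Because reader.next(key, value) deserializes each record into the same reused StringTuple and DoubleWritable instances, value.get() must be called inside the loop to extract the weight of the current record.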

From source file: org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License: Apache License

public static void loadFeatureWeights(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern,
        Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        log.info("{}", path);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

        // the key is either _label_ or label,feature
        long count = 0;
        while (reader.next(key, value)) {
            // Sum of weights for a Feature
            if (key.stringAt(0).equals(BayesConstants.FEATURE_SUM)) {
                datastore.setSumFeatureWeight(key.stringAt(1), value.get());
                count++;
                if (count % 50000 == 0) {
                    log.info("Read {} feature weights", count);
                }
            }
        }
    }
}

From source file: org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java

License: Apache License

public static void loadLabelWeights(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern,
        Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        log.info("{}", path);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

        long count = 0;
        while (reader.next(key, value)) {
            // Sum of weights in a Label
            if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
                datastore.setSumLabelWeight(key.stringAt(1), value.get());
                count++;
                if (count % 10000 == 0) {
                    log.info("Read {} label weights", count);
                }
            }
        }
    }
}