List of usage examples for org.apache.hadoop.io.DoubleWritable#get()
public double get()
From source file:org.apache.hama.pipes.TestPipes.java
License:Apache License
/**
 * Checks the first record of every non-directory entry under {@code outputPath}.
 *
 * <p>Each entry returned by {@code fs.listStatus(outputPath)} that is not a directory is opened as a
 * sequence file. If it yields a record, the record is logged and its value is asserted to equal
 * {@code expectedResult} within {@code delta}. Entries that yield no record are accepted silently.
 *
 * <p>The reader is closed in a {@code finally} block so that a failed assertion or a read error
 * does not leave the underlying stream open.
 *
 * @param conf           configuration handed to the sequence-file reader
 * @param outputPath     directory whose entries are inspected
 * @param expectedResult value the first record of each file must carry
 * @param delta          tolerance passed to {@code assertEquals}
 * @throws IOException if listing the directory, or opening, reading or closing a file fails
 */
static void verifyOutput(HamaConfiguration conf, Path outputPath, double expectedResult, double delta)
        throws IOException {
    FileStatus[] listStatus = fs.listStatus(outputPath);
    for (FileStatus status : listStatus) {
        if (!status.isDir()) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf);
            try {
                NullWritable key = NullWritable.get();
                DoubleWritable value = new DoubleWritable();
                // Only the first record of each file is checked.
                if (reader.next(key, value)) {
                    LOG.info("Output File: " + status.getPath());
                    LOG.info("key: '" + key + "' value: '" + value + "' expected: '" + expectedResult + "'");
                    assertEquals("Expected value: '" + expectedResult + "' != '" + value + "'", expectedResult,
                            value.get(), delta);
                }
            } finally {
                // Release the file handle even when the assertion above fails.
                reader.close();
            }
        }
    }
}
From source file:org.apache.jena.grande.giraph.pagerank.PageRankVertex.java
License:Apache License
/**
 * Performs this vertex's work for the current superstep.
 *
 * <p>Before dispatching, {@code numIterations} and {@code tolerance} are refreshed from the
 * configuration when one is available; otherwise a warning is logged. The work then depends on
 * the superstep number:
 * <ul>
 *   <li>0: an empty {@link DoubleWritable} is sent along every edge;</li>
 *   <li>1: {@code 1} is aggregated into "vertices-count" and {@code Double.MAX_VALUE} into
 *       "error-current";</li>
 *   <li>2: the vertex value is set to {@code 1.0 / N}, where N is the aggregated
 *       "vertices-count", and {@code sendMessages()} is called;</li>
 *   <li>above 2: incoming messages are summed, a new value is derived from that sum and the
 *       "dangling-previous" aggregate, the absolute change is aggregated into "error-current",
 *       and {@code sendMessages()} is called.</li>
 * </ul>
 *
 * @param msgIterator messages delivered to this vertex; only read when the superstep is above 2
 */
@Override
public void compute(Iterable<DoubleWritable> msgIterator) {
    log.debug("{}#{} compute() vertexValue={}", new Object[] { getId(), getSuperstep(), getValue() });

    if (getConf() == null) {
        log.warn("{}#{} compute() getConf() is null!", getId(), getSuperstep());
    } else {
        numIterations = getConf().getInt("giraph.pagerank.iterations", DEFAULT_NUM_ITERATIONS);
        tolerance = getConf().getFloat("giraph.pagerank.tolerance", DEFAULT_TOLERANCE);
    }

    if (getSuperstep() == 0) {
        log.debug(
                "{}#{} compute(): sending fake messages to count vertices, including 'implicit' dangling ones",
                getId(), getSuperstep());
        sendMessageToAllEdges(new DoubleWritable());
        return;
    }

    if (getSuperstep() == 1) {
        log.debug("{}#{} compute(): counting vertices including 'implicit' dangling ones", getId(),
                getSuperstep());
        aggregate("vertices-count", new LongWritable(1L));
        aggregate("error-current", new DoubleWritable(Double.MAX_VALUE));
        return;
    }

    if (getSuperstep() == 2) {
        long vertexCount = ((LongWritable) getAggregatedValue("vertices-count")).get();
        aggregate("error-current", new DoubleWritable(Double.MAX_VALUE));
        log.debug("{}#{} compute(): initializing pagerank scores to 1/N, N={}",
                new Object[] { getId(), getSuperstep(), vertexCount });
        DoubleWritable initialValue = new DoubleWritable(1.0 / vertexCount);
        setValue(initialValue);
        log.debug("{}#{} compute() vertexValue <-- {}", new Object[] { getId(), getSuperstep(), getValue() });
        sendMessages();
        return;
    }

    // 0, 1 and 2 were handled above; any other superstep that is not above 2 does nothing.
    if (getSuperstep() <= 2) {
        return;
    }

    long vertexCount = ((LongWritable) getAggregatedValue("vertices-count")).get();

    double incoming = 0;
    for (DoubleWritable message : msgIterator) {
        log.debug("{}#{} compute() <-- {}", new Object[] { getId(), getSuperstep(), message });
        incoming += message.get();
    }

    double danglingShare = ((DoubleWritable) getAggregatedValue("dangling-previous")).get();

    // The first term is deliberately kept as a float division (float / long), exactly as before.
    float constantTerm = 0.15f / vertexCount;
    double weightedTerm = 0.85f * (incoming + danglingShare / vertexCount);
    DoubleWritable updatedValue = new DoubleWritable(constantTerm + weightedTerm);

    // The error must be aggregated against the old value, so this precedes setValue().
    double change = Math.abs(updatedValue.get() - getValue().get());
    aggregate("error-current", new DoubleWritable(change));

    setValue(updatedValue);
    log.debug("{}#{} compute() vertexValue <-- {}", new Object[] { getId(), getSuperstep(), getValue() });
    sendMessages();
}
From source file:org.apache.jena.grande.giraph.pagerank.SimplePageRankVertex.java
License:Apache License
/**
 * Performs this vertex's work for the current superstep.
 *
 * <p>From superstep 1 onwards the vertex value is replaced by
 * {@code 0.15f / getTotalNumVertices() + 0.85f * sum}, where {@code sum} is the total of the
 * incoming messages. While the superstep is below {@code NUM_ITERATIONS}, the current value
 * divided by the number of edges is sent along every edge; otherwise the vertex votes to halt.
 *
 * @param msgIterator messages delivered to this vertex; only read from superstep 1 onwards
 */
@Override
public void compute(Iterable<DoubleWritable> msgIterator) {
    log.debug("{}#{} - compute(...) vertexValue={}", new Object[] { getId(), getSuperstep(), getValue() });

    if (getSuperstep() >= 1) {
        double incoming = 0;
        for (DoubleWritable message : msgIterator) {
            incoming += message.get();
        }
        // The first term stays a float division (float / vertex count), exactly as before.
        double updated = (0.15f / getTotalNumVertices()) + 0.85f * incoming;
        setValue(new DoubleWritable(updated));
    }

    if (getSuperstep() >= NUM_ITERATIONS) {
        voteToHalt();
        return;
    }

    long edgeCount = getNumEdges();
    double share = getValue().get() / edgeCount;
    sendMessageToAllEdges(new DoubleWritable(share));
}
From source file:org.apache.mahout.cf.taste.hadoop.als.eval.ParallelFactorizationEvaluator.java
License:Apache License
protected double computeRmse(Path errors) { RunningAverage average = new FullRunningAverage(); for (Pair<DoubleWritable, NullWritable> entry : new SequenceFileDirIterable<DoubleWritable, NullWritable>( errors, PathType.LIST, PathFilters.logsCRCFilter(), getConf())) { DoubleWritable error = entry.getFirst(); average.addDatum(error.get() * error.get()); }//from w ww . jav a 2 s . c om return Math.sqrt(average.getAverage()); }
From source file:org.apache.mahout.cf.taste.hadoop.als.FactorizationEvaluator.java
License:Apache License
double computeRmse(Path errors) { RunningAverage average = new FullRunningAverage(); for (Pair<DoubleWritable, NullWritable> entry : new SequenceFileDirIterable<DoubleWritable, NullWritable>( errors, PathType.LIST, PathFilters.logsCRCFilter(), getConf())) { DoubleWritable error = entry.getFirst(); average.addDatum(error.get() * error.get()); }//from ww w .ja va 2 s . c om return Math.sqrt(average.getAverage()); }
From source file:org.apache.mahout.classifier.bayes.BayesThetaNormalizerMapper.java
License:Apache License
/**
 * Emits the theta-normalization contribution of a single label,feature pair.
 *
 * <p>The label is the text before the first comma of the key. The emitted weight is
 * {@code log((value + 1.0) / (labelWeightSum.get(label) + vocabCount))}, collected under the key
 * {@code '_' + label} (trimmed).
 *
 * @param key      the label,feature pair
 * @param value    the tfIdf of the pair
 * @param output   collector receiving the {@code _label} / weight record
 * @param reporter not used by this method
 * @throws IOException declared by the mapper contract; this method does not throw it itself
 */
@Override
public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output,
        Reporter reporter) throws IOException {
    final double alpha = 1.0;

    String[] pairParts = key.toString().split(",");
    String label = pairParts[0];

    double numerator = value.get() + alpha;
    double weight = Math.log(numerator / (labelWeightSum.get(label) + vocabCount));

    Text labelKey = new Text(('_' + label).trim());
    output.collect(labelKey, new DoubleWritable(weight));
}
From source file:org.apache.mahout.classifier.bayes.common.BayesTfIdfMapper.java
License:Apache License
/** * We need to calculate the Tf-Idf of each feature in each label * * @param key The label,feature pair (can either be the freq Count or the term Document count *//*from w w w . j a v a 2 s . c om*/ @Override public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException { String labelFeaturePair = key.toString(); char firstChar = labelFeaturePair.charAt(0); switch (firstChar) { case '-': {// if it is the termDocumentCount labelFeaturePair = labelFeaturePair.substring(1); //-17th_century_mathematicians_anderson__alexander,1582 int idx = labelFeaturePair.indexOf(","); if (idx != -1) { String label = labelFeaturePair.substring(0, idx); Double labelDocumentCount = labelDocumentCounts.get(label); if (labelDocumentCount == null) { throw new IOException("Invalid label: " + label); } double logIdf = Math.log(labelDocumentCount / value.get()); output.collect(new Text(labelFeaturePair), new DoubleWritable(logIdf)); } else { throw new IOException("Invalid "); } break; } case ',': { output.collect(new Text("*vocabCount"), new DoubleWritable(1.0)); break; } default: { output.collect(key, value); break; } } }
From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java
License:Apache License
public static void loadWeightMatrix(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern, Configuration conf) throws IOException { StringTuple key = new StringTuple(); DoubleWritable value = new DoubleWritable(); FileStatus[] outputFiles = fs.globStatus(pathPattern); for (FileStatus fileStatus : outputFiles) { Path path = fileStatus.getPath(); log.info("{}", path); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); // the key is label,feature while (reader.next(key, value)) { datastore.loadFeatureWeight(key.stringAt(2), key.stringAt(1), value.get()); }/*from w w w.j a va 2 s. c o m*/ } }
From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java
License:Apache License
public static void loadFeatureWeights(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern, Configuration conf) throws IOException { StringTuple key = new StringTuple(); DoubleWritable value = new DoubleWritable(); FileStatus[] outputFiles = fs.globStatus(pathPattern); for (FileStatus fileStatus : outputFiles) { Path path = fileStatus.getPath(); log.info("{}", path); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); // the key is either _label_ or label,feature long count = 0; while (reader.next(key, value)) { // Sum of weights for a Feature if (key.stringAt(0).equals(BayesConstants.FEATURE_SUM)) { datastore.setSumFeatureWeight(key.stringAt(1), value.get()); count++;/*from w w w. j a v a 2 s .c o m*/ if (count % 50000 == 0) { log.info("Read {} feature weights", count); } } } } }
From source file:org.apache.mahout.classifier.bayes.io.SequenceFileModelReader.java
License:Apache License
public static void loadLabelWeights(InMemoryBayesDatastore datastore, FileSystem fs, Path pathPattern, Configuration conf) throws IOException { StringTuple key = new StringTuple(); DoubleWritable value = new DoubleWritable(); FileStatus[] outputFiles = fs.globStatus(pathPattern); for (FileStatus fileStatus : outputFiles) { Path path = fileStatus.getPath(); log.info("{}", path); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); long count = 0; while (reader.next(key, value)) { // Sum of weights in a Label if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) { datastore.setSumLabelWeight(key.stringAt(1), value.get()); count++;// w w w . j a va 2s . c o m if (count % 10000 == 0) { log.info("Read {} label weights", count); } } } } }