List of usage examples for org.apache.hadoop.io.DoubleWritable#get()
public double get()
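Before the project examples below, here is a minimal self-contained sketch (not taken from any of the sources listed; the class name is illustrative) showing that get() simply returns the primitive double held by the writable, including after a Writable serialization round trip:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableGetExample {
    public static void main(String[] args) throws IOException {
        // get() returns the primitive double stored in the writable
        DoubleWritable dw = new DoubleWritable(3.14);
        double value = dw.get(); // 3.14

        // the value survives a Writable serialization round trip
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        dw.write(new DataOutputStream(bytes));

        DoubleWritable copy = new DoubleWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(value == copy.get()); // prints true
    }
}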
From source file:com.talis.labs.pagerank.mapreduce.CheckConvergenceReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double tolerance = 0;
    for (DoubleWritable value : values) {
        tolerance += value.get();
    }
    context.write(key, new DoubleWritable(tolerance));
}
From source file:com.talis.labs.pagerank.mapreduce.DanglingPagesReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double sum = 0;
    for (DoubleWritable value : values) {
        sum += value.get();
    }
    context.write(key, new DoubleWritable(sum));
}
From source file:com.uber.hoodie.hadoop.realtime.HoodieRealtimeRecordReaderTest.java
License:Apache License
@Test
public void testReaderWithNestedAndComplexSchema() throws Exception {
    // initial commit
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema());
    HoodieTestUtils.initTableType(hadoopConf, basePath.getRoot().getAbsolutePath(),
            HoodieTableType.MERGE_ON_READ);
    String commitTime = "100";
    int numberOfRecords = 100;
    int numberOfLogRecords = numberOfRecords / 2;
    File partitionDir = InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, numberOfRecords,
            commitTime);
    InputFormatTestUtil.commit(basePath, commitTime);
    // Add the paths
    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());

    // update files or generate new log file
    String newCommitTime = "101";
    HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime,
            numberOfLogRecords);
    long size = writer.getCurrentSize();
    writer.close();
    assertTrue("block - size should be > 0", size > 0);
    InputFormatTestUtil.deltaCommit(basePath, newCommitTime);

    // create a split with baseFile (parquet file written earlier) and new log file(s)
    String logFilePath = writer.getLogFile().getPath().toString();
    HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
            new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
            basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);

    // create a RecordReader to be used by HoodieRealtimeRecordReader
    RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
            new FileSplit(split.getPath(), 0, fs.getLength(split.getPath()), (String[]) null), jobConf, null);
    JobConf jobConf = new JobConf();
    List<Schema.Field> fields = schema.getFields();
    String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(","));
    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    jobConf.set("partition_columns", "datestr");

    // validate record reader compaction
    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader);

    // use reader to read base Parquet File and log file, merge in flight and return latest commit
    // here the first 50 records should be updated, see above
    NullWritable key = recordReader.createKey();
    ArrayWritable value = recordReader.createValue();
    int numRecordsRead = 0;
    while (recordReader.next(key, value)) {
        int currentRecordNo = numRecordsRead;
        ++numRecordsRead;
        Writable[] values = value.get();
        String recordCommitTime;
        // check if the record written is with latest commit, here "101"
        if (numRecordsRead > numberOfLogRecords) {
            recordCommitTime = commitTime;
        } else {
            recordCommitTime = newCommitTime;
        }
        String recordCommitTimeSuffix = "@" + recordCommitTime;

        Assert.assertEquals(values[0].toString(), recordCommitTime);
        key = recordReader.createKey();
        value = recordReader.createValue();

        // Assert type STRING
        Assert.assertEquals("test value for field: field1", values[5].toString(), "field" + currentRecordNo);
        Assert.assertEquals("test value for field: field2", values[6].toString(),
                "field" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: name", values[7].toString(), "name" + currentRecordNo);

        // Assert type INT
        IntWritable intWritable = (IntWritable) values[8];
        Assert.assertEquals("test value for field: favoriteIntNumber", intWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type LONG
        LongWritable longWritable = (LongWritable) values[9];
        Assert.assertEquals("test value for field: favoriteNumber", longWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type FLOAT
        FloatWritable floatWritable = (FloatWritable) values[10];
        Assert.assertEquals("test value for field: favoriteFloatNumber", floatWritable.get(),
                (float) ((currentRecordNo + recordCommitTime.hashCode()) / 1024.0), 0);

        // Assert type DOUBLE
        DoubleWritable doubleWritable = (DoubleWritable) values[11];
        Assert.assertEquals("test value for field: favoriteDoubleNumber", doubleWritable.get(),
                (currentRecordNo + recordCommitTime.hashCode()) / 1024.0, 0);

        // Assert type MAP
        ArrayWritable mapItem = (ArrayWritable) values[12];
        Writable mapItemValue1 = mapItem.get()[0];
        Writable mapItemValue2 = mapItem.get()[1];
        Assert.assertEquals("test value for field: tags",
                ((ArrayWritable) mapItemValue1).get()[0].toString(), "mapItem1");
        Assert.assertEquals("test value for field: tags",
                ((ArrayWritable) mapItemValue2).get()[0].toString(), "mapItem2");
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue1).get().length, 2);
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue2).get().length, 2);
        Writable mapItemValue1value = ((ArrayWritable) mapItemValue1).get()[1];
        Writable mapItemValue2value = ((ArrayWritable) mapItemValue2).get()[1];
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item1",
                ((ArrayWritable) mapItemValue1value).get()[0].toString(), "item" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item1",
                ((ArrayWritable) mapItemValue2value).get()[0].toString(), "item2" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item2",
                ((ArrayWritable) mapItemValue1value).get()[1].toString(),
                "item" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item2",
                ((ArrayWritable) mapItemValue2value).get()[1].toString(),
                "item2" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type RECORD
        ArrayWritable recordItem = (ArrayWritable) values[13];
        Writable[] nestedRecord = recordItem.get();
        Assert.assertEquals("test value for field: testNestedRecord.isAdmin",
                ((BooleanWritable) nestedRecord[0]).get(), false);
        Assert.assertEquals("test value for field: testNestedRecord.userId", nestedRecord[1].toString(),
                "UserId" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type ARRAY
        ArrayWritable arrayValue = (ArrayWritable) values[14];
        Writable[] arrayValues = arrayValue.get();
        for (int i = 0; i < arrayValues.length; i++) {
            Assert.assertEquals("test value for field: stringArray",
                    "stringArray" + i + recordCommitTimeSuffix, arrayValues[i].toString());
        }
    }
}
From source file:com.zqh.hadoop.mr.Financial.HighLowDayReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double high = 0;
    double low = Double.MAX_VALUE;
    // Go through all values to find the high and low
    for (DoubleWritable value : values) {
        if (value.get() > high) {
            high = value.get();
        }
        if (value.get() < low) {
            low = value.get();
        }
    }
    Text value = new Text("High:" + high + " Low:" + low);
    context.write(key, value);
}
From source file:eagle.storage.hbase.aggregate.coprocessor.TestGroupAggregateClient.java
License:Apache License
private void logGroupbyKeyValue(List<GroupbyKeyValue> keyValues) {
    for (GroupbyKeyValue keyValue : keyValues) {
        GroupbyKey key = keyValue.getKey();
        List<String> keys = new ArrayList<String>();
        for (BytesWritable bytes : key.getValue()) {
            keys.add(new String(bytes.copyBytes()));
        }
        List<Double> vals = new ArrayList<Double>();
        GroupbyValue val = keyValue.getValue();
        for (DoubleWritable dw : val.getValue()) {
            vals.add(dw.get());
        }
        if (LOG.isDebugEnabled())
            LOG.debug("KEY: " + keys + ", VALUE: " + vals);
    }
}
From source file:edu.ub.ahstfg.kmeans.document.DocumentCentroid.java
License:Open Source License
@Override
public void readFields(DataInput in) throws IOException {
    ArrayWritable k = new ArrayWritable(IntWritable.class);
    k.readFields(in);
    keywordVector = WritableConverter.arrayWritable2ShortArray(k);
    ArrayWritable t = new ArrayWritable(IntWritable.class);
    t.readFields(in);
    termVector = WritableConverter.arrayWritable2ShortArray(t);
    DoubleWritable dist = new DoubleWritable();
    dist.readFields(in);
    distance = dist.get();
}
From source file:edu.uci.ics.pregelix.benchmark.vertex.PageRankVertex.java
License:Apache License
@Override
public void compute(Iterable<DoubleWritable> messages) throws IOException {
    if (getSuperstep() >= 1) {
        float sum = 0;
        for (DoubleWritable message : messages) {
            sum += message.get();
        }
        getValue().set((0.15f / getTotalNumVertices()) + 0.85f * sum);
    }
    if (getSuperstep() < maxSuperStep) {
        sendMessageToAllEdges(new DoubleWritable(getValue().get() / getNumEdges()));
    } else {
        voteToHalt();
    }
}
From source file:edu.uci.ics.pregelix.benchmark.vertex.ShortestPathsVertex.java
License:Apache License
@Override
public void compute(Iterable<DoubleWritable> messages) throws IOException {
    if (getSuperstep() == 0) {
        setValue(new DoubleWritable(Double.MAX_VALUE));
    }
    double minDist = isSource() ? 0d : Double.MAX_VALUE;
    for (DoubleWritable message : messages) {
        minDist = Math.min(minDist, message.get());
    }
    if (minDist < getValue().get()) {
        setValue(new DoubleWritable(minDist));
        for (Edge<VLongWritable, DoubleWritable> edge : getEdges()) {
            double distance = minDist + edge.getValue().get();
            sendMessage(edge.getTargetVertexId(), new DoubleWritable(distance));
        }
    }
    voteToHalt();
}
From source file:edu.udel.mxv.MxvRed.java
@Override
protected void reduce(IntWritable row, Iterable<DoubleWritable> values,
        Reducer<IntWritable, DoubleWritable, IntWritable, DoubleWritable>.Context context)
        throws IOException, InterruptedException {
    double result = 0;
    for (DoubleWritable val : values) {
        result += val.get();
    }
    yi.set(result);
    context.write(row, yi);
}
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
License:Open Source License
/**
 * A MapReduce version of KNN query.
 * @param fs
 * @param inputPath
 * @param queryPoint
 * @param shape
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNN.class);
    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);
    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);
    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);
    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {
                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition;
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}