Example usage for org.apache.hadoop.io DoubleWritable get

List of usage examples for org.apache.hadoop.io DoubleWritable get

Introduction

On this page you can find example usages of the get() method of org.apache.hadoop.io.DoubleWritable.

Prototype

public double get() 

Source Link

Usage

From source file:com.talis.labs.pagerank.mapreduce.CheckConvergenceReducer.java

License:Apache License

/**
 * Adds up every value received for {@code key} and writes the total back under the same key.
 *
 * @param key     key shared by all of {@code values}; reused unchanged as the output key
 * @param values  values to add together
 * @param context sink that receives the single {@code (key, total)} record
 * @throws IOException          if writing to {@code context} fails
 * @throws InterruptedException if writing to {@code context} is interrupted
 */
@Override
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double accumulated = 0;
    for (DoubleWritable current : values) {
        accumulated = accumulated + current.get();
    }
    DoubleWritable total = new DoubleWritable(accumulated);
    context.write(key, total);
}

From source file:com.talis.labs.pagerank.mapreduce.DanglingPagesReducer.java

License:Apache License

/**
 * Emits one record per key whose value is the sum of all values grouped under that key.
 *
 * @param key     grouping key, written back unchanged
 * @param values  values to be summed
 * @param context output collector for the {@code (key, sum)} pair
 * @throws IOException          if the write fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double runningSum = 0;
    for (DoubleWritable writable : values) {
        double part = writable.get();
        runningSum += part;
    }
    context.write(key, new DoubleWritable(runningSum));
}

From source file:com.uber.hoodie.hadoop.realtime.HoodieRealtimeRecordReaderTest.java

License:Apache License

/**
 * Reads a base parquet file merged with a log file through {@code HoodieRealtimeRecordReader}
 * and asserts the content of every projected field for a schema with nested and complex types
 * (string, int, long, float, double, map, record and array columns).
 *
 * <p>The dataset is prepared at commit time "100" with {@code numberOfRecords} records, and a log
 * file is written at commit time "101" with {@code numberOfLogRecords} records. The assertions
 * expect the first {@code numberOfLogRecords} records returned by the reader to carry the newer
 * commit time and all later records to carry the original one.
 */
@Test
public void testReaderWithNestedAndComplexSchema() throws Exception {
    // initial commit
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema());
    HoodieTestUtils.initTableType(hadoopConf, basePath.getRoot().getAbsolutePath(),
            HoodieTableType.MERGE_ON_READ);
    String commitTime = "100";
    int numberOfRecords = 100;
    int numberOfLogRecords = numberOfRecords / 2;
    File partitionDir = InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, numberOfRecords,
            commitTime);
    InputFormatTestUtil.commit(basePath, commitTime);
    // Add the paths
    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());

    // update files or generate new log file
    String newCommitTime = "101";
    HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime,
            numberOfLogRecords);
    long size = writer.getCurrentSize();
    writer.close();
    assertTrue("block - size should be > 0", size > 0);
    InputFormatTestUtil.deltaCommit(basePath, newCommitTime);

    // create a split with baseFile (parquet file written earlier) and new log file(s)
    String logFilePath = writer.getLogFile().getPath().toString();
    HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
            new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
            basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);

    // create a RecordReader to be used by HoodieRealtimeRecordReader
    RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
            new FileSplit(split.getPath(), 0, fs.getLength(split.getPath()), (String[]) null), jobConf, null);
    // NOTE(review): from this line on, `jobConf` is this fresh local. Every use above resolves to
    // a same-named variable declared outside this method — confirm the shadowing is intended.
    JobConf jobConf = new JobConf();
    List<Schema.Field> fields = schema.getFields();

    // Project every column of the schema, by name and by position.
    String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(","));
    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    jobConf.set("partition_columns", "datestr");

    // validate record reader compaction
    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader);

    // use reader to read base Parquet File and log file, merge in flight and return latest commit
    // here the first 50 records should be updated, see above
    NullWritable key = recordReader.createKey();
    ArrayWritable value = recordReader.createValue();
    int numRecordsRead = 0;
    while (recordReader.next(key, value)) {
        // Zero-based index of the record being checked; numRecordsRead is the one-based count.
        int currentRecordNo = numRecordsRead;
        ++numRecordsRead;
        Writable[] values = value.get();
        String recordCommitTime;
        // check if the record written is with latest commit, here "101"
        if (numRecordsRead > numberOfLogRecords) {
            recordCommitTime = commitTime;
        } else {
            recordCommitTime = newCommitTime;
        }
        String recordCommitTimeSuffix = "@" + recordCommitTime;

        // NOTE(review): most assertions below pass the value read from the record first and the
        // computed value second; if Assert is JUnit's, that is the reverse of its
        // (expected, actual) order and only affects failure messages — confirm.
        Assert.assertEquals(values[0].toString(), recordCommitTime);
        // Fresh key/value holders for the next call to next(); `values` was captured above.
        key = recordReader.createKey();
        value = recordReader.createValue();

        // Assert type STRING
        Assert.assertEquals("test value for field: field1", values[5].toString(), "field" + currentRecordNo);
        Assert.assertEquals("test value for field: field2", values[6].toString(),
                "field" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: name", values[7].toString(), "name" + currentRecordNo);

        // Assert type INT
        IntWritable intWritable = (IntWritable) values[8];
        Assert.assertEquals("test value for field: favoriteIntNumber", intWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type LONG
        LongWritable longWritable = (LongWritable) values[9];
        Assert.assertEquals("test value for field: favoriteNumber", longWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type FLOAT (delta 0: exact match required)
        FloatWritable floatWritable = (FloatWritable) values[10];
        Assert.assertEquals("test value for field: favoriteFloatNumber", floatWritable.get(),
                (float) ((currentRecordNo + recordCommitTime.hashCode()) / 1024.0), 0);

        // Assert type DOUBLE (delta 0: exact match required)
        DoubleWritable doubleWritable = (DoubleWritable) values[11];
        Assert.assertEquals("test value for field: favoriteDoubleNumber", doubleWritable.get(),
                (currentRecordNo + recordCommitTime.hashCode()) / 1024.0, 0);

        // Assert type MAP
        // Each map item is read as a two-element array: element 0 is compared against the map key
        // and element 1 is itself an array holding the nested item1/item2 values.
        ArrayWritable mapItem = (ArrayWritable) values[12];
        Writable mapItemValue1 = mapItem.get()[0];
        Writable mapItemValue2 = mapItem.get()[1];

        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue1).get()[0].toString(),
                "mapItem1");
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue2).get()[0].toString(),
                "mapItem2");
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue1).get().length, 2);
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue2).get().length, 2);
        Writable mapItemValue1value = ((ArrayWritable) mapItemValue1).get()[1];
        Writable mapItemValue2value = ((ArrayWritable) mapItemValue2).get()[1];
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item1",
                ((ArrayWritable) mapItemValue1value).get()[0].toString(), "item" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item1",
                ((ArrayWritable) mapItemValue2value).get()[0].toString(), "item2" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item2",
                ((ArrayWritable) mapItemValue1value).get()[1].toString(),
                "item" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item2",
                ((ArrayWritable) mapItemValue2value).get()[1].toString(),
                "item2" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type RECORD
        ArrayWritable recordItem = (ArrayWritable) values[13];
        Writable[] nestedRecord = recordItem.get();
        Assert.assertEquals("test value for field: testNestedRecord.isAdmin",
                ((BooleanWritable) nestedRecord[0]).get(), false);
        Assert.assertEquals("test value for field: testNestedRecord.userId", nestedRecord[1].toString(),
                "UserId" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type ARRAY
        ArrayWritable arrayValue = (ArrayWritable) values[14];
        Writable[] arrayValues = arrayValue.get();
        for (int i = 0; i < arrayValues.length; i++) {
            Assert.assertEquals("test value for field: stringArray", "stringArray" + i + recordCommitTimeSuffix,
                    arrayValues[i].toString());
        }
    }
}

From source file:com.zqh.hadoop.mr.Financial.HighLowDayReducer.java

License:Apache License

/**
 * Finds the highest and lowest value grouped under {@code key} and writes them as a single
 * {@code Text} of the form {@code "High:<high> Low:<low>"}.
 *
 * <p>The running maximum starts at {@link Double#NEGATIVE_INFINITY} rather than 0, so a group
 * that contains only negative values reports its real maximum instead of a made-up 0.
 *
 * @param key     grouping key, written back unchanged
 * @param values  values to scan
 * @param context output collector for the {@code (key, "High:… Low:…")} record
 * @throws IOException          if the write fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double high = Double.NEGATIVE_INFINITY;
    double low = Double.MAX_VALUE;

    // Go through all values to find the high and low
    for (DoubleWritable value : values) {
        // Read the writable once; plain comparisons (not Math.max/min) so NaN inputs are skipped.
        double current = value.get();
        if (current > high) {
            high = current;
        }

        if (current < low) {
            low = current;
        }
    }

    Text value = new Text("High:" + high + " Low:" + low);

    context.write(key, value);
}

From source file:eagle.storage.hbase.aggregate.coprocessor.TestGroupAggregateClient.java

License:Apache License

/**
 * Logs every group-by entry at DEBUG level as {@code "KEY: [..], VALUE: [..]"}, where the key
 * parts are decoded to strings and the values are unwrapped to doubles.
 *
 * <p>Does nothing when DEBUG logging is disabled, so no decoding or list building is paid for
 * output that would be discarded.
 *
 * @param keyValues entries to log
 */
private void logGroupbyKeyValue(List<GroupbyKeyValue> keyValues) {
    if (!LOG.isDebugEnabled()) {
        return;
    }
    for (GroupbyKeyValue keyValue : keyValues) {
        GroupbyKey key = keyValue.getKey();
        List<String> keys = new ArrayList<String>();
        for (BytesWritable bytes : key.getValue()) {
            // NOTE(review): decodes with the platform default charset — confirm it matches
            // the encoding used when the key bytes were produced.
            keys.add(new String(bytes.copyBytes()));
        }
        List<Double> vals = new ArrayList<Double>();
        GroupbyValue val = keyValue.getValue();
        for (DoubleWritable dw : val.getValue()) {
            vals.add(dw.get());
        }
        LOG.debug("KEY: " + keys + ", VALUE: " + vals);
    }
}

From source file:edu.ub.ahstfg.kmeans.document.DocumentCentroid.java

License:Open Source License

/**
 * Restores this object's state from {@code in}. Three values are read in a fixed order: an
 * {@code ArrayWritable} of {@code IntWritable} converted into {@code keywordVector}, a second one
 * converted into {@code termVector}, and a {@code DoubleWritable} stored into {@code distance}.
 *
 * @param in stream to read from
 * @throws IOException if any of the underlying reads fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    // Keyword vector comes first in the stream.
    ArrayWritable serializedKeywords = new ArrayWritable(IntWritable.class);
    serializedKeywords.readFields(in);
    keywordVector = WritableConverter.arrayWritable2ShortArray(serializedKeywords);

    // Then the term vector.
    ArrayWritable serializedTerms = new ArrayWritable(IntWritable.class);
    serializedTerms.readFields(in);
    termVector = WritableConverter.arrayWritable2ShortArray(serializedTerms);

    // The distance is last.
    DoubleWritable serializedDistance = new DoubleWritable();
    serializedDistance.readFields(in);
    distance = serializedDistance.get();
}

From source file:edu.uci.ics.pregelix.benchmark.vertex.PageRankVertex.java

License:Apache License

/**
 * Runs one superstep of the rank computation for this vertex.
 *
 * <p>From superstep 1 onwards the vertex value is replaced by
 * {@code 0.15f / getTotalNumVertices() + 0.85f * sum}, where {@code sum} is the total of the
 * incoming messages. While the superstep is below {@code maxSuperStep} the vertex sends
 * {@code value / getNumEdges()} along every edge; otherwise it votes to halt.
 *
 * @param messages contributions received for this superstep
 * @throws IOException declared by the overridden method
 */
@Override
public void compute(Iterable<DoubleWritable> messages) throws IOException {
    if (getSuperstep() >= 1) {
        // Accumulate in double: the messages are doubles, and `floatVar += double` hides a
        // narrowing cast that would round the running total on every addition.
        double sum = 0;
        for (DoubleWritable message : messages) {
            sum += message.get();
        }
        getValue().set((0.15f / getTotalNumVertices()) + 0.85f * sum);
    }

    if (getSuperstep() < maxSuperStep) {
        sendMessageToAllEdges(new DoubleWritable(getValue().get() / getNumEdges()));
    } else {
        voteToHalt();
    }
}

From source file:edu.uci.ics.pregelix.benchmark.vertex.ShortestPathsVertex.java

License:Apache License

/**
 * Runs one superstep of the shortest-path computation for this vertex.
 *
 * <p>At superstep 0 the vertex value is initialised to {@code Double.MAX_VALUE}. The candidate
 * distance is the minimum over the incoming messages, seeded with 0 when {@code isSource()} is
 * true and {@code Double.MAX_VALUE} otherwise. When the candidate beats the current value, the
 * value is updated and {@code candidate + edge value} is sent to the target of every edge. The
 * vertex always votes to halt at the end of the call.
 *
 * @param messages distances received for this superstep
 * @throws IOException declared by the overridden method
 */
@Override
public void compute(Iterable<DoubleWritable> messages) throws IOException {
    if (getSuperstep() == 0) {
        setValue(new DoubleWritable(Double.MAX_VALUE));
    }

    double shortest;
    if (isSource()) {
        shortest = 0d;
    } else {
        shortest = Double.MAX_VALUE;
    }
    for (DoubleWritable incoming : messages) {
        shortest = Math.min(shortest, incoming.get());
    }

    boolean improved = shortest < getValue().get();
    if (improved) {
        setValue(new DoubleWritable(shortest));
        for (Edge<VLongWritable, DoubleWritable> outgoing : getEdges()) {
            double viaThisVertex = shortest + outgoing.getValue().get();
            sendMessage(outgoing.getTargetVertexId(), new DoubleWritable(viaThisVertex));
        }
    }

    voteToHalt();
}

From source file:edu.udel.mxv.MxvRed.java

/**
 * Sums all values received for {@code row}, stores the total in the {@code yi} writable that is
 * declared outside this method, and writes {@code (row, yi)} to the context.
 *
 * @param row     key shared by all of {@code values}; written back unchanged
 * @param values  values to add together
 * @param context output collector
 * @throws IOException          if the write fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void reduce(IntWritable row, Iterable<DoubleWritable> values,
        Reducer<IntWritable, DoubleWritable, IntWritable, DoubleWritable>.Context context)
        throws IOException, InterruptedException {

    double rowTotal = 0;
    for (DoubleWritable term : values) {
        rowTotal = rowTotal + term.get();
    }

    yi.set(rowTotal);
    context.write(row, yi);
}

From source file:edu.umn.cs.spatialHadoop.operations.KNN.java

License:Open Source License

/**
 * A MapReduce version of KNN query./*from   ww  w .jav  a  2 s.c om*/
 * @param fs
 * @param inputPath
 * @param queryPoint
 * @param shape
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNN.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition;
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}