Example usage for org.apache.hadoop.io LongWritable get

Introduction

On this page you can find example usages of the get() method of org.apache.hadoop.io.LongWritable.

Prototype

public long get() 

Document

Return the value of this LongWritable.
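
Before the project examples, here is a minimal, self-contained sketch (not taken from any of the sources below; the class name LongWritableGetExample is just for illustration) showing how get() unwraps the primitive long stored in a LongWritable:

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetExample {
    public static void main(String[] args) {
        LongWritable counter = new LongWritable();   // defaults to 0
        counter.set(42L);                            // store a primitive long
        long raw = counter.get();                    // read it back as a plain long
        System.out.println("stored value: " + raw);  // prints: stored value: 42
    }
}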

Usage

From source file:com.twitter.elephanttwin.io.ListLongWritable.java

License:Apache License

public void add(LongWritable value) {
    LongWritable a = new LongWritable(value.get());
    list.add(a);
}

From source file:com.twitter.elephanttwin.io.LongPairWritable.java

License:Apache License

public LongPairWritable(LongWritable s, LongWritable e) {
    first = s.get();
    second = e.get();
}

From source file:com.twitter.elephanttwin.io.TextLongPairWritable.java

License:Apache License

public TextLongPairWritable(Text t, LongWritable v) {
    key = t;
    value = v.get();
}

From source file:com.twitter.elephanttwin.io.TextLongPairWritable.java

License:Apache License

public void setLong(LongWritable v) {
    value = v.get();
}

From source file:com.twitter.elephanttwin.lzo.retrieval.LZOBlockOffsetMapper.java

License:Open Source License

@Override
public void map(LongWritable key, BinaryWritable<M> value, Context context)
        throws IOException, InterruptedException {

    try {
        columnValue = (String) method.invoke(value.get());
    } catch (Exception e) {
        LOG.error("cannot instantiate the value class to read the input", e);
        throw new IOException(e);
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("key: " + key + "value: " + columnValue);
    }

    if (columnValue == null) {
        columnValue = "";
    }

    long lineOffset = key.get();

    // treat the first row specially.
    // workaround for the current way lzo readers produce key offset for the
    // first row.

    /*
     * Need to deal with the special case where the last input split contains
     * only the last LZO block. This cannot happen in practice, since an HDFS block
     * contains thousands of LZO blocks and Hadoop's split_slope ensures the last split
     * contains at least as many bytes as the previous splits. But it can happen in test cases
     * where we manipulate the HDFS block size and the LZO block size.
     */

    if (lzoOffsets.length > 2) {
        if (lineOffset == 0)
            lineOffset = lzoOffsets[1];

        if (lineOffset == previousRowLineOffset) {
            totalRowsInABlock++;
            // still in the same LZO block; increase the count of rows in this block.
        } else {
            // a new lzo block, set up the outputValue to be sent to reducer.
            if (LOG.isDebugEnabled()) {
                LOG.debug("totalRowsInABlock is:" + totalRowsInABlock + " in [" + previousRowLineOffset + ","
                        + lineOffset + "]");
            }

            totalRowsInABlock = 1;
            // for a very long row that spans many LZO blocks, we need to advance
            // currentLzoBlock multiple times.
            while (currentLzoBlock <= totalLZOBlocks && (lzoOffsets[currentLzoBlock] < lineOffset))
                currentLzoBlock++;

            /*
             * Logically the following should be the right way to index the lzo
             * blocks. However, due to the way the current lzo readers produce key
             * offset, we have to do some compensation in indexing in order to make it
             * work correctly.
             */
            /*
             * outputValue.setFirst(currentLzoBlockStartOffset);
             * outputValue.setSecond(currentLzoBlockEndOffset);
             * pair.setLong(currentLzoBlockStartOffset);
             */

            // the real start offset is either 2 blocks back, or even further if
            // this row is long and spans many LZO blocks.
            outputValue.setFirst(Math.min(previousRowLineOffset, lzoOffsets[Math.max(0, currentLzoBlock - 2)]));

            /* We need to treat the last (two) LZO blocks differently from any other
             * LZO block: due to the current LZO readers, we cannot distinguish
             * a row in the last LZO block from a row in the second-to-last LZO
             * block. The solution is to combine the last two LZO blocks.
             */
            if (lineOffset >= lastLZOBlockStartOffset)
                outputValue.setSecond(fileSize);
            else
                outputValue.setSecond(lineOffset);

            if (LOG.isDebugEnabled()) {
                LOG.debug("outputValue:" + outputValue);
            }

            if (outputValue.getSecond() <= outputValue.getFirst()) {
                throw new RuntimeException("Index Block end offset is not more than start offset:");
            }
        }
    }
    if (!map.containsKey(columnValue)) {
        map.put(columnValue, new LongPairWritable(outputValue));
    } else {

        LongPairWritable prevPair = map.get(columnValue);

        if (prevPair.getSecond() > outputValue.getFirst() && outputValue.getFirst() < prevPair.getFirst()) {
            throw new RuntimeException("error: overalapping index blocks at offset: " + key);
        }

        /*
         * If we can combine the two blocks, combine them; otherwise send the
         * previously stored index entry to the reducer and store the new index entry.
         * Two conditions must be met to combine:
         *  a) the blocks are "adjacent" to each other;
         *  b) the combined size cannot exceed the threshold.
         */
        if (prevPair.getSecond() + gapsize < outputValue.getFirst()
                || outputValue.getSecond() - prevPair.getFirst() > maxBlockSize) {
            context.write(new TextLongPairWritable(new Text(columnValue), prevPair.getFirst()), prevPair);
            if (LOG.isDebugEnabled()) {
                LOG.debug("write to reducer: " + prevPair);
            }

            map.put(columnValue, new LongPairWritable(outputValue));
        } else {
            prevPair.setSecond(outputValue.getSecond());
            map.put(columnValue, prevPair);
        }
    }

    previousRowLineOffset = lineOffset;
}

From source file:com.twitter.hraven.FlowKey.java

License:Apache License

@Override
public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    LongWritable lw = new LongWritable();
    lw.readFields(in);
    this.runId = lw.get();
}

From source file:com.twitter.hraven.JobId.java

License:Apache License

@Override
public void readFields(DataInput in) throws IOException {
    LongWritable lw = new LongWritable();
    lw.readFields(in);
    this.jobEpoch = lw.get();
    lw.readFields(in);
    this.jobSequence = lw.get();
}

From source file:com.uber.hoodie.hadoop.realtime.HoodieRealtimeRecordReaderTest.java

License:Apache License

@Test
public void testReaderWithNestedAndComplexSchema() throws Exception {
    // initial commit
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema());
    HoodieTestUtils.initTableType(hadoopConf, basePath.getRoot().getAbsolutePath(),
            HoodieTableType.MERGE_ON_READ);
    String commitTime = "100";
    int numberOfRecords = 100;
    int numberOfLogRecords = numberOfRecords / 2;
    File partitionDir = InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, numberOfRecords,
            commitTime);
    InputFormatTestUtil.commit(basePath, commitTime);
    // Add the paths
    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());

    // update files or generate new log file
    String newCommitTime = "101";
    HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime,
            numberOfLogRecords);
    long size = writer.getCurrentSize();
    writer.close();
    assertTrue("block - size should be > 0", size > 0);
    InputFormatTestUtil.deltaCommit(basePath, newCommitTime);

    //create a split with baseFile (parquet file written earlier) and new log file(s)
    String logFilePath = writer.getLogFile().getPath().toString();
    HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
            new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
            basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);

    //create a RecordReader to be used by HoodieRealtimeRecordReader
    RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
            new FileSplit(split.getPath(), 0, fs.getLength(split.getPath()), (String[]) null), jobConf, null);
    JobConf jobConf = new JobConf();
    List<Schema.Field> fields = schema.getFields();

    String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(","));
    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    jobConf.set("partition_columns", "datestr");

    // validate record reader compaction
    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader);

    // use reader to read base Parquet File and log file, merge in flight and return latest commit
    // here the first 50 records should be updated, see above
    NullWritable key = recordReader.createKey();
    ArrayWritable value = recordReader.createValue();
    int numRecordsRead = 0;
    while (recordReader.next(key, value)) {
        int currentRecordNo = numRecordsRead;
        ++numRecordsRead;
        Writable[] values = value.get();
        String recordCommitTime;
        // check whether the record was written with the latest commit, here "101"
        if (numRecordsRead > numberOfLogRecords) {
            recordCommitTime = commitTime;
        } else {
            recordCommitTime = newCommitTime;
        }
        String recordCommitTimeSuffix = "@" + recordCommitTime;

        Assert.assertEquals(values[0].toString(), recordCommitTime);
        key = recordReader.createKey();
        value = recordReader.createValue();

        // Assert type STRING
        Assert.assertEquals("test value for field: field1", values[5].toString(), "field" + currentRecordNo);
        Assert.assertEquals("test value for field: field2", values[6].toString(),
                "field" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: name", values[7].toString(), "name" + currentRecordNo);

        // Assert type INT
        IntWritable intWritable = (IntWritable) values[8];
        Assert.assertEquals("test value for field: favoriteIntNumber", intWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type LONG
        LongWritable longWritable = (LongWritable) values[9];
        Assert.assertEquals("test value for field: favoriteNumber", longWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type FLOAT
        FloatWritable floatWritable = (FloatWritable) values[10];
        Assert.assertEquals("test value for field: favoriteFloatNumber", floatWritable.get(),
                (float) ((currentRecordNo + recordCommitTime.hashCode()) / 1024.0), 0);

        // Assert type DOUBLE
        DoubleWritable doubleWritable = (DoubleWritable) values[11];
        Assert.assertEquals("test value for field: favoriteDoubleNumber", doubleWritable.get(),
                (currentRecordNo + recordCommitTime.hashCode()) / 1024.0, 0);

        // Assert type MAP
        ArrayWritable mapItem = (ArrayWritable) values[12];
        Writable mapItemValue1 = mapItem.get()[0];
        Writable mapItemValue2 = mapItem.get()[1];

        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue1).get()[0].toString(),
                "mapItem1");
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue2).get()[0].toString(),
                "mapItem2");
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue1).get().length, 2);
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue2).get().length, 2);
        Writable mapItemValue1value = ((ArrayWritable) mapItemValue1).get()[1];
        Writable mapItemValue2value = ((ArrayWritable) mapItemValue2).get()[1];
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item1",
                ((ArrayWritable) mapItemValue1value).get()[0].toString(), "item" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item1",
                ((ArrayWritable) mapItemValue2value).get()[0].toString(), "item2" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item2",
                ((ArrayWritable) mapItemValue1value).get()[1].toString(),
                "item" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item2",
                ((ArrayWritable) mapItemValue2value).get()[1].toString(),
                "item2" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type RECORD
        ArrayWritable recordItem = (ArrayWritable) values[13];
        Writable[] nestedRecord = recordItem.get();
        Assert.assertEquals("test value for field: testNestedRecord.isAdmin",
                ((BooleanWritable) nestedRecord[0]).get(), false);
        Assert.assertEquals("test value for field: testNestedRecord.userId", nestedRecord[1].toString(),
                "UserId" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type ARRAY
        ArrayWritable arrayValue = (ArrayWritable) values[14];
        Writable[] arrayValues = arrayValue.get();
        for (int i = 0; i < arrayValues.length; i++) {
            Assert.assertEquals("test value for field: stringArray", "stringArray" + i + recordCommitTimeSuffix,
                    arrayValues[i].toString());
        }
    }
}

From source file:core.advanced.ConnectedComponentsVertex.java

License:Apache License

/**
 * Propagates the smallest vertex id to all neighbors. Will always choose to
 * halt and only reactivate if a smaller id has been sent to it.
 *
 * @param messages Iterator of messages from the previous superstep.
 * @throws IOException
 */
@Override
public void compute(Iterable<LongWritable> messages) throws IOException {
    long currentComponent = getValue().get();

    // First superstep is special, because we can simply look at the neighbors
    if (getSuperstep() == 0) {
        for (Edge<LongWritable, NullWritable> edge : getEdges()) {
            long neighbor = edge.getTargetVertexId().get();
            if (neighbor < currentComponent) {
                currentComponent = neighbor;
            }
        }
        // Only need to send the value if it is not the vertex's own id
        if (currentComponent != getValue().get()) {
            setValue(new LongWritable(currentComponent));
            for (Edge<LongWritable, NullWritable> edge : getEdges()) {
                LongWritable neighbor = edge.getTargetVertexId();
                if (neighbor.get() > currentComponent) {
                    sendMessage(neighbor, getValue());
                }
            }
        }

        voteToHalt();
        return;
    }

    boolean changed = false;
    // did we get a smaller id ?
    for (LongWritable message : messages) {
        long candidateComponent = message.get();
        if (candidateComponent < currentComponent) {
            currentComponent = candidateComponent;
            changed = true;
        }
    }

    // propagate new component id to the neighbors
    if (changed) {
        setValue(new LongWritable(currentComponent));
        sendMessageToAllEdges(getValue());
    }
    voteToHalt();
}

From source file:crunch.MaxTemperature.java

License:Apache License

private void checkNextLine(RecordReader<LongWritable, Text> recordReader, long expectedKey,
        String expectedValue) throws IOException {
    LongWritable key = new LongWritable();
    Text value = new Text();
    assertThat(expectedValue, recordReader.next(key, value), is(true));
    assertThat(key.get(), is(expectedKey));
    assertThat(value.toString(), is(expectedValue));
}