List of usage examples for org.apache.hadoop.io LongWritable get
public long get()
Returns the value of this LongWritable as a primitive long.
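Before the examples from real projects, here is a minimal self-contained sketch (not taken from any of the sources below) of how get() pairs with set(long) and with the Writable serialization that several of the examples rely on:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;

public class LongWritableGetDemo {
    public static void main(String[] args) throws IOException {
        LongWritable w = new LongWritable(42L);
        long raw = w.get();   // unwrap the primitive long
        w.set(raw + 1);       // Writables are mutable and meant to be reused

        // Round-trip through Writable serialization, as the readFields()
        // examples below do.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        w.write(new DataOutputStream(bytes));
        LongWritable copy = new LongWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.get()); // prints 43
    }
}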
From source file:com.twitter.elephanttwin.io.ListLongWritable.java
License:Apache License
public void add(LongWritable value) {
    LongWritable a = new LongWritable(value.get());
    list.add(a);
}
From source file:com.twitter.elephanttwin.io.LongPairWritable.java
License:Apache License
public LongPairWritable(LongWritable s, LongWritable e) {
    first = s.get();
    second = e.get();
}
From source file:com.twitter.elephanttwin.io.TextLongPairWritable.java
License:Apache License
public TextLongPairWritable(Text t, LongWritable v) {
    key = t;
    value = v.get();
}
From source file:com.twitter.elephanttwin.io.TextLongPairWritable.java
License:Apache License
public void setLong(LongWritable v) {
    value = v.get();
}
From source file:com.twitter.elephanttwin.lzo.retrieval.LZOBlockOffsetMapper.java
License:Open Source License
@Override
public void map(LongWritable key, BinaryWritable<M> value, Context context)
        throws IOException, InterruptedException {
    try {
        columnValue = (String) method.invoke(value.get());
    } catch (Exception e) {
        LOG.error("cannot instantiate the value class to read the input", e);
        throw new IOException(e);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("key: " + key + " value: " + columnValue);
    }
    if (columnValue == null) {
        columnValue = "";
    }

    long lineOffset = key.get();
    // Treat the first row specially: a workaround for the way the current lzo
    // readers produce the key offset for the first row.
    /*
     * We need to deal with the special case in which the last input split has
     * only the last lzo block. This cannot happen in practice, since an HDFS
     * block contains thousands of lzo blocks and Hadoop uses SPLIT_SLOP to
     * make sure the last split contains at least as many bytes as the
     * previous splits. But it can happen in test cases where we manipulate
     * the HDFS block size and the lzo block size.
     */
    if (lzoOffsets.length > 2) {
        if (lineOffset == 0) {
            lineOffset = lzoOffsets[1];
        }
        if (lineOffset == previousRowLineOffset) {
            // Same lzo block: increase the count of rows in this block.
            totalRowsInABlock++;
        } else {
            // A new lzo block: set up the outputValue to be sent to the reducer.
            if (LOG.isDebugEnabled()) {
                LOG.debug("totalRowsInABlock is: " + totalRowsInABlock + " in ["
                        + previousRowLineOffset + "," + lineOffset + "]");
            }
            totalRowsInABlock = 1;
            // For a very long row spanning many lzo blocks, we need to advance
            // currentLzoBlock multiple times.
            while (currentLzoBlock <= totalLZOBlocks && lzoOffsets[currentLzoBlock] < lineOffset) {
                currentLzoBlock++;
            }
            /*
             * Logically, the following would be the right way to index the lzo
             * blocks. However, due to the way the current lzo readers produce
             * the key offset, we have to compensate in the indexing to make it
             * work correctly:
             *
             *   outputValue.setFirst(currentLzoBlockStartOffset);
             *   outputValue.setSecond(currentLzoBlockEndOffset);
             *   pair.setLong(currentLzoBlockStartOffset);
             */
            // The real start offset is either two blocks back, or even further
            // back if this row is long and spans many lzo blocks.
            outputValue.setFirst(
                    Math.min(previousRowLineOffset, lzoOffsets[Math.max(0, currentLzoBlock - 2)]));
            /*
             * We need to treat the last (two) lzo blocks differently from any
             * other lzo block because, with the current lzo readers, we cannot
             * distinguish a row from the last lzo block from a row from the
             * second-to-last lzo block. The solution is to combine the last
             * two lzo blocks.
             */
            if (lineOffset >= lastLZOBlockStartOffset) {
                outputValue.setSecond(fileSize);
            } else {
                outputValue.setSecond(lineOffset);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("outputValue:" + outputValue);
            }
            if (outputValue.getSecond() <= outputValue.getFirst()) {
                throw new RuntimeException("Index block end offset is not more than start offset");
            }
        }
    }

    if (!map.containsKey(columnValue)) {
        map.put(columnValue, new LongPairWritable(outputValue));
    } else {
        LongPairWritable prevPair = map.get(columnValue);
        if (prevPair.getSecond() > outputValue.getFirst()
                && outputValue.getFirst() < prevPair.getFirst()) {
            throw new RuntimeException("error: overlapping index blocks at offset: " + key);
        }
        /*
         * If we can combine the two blocks, combine them; otherwise send the
         * previously stored index entry to the reducer and store the new
         * index entry. Two conditions must be met to combine:
         *   a) the blocks are "adjacent" to each other;
         *   b) the combined size is not more than the threshold.
         */
        if (prevPair.getSecond() + gapsize < outputValue.getFirst()
                || outputValue.getSecond() - prevPair.getFirst() > maxBlockSize) {
            context.write(new TextLongPairWritable(new Text(columnValue), prevPair.getFirst()), prevPair);
            if (LOG.isDebugEnabled()) {
                LOG.debug("write to reducer: " + prevPair);
            }
            map.put(columnValue, new LongPairWritable(outputValue));
        } else {
            prevPair.setSecond(outputValue.getSecond());
            map.put(columnValue, prevPair);
        }
    }
    previousRowLineOffset = lineOffset;
}
From source file:com.twitter.hraven.FlowKey.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    LongWritable lw = new LongWritable();
    lw.readFields(in);
    this.runId = lw.get();
}
From source file:com.twitter.hraven.JobId.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
    LongWritable lw = new LongWritable();
    lw.readFields(in);
    this.jobEpoch = lw.get();
    lw.readFields(in);
    this.jobSequence = lw.get();
}
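The two hraven readFields() examples above show only the deserialization side. For symmetry, here is a hedged sketch of what a matching write() for JobId could look like, assuming the same field layout; the method body is illustrative, not copied from hraven:

@Override
public void write(DataOutput out) throws IOException {
    // Mirror of readFields() above: push each field through a reusable LongWritable.
    LongWritable lw = new LongWritable();
    lw.set(this.jobEpoch);
    lw.write(out);
    lw.set(this.jobSequence);
    lw.write(out);
}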
From source file:com.uber.hoodie.hadoop.realtime.HoodieRealtimeRecordReaderTest.java
License:Apache License
@Test
public void testReaderWithNestedAndComplexSchema() throws Exception {
    // initial commit
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema());
    HoodieTestUtils.initTableType(hadoopConf, basePath.getRoot().getAbsolutePath(),
            HoodieTableType.MERGE_ON_READ);
    String commitTime = "100";
    int numberOfRecords = 100;
    int numberOfLogRecords = numberOfRecords / 2;
    File partitionDir = InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, numberOfRecords,
            commitTime);
    InputFormatTestUtil.commit(basePath, commitTime);
    // Add the paths
    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());

    // update files or generate new log file
    String newCommitTime = "101";
    HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime,
            numberOfLogRecords);
    long size = writer.getCurrentSize();
    writer.close();
    assertTrue("block - size should be > 0", size > 0);
    InputFormatTestUtil.deltaCommit(basePath, newCommitTime);

    // create a split with baseFile (parquet file written earlier) and new log file(s)
    String logFilePath = writer.getLogFile().getPath().toString();
    HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
            new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
            basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);

    // create a RecordReader to be used by HoodieRealtimeRecordReader
    RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
            new FileSplit(split.getPath(), 0, fs.getLength(split.getPath()), (String[]) null), jobConf, null);
    // Note: this local jobConf shadows the field of the same name from here on.
    JobConf jobConf = new JobConf();
    List<Schema.Field> fields = schema.getFields();
    String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(","));
    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    jobConf.set("partition_columns", "datestr");

    // validate record reader compaction
    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader);

    // Use the reader to read the base Parquet file and the log file, merge in
    // flight, and return the latest commit; here the first 50 records should
    // be updated, see above.
    NullWritable key = recordReader.createKey();
    ArrayWritable value = recordReader.createValue();
    int numRecordsRead = 0;
    while (recordReader.next(key, value)) {
        int currentRecordNo = numRecordsRead;
        ++numRecordsRead;
        Writable[] values = value.get();
        String recordCommitTime;
        // check whether the record written carries the latest commit, here "101"
        if (numRecordsRead > numberOfLogRecords) {
            recordCommitTime = commitTime;
        } else {
            recordCommitTime = newCommitTime;
        }
        String recordCommitTimeSuffix = "@" + recordCommitTime;
        Assert.assertEquals(values[0].toString(), recordCommitTime);
        key = recordReader.createKey();
        value = recordReader.createValue();

        // Assert type STRING
        Assert.assertEquals("test value for field: field1", values[5].toString(), "field" + currentRecordNo);
        Assert.assertEquals("test value for field: field2", values[6].toString(),
                "field" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: name", values[7].toString(), "name" + currentRecordNo);

        // Assert type INT
        IntWritable intWritable = (IntWritable) values[8];
        Assert.assertEquals("test value for field: favoriteIntNumber", intWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type LONG
        LongWritable longWritable = (LongWritable) values[9];
        Assert.assertEquals("test value for field: favoriteNumber", longWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type FLOAT
        FloatWritable floatWritable = (FloatWritable) values[10];
        Assert.assertEquals("test value for field: favoriteFloatNumber", floatWritable.get(),
                (float) ((currentRecordNo + recordCommitTime.hashCode()) / 1024.0), 0);

        // Assert type DOUBLE
        DoubleWritable doubleWritable = (DoubleWritable) values[11];
        Assert.assertEquals("test value for field: favoriteDoubleNumber", doubleWritable.get(),
                (currentRecordNo + recordCommitTime.hashCode()) / 1024.0, 0);

        // Assert type MAP
        ArrayWritable mapItem = (ArrayWritable) values[12];
        Writable mapItemValue1 = mapItem.get()[0];
        Writable mapItemValue2 = mapItem.get()[1];
        Assert.assertEquals("test value for field: tags",
                ((ArrayWritable) mapItemValue1).get()[0].toString(), "mapItem1");
        Assert.assertEquals("test value for field: tags",
                ((ArrayWritable) mapItemValue2).get()[0].toString(), "mapItem2");
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue1).get().length, 2);
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue2).get().length, 2);
        Writable mapItemValue1value = ((ArrayWritable) mapItemValue1).get()[1];
        Writable mapItemValue2value = ((ArrayWritable) mapItemValue2).get()[1];
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item1",
                ((ArrayWritable) mapItemValue1value).get()[0].toString(), "item" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item1",
                ((ArrayWritable) mapItemValue2value).get()[0].toString(), "item2" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item2",
                ((ArrayWritable) mapItemValue1value).get()[1].toString(),
                "item" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item2",
                ((ArrayWritable) mapItemValue2value).get()[1].toString(),
                "item2" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type RECORD
        ArrayWritable recordItem = (ArrayWritable) values[13];
        Writable[] nestedRecord = recordItem.get();
        Assert.assertEquals("test value for field: testNestedRecord.isAdmin",
                ((BooleanWritable) nestedRecord[0]).get(), false);
        Assert.assertEquals("test value for field: testNestedRecord.userId", nestedRecord[1].toString(),
                "UserId" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type ARRAY
        ArrayWritable arrayValue = (ArrayWritable) values[14];
        Writable[] arrayValues = arrayValue.get();
        for (int i = 0; i < arrayValues.length; i++) {
            Assert.assertEquals("test value for field: stringArray",
                    "stringArray" + i + recordCommitTimeSuffix, arrayValues[i].toString());
        }
    }
}
From source file:core.advanced.ConnectedComponentsVertex.java
License:Apache License
/**
 * Propagates the smallest vertex id to all neighbors. Always chooses to halt,
 * and only reactivates if a smaller id has been sent to it.
 *
 * @param messages Iterator of messages from the previous superstep.
 * @throws IOException
 */
@Override
public void compute(Iterable<LongWritable> messages) throws IOException {
    long currentComponent = getValue().get();

    // The first superstep is special, because we can simply look at the neighbors.
    if (getSuperstep() == 0) {
        for (Edge<LongWritable, NullWritable> edge : getEdges()) {
            long neighbor = edge.getTargetVertexId().get();
            if (neighbor < currentComponent) {
                currentComponent = neighbor;
            }
        }
        // Only need to send the value if it is not our own id.
        if (currentComponent != getValue().get()) {
            setValue(new LongWritable(currentComponent));
            for (Edge<LongWritable, NullWritable> edge : getEdges()) {
                LongWritable neighbor = edge.getTargetVertexId();
                if (neighbor.get() > currentComponent) {
                    sendMessage(neighbor, getValue());
                }
            }
        }
        voteToHalt();
        return;
    }

    boolean changed = false;
    // Did we receive a smaller id?
    for (LongWritable message : messages) {
        long candidateComponent = message.get();
        if (candidateComponent < currentComponent) {
            currentComponent = candidateComponent;
            changed = true;
        }
    }

    // Propagate the new component id to the neighbors.
    if (changed) {
        setValue(new LongWritable(currentComponent));
        sendMessageToAllEdges(getValue());
    }
    voteToHalt();
}
From source file:crunch.MaxTemperature.java
License:Apache License
private void checkNextLine(RecordReader<LongWritable, Text> recordReader, long expectedKey,
        String expectedValue) throws IOException {
    LongWritable key = new LongWritable();
    Text value = new Text();
    assertThat(expectedValue, recordReader.next(key, value), is(true));
    assertThat(key.get(), is(expectedKey));
    assertThat(value.toString(), is(expectedValue));
}