List of usage examples for org.apache.hadoop.io.DataInputBuffer.reset
public void reset(byte[] input, int start, int length)
From source file:org.apache.tez.engine.common.shuffle.impl.InMemoryReader.java
License:Apache License
public KeyState readRawKey(DataInputBuffer key) throws IOException { try {/*from www . j av a 2 s . c o m*/ if (!positionToNextRecord(memDataIn)) { return KeyState.NO_KEY; } // Setup the key int pos = memDataIn.getPosition(); byte[] data = memDataIn.getData(); if (currentKeyLength == IFile.RLE_MARKER) { key.reset(data, prevKeyPos, prevKeyLength); currentKeyLength = prevKeyLength; return KeyState.SAME_KEY; } key.reset(data, pos, currentKeyLength); prevKeyPos = pos; // Position for the next value long skipped = memDataIn.skip(currentKeyLength); if (skipped != currentKeyLength) { throw new IOException("Rec# " + recNo + ": Failed to skip past key of length: " + currentKeyLength); } // Record the byte bytesRead += currentKeyLength; return KeyState.NEW_KEY; } catch (IOException ioe) { dumpOnError(); throw ioe; } }
From source file:org.apache.tez.engine.common.shuffle.impl.InMemoryReader.java
License:Apache License
public void nextRawValue(DataInputBuffer value) throws IOException { try {//from w w w. ja v a 2 s . com int pos = memDataIn.getPosition(); byte[] data = memDataIn.getData(); value.reset(data, pos, currentValueLength); // Position for the next record long skipped = memDataIn.skip(currentValueLength); if (skipped != currentValueLength) { throw new IOException( "Rec# " + recNo + ": Failed to skip past value of length: " + currentValueLength); } // Record the byte bytesRead += currentValueLength; ++recNo; } catch (IOException ioe) { dumpOnError(); throw ioe; } }
From source file:org.apache.tez.engine.common.sort.impl.dflt.DefaultSorter.java
License:Apache License
protected void spill(int mstart, int mend) throws IOException, InterruptedException { //approximate the length of the output file to be the length of the //buffer + header lengths for the partitions final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart) + partitions * APPROX_HEADER_LENGTH; FSDataOutputStream out = null;//from w w w .java 2 s.c om try { // create spill file final TezSpillRecord spillRec = new TezSpillRecord(partitions); final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size); out = rfs.create(filename); int spindex = mstart; final InMemValBytes value = createInMemValBytes(); for (int i = 0; i < partitions; ++i) { IFile.Writer writer = null; try { long segmentStart = out.getPos(); writer = new Writer(job, out, keyClass, valClass, codec, spilledRecordsCounter); if (combineProcessor == null) { // spill directly DataInputBuffer key = new DataInputBuffer(); while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) { final int kvoff = offsetFor(spindex); key.reset(kvbuffer, kvmeta.get(kvoff + KEYSTART), (kvmeta.get(kvoff + VALSTART) - kvmeta.get(kvoff + KEYSTART))); getVBytesForOffset(kvoff, value); writer.append(key, value); ++spindex; } } else { int spstart = spindex; while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) { ++spindex; } // Note: we would like to avoid the combiner if we've fewer // than some threshold of records for a partition if (spstart != spindex) { TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex); if (LOG.isDebugEnabled()) { LOG.debug("Running combine processor"); } runCombineProcessor(kvIter, writer); } } // close the writer writer.close(); // record offsets final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength()); spillRec.putIndex(rec, i); writer = null; } finally { if (null != writer) writer.close(); } } if (totalIndexCacheMemory >= indexCacheMemoryLimit) { // 
create spill index file Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH); spillRec.writeToFile(indexFilename, job); } else { indexCacheList.add(spillRec); totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH; } LOG.info("Finished spill " + numSpills); ++numSpills; } finally { if (out != null) out.close(); } }
From source file:org.apache.tez.engine.common.sort.impl.dflt.SortBufferInputStream.java
License:Apache License
@Override public int read(byte[] b, int off, int len) throws IOException { if (available() == 0) { return -1; }/* w w w. j ava 2 s . com*/ int currentOffset = off; int currentLength = len; int currentReadBytes = 0; // Check if there is residual data in the dualBuf int residualLen = out.getCurrent(); if (residualLen > 0) { int readable = Math.min(currentLength, residualLen); System.arraycopy(dualBuf, 0, b, currentOffset, readable); currentOffset += readable; currentReadBytes += readable; out.setCurrentPointer(-readable); // buffer has less capacity currentLength -= readable; if (LOG.isDebugEnabled()) { LOG.debug("XXX read_residual:" + " readable=" + readable + " readBytes=" + readBytes); } } // Now, use the provided buffer if (LOG.isDebugEnabled()) { LOG.debug("XXX read: out.reset" + " b=" + b + " currentOffset=" + currentOffset + " currentLength=" + currentLength + " recIndex=" + recIndex); } out.reset(b, currentOffset, currentLength); // Read from sort-buffer into the provided buffer, space permitting DataInputBuffer key = new DataInputBuffer(); final InMemValBytes value = sorter.createInMemValBytes(); int kvPartition = 0; int numRec = 0; for (; currentLength > 0 && recIndex < mend && (kvPartition = getKVPartition(recIndex)) == partition; ++recIndex) { final int kvoff = sorter.offsetFor(recIndex); int keyLen = (kvmeta.get(kvoff + InMemoryShuffleSorter.VALSTART) - kvmeta.get(kvoff + InMemoryShuffleSorter.KEYSTART)); key.reset(kvbuffer, kvmeta.get(kvoff + InMemoryShuffleSorter.KEYSTART), keyLen); int valLen = sorter.getVBytesForOffset(kvoff, value); int recLen = (keyLen + WritableUtils.getVIntSize(keyLen)) + (valLen + WritableUtils.getVIntSize(valLen)); currentReadBytes += recLen; currentOffset += recLen; currentLength -= recLen; // Write out key/value into the in-mem ifile if (LOG.isDebugEnabled()) { LOG.debug("XXX read: sortOutput.append" + " #rec=" + ++numRec + " recIndex=" + recIndex + " kvoff=" + kvoff + " keyLen=" + keyLen + " valLen=" + valLen + " recLen=" + 
recLen + " readBytes=" + readBytes + " currentReadBytes=" + currentReadBytes + " currentLength=" + currentLength); } sortOutput.append(key, value); } // If we are at the end of the segment, close the ifile if (currentLength > 0 && (recIndex == mend || kvPartition != partition)) { if (LOG.isDebugEnabled()) { LOG.debug("XXX About to call close:" + " currentLength=" + currentLength + " recIndex=" + recIndex + " mend=" + mend + " kvPartition=" + kvPartition + " partitino=" + partition); } sortOutput.close(); currentReadBytes += (InMemoryShuffleSorter.IFILE_EOF_LENGTH + InMemoryShuffleSorter.IFILE_CHECKSUM_LENGTH); } else { if (LOG.isDebugEnabled()) { LOG.debug("XXX Hmm..." + " currentLength=" + currentLength + " recIndex=" + recIndex + " mend=" + mend + " kvPartition=" + kvPartition + " partitino=" + partition); } } int retVal = Math.min(currentReadBytes, len); readBytes += retVal; if (LOG.isDebugEnabled()) { LOG.debug("XXX read: done" + " retVal=" + retVal + " currentReadBytes=" + currentReadBytes + " len=" + len + " readBytes=" + readBytes + " partitionBytes=" + partitionBytes + " residualBytes=" + out.getCurrent()); } return retVal; }
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader.java
License:Apache License
public KeyState readRawKey(DataInputBuffer key) throws IOException { try {/* w ww . j a va2 s.com*/ if (!positionToNextRecord(memDataIn)) { return KeyState.NO_KEY; } // Setup the key int pos = memDataIn.getPosition(); byte[] data = memDataIn.getData(); if (currentKeyLength == IFile.RLE_MARKER) { // get key length from original key key.reset(data, originalKeyPos, originalKeyLength); return KeyState.SAME_KEY; } key.reset(data, pos, currentKeyLength); // Position for the next value long skipped = memDataIn.skip(currentKeyLength); if (skipped != currentKeyLength) { throw new IOException("Rec# " + recNo + ": Failed to skip past key of length: " + currentKeyLength); } bytesRead += currentKeyLength; return KeyState.NEW_KEY; } catch (IOException ioe) { dumpOnError(); throw ioe; } }
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader.java
License:Apache License
public void nextRawValue(DataInputBuffer value) throws IOException { try {/*from www . j a va 2 s . com*/ int pos = memDataIn.getPosition(); byte[] data = memDataIn.getData(); value.reset(data, pos, currentValueLength); // Position for the next record long skipped = memDataIn.skip(currentValueLength); if (skipped != currentValueLength) { throw new IOException( "Rec# " + recNo + ": Failed to skip past value of length: " + currentValueLength); } // Record the byte bytesRead += currentValueLength; ++recNo; } catch (IOException ioe) { dumpOnError(); throw ioe; } }
From source file:org.apache.tez.runtime.library.common.sort.impl.dflt.DefaultSorter.java
License:Apache License
protected void spill(int mstart, int mend) throws IOException, InterruptedException { //approximate the length of the output file to be the length of the //buffer + header lengths for the partitions final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart) + partitions * APPROX_HEADER_LENGTH; FSDataOutputStream out = null;/*from w ww . jav a 2s . c o m*/ try { // create spill file final TezSpillRecord spillRec = new TezSpillRecord(partitions); final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size); spillFilePaths.put(numSpills, filename); out = rfs.create(filename); int spindex = mstart; final InMemValBytes value = createInMemValBytes(); boolean rle = isRLENeeded(); for (int i = 0; i < partitions; ++i) { IFile.Writer writer = null; try { long segmentStart = out.getPos(); writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, rle); if (combiner == null) { // spill directly DataInputBuffer key = new DataInputBuffer(); while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) { final int kvoff = offsetFor(spindex); int keystart = kvmeta.get(kvoff + KEYSTART); int valstart = kvmeta.get(kvoff + VALSTART); key.reset(kvbuffer, keystart, valstart - keystart); getVBytesForOffset(kvoff, value); writer.append(key, value); ++spindex; } } else { int spstart = spindex; while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) { ++spindex; } // Note: we would like to avoid the combiner if we've fewer // than some threshold of records for a partition if (spstart != spindex) { TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex); if (LOG.isDebugEnabled()) { LOG.debug("Running combine processor"); } runCombineProcessor(kvIter, writer); } } // close the writer writer.close(); if (numSpills > 0) { additionalSpillBytesWritten.increment(writer.getCompressedLength()); numAdditionalSpills.increment(1); // Reset the value will be set during the final merge. 
outputBytesWithOverheadCounter.setValue(0); } else { // Set this up for the first write only. Subsequent ones will be handled in the final merge. outputBytesWithOverheadCounter.increment(writer.getRawLength()); } // record offsets final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength()); spillRec.putIndex(rec, i); writer = null; } finally { if (null != writer) writer.close(); } } if (totalIndexCacheMemory >= indexCacheMemoryLimit) { // create spill index file Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH); spillFileIndexPaths.put(numSpills, indexFilename); spillRec.writeToFile(indexFilename, conf); } else { indexCacheList.add(spillRec); totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH; } LOG.info("Finished spill " + numSpills); ++numSpills; } finally { if (out != null) out.close(); } }
From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java
License:Apache License
@Test(timeout = 5000) //test with sorted data and repeat keys public void testWithRLEMarker() throws IOException { //Test with append(Object, Object) FSDataOutputStream out = localFs.create(outputPath); IFile.Writer writer = new IFile.Writer(defaultConf, out, Text.class, IntWritable.class, codec, null, null, true);// w w w . j a v a 2 s . c om Text key = new Text("key0"); IntWritable value = new IntWritable(0); writer.append(key, value); //same key (RLE should kick in) key = new Text("key0"); writer.append(key, value); assertTrue(writer.sameKey); //Different key key = new Text("key1"); writer.append(key, value); assertFalse(writer.sameKey); writer.close(); out.close(); //Test with append(DataInputBuffer key, DataInputBuffer value) byte[] kvbuffer = "key1Value1key1Value2key3Value3".getBytes(); int keyLength = 4; int valueLength = 6; int pos = 0; out = localFs.create(outputPath); writer = new IFile.Writer(defaultConf, out, Text.class, IntWritable.class, codec, null, null, true); DataInputBuffer kin = new DataInputBuffer(); kin.reset(kvbuffer, pos, keyLength); DataInputBuffer vin = new DataInputBuffer(); DataOutputBuffer vout = new DataOutputBuffer(); (new IntWritable(0)).write(vout); vin.reset(vout.getData(), vout.getLength()); //Write initial KV pair writer.append(kin, vin); assertFalse(writer.sameKey); pos += (keyLength + valueLength); //Second key is similar to key1 (RLE should kick in) kin.reset(kvbuffer, pos, keyLength); (new IntWritable(0)).write(vout); vin.reset(vout.getData(), vout.getLength()); writer.append(kin, vin); assertTrue(writer.sameKey); pos += (keyLength + valueLength); //Next key (key3) is different (RLE should not kick in) kin.reset(kvbuffer, pos, keyLength); (new IntWritable(0)).write(vout); vin.reset(vout.getData(), vout.getLength()); writer.append(kin, vin); assertFalse(writer.sameKey); writer.close(); out.close(); }
From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java
License:Apache License
@Test(timeout = 5000) //Test appendValue with DataInputBuffer public void testAppendValueWithDataInputBuffer() throws IOException { List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100)); IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class, codec, null, null);/* w ww . j a v a 2 s .c om*/ final DataInputBuffer previousKey = new DataInputBuffer(); DataInputBuffer key = new DataInputBuffer(); DataInputBuffer value = new DataInputBuffer(); for (KVPair kvp : data) { populateData(kvp, key, value); if ((previousKey != null && BufferUtils.compare(key, previousKey) == 0)) { writer.appendValue(value); } else { writer.append(key, value); } previousKey.reset(k.getData(), 0, k.getLength()); } writer.close(); readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec); }
From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java
License:Apache License
/**
 * Appends every pair in {@code data} to {@code writer} through the
 * {@code DataInputBuffer} API, closes the writer and logs its raw and compressed
 * lengths.
 *
 * @param writer     destination writer; closed before returning
 * @param rle        not read by this method
 * @param repeatKeys when true, a key equal to the previous one is written as
 *                   {@code IFile.REPEAT_KEY} instead of the key bytes
 * @param data       pairs to write, in order
 * @param codec      not read by this method
 * @return the same {@code writer} that was passed in
 * @throws IOException if populating, appending or closing fails
 */
private Writer writeTestFileUsingDataBuffer(IFile.Writer writer, boolean rle, boolean repeatKeys,
    List<KVPair> data, CompressionCodec codec) throws IOException {
  DataInputBuffer lastKey = new DataInputBuffer();
  DataInputBuffer keyBuf = new DataInputBuffer();
  DataInputBuffer valueBuf = new DataInputBuffer();

  for (KVPair pair : data) {
    populateData(pair, keyBuf, valueBuf);

    final boolean writeAsRepeat =
        repeatKeys && lastKey != null && BufferUtils.compare(keyBuf, lastKey) == 0;
    if (!writeAsRepeat) {
      writer.append(keyBuf, valueBuf);
    } else {
      writer.append(IFile.REPEAT_KEY, valueBuf);
    }

    // Track the key just written for the next comparison.
    lastKey.reset(keyBuf.getData(), 0, keyBuf.getLength());
  }

  writer.close();

  LOG.info("Uncompressed: " + writer.getRawLength());
  LOG.info("CompressedSize: " + writer.getCompressedLength());
  return writer;
}