List of usage examples for org.apache.hadoop.io.DataInputBuffer.getLength()
public int getLength()
From source file:cn.ac.ncic.mastiff.io.coding.RedBlackTreeStringReader.java
License:Apache License
@Override public byte[] ensureDecompressed() throws IOException { DataOutputBuffer transfer = new DataOutputBuffer(); transfer.write(inBuf.getData(), 12, inBuf.getLength() - 12); DataInputBuffer dib = new DataInputBuffer(); dib.reset(transfer.getData(), 0, transfer.getLength()); int dictionarySize = dib.readInt(); int length1 = dib.readInt(); byte[] data = transfer.getData(); transfer.close();/*from www . j a v a 2 s. c o m*/ dib.reset(data, Integer.SIZE + Integer.SIZE, length1); FlexibleEncoding.ORC.StreamName name = new FlexibleEncoding.ORC.StreamName(0, OrcProto.Stream.Kind.DICTIONARY_DATA); ByteBuffer inBuf1 = ByteBuffer.allocate(length1); inBuf1.put(dib.getData(), 0, dib.getLength()); inBuf1.flip(); InStream in = InStream.create("test1", inBuf1, null, dictionarySize); if (in.available() > 0) { dictionaryBuffer = new DynamicByteArray(64, in.available()); dictionaryBuffer.readAll(in); in.close(); // read the lengths google proto buffer name = new StreamName(1, OrcProto.Stream.Kind.LENGTH); dib.reset(data, 4 + 4 + length1, 4); int length2 = dib.readInt(); dib.reset(data, 4 + 4 + length1 + 4, length2); // in = streams.get(name); ByteBuffer inBuf2 = ByteBuffer.allocate(length2); inBuf2.put(dib.getData(), 0, length2); inBuf2.flip(); in = InStream.create("test2", inBuf2, null, dictionarySize); // IntegerReader lenReader = createIntegerReader(encodings.get(columnId) // .getKind(), in, false); IntegerReader lenReader = createIntegerReader(OrcProto.ColumnEncoding.Kind.DIRECT_V2, in, false); int offset = 0; dictionaryOffsets = new int[dictionarySize + 1]; for (int i = 0; i < dictionarySize; ++i) { dictionaryOffsets[i] = offset; offset += (int) lenReader.next(); } dictionaryOffsets[dictionarySize] = offset; in.close(); name = new FlexibleEncoding.ORC.StreamName(2, OrcProto.Stream.Kind.DATA); dib.reset(data, 4 + 4 + length1 + 4 + length2, 4); int length3 = dib.readInt(); dib.reset(data, 4 + 4 + length1 + 4 + length2 + 4, length3); ByteBuffer inBuf3 = 
ByteBuffer.allocate(length3); inBuf3.put(dib.getData(), 0, length3); inBuf3.flip(); in = InStream.create("test3", inBuf3, null, dictionarySize); reader = createIntegerReader(OrcProto.ColumnEncoding.Kind.DIRECT_V2, in, false); } inBuf.close(); DataOutputBuffer decoding = new DataOutputBuffer(); DataOutputBuffer offsets = new DataOutputBuffer(); decoding.writeInt(decompressedSize); decoding.writeInt(numPairs); decoding.writeInt(startPos); int dataoffset = 12; String str; for (int i = 0; i < numPairs; i++) { str = readEachValue(null); decoding.writeUTF(str); // if(i<5){ // System.out.println("304 bin[i] "+str+" decoding "+ decoding.size()); // } dataoffset = decoding.size(); offsets.writeInt(dataoffset); } System.out.println("315 offset.size() " + offsets.size() + " decoding.szie " + decoding.size()); System.out.println("316 dataoffet " + dataoffset); decoding.write(offsets.getData(), 0, offsets.size()); inBuf.close(); offsets.close(); dib.close(); System.out.println("316 decoding " + decoding.size() + decoding.getLength() + " decoding.getData() " + decoding.getData().length); inBuf1.clear(); return decoding.getData(); }
From source file:org.apache.tez.engine.common.combine.CombineInput.java
License:Apache License
private boolean nextKeyValue() throws IOException, InterruptedException { if (!hasMore) { key = null;//from w w w .j ava 2s .c om value = null; return false; } firstValue = !nextKeyIsSame; DataInputBuffer nextKey = input.getKey(); currentRawKey.set(nextKey.getData(), nextKey.getPosition(), nextKey.getLength() - nextKey.getPosition()); buffer.reset(currentRawKey.getBytes(), 0, currentRawKey.getLength()); key = keyDeserializer.deserialize(key); DataInputBuffer nextVal = input.getValue(); buffer.reset(nextVal.getData(), nextVal.getPosition(), nextVal.getLength()); value = valueDeserializer.deserialize(value); hasMore = input.next(); if (hasMore) { nextKey = input.getKey(); nextKeyIsSame = comparator.compare(currentRawKey.getBytes(), 0, currentRawKey.getLength(), nextKey.getData(), nextKey.getPosition(), nextKey.getLength() - nextKey.getPosition()) == 0; } else { nextKeyIsSame = false; } inputValueCounter.increment(1); return true; }
From source file:org.apache.tez.engine.common.shuffle.impl.InMemoryWriter.java
License:Apache License
/**
 * Appends one raw key/value record to {@code out}.
 *
 * <p>When {@code key} is the {@code IFile.REPEAT_KEY} sentinel, only
 * {@code IFile.RLE_MARKER}, the value length and the value bytes are written.
 * Otherwise the key length, value length, key bytes and value bytes are
 * written in that order.
 *
 * @param key   buffer whose bytes from its position up to its length form the key
 * @param value buffer whose bytes from its position up to its length form the value
 * @throws IOException if either computed length is negative or the write fails
 */
public void append(DataInputBuffer key, DataInputBuffer value) throws IOException {
    final int keyBytes = key.getLength() - key.getPosition();
    if (keyBytes < 0) {
        throw new IOException("Negative key-length not allowed: " + keyBytes + " for " + key);
    }

    final boolean repeatedKey = (key == IFile.REPEAT_KEY);

    final int valueBytes = value.getLength() - value.getPosition();
    if (valueBytes < 0) {
        throw new IOException("Negative value-length not allowed: " + valueBytes + " for " + value);
    }

    if (repeatedKey) {
        // Repeated key: emit the marker in place of the key length and skip the key bytes.
        WritableUtils.writeVInt(out, IFile.RLE_MARKER);
        WritableUtils.writeVInt(out, valueBytes);
        out.write(value.getData(), value.getPosition(), valueBytes);
        return;
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("InMemWriter.append"
                + " key.data=" + key.getData()
                + " key.pos=" + key.getPosition()
                + " key.len=" + key.getLength()
                + " val.data=" + value.getData()
                + " val.pos=" + value.getPosition()
                + " val.len=" + value.getLength());
    }

    WritableUtils.writeVInt(out, keyBytes);
    WritableUtils.writeVInt(out, valueBytes);
    out.write(key.getData(), key.getPosition(), keyBytes);
    out.write(value.getData(), value.getPosition(), valueBytes);
}
From source file:org.apache.tez.engine.common.task.impl.ValuesIterator.java
License:Apache License
/**
 * Reads the next key from {@code in}.
 *
 * <p>Sets {@code more} to the result of {@code in.next()}. When there is a
 * next record, deserializes its key into {@code nextKey} and sets
 * {@code hasNext} to whether the current {@code key} is non-null and compares
 * equal to it; otherwise sets {@code hasNext} to {@code false}.
 *
 * @throws IOException if advancing the input or deserializing the key fails
 */
private void readNextKey() throws IOException {
    more = in.next();
    if (more) {
        DataInputBuffer nextKeyBytes = in.getKey();
        // reset() takes a byte count as its third argument; the key occupies
        // getLength() - getPosition() bytes starting at getPosition().
        keyIn.reset(nextKeyBytes.getData(), nextKeyBytes.getPosition(),
                nextKeyBytes.getLength() - nextKeyBytes.getPosition());
        nextKey = keyDeserializer.deserialize(nextKey);
        hasNext = key != null && (comparator.compare(key, nextKey) == 0);
    } else {
        hasNext = false;
    }
}
From source file:org.apache.tez.engine.common.task.impl.ValuesIterator.java
License:Apache License
/** * Read the next value/*from w w w . ja v a2s. co m*/ * @throws IOException */ private void readNextValue() throws IOException { DataInputBuffer nextValueBytes = in.getValue(); valueIn.reset(nextValueBytes.getData(), nextValueBytes.getPosition(), nextValueBytes.getLength()); value = valDeserializer.deserialize(value); }
From source file:org.apache.tez.mapreduce.hadoop.mapreduce.ReduceContextImpl.java
License:Apache License
/** * Advance to the next key/value pair./* w w w.ja v a 2s . c om*/ */ @Override public boolean nextKeyValue() throws IOException, InterruptedException { if (!hasMore) { key = null; value = null; return false; } firstValue = !nextKeyIsSame; DataInputBuffer nextKey = input.getKey(); currentRawKey.set(nextKey.getData(), nextKey.getPosition(), nextKey.getLength() - nextKey.getPosition()); buffer.reset(currentRawKey.getBytes(), 0, currentRawKey.getLength()); key = keyDeserializer.deserialize(key); DataInputBuffer nextVal = input.getValue(); buffer.reset(nextVal.getData(), nextVal.getPosition(), nextVal.getLength()); value = valueDeserializer.deserialize(value); currentKeyLength = nextKey.getLength() - nextKey.getPosition(); currentValueLength = nextVal.getLength() - nextVal.getPosition(); if (isMarked) { backupStore.write(nextKey, nextVal); } hasMore = input.next(); if (hasMore) { nextKey = input.getKey(); nextKeyIsSame = comparator.compare(currentRawKey.getBytes(), 0, currentRawKey.getLength(), nextKey.getData(), nextKey.getPosition(), nextKey.getLength() - nextKey.getPosition()) == 0; } else { nextKeyIsSame = false; } inputValueCounter.increment(1); return true; }
From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java
License:Apache License
@Test(timeout = 5000) //Write empty key value pairs public void testWritingEmptyKeyValues() throws IOException { DataInputBuffer key = new DataInputBuffer(); DataInputBuffer value = new DataInputBuffer(); IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, null, null, null, null, null); writer.append(key, value);//from w w w . j av a 2 s . com writer.append(key, value); writer.append(key, value); writer.append(key, value); writer.close(); IFile.Reader reader = new Reader(localFs, outputPath, null, null, null, false, -1, 1024); DataInputBuffer keyIn = new DataInputBuffer(); DataInputBuffer valIn = new DataInputBuffer(); int records = 0; while (reader.nextRawKey(keyIn)) { reader.nextRawValue(valIn); records++; assert (keyIn.getLength() == 0); assert (valIn.getLength() == 0); } assertTrue("Number of records read does not match", (records == 4)); reader.close(); }
From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java
License:Apache License
/**
 * Appends every pair in {@code data} to {@code writer} through raw
 * {@code DataInputBuffer}s, closes the writer and logs its raw and
 * compressed sizes.
 *
 * <p>When {@code repeatKeys} is set and a key compares equal to the key of
 * the preceding pair, {@code IFile.REPEAT_KEY} is appended in its place.
 *
 * @param writer     destination writer; closed before this method returns
 * @param rle        not read by this method
 * @param repeatKeys whether consecutive equal keys are replaced by the repeat marker
 * @param data       pairs to write, in order
 * @param codec      not read by this method
 * @return the same {@code writer} instance
 * @throws IOException if populating, appending or closing fails
 */
private Writer writeTestFileUsingDataBuffer(IFile.Writer writer, boolean rle, boolean repeatKeys,
        List<KVPair> data, CompressionCodec codec) throws IOException {
    DataInputBuffer lastKey = new DataInputBuffer();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();

    for (KVPair pair : data) {
        populateData(pair, keyBuf, valueBuf);

        // lastKey is never null here, so only the flag and the comparison matter.
        boolean sameAsLast = repeatKeys && BufferUtils.compare(keyBuf, lastKey) == 0;
        if (!sameAsLast) {
            writer.append(keyBuf, valueBuf);
        } else {
            writer.append(IFile.REPEAT_KEY, valueBuf);
        }

        // Remember this key for the comparison on the next iteration.
        lastKey.reset(keyBuf.getData(), 0, keyBuf.getLength());
    }

    writer.close();
    LOG.info("Uncompressed: " + writer.getRawLength());
    LOG.info("CompressedSize: " + writer.getCompressedLength());
    return writer;
}
From source file:org.apache.tez.runtime.library.common.ValuesIterator.java
License:Apache License
/** * read the next key - which may be the same as the current key. *//* w ww . j ava 2 s . co m*/ private void readNextKey() throws IOException { more = in.next(); if (more) { DataInputBuffer nextKeyBytes = in.getKey(); if (!in.isSameKey()) { keyIn.reset(nextKeyBytes.getData(), nextKeyBytes.getPosition(), nextKeyBytes.getLength() - nextKeyBytes.getPosition()); nextKey = keyDeserializer.deserialize(nextKey); // TODO Is a counter increment required here ? hasMoreValues = key != null && (comparator.compare(key, nextKey) == 0); } else { hasMoreValues = in.isSameKey(); } } else { hasMoreValues = false; } }
From source file:org.apache.tez.runtime.library.common.ValuesIterator.java
License:Apache License
/**
 * Reads the next value from {@code in} and deserializes it into {@code value}.
 *
 * @throws IOException if deserializing the value fails
 */
private void readNextValue() throws IOException {
    DataInputBuffer rawValue = in.getValue();
    int valueStart = rawValue.getPosition();
    // Bytes between the buffer's position and its length make up the value.
    int valueByteCount = rawValue.getLength() - valueStart;
    valueIn.reset(rawValue.getData(), valueStart, valueByteCount);
    value = valDeserializer.deserialize(value);
}