List of usage examples for org.apache.hadoop.fs.FSDataInputStream.seek
@Override public void seek(long desired) throws IOException
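Before the project-specific examples below, here is a minimal, self-contained sketch of the basic pattern they all share: open a file, seek to an absolute byte offset, and read from the new position. The file path, offset, and buffer size used here are illustrative assumptions, not taken from any of the projects listed below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FSDataInputStreamSeekExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example.txt"); // hypothetical input file, for illustration only
        long offset = 128L;                       // hypothetical byte offset to resume reading from

        FileSystem fs = file.getFileSystem(conf);
        try (FSDataInputStream in = fs.open(file)) {
            in.seek(offset);                      // position the stream at the absolute byte offset
            byte[] buffer = new byte[64];
            int read = in.read(buffer);           // reading continues from the seeked position
            System.out.println("Read " + read + " bytes; stream position is now " + in.getPos());
        }
    }
}

The examples that follow use the same pattern: seek is always given an absolute position (an input split offset or an index record's start offset), after which the stream is handed to a reader that consumes a bounded region.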
From source file:org.apache.sqoop.connector.hdfs.HdfsExtractor.java
License:Apache License
/**
 * Extracts Text file
 * @param file
 * @param start
 * @param length
 * @throws IOException
 */
@SuppressWarnings("resource")
private void extractTextFile(Path file, long start, long length) throws IOException {
    LOG.info("Extracting text file");
    long end = start + length;
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream filestream = fs.open(file);
    CompressionCodec codec = (new CompressionCodecFactory(conf)).getCodec(file);
    LineReader filereader;
    Seekable fileseeker = filestream;

    // Hadoop 1.0 does not have support for custom record delimiter and thus we
    // are supporting only default one.
    // We might add another "else if" case for SplittableCompressionCodec once
    // we drop support for Hadoop 1.0.
    if (codec == null) {
        filestream.seek(start);
        filereader = new LineReader(filestream);
    } else {
        filereader = new LineReader(codec.createInputStream(filestream, codec.createDecompressor()), conf);
        fileseeker = filestream;
    }
    if (start != 0) {
        // always throw away first record because
        // one extra line is read in previous split
        start += filereader.readLine(new Text(), 0);
    }
    int size;
    LOG.info("Start position: " + String.valueOf(start));
    long next = start;
    while (next <= end) {
        Text line = new Text();
        size = filereader.readLine(line, Integer.MAX_VALUE);
        if (size == 0) {
            break;
        }
        if (codec == null) {
            next += size;
        } else {
            next = fileseeker.getPos();
        }
        rowRead++;
        dataWriter.writeStringRecord(line.toString());
    }
    LOG.info("Extracting ended on position: " + fileseeker.getPos());
    filestream.close();
}
From source file:org.apache.tez.mapreduce.input.SimpleInput.java
License:Apache License
public org.apache.hadoop.mapred.InputSplit getOldSplitDetails(TaskSplitIndex splitMetaInfo) throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    FileSystem fs = FileSystem.getLocal(jobConf);
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    long offset = splitMetaInfo.getStartOffset();
    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapred.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer =
        (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory.getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    reporter.getCounter(TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
    inFile.close();
    return split;
}
From source file:org.apache.tez.mapreduce.input.SimpleInput.java
License:Apache License
public org.apache.hadoop.mapreduce.InputSplit getNewSplitDetails(TaskSplitIndex splitMetaInfo) throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    long offset = splitMetaInfo.getStartOffset();

    // Split information read from local filesystem.
    FileSystem fs = FileSystem.getLocal(jobConf);
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapreduce.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer =
        (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory.getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    reporter.getCounter(TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
    inFile.close();
    return split;
}
From source file:org.apache.tez.mapreduce.lib.MRInputUtils.java
License:Apache License
@SuppressWarnings("unchecked") public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo, JobConf jobConf, TezCounter splitBytesCounter) throws IOException { Path file = new Path(splitMetaInfo.getSplitLocation()); long offset = splitMetaInfo.getStartOffset(); // Split information read from local filesystem. FileSystem fs = FileSystem.getLocal(jobConf); file = fs.makeQualified(file);//w w w .j a v a 2 s. c om LOG.info("Reading input split file from : " + file); FSDataInputStream inFile = fs.open(file); inFile.seek(offset); String className = Text.readString(inFile); Class<org.apache.hadoop.mapreduce.InputSplit> cls; try { cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className); } catch (ClassNotFoundException ce) { IOException wrap = new IOException("Split class " + className + " not found"); wrap.initCause(ce); throw wrap; } SerializationFactory factory = new SerializationFactory(jobConf); Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory .getDeserializer(cls); deserializer.open(inFile); org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null); long pos = inFile.getPos(); if (splitBytesCounter != null) { splitBytesCounter.increment(pos - offset); } inFile.close(); return split; }
From source file:org.apache.tez.mapreduce.lib.MRInputUtils.java
License:Apache License
@SuppressWarnings("unchecked") public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo, JobConf jobConf, TezCounter splitBytesCounter) throws IOException { Path file = new Path(splitMetaInfo.getSplitLocation()); FileSystem fs = FileSystem.getLocal(jobConf); file = fs.makeQualified(file);/*from ww w . j av a2 s . co m*/ LOG.info("Reading input split file from : " + file); long offset = splitMetaInfo.getStartOffset(); FSDataInputStream inFile = fs.open(file); inFile.seek(offset); String className = Text.readString(inFile); Class<org.apache.hadoop.mapred.InputSplit> cls; try { cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className); } catch (ClassNotFoundException ce) { IOException wrap = new IOException("Split class " + className + " not found"); wrap.initCause(ce); throw wrap; } SerializationFactory factory = new SerializationFactory(jobConf); Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory .getDeserializer(cls); deserializer.open(inFile); org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null); long pos = inFile.getPos(); if (splitBytesCounter != null) { splitBytesCounter.increment(pos - offset); } inFile.close(); return split; }
From source file:org.apache.tez.runtime.library.common.shuffle.LocalDiskFetchedInput.java
License:Apache License
@Override
public InputStream getInputStream() throws IOException {
    FSDataInputStream inputStream = localFS.open(inputFile);
    inputStream.seek(startOffset);
    return new BoundedInputStream(inputStream, compressedSize);
}
From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
License:Apache License
public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys,
        int numLargevalues, int numLargeKvPairs) throws IOException, InterruptedException {
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
    Random random = new Random();

    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1,
        HashPartitioner.class);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numRecordsWritten = 0;
    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
        expectedValues.put(i, LinkedListMultimap.<String, String>create());
    }
    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
        numPartitions, availableMemory);
    int sizePerBuffer = kvWriter.sizePerBuffer;

    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    for (int i = 0; i < numRegularRecords; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large key records
    for (int i = 0; i < numLargeKeys; i++) {
        String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large val records
    for (int i = 0; i < numLargevalues; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write records where key + val are large (but both can fit in the buffer individually)
    for (int i = 0; i < numLargeKvPairs; i++) {
        String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    List<Event> events = kvWriter.close();
    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());

    // Validate the event
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
        .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
    } else {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return; // Special case for 0 records.
    }

    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
        if (emptyPartitionBits.get(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
            0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
            assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
        }
        inStream.close();
    }
    for (int i = 0; i < numPartitions; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}
From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
License:Apache License
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
        shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
        numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
        .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
        .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return; // Special case for 0 records.
    }

    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
            0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}
From source file:org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.java
License:Apache License
private void mergeAll() throws IOException {
    long expectedSize = spilledSize;
    if (currentBuffer.nextPosition != 0) {
        expectedSize += currentBuffer.nextPosition - (currentBuffer.numRecords * META_SIZE)
            - currentBuffer.skipSize + numPartitions * APPROX_HEADER_LENGTH;
        // Update final statistics.
        updateGlobalStats(currentBuffer);
    }

    long indexFileSizeEstimate = numPartitions * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;
    finalOutPath = outputFileHandler.getOutputFileForWrite(expectedSize);
    finalIndexPath = outputFileHandler.getOutputIndexFileForWrite(indexFileSizeEstimate);

    TezSpillRecord finalSpillRecord = new TezSpillRecord(numPartitions);

    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();

    DataInputBuffer keyBufferIFile = new DataInputBuffer();
    DataInputBuffer valBufferIFile = new DataInputBuffer();

    FSDataOutputStream out = null;
    try {
        out = rfs.create(finalOutPath);
        Writer writer = null;

        for (int i = 0; i < numPartitions; i++) {
            long segmentStart = out.getPos();
            if (numRecordsPerPartition[i] == 0) {
                LOG.info("Skipping partition: " + i + " in final merge since it has no records");
                continue;
            }
            writer = new Writer(conf, out, keyClass, valClass, codec, null, null);
            try {
                if (currentBuffer.nextPosition != 0
                        && currentBuffer.partitionPositions[i] != WrappedBuffer.PARTITION_ABSENT_POSITION) {
                    // Write current buffer.
                    writePartition(currentBuffer.partitionPositions[i], currentBuffer, writer, keyBuffer,
                        valBuffer);
                }
                synchronized (spillInfoList) {
                    for (SpillInfo spillInfo : spillInfoList) {
                        TezIndexRecord indexRecord = spillInfo.spillRecord.getIndex(i);
                        if (indexRecord.getPartLength() == 0) {
                            // Skip empty partitions within a spill
                            continue;
                        }
                        FSDataInputStream in = rfs.open(spillInfo.outPath);
                        in.seek(indexRecord.getStartOffset());
                        IFile.Reader reader = new IFile.Reader(in, indexRecord.getPartLength(), codec, null,
                            additionalSpillBytesReadCounter, ifileReadAhead, ifileReadAheadLength,
                            ifileBufferSize);
                        while (reader.nextRawKey(keyBufferIFile)) {
                            // TODO Inefficient. If spills are not compressed, a direct copy should be possible
                            // given the current IFile format. Also extremely inefficient for large records,
                            // since the entire record will be read into memory.
                            reader.nextRawValue(valBufferIFile);
                            writer.append(keyBufferIFile, valBufferIFile);
                        }
                        reader.close();
                    }
                }
                writer.close();
                fileOutputBytesCounter.increment(writer.getCompressedLength());
                TezIndexRecord indexRecord = new TezIndexRecord(segmentStart, writer.getRawLength(),
                    writer.getCompressedLength());
                writer = null;
                finalSpillRecord.putIndex(indexRecord, i);
            } finally {
                if (writer != null) {
                    writer.close();
                }
            }
        }
    } finally {
        if (out != null) {
            out.close();
        }
    }
    finalSpillRecord.writeToFile(finalIndexPath, conf);
    fileOutputBytesCounter.increment(indexFileSizeEstimate);
    LOG.info("Finished final spill after merging : " + numSpills.get() + " spills");
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoRecordReader.java
License:Apache License
// Scan forward byte-by-byte from 'start' until the Gryo split header pattern is found,
// then reposition the stream at the start of that header and return its offset.
private static long seekToHeader(final FSDataInputStream inputStream, final long start) throws IOException {
    inputStream.seek(start);
    long nextStart = start;
    final byte[] buffer = new byte[PATTERN.length];
    while (true) {
        if ((buffer[0] = PATTERN[0]) == inputStream.readByte()) {
            // candidate match on the first byte: read the rest of the pattern with a positioned read
            inputStream.read(nextStart + 1, buffer, 1, PATTERN.length - 1);
            if (patternMatch(buffer)) {
                inputStream.seek(nextStart);
                return nextStart;
            }
        } else {
            nextStart = nextStart + 1;
            inputStream.seek(nextStart);
        }
    }
}