Example usage for org.apache.hadoop.io.compress CodecPool getDecompressor

List of usage examples for org.apache.hadoop.io.compress CodecPool getDecompressor

Introduction

On this page you can find example usages of org.apache.hadoop.io.compress CodecPool.getDecompressor.

Prototype

public static Decompressor getDecompressor(CompressionCodec codec) 

Source Link

Document

Get a Decompressor for the given CompressionCodec from the pool, or a new one if the pool is empty.
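
A minimal sketch of the typical borrow/use/return pattern that the examples below all follow. The method name readCompressedFile and its arguments are illustrative only, not taken from any of the listed source files:

public static void readCompressedFile(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);

    // Detect the codec from the file name (e.g. ".gz", ".bz2"); null means uncompressed
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);

    InputStream in = fs.open(path);
    Decompressor decompressor = null;
    try {
        if (codec != null) {
            // Borrow a Decompressor from the pool; a new one is created if the pool is empty
            decompressor = CodecPool.getDecompressor(codec);
            in = codec.createInputStream(in, decompressor);
        }
        // ... read from 'in' ...
    } finally {
        in.close();
        if (decompressor != null) {
            // Return the Decompressor so later callers can reuse it
            CodecPool.returnDecompressor(decompressor);
        }
    }
}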

Usage

From source file:de.l3s.streamcorpus.terrier.ThriftFileCollectionRecordReader.java

License:Apache License

/** 
 * Reading a bunch of lines of file paths in a list.
 * The code in this method is redistributed from Hadoop LineRecordReader
 * @throws IOException 
 */
private void loadPathsFromInputSplit(InputSplit split, Configuration conf) throws IOException {
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();

    long begin = fileSplit.getStart();
    long end = begin + fileSplit.getLength();

    LOG.info("Reading paths in file " + path.getName());

    // First check the compression codec
    CompressionCodecFactory compressionCodec = new CompressionCodecFactory(conf);
    CompressionCodec codec = compressionCodec.getCodec(path);
    FSDataInputStream fis = fs.open(path);
    SplitLineReader in;

    Seekable filePosition;

    boolean compressed = false;
    Decompressor decompressor = null;
    if (null != codec) {
        compressed = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(fis,
                    decompressor, begin, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, conf, (byte[]) null);
            begin = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fis, decompressor), conf, null);
            filePosition = fis;
        }
    } else {
        fis.seek(begin);
        in = new SplitLineReader(fis, conf, (byte[]) null);
        filePosition = fis;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (begin != 0) {
        begin += in.readLine(new Text(), 0, maxBytesToConsume(compressed, begin, end));
    }
    long pos = begin;

    int newSize = 0;
    final Text nextLine = new Text();
    paths = new ArrayList<>();
    while (getFilePosition(compressed, filePosition, pos) <= end || in.needAdditionalRecordAfterSplit()) {

        if (pos == 0) {
            // Strip BOM(Byte Order Mark)
            // Text only support UTF-8, we only need to check UTF-8 BOM
            // (0xEF,0xBB,0xBF) at the start of the text stream.
            newSize = in.readLine(nextLine, Integer.MAX_VALUE, Integer.MAX_VALUE);
            pos += newSize;
            int textLength = nextLine.getLength();
            byte[] textBytes = nextLine.getBytes();
            if ((textLength >= 3) && (textBytes[0] == (byte) 0xEF) && (textBytes[1] == (byte) 0xBB)
                    && (textBytes[2] == (byte) 0xBF)) {
                // find UTF-8 BOM, strip it.
                LOG.info("Found UTF-8 BOM and skipped it");
                textLength -= 3;
                newSize -= 3;
                if (textLength > 0) {
                    // It may work to use the same buffer and 
                    // not do the copyBytes
                    textBytes = nextLine.copyBytes();
                    nextLine.set(textBytes, 3, textLength);
                } else {
                    nextLine.clear();
                }
            }
        } else {
            newSize = in.readLine(nextLine, Integer.MAX_VALUE, maxBytesToConsume(compressed, pos, end));
            pos += newSize;
        }

        paths.add(nextLine.toString());
        LOG.info("Read line of size " + newSize + " at pos " + (pos - newSize));
    }

    try {
        if (in != null) {
            in.close();
        }
        if (fis != null) {
            fis.close();
        }
    } finally {
        if (decompressor != null) {
            CodecPool.returnDecompressor(decompressor);
        }
    }
}

From source file:edu.umn.cs.spatialHadoop.core.SpatialSite.java

License:Open Source License

/**
 * Checks whether a file is indexed using an R-tree or not. This allows
 * an operation to use the R-tree to speedup the processing if it exists.
 * This function opens the specified file and reads the first eight bytes
 * which include the R-tree signature. If the signatures matches with the
 * R-tree signature, true is returned. Otherwise, false is returned.
 * If the parameter is a path to a directory, only the first data file in that
 * directory is tested.
 * @param fs
 * @param path
 * @return
 * @throws IOException
 */
public static boolean isRTree(FileSystem fs, Path path) throws IOException {
    if (FileUtil.getExtensionWithoutCompression(path).equals("rtree"))
        return true;

    FileStatus file = fs.getFileStatus(path);
    Path fileToCheck;
    if (file.isDir()) {
        // Check any cell (e.g., first cell)
        GlobalIndex<Partition> gIndex = getGlobalIndex(fs, path);
        if (gIndex == null)
            return false;
        fileToCheck = new Path(path, gIndex.iterator().next().filename);
    } else {
        fileToCheck = file.getPath();
    }
    InputStream fileIn = fs.open(fileToCheck);

    // Check if file is compressed
    CompressionCodec codec = compressionCodecs.getCodec(fileToCheck);
    Decompressor decompressor = null;
    if (codec != null) {
        synchronized (compressionCodecs) {
            // CodecPool is not thread-safe
            decompressor = CodecPool.getDecompressor(codec);
        }
        fileIn = codec.createInputStream(fileIn, decompressor);
    }
    byte[] signature = new byte[RTreeFileMarkerB.length];
    fileIn.read(signature);
    fileIn.close();
    if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor);
    }
    return Arrays.equals(signature, SpatialSite.RTreeFileMarkerB);
}

From source file:edu.umn.cs.spatialHadoop.mapred.SpatialRecordReader.java

License:Open Source License

/**
 * Initialize from a path and file range
 * @param job
 * @param s
 * @param l
 * @param p
 * @throws IOException
 */
public SpatialRecordReader(Configuration job, long s, long l, Path p) throws IOException {
    this.start = s;
    this.end = s + l;
    this.path = p;
    LOG.info("Open a SpatialRecordReader to file: " + p + "[" + s + "," + (s + l) + ")");
    this.fs = this.path.getFileSystem(job);
    this.directIn = fs.open(this.path);
    this.blockSize = fs.getFileStatus(this.path).getBlockSize();
    this.cellMbr = new Rectangle();

    codec = new CompressionCodecFactory(job).getCodec(this.path);

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    directIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = cIn;
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = codec.createInputStream(directIn, decompressor);
            filePosition = directIn;
        }
    } else {
        directIn.seek(start);
        in = directIn;
        filePosition = directIn;
    }
    this.pos = start;
    this.maxShapesInOneRead = job.getInt(SpatialSite.MaxShapesInOneRead, 1000000);
    this.maxBytesInOneRead = job.getInt(SpatialSite.MaxBytesInOneRead, 32 * 1024 * 1024);

    initializeReader();
}

From source file:edu.umn.cs.spatialHadoop.mapreduce.RTreeRecordReader3.java

License:Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException, InterruptedException {
    LOG.info("Open a SpatialRecordReader to split: " + split);
    FileSplit fsplit = (FileSplit) split;
    this.path = fsplit.getPath();
    this.start = fsplit.getStart();
    this.end = this.start + split.getLength();
    this.fs = this.path.getFileSystem(conf);
    this.directIn = fs.open(this.path);
    codec = new CompressionCodecFactory(conf).getCodec(this.path);

    if (codec != null) {
        // Input is compressed, create a decompressor to decompress it
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            // A splittable compression codec, can seek to the desired input pos
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    directIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new DataInputStream(cIn);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            // take pos from compressed stream as we adjusted both start and end
            // to match with the compressed file
            filePosition = cIn;
        } else {
            // Non-splittable input, need to start from the beginning
            CompressionInputStream cIn = codec.createInputStream(directIn, decompressor);
            in = new DataInputStream(cIn);
            filePosition = cIn;
        }
    } else {
        // Non-compressed file, seek to the desired position and use this stream
        // to get the progress and position
        directIn.seek(start);
        in = directIn;
        filePosition = directIn;
    }
    byte[] signature = new byte[8];
    in.readFully(signature);
    if (!Arrays.equals(signature, SpatialSite.RTreeFileMarkerB)) {
        throw new RuntimeException("Incorrect signature for RTree");
    }
    this.stockShape = (V) OperationsParams.getShape(conf, "shape");

    if (conf.get(SpatialInputFormat3.InputQueryRange) != null) {
        // Retrieve the input query range to apply on all records
        this.inputQueryRange = OperationsParams.getShape(conf, SpatialInputFormat3.InputQueryRange);
        this.inputQueryMBR = this.inputQueryRange.getMBR();
    }

    // Check if there is an associated global index to read cell boundaries
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, path.getParent());
    if (gindex == null) {
        cellMBR = new Partition();
        cellMBR.invalidate();
    } else {
        // Set from the associated partition in the global index
        for (Partition p : gindex) {
            if (p.filename.equals(this.path.getName()))
                cellMBR = p;
        }
    }
}

From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialRecordReader3.java

License:Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException, InterruptedException {
    FileSplit fsplit = (FileSplit) split;
    if (compressionCodecFactory == null)
        compressionCodecFactory = new CompressionCodecFactory(conf);

    LOG.info("Open a SpatialRecordReader to split: " + split);
    this.path = fsplit.getPath();
    this.start = fsplit.getStart();
    this.end = this.start + split.getLength();
    this.fs = this.path.getFileSystem(conf);
    this.directIn = fs.open(this.path);
    codec = compressionCodecFactory.getCodec(this.path);

    if (codec != null) {
        // Input is compressed, create a decompressor to decompress it
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            // A splittable compression codec, can seek to the desired input pos
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    directIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = cIn;
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            // take pos from compressed stream as we adjusted both start and end
            // to match with the compressed file
            progressPosition = cIn;
        } else {
            // Non-splittable input, need to start from the beginning
            CompressionInputStream cIn = codec.createInputStream(directIn, decompressor);
            in = cIn;
            progressPosition = cIn;
        }
    } else {
        // Non-compressed file, seek to the desired position and use this stream
        // to get the progress and position
        directIn.seek(start);
        in = directIn;
        progressPosition = directIn;
    }
    this.stockShape = (V) OperationsParams.getShape(conf, "shape");
    this.tempLine = new Text();

    this.lineReader = new LineReader(in);
    bytesRead = 0;

    if (this.start != 0) {
        // Skip until first end-of-line reached
        bytesRead += lineReader.readLine(tempLine);
    }
    if (conf.get(SpatialInputFormat3.InputQueryRange) != null) {
        // Retrieve the input query range to apply on all records
        this.inputQueryRange = OperationsParams.getShape(conf, SpatialInputFormat3.InputQueryRange);
        this.inputQueryMBR = this.inputQueryRange.getMBR();
    }

    // Check if there is an associated global index to read cell boundaries
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, path.getParent());
    if (gindex == null) {
        cellMBR = new Partition();
        cellMBR.filename = path.getName();
        cellMBR.invalidate();
    } else {
        // Set from the associated partition in the global index
        for (Partition p : gindex) {
            if (p.filename.equals(this.path.getName()))
                cellMBR = p;
        }
    }

    this.value = new ShapeIterator<V>();
    value.setShape(stockShape);
}

From source file:edu.umn.cs.spatialHadoop.operations.LocalSampler.java

License:Open Source License

/**
 * Sample a specific number of lines from a given file
 * @param fs
 * @param file
 * @param count
 * @param seed
 * @param output
 * @return
 * @throws IOException
 */
private static int sampleFileSplitByCount(FileSplit file, Configuration conf, int count, long seed,
        ResultCollector<Text> output) throws IOException {
    InputStream in = null;
    Decompressor decompressor = null;
    try {
        CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
        CompressionCodec codec = compressionCodecFactory.getCodec(file.getPath());

        // Open the file and read the sample
        FileSystem fs = file.getPath().getFileSystem(conf);
        in = fs.open(file.getPath());
        int sampledLines = 0;

        if (codec != null) {
            // Special handling for compressed file as we cannot compute the actual
            // size of the underlying data
            decompressor = CodecPool.getDecompressor(codec);

            if (codec instanceof SplittableCompressionCodec) {
                // A splittable compression codec, can seek to the desired input pos
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                        in, decompressor, file.getStart(), file.getStart() + file.getLength(),
                        SplittableCompressionCodec.READ_MODE.BYBLOCK);
                in = cIn;
                // Adjust the start and the end based on the compressed data
                long start = cIn.getAdjustedStart();
                long end = cIn.getAdjustedEnd();
                sampledLines = sampleStreamByCount(in, end - start, count, seed, output);
            } else {
                // Non-splittable input, need to start from the beginning
                in = codec.createInputStream(in, decompressor);
                sampledLines = sampleStreamByCount(in, Long.MAX_VALUE, count, seed, output);
            }
        } else {
            long pos = 0; // Current position in file

            // Generate random offsets and keep them sorted for IO efficiency
            Random rand = new Random(seed);
            long[] sampleOffsets = new long[count];
            for (int i = 0; i < count; i++)
                sampleOffsets[i] = Math.abs(rand.nextLong()) % file.getLength() + file.getStart();
            Arrays.sort(sampleOffsets);

            // Sample the generated numbers
            Text line = new Text2();
            for (int i = 0; i < count; i++) {
                pos += in.skip(sampleOffsets[i] - pos);
                // Skip until end of line
                line.clear();
                pos += readUntilEOL(in, line);
                // Read the next full line
                line.clear();
                if ((pos += readUntilEOL(in, line)) > 1) {
                    sampledLines++;
                    if (output != null)
                        output.collect(line);
                }
            }
        }

        return sampledLines;
    } finally {
        if (in != null)
            in.close();
        if (decompressor != null)
            CodecPool.returnDecompressor(decompressor);
    }
}

From source file:edu.umn.cs.spatialHadoop.operations.LocalSampler.java

License:Open Source License

/**
 * Sample text lines from the given split with the given sampling ratio
 * @param fs
 * @param file
 * @param ratio
 * @param seed
 * @param output
 * @return
 * @throws IOException
 */
private static int sampleFileSplitByRatio(FileSplit file, Configuration conf, float ratio, long seed,
        ResultCollector<Text> output) throws IOException {

    InputStream in = null;
    Decompressor decompressor = null;
    int sampledLines;
    Text line = new Text2();

    try {
        CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
        CompressionCodec codec = compressionCodecFactory.getCodec(file.getPath());
        FileSystem fs = file.getPath().getFileSystem(conf);
        in = fs.open(file.getPath());

        if (codec != null) {
            // Special handling for compressed file as we cannot compute the actual
            // size of the underlying data
            decompressor = CodecPool.getDecompressor(codec);

            if (codec instanceof SplittableCompressionCodec) {
                // A splittable compression codec, can seek to the desired input pos
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                        in, decompressor, file.getStart(), file.getStart() + file.getLength(),
                        SplittableCompressionCodec.READ_MODE.BYBLOCK);
                in = cIn;
                // Adjust the start and the end based on the compressed data
                long start = cIn.getAdjustedStart();
                long end = cIn.getAdjustedEnd();
                // Skip first line if needed
                if (file.getStart() > 0)
                    start += readUntilEOL(cIn, line);

                sampledLines = sampleStreamByRatio(in, ratio, seed, output);
            } else {
                // Non-splittable input, need to start from the beginning
                in = codec.createInputStream(in, decompressor);
                // No need to skip first line because we actually read the file from
                // the beginning
                sampledLines = sampleStreamByRatio(in, ratio, seed, output);
            }
        } else {
            // Not a compressed file. Apply a more efficient, though approximate,
            // solution
            // Open the file and read the sample
            long pos = 0; // Current position in file
            if (file.getStart() > 0) {
                pos += in.skip(file.getStart());
                pos += readUntilEOL(in, line);
            }

            // Initialize the random variable which is used for sampling
            Random rand = new Random(seed);
            sampledLines = 0;

            // Read the first 10 lines to estimate the average record size
            long end = file.getStart() + file.getLength();
            for (int i = 0; i < 10 && pos < end; i++) {
                line.clear();
                pos += readUntilEOL(in, line);
                if (rand.nextFloat() < ratio) {
                    sampledLines++;
                    if (output != null)
                        output.collect(line);
                }
            }

            int averageLineSize = (int) ((pos - file.getStart()) / 10);
            int count = Math.round(ratio * file.getLength() / averageLineSize) - sampledLines;
            long[] sampleOffsets = new long[count];
            for (int i = 0; i < count; i++)
                sampleOffsets[i] = Math.abs(rand.nextLong()) % (end - pos) + file.getStart();
            Arrays.sort(sampleOffsets);

            // Sample the generated numbers
            for (int i = 0; i < count; i++) {
                pos += in.skip(sampleOffsets[i] - pos);
                // Skip until end of line
                line.clear();
                pos += readUntilEOL(in, line);
                // Read the next full line
                line.clear();
                if ((pos += readUntilEOL(in, line)) > 1) {
                    sampledLines++;
                    if (output != null)
                        output.collect(line);
                }
            }
        }
    } finally {
        if (in != null)
            in.close();
        if (decompressor != null)
            CodecPool.returnDecompressor(decompressor);
    }

    return sampledLines;
}

From source file:example.TestLineRecordReader.java

License:Apache License

@Test
public void testMultipleClose() throws IOException {
    URL testFileUrl = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt.bz2");
    assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // read the data and check whether BOM is skipped
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);

    //noinspection StatementWithEmptyBody
    while (reader.nextKeyValue())
        ;
    reader.close();
    reader.close();

    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    Set<Decompressor> decompressors = new HashSet<Decompressor>();
    for (int i = 0; i < 10; ++i) {
        decompressors.add(CodecPool.getDecompressor(codec));
    }
    assertEquals(10, decompressors.size());
}
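
Because none of the ten decompressors above are returned, the pool hands out ten distinct instances. A small sketch of the complementary case, assuming the usual pooling behaviour in which a returned Decompressor is handed back out on the next request:

BZip2Codec codec = new BZip2Codec();
codec.setConf(new Configuration());

Decompressor first = CodecPool.getDecompressor(codec);
CodecPool.returnDecompressor(first);

// With the instance back in the pool, the next request typically reuses it
Decompressor second = CodecPool.getDecompressor(codec);
System.out.println("Reused pooled instance: " + (first == second));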

From source file:format.OverlapLengthRecordReader.java

License:Apache License

public void initialize(Configuration job, long splitStart, long splitLength, Path file) throws IOException {
    start = splitStart;
    end = start + splitLength;
    long partialRecordLength = start % recordLength;
    long numBytesToSkip = 0;

    /* This if check is not necessary since for this, we will read one entire split */
    /*
    if (partialRecordLength != 0) {
      numBytesToSkip = recordLength - partialRecordLength;
    }
    */

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        CompressionInputStream cIn = codec.createInputStream(fileIn, decompressor);
        filePosition = cIn;
        inputStream = cIn;
        numRecordsRemainingInSplit = Long.MAX_VALUE;
        LOG.info("Compressed input; cannot compute number of records in the split");
    } else {
        fileIn.seek(start);
        filePosition = fileIn;
        inputStream = fileIn;
        long splitSize = end - start - numBytesToSkip;
        /* This remains to be observed, since we are assuming recordLength = splitSize */
        //      numRecordsRemainingInSplit = (splitSize + recordLength - 1)/recordLength;
        numRecordsRemainingInSplit = 1;
        if (numRecordsRemainingInSplit < 0) {
            numRecordsRemainingInSplit = 0;
        }
        LOG.info("Expecting " + numRecordsRemainingInSplit + " records each with a length of " + recordLength
                + " bytes in the split with an effective size of " + splitSize + " bytes");
    }
    if (numBytesToSkip != 0) {
        start += inputStream.skip(numBytesToSkip);
    }
    this.pos = start;
}

From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqLineRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitFastqLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}