Example usage for org.apache.hadoop.io.compress SplitCompressionInputStream getAdjustedStart

List of usage examples for org.apache.hadoop.io.compress SplitCompressionInputStream getAdjustedStart

Introduction

On this page you can find example usage for org.apache.hadoop.io.compress SplitCompressionInputStream getAdjustedStart.

Prototype

public long getAdjustedStart() 

Document

After calling createInputStream, the values of start or end might change, so this method can be used to get the new value of start.
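
The adjustment happens because a splittable codec (bzip2, for example) can only resume decompression at a compression-block boundary, so createInputStream moves the requested split boundaries to the nearest usable positions. Below is a minimal, self-contained sketch of the pattern every example on this page follows; the file path and split boundaries are illustrative assumptions, not taken from the examples.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.SplitCompressionInputStream;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

public class AdjustedStartSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("input.bz2"); // hypothetical splittable-compressed file
        long start = 0;                    // hypothetical split start
        long end = 1024 * 1024;            // hypothetical split end

        FileSystem fs = path.getFileSystem(conf);
        FSDataInputStream fileIn = fs.open(path);
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);

        if (codec instanceof SplittableCompressionCodec) {
            Decompressor decompressor = CodecPool.getDecompressor(codec);
            SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                    .createInputStream(fileIn, decompressor, start, end,
                            SplittableCompressionCodec.READ_MODE.BYBLOCK);
            // createInputStream may have moved the split boundaries to codec
            // block boundaries, so re-read them from the stream.
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            System.out.println("Adjusted split: [" + start + "," + end + ")");
            cIn.close(); // also closes the underlying file stream
            CodecPool.returnDecompressor(decompressor);
        } else {
            fileIn.close();
        }
    }
}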

Usage

From source file: com.ricemap.spateDB.mapred.SpatialRecordReader.java

License: Apache License

/**
 * Initialize from a path and range
 * @param job
 * @param s
 * @param l
 * @param p
 * @throws IOException
 */
public SpatialRecordReader(Configuration job, long s, long l, Path p) throws IOException {
    this.start = s;
    this.end = s + l;
    this.path = p;
    this.fs = this.path.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(this.path);
    this.blockSize = fs.getFileStatus(this.path).getBlockSize();
    this.cellMbr = new Prism();

    LOG.info("Open a SpatialRecordReader to file: " + this.path);

    codec = new CompressionCodecFactory(job).getCodec(this.path);

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = cIn;
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = codec.createInputStream(fileIn, decompressor);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = fileIn;
        filePosition = fileIn;
    }
    this.pos = start;
    this.maxShapesInOneRead = job.getInt(SpatialSite.MaxShapesInOneRead, 1000000);
    this.maxBytesInOneRead = job.getInt(SpatialSite.MaxBytesInOneRead, 32 * 1024 * 1024);

    initializeReader();
}

From source file: com.rw.legion.input.LegionRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    /*
     * fileBroken tracks whether there's been an IOException while reading
     * this file. If there has, the record reader will simply stop reading
     * records for this particular file, rather than blowing up the whole
     * job.
     */
    fileBroken = false;
    currentLine = new Text();
    currentLineNumber = 0;

    FileSplit split = (FileSplit) genericSplit;

    if (split.getLength() == 0) {
        fileBroken = true;
    }

    // Load the Legion Objective.
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    legionObjective = ObjectiveDeserializer.deserialize(job.get("legion_objective"));

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // Open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    // Grab the file name to include with the data.
    fileName = file.toString();

    // Does the Legion Objective specify an input codec to use?
    if (legionObjective.getCodecOverride() != null) {
        isCompressedInput = true;
        CompressionCodec codec = new CompressionCodecFactory(job)
                .getCodecByClassName(legionObjective.getCodecOverride());
        decompressor = CodecPool.getDecompressor(codec);
        in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes);
        filePosition = fileIn;
    } else {
        CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
        if (null != codec) {
            isCompressedInput = true;
            decompressor = CodecPool.getDecompressor(codec);

            if (codec instanceof SplittableCompressionCodec) {
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
                in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                start = cIn.getAdjustedStart();
                end = cIn.getAdjustedEnd();
                filePosition = cIn;
            } else {
                in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
                filePosition = fileIn;
            }
        } else {
            fileIn.seek(start);
            in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    }

    /*
     * If this is not the first split, we always throw away first record
     * because we always (except the last split) read one extra line in
     * next() method.
     */
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }

    this.pos = start;
}

From source file: de.l3s.streamcorpus.terrier.ThriftFileCollectionRecordReader.java

License: Apache License

/** 
 * Reading a bunch of lines of file paths in a list.
 * The code in this method is redistributed from Hadoop LineRecordReader
 *
 * @throws IOException 
 */
private void loadPathsFromInputSplit(InputSplit split, Configuration conf) throws IOException {
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();

    long begin = fileSplit.getStart();
    long end = begin + fileSplit.getLength();

    LOG.info("Reading paths in file " + path.getName());

    // First check the compression codec
    CompressionCodecFactory compressionCodec = new CompressionCodecFactory(conf);
    CompressionCodec codec = compressionCodec.getCodec(path);
    FSDataInputStream fis = fs.open(path);
    SplitLineReader in;

    Seekable filePosition;

    boolean compressed = false;
    Decompressor decompressor = null;
    if (null != codec) {
        compressed = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(fis,
                    decompressor, begin, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, conf, (byte[]) null);
            begin = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fis, decompressor), conf, null);
            filePosition = fis;
        }
    } else {
        fis.seek(begin);
        in = new SplitLineReader(fis, conf, (byte[]) null);
        filePosition = fis;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (begin != 0) {
        begin += in.readLine(new Text(), 0, maxBytesToConsume(compressed, begin, end));
    }
    long pos = begin;

    int newSize = 0;
    final Text nextLine = new Text();
    paths = new ArrayList<>();
    while (getFilePosition(compressed, filePosition, pos) <= end || in.needAdditionalRecordAfterSplit()) {

        if (pos == 0) {
            // Strip the BOM (Byte Order Mark). Text only supports UTF-8,
            // so we only need to check for the UTF-8 BOM (0xEF,0xBB,0xBF)
            // at the start of the text stream.
            newSize = in.readLine(nextLine, Integer.MAX_VALUE, Integer.MAX_VALUE);
            pos += newSize;
            int textLength = nextLine.getLength();
            byte[] textBytes = nextLine.getBytes();
            if ((textLength >= 3) && (textBytes[0] == (byte) 0xEF) && (textBytes[1] == (byte) 0xBB)
                    && (textBytes[2] == (byte) 0xBF)) {
                // Found the UTF-8 BOM; strip it.
                LOG.info("Found UTF-8 BOM and skipped it");
                textLength -= 3;
                newSize -= 3;
                if (textLength > 0) {
                    // It may work to use the same buffer and 
                    // not do the copyBytes
                    textBytes = nextLine.copyBytes();
                    nextLine.set(textBytes, 3, textLength);
                } else {
                    nextLine.clear();
                }
            }
        } else {
            newSize = in.readLine(nextLine, Integer.MAX_VALUE, maxBytesToConsume(compressed, pos, end));
            pos += newSize;
        }

        paths.add(nextLine.toString());
        LOG.info("Read line of size " + newSize + " at pos " + (pos - newSize));
    }

    try {
        if (in != null) {
            in.close();
        }
        if (fis != null) {
            fis.close();
        }
    } finally {
        if (decompressor != null) {
            CodecPool.returnDecompressor(decompressor);
        }
    }
}

From source file: edu.umn.cs.spatialHadoop.mapred.SpatialRecordReader.java

License: Open Source License

/**
 * Initialize from a path and file range
 * @param job
 * @param s
 * @param l
 * @param p
 * @throws IOException
 */
public SpatialRecordReader(Configuration job, long s, long l, Path p) throws IOException {
    this.start = s;
    this.end = s + l;
    this.path = p;
    LOG.info("Open a SpatialRecordReader to file: " + p + "[" + s + "," + (s + l) + ")");
    this.fs = this.path.getFileSystem(job);
    this.directIn = fs.open(this.path);
    this.blockSize = fs.getFileStatus(this.path).getBlockSize();
    this.cellMbr = new Rectangle();

    codec = new CompressionCodecFactory(job).getCodec(this.path);

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    directIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = cIn;
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = codec.createInputStream(directIn, decompressor);
            filePosition = directIn;
        }
    } else {
        directIn.seek(start);
        in = directIn;
        filePosition = directIn;
    }
    this.pos = start;
    this.maxShapesInOneRead = job.getInt(SpatialSite.MaxShapesInOneRead, 1000000);
    this.maxBytesInOneRead = job.getInt(SpatialSite.MaxBytesInOneRead, 32 * 1024 * 1024);

    initializeReader();
}

From source file: edu.umn.cs.spatialHadoop.mapreduce.RTreeRecordReader3.java

License: Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException, InterruptedException {
    LOG.info("Open a SpatialRecordReader to split: " + split);
    FileSplit fsplit = (FileSplit) split;
    this.path = fsplit.getPath();
    this.start = fsplit.getStart();
    this.end = this.start + split.getLength();
    this.fs = this.path.getFileSystem(conf);
    this.directIn = fs.open(this.path);
    codec = new CompressionCodecFactory(conf).getCodec(this.path);

    if (codec != null) {
        // Input is compressed, create a decompressor to decompress it
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            // A splittable compression codec, can seek to the desired input pos
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    directIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new DataInputStream(cIn);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            // take pos from compressed stream as we adjusted both start and end
            // to match with the compressed file
            filePosition = cIn;
        } else {
            // Non-splittable input, need to start from the beginning
            CompressionInputStream cIn = codec.createInputStream(directIn, decompressor);
            in = new DataInputStream(cIn);
            filePosition = cIn;
        }
    } else {
        // Non-compressed file, seek to the desired position and use this stream
        // to get the progress and position
        directIn.seek(start);
        in = directIn;
        filePosition = directIn;
    }
    byte[] signature = new byte[8];
    in.readFully(signature);
    if (!Arrays.equals(signature, SpatialSite.RTreeFileMarkerB)) {
        throw new RuntimeException("Incorrect signature for RTree");
    }
    this.stockShape = (V) OperationsParams.getShape(conf, "shape");

    if (conf.get(SpatialInputFormat3.InputQueryRange) != null) {
        // Retrieve the input query range to apply on all records
        this.inputQueryRange = OperationsParams.getShape(conf, SpatialInputFormat3.InputQueryRange);
        this.inputQueryMBR = this.inputQueryRange.getMBR();
    }

    // Check if there is an associated global index to read cell boundaries
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, path.getParent());
    if (gindex == null) {
        cellMBR = new Partition();
        cellMBR.invalidate();
    } else {
        // Set from the associated partition in the global index
        for (Partition p : gindex) {
            if (p.filename.equals(this.path.getName()))
                cellMBR = p;
        }
    }
}

From source file: edu.umn.cs.spatialHadoop.mapreduce.SpatialRecordReader3.java

License: Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException, InterruptedException {
    FileSplit fsplit = (FileSplit) split;
    if (compressionCodecFactory == null)
        compressionCodecFactory = new CompressionCodecFactory(conf);

    LOG.info("Open a SpatialRecordReader to split: " + split);
    this.path = fsplit.getPath();
    this.start = fsplit.getStart();
    this.end = this.start + split.getLength();
    this.fs = this.path.getFileSystem(conf);
    this.directIn = fs.open(this.path);
    codec = compressionCodecFactory.getCodec(this.path);

    if (codec != null) {
        // Input is compressed, create a decompressor to decompress it
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            // A splittable compression codec, can seek to the desired input pos
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    directIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = cIn;
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            // take pos from compressed stream as we adjusted both start and end
            // to match with the compressed file
            progressPosition = cIn;
        } else {
            // Non-splittable input, need to start from the beginning
            CompressionInputStream cIn = codec.createInputStream(directIn, decompressor);
            in = cIn;
            progressPosition = cIn;
        }
    } else {
        // Non-compressed file, seek to the desired position and use this stream
        // to get the progress and position
        directIn.seek(start);
        in = directIn;
        progressPosition = directIn;
    }
    this.stockShape = (V) OperationsParams.getShape(conf, "shape");
    this.tempLine = new Text();

    this.lineReader = new LineReader(in);
    bytesRead = 0;

    if (this.start != 0) {
        // Skip until first end-of-line reached
        bytesRead += lineReader.readLine(tempLine);
    }
    if (conf.get(SpatialInputFormat3.InputQueryRange) != null) {
        // Retrieve the input query range to apply on all records
        this.inputQueryRange = OperationsParams.getShape(conf, SpatialInputFormat3.InputQueryRange);
        this.inputQueryMBR = this.inputQueryRange.getMBR();
    }

    // Check if there is an associated global index to read cell boundaries
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, path.getParent());
    if (gindex == null) {
        cellMBR = new Partition();
        cellMBR.filename = path.getName();
        cellMBR.invalidate();
    } else {
        // Set from the associated partition in the global index
        for (Partition p : gindex) {
            if (p.filename.equals(this.path.getName()))
                cellMBR = p;
        }
    }

    this.value = new ShapeIterator<V>();
    value.setShape(stockShape);
}

From source file: edu.umn.cs.spatialHadoop.operations.LocalSampler.java

License: Open Source License

/**
 * Sample a specific number of lines from a given file
 * @param file
 * @param conf
 * @param count
 * @param seed
 * @param output
 * @return
 * @throws IOException
 */
private static int sampleFileSplitByCount(FileSplit file, Configuration conf, int count, long seed,
        ResultCollector<Text> output) throws IOException {
    InputStream in = null;
    Decompressor decompressor = null;
    try {
        CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
        CompressionCodec codec = compressionCodecFactory.getCodec(file.getPath());

        // Open the file and read the sample
        FileSystem fs = file.getPath().getFileSystem(conf);
        in = fs.open(file.getPath());
        int sampledLines = 0;

        if (codec != null) {
            // Special handling for compressed file as we cannot compute the actual
            // size of the underlying data
            decompressor = CodecPool.getDecompressor(codec);

            if (codec instanceof SplittableCompressionCodec) {
                // A splittable compression codec, can seek to the desired input pos
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                        in, decompressor, file.getStart(), file.getStart() + file.getLength(),
                        SplittableCompressionCodec.READ_MODE.BYBLOCK);
                in = cIn;
                // Adjust the start and the end based on the compressed data
                long start = cIn.getAdjustedStart();
                long end = cIn.getAdjustedEnd();
                sampledLines = sampleStreamByCount(in, end - start, count, seed, output);
            } else {
                // Non-splittable input, need to start from the beginning
                in = codec.createInputStream(in, decompressor);
                sampledLines = sampleStreamByCount(in, Long.MAX_VALUE, count, seed, output);
            }
        } else {
            long pos = 0; // Current position in file

            // Generate random offsets and keep them sorted for IO efficiency
            Random rand = new Random(seed);
            long[] sampleOffsets = new long[count];
            for (int i = 0; i < count; i++)
                sampleOffsets[i] = Math.abs(rand.nextLong()) % file.getLength() + file.getStart();
            Arrays.sort(sampleOffsets);

            // Sample the generated numbers
            Text line = new Text2();
            for (int i = 0; i < count; i++) {
                pos += in.skip(sampleOffsets[i] - pos);
                // Skip until end of line
                line.clear();
                pos += readUntilEOL(in, line);
                // Read the next full line
                line.clear();
                if ((pos += readUntilEOL(in, line)) > 1) {
                    sampledLines++;
                    if (output != null)
                        output.collect(line);
                }
            }
        }

        return sampledLines;
    } finally {
        if (in != null)
            in.close();
        if (decompressor != null)
            CodecPool.returnDecompressor(decompressor);
    }
}

From source file: edu.umn.cs.spatialHadoop.operations.LocalSampler.java

License: Open Source License

/**
 * Sample text lines from the given split with the given sampling ratio
 * @param file
 * @param conf
 * @param ratio
 * @param seed
 * @param output
 * @return
 * @throws IOException
 */
private static int sampleFileSplitByRatio(FileSplit file, Configuration conf, float ratio, long seed,
        ResultCollector<Text> output) throws IOException {

    InputStream in = null;
    Decompressor decompressor = null;
    int sampledLines;
    Text line = new Text2();

    try {
        CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
        CompressionCodec codec = compressionCodecFactory.getCodec(file.getPath());
        FileSystem fs = file.getPath().getFileSystem(conf);
        in = fs.open(file.getPath());

        if (codec != null) {
            // Special handling for compressed file as we cannot compute the actual
            // size of the underlying data
            decompressor = CodecPool.getDecompressor(codec);

            if (codec instanceof SplittableCompressionCodec) {
                // A splittable compression codec, can seek to the desired input pos
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                        in, decompressor, file.getStart(), file.getStart() + file.getLength(),
                        SplittableCompressionCodec.READ_MODE.BYBLOCK);
                in = cIn;
                // Adjust the start and the end based on the compressed data
                long start = cIn.getAdjustedStart();
                long end = cIn.getAdjustedEnd();
                // Skip first line if needed
                if (file.getStart() > 0)
                    start += readUntilEOL(cIn, line);

                sampledLines = sampleStreamByRatio(in, ratio, seed, output);
            } else {
                // Non-splittable input, need to start from the beginning
                in = codec.createInputStream(in, decompressor);
                // No need to skip first line because we actually read the file from
                // the beginning
                sampledLines = sampleStreamByRatio(in, ratio, seed, output);
            }
        } else {
            // Not a compressed file. Apply a more efficient, though approximate,
            // solution
            // Open the file and read the sample
            long pos = 0; // Current position in file
            if (file.getStart() > 0) {
                pos += in.skip(file.getStart());
                pos += readUntilEOL(in, line);
            }

            // Initialize the random variable which is used for sampling
            Random rand = new Random(seed);
            sampledLines = 0;

            // Read the first 10 lines to estimate the average record size
            long end = file.getStart() + file.getLength();
            for (int i = 0; i < 10 && pos < end; i++) {
                line.clear();
                pos += readUntilEOL(in, line);
                if (rand.nextFloat() < ratio) {
                    sampledLines++;
                    if (output != null)
                        output.collect(line);
                }
            }

            int averageLineSize = (int) ((pos - file.getStart()) / 10);
            int count = Math.round(ratio * file.getLength() / averageLineSize) - sampledLines;
            long[] sampleOffsets = new long[count];
            for (int i = 0; i < count; i++)
                sampleOffsets[i] = Math.abs(rand.nextLong()) % (end - pos) + file.getStart();
            Arrays.sort(sampleOffsets);

            // Sample the generated numbers
            for (int i = 0; i < count; i++) {
                pos += in.skip(sampleOffsets[i] - pos);
                // Skip until end of line
                line.clear();
                pos += readUntilEOL(in, line);
                // Read the next full line
                line.clear();
                if ((pos += readUntilEOL(in, line)) > 1) {
                    sampledLines++;
                    if (output != null)
                        output.collect(line);
                }
            }
        }
    } finally {
        if (in != null)
            in.close();
        if (decompressor != null)
            CodecPool.returnDecompressor(decompressor);
    }

    return sampledLines;
}

From source file: fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqLineRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitFastqLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file: hadoop.inputsplit.FastaLineRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();

    done = false;

    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();

    file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    currentValue = new ValueWritable();
    value = new Text();
    tmpValue = new Text();
    tmp = new Text();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    String homeHdfs = context.getConfiguration().get("HDFS_HOME_DIR");
    //maxK = HadoopUtil.getMaxkFromPatterns(fs, new Path(homeHdfs+Constant.HDFS_PATTERNS_FILE_HDFS));

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job, recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job, recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;

    setKeySeq(fs, job); // Set currentKey

    nextMyKeyValue(); // Read the first record, if one exists.

}
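
A detail common to the readers above: the Decompressor is borrowed from CodecPool, so it should be returned when the reader is closed, as the ThriftFileCollectionRecordReader and LocalSampler examples do. A close() sketch, assuming in and decompressor fields like those in the record readers above:

public synchronized void close() throws IOException {
    try {
        if (in != null) {
            in.close(); // also closes the wrapped compressed stream
        }
    } finally {
        if (decompressor != null) {
            // Return the decompressor to the pool so it can be reused
            CodecPool.returnDecompressor(decompressor);
            decompressor = null;
        }
    }
}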