Example usage for org.apache.hadoop.fs FSDataInputStream seek

List of usage examples for org.apache.hadoop.fs FSDataInputStream seek

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FSDataInputStream#seek.

Prototype

@Override
public void seek(long desired) throws IOException;

Document

Seek to the given offset.
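
Before the project examples below, here is a minimal self-contained sketch of the typical pattern: obtain a FileSystem, open the file to get an FSDataInputStream, seek to the desired byte offset, and read from the new position. The path and offset are placeholders; seeking beyond the end of the file raises an IOException.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.dat"); // placeholder path
        FileSystem fs = path.getFileSystem(conf);
        byte[] buffer = new byte[4096];
        try (FSDataInputStream in = fs.open(path)) {
            in.seek(1024L);                  // jump to byte offset 1024
            int read = in.read(buffer);      // the read starts at the new position
            System.out.println("Read " + read + " bytes, now at offset " + in.getPos());
        }
    }
}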

Usage

From source file: io.hops.erasure_coding.FileStripeReader.java

License: Apache License

@Override
public InputStream buildOneInput(int locationIndex, long offsetInBlock, FileSystem srcFs, Path srcFile,
        FileStatus srcStat, FileSystem parityFs, Path parityFile, FileStatus parityStat) throws IOException {
    final long blockSize = srcStat.getBlockSize();

    LOG.info("buildOneInput srcfile " + srcFile + " srclen " + srcStat.getLen() + " parityfile " + parityFile
            + " paritylen " + parityStat.getLen() + " stripeindex " + stripeStartIdx + " locationindex "
            + locationIndex + " offsetinblock " + offsetInBlock);
    if (locationIndex < codec.parityLength) {
        return this.getParityFileInput(locationIndex, parityFile, parityFs, parityStat, offsetInBlock);
    } else {
        // Dealing with a src file here.
        int blockIdxInStripe = locationIndex - codec.parityLength;
        int blockIdx = (int) (codec.stripeLength * stripeStartIdx + blockIdxInStripe);
        long offset = blockSize * blockIdx + offsetInBlock;
        if (offset >= srcStat.getLen()) {
            LOG.info("Using zeros for " + srcFile + ":" + offset + " for location " + locationIndex);
            return new RaidUtils.ZeroInputStream(blockSize * (blockIdx + 1));
        } else {
            LOG.info("Opening " + srcFile + ":" + offset + " for location " + locationIndex);
            FSDataInputStream s = fs.open(srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
            s.seek(offset);
            return s;
        }
    }
}

From source file: io.hops.erasure_coding.ReedSolomonDecoder.java

License: Apache License

protected int[] buildInputs(FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
        boolean fixSource, long errorOffset, FSDataInputStream[] inputs) throws IOException {
    LOG.info("Building inputs to recover block starting at " + errorOffset);
    try {
        FileStatus srcStat = fs.getFileStatus(srcFile);
        FileStatus parityStat = fs.getFileStatus(parityFile);
        long blockSize = srcStat.getBlockSize();
        long blockIdx = (int) (errorOffset / blockSize);
        long stripeIdx;
        if (fixSource) {
            stripeIdx = blockIdx / stripeSize;
        } else {
            stripeIdx = blockIdx / paritySize;
        }

        LOG.info("FileSize = " + srcStat.getLen() + ", blockSize = " + blockSize + ", blockIdx = " + blockIdx
                + ", stripeIdx = " + stripeIdx);
        ArrayList<Integer> erasedLocations = new ArrayList<Integer>();
        // First open streams to the parity blocks.
        for (int i = 0; i < paritySize; i++) {
            long offset = blockSize * (stripeIdx * paritySize + i);
            if ((!fixSource) && offset == errorOffset) {
                LOG.info(parityFile + ":" + offset + " is known to have error, adding zeros as input " + i);
                inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(offset + blockSize));
                erasedLocations.add(i);
            } else if (offset > parityStat.getLen()) {
                LOG.info(parityFile + ":" + offset + " is past file size, adding zeros as input " + i);
                inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(offset + blockSize));
            } else {
                FSDataInputStream in = parityFs.open(parityFile, conf.getInt("io.file.buffer.size", 64 * 1024));
                in.seek(offset);
                LOG.info("Adding " + parityFile + ":" + offset + " as input " + i);
                inputs[i] = in;
            }
        }
        // Now open streams to the data blocks.
        for (int i = paritySize; i < paritySize + stripeSize; i++) {
            long offset = blockSize * (stripeIdx * stripeSize + i - paritySize);
            if (fixSource && offset == errorOffset) {
                LOG.info(srcFile + ":" + offset + " is known to have error, adding zeros as input " + i);
                inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(offset + blockSize));
                erasedLocations.add(i);
            } else if (offset > srcStat.getLen()) {
                LOG.info(srcFile + ":" + offset + " is past file size, adding zeros as input " + i);
                inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(offset + blockSize));
            } else {
                FSDataInputStream in = fs.open(srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
                in.seek(offset);
                LOG.info("Adding " + srcFile + ":" + offset + " as input " + i);
                inputs[i] = in;
            }
        }
        if (erasedLocations.size() > paritySize) {
            String msg = "Too many erased locations: " + erasedLocations.size();
            LOG.error(msg);
            throw new IOException(msg);
        }
        int[] locs = new int[erasedLocations.size()];
        for (int i = 0; i < locs.length; i++) {
            locs[i] = erasedLocations.get(i);
        }
        return locs;
    } catch (IOException e) {
        RaidUtils.closeStreams(inputs);
        throw e;
    }

}

From source file: io.hops.erasure_coding.StripeReader.java

License: Apache License

protected InputStream getParityFileInput(int locationIndex, Path parityFile, FileSystem parityFs,
        FileStatus parityStat, long offsetInBlock) throws IOException {
    // Dealing with a parity file here.
    int parityBlockIdx = (int) (codec.parityLength * stripeStartIdx + locationIndex);
    long offset = parityStat.getBlockSize() * parityBlockIdx + offsetInBlock;
    assert (offset < parityStat.getLen());
    LOG.info("Opening " + parityFile + ":" + offset + " for location " + locationIndex);
    FSDataInputStream s = parityFs.open(parityFile, conf.getInt("io.file.buffer.size", 64 * 1024));
    s.seek(offset);
    return s;
}

From source file: io.hops.erasure_coding.XORDecoder.java

License: Apache License

@Override
protected long fixErasedBlockImpl(FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
        boolean fixSource, long blockSize, long errorOffset, long limit, boolean partial, OutputStream out,
        Progressable reporter, CRC32 crc) throws IOException {
    if (partial) {
        throw new IOException("We don't support partial reconstruction");
    }
    LOG.info("Fixing block at " + srcFile + ":" + errorOffset + ", limit " + limit);
    if (crc != null) {
        crc.reset();
    }
    FileStatus srcStat = fs.getFileStatus(srcFile);
    FSDataInputStream[] inputs = new FSDataInputStream[stripeSize + this.codec.parityLength];

    try {
        long errorBlockOffset = (errorOffset / blockSize) * blockSize;
        long[] srcOffsets = stripeOffsets(errorOffset, blockSize, fixSource);
        for (int i = 0; i < srcOffsets.length; i++) {
            if (fixSource && srcOffsets[i] == errorBlockOffset) {
                inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(blockSize));
                LOG.info("Using zeros at " + srcFile + ":" + errorBlockOffset);
                continue;
            }
            if (srcOffsets[i] < srcStat.getLen()) {
                FSDataInputStream in = fs.open(srcFile);
                in.seek(srcOffsets[i]);
                inputs[i] = in;
            } else {
                inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(blockSize));
                LOG.info("Using zeros at " + srcFile + ":" + errorBlockOffset);
            }
        }

        if (fixSource) {
            FSDataInputStream parityFileIn = parityFs.open(parityFile);
            parityFileIn.seek(parityOffset(errorOffset, blockSize));
            inputs[inputs.length - 1] = parityFileIn;
        } else {
            inputs[inputs.length - 1] = new FSDataInputStream(new RaidUtils.ZeroInputStream(blockSize));
            LOG.info("Using zeros at " + parityFile + ":" + errorBlockOffset);
        }
    } catch (IOException e) {
        RaidUtils.closeStreams(inputs);
        throw e;
    }

    int boundedBufferCapacity = 1;
    ParallelStreamReader parallelReader = new ParallelStreamReader(reporter, inputs, bufSize, parallelism,
            boundedBufferCapacity, blockSize);
    parallelReader.start();
    try {
        // Loop while the number of skipped + written bytes is less than the max.
        long written;
        for (written = 0; written < limit;) {
            ParallelStreamReader.ReadResult readResult;
            try {
                readResult = parallelReader.getReadResult();
            } catch (InterruptedException e) {
                throw new IOException("Interrupted while waiting for read result");
            }
            // Cannot tolerate any IO errors.
            IOException readEx = readResult.getException();
            if (readEx != null) {
                throw readEx;
            }

            int toWrite = (int) Math.min((long) bufSize, limit - written);

            XOREncoder.xor(readResult.readBufs, writeBufs[0]);

            out.write(writeBufs[0], 0, toWrite);
            if (crc != null) {
                crc.update(writeBufs[0], 0, toWrite);
            }
            written += toWrite;
        }
        return written;
    } finally {
        // Inputs will be closed by parallelReader.shutdown().
        parallelReader.shutdown();
    }
}

From source file: io.prestosql.parquet.reader.MetadataReader.java

License: Apache License

public static ParquetMetadata readFooter(FSDataInputStream inputStream, Path file, long fileSize)
        throws IOException
{
    // Parquet File Layout:
    //
    // MAGIC
    // variable: Data
    // variable: Metadata
    // 4 bytes: MetadataLength
    // MAGIC

    validateParquet(fileSize >= MAGIC.length + PARQUET_METADATA_LENGTH + MAGIC.length,
            "%s is not a valid Parquet File", file);
    long metadataLengthIndex = fileSize - PARQUET_METADATA_LENGTH - MAGIC.length;

    inputStream.seek(metadataLengthIndex);
    int metadataLength = readIntLittleEndian(inputStream);

    byte[] magic = new byte[MAGIC.length];
    inputStream.readFully(magic);
    validateParquet(Arrays.equals(MAGIC, magic), "Not valid Parquet file: %s expected magic number: %s got: %s",
            file, Arrays.toString(MAGIC), Arrays.toString(magic));

    long metadataIndex = metadataLengthIndex - metadataLength;
    validateParquet(metadataIndex >= MAGIC.length && metadataIndex < metadataLengthIndex,
            "Corrupted Parquet file: %s metadata index: %s out of range", file, metadataIndex);
    inputStream.seek(metadataIndex);
    FileMetaData fileMetaData = readFileMetaData(inputStream);
    List<SchemaElement> schema = fileMetaData.getSchema();
    validateParquet(!schema.isEmpty(), "Empty Parquet schema in file: %s", file);

    MessageType messageType = readParquetSchema(schema);
    List<BlockMetaData> blocks = new ArrayList<>();
    List<RowGroup> rowGroups = fileMetaData.getRow_groups();
    if (rowGroups != null) {
        for (RowGroup rowGroup : rowGroups) {
            BlockMetaData blockMetaData = new BlockMetaData();
            blockMetaData.setRowCount(rowGroup.getNum_rows());
            blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size());
            List<ColumnChunk> columns = rowGroup.getColumns();
            validateParquet(!columns.isEmpty(), "No columns in row group: %s", rowGroup);
            String filePath = columns.get(0).getFile_path();
            for (ColumnChunk columnChunk : columns) {
                validateParquet(
                        (filePath == null && columnChunk.getFile_path() == null)
                                || (filePath != null && filePath.equals(columnChunk.getFile_path())),
                        "all column chunks of the same row group must be in the same file");
                ColumnMetaData metaData = columnChunk.meta_data;
                String[] path = metaData.path_in_schema.stream().map(value -> value.toLowerCase(Locale.ENGLISH))
                        .toArray(String[]::new);
                ColumnPath columnPath = ColumnPath.get(path);
                PrimitiveTypeName primitiveTypeName = messageType.getType(columnPath.toArray())
                        .asPrimitiveType().getPrimitiveTypeName();
                ColumnChunkMetaData column = ColumnChunkMetaData.get(columnPath, primitiveTypeName,
                        CompressionCodecName.fromParquet(metaData.codec), readEncodings(metaData.encodings),
                        readStats(metaData.statistics, primitiveTypeName), metaData.data_page_offset,
                        metaData.dictionary_page_offset, metaData.num_values, metaData.total_compressed_size,
                        metaData.total_uncompressed_size);
                blockMetaData.addColumn(column);
            }
            blockMetaData.setPath(filePath);
            blocks.add(blockMetaData);
        }
    }

    Map<String, String> keyValueMetaData = new HashMap<>();
    List<KeyValue> keyValueList = fileMetaData.getKey_value_metadata();
    if (keyValueList != null) {
        for (KeyValue keyValue : keyValueList) {
            keyValueMetaData.put(keyValue.key, keyValue.value);
        }
    }
    return new ParquetMetadata(new org.apache.parquet.hadoop.metadata.FileMetaData(messageType,
            keyValueMetaData, fileMetaData.getCreated_by()), blocks);
}
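
The readFooter example above illustrates a common use of seek: jump to a fixed-size trailer at the end of the file, read the length it records, then seek backwards to the metadata block it describes. Below is a condensed, hypothetical sketch of that pattern; the 8-byte trailer layout (a 4-byte big-endian length followed by 4 magic bytes) is illustrative only and is not the real Parquet footer format.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TrailerReader {
    // Hypothetical layout: [ data ][ footer ][ 4-byte footer length ][ 4-byte magic ]
    public static byte[] readTrailer(FileSystem fs, Path file) throws IOException {
        long fileSize = fs.getFileStatus(file).getLen();
        try (FSDataInputStream in = fs.open(file)) {
            in.seek(fileSize - 8);                  // jump to the fixed-size trailer
            int footerLength = in.readInt();        // big-endian length of the footer
            byte[] magic = new byte[4];
            in.readFully(magic);                    // trailing magic bytes (not validated here)
            in.seek(fileSize - 8 - footerLength);   // seek back to the footer itself
            byte[] footer = new byte[footerLength];
            in.readFully(footer);
            return footer;
        }
    }
}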

From source file: io.sanfran.wikiTrends.extraction.hadoop.FileNameLineRecordReader.java

License: Open Source License

public FileNameLineRecordReader(Configuration job, FileSplit split) throws IOException {
    this.maxLineLength = job.getInt("mapred.LineRecordReader.maxlength", Integer.MAX_VALUE);
    fileName = split.getPath().getName();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
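
The record reader above, like several that follow, uses seek for split alignment: position the stream at the start of the assigned split, then discard the possibly partial first line, since the reader of the previous split reads one extra line past its boundary. Below is a condensed sketch of just that step, assuming uncompressed input; the class name SplitLineSeeker is hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class SplitLineSeeker {
    public final LineReader reader;    // positioned at the first complete line of the split
    public final long firstRecordPos;  // byte offset of that line

    public SplitLineSeeker(FileSystem fs, Path file, long splitStart, Configuration conf)
            throws IOException {
        FSDataInputStream in = fs.open(file);
        in.seek(splitStart);                        // jump to the split boundary
        LineReader lines = new LineReader(in, conf);
        long pos = splitStart;
        if (splitStart != 0) {
            // Discard the partial first line; the previous split's reader consumes it.
            pos += lines.readLine(new Text(), 0, Integer.MAX_VALUE);
        }
        this.reader = lines;            // closing the reader also closes the underlying stream
        this.firstRecordPos = pos;
    }
}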

From source file: kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java

License: Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new read start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRead = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}

From source file: kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java

License: Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new record start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRecord = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}

From source file: lennard.PiRecordReader.java

License: Apache License

public PiRecordReader(Configuration job, FileSplit split) throws IOException {
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;

    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: libra.common.hadoop.io.reader.fasta.FastaKmerReader.java

License: Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    this.kmersize = FastaKmerInputFormat.getKmerSize(conf);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);
    boolean inTheMiddle = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        if (this.start != 0) {
            this.start--;
            fileIn.seek(this.start);

            inTheMiddle = true;
        }
        this.in = new LineReader(fileIn, conf);
    }

    this.buffer = new Text();

    if (inTheMiddle) {
        // find new start line
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));

        // back off
        FSDataInputStream fileIn2 = fs.open(file);
        fileIn2.seek(this.start - 1000);

        LineReader in2 = new LineReader(fileIn2, conf);
        Text tempLine = new Text();
        long curpos = this.start - 1000;
        while (curpos < this.start) {
            curpos += in2.readLine(tempLine, 0, (int) (this.start - curpos));
        }

        if (tempLine.charAt(0) == READ_DELIMITER) {
            // clean start
            this.buffer.clear();
        } else {
            // leave k-1 seq in the buffer
            String seq = tempLine.toString().trim();
            String left = seq.substring(seq.length() - this.kmersize + 1);
            this.buffer.set(left);
        }

        in2.close();
    }

    this.pos = this.start;

    this.key = null;
    this.value = null;
}