Example usage for org.apache.hadoop.fs FSDataInputStream seek

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FSDataInputStream#seek.

Prototype

@Override
public void seek(long desired) throws IOException;

Document

Seek to the given offset.
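
Before the per-project examples below, here is a minimal, self-contained sketch of seek used in isolation. It is an illustration only: the file path /tmp/example.txt and the offset 128 are placeholder values, not taken from any of the sources quoted on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example.txt"); // placeholder path
        FileSystem fs = file.getFileSystem(conf);
        try (FSDataInputStream in = fs.open(file)) {
            in.seek(128L); // jump to byte offset 128 (placeholder value)
            System.out.println("position after seek: " + in.getPos());
            int b = in.read(); // the next read starts at the sought offset
            System.out.println("first byte at offset: " + b);
        }
    }
}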

Usage

From source file: matrixFormat.MatrixRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    //FileSplit split = (FileSplit) genericSplit;

    MatrixFileSplit split = (MatrixFileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    method = (job.get("method").compareTo("IPB") == 0) ? 1
            : ((job.get("method").compareTo("OPB") == 0) ? 2 : 0);
    sparse = job.getBoolean("Sparse", false);
    this.maxLength = job.getInt("mapred.matrixrecordreader.maxlength", Integer.MAX_VALUE);

    start1 = split.getStart();
    start2 = split.getStart(1);
    end1 = start1 + split.getLength(0);
    end2 = start2 + split.getLength(1);
    blkID = split.getId();
    final Path file = split.getPath(0);
    final Path file2 = split.getPath(1);
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FileSystem fs2 = file2.getFileSystem(job);
    FSDataInputStream fileIn1 = fs.open(split.getPath(0));
    FSDataInputStream fileIn2 = fs2.open(split.getPath(1));
    //FileInputStream fileIn2 = new FileInputStream(file2.toString()); 
    //Don't care the compression stuff
    /*if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn1, decompressor, start1, end1, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            final SplitCompressionInputStream cIn2 = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn2, decompressor, start2, end2, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new MatrixReader(cIn, cIn2);
            start1 = cIn.getAdjustedStart();
            end1 = cIn.getAdjustedEnd();
            filePosition1 = cIn;
        } else {
            in = new MatrixReader(codec.createInputStream(fileIn1, decompressor),
                    codec.createInputStream(fileIn2, decompressor), job, split.getStarts(0), split.getStarts(1));
            filePosition1 = fileIn1;
        }
    } else {*/
    fileIn1.seek(start1);
    fileIn2.seek(start2);
    if (sparse) {
        in = new MatrixReader(fileIn1, fileIn2, job, split.getStart(0), split.getStart(1));
    } else {
        in = new MatrixReader(fileIn1, fileIn2, job, split.getStarts(0), split.getStarts(1));
    }

    //in = new MatrixReader(file, file2, job, split.getStarts(0), split.getStarts(1));
    filePosition1 = fileIn1;
    filePosition2 = fileIn2;
    //}

    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    /*if (start1 != 0) {
        start1 += in.readOldBlock(maxLength, maxBytesToConsume(pos1));
        this.pos1 = start1;
    }

    in.readBlocks(maxLength, maxBytesToConsume(pos1));
    start1 += in.getBytesComsumed(0);
    //start2 += in.getBytesComsumed(1);
    this.pos1 = start1;*/
}

From source file: ml.shifu.guagua.hadoop.io.GuaguaLineRecordReader.java

License: Apache License

@Override
public void initialize(GuaguaFileSplit genericSplit) throws IOException {
    this.maxLineLength = Integer.MAX_VALUE;
    start = genericSplit.getOffset();
    end = start + genericSplit.getLength();
    final Path file = new Path(genericSplit.getPath());
    compressionCodecs = new CompressionCodecFactory(this.conf);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(this.conf);
    FSDataInputStream fileIn = fs.open(file);
    boolean skipFirstLine = false;
    if (codec != null) {
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE);
        } else {
            in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE,
                    this.recordDelimiterBytes);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE);
        } else {
            in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE, this.recordDelimiterBytes);
        }
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: ml.shifu.guagua.mapreduce.GuaguaLineRecordReader.java

License: Apache License

@Override
public void initialize(GuaguaFileSplit split) throws IOException {
    this.maxLineLength = Integer.MAX_VALUE;
    start = split.getOffset();
    end = start + split.getLength();
    final Path file = new Path(split.getPath());
    compressionCodecs = new CompressionCodecFactory(this.conf);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(new Configuration());
    FSDataInputStream fileIn = fs.open(file);
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: ml.shifu.guagua.yarn.GuaguaLineRecordReader.java

License: Apache License

@Override
public void initialize(GuaguaFileSplit genericSplit) throws IOException {
    this.maxLineLength = Integer.MAX_VALUE;
    start = genericSplit.getOffset();
    end = start + genericSplit.getLength();
    final Path file = new Path(genericSplit.getPath());
    compressionCodecs = new CompressionCodecFactory(new Configuration());
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(new Configuration());
    FSDataInputStream fileIn = fs.open(file);
    boolean skipFirstLine = false;
    if (codec != null) {
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE);
        } else {
            in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE,
                    this.recordDelimiterBytes);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE);
        } else {
            in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE, this.recordDelimiterBytes);
        }
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: ml.shifu.guagua.yarn.GuaguaYarnTask.java

License: Apache License

@SuppressWarnings({ "unchecked", "unused" })
private <T> T getSplitDetails(Path file, long offset) throws IOException {
    FileSystem fs = file.getFileSystem(getYarnConf());
    FSDataInputStream inFile = null;
    T split = null;
    try {
        inFile = fs.open(file);
        inFile.seek(offset);
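        // At this offset the file stores the split's class name (written with Text.writeString),
        // followed by the serialized split itself, which is deserialized below.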
        String className = Text.readString(inFile);
        Class<T> cls;
        try {
            cls = (Class<T>) getYarnConf().getClassByName(className);
        } catch (ClassNotFoundException ce) {
            IOException wrap = new IOException(String.format("Split class %s not found", className));
            wrap.initCause(ce);
            throw wrap;
        }
        SerializationFactory factory = new SerializationFactory(getYarnConf());
        Deserializer<T> deserializer = (Deserializer<T>) factory.getDeserializer(cls);
        deserializer.open(inFile);
        split = deserializer.deserialize(null);
    } finally {
        IOUtils.closeStream(inFile);
    }
    return split;
}

From source file: ml.shifu.shifu.core.mr.input.CombineRecordReader.java

License: Apache License

private void initializeOne(TaskAttemptContext context, FileSplit split) throws IOException {
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(codec.createInputStream(fileIn), job);
        } else {
            in = new LineReader(codec.createInputStream(fileIn), job, this.recordDelimiterBytes);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: net.darkseraphim.webanalytics.hadoop.csv.CSVLineRecordReader.java

License: Apache License

public void configure(InputSplit genericSplit, JobConf conf) throws IOException {
    FileSplit split = (FileSplit) genericSplit;

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(split.getPath());

    if (codec != null) {
        is = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            fileIn.seek(start);
        }
        is = fileIn;
    }

    this.pos = start;
    init(is, conf);
}

From source file: net.shun.mapreduce.lib.input.XmlRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    String[] beginMarks = job.getStrings("mapred.xmlrecordreader.begin", "<page>");
    this.beginMark = beginMarks[0];
    String[] endMarks = job.getStrings("mapred.xmlrecordreader.begin", "</page>");
    this.endMark = endMarks[0];

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    fileIn.seek(start);
    in = new BufferedInputStream(fileIn);
    /*
    boolean skipFirstLine = false;
    if (codec != null) {
      in = new LineReader(codec.createInputStream(fileIn), job);
      end = Long.MAX_VALUE;
    } else {
      if (start != 0) {
        skipFirstLine = true;
        --start;
        fileIn.seek(start);
      }
      in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {  // skip first line and re-establish "start".
      start += in.readLine(new Text(), 0,
                   (int)Math.min((long)Integer.MAX_VALUE, end - start));
    }
    */
    this.pos = start;
    readUntilMatch(beginMark, false, null);
}

From source file: nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.NReadRecordReader.java

License: Open Source License

/**
 * Override method for initialization.
 *
 * @param inputSplit The InputSplit to read.
 * @param context    The context for this task.
 * @throws IOException          Returns default exception.
 * @throws InterruptedException Returns default exception.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    // Initialize.
    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream infile = fs.open(split.getPath());

    // Use number of lines given by user and set parameters.
    this.NLINESTOPROCESS = NLineInputFormat.getNumLinesPerSplit(context);
    this.maxLineLength = conf.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    boolean skipFirstLine = false;

    // Skip first line?
    if (this.start != 0) {
        skipFirstLine = true;
        this.start--;
        infile.seek(this.start);
    }
    this.in = new LineReader(infile, conf);
    if (skipFirstLine) {
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));
    }
    this.pos = this.start;
}

From source file: nl.cwi.kba2013.thrift.bin.ThriftRecordReader.java

License: Apache License

/** Boilerplate initialization code for file input streams. */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {

    conf = context.getConfiguration();
    fileSplit = (FileSplit) split;
    start = fileSplit.getStart();
    length = fileSplit.getLength();
    position = start;

    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(path);

    compressionCodecs = new CompressionCodecFactory(conf);
    codec = compressionCodecs.getCodec(path);

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        in = new DataInputStream(codec.createInputStream(fileIn, decompressor));
        filePosition = fileIn;
        //LOG.info("Successfully initialized input stream for compressed data.");
    } else {
        fileIn.seek(start);
        in = fileIn;
        filePosition = fileIn;
    }

    tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in));
}