Example usage for org.apache.hadoop.fs FSDataInputStream seek

List of usage examples for org.apache.hadoop.fs FSDataInputStream seek

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FSDataInputStream.seek.

Prototype

@Override
public void seek(long desired) throws IOException 

Source Link

Document

Seek to the given offset.

Usage

From source file:ras.test.hadoop.fs.InMemoryFileSystemUnitTest.java

License:Apache License

@Test
public void testInputStreamSeek() throws IOException {
    Path path = new Path("message.txt");
    writeMessage(path);//from   ww w  .j  a v  a2s.  co  m
    byte[] bytesOut = message.getBytes();

    long seekPosition = 6;
    FSDataInputStream in = inMemoryFileSystem.open(path);
    byte[] bytesIn = new byte[bytesOut.length - (int) seekPosition];
    in.seek(seekPosition);
    assertThat("Wrong position after seek", in.getPos(), is(equalTo(seekPosition)));
    in.read(bytesIn);
    for (int i = 6; i < bytesOut.length; i++) {
        assertThat("Wrong byte at index " + i, bytesIn[i - 6], is(equalTo(bytesOut[i])));
    }
}

From source file:redpoll.examples.sogou.SogouRecordReader.java

License:Apache License

public SogouRecordReader(Configuration job, FileSplit split) throws IOException {
    start = split.getStart();/*from   w  ww.ja  va  2s.c  o m*/
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (codec != null) {
        in = new SogouCorpusReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0)
            fileIn.seek(start);
        in = new SogouCorpusReader(fileIn, job);
    }
    this.pos = start;
}

From source file:simsql.runtime.InputFileDeserializer.java

License:Apache License

/** 
 * Initializes the RecordDeserializer for use in Hadoop
 *//*from   w w w . j a  v  a2  s. c o m*/
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();

    // open the file and seek to the start of the split; also remember the start, end, and current position
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileInLocal = fs.open(split.getPath());

    // do not do anything with a zero length split
    if (split.getLength() == 0) {
        done = true;
        return;
    }

    // not a non-zero length split
    start = split.getStart();
    end = split.getStart() + split.getLength() - 1;
    posCompressed = fileInLocal;
    fileInLocal.seek(start);

    // compression
    isCompressed = true;
    fileIn = RecordCompression.getInputStream(fileInLocal);
    pos = start;

    // now, find the start marker in our split
    fName = file.getName();
    findStart();
}

From source file:stargate.drivers.sourcefs.hdfs.HDFSChunkReader.java

License:Open Source License

private void initialize(FSDataInputStream is, long offset, int size) throws IOException {
    this.is = is;
    this.offset = offset;
    this.size = size;
    this.currentOffset = offset;
    is.seek(offset);
    long offsetMoved = is.getPos();
    if (offsetMoved != this.offset) {
        throw new IOException("failed to move offset to " + offsetMoved);
    }//w  w  w.j  av  a  2  s . co  m
}

From source file:StorageEngineClient.MyLineRecordReader.java

License:Open Source License

public MyLineRecordReader(Configuration job, FileSplit split) throws IOException {
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();//from   w w  w  .j a v a  2  s  .c om
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    this.tempConf = job;
    this.tempSplit = split;

    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file:streaming.core.DownloadRunner.java

License:Apache License

public static int getRawFileByPath(HttpServletResponse res, String path, long position) {

    try {//from   w  ww .j ava  2s  .c  o m
        FileSystem fs = FileSystem.get(new Configuration());

        Path p = new Path(path);
        if (fs.exists(p)) {

            List<FileStatus> files = new ArrayList<FileStatus>();

            //
            if (fs.isFile(p)) {
                files.add(fs.getFileStatus(p));
            } else if (fs.isDirectory(p)) {

                FileStatus[] fileStatusArr = fs.listStatus(p);
                if (fileStatusArr != null && fileStatusArr.length > 0) {

                    for (FileStatus cur : fileStatusArr) {
                        files.add(cur);
                    }
                }
            }

            //??
            if (files.size() > 0) {

                logger.info(path + "" + files.size());

                FSDataInputStream inputStream = null;
                OutputStream outputStream = res.getOutputStream();

                int len = files.size();
                int i = 1;
                long allPosition = 0;
                for (FileStatus cur : files) {

                    logger.info("[" + i++ + "/" + len + "]" + path + ",?" + cur);
                    inputStream = fs.open(cur.getPath());

                    if (position > 0) {

                        if (allPosition + cur.getLen() > position) {
                            inputStream.seek(position - allPosition);
                            logger.info("seek position " + (position - allPosition));
                            position = -1;
                        }
                        allPosition += cur.getLen();
                    }
                    org.apache.commons.io.IOUtils.copyLarge(inputStream, outputStream);
                    inputStream.close();

                }
                outputStream.flush();
                outputStream.close();
                return 200;

            } else {
                logger.info(path + "" + files.size());
            }

        } else {

            return 400;
        }

    } catch (Exception e) {
        e.printStackTrace();

    }

    return 500;
}

From source file:trec.MyLineRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();/*from w  w  w  . j a  v a 2 s  .com*/
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new MyLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new MyLineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file:trec.PRRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();//ww w .j a  v a2s  .  c om
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file:trec.TrecRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();//from   www  .  j  a  va2  s .  c om
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}