Example usage for org.apache.hadoop.fs FSDataInputStream seek

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FSDataInputStream.seek.

Prototype

@Override
public void seek(long desired) throws IOException 

Document

Seek to the given offset.
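
Before the usage examples below, here is a minimal, self-contained sketch of the call: open a file as an FSDataInputStream, stream it to stdout once, then seek(0) to rewind and stream it again. The HDFS URI and the class name SeekExample are placeholders for illustration, not taken from any of the source files listed below.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class SeekExample {
    public static void main(String[] args) throws Exception {
        // Placeholder URI; point this at a file that exists on your cluster.
        String uri = "hdfs://localhost:9000/tmp/example.txt";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false); // first pass over the file
            in.seek(0);                                     // rewind to the start
            IOUtils.copyBytes(in, System.out, 4096, false); // second pass from offset 0
        } finally {
            IOUtils.closeStream(in);
        }
    }
}

seek() can move to any absolute offset in the file, but it is generally a relatively expensive operation; the examples below typically seek only once, either back to offset 0 or to the start of an input split.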

Usage

From source file: hadoop.examples.hdfs.FileSystemCat.java

License: Open Source License

public static void main(String[] args) throws Exception {
    String uri = "hdfs://exoplatform:9000/user/haint/input-0/test.txt";
    FileSystem fs = FileSystem.get(URI.create(uri), new Configuration());
    InputStream in = null;
    try {
        in = fs.open(new Path(uri));
        IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
        IOUtils.closeStream(in);
    }
    System.out.println("---------------------------------------------------");
    FSDataInputStream fsDataInputStream = null;
    try {
        fsDataInputStream = fs.open(new Path(uri));
        IOUtils.copyBytes(fsDataInputStream, System.out, 256, false);
        System.out.println("---------------------------------------------------");
        fsDataInputStream.seek(0);
        IOUtils.copyBytes(fsDataInputStream, System.out, 256, false);
    } finally {
        IOUtils.closeStream(fsDataInputStream);
    }
}

From source file: hadoop.inputsplit.FastaLineRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();

    done = false;

    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();

    file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    currentValue = new ValueWritable();
    value = new Text();
    tmpValue = new Text();
    tmp = new Text();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    String homeHdfs = context.getConfiguration().get("HDFS_HOME_DIR");
    //maxK = HadoopUtil.getMaxkFromPatterns(fs, new Path(homeHdfs+Constant.HDFS_PATTERNS_FILE_HDFS));

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job, recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job, recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;

    setKeySeq(fs, job); //Set currentKey

    nextMyKeyValue(); // Read the first record, if one exists.

}

From source file: hadoop.TweetRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }

    this.pos = start;
}

From source file: hdfs.jsr203.HadoopFileSystem.java

License: Apache License

SeekableByteChannel newByteChannel(org.apache.hadoop.fs.Path path, Set<? extends OpenOption> options,
        FileAttribute<?>... attrs) throws IOException {
    // simple one : this.fs.create(hadoopPath);
    // TODO Auto-generated method stub
    //      throw new IOException("Not implemented");

    checkOptions(options);

    // CREATE_NEW requires that the file does not already exist
    if (options.contains(CREATE_NEW) && this.fs.exists(path)) {
        throw new FileAlreadyExistsException(path.toString());
    }

    if (options.contains(WRITE) || options.contains(APPEND)) {
        checkWritable();
        beginRead();
        try {
            final WritableByteChannel wbc = Channels.newChannel(newOutputStream(path, options, attrs));
            long leftover = 0;
            if (options.contains(APPEND)) {
                /*Entry e = getEntry0(path);
                if (e != null && e.size >= 0)
                leftover = e.size;*/
                throw new IOException("APPEND NOT IMPLEMENTED");
            }
            final long offset = leftover;
            return new SeekableByteChannel() {
                long written = offset;

                public boolean isOpen() {
                    return wbc.isOpen();
                }

                public long position() throws IOException {
                    return written;
                }

                public SeekableByteChannel position(long pos) throws IOException {
                    throw new UnsupportedOperationException();
                }

                public int read(ByteBuffer dst) throws IOException {
                    throw new UnsupportedOperationException();
                }

                public SeekableByteChannel truncate(long size) throws IOException {
                    throw new UnsupportedOperationException();
                }

                public int write(ByteBuffer src) throws IOException {
                    int n = wbc.write(src);
                    written += n;
                    return n;
                }

                public long size() throws IOException {
                    return written;
                }

                public void close() throws IOException {
                    wbc.close();
                }
            };
        } finally {
            endRead();
        }
    } else {
        beginRead();
        try {
            ensureOpen();
            FileStatus e = this.fs.getFileStatus(path);
            if (e == null || e.isDirectory())
                throw new NoSuchFileException(path.toString());
            final FSDataInputStream inputStream = getInputStream(path);
            final ReadableByteChannel rbc = Channels.newChannel(inputStream);
            final long size = e.getLen();
            return new SeekableByteChannel() {
                long read = 0;

                public boolean isOpen() {
                    return rbc.isOpen();
                }

                public long position() throws IOException {
                    return read;
                }

                public SeekableByteChannel position(long pos) throws IOException {
                    // the channel is an unbuffered wrapper, so seeking the underlying stream takes effect immediately
                    inputStream.seek(pos);
                    read = pos;
                    return this;
                }

                public int read(ByteBuffer dst) throws IOException {
                    int n = rbc.read(dst);
                    if (n > 0) {
                        read += n;
                    }
                    return n;
                }

                public SeekableByteChannel truncate(long size) throws IOException {
                    throw new NonWritableChannelException();
                }

                public int write(ByteBuffer src) throws IOException {
                    throw new NonWritableChannelException();
                }

                public long size() throws IOException {
                    return size;
                }

                public void close() throws IOException {
                    rbc.close();
                }
            };
        } finally {
            endRead();
        }
    }
}

From source file: InvertedIndex.NLineRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.job = job;
    this.context = context;
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    this.path = file;
    this.length = split.getLength();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        if (0 == split.getLength() && job.getBoolean("mapred.ignore.badcompress", false)) {
            if (null != context && context instanceof TaskInputOutputContext) {
                ((TaskInputOutputContext) context).getCounter("Input Counter", "Gzip File length is zero")
                        .increment(1);
            }
            if (null != this.path) {
                LOG.warn("Skip 0-length Zip file: " + this.path.toString());
            }
            in = new NLineReader(fileIn, job);
        } else {
            try {
                in = new NLineReader(codec.createInputStream(fileIn), job);
                end = Long.MAX_VALUE;
            } catch (IOException e) {
                if (isIgnoreBadCompress(job, e)) {
                    in = new NLineReader(fileIn, job);
                    end = start;
                    LOG.warn("Skip Bad Compress File: " + this.path.toString());
                    LOG.warn("initialize line read error", e);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Skip Bad Zip File")
                            .increment(1);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Total Skip Bad Zip Length")
                            .increment(this.length);
                } else {
                    throw e;
                }
            }
        }
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new NLineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: io.aos.hdfs.basics.AbstractHdfsFileTest.java

License: Apache License

private void testCreateHdfsFile(Path path) throws IOException {
    getLogger().info("Testing HDFS file creation.");
    byte[] buff = "The HDFS File content".getBytes(Charset.forName("UTF-8"));
    FSDataOutputStream outputStream = getFileSystem().create(path);
    outputStream.write(buff, 0, buff.length);
    outputStream.close();
    FSDataInputStream in = getFileSystem().open(path);
    IOUtils.copyBytes(in, System.out, 4096, false);
    in.seek(0);
    IOUtils.copyBytes(in, System.out, 4096, false);
    IOUtils.closeStream(in);
}

From source file: io.aos.hdfs.basics.AbstractHdfsFileTest.java

License: Apache License

private void test1() throws IOException {
    Path srcPath = new Path("./src/test/resources/log4j.properties");
    Path dstPath = new Path("/log4j.properties");
    getFileSystem().copyFromLocalFile(srcPath, dstPath);
    FSDataInputStream in = getFileSystem().open(dstPath);
    IOUtils.copyBytes(in, System.out, 4096, false);
    in.seek(0);
    IOUtils.copyBytes(in, System.out, 4096, false);
    in.seek(0); // rewind again; the final copyBytes(..., true) below closes both streams
    OutputStream out = getFileSystem().create(new Path("./log4j.properties"), new Progressable() {
        @Override
        public void progress() {
            System.out.println(".");
        }
    });
    IOUtils.copyBytes(in, out, 4096, true);
}

From source file: io.aos.hdfs.FileSystemDoubleCat.java

License: Apache License

public static void main(String... args) throws Exception {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    FSDataInputStream in = null;
    try {
        in = fs.open(new Path(uri));
        IOUtils.copyBytes(in, System.out, 4096, false);
        in.seek(0); // go back to the start of the file
        IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
        IOUtils.closeStream(in);
    }
}

From source file: io.druid.storage.hdfs.tasklog.HdfsTaskLogs.java

License: Apache License

@Override
public Optional<ByteSource> streamTaskLog(final String taskId, final long offset) throws IOException {
    final Path path = getTaskLogFileFromId(taskId);
    final FileSystem fs = path.getFileSystem(hadoopConfig);
    if (fs.exists(path)) {
        return Optional.<ByteSource>of(new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
                log.info("Reading task log from: %s", path);
                final long seekPos;
                if (offset < 0) {
                    final FileStatus stat = fs.getFileStatus(path);
                    seekPos = Math.max(0, stat.getLen() + offset);
                } else {
                    seekPos = offset;
                }
                final FSDataInputStream inputStream = fs.open(path);
                inputStream.seek(seekPos);
                log.info("Read task log from: %s (seek = %,d)", path, seekPos);
                return inputStream;
            }
        });
    } else {
        return Optional.absent();
    }
}

From source file: io.hops.erasure_coding.FileStripeReader.java

License: Apache License

@Override
public InputStream[] getNextStripeInputs() throws IOException {
    InputStream[] blocks = new InputStream[codec.stripeLength];
    try {
        for (int i = 0; i < codec.stripeLength; i++) {
            long seekOffset = stripeStartOffset + i * blockSize;
            if (seekOffset < srcSize) {
                FSDataInputStream in = fs.open(srcFile, bufferSize);
                in.seek(seekOffset);
                LOG.info("Opening stream at " + srcFile + ":" + seekOffset);
                blocks[i] = in;
            } else {
                LOG.info("Using zeros at offset " + seekOffset);
                // We have no src data at this offset.
                blocks[i] = new RaidUtils.ZeroInputStream(seekOffset + blockSize);
            }
        }
        stripeStartOffset += blockSize * codec.stripeLength;
        return blocks;
    } catch (IOException e) {
        // If there is an error during opening a stream, close the previously
        // opened streams and re-throw.
        RaidUtils.closeStreams(blocks);
        throw e;
    }
}