List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f, int bufferSize) throws IOException
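Before the project examples, a minimal self-contained sketch of calling open(Path, int) directly. The file path and the 4 KB buffer size are illustrative assumptions, not taken from any of the sources below; passing an explicit buffer size overrides the io.file.buffer.size default.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.dat"); // hypothetical path
        FileSystem fs = path.getFileSystem(conf);

        // Open with an explicit 4 KB buffer instead of the configured default.
        try (FSDataInputStream in = fs.open(path, 4096)) {
            byte[] buf = new byte[128];
            int n = in.read(buf); // read the first bytes of the file
            System.out.println("read " + n + " bytes");
        }
    }
}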
From source file:com.linkedin.cubert.block.BlockUtils.java
License:Open Source License
@SuppressWarnings("unchecked") public static Block loadBlock(BlockProperties props, IndexEntry indexEntry, Configuration conf, JsonNode json, BlockSerializationType serializationType, boolean isInMemoryBlock) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException { Block block;//from www. j a va 2s. c o m if (indexEntry == null) { if (emptyForMissing) return new EmptyBlock(props); throw new IOException(String.format("Index entry is null")); } // populate props props.setBlockId(indexEntry.getBlockId()); props.setNumRecords(indexEntry.getNumRecords()); // Open the file and seek to the offset for this block Path file = new Path(indexEntry.getFile()); FileSystem fs = file.getFileSystem(conf); FSDataInputStream fsin = fs.open(file, BLOCK_BUFFER_SIZE); fsin.seek(indexEntry.getOffset()); // Gather information needed to read this block Class<Tuple> valueClass = (Class<Tuple>) TupleFactory.getInstance().newTuple().getClass(); CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file); // Load the block now if (isInMemoryBlock) { print.f("LOADING IN MEMORY the block %d", indexEntry.getBlockId()); ByteBuffer byteBuffer = inMemoryBlockCache.get(indexEntry); if (byteBuffer == null) { int read = 0; byte[] data = new byte[(int) indexEntry.getLength()]; while (read != data.length) { read += fsin.read(data, read, data.length - read); } fsin.close(); byteBuffer = ByteBuffer.wrap(data); inMemoryBlockCache.put(indexEntry, byteBuffer); } else { print.f("REUSED FROM CACHE!!"); byteBuffer.rewind(); } block = new RubixMemoryBlock(props, conf, byteBuffer, valueClass, codec, serializationType); block.configure(json); return block; } else { print.f("STREAMING the block %d", indexEntry.getBlockId()); InputStream in = new BlockInputStream(fsin, indexEntry.getLength()); if (codec != null) { in = codec.createInputStream(in); } block = new CubertBlock(props, new BlockIterator<Tuple>(conf, in, valueClass, serializationType, props.getSchema())); block.configure(json); print.f("Loaded block id=%d from file=%s offset=%d length=%d", indexEntry.getBlockId(), file.toString(), indexEntry.getOffset(), indexEntry.getLength()); return block; } }
From source file:com.mellanox.r4h.DistributedFileSystem.java
License:Apache License
@Override
public FSDataInputStream open(Path f, final int bufferSize) throws IOException {
    statistics.incrementReadOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FSDataInputStream>() {
        @Override
        public FSDataInputStream doCall(final Path p) throws IOException, UnresolvedLinkException {
            final DFSInputStream dfsis = dfs.open(getPathName(p), bufferSize, verifyChecksum);
            return dfs.createWrappedInputStream(dfsis);
        }

        @Override
        public FSDataInputStream next(final FileSystem fs, final Path p) throws IOException {
            return fs.open(p, bufferSize);
        }
    }.resolve(this, absF);
}
From source file:com.metamx.milano.hadoop.MilanoProtoFileRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();

    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    // fs.setVerifyChecksum(false);

    // Passing a null builder to createReader works as long as the file has metadata.
    in = MilanoProtoFile.createReader(fs.open(path, DEFAULT_BUFFER_SIZE), builder, extensionRegistry,
            fileSplit.getLength());

    // This should never be null.
    metadata = in.getMetadata();
    assert metadata != null;

    // We keep statistics on how much has been read to be able to report progress.
    start = in.getBytesRead();
    end = fileSplit.getLength();
    more = start < end;
}
From source file:com.mongodb.hadoop.input.BSONFileRecordReader.java
License:Apache License
@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    fileSplit = (FileSplit) inputSplit;
    final Configuration configuration = context.getConfiguration();
    if (LOG.isDebugEnabled()) {
        LOG.debug("reading split " + fileSplit);
    }
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(configuration);
    in = fs.open(file, 16 * 1024 * 1024);
    in.seek(fileSplit.getStart());
    if (MongoConfigUtil.getLazyBSON(configuration)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}
From source file:com.mongodb.hadoop.mapred.input.BSONFileRecordReader.java
License:Apache License
public void initialize(final InputSplit inputSplit, final Configuration conf) throws IOException {
    fileSplit = (FileSplit) inputSplit;
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(conf);
    in = fs.open(file, 16 * 1024 * 1024);
    in.seek(fileSplit.getStart());
    if (MongoConfigUtil.getLazyBSON(conf)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}