Example usage for org.apache.hadoop.fs FSDataInputStream getWrappedStream

List of usage examples for org.apache.hadoop.fs FSDataInputStream getWrappedStream

Introduction

On this page you can find example usage for org.apache.hadoop.fs FSDataInputStream getWrappedStream.

Prototype

@InterfaceAudience.LimitedPrivate({ "HDFS" })
public InputStream getWrappedStream() 

Document

Get a reference to the wrapped input stream.
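
Before the usage examples, here is a minimal sketch of the typical pattern: open a file, call getWrappedStream() to inspect the underlying stream, and branch on its capabilities. This sketch is not taken from any of the sources below; the path "/tmp/example.dat" is a placeholder.

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ByteBufferReadable;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WrappedStreamSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        try (FSDataInputStream in = fs.open(new Path("/tmp/example.dat"))) {
            // getWrappedStream() returns the InputStream that this FSDataInputStream decorates.
            InputStream wrapped = in.getWrappedStream();
            if (wrapped instanceof ByteBufferReadable) {
                // The underlying stream supports ByteBuffer reads (e.g. HDFS).
                System.out.println("ByteBuffer reads supported by " + wrapped.getClass().getName());
            } else {
                // Fall back to plain byte[] reads (e.g. LocalFileSystem).
                System.out.println("Wrapped stream is " + wrapped.getClass().getName());
            }
        }
    }
}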

Usage

From source file:co.cask.cdap.common.io.Locations.java

License:Apache License

/**
 * Creates a {@link StreamSizeProvider} for determining the size of the given {@link FSDataInputStream}.
 */
private static StreamSizeProvider createDFSStreamSizeProvider(final FileSystem fs, final Path path,
        FSDataInputStream input) {
    // This is the default provider. It tries to determine whether the file is closed and, if so, returns its size.
    final StreamSizeProvider defaultSizeProvider = new StreamSizeProvider() {
        @Override
        public long size() throws IOException {
            if (fs instanceof DistributedFileSystem) {
                if (((DistributedFileSystem) fs).isFileClosed(path)) {
                    return fs.getFileStatus(path).getLen();
                } else {
                    return -1L;
                }
            }
            // If the underlying file system is not a DistributedFileSystem, assume the file status length reflects the size
            return fs.getFileStatus(path).getLen();
        }
    };

    // This supplier abstracts out the logic for obtaining the DFSInputStream#getFileLength method via reflection.
    // Reflection is used to avoid class loading errors if the DFSInputStream class is moved or the method is renamed.
    final InputStream wrappedStream = input.getWrappedStream();
    final Supplier<Method> getFileLengthMethodSupplier = Suppliers.memoize(new Supplier<Method>() {
        @Override
        public Method get() {
            try {
                // This is a hack to get to the underlying DFSInputStream.
                // It needs to be revisited when different distributed file systems must be supported.
                Class<? extends InputStream> cls = wrappedStream.getClass();
                String expectedName = "org.apache.hadoop.hdfs.DFSInputStream";
                if (!cls.getName().equals(expectedName)) {
                    throw new Exception(
                            "Expected wrapper class to be " + expectedName + ", but got " + cls.getName());
                }

                Method getFileLengthMethod = cls.getMethod("getFileLength");
                if (!getFileLengthMethod.isAccessible()) {
                    getFileLengthMethod.setAccessible(true);
                }
                return getFileLengthMethod;
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }
    });

    return new StreamSizeProvider() {
        @Override
        public long size() throws IOException {
            // Try to determine the size using default provider
            long size = defaultSizeProvider.size();
            if (size >= 0) {
                return size;
            }
            try {
                // If not able to get length from the default provider, use the DFSInputStream#getFileLength method
                return (Long) getFileLengthMethodSupplier.get().invoke(wrappedStream);
            } catch (Throwable t) {
                LOG.warn("Unable to get actual file length from DFS input.", t);
                return size;
            }
        }
    };
}

From source file:com.cloudera.ByteBufferRecordReader.java

License:Apache License

@Override
public synchronized boolean nextKeyValue() throws IOException {
    if (key == null) {
        key = new LongWritable();
    }
    if (value == null) {
        value = new ByteBufferWritable();
    }
    if (pos >= end) {
        return false;
    }

    int numBytesRead = 0;
    // Use zero-copy ByteBuffer reads if available
    if (inputStream instanceof FSDataInputStream) {
        FSDataInputStream fsIn = (FSDataInputStream) inputStream;
        ByteBuffer buf = fsIn.read(bufferPool, (int) (end - start), readOption);
        numBytesRead += buf.limit();
        pos += buf.limit();
        // Update stats
        InputStream wrappedStream = fsIn.getWrappedStream();
        if (wrappedStream instanceof DFSInputStream) {
            DFSInputStream dfsIn = (DFSInputStream) wrappedStream;
            updateStats(dfsIn.getReadStatistics());
        }
        // Switch out the buffers
        if (value.getBuffer() != null) {
            fsIn.releaseBuffer(value.getBuffer());
        }
        value.setByteBuffer(buf);
    }
    // Fall back to normal byte[]-based reads with a copy into the ByteBuffer
    else {
        byte[] b = new byte[(int) (end - start)];
        IOUtils.readFully(inputStream, b);
        numBytesRead += b.length;
        pos += b.length;
        value.setByteBuffer(ByteBuffer.wrap(b));
    }

    return numBytesRead > 0;
}

From source file:com.uber.hoodie.common.table.log.HoodieLogFileReader.java

License:Apache License

HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
        boolean readBlockLazily, boolean reverseReader) throws IOException {
    FSDataInputStream fsDataInputStream = fs.open(logFile.getPath(), bufferSize);
    if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) {
        this.inputStream = new FSDataInputStream(
                new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize));
    } else {
        // fsDataInputStream.getWrappedStream() may be a BufferedFSInputStream;
        // do we need to wrap it in another BufferedFSInputStream to make bufferSize work?
        this.inputStream = fsDataInputStream;
    }

    this.logFile = logFile;
    this.readerSchema = readerSchema;
    this.readBlockLazily = readBlockLazily;
    this.reverseReader = reverseReader;
    if (this.reverseReader) {
        this.reverseLogFilePosition = this.lastReverseLogFilePosition = fs.getFileStatus(logFile.getPath())
                .getLen();
    }
    addShutDownHook();
}

From source file:com.yahoo.druid.hadoop.DruidRecordReader.java

License:Apache License

public static SegmentLoadSpec readSegmentJobSpec(Configuration config, ObjectMapper jsonMapper) {
    try {
        // first, see if the schema JSON itself is present in the config
        String schema = config.get(DruidInputFormat.CONF_DRUID_SCHEMA);
        if (schema != null) {
            logger.info("druid schema  = " + schema);
            return jsonMapper.readValue(schema, SegmentLoadSpec.class);
        }

        // then, see if the schema file location is in the config
        String schemaFile = config.get(DruidInputFormat.CONF_DRUID_SCHEMA_FILE);
        if (schemaFile == null) {
            throw new IllegalStateException("couldn't find schema");
        }

        logger.info("druid schema file location = " + schemaFile);

        FileSystem fs = FileSystem.get(config);
        FSDataInputStream in = fs.open(new Path(schemaFile));
        return jsonMapper.readValue(in.getWrappedStream(), SegmentLoadSpec.class);
    } catch (IOException ex) {
        throw new RuntimeException("couldn't load segment load spec", ex);
    }
}

From source file:com.yahoo.druid.pig.DruidStorage.java

License:Apache License

private PigSegmentLoadSpec readPigSegmentLoadSpecFromFile(String schemaFile, Job job) throws IOException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FSDataInputStream in = fs.open(new Path(schemaFile));
    PigSegmentLoadSpec spec = jsonMapper.readValue(in.getWrappedStream(), PigSegmentLoadSpec.class);
    in.close();
    return spec;
}

From source file:org.apache.parquet.hadoop.util.HadoopStreams.java

License:Apache License

/**
 * Wraps a {@link FSDataInputStream} in a {@link SeekableInputStream}
 * implementation for Parquet readers.
 *
 * @param stream a Hadoop FSDataInputStream
 * @return a SeekableInputStream
 */
public static SeekableInputStream wrap(FSDataInputStream stream) {
    if (byteBufferReadableClass != null && h2SeekableConstructor != null
            && byteBufferReadableClass.isInstance(stream.getWrappedStream())) {
        try {
            return h2SeekableConstructor.newInstance(stream);
        } catch (InstantiationException e) {
            LOG.warn("Could not instantiate H2SeekableInputStream, falling back to byte array reads", e);
            return new H1SeekableInputStream(stream);
        } catch (IllegalAccessException e) {
            LOG.warn("Could not instantiate H2SeekableInputStream, falling back to byte array reads", e);
            return new H1SeekableInputStream(stream);
        } catch (InvocationTargetException e) {
            throw new ParquetDecodingException("Could not instantiate H2SeekableInputStream",
                    e.getTargetException());
        }
    } else {
        return new H1SeekableInputStream(stream);
    }
}

From source file:org.apache.tajo.storage.FSDataInputChannel.java

License:Apache License

public FSDataInputChannel(FSDataInputStream inputStream) {
    if (inputStream.getWrappedStream() instanceof ByteBufferReadable) {
        this.isDirectRead = true;
    } else {
        /* LocalFileSystem and S3 do not support ByteBufferReadable */
        this.channel = Channels.newChannel(inputStream);
    }
    this.inputStream = inputStream;
}

From source file:org.apache.tajo.storage.TestByteBufLineReader.java

License:Apache License

@Test
public void testReaderWithLocalFS() throws Exception {
    Path tablePath = new Path(testDir, "testReaderWithLocalFS");
    Path filePath = new Path(tablePath, "data.dat");

    FileSystem fileSystem = filePath.getFileSystem(conf);
    assertTrue(fileSystem instanceof LocalFileSystem);

    FSDataOutputStream out = fs.create(filePath, true);
    out.write(LINE.getBytes(Charset.defaultCharset()));
    out.write('\n');
    out.close();

    assertTrue(fs.exists(filePath));

    FSDataInputStream inputStream = fs.open(filePath);
    assertFalse(inputStream.getWrappedStream() instanceof ByteBufferReadable);

    ByteBufLineReader lineReader = new ByteBufLineReader(new FSDataInputChannel(inputStream));
    assertEquals(LINE, lineReader.readLine());
    lineReader.seek(0);
    assertEquals(LINE, lineReader.readLine());
    assertNull(lineReader.readLine());

    lineReader.close();
    fs.close();
}

From source file:org.apache.tajo.storage.TestByteBufLineReader.java

License:Apache License

@Test
public void testReaderWithDFS() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    cluster.waitClusterUp();

    TajoConf tajoConf = new TajoConf(conf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    Path tablePath = new Path("/testReaderWithDFS");
    Path filePath = new Path(tablePath, "data.dat");
    try {
        DistributedFileSystem fs = cluster.getFileSystem();
        FSDataOutputStream out = fs.create(filePath, true);
        out.write(LINE.getBytes(Charset.defaultCharset()));
        out.write('\n');
        out.close();

        assertTrue(fs.exists(filePath));
        FSDataInputStream inputStream = fs.open(filePath);
        assertTrue(inputStream.getWrappedStream() instanceof ByteBufferReadable);

        ByteBufLineReader lineReader = new ByteBufLineReader(new FSDataInputChannel(inputStream));
        assertEquals(LINE, lineReader.readLine());
        lineReader.seek(0);
        assertEquals(LINE, lineReader.readLine());
        assertNull(lineReader.readLine());

        lineReader.close();
        fs.close();
    } finally {
        cluster.shutdown(true);
    }
}