List of usage examples for org.apache.hadoop.fs FSDataInputStream getWrappedStream
@InterfaceAudience.LimitedPrivate({ "HDFS" }) public InputStream getWrappedStream()
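The examples below share a common pattern: call getWrappedStream() to reach the stream that FSDataInputStream decorates, then check its runtime type before deciding how to read. The following is a minimal sketch of that pattern, not taken from any of the projects below; the file path is a placeholder, and ByteBufferReadable is just one of the capability checks that appear in the examples.

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ByteBufferReadable;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WrappedStreamProbe {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Hypothetical path, used only for illustration
    try (FSDataInputStream in = fs.open(new Path("/tmp/example.dat"))) {
      // getWrappedStream() exposes the underlying stream that FSDataInputStream wraps
      InputStream wrapped = in.getWrappedStream();

      // Capability checks on the wrapped stream decide which read path to take,
      // e.g. zero-copy ByteBuffer reads are only possible when the underlying
      // stream implements ByteBufferReadable (HDFS does; local and S3 streams typically do not).
      if (wrapped instanceof ByteBufferReadable) {
        System.out.println("Supports ByteBuffer reads: " + wrapped.getClass().getName());
      } else {
        System.out.println("Falling back to byte[] reads for " + wrapped.getClass().getName());
      }
    }
  }
}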
From source file: co.cask.cdap.common.io.Locations.java
License: Apache License

/**
 * Creates a {@link StreamSizeProvider} for determining the size of the given {@link FSDataInputStream}.
 */
private static StreamSizeProvider createDFSStreamSizeProvider(final FileSystem fs, final Path path,
                                                              FSDataInputStream input) {
  // This is the default provider to use. It will try to determine if the file is closed and return the size of it.
  final StreamSizeProvider defaultSizeProvider = new StreamSizeProvider() {
    @Override
    public long size() throws IOException {
      if (fs instanceof DistributedFileSystem) {
        if (((DistributedFileSystem) fs).isFileClosed(path)) {
          return fs.getFileStatus(path).getLen();
        } else {
          return -1L;
        }
      }
      // If the underlying file system is not DistributedFileSystem, just assume the file length tells the size
      return fs.getFileStatus(path).getLen();
    }
  };

  // This supplier abstracts out the logic for getting the DFSInputStream#getFileLength method using reflection.
  // Reflection is used to avoid a ClassLoading error if the DFSInputStream class is moved or the method gets renamed.
  final InputStream wrappedStream = input.getWrappedStream();
  final Supplier<Method> getFileLengthMethodSupplier = Suppliers.memoize(new Supplier<Method>() {
    @Override
    public Method get() {
      try {
        // This is a hack to get to the underlying DFSInputStream.
        // Need to revisit it when we need to support a different distributed file system.
        Class<? extends InputStream> cls = wrappedStream.getClass();
        String expectedName = "org.apache.hadoop.hdfs.DFSInputStream";
        if (!cls.getName().equals(expectedName)) {
          throw new Exception("Expected wrapper class be " + expectedName + ", but got " + cls.getName());
        }
        Method getFileLengthMethod = cls.getMethod("getFileLength");
        if (!getFileLengthMethod.isAccessible()) {
          getFileLengthMethod.setAccessible(true);
        }
        return getFileLengthMethod;
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    }
  });

  return new StreamSizeProvider() {
    @Override
    public long size() throws IOException {
      // Try to determine the size using the default provider
      long size = defaultSizeProvider.size();
      if (size >= 0) {
        return size;
      }
      try {
        // If not able to get the length from the default provider, use the DFSInputStream#getFileLength method
        return (Long) getFileLengthMethodSupplier.get().invoke(wrappedStream);
      } catch (Throwable t) {
        LOG.warn("Unable to get actual file length from DFS input.", t);
        return size;
      }
    }
  };
}
From source file: com.cloudera.ByteBufferRecordReader.java
License: Apache License

@Override
public synchronized boolean nextKeyValue() throws IOException {
  if (key == null) {
    key = new LongWritable();
  }
  if (value == null) {
    value = new ByteBufferWritable();
  }
  if (pos >= end) {
    return false;
  }
  int numBytesRead = 0;
  // Use zero-copy ByteBuffer reads if available
  if (inputStream instanceof FSDataInputStream) {
    FSDataInputStream fsIn = (FSDataInputStream) inputStream;
    ByteBuffer buf = fsIn.read(bufferPool, (int) (end - start), readOption);
    numBytesRead += buf.limit();
    pos += buf.limit();
    // Update stats
    InputStream wrappedStream = fsIn.getWrappedStream();
    if (wrappedStream instanceof DFSInputStream) {
      DFSInputStream dfsIn = (DFSInputStream) wrappedStream;
      updateStats(dfsIn.getReadStatistics());
    }
    // Switch out the buffers
    if (value.getBuffer() != null) {
      fsIn.releaseBuffer(value.getBuffer());
    }
    value.setByteBuffer(buf);
  } else {
    // Fallback to normal byte[] based reads with a copy to the ByteBuffer
    byte[] b = new byte[(int) (end - start)];
    IOUtils.readFully(inputStream, b);
    numBytesRead += b.length;
    pos += b.length;
    value.setByteBuffer(ByteBuffer.wrap(b));
  }
  return numBytesRead > 0;
}
From source file: com.uber.hoodie.common.table.log.HoodieLogFileReader.java
License: Apache License

HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
                    boolean readBlockLazily, boolean reverseReader) throws IOException {
  FSDataInputStream fsDataInputStream = fs.open(logFile.getPath(), bufferSize);
  if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) {
    this.inputStream = new FSDataInputStream(
        new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize));
  } else {
    // fsDataInputStream.getWrappedStream() may be a BufferedFSInputStream;
    // need to wrap in another BufferedFSInputStream to make bufferSize work?
    this.inputStream = fsDataInputStream;
  }
  this.logFile = logFile;
  this.readerSchema = readerSchema;
  this.readBlockLazily = readBlockLazily;
  this.reverseReader = reverseReader;
  if (this.reverseReader) {
    this.reverseLogFilePosition = this.lastReverseLogFilePosition =
        fs.getFileStatus(logFile.getPath()).getLen();
  }
  addShutDownHook();
}
From source file: com.yahoo.druid.hadoop.DruidRecordReader.java
License: Apache License

public static SegmentLoadSpec readSegmentJobSpec(Configuration config, ObjectMapper jsonMapper) {
  try {
    // First see if the schema JSON itself is present in the config
    String schema = config.get(DruidInputFormat.CONF_DRUID_SCHEMA);
    if (schema != null) {
      logger.info("druid schema = " + schema);
      return jsonMapper.readValue(schema, SegmentLoadSpec.class);
    }
    // Then see if the schema file location is in the config
    String schemaFile = config.get(DruidInputFormat.CONF_DRUID_SCHEMA_FILE);
    if (schemaFile == null) {
      throw new IllegalStateException("couldn't find schema");
    }
    logger.info("druid schema file location = " + schemaFile);
    FileSystem fs = FileSystem.get(config);
    FSDataInputStream in = fs.open(new Path(schemaFile));
    return jsonMapper.readValue(in.getWrappedStream(), SegmentLoadSpec.class);
  } catch (IOException ex) {
    throw new RuntimeException("couldn't load segment load spec", ex);
  }
}
From source file: com.yahoo.druid.pig.DruidStorage.java
License: Apache License

private PigSegmentLoadSpec readPigSegmentLoadSpecFromFile(String schemaFile, Job job) throws IOException {
  FileSystem fs = FileSystem.get(job.getConfiguration());
  FSDataInputStream in = fs.open(new Path(schemaFile));
  PigSegmentLoadSpec spec = jsonMapper.readValue(in.getWrappedStream(), PigSegmentLoadSpec.class);
  in.close();
  return spec;
}
From source file: org.apache.parquet.hadoop.util.HadoopStreams.java
License: Apache License

/**
 * Wraps a {@link FSDataInputStream} in a {@link SeekableInputStream}
 * implementation for Parquet readers.
 *
 * @param stream a Hadoop FSDataInputStream
 * @return a SeekableInputStream
 */
public static SeekableInputStream wrap(FSDataInputStream stream) {
  if (byteBufferReadableClass != null && h2SeekableConstructor != null
      && byteBufferReadableClass.isInstance(stream.getWrappedStream())) {
    try {
      return h2SeekableConstructor.newInstance(stream);
    } catch (InstantiationException e) {
      LOG.warn("Could not instantiate H2SeekableInputStream, falling back to byte array reads", e);
      return new H1SeekableInputStream(stream);
    } catch (IllegalAccessException e) {
      LOG.warn("Could not instantiate H2SeekableInputStream, falling back to byte array reads", e);
      return new H1SeekableInputStream(stream);
    } catch (InvocationTargetException e) {
      throw new ParquetDecodingException("Could not instantiate H2SeekableInputStream",
          e.getTargetException());
    }
  } else {
    return new H1SeekableInputStream(stream);
  }
}
From source file: org.apache.tajo.storage.FSDataInputChannel.java
License: Apache License

public FSDataInputChannel(FSDataInputStream inputStream) {
  if (inputStream.getWrappedStream() instanceof ByteBufferReadable) {
    this.isDirectRead = true;
  } else {
    /* LocalFileSystem and S3 do not support ByteBufferReadable */
    this.channel = Channels.newChannel(inputStream);
  }
  this.inputStream = inputStream;
}
From source file: org.apache.tajo.storage.TestByteBufLineReader.java
License: Apache License

@Test
public void testReaderWithLocalFS() throws Exception {
  Path tablePath = new Path(testDir, "testReaderWithLocalFS");
  Path filePath = new Path(tablePath, "data.dat");

  FileSystem fileSystem = filePath.getFileSystem(conf);
  assertTrue(fileSystem instanceof LocalFileSystem);

  FSDataOutputStream out = fs.create(filePath, true);
  out.write(LINE.getBytes(Charset.defaultCharset()));
  out.write('\n');
  out.close();

  assertTrue(fs.exists(filePath));

  FSDataInputStream inputStream = fs.open(filePath);
  assertFalse(inputStream.getWrappedStream() instanceof ByteBufferReadable);

  ByteBufLineReader lineReader = new ByteBufLineReader(new FSDataInputChannel(inputStream));
  assertEquals(LINE, lineReader.readLine());
  lineReader.seek(0);
  assertEquals(LINE, lineReader.readLine());
  assertNull(lineReader.readLine());
  lineReader.close();
  fs.close();
}
From source file: org.apache.tajo.storage.TestByteBufLineReader.java
License: Apache License

@Test
public void testReaderWithDFS() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
  conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
  conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);

  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
  cluster.waitClusterUp();

  TajoConf tajoConf = new TajoConf(conf);
  tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

  Path tablePath = new Path("/testReaderWithDFS");
  Path filePath = new Path(tablePath, "data.dat");
  try {
    DistributedFileSystem fs = cluster.getFileSystem();
    FSDataOutputStream out = fs.create(filePath, true);
    out.write(LINE.getBytes(Charset.defaultCharset()));
    out.write('\n');
    out.close();

    assertTrue(fs.exists(filePath));

    FSDataInputStream inputStream = fs.open(filePath);
    assertTrue(inputStream.getWrappedStream() instanceof ByteBufferReadable);

    ByteBufLineReader lineReader = new ByteBufLineReader(new FSDataInputChannel(inputStream));
    assertEquals(LINE, lineReader.readLine());
    lineReader.seek(0);
    assertEquals(LINE, lineReader.readLine());
    assertNull(lineReader.readLine());
    lineReader.close();
    fs.close();
  } finally {
    cluster.shutdown(true);
  }
}