List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f, int bufferSize) throws IOException
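Before the project examples, a minimal self-contained sketch of calling open(Path, int) directly. The file path and the 4 KB buffer size are illustrative assumptions, not taken from any of the sources below; passing an explicit buffer size overrides the io.file.buffer.size default.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.dat"); // hypothetical path
        FileSystem fs = path.getFileSystem(conf);

        // Open with an explicit 4 KB buffer instead of the configured default.
        try (FSDataInputStream in = fs.open(path, 4096)) {
            byte[] buf = new byte[128];
            int n = in.read(buf); // read the first bytes of the file
            System.out.println("read " + n + " bytes");
        }
    }
}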
From source file:com.linkedin.cubert.block.BlockUtils.java
License:Open Source License
@SuppressWarnings("unchecked") public static Block loadBlock(BlockProperties props, IndexEntry indexEntry, Configuration conf, JsonNode json, BlockSerializationType serializationType, boolean isInMemoryBlock) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException { Block block;//from www. j a va 2s. c o m if (indexEntry == null) { if (emptyForMissing) return new EmptyBlock(props); throw new IOException(String.format("Index entry is null")); } // populate props props.setBlockId(indexEntry.getBlockId()); props.setNumRecords(indexEntry.getNumRecords()); // Open the file and seek to the offset for this block Path file = new Path(indexEntry.getFile()); FileSystem fs = file.getFileSystem(conf); FSDataInputStream fsin = fs.open(file, BLOCK_BUFFER_SIZE); fsin.seek(indexEntry.getOffset()); // Gather information needed to read this block Class<Tuple> valueClass = (Class<Tuple>) TupleFactory.getInstance().newTuple().getClass(); CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file); // Load the block now if (isInMemoryBlock) { print.f("LOADING IN MEMORY the block %d", indexEntry.getBlockId()); ByteBuffer byteBuffer = inMemoryBlockCache.get(indexEntry); if (byteBuffer == null) { int read = 0; byte[] data = new byte[(int) indexEntry.getLength()]; while (read != data.length) { read += fsin.read(data, read, data.length - read); } fsin.close(); byteBuffer = ByteBuffer.wrap(data); inMemoryBlockCache.put(indexEntry, byteBuffer); } else { print.f("REUSED FROM CACHE!!"); byteBuffer.rewind(); } block = new RubixMemoryBlock(props, conf, byteBuffer, valueClass, codec, serializationType); block.configure(json); return block; } else { print.f("STREAMING the block %d", indexEntry.getBlockId()); InputStream in = new BlockInputStream(fsin, indexEntry.getLength()); if (codec != null) { in = codec.createInputStream(in); } block = new CubertBlock(props, new BlockIterator<Tuple>(conf, in, valueClass, serializationType, props.getSchema())); block.configure(json); print.f("Loaded block id=%d from file=%s offset=%d length=%d", indexEntry.getBlockId(), file.toString(), indexEntry.getOffset(), indexEntry.getLength()); return block; } }
From source file:com.mellanox.r4h.DistributedFileSystem.java
License:Apache License
@Override
public FSDataInputStream open(Path f, final int bufferSize) throws IOException {
    statistics.incrementReadOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FSDataInputStream>() {
        @Override
        public FSDataInputStream doCall(final Path p) throws IOException, UnresolvedLinkException {
            final DFSInputStream dfsis = dfs.open(getPathName(p), bufferSize, verifyChecksum);
            return dfs.createWrappedInputStream(dfsis);
        }

        @Override
        public FSDataInputStream next(final FileSystem fs, final Path p) throws IOException {
            return fs.open(p, bufferSize);
        }
    }.resolve(this, absF);
}
From source file:com.metamx.milano.hadoop.MilanoProtoFileRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();

    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    // fs.setVerifyChecksum(false);

    // Passing a null builder to createReader works as long as the file has metadata.
    in = MilanoProtoFile.createReader(fs.open(path, DEFAULT_BUFFER_SIZE), builder, extensionRegistry,
            fileSplit.getLength());

    // This should never be null.
    metadata = in.getMetadata();
    assert metadata != null;

    // We keep statistics on how much has been read to be able to report progress.
    start = in.getBytesRead();
    end = fileSplit.getLength();
    more = start < end;
}
From source file:com.mongodb.hadoop.input.BSONFileRecordReader.java
License:Apache License
@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    fileSplit = (FileSplit) inputSplit;
    final Configuration configuration = context.getConfiguration();
    if (LOG.isDebugEnabled()) {
        LOG.debug("reading split " + fileSplit);
    }
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(configuration);
    in = fs.open(file, 16 * 1024 * 1024);
    in.seek(fileSplit.getStart());
    if (MongoConfigUtil.getLazyBSON(configuration)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}
From source file:com.mongodb.hadoop.mapred.input.BSONFileRecordReader.java
License:Apache License
public void initialize(final InputSplit inputSplit, final Configuration conf) throws IOException {
    fileSplit = (FileSplit) inputSplit;
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(conf);
    in = fs.open(file, 16 * 1024 * 1024);
    in.seek(fileSplit.getStart());
    if (MongoConfigUtil.getLazyBSON(conf)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}