List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
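Before the project examples below, a minimal sketch of the call itself: getFileSystem resolves the FileSystem implementation (local, HDFS, and so on) that backs the path's scheme, using the supplied Configuration. The path /tmp/example.txt and the class name are assumptions for illustration only.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample
{
    public static void main(String[] args) throws IOException
    {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // hypothetical path, for illustration only

        // Resolve the FileSystem that backs this path's scheme
        FileSystem fs = path.getFileSystem(conf);

        // Use the resolved FileSystem exactly as the examples below do: open, create, glob, ...
        FSDataInputStream in = fs.open(path);
        try
        {
            System.out.println("first byte: " + in.read());
        }
        finally
        {
            in.close();
        }
    }
}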
From source file: com.linkedin.cubert.block.BlockUtils.java
License: Open Source License
@SuppressWarnings("unchecked") public static Block loadBlock(BlockProperties props, IndexEntry indexEntry, Configuration conf, JsonNode json, BlockSerializationType serializationType, boolean isInMemoryBlock) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException { Block block;/* w w w .ja va2 s . c o m*/ if (indexEntry == null) { if (emptyForMissing) return new EmptyBlock(props); throw new IOException(String.format("Index entry is null")); } // populate props props.setBlockId(indexEntry.getBlockId()); props.setNumRecords(indexEntry.getNumRecords()); // Open the file and seek to the offset for this block Path file = new Path(indexEntry.getFile()); FileSystem fs = file.getFileSystem(conf); FSDataInputStream fsin = fs.open(file, BLOCK_BUFFER_SIZE); fsin.seek(indexEntry.getOffset()); // Gather information needed to read this block Class<Tuple> valueClass = (Class<Tuple>) TupleFactory.getInstance().newTuple().getClass(); CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file); // Load the block now if (isInMemoryBlock) { print.f("LOADING IN MEMORY the block %d", indexEntry.getBlockId()); ByteBuffer byteBuffer = inMemoryBlockCache.get(indexEntry); if (byteBuffer == null) { int read = 0; byte[] data = new byte[(int) indexEntry.getLength()]; while (read != data.length) { read += fsin.read(data, read, data.length - read); } fsin.close(); byteBuffer = ByteBuffer.wrap(data); inMemoryBlockCache.put(indexEntry, byteBuffer); } else { print.f("REUSED FROM CACHE!!"); byteBuffer.rewind(); } block = new RubixMemoryBlock(props, conf, byteBuffer, valueClass, codec, serializationType); block.configure(json); return block; } else { print.f("STREAMING the block %d", indexEntry.getBlockId()); InputStream in = new BlockInputStream(fsin, indexEntry.getLength()); if (codec != null) { in = codec.createInputStream(in); } block = new CubertBlock(props, new BlockIterator<Tuple>(conf, in, valueClass, serializationType, props.getSchema())); block.configure(json); print.f("Loaded block id=%d from file=%s offset=%d length=%d", indexEntry.getBlockId(), file.toString(), indexEntry.getOffset(), indexEntry.getLength()); return block; } }
From source file: com.linkedin.cubert.examples.ListFiles.java
License: Open Source License
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException,
        InterruptedException
{
    List<String> files = new ArrayList<String>();

    String dirsStr = JsonUtils.getText(json.get("args"), "dirs");
    String[] dirs = CommonUtils.trim(dirsStr.split(","));

    for (String dir : dirs)
    {
        Path path = new Path(dir);
        FileSystem fs = path.getFileSystem(PhaseContext.getConf());

        FileStatus[] allStatus = fs.globStatus(path);
        if (allStatus == null || allStatus.length == 0)
            continue;

        for (FileStatus status : allStatus)
        {
            if (status.isDir())
            {
                listFiles(fs, status.getPath(), files);
            }
            else
            {
                files.add(status.getPath().toUri().getPath());
            }
        }
    }

    iterator = files.iterator();
    output = TupleFactory.getInstance().newTuple(1);
}
From source file: com.linkedin.cubert.examples.Purge.java
License: Open Source License
private void swap(String original, String temp) throws IOException
{
    Path source = new Path(temp);
    Path dest = new Path(original);
    FileSystem fs = dest.getFileSystem(conf);

    fs.delete(dest, true);
    fs.rename(source, dest);
}
From source file: com.linkedin.cubert.io.rubix.RubixOutputFormat.java
License: Open Source License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException,
        InterruptedException
{
    Configuration conf = context.getConfiguration();
    String extension = RubixConstants.RUBIX_EXTENSION;

    CompressionCodec codec = null;
    boolean isCompressed = getCompressOutput(context);

    if (isCompressed)
    {
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension += codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);

    return new RubixRecordWriter<K, V>(conf,
                                       fileOut,
                                       context.getOutputKeyClass(),
                                       context.getOutputValueClass(),
                                       codec);
}
From source file: com.linkedin.cubert.io.rubix.RubixRecordReader.java
License: Open Source License
public void initialize(InputSplit split, Configuration conf) throws IOException,
        InterruptedException
{
    @SuppressWarnings("unchecked")
    RubixInputSplit<K, V> rsplit = (RubixInputSplit<K, V>) split;

    SerializationFactory serializationFactory = new SerializationFactory(conf);

    switch (rsplit.getBlockSerializationType())
    {
    case DEFAULT:
        valueDeserializer = serializationFactory.getDeserializer(rsplit.getValueClass());
        break;
    case COMPACT:
        BlockSchema schema = rsplit.getSchema();
        valueDeserializer = new CompactDeserializer<V>(schema);
        break;
    }

    key = rsplit.getKey();

    // store the blockid and partition key in the conf
    conf.setLong("MY_BLOCK_ID", rsplit.getBlockId());
    conf.setLong("MY_NUM_RECORDS", rsplit.getNumRecords());

    ByteArrayOutputStream tmpOut = new ByteArrayOutputStream();
    ((Tuple) key).write(new DataOutputStream(tmpOut));
    String keySerialized = SerializerUtils.serializeToString(tmpOut.toByteArray());
    conf.set("MY_PARTITION_KEY", keySerialized);

    Path path = rsplit.getFilename();
    offset = rsplit.getOffset();
    length = rsplit.getLength();

    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fsin = fs.open(path);
    fsin.seek(offset);

    in = new BlockInputStream(fsin, length);

    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    if (codec != null)
    {
        print.f("codec is not null and it is %s", codec.getClass().toString());
        in = codec.createInputStream(in);
    }
    else
    {
        print.f("codec is null");
    }

    valueDeserializer.open(in);
}
From source file: com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageInputStream.java
License: Apache License
/** Construct given a path and a configuration. */
public AvroStorageInputStream(Path path, TaskAttemptContext context) throws IOException
{
    this.stream = path.getFileSystem(context.getConfiguration()).open(path);
    this.len = path.getFileSystem(context.getConfiguration()).getFileStatus(path).getLen();
}
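The constructor above resolves the same FileSystem twice. A minimal sketch of a variant that caches the lookup in a local variable; the fields stream and len are taken from the example, everything else is unchanged:

public AvroStorageInputStream(Path path, TaskAttemptContext context) throws IOException
{
    // Resolve the FileSystem once; both the stream and the file length come from it
    FileSystem fs = path.getFileSystem(context.getConfiguration());
    this.stream = fs.open(path);
    this.len = fs.getFileStatus(path).getLen();
}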
From source file: com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java
License: Apache License
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context) throws IOException,
        InterruptedException
{
    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context))
    {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Take the max, since core-default.xml sets io.file.buffer.size to 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY,
            Math.max(conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}
From source file: com.linkedin.cubert.utils.AvroUtils.java
License: Open Source License
/**
 * Extracts the schema of an Avro file.
 *
 * @param conf the Hadoop configuration used to resolve the file system
 * @param path an Avro file, or a directory containing *.avro files
 * @return the schema of the (first) Avro file found
 * @throws IOException
 */
public static Schema getSchema(Configuration conf, Path path) throws IOException
{
    FileSystem fs = path.getFileSystem(conf);
    if (fs.getFileStatus(path).isDir())
    {
        Path globPath = new Path(path, "*.avro");
        FileStatus[] allFiles = fs.globStatus(globPath);
        if (allFiles.length == 0)
        {
            throw new IOException("there are no files in " + path.toString());
        }

        path = allFiles[0].getPath();
    }

    System.out.println("Obtaining schema of avro file " + path.toString());

    return getSchema(new FsInput(path, conf));
}
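A hypothetical call site for the helper above; the path /data/events.avro is an assumption for illustration:

Configuration conf = new Configuration();
// Works for a single Avro file or for a directory containing *.avro files
Schema schema = AvroUtils.getSchema(conf, new Path("/data/events.avro"));
System.out.println(schema.toString(true));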
From source file: com.linkedin.cubert.utils.CommonUtils.java
License: Open Source License
public static Path getAFileInPath(Configuration conf, Path path, String suffix) throws IOException
{
    FileSystem fs = path.getFileSystem(conf);
    if (fs.getFileStatus(path).isDir())
    {
        Path globPath = new Path(path, "*." + suffix);
        FileStatus[] allFiles = fs.globStatus(globPath);
        if (allFiles.length == 0)
        {
            throw new IOException("there are no files in " + path.toString());
        }

        path = allFiles[0].getPath();
    }

    print.f("Obtaining schema of %s file %s", suffix, path.toString());

    return path;
}