Example usage for org.apache.hadoop.fs Path getFileSystem

List of usage examples for org.apache.hadoop.fs Path getFileSystem

Introduction

This page lists example usages of org.apache.hadoop.fs.Path.getFileSystem, drawn from open source projects.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Document

Return the FileSystem that owns this Path.
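
Before the project examples, here is a minimal, self-contained sketch of the call itself (the path is illustrative; any scheme registered with Hadoop, such as file:// or hdfs://, works). The Path's scheme and authority determine which FileSystem implementation the Configuration resolves to.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // Illustrative path; the scheme (file://, hdfs://, ...) selects
        // which FileSystem implementation is returned.
        Path path = new Path("file:///tmp/example.txt");

        FileSystem fs = path.getFileSystem(conf);
        System.out.println("FileSystem for " + path + ": " + fs.getUri());
    }
}

Every example below follows the same pattern: build a Path, resolve its FileSystem from the job Configuration, then open, create, glob, or stat files through that FileSystem.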

Usage

From source file:com.linkedin.cubert.block.BlockUtils.java

License:Open Source License

@SuppressWarnings("unchecked")
public static Block loadBlock(BlockProperties props, IndexEntry indexEntry, Configuration conf, JsonNode json,
        BlockSerializationType serializationType, boolean isInMemoryBlock) throws IOException,
        ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
    Block block;
    if (indexEntry == null) {
        if (emptyForMissing)
            return new EmptyBlock(props);

        throw new IOException("Index entry is null");
    }

    // populate props
    props.setBlockId(indexEntry.getBlockId());
    props.setNumRecords(indexEntry.getNumRecords());

    // Open the file and seek to the offset for this block
    Path file = new Path(indexEntry.getFile());
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fsin = fs.open(file, BLOCK_BUFFER_SIZE);
    fsin.seek(indexEntry.getOffset());

    // Gather information needed to read this block
    Class<Tuple> valueClass = (Class<Tuple>) TupleFactory.getInstance().newTuple().getClass();
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);

    // Load the block now
    if (isInMemoryBlock) {
        print.f("LOADING IN MEMORY the block %d", indexEntry.getBlockId());

        ByteBuffer byteBuffer = inMemoryBlockCache.get(indexEntry);

        if (byteBuffer == null) {
            int read = 0;
            byte[] data = new byte[(int) indexEntry.getLength()];
            while (read != data.length) {
                int bytesRead = fsin.read(data, read, data.length - read);
                if (bytesRead < 0)
                    throw new IOException("Unexpected end of file while reading block");
                read += bytesRead;
            }
            fsin.close();

            byteBuffer = ByteBuffer.wrap(data);

            inMemoryBlockCache.put(indexEntry, byteBuffer);
        } else {
            print.f("REUSED FROM CACHE!!");
            byteBuffer.rewind();
        }

        block = new RubixMemoryBlock(props, conf, byteBuffer, valueClass, codec, serializationType);
        block.configure(json);
        return block;
    } else {
        print.f("STREAMING the block %d", indexEntry.getBlockId());
        InputStream in = new BlockInputStream(fsin, indexEntry.getLength());

        if (codec != null) {
            in = codec.createInputStream(in);
        }

        block = new CubertBlock(props,
                new BlockIterator<Tuple>(conf, in, valueClass, serializationType, props.getSchema()));
        block.configure(json);

        print.f("Loaded block id=%d from file=%s offset=%d length=%d", indexEntry.getBlockId(), file.toString(),
                indexEntry.getOffset(), indexEntry.getLength());

        return block;
    }
}

From source file:com.linkedin.cubert.examples.ListFiles.java

License:Open Source License

@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    List<String> files = new ArrayList<String>();
    String dirsStr = JsonUtils.getText(json.get("args"), "dirs");
    String[] dirs = CommonUtils.trim(dirsStr.split(","));

    for (String dir : dirs) {
        Path path = new Path(dir);
        FileSystem fs = path.getFileSystem(PhaseContext.getConf());
        FileStatus[] allStatus = fs.globStatus(path);

        if (allStatus == null || allStatus.length == 0)
            continue;

        for (FileStatus status : allStatus) {
            if (status.isDir()) {
                listFiles(fs, status.getPath(), files);
            } else {
                files.add(status.getPath().toUri().getPath());
            }
        }

    }

    iterator = files.iterator();
    output = TupleFactory.getInstance().newTuple(1);
}

From source file:com.linkedin.cubert.examples.Purge.java

License:Open Source License

private void swap(String original, String temp) throws IOException {
    Path source = new Path(temp);
    Path dest = new Path(original);
    FileSystem fs = dest.getFileSystem(conf);

    fs.delete(dest, true);
    fs.rename(source, dest);
}

From source file:com.linkedin.cubert.io.rubix.RubixOutputFormat.java

License:Open Source License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String extension = RubixConstants.RUBIX_EXTENSION;

    CompressionCodec codec = null;
    boolean isCompressed = getCompressOutput(context);

    if (isCompressed) {
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension += codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);

    FSDataOutputStream fileOut = fs.create(file, false);
    return new RubixRecordWriter<K, V>(conf, fileOut, context.getOutputKeyClass(),
            context.getOutputValueClass(), codec);
}

From source file:com.linkedin.cubert.io.rubix.RubixRecordReader.java

License:Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException, InterruptedException {
    @SuppressWarnings("unchecked")
    RubixInputSplit<K, V> rsplit = (RubixInputSplit<K, V>) split;

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    switch (rsplit.getBlockSerializationType()) {
    case DEFAULT:
        valueDeserializer = serializationFactory.getDeserializer(rsplit.getValueClass());
        break;
    case COMPACT:
        BlockSchema schema = rsplit.getSchema();
        valueDeserializer = new CompactDeserializer<V>(schema);
        break;
    }

    key = rsplit.getKey();

    // store the blockid and partition key in the conf
    conf.setLong("MY_BLOCK_ID", rsplit.getBlockId());
    conf.setLong("MY_NUM_RECORDS", rsplit.getNumRecords());
    ByteArrayOutputStream tmpOut = new ByteArrayOutputStream();
    ((Tuple) key).write(new DataOutputStream(tmpOut));
    String keySerialized = SerializerUtils.serializeToString(tmpOut.toByteArray());
    conf.set("MY_PARTITION_KEY", keySerialized);

    Path path = rsplit.getFilename();
    offset = rsplit.getOffset();
    length = rsplit.getLength();

    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fsin = fs.open(path);
    fsin.seek(offset);

    in = new BlockInputStream(fsin, length);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    if (codec != null) {
        print.f("codec is not null and it is %s", codec.getClass().toString());
        in = codec.createInputStream(in);
    } else {
        print.f("codec is null");
    }

    valueDeserializer.open(in);
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageInputStream.java

License:Apache License

/** Construct given a path and a configuration. */
public AvroStorageInputStream(Path path, TaskAttemptContext context) throws IOException {
    // Resolve the FileSystem once and reuse it for the stream and the file length
    FileSystem fs = path.getFileSystem(context.getConfiguration());
    this.stream = fs.open(path);
    this.len = fs.getFileStatus(path).getLen();
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java

License:Apache License

@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Take the max, since core-default.xml sets io.file.buffer.size to only 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY,
            Math.max(conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}

From source file:com.linkedin.cubert.utils.AvroUtils.java

License:Open Source License

/**
 * Extracts the schema of an Avro file.
 *
 * @param conf the job configuration
 * @param path path to an Avro file, or to a directory containing *.avro files
 * @return the schema of the Avro file (the first matching file if path is a directory)
 * @throws IOException if the path cannot be read or contains no Avro files
 */
public static Schema getSchema(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.getFileStatus(path).isDir()) {
        Path globPath = new Path(path, "*.avro");
        FileStatus[] allFiles = fs.globStatus(globPath);
        if (allFiles == null || allFiles.length == 0) {
            throw new IOException("there are no files in " + path.toString());
        }

        path = allFiles[0].getPath();
    }
    System.out.println("Obtaining schema of avro file " + path.toString());

    return getSchema(new FsInput(path, conf));
}

From source file:com.linkedin.cubert.utils.CommonUtils.java

License:Open Source License

public static Path getAFileInPath(Configuration conf, Path path, String suffix) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.getFileStatus(path).isDir()) {
        Path globPath = new Path(path, "*." + suffix);
        FileStatus[] allFiles = fs.globStatus(globPath);
        if (allFiles == null || allFiles.length == 0) {
            throw new IOException("there are no files in " + path.toString());
        }

        path = allFiles[0].getPath();
    }

    print.f("Obtaining schema of %s file %s", suffix, path.toString());

    return path;
}