Example usage for org.apache.hadoop.fs Path getFileSystem

List of usage examples for org.apache.hadoop.fs.Path#getFileSystem

Introduction

On this page you can find example usages of the getFileSystem method of org.apache.hadoop.fs.Path.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Source Link

Document

Return the FileSystem that owns this Path.

Usage

From source file:com.cloudera.cdk.data.hcatalog.HCatalogExternalMetadataProvider.java

License:Apache License

/**
 * Creates a provider whose datasets live under the given root directory.
 *
 * <p>The root's file system is resolved from {@code conf}, and the root path
 * is stored in its fully qualified form.
 *
 * @param conf configuration passed to the superclass and used to resolve the file system
 * @param rootDirectory root path; must not be {@code null}
 * @throws IllegalArgumentException if {@code rootDirectory} is {@code null}
 * @throws MetadataProviderException if the file system for the root cannot be obtained
 */
public HCatalogExternalMetadataProvider(Configuration conf, Path rootDirectory) {
    super(conf);
    if (rootDirectory == null) {
        throw new IllegalArgumentException("Root cannot be null");
    }

    try {
        this.rootFileSystem = rootDirectory.getFileSystem(conf);
        // Qualify against the resolved file system so the stored root is unambiguous.
        this.rootDirectory = this.rootFileSystem.makeQualified(rootDirectory);
    } catch (IOException e) {
        throw new MetadataProviderException("Could not get FileSystem for root path", e);
    }
}

From source file:com.cloudera.cdk.data.hcatalog.HiveUtils.java

License:Apache License

/**
 * Resolves the file system that owns {@code path}.
 *
 * @param conf configuration used for the lookup
 * @param path path whose file system is wanted
 * @return the file system returned by {@code path.getFileSystem(conf)}
 * @throws MetadataProviderException wrapping any {@link IOException} from the lookup
 */
static FileSystem fsForPath(Configuration conf, Path path) {
    final FileSystem resolved;
    try {
        resolved = path.getFileSystem(conf);
    } catch (IOException e) {
        throw new MetadataProviderException("Cannot access FileSystem for uri:" + path, e);
    }
    return resolved;
}

From source file:com.cloudera.cdk.data.hcatalog.HiveUtils.java

License:Apache License

/**
 * Resolves a file system either from an explicit URI string or, when none is
 * given, from the path itself.
 *
 * @param conf configuration used for the lookup
 * @param fsUri file system URI; when {@code null} the file system is derived from {@code path}
 * @param path path used for the lookup when {@code fsUri} is {@code null}, and in error messages
 * @return the resolved file system
 * @throws MetadataProviderException if {@code fsUri} is not a valid URI or the lookup fails
 */
static FileSystem fsForPath(Configuration conf, String fsUri, Path path) {
    // No explicit URI: let the path pick its own file system.
    if (fsUri == null) {
        try {
            return path.getFileSystem(conf);
        } catch (IOException e) {
            throw new MetadataProviderException("Cannot access FileSystem for path:" + path, e);
        }
    }

    final URI parsedUri;
    try {
        parsedUri = new URI(fsUri);
    } catch (URISyntaxException e) {
        throw new MetadataProviderException("Cannot access FileSystem for uri:" + fsUri, e);
    }

    try {
        return FileSystem.get(parsedUri, conf);
    } catch (IOException e) {
        throw new MetadataProviderException("Cannot access FileSystem for path:" + path, e);
    }
}

From source file:com.cloudera.cdk.data.MemoryMetadataProvider.java

License:Apache License

/**
 * Registers a new dataset descriptor under {@code name}.
 *
 * <p>When the supplied descriptor has no location, a qualified location is
 * derived from {@code newLocation(name)} and a copy of the descriptor carrying
 * that location is stored; otherwise the descriptor is stored unchanged.
 *
 * @param name dataset name; must not be {@code null}
 * @param descriptor descriptor to register; must not be {@code null}
 * @return the descriptor that was stored
 * @throws DatasetExistsException if a descriptor is already registered under {@code name}
 * @throws MetadataProviderException if the file system for the derived location cannot be obtained
 */
@Override
public DatasetDescriptor create(String name, DatasetDescriptor descriptor) {
    Preconditions.checkArgument(name != null, "Name cannot be null");
    Preconditions.checkArgument(descriptor != null, "Descriptor cannot be null");

    if (descriptors.containsKey(name)) {
        throw new DatasetExistsException("Dataset already exists for name:" + name);
    }

    final DatasetDescriptor newDescriptor;
    if (descriptor.getLocation() == null) {
        final Path location = fs.makeQualified(new Path(newLocation(name)));
        try {
            // Only the lookup itself matters here: it fails fast when the
            // location's file system cannot be obtained. The result is not needed.
            location.getFileSystem(conf);
        } catch (IOException ex) {
            // Keep the cause so the underlying I/O failure stays visible to callers.
            throw new MetadataProviderException("Cannot get FS for location:" + location, ex);
        }

        newDescriptor = new DatasetDescriptor.Builder(descriptor).location(location).build();
    } else {
        // don't need to modify it
        newDescriptor = descriptor;
    }

    // save and return
    descriptors.put(name, newDescriptor);
    return newDescriptor;
}

From source file:com.cloudera.cdk.morphline.hadoop.rcfile.ReadRCFileTest.java

License:Apache License

/**
 * Opens {@code inputFile} for reading on the file system that owns it,
 * resolved with a freshly created {@link Configuration}.
 *
 * @param inputFile file to open
 * @return a stream over the file's contents; the caller is responsible for closing it
 * @throws IOException if the file system cannot be resolved or the file cannot be opened
 */
private InputStream readPath(final Path inputFile) throws IOException {
    final Configuration freshConf = new Configuration();
    return inputFile.getFileSystem(freshConf).open(inputFile);
}

From source file:com.cloudera.cdk.tools.CombinedLogFormatConverter.java

License:Apache License

/**
 * Converts a combined-log-format text input into Avro records stored as a dataset.
 *
 * <p>Expects exactly three arguments: the input location, the dataset root
 * and the dataset name. The dataset is created in a repository rooted at the
 * dataset root, then a pipeline appends the converted records under
 * {@code <dataset_root>/<dataset name>}.
 *
 * @param args {@code <input> <dataset_root> <dataset name>}
 * @return {@code 1} after printing usage when the argument count is wrong, otherwise {@code 0}
 * @throws Exception if reading the schema, creating the dataset or running the pipeline fails
 */
@Override
public int run(String... args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: " + CombinedLogFormatConverter.class.getSimpleName()
                + " <input> <dataset_root> <dataset name>");
        return 1;
    }
    String input = args[0];
    String datasetRoot = args[1];
    String datasetName = args[2];

    // Close the schema resource explicitly: this method opened the stream, and
    // the parser is not relied upon to close it.
    final Schema schema;
    final java.io.InputStream schemaStream = Resources.getResource("combined_log_format.avsc").openStream();
    try {
        schema = new Schema.Parser().parse(schemaStream);
    } finally {
        schemaStream.close();
    }

    // Create the dataset
    Path root = new Path(datasetRoot);
    Configuration conf = new Configuration();
    FileSystem fs = root.getFileSystem(conf);

    DatasetRepository repo = new FileSystemDatasetRepository(fs, root);
    DatasetDescriptor datasetDescriptor = new DatasetDescriptor.Builder().schema(schema).get();

    repo.create(datasetName, datasetDescriptor);

    // Run the job
    final String schemaString = schema.toString();
    AvroType<GenericData.Record> outputType = Avros.generics(schema);
    PCollection<String> lines = readTextFile(input);
    PCollection<GenericData.Record> records = lines.parallelDo(new ConvertFn(schemaString), outputType);
    getPipeline().write(records, new AvroFileTarget(new Path(root, datasetName)), Target.WriteMode.APPEND);
    run();
    return 0;
}

From source file:com.cloudera.cdk.tools.TestCombinedLogFormatConverter.java

License:Apache License

/**
 * Runs the converter on the bundled {@code access_log.txt} resource and checks
 * every field of the first record read back from the resulting "logs" dataset.
 */
@Test
public void test() throws Exception {
    final String input = Resources.getResource("access_log.txt").toExternalForm();
    final String datasetRoot = TEST_DIR.toURI().toURL().toExternalForm();
    final String datasetName = "logs";

    final CombinedLogFormatConverter converter = new CombinedLogFormatConverter();
    final int status = converter.run(input, datasetRoot, datasetName);
    Assert.assertEquals(0, status);

    // Re-open the repository independently of the tool and read the dataset back.
    final Path rootPath = new Path(datasetRoot);
    final FileSystem rootFs = rootPath.getFileSystem(new Configuration());
    final DatasetRepository repository = new FileSystemDatasetRepository(rootFs, rootPath);
    final DatasetReader<GenericRecord> reader = repository.get(datasetName).getReader();
    try {
        reader.open();
        Assert.assertTrue(reader.hasNext());
        final GenericRecord entry = reader.read();

        Assert.assertEquals("ip1", entry.get("host"));
        Assert.assertNull(entry.get("rfc931_identity"));
        Assert.assertNull(entry.get("username"));
        Assert.assertEquals("24/Apr/2011:04:06:01 -0400", entry.get("datetime"));
        Assert.assertEquals("GET /~strabal/grease/photo9/927-3.jpg HTTP/1.1", entry.get("request"));
        Assert.assertEquals(200, entry.get("http_status_code"));
        Assert.assertEquals(40028, entry.get("response_size"));
        Assert.assertNull(entry.get("referrer"));
        Assert.assertEquals("Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex" + ".com/bots)",
                entry.get("user_agent"));
    } finally {
        reader.close();
    }
}

From source file:com.cloudera.crunch.type.avro.AvroOutputFormat.java

License:Apache License

/**
 * Creates a record writer that appends each wrapped datum to an Avro data file
 * at the task's default work file.
 *
 * <p>The output schema is read from the job configuration via
 * {@code AvroJob.getOutputSchema}.
 *
 * @param context task attempt context supplying the configuration and work file
 * @return a writer that appends on {@code write} and closes the Avro file on {@code close}
 * @throws IOException if the output file cannot be created or the Avro file cannot be initialised
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());

    final DataFileWriter<T> writer = new DataFileWriter<T>(new GenericDatumWriter<T>());

    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    final java.io.OutputStream out = path.getFileSystem(context.getConfiguration()).create(path);

    // If initialising the Avro file fails, nothing else holds the raw stream,
    // so release it here instead of leaking the open file.
    boolean initialised = false;
    try {
        writer.create(schema, out);
        initialised = true;
    } finally {
        if (!initialised) {
            try {
                out.close();
            } catch (IOException ignored) {
                // Deliberately ignored: the failure from writer.create is the one to report.
            }
        }
    }

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            writer.close();
        }
    };
}

From source file:com.cloudera.crunch.util.DistCache.java

License:Open Source License

/**
 * Reads a Java-serialized object from the cache file whose URI string equals
 * {@code path.toString()}.
 *
 * @param conf configuration listing the cache files and used to resolve the file system
 * @param path path identifying the cache file to read
 * @return the deserialized object, or {@code null} when no matching cache file is registered
 * @throws IOException if the file cannot be opened, read or closed
 * @throws CrunchRuntimeException if the serialized object's class cannot be found
 */
public static Object read(Configuration conf, Path path) throws IOException {
    final URI[] cacheFiles = DistributedCache.getCacheFiles(conf);
    if (cacheFiles == null) {
        // No cache files registered at all: same result as "no match".
        return null;
    }

    URI target = null;
    for (URI uri : cacheFiles) {
        if (uri.toString().equals(path.toString())) {
            target = uri;
            break;
        }
    }
    if (target == null) {
        return null;
    }

    Path targetPath = new Path(target.toString());
    // Hold the raw stream separately so it is closed even if the
    // ObjectInputStream constructor or readObject throws.
    final java.io.InputStream raw = targetPath.getFileSystem(conf).open(targetPath);
    try {
        ObjectInputStream ois = new ObjectInputStream(raw);
        return ois.readObject();
    } catch (ClassNotFoundException e) {
        throw new CrunchRuntimeException(e);
    } finally {
        raw.close();
    }
}

From source file:com.cloudera.dataflow.spark.TemplatedAvroKeyOutputFormat.java

License:Open Source License

/**
 * Opens the output stream for this task's Avro file.
 *
 * <p>The target is the work file returned by
 * {@code ShardNameTemplateHelper.getDefaultWorkFile}, created on the file
 * system that owns that path.
 *
 * @param context task attempt context supplying the configuration
 * @return a newly created output stream for the work file
 * @throws IOException if the file system cannot be resolved or the file cannot be created
 */
@Override
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
    final Path workFile = ShardNameTemplateHelper.getDefaultWorkFile(this, context);
    final OutputStream created = workFile.getFileSystem(context.getConfiguration()).create(workFile);
    return created;
}