List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
From source file:com.cloudera.cdk.data.hcatalog.HCatalogExternalMetadataProvider.java
License:Apache License
/**
 * Creates a metadata provider rooted at the given directory.
 *
 * <p>The file system is looked up from the root path itself, and the stored
 * root is that path qualified against the resolved file system.
 *
 * @param conf configuration handed to the superclass and used for the file system lookup
 * @param rootDirectory root directory; must not be null
 * @throws IllegalArgumentException if {@code rootDirectory} is null
 * @throws MetadataProviderException if the file system lookup fails with an I/O error
 */
public HCatalogExternalMetadataProvider(Configuration conf, Path rootDirectory) {
  super(conf);
  Preconditions.checkArgument(rootDirectory != null, "Root cannot be null");

  try {
    this.rootFileSystem = rootDirectory.getFileSystem(conf);
    this.rootDirectory = rootFileSystem.makeQualified(rootDirectory);
  } catch (IOException e) {
    throw new MetadataProviderException("Could not get FileSystem for root path", e);
  }
}
From source file:com.cloudera.cdk.data.hcatalog.HiveUtils.java
License:Apache License
static FileSystem fsForPath(Configuration conf, Path path) { try {//from w w w. j a v a2 s . c o m return path.getFileSystem(conf); } catch (IOException ex) { throw new MetadataProviderException("Cannot access FileSystem for uri:" + path, ex); } }
From source file:com.cloudera.cdk.data.hcatalog.HiveUtils.java
License:Apache License
static FileSystem fsForPath(Configuration conf, String fsUri, Path path) { try {// ww w . j a va 2 s .c om if (fsUri == null) { return path.getFileSystem(conf); } else { return FileSystem.get(new URI(fsUri), conf); } } catch (IOException ex) { throw new MetadataProviderException("Cannot access FileSystem for path:" + path, ex); } catch (URISyntaxException ex) { throw new MetadataProviderException("Cannot access FileSystem for uri:" + fsUri, ex); } }
From source file:com.cloudera.cdk.data.MemoryMetadataProvider.java
License:Apache License
@Override public DatasetDescriptor create(String name, DatasetDescriptor descriptor) { Preconditions.checkArgument(name != null, "Name cannot be null"); Preconditions.checkArgument(descriptor != null, "Descriptor cannot be null"); if (descriptors.containsKey(name)) { throw new DatasetExistsException("Dataset already exists for name:" + name); }//from w w w . j a v a 2s . c o m final DatasetDescriptor newDescriptor; if (descriptor.getLocation() == null) { final Path location = fs.makeQualified(new Path(newLocation(name))); final String fsUri; try { fsUri = location.getFileSystem(conf).getUri().toString(); } catch (IOException ex) { throw new MetadataProviderException("Cannot get FS for location" + location); } newDescriptor = new DatasetDescriptor.Builder(descriptor).location(location).build(); } else { // don't need to modify it newDescriptor = descriptor; } // save and return descriptors.put(name, newDescriptor); return newDescriptor; }
From source file:com.cloudera.cdk.morphline.hadoop.rcfile.ReadRCFileTest.java
License:Apache License
/**
 * Opens {@code inputFile} for reading using a freshly created configuration.
 *
 * @param inputFile file to open
 * @return an open stream over the file; the caller is responsible for closing it
 * @throws IOException if the file system lookup or the open call fails
 */
private InputStream readPath(final Path inputFile) throws IOException {
  final Configuration freshConf = new Configuration();
  return inputFile.getFileSystem(freshConf).open(inputFile);
}
From source file:com.cloudera.cdk.tools.CombinedLogFormatConverter.java
License:Apache License
@Override public int run(String... args) throws Exception { if (args.length != 3) { System.err.println("Usage: " + CombinedLogFormatConverter.class.getSimpleName() + " <input> <dataset_root> <dataset name>"); return 1; }//from w w w. j av a2s .co m String input = args[0]; String datasetRoot = args[1]; String datasetName = args[2]; Schema schema = new Schema.Parser().parse(Resources.getResource("combined_log_format.avsc").openStream()); // Create the dataset Path root = new Path(datasetRoot); Configuration conf = new Configuration(); FileSystem fs = root.getFileSystem(conf); DatasetRepository repo = new FileSystemDatasetRepository(fs, root); DatasetDescriptor datasetDescriptor = new DatasetDescriptor.Builder().schema(schema).get(); repo.create(datasetName, datasetDescriptor); // Run the job final String schemaString = schema.toString(); AvroType<GenericData.Record> outputType = Avros.generics(schema); PCollection<String> lines = readTextFile(input); PCollection<GenericData.Record> records = lines.parallelDo(new ConvertFn(schemaString), outputType); getPipeline().write(records, new AvroFileTarget(new Path(root, datasetName)), Target.WriteMode.APPEND); run(); return 0; }
From source file:com.cloudera.cdk.tools.TestCombinedLogFormatConverter.java
License:Apache License
/**
 * Runs the converter over {@code access_log.txt} into a dataset named "logs"
 * and verifies the exit code and every field of the first record read back.
 */
@Test
public void test() throws Exception {
  String input = Resources.getResource("access_log.txt").toExternalForm();
  String datasetRoot = TEST_DIR.toURI().toURL().toExternalForm();
  String datasetName = "logs";

  CombinedLogFormatConverter converter = new CombinedLogFormatConverter();
  int status = converter.run(input, datasetRoot, datasetName);
  Assert.assertEquals(0, status);

  // Re-open the dataset that the converter just wrote.
  Path rootPath = new Path(datasetRoot);
  FileSystem fileSystem = rootPath.getFileSystem(new Configuration());
  DatasetRepository repository = new FileSystemDatasetRepository(fileSystem, rootPath);
  Dataset logs = repository.get(datasetName);
  DatasetReader<GenericRecord> reader = logs.getReader();

  try {
    reader.open();
    Assert.assertTrue(reader.hasNext());

    GenericRecord record = reader.read();
    Assert.assertEquals("ip1", record.get("host"));
    Assert.assertNull(record.get("rfc931_identity"));
    Assert.assertNull(record.get("username"));
    Assert.assertEquals("24/Apr/2011:04:06:01 -0400", record.get("datetime"));
    Assert.assertEquals("GET /~strabal/grease/photo9/927-3.jpg HTTP/1.1",
        record.get("request"));
    Assert.assertEquals(200, record.get("http_status_code"));
    Assert.assertEquals(40028, record.get("response_size"));
    Assert.assertNull(record.get("referrer"));
    Assert.assertEquals("Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex" + ".com/bots)",
        record.get("user_agent"));
  } finally {
    reader.close();
  }
}
From source file:com.cloudera.crunch.type.avro.AvroOutputFormat.java
License:Apache License
/**
 * Builds a record writer that appends each wrapped datum to an Avro data file.
 *
 * <p>The file is the task's default work file with the Avro extension. It is
 * created with the job's output schema before the writer is returned.
 *
 * @param context task attempt context supplying the configuration and work file location
 * @return a writer whose {@code write} appends the wrapper's datum and whose
 *     {@code close} closes the underlying data file writer
 * @throws IOException if the output file cannot be created
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Schema outputSchema = AvroJob.getOutputSchema(context.getConfiguration());
  final DataFileWriter<T> dataFileWriter = new DataFileWriter<T>(new GenericDatumWriter<T>());

  Path workFile = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
  dataFileWriter.create(
      outputSchema, workFile.getFileSystem(context.getConfiguration()).create(workFile));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      dataFileWriter.close();
    }

    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      dataFileWriter.append(wrapper.datum());
    }
  };
}
From source file:com.cloudera.crunch.util.DistCache.java
License:Open Source License
public static Object read(Configuration conf, Path path) throws IOException { URI target = null;//from w w w .ja v a2s.co m for (URI uri : DistributedCache.getCacheFiles(conf)) { if (uri.toString().equals(path.toString())) { target = uri; break; } } Object value = null; if (target != null) { Path targetPath = new Path(target.toString()); ObjectInputStream ois = new ObjectInputStream(targetPath.getFileSystem(conf).open(targetPath)); try { value = ois.readObject(); } catch (ClassNotFoundException e) { throw new CrunchRuntimeException(e); } ois.close(); } return value; }
From source file:com.cloudera.dataflow.spark.TemplatedAvroKeyOutputFormat.java
License:Open Source License
/**
 * Opens the output stream for this task's Avro file.
 *
 * <p>The target file is chosen by {@code ShardNameTemplateHelper.getDefaultWorkFile}
 * and created on the file system that owns that path.
 *
 * @param context task attempt context supplying the configuration
 * @return a newly created output stream for the work file
 * @throws IOException if the file system lookup or file creation fails
 */
@Override
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
  final Path workFile = ShardNameTemplateHelper.getDefaultWorkFile(this, context);
  return workFile.getFileSystem(context.getConfiguration()).create(workFile);
}