List of usage examples for org.apache.hadoop.fs Path getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
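Path.getFileSystem(Configuration) resolves the FileSystem implementation that owns the path's URI scheme (e.g. file://, hdfs://); a scheme-less path falls back to the default filesystem (fs.defaultFS) from the configuration. A minimal standalone sketch, using only the standard Hadoop API (the /tmp/example.txt path is just an illustration):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Scheme-less path: resolved against fs.defaultFS from the configuration.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);
        System.out.println("Filesystem for " + path + ": " + fs.getUri());
    }
}

Note that FileSystem instances obtained this way are cached per scheme and authority, so calling close() on one (as in the mkdir example below) closes the shared instance for all callers in the JVM.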
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static void mkdir(Path path, boolean overwrite) throws IOException {
    Configuration config = new Configuration();
    FileSystem fs = path.getFileSystem(config);
    if (fs.exists(path) && !overwrite) {
        throw new IllegalStateException("Mkdir exception: path=" + path.toString() + " exists");
    }
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
    fs.mkdirs(path);
    fs.close();
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static void delete(Configuration conf, Iterable<Path> paths) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    for (Path path : paths) {
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
    }
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static String readString(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(path);
    StringBuilder re = new StringBuilder();
    for (FileStatus status : statuses) {
        // Read the first line of each part file, skipping the _SUCCESS marker.
        if (status.isFile() && !status.getPath().getName().equals("_SUCCESS")) {
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(fs.open(status.getPath())))) {
                re.append(bufferedReader.readLine()).append(System.lineSeparator());
            }
        }
    }
    return re.toString();
}
From source file:com.github.sadikovi.hadoop.riff.RiffOutputCommitter.java
License:Open Source License
/**
 * Write Riff metadata file.
 * @param conf hadoop configuration
 * @param outputPath root path for output
 * @throws IOException
 */
private static void writeMetadataFile(Configuration conf, Path outputPath) throws IOException {
    // Schema merging is not supported right now, since we do not support schema evolution.
    // At this point, just find the first file in riff format and write metadata for it.
    FileSystem fs = outputPath.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(outputPath);
    List<FileStatus> partFiles = listFiles(fs, status, true);
    if (partFiles.isEmpty()) {
        LOG.warn("Could not find any part files for path {}, metadata is ignored", outputPath);
    } else {
        Metadata.MetadataWriter writer = Riff.metadataWriter(fs, conf, partFiles.get(0).getPath());
        writer.writeMetadataFile(outputPath);
        LOG.info("Finished writing metadata file for {}", outputPath);
    }
}
From source file:com.github.sadikovi.netflowlib.NetFlowReaderSuite.java
License:Apache License
private FSDataInputStream getTestStream(String file) throws IOException {
    Configuration conf = new Configuration(false);
    Path path = new Path(file);
    FileSystem fs = path.getFileSystem(conf);
    return fs.open(path);
}
From source file:com.github.sadikovi.riff.Riff.java
License:Open Source License
/**
 * Get new writer.
 * Compression codec, if not set, is inferred from the file path.
 * @param conf configuration with Riff options
 * @param path path to write
 * @param td type description
 * @return file writer
 */
public static FileWriter writer(Configuration conf, Path path, TypeDescription td) {
    try {
        return writer(path.getFileSystem(conf), conf, path, td);
    } catch (IOException err) {
        throw new RuntimeException("Error occurred: " + err.getMessage(), err);
    }
}
From source file:com.github.sadikovi.riff.Riff.java
License:Open Source License
/**
 * Get new reader.
 * @param conf configuration with Riff options
 * @param path file path to read
 * @return file reader
 */
public static FileReader reader(Configuration conf, Path path) {
    try {
        return reader(path.getFileSystem(conf), conf, path);
    } catch (IOException err) {
        throw new RuntimeException("Error occurred: " + err.getMessage(), err);
    }
}
From source file:com.github.sadikovi.riff.Riff.java
License:Open Source License
/**
 * Get metadata reader.
 * @param conf configuration with Riff options
 * @param metadataPath path to the metadata file or directory where metadata is stored
 * @return metadata reader
 */
public static Metadata.MetadataReader metadataReader(Configuration conf, Path metadataPath) {
    try {
        return metadataReader(metadataPath.getFileSystem(conf), conf, metadataPath);
    } catch (IOException err) {
        throw new RuntimeException("Error occurred: " + err.getMessage(), err);
    }
}
From source file:com.github.sadikovi.riff.Riff.java
License:Open Source License
/**
 * Get metadata writer.
 * @param conf hadoop configuration with riff settings
 * @param filepath filepath to a valid Riff file
 * @return metadata writer
 */
public static Metadata.MetadataWriter metadataWriter(Configuration conf, Path filepath) {
    try {
        return metadataWriter(filepath.getFileSystem(conf), conf, filepath);
    } catch (IOException err) {
        throw new RuntimeException("Error occurred: " + err.getMessage(), err);
    }
}
From source file:com.github.sandgorgon.parmr.Main.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: parmr <input file> <output path>");
        return -1;
    }
    Configuration conf = super.getConf();
    conf.set("mapreduce.job.queuename", "prod");
    Job job = Job.getInstance(conf);
    job.setJobName(jobName);
    job.setJarByClass(Main.class);

    // Parquet schema: read from the input file itself the schema that we will be assuming.
    Path infile = new Path(args[0]);
    List<Footer> footers = ParquetFileReader.readFooters(conf,
            infile.getFileSystem(conf).getFileStatus(infile), true);
    MessageType schema = footers.get(0).getParquetMetadata().getFileMetaData().getSchema();

    // Avro schema: convert the Parquet schema to an Avro schema.
    AvroSchemaConverter avroSchemaConverter = new AvroSchemaConverter();
    Schema avroSchema = avroSchemaConverter.convert(schema);

    // Set the Mapper.
    job.setMapperClass(UserMapper.class);

    // This works for predicate pushdown on record assembly read.
    AvroParquetInputFormat.setUnboundRecordFilter(job, UserRecordFilter.class);
    AvroParquetInputFormat.addInputPath(job, new Path(args[0]));
    AvroParquetInputFormat.setAvroReadSchema(job, avroSchema);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // If you need to return an Avro object from the mapper, refer to this:
    //job.setMapOutputValueClass(AvroValue.class);
    //AvroJob.setMapOutputValueSchema(job, avroSchema);

    // Reducer
    job.setReducerClass(UserReducer.class);

    // Output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // If we need to return an Avro class again, refer to this:
    //job.setOutputFormatClass(AvroParquetOutputFormat.class);
    //AvroParquetOutputFormat.setOutputPath(job, new Path(args[1]));
    //AvroParquetOutputFormat.setSchema(job, avroSchema);
    //job.setOutputKeyClass(Void.class);
    //job.setOutputValueClass(GenericRecord.class);

    // Rough way of testing the projection side of things.
    AvroParquetInputFormat.setRequestedProjection(job,
            Schema.parse("{\"namespace\": \"com.github.sandgorgon.parmr.avro\",\n"
                    + " \"type\": \"record\",\n"
                    + " \"name\": \"User\",\n"
                    + " \"fields\": [\n"
                    + "   {\"name\": \"name\", \"type\": \"string\"},\n"
                    + "   {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]}\n"
                    // + "   {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n"
                    + " ]\n"
                    + "}\n"));

    // Run the job and report completion status.
    return job.waitForCompletion(true) ? 0 : 1;
}