List of usage examples for org.apache.hadoop.fs Path getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
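Path.getFileSystem(Configuration) resolves the FileSystem implementation that owns the path's URI scheme (e.g. file://, hdfs://); a scheme-less path falls back to the default filesystem (fs.defaultFS) from the configuration. A minimal standalone sketch, using only the standard Hadoop API (the /tmp/example.txt path is just an illustration):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Scheme-less path: resolved against fs.defaultFS from the configuration.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);
        System.out.println("Filesystem for " + path + ": " + fs.getUri());
    }
}

Note that FileSystem instances obtained this way are cached per scheme and authority, so calling close() on one (as in the mkdir example below) closes the shared instance for all callers in the JVM.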
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static void mkdir(Path path, boolean overwrite) throws IOException {
    Configuration config = new Configuration();
    FileSystem fs = path.getFileSystem(config);
    if (fs.exists(path) && !overwrite) {
        throw new IllegalStateException("Mkdir exception: path=" + path.toString() + " exists");
    }
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
    fs.mkdirs(path);
    fs.close();
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static void delete(Configuration conf, Iterable<Path> paths) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    for (Path path : paths) {
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
    }
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static String readString(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(path);
    StringBuilder re = new StringBuilder();
    for (FileStatus status : statuses) {
        // Read the first line of each part file, skipping the _SUCCESS marker.
        if (status.isFile() && !status.getPath().getName().equals("_SUCCESS")) {
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(fs.open(status.getPath())))) {
                re.append(bufferedReader.readLine()).append(System.lineSeparator());
            }
        }
    }
    return re.toString();
}
From source file:com.github.sadikovi.hadoop.riff.RiffOutputCommitter.java
License:Open Source License
/**
 * Write Riff metadata file.
 * @param conf hadoop configuration
 * @param outputPath root path for output
 * @throws IOException
 */
private static void writeMetadataFile(Configuration conf, Path outputPath) throws IOException {
    // Schema merging is not supported right now, since we do not support schema evolution.
    // At this point, just find the first file in riff format and write metadata for it.
    FileSystem fs = outputPath.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(outputPath);
    List<FileStatus> partFiles = listFiles(fs, status, true);
    if (partFiles.isEmpty()) {
        LOG.warn("Could not find any part files for path {}, metadata is ignored", outputPath);
    } else {
        Metadata.MetadataWriter writer = Riff.metadataWriter(fs, conf, partFiles.get(0).getPath());
        writer.writeMetadataFile(outputPath);
        LOG.info("Finished writing metadata file for {}", outputPath);
    }
}
From source file:com.github.sadikovi.netflowlib.NetFlowReaderSuite.java
License:Apache License
private FSDataInputStream getTestStream(String file) throws IOException {
    Configuration conf = new Configuration(false);
    Path path = new Path(file);
    FileSystem fs = path.getFileSystem(conf);
    return fs.open(path);
}
From source file:com.github.sadikovi.riff.Riff.java
License:Open Source License
/**
 * Get new writer.
 * Compression codec, if not set, is inferred from the file path.
 * @param conf configuration with Riff options
 * @param path path to write
 * @param td type description
 * @return file writer
 */
public static FileWriter writer(Configuration conf, Path path, TypeDescription td) {
    try {
        return writer(path.getFileSystem(conf), conf, path, td);
    } catch (IOException err) {
        throw new RuntimeException("Error occurred: " + err.getMessage(), err);
    }
}
From source file:com.github.sadikovi.riff.Riff.java
License:Open Source License
/**
 * Get new reader.
 * @param conf configuration with Riff options
 * @param path file path to read
 * @return file reader
 */
public static FileReader reader(Configuration conf, Path path) {
    try {
        return reader(path.getFileSystem(conf), conf, path);
    } catch (IOException err) {
        throw new RuntimeException("Error occurred: " + err.getMessage(), err);
    }
}
From source file:com.github.sadikovi.riff.Riff.java
License:Open Source License
/**
 * Get metadata reader.
 * @param conf configuration with Riff options
 * @param metadataPath path to the metadata file or directory where metadata is stored
 * @return metadata reader
 */
public static Metadata.MetadataReader metadataReader(Configuration conf, Path metadataPath) {
    try {
        return metadataReader(metadataPath.getFileSystem(conf), conf, metadataPath);
    } catch (IOException err) {
        throw new RuntimeException("Error occurred: " + err.getMessage(), err);
    }
}
From source file:com.github.sadikovi.riff.Riff.java
License:Open Source License
/**
 * Get metadata writer.
 * @param conf hadoop configuration with riff settings
 * @param filepath filepath to a valid Riff file
 * @return metadata writer
 */
public static Metadata.MetadataWriter metadataWriter(Configuration conf, Path filepath) {
    try {
        return metadataWriter(filepath.getFileSystem(conf), conf, filepath);
    } catch (IOException err) {
        throw new RuntimeException("Error occurred: " + err.getMessage(), err);
    }
}
From source file:com.github.sandgorgon.parmr.Main.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: parmr <input file> <output path>");
        return -1;
    }
    Configuration conf = super.getConf();
    conf.set("mapreduce.job.queuename", "prod");
    Job job = Job.getInstance(conf);
    job.setJobName(jobName);
    job.setJarByClass(Main.class);

    // Parquet schema: read from the input file itself the schema that we will be assuming.
    Path infile = new Path(args[0]);
    List<Footer> footers = ParquetFileReader.readFooters(conf,
            infile.getFileSystem(conf).getFileStatus(infile), true);
    MessageType schema = footers.get(0).getParquetMetadata().getFileMetaData().getSchema();

    // Avro schema: convert the Parquet schema to an Avro schema.
    AvroSchemaConverter avroSchemaConverter = new AvroSchemaConverter();
    Schema avroSchema = avroSchemaConverter.convert(schema);

    // Set the Mapper.
    job.setMapperClass(UserMapper.class);

    // This works for predicate pushdown on record assembly read.
    AvroParquetInputFormat.setUnboundRecordFilter(job, UserRecordFilter.class);
    AvroParquetInputFormat.addInputPath(job, new Path(args[0]));
    AvroParquetInputFormat.setAvroReadSchema(job, avroSchema);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // If you need to return an Avro object from the mapper, refer to this:
    //job.setMapOutputValueClass(AvroValue.class);
    //AvroJob.setMapOutputValueSchema(job, avroSchema);

    // Reducer
    job.setReducerClass(UserReducer.class);

    // Output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // If we need to return an Avro class again, refer to this:
    //job.setOutputFormatClass(AvroParquetOutputFormat.class);
    //AvroParquetOutputFormat.setOutputPath(job, new Path(args[1]));
    //AvroParquetOutputFormat.setSchema(job, avroSchema);
    //job.setOutputKeyClass(Void.class);
    //job.setOutputValueClass(GenericRecord.class);

    // Rough way of testing the projection side of things.
    AvroParquetInputFormat.setRequestedProjection(job,
            Schema.parse("{\"namespace\": \"com.github.sandgorgon.parmr.avro\",\n"
                    + " \"type\": \"record\",\n"
                    + " \"name\": \"User\",\n"
                    + " \"fields\": [\n"
                    + "   {\"name\": \"name\", \"type\": \"string\"},\n"
                    + "   {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]}\n"
                    // + "   {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n"
                    + " ]\n"
                    + "}\n"));

    // Run the job and report completion status.
    return job.waitForCompletion(true) ? 0 : 1;
}