Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath

List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath.

Prototype

public static void setOutputPath(Job job, Path outputDir) 

Source Link

Document

Set the Path of the output directory for the map-reduce job.

Usage

From source file:com.cloudera.recordservice.avro.mapreduce.ColorCount.java

License:Apache License

/**
 * Run the MR2 color count with generic records, and return a map of favorite colors to
 * the number of users./*from  w ww  . ja  v a  2 s.  c o  m*/
 */
public static java.util.Map<String, Integer> countColors()
        throws IOException, ClassNotFoundException, InterruptedException {
    String output = TestUtil.getTempDirectory();
    Path outputPath = new Path(output);
    JobConf conf = new JobConf(ColorCount.class);
    conf.setInt("mapreduce.job.reduces", 1);

    Job job = Job.getInstance(conf);
    job.setJarByClass(ColorCount.class);
    job.setJobName("MR2 Color Count With Generic Records");

    RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users");
    job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(Reduce.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    job.waitForCompletion(false);

    // Read the result and return it. Since we set the number of reducers to 1,
    // there is always just one file containing the value.
    SeekableInput input = new FsInput(new Path(output + "/part-r-00000.avro"), conf);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
    java.util.Map<String, Integer> colorMap = new HashMap<String, Integer>();
    for (GenericRecord datum : fileReader) {
        colorMap.put(datum.get(0).toString(), Integer.parseInt(datum.get(1).toString()));
    }
    return colorMap;
}

From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceAgeCount.java

License:Apache License

public int run(String[] args) throws Exception {
    org.apache.log4j.BasicConfigurator.configure();

    if (args.length != 2) {
        System.err.println("Usage: MapReduceAgeCount <input path> <output path>");
        return -1;
    }//  w w w .ja  v  a 2s  .com

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceAgeCount.class);
    job.setJobName("Age Count");

    // RECORDSERVICE:
    // To read from a table instead of a path, comment out
    // FileInputFormat.setInputPaths() and instead use:
    // FileInputFormat.setInputPaths(job, new Path(args[0]));
    RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);

    // RECORDSERVICE:
    // Use the RecordService version of the AvroKeyValueInputFormat
    job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyValueInputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(AgeCountMapper.class);
    // Set schema for input key and value.
    AvroJob.setInputKeySchema(job, UserKey.getClassSchema());
    AvroJob.setInputValueSchema(job, UserValue.getClassSchema());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(AgeCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceColorCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    org.apache.log4j.BasicConfigurator.configure();

    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }/*from www.  j  a va2  s  . c  om*/

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    // RECORDSERVICE:
    // To read from a table instead of a path, comment out
    // FileInputFormat.setInputPaths() and instead use:
    //FileInputFormat.setInputPaths(job, new Path(args[0]));
    RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users");

    // RECORDSERVICE:
    // Use the RecordService version of the AvroKeyInputFormat
    job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class);
    //job.setInputFormatClass(AvroKeyInputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: RecordCount <input_query> <output_path>");
        System.exit(1);/*from w ww  . j  a v a2  s.c  o  m*/
    }
    String inputQuery = args[0];
    String output = args[1];

    Job job = Job.getInstance(getConf());
    job.setJobName("recordcount");
    job.setJarByClass(RecordCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LongWritable.class);

    RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery);
    job.setInputFormatClass(RecordServiceInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path outputPath = new Path(output);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraChecksum.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    boolean useRecordService = false;
    Job job = Job.getInstance(getConf());
    if (args.length != 2 && args.length != 3) {
        usage();//from   w  w w .j  a  va 2s .  c  om
        return 2;
    }
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    }

    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    if (useRecordService) {
        RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
        job.setInputFormatClass(RecordServiceTeraInputFormat.class);
    } else {
        TeraInputFormat.setInputPaths(job, new Path(args[0]));
        job.setInputFormatClass(TeraInputFormat.class);
    }
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraGen.java

License:Apache License

/**
 * @param args the cli arguments/*from   w  w  w  .  j  ava  2 s .  c  o  m*/
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraSort.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    boolean useRecordService = false;
    if (args.length != 2 && args.length != 3) {
        usage();/*from www  .  j  a  v  a2 s. c o  m*/
        return 1;
    }
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    }

    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    boolean useSimplePartitioner = getUseSimplePartitioner(job);

    if (useRecordService) {
        RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
        job.setInputFormatClass(RecordServiceTeraInputFormat.class);
        useSimplePartitioner = true;
    } else {
        Path inputDir = new Path(args[0]);
        TeraInputFormat.setInputPaths(job, inputDir);
        job.setInputFormatClass(TeraInputFormat.class);
    }

    Path outputDir = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            LOG.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    boolean useRecordService = false;
    if (args.length != 2 && args.length != 3) {
        usage();/*from   w ww  .j a  v a 2  s . c  o  m*/
        return 1;
    }
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    }

    Job job = Job.getInstance(getConf());
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    if (useRecordService) {
        RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
        job.setInputFormatClass(RecordServiceTeraInputFormat.class);
    } else {
        TeraInputFormat.setInputPaths(job, new Path(args[0]));
        job.setInputFormatClass(TeraInputFormat.class);
    }
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.sa.giraph.examples.componentisation.Job.java

License:Apache License

public static void main(String[] args) throws Exception {

    if (args.length != 3) {
        System.out.println("Componentisation Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation>");
        System.out.println("Example: 1 inputFolder outputFolder");
        return;/*from w w  w  .ja  va  2s .  c o  m*/
    }

    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());

    bspJob.getConfiguration().setVertexClass(ComponentisationVertex.class);
    bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), new Path(inputLocation));

    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);

    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));
    boolean verbose = true;

    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }

}

From source file:com.cloudera.sa.giraph.examples.kcore.Job.java

License:Apache License

public static void main(String[] args) throws Exception {

    if (args.length != 3) {
        System.out.println("KCore Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation>");
        System.out.println("Example: 1 inputFolder outputFolder");
        return;//  www.  ja  va 2  s . c o  m
    }

    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());

    bspJob.getConfiguration().setVertexClass(KCoreVertex.class);
    bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), new Path(inputLocation));

    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);

    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));
    boolean verbose = true;

    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }

}