Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath

List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath.

Prototype

public static void setOutputPath(Job job, Path outputDir) 

Source Link

Document

Set the Path of the output directory for the map-reduce job.

Usage

From source file:com.javiertordable.mrif.MapReduceQuadraticSieve.java

License:Apache License

/**
 * Sets up the MapReduce parameters for the quadratic sieve and runs the job.
 *
 * <p>Tool parses the generic command line arguments for us; the first
 * remaining argument is the integer to factor. The integer and its factor
 * base are stored in the job configuration, and the full sieve interval is
 * written under {@code input/} before the job is submitted.
 *
 * @param args command line arguments; {@code args[0]} is the integer to factor
 * @return 0 if the job completed successfully; 1 if the integer is missing,
 *         the sieve input could not be written, or the job failed
 * @throws Exception if the job cannot be configured or submitted
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    // Check the arguments. We need the integer to attempt to factor.
    if (args.length < 1) {
        System.out.println("Please indicate the integer to factor");
        LOGGER.severe("No integer to factor. Exit.");
        // Report the failure through the return value instead of killing
        // the JVM from inside run(), so the caller decides how to exit.
        return 1;
    }

    // Parse N and add it to the job configuration, so that the workers can
    // access it as well.
    BigInteger N = new BigInteger(args[0]);
    LOGGER.info("Attempting factorization of: " + N.toString());
    conf.set(INTEGER_TO_FACTOR_NAME, N.toString());

    // Obtain the factor base for the integer N.
    FactorBaseArray factorBase = SieveInput.factorBase(N);
    LOGGER.info("Factor base of size: " + factorBase.size());
    conf.set(FACTOR_BASE_NAME, factorBase.toString());

    // Prepare the input of the mapreduce.
    LOGGER.info("Sieve of size: " + SieveInput.fullSieveIntervalSize(N));
    try {
        // Write the full sieve interval to disk.
        SieveInput.writeFullSieveInterval(N, "input/" + INPUT_FILE_NAME);
    } catch (FileNotFoundException e) {
        // Without the input file the job has nothing to process, so stop
        // here rather than submitting a job that cannot succeed.
        System.out.println("Unable to open the file for writing.");
        LOGGER.severe("Unable to open the sieve input file for writing: " + e);
        return 1;
    } catch (IOException e) {
        System.out.println("Unable to write to the output file.");
        LOGGER.severe("Unable to write the sieve input file: " + e);
        return 1;
    }

    // Configure the classes of the mapreducer
    Job job = new Job(conf, "QuadraticSieve");
    job.setJarByClass(MapReduceQuadraticSieve.class);
    job.setMapperClass(SieveMapper.class);
    job.setReducerClass(FindSquaresReducer.class);

    // Output will be two pairs of strings:
    // <"Factor1", "59">
    // <"Factor2", "101">
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path("input/"));
    FileOutputFormat.setOutputPath(job, new Path("output/"));

    // Submit the job and propagate its outcome to the caller.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jbw.mutioutputformat.PatitionByStation.java

/**
 * Configures and runs the station partitioning job.
 *
 * <p>The input and output locations are read from the {@code input} and
 * {@code output} properties of the tool's configuration rather than from the
 * positional arguments.
 *
 * @param strings remaining command line arguments (not used by this method)
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path input = new Path(conf.get("input"));
    Path output = new Path(conf.get("output"));

    // Build the job from the tool's configuration; a bare Job.getInstance()
    // would start from a fresh Configuration and drop every property that
    // was set on the one returned by getConf().
    Job job = Job.getInstance(conf);
    job.setJarByClass(PatitionByStation.class);
    job.setJobName("papapa");

    job.setMapperClass(StationMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setReducerClass(StationReducer.class);
    job.setOutputKeyClass(NullWritable.class);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jet.hadoop.wordcount.WordCount.java

License:Apache License

/**
 * Entry point of the word count example.
 *
 * <p>Every remaining argument except the last is added as an input path and
 * the last one is used as the output directory. When no arguments are given,
 * a hard-coded local input file and output directory are used instead. The
 * start time, end time and elapsed seconds are printed to standard output,
 * and the JVM exits with 0 on success or 1 on failure.
 *
 * @param args generic Hadoop options followed by {@code <in> [<in>...] <out>}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {

    final long startMillis = System.currentTimeMillis();
    System.out.println("hadoop wordcount begins at" + startMillis);

    // Nothing supplied: fall back to the built-in local paths.
    if (args == null || args.length == 0) {
        args = new String[] { "E:\\Work\\input\\hello.txt", "E:\\Work\\output" };
    }

    Configuration conf = new Configuration();
    GenericOptionsParser optionsParser = new GenericOptionsParser(conf, args);
    String[] paths = optionsParser.getRemainingArgs();
    if (paths.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // All paths but the last are inputs; the last one is the output directory.
    final int outputIndex = paths.length - 1;
    int inputIndex = 0;
    while (inputIndex < outputIndex) {
        FileInputFormat.addInputPath(job, new Path(paths[inputIndex]));
        inputIndex++;
    }
    FileOutputFormat.setOutputPath(job, new Path(paths[outputIndex]));

    final boolean succeeded = job.waitForCompletion(true);

    final long endMillis = System.currentTimeMillis();
    System.out.println("hadoop wordcount ended at" + endMillis);
    final long elapsedSeconds = (endMillis - startMillis) / 1000;
    System.out.println("hadoop wordcount cost time" + elapsedSeconds + " seconds.");

    System.exit(succeeded ? 0 : 1);
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageJob.java

License:Apache License

/**
 * Configures and runs the sample average job.
 *
 * <p>Reads text input from {@code args[0]} and writes gzip-compressed
 * sequence file output to {@code args[1]}.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, AverageJob.class.getSimpleName());
    job.setJarByClass(AverageJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Average Job");
    job.setMapperClass(AverageMapper.class);
    job.setCombinerClass(AverageCombiner.class);
    job.setReducerClass(AverageReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // Hand the outcome back to the caller instead of calling System.exit()
    // here, which terminated the JVM before run() could return.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageMultipleOutputJob.java

License:Apache License

/**
 * Configures and runs the sample multiple-output job.
 *
 * <p>In addition to the regular text output, two named outputs,
 * {@code greaterThan1000} and {@code lessThan1000}, are registered with
 * {@code Text} keys and {@code DoubleWritable} values.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, AverageMultipleOutputJob.class.getSimpleName());
    job.setJarByClass(AverageMultipleOutputJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Multiple Output Job");
    job.setMapperClass(AverageMapper.class);
    job.setReducerClass(AverageMultipleOutputReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleOutputs.addNamedOutput(job, "greaterThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "lessThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);

    // Hand the outcome back to the caller instead of calling System.exit()
    // here, which terminated the JVM before run() could return.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.BloomFilterJob.java

License:Apache License

/**
 * Configures and runs the sample Bloom filter job with a single reducer.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, BloomFilterJob.class.getSimpleName());
    job.setJarByClass(BloomFilterJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample BloomFilter Job");
    job.setMapperClass(BloomFilterMapper.class);
    job.setReducerClass(BloomFilterReducer.class);
    job.setNumReduceTasks(1);

    job.setInputFormatClass(TextInputFormat.class);

    /*
     * We want our reducer to output the final BloomFilter as a binary file. I think
     * Hadoop doesn't have this format [check later], so using NullOutputFormat.class.
     *
     * In general life gets a little more dangerous when you deviate from MapReduce's input/output
     * framework and start working with your own files. Your tasks are no longer guaranteed to be idempotent
     * and you'll need to understand how various failure scenarios can affect your tasks. For example, your files
     * may only be partially written when some tasks are restarted. Our example here is safe(r) because all the file
     * operations take place together only once in the close() method and in only one reducer. A more
     * careful/paranoid implementation would check each individual file operation more closely.
     */
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BloomFilter.class);

    // Hand the outcome back to the caller instead of calling System.exit()
    // here, which terminated the JVM before run() could return.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.ChainJob.java

License:Apache License

/**
 * Configures and runs the sample chain job.
 *
 * <p>The map side chains {@code ReverseMapper} and {@code AverageMapper};
 * the reduce side uses {@code AverageReducer}. Each chain element gets its
 * own {@code Configuration} created without the default resources.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, ChainJob.class.getSimpleName());
    job.setJobName("Sample Chain Job");
    job.setJarByClass(ChainJob.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    ChainMapper.addMapper(job, ReverseMapper.class, Text.class, Text.class, Text.class, Text.class,
            new Configuration(false));
    ChainMapper.addMapper(job, AverageMapper.class, Text.class, Text.class, Text.class, AverageWritable.class,
            new Configuration(false));
    ChainReducer.setReducer(job, AverageReducer.class, Text.class, AverageWritable.class, Text.class,
            DoubleWritable.class, new Configuration(false));

    // Hand the outcome back to the caller instead of calling System.exit()
    // here, which terminated the JVM before run() could return.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.CountJob.java

License:Apache License

/**
 * Configures and runs the sample count job with text input and text output.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, CountJob.class.getSimpleName());
    job.setJarByClass(CountJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Count Job");
    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // Hand the outcome back to the caller instead of calling System.exit()
    // here, which terminated the JVM before run() could return.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.DistributedCacheJob.java

License:Apache License

/**
 * Configures and runs the sample distributed cache job as a map-only job.
 *
 * @param args {@code args[1]} is the input path and {@code args[2]} the
 *             output path; {@code args[0]} is not read while the cache file
 *             registration below stays commented out
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, DistributedCacheJob.class.getSimpleName());
    job.setJarByClass(DistributedCacheJob.class);

    /*
     * The following will disseminate the file to all the nodes and the file defaults to HDFS.
     * The second and third arguments denote the input and output paths of the standard Hadoop
     * job. Note that we've limited the number of data sources to two. This is not an inherent
     * limitation of the technique, but a simplification that makes our code easier to follow.
     */
    //job.addCacheFile(new Path(args[0]).toUri());

    Path in = new Path(args[1]);
    Path out = new Path(args[2]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample DistributedCache Job");
    job.setMapperClass(DistributedCacheMapper.class);

    /*
     * Took out the Reduce class as the plan is performing the joining in the map phase and will
     * configure the job to have no reduce.
     */
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Hand the outcome back to the caller instead of calling System.exit()
    // here, which terminated the JVM before run() could return.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.juniarto.secondsorter.SsJob.java

/**
 * Configures and runs the secondary sort job with two reduce tasks, and logs
 * how long the job took in nanoseconds.
 *
 * @param allArgs command line arguments; after generic options are removed,
 *                the first remaining argument is the input path and the
 *                second the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
public int run(String[] allArgs) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);
    job.setNumReduceTasks(2);

    String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs();
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    long time1 = System.nanoTime();
    boolean status = job.waitForCompletion(true);
    long time2 = System.nanoTime();
    long timeSpent = time2 - time1;
    LOG.info("TIME: " + timeSpent);

    // Report a failed job to the caller; previously the status was computed
    // but ignored and 0 was returned unconditionally.
    return status ? 0 : 1;
}