List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath
public static void setOutputPath(Job job, Path outputDir)
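Sets the directory where the job writes its output files. The directory must not exist when the job is submitted; FileOutputFormat's output check rejects the job otherwise, which is why several of the examples below delete the path first. A minimal driver sketch, with illustrative class and path names (not taken from any of the sources below), relying on Hadoop's default identity mapper and reducer:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ExampleDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example");
        job.setJarByClass(ExampleDriver.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // The output directory must not already exist; FileOutputFormat
        // fails the job at submission time if it does.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}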
From source file:com.kylinolap.job.hadoop.invertedindex.InvertedIndexJob.java
License:Apache License
private void setupReduceOutput(Path output, short sharding) throws IOException {
    job.setReducerClass(InvertedIndexReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setNumReduceTasks(sharding);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.OUTPUT_PATH, output.toString());

    deletePath(job.getConfiguration(), output);
}
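Note that deletePath and BatchConstants are Kylin project helpers, not Hadoop APIs. A minimal sketch of what such a delete helper typically looks like, assuming the standard org.apache.hadoop.fs.FileSystem API (the project's actual implementation may differ):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical stand-in for the project's deletePath helper: recursively
// remove the output directory so a re-run does not fail because the path
// already exists.
public static void deletePath(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.exists(path)) {
        fs.delete(path, true); // true = recursive delete
    }
}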
From source file:com.kylinolap.job.hadoop.invertedindex.RandomKeyDistributionJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_KEY_CLASS);
        options.addOption(OPTION_REGION_MB);
        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        job.setJarByClass(this.getClass());
        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        String keyClass = getOptionValue(OPTION_KEY_CLASS);
        Class<?> keyClz = Class.forName(keyClass);
        int regionMB = Integer.parseInt(getOptionValue(OPTION_REGION_MB));

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RandomKeyDistributionMapper.class);
        job.setMapOutputKeyClass(keyClz);
        job.setMapOutputValueClass(NullWritable.class);

        // Reducer - only one
        job.setReducerClass(RandomKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(keyClz);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        // total map input MB
        double totalMapInputMB = this.getTotalMapInputMB();
        int regionCount = Math.max(1, (int) (totalMapInputMB / regionMB));
        int mapSampleNumber = 1000;
        System.out.println("Total Map Input MB: " + totalMapInputMB);
        System.out.println("Region Count: " + regionCount);

        // set job configuration
        job.getConfiguration().set(BatchConstants.MAPPER_SAMPLE_NUMBER, String.valueOf(mapSampleNumber));
        job.getConfiguration().set(BatchConstants.REGION_NUMBER, String.valueOf(regionCount));

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.laizuozuoba.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    // First job: word count.
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Second job: reads the first job's output directory as its input.
    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    // Chain the jobs: JobControl runs job2 only after job succeeds.
    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);

    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);
    Thread jcThread = new Thread(jc);
    jcThread.start();

    // Poll until all jobs finish or any job fails.
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}
From source file:com.lakhani.anchorgraph.anchorgraph.java
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // DistributedCache.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"), conf);
    conf.set("numberCentroids", args[3]);
    conf.set("numberFeatures", args[4]);
    Job job = new Job(conf, "anchorgraph");
    job.addCacheFile(new URI(args[2]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    // args[1] is the output directory; args[2] is already used above as the
    // cache file URI and cannot double as the output path.
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJarByClass(anchorgraph.class);
    job.submit();
    // waitForCompletion returns true on success; 0 is the success exit code.
    int rc = job.waitForCompletion(true) ? 0 : 1;
    return rc;
}
From source file:com.lakhani.anchorgraph.testCache.java
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "testCache");
    job.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJarByClass(testCache.class);
    job.submit();
    // waitForCompletion returns true on success; 0 is the success exit code.
    int rc = job.waitForCompletion(true) ? 0 : 1;
    return rc;
}
From source file:com.lakhani.anchorgraph.wordcount.java
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "wordcount"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setJarByClass(wordcount.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true);//from ww w . ja va 2 s .co m }
From source file:com.leon.hadoop.loganalyse.DistributedGrep.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);
    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.leon.hadoop.loganalyse.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.lightboxtechnologies.spectrum.BlockHasher.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: BlockHasher <imageID> <image> <output>");
        return 2;
    }

    final String imageID = args[0];
    final String image = args[1];
    final String output = args[2];

    Configuration conf = getConf();
    final Job job = SKJobFactory.createJobFromConf(imageID, image, "BlockHasher", conf);
    job.setJarByClass(BlockHasher.class);
    job.setMapperClass(BlockHashMapper.class);
    // job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(0);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(MD5Hash.class);
    FileOutputFormat.setOutputPath(job, new Path(output));

    conf.setInt("mapred.job.reuse.jvm.num.tasks", -1);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.lightboxtechnologies.spectrum.FolderCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: FolderCount <table> <outpath>");
        System.exit(2);
    }

    final Job job = new Job(conf, "FolderCount");
    job.setJarByClass(FolderCount.class);
    job.setMapperClass(FolderCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    final Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}