Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setOutputFormatClass, collected from open-source projects.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
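
Before the project examples below, here is a minimal, self-contained driver showing the call in context. This is only a sketch: it assumes the Hadoop 2.x Job.getInstance factory and the stock TextInputFormat/TextOutputFormat; the class name and the two path arguments are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetOutputFormatExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setOutputFormatClass example");
        job.setJarByClass(SetOutputFormatExample.class);

        // Map-only identity job: the default Mapper passes each
        // (offset, line) pair straight through to the output format.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        // The method documented on this page. It must be called while the
        // job is still being defined; after the job has been submitted it
        // throws IllegalStateException.
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

TextOutputFormat writes tab-separated key/value lines; the examples below swap in MongoOutputFormat and other formats in exactly the same way.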

Usage

From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMapReduce_Continuous_Features.java

/**
 * @param args
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws InterruptedException 
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    int number_of_classes = 1;
    int number_of_features = 1;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Job job = new Job(conf, "NaiveBayesClassifierMapReduce_Continuous_Features");
    job.setJarByClass(NaiveBayesClassifierMapReduce_Continuous_Features.class);

    conf = job.getConfiguration(); // Mandatory: the Job keeps its own copy of the Configuration, so later set* calls must go through this reference.

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapArrayWritable.class);

    job.setMapperClass(NaiveBayesClassifierMap_Continuous_Features.class);
    job.setReducerClass(NaiveBayesClassifierReduce_Continuous_Features.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path out = new Path(args[1]);
    if (fs.exists(out))
        fs.delete(out, true);
    FileOutputFormat.setOutputPath(job, out);
    number_of_classes = Integer.parseInt(args[2]);
    number_of_features = Integer.parseInt(args[3]);
    conf.setInt("number_of_classes", number_of_classes);
    conf.setInt("number_of_features", number_of_features);

    try {
        job.waitForCompletion(true);

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.mongodb.hadoop.examples.snmp.SnmpStatisticMongoTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf);
    if (outputUri == null)
        throw new IllegalStateException("output uri is not set");
    if (MongoConfigUtil.getInputURI(conf) == null)
        throw new IllegalStateException("input uri is not set");
    final String outputCollectionName = outputUri.getCollection();
    final Job job = new Job(conf, "snmp analysis " + outputCollectionName);
    job.setJarByClass(SnmpStatisticMongoTool.class);
    job.setMapperClass(MapHostUploadEachAPEachDay.class);
    job.setReducerClass(ReduceHostUploadEachAPEachDay.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}

From source file:com.mongodb.hadoop.examples.snmp.SnmpStatisticMongoTool.java

License:Apache License

public static void main(String[] args) throws Exception {
    boolean use_shards = true;
    boolean use_chunks = false;
    final Configuration conf = new Configuration();
    String output_table = null;

    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:30000/test.snmp");
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    if (use_chunks) {
        if (use_shards)
            output_table = "snmp_with_shards_and_chunks";
        else
            output_table = "snmp_with_chunks";
    } else {
        if (use_shards)
            output_table = "snmp_with_shards";
        else
            output_table = "snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:30000/test." + output_table);
    final Job job = new Job(conf, "snmp analysis " + output_table);
    job.setJarByClass(SnmpStatisticMongoTool.class);
    job.setMapperClass(MapHostUploadEachAPEachDay.class);
    job.setReducerClass(ReduceHostUploadEachAPEachDay.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);
    boolean result = job.waitForCompletion(true);
    System.exit(result ? 0 : 1);
}

From source file:com.mongodb.hadoop.examples.snmp.SnmpStatisticWithCombiner.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf);
    if (outputUri == null)
        throw new IllegalStateException("output uri is not set");
    if (MongoConfigUtil.getInputURI(conf) == null)
        throw new IllegalStateException("input uri is not set");
    final String outputCollectionName = outputUri.getCollection();
    final Job job = new Job(conf, "snmp analysis " + outputCollectionName);
    job.setJarByClass(SnmpStatisticWithCombiner.class);
    job.setMapperClass(MapHostUploadOnEachAPPerDay.class);
    job.setCombinerClass(CombineHostUploadOnEachAPPerDay.class);
    job.setReducerClass(ReduceHostUploadOnEachAPPerDay.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
        return result ? 0 : 1;
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
        return 1;
    }
}

From source file:com.mongodb.hadoop.examples.snmp.SnmpStatisticWithCombiner.java

License:Apache License

public static void main(String[] args) throws Exception {
    boolean use_shards = true;
    boolean use_chunks = false;
    final Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:30000/test.snmp");
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table = null;
    if (use_chunks) {
        if (use_shards)
            output_table = "snmp_with_shards_and_chunks";
        else
            output_table = "snmp_with_chunks";
    } else {
        if (use_shards)
            output_table = "snmpWithShards";
        else
            output_table = "snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:30000/test." + output_table);
    final Job snmpJob = new Job(conf, "snmp analysis " + output_table);
    snmpJob.setJarByClass(SnmpStatisticWithCombiner.class);
    snmpJob.setMapperClass(MapHostUploadOnEachAPPerDay.class);
    snmpJob.setCombinerClass(CombineHostUploadOnEachAPPerDay.class);
    snmpJob.setReducerClass(ReduceHostUploadOnEachAPPerDay.class);
    snmpJob.setOutputKeyClass(Text.class);
    snmpJob.setOutputValueClass(LongWritable.class);
    snmpJob.setInputFormatClass(MongoInputFormat.class);
    snmpJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result = snmpJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
}

From source file:com.mongodb.hadoop.examples.snmp.SnmpStatistic_MapReduceChain.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf);
    if (outputUri == null)
        throw new IllegalStateException("output uri is not set");
    if (MongoConfigUtil.getInputURI(conf) == null)
        throw new IllegalStateException("input uri is not set");
    final String outputCollectionName = outputUri.getCollection();
    if (!outputCollectionName.startsWith("second")) {
        final Job job = new Job(conf, "snmp analysis " + outputCollectionName);
        job.setJarByClass(SnmpStatistic_MapReduceChain.class);
        job.setMapperClass(MapHostUploadEachAPEachDay.class);
        job.setReducerClass(ReduceHostUploadEachAPEachDay.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setInputFormatClass(MongoInputFormat.class);
        job.setOutputFormatClass(MongoOutputFormat.class);
        boolean result = job.waitForCompletion(true);
        return (result ? 0 : 1);
    } else {
        final Job secondJob = new Job(conf, "snmp analysis " + outputCollectionName);
        secondJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
        secondJob.setMapperClass(MapHostUploadEachDay.class);
        secondJob.setReducerClass(ReduceHostUploadEachDay.class);
        secondJob.setOutputKeyClass(Text.class);
        secondJob.setOutputValueClass(LongWritable.class);
        secondJob.setInputFormatClass(MongoInputFormat.class);
        secondJob.setOutputFormatClass(MongoOutputFormat.class);
        boolean result2 = secondJob.waitForCompletion(true);
        return (result2 ? 0 : 1);
    }
}

From source file:com.mongodb.hadoop.examples.snmp.SnmpStatistic_MapReduceChain.java

License:Apache License

public static void main(String[] args) throws Exception {
    boolean use_shards = true;
    boolean use_chunks = false;
    //******************This is the first job.******************/
    final Configuration firstConf = new Configuration();
    MongoConfigUtil.setInputURI(firstConf, "mongodb://localhost:30000/test.snmp");
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table = null;
    if (use_chunks) {
        if (use_shards)
            output_table = "snmp_with_shards_and_chunks";
        else
            output_table = "snmp_with_chunks";
    } else {
        if (use_shards)
            output_table = "snmp_with_shards";
        else
            output_table = "snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(firstConf, "mongodb://localhost:30000/test." + output_table);
    final Job firstJob = new Job(firstConf, "snmp analysis " + output_table);
    firstJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    firstJob.setMapperClass(MapHostUploadEachAPEachDay.class);
    firstJob.setReducerClass(ReduceHostUploadEachAPEachDay.class);
    firstJob.setOutputKeyClass(Text.class);
    firstJob.setOutputValueClass(LongWritable.class);
    firstJob.setInputFormatClass(MongoInputFormat.class);
    firstJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result = firstJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }

    //*****************This is the second job.********************/
    final Configuration secondConf = new Configuration();
    MongoConfigUtil.setInputURI(secondConf, "mongodb://localhost:30000/test." + output_table);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table2 = null;
    if (use_chunks) {
        if (use_shards)
            output_table2 = "second_snmp_with_shards_and_chunks";
        else
            output_table2 = "second_snmp_with_chunks";
    } else {
        if (use_shards)
            output_table2 = "second_snmp_with_shards";
        else
            output_table2 = "second_snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(secondConf, "mongodb://localhost:30000/test." + output_table2);
    final Job secondJob = new Job(secondConf, "snmp analysis " + output_table2);
    secondJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    secondJob.setMapperClass(MapHostUploadEachDay.class);
    secondJob.setReducerClass(ReduceHostUploadEachDay.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(LongWritable.class);
    secondJob.setInputFormatClass(MongoInputFormat.class);
    secondJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result2 = secondJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result2);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
}

From source file:com.mongodb.hadoop.examples.ufos.UfoSightings.java

License:Apache License

public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();

    final Job job = new Job(conf, "ufo-sightings");

    job.setMapperClass(UfoSightingsMapper.class);
    job.setReducerClass(UfoSightingsReducer.class);
    job.setOutputFormatClass(com.mongodb.hadoop.MongoOutputFormat.class);
    job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
    job.setOutputValueClass(org.apache.hadoop.io.IntWritable.class);
    job.setInputFormatClass(com.mongodb.hadoop.MongoInputFormat.class);

    final boolean verbose = true;
    try {
        if (BACKGROUND) {
            LOG.info("Setting up and running MapReduce job in background.");
            job.submit();
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            return job.waitForCompletion(true) ? 0 : 1;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}

From source file:com.mongodb.hadoop.examples.wordcount.split.WordCountSplitTest.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    boolean useQuery = false;
    for (int i = 0; i < args.length; i++) {
        final String argi = args[i];
        if (argi.equals("--use-query"))
            useQuery = true;
        else {
            throw new IllegalArgumentException(argi);
        }
    }

    if (useQuery) {
        //NOTE: must do this BEFORE Job is created
        final MongoConfig mongo_conf = new MongoConfig(conf);
        com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
        query.put("num", new com.mongodb.BasicDBObject(Collections.singletonMap("$mod", new int[] { 2, 0 })));
        System.out.println(" --- setting query on num");
        mongo_conf.setQuery(query);
        System.out.println(" --- query is: " + mongo_conf.getQuery());
    }

    final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf);
    if (outputUri == null)
        throw new IllegalStateException("output uri is not set");
    if (MongoConfigUtil.getInputURI(conf) == null)
        throw new IllegalStateException("input uri is not set");
    final String outputCollectionName = outputUri.getCollection();

    final Job job = new Job(conf, "word count " + outputCollectionName);

    job.setJarByClass(WordCountSplitTest.class);

    job.setMapperClass(TokenizerMapper.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    final long start = System.currentTimeMillis();
    System.out
            .println(" ----------------------- running test " + outputCollectionName + " --------------------");
    boolean result = false;
    try {
        result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.err.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
        return 1;
    }
    final long end = System.currentTimeMillis();
    final float seconds = ((float) (end - start)) / 1000;
    java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
    nf.setMaximumFractionDigits(3);
    System.out.println("finished run in " + nf.format(seconds) + " seconds");

    com.mongodb.Mongo m = new com.mongodb.Mongo(outputUri);
    com.mongodb.DB db = m.getDB(outputUri.getDatabase());
    com.mongodb.DBCollection coll = db.getCollection(outputCollectionName);
    com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
    query.put("_id", "the");
    com.mongodb.DBCursor cur = coll.find(query);
    if (!cur.hasNext())
        System.out.println("FAILURE: could not find count of \'the\'");
    else
        System.out.println("'the' count: " + cur.next());

    // Tool's run() return value becomes the process exit code when launched
    // through ToolRunner, so report the job's actual outcome.
    return result ? 0 : 1;
}