Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

This page collects usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
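A minimal, runnable driver sketch showing where setOutputFormatClass fits. It uses the identity Mapper and Reducer so no extra classes are needed; note that the method must be called while the job is still being defined, otherwise it throws IllegalStateException:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputFormatDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "output-format-demo");
        job.setJarByClass(OutputFormatDemo.class);
        job.setMapperClass(Mapper.class);    // identity mapper
        job.setReducerClass(Reducer.class);  // identity reducer
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        // Must be called before the job is submitted; afterwards the job
        // leaves the DEFINE state and this call throws IllegalStateException.
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}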

Usage

From source file:AllLab_Skeleton.Lab6.ReduceSideJoin.java

public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf, "ReduceSideJoin");
    job.setJarByClass(ReduceSideJoin.class);

    // Use MultipleInputs to set which input uses what mapper
    // This will keep parsing of each data set separate from a logical
    // standpoint
    // The first two elements of the args array are the two inputs
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, UserJoinMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentJoinMapper.class);
    job.getConfiguration().set("join.type", "leftouter");
    //job.setNumReduceTasks(0);
    job.setReducerClass(UserJoinReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[2]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.waitForCompletion(true);
}

From source file:alluxio.examples.keyvalue.hadoop.CloneStoreMapReduce.java

License:Apache License

/**
 * @param args two parameters, the first is the input key-value store path, the second is the
 *    output key-value store path
 * @throws Exception if any exception happens
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // NOTE(binfan): we are using the deprecated constructor of Job instance to compile with
    // hadoop-1.0. If this is not a concern, a better way is
    //     Job job = Job.getInstance(conf);
    Job job = new Job(conf);

    job.setJobName("CloneStoreMapReduce");
    job.setJarByClass(CloneStoreMapReduce.class);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setMapperClass(CloneStoreMapper.class);
    job.setReducerClass(CloneStoreReducer.class);

    job.setInputFormatClass(KeyValueInputFormat.class);
    job.setOutputFormatClass(KeyValueOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
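
On Hadoop 2.x and later, the deprecated constructor that the NOTE above mentions can be replaced with the factory method; a minimal sketch of the modern equivalent:

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJobName("CloneStoreMapReduce");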

From source file:ar.edu.ungs.garules.CensusJob.java

License:Apache License

/**
 * Main - runs the process
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {

    long time = System.currentTimeMillis();
    Individual<BitSet> bestInd = null;
    if (args.length != 2)
        args = DEFAULT_ARGS;

    // GA setup
    // --------------------------------------------------------------------------------------------------------------
    Set<Individual<BitSet>> bestIndividuals = new HashSet<Individual<BitSet>>();
    List<Gene> genes = new ArrayList<Gene>();
    genes.add(genCondicionACampo);
    genes.add(genCondicionAOperador);
    genes.add(genCondicionAValor);
    genes.add(genCondicionBPresente);
    genes.add(genCondicionBCampo);
    genes.add(genCondicionBOperador);
    genes.add(genCondicionBValor);
    genes.add(genCondicionCPresente);
    genes.add(genCondicionCCampo);
    genes.add(genCondicionCOperador);
    genes.add(genCondicionCValor);
    genes.add(genPrediccionCampo);
    genes.add(genPrediccionValor);

    Map<Gene, Ribosome<BitSet>> translators = new HashMap<Gene, Ribosome<BitSet>>();
    for (Gene gene : genes)
        translators.put(gene, new BitSetToIntegerRibosome(0));

    Genome<BitSet> genome = new BitSetGenome("Chromosome 1", genes, translators);

    Parameter<BitSet> par = new Parameter<BitSet>(0.035, 0.9, 200, new DescendantAcceptEvaluator<BitSet>(),
            new CensusFitnessEvaluator(), new BitSetOnePointCrossover(), new BitSetFlipMutator(), null,
            new BitSetRandomPopulationInitializer(), null, new ProbabilisticRouletteSelector(),
            new GlobalSinglePopulation<BitSet>(genome), 500, 100d, new BitSetMorphogenesisAgent(), genome);

    ParallelFitnessEvaluationGA<BitSet> ga = new ParallelFitnessEvaluationGA<BitSet>(par);
    ga.init();
    // --------------------------------------------------------------------------------------------------------------
    // End of GA setup

    // Iterate up to the maximum number of generations allowed
    for (int i = 0; i < par.getMaxGenerations(); i++) {
        ga.initGeneration();
        Configuration conf = new Configuration();

        // Debug
        //showPopulation(ga.getPopulation());
        //System.out.println((System.currentTimeMillis() - time) / 1000 + "s elapsed since the start");

        // Pass the conditions to evaluate as job parameters
        Iterator<Individual<BitSet>> ite = ga.getPopulation().iterator();
        int contador = 0;
        Set<String> expUnicas = new HashSet<String>();
        while (ite.hasNext()) {
            Individual<BitSet> ind = ite.next();
            String rep = RuleStringAdaptor.adapt(RuleAdaptor.adapt(ind));
            expUnicas.add(rep);
        }
        for (String rep : expUnicas)
            if (ocurrencias.get(rep) == null) {
                conf.set(String.valueOf(contador), rep);
                contador++;
            }

        // Configure job i
        Job job = new Job(conf, "GA rules - Generation " + i);
        job.setJarByClass(CensusJob.class);
        job.setMapperClass(CensusMapper.class);
        job.setCombinerClass(CensusReducer.class);
        job.setReducerClass(CensusReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1] + "g" + i));

        // Run the MapReduce job representing generation i
        job.waitForCompletion(true);

        // Compute the fitness from the job output; if there is a better individual, add it to the set of best individuals
        llenarOcurrencias(conf, args[1] + "g" + i);

        // Run the GA for this generation.
        Individual<BitSet> winnerGen = ga.run(new CensusFitnessEvaluator(ocurrencias));

        // Keep the best individuals
        if (bestInd == null) {
            bestInd = winnerGen;
            bestIndividuals.add(winnerGen);
        } else if (winnerGen.getFitness() > bestInd.getFitness()) {
            bestInd = winnerGen;
            bestIndividuals.add(winnerGen);
        }

        // Debug
        System.out.println("Mejor Individuo Generacion " + i + " => " + RuleAdaptor.adapt(bestInd)
                + " => Fitness = " + bestInd.getFitness());

    }

    // Sort and display the best individuals
    List<Individual<BitSet>> bestIndList = new ArrayList<Individual<BitSet>>(bestIndividuals);
    Collections.sort(bestIndList, new Comparator<Individual<BitSet>>() {
        public int compare(Individual<BitSet> o1, Individual<BitSet> o2) {
            return (o1.getFitness() > o2.getFitness() ? -1 : (o1.getFitness() == o2.getFitness() ? 0 : 1));
        }
    });
    showPopulation(bestIndList);
    System.out.println("Tiempo total de corrida " + (System.currentTimeMillis() - time) / 1000 + "s");

}
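
The helper llenarOcurrencias is not shown in this source; a sketch of how such a method could read back the (Text, IntWritable) pairs that SequenceFileOutputFormat wrote for a generation. The single part file name part-r-00000 is an assumption; real output directories may contain several part files, and the older SequenceFile.Reader constructor is used to match the pre-2.x API style of the surrounding code:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

static Map<String, Integer> readCounts(Configuration conf, String dir) throws IOException {
    Map<String, Integer> counts = new HashMap<String, Integer>();
    // Assumption: a single reducer, hence a single part file.
    Path part = new Path(dir, "part-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), part, conf);
    try {
        Text key = new Text();
        IntWritable value = new IntWritable();
        while (reader.next(key, value)) {
            counts.put(key.toString(), value.get());
        }
    } finally {
        reader.close();
    }
    return counts;
}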

From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println(
                "USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>");
        return;
    }
    System.out.println(Arrays.toString(args));
    Configuration conf = new Configuration();
    conf.setInt("hama.num.vertices", Integer.parseInt(args[0]));
    conf.setInt("hama.num.partitions", Integer.parseInt(args[2]));
    conf.setInt("number.edges", Integer.parseInt(args[1]));
    Job job = new Job(conf);

    Path generated = new Path(new Path(args[3]).getParent(), "generated");
    FileOutputFormat.setOutputPath(job, generated);
    FileSystem.get(conf).delete(generated, true);

    job.setJobName("RangeWriter");

    job.setJarByClass(SortGenMapper.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setInputFormatClass(RangeInputFormat.class);

    job.waitForCompletion(true);
    conf.setInt("max.id", Integer.valueOf(args[0]));
    job = new Job(conf);

    FileOutputFormat.setOutputPath(job, new Path(args[3]));
    FileSystem.get(conf).delete(new Path(args[3]), true);

    job.setJobName("Random Vertex Writer");

    FileInputFormat.addInputPath(job, generated);

    job.setJarByClass(RandomMapper.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2));
    job.setPartitionerClass(HashPartitioner.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.waitForCompletion(true);
}
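
Because the second job consumes the first job's output, a more defensive variant (a sketch, not part of the original) would check the boolean returned by waitForCompletion before starting the next stage:

    if (!job.waitForCompletion(true)) {
        System.err.println("RangeWriter stage failed; aborting");
        return;
    }
    // ...only then configure and launch the "Random Vertex Writer" job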

From source file:AverageProj.AveragePrice.java

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "Avg");
    job.setJarByClass(AveragePrice.class);
    //final File f = new File(AveragePrice.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(AvMapper.class);
    job.setMapOutputKeyClass(YearPrice.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setGroupingComparatorClass(YearSymComparator.class);
    // job.setCombinerClass(AvReducer.class);
    job.setReducerClass(AvReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(YearPrice.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setPartitionerClass(AvgPartitioner.class);
    job.setNumReduceTasks(7);
    return job.waitForCompletion(true) ? 0 : 1;
}
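
AvgPartitioner is referenced but not shown above; a minimal sketch of what such a custom Partitioner could look like, assuming (hypothetically) that the composite key YearPrice exposes a getYear() accessor:

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class AvgPartitioner extends Partitioner<YearPrice, DoubleWritable> {
    @Override
    public int getPartition(YearPrice key, DoubleWritable value, int numPartitions) {
        // Partition by year only (getYear() is a hypothetical accessor) so the
        // grouping comparator sees all records for a year in a single reducer.
        return (key.getYear() & Integer.MAX_VALUE) % numPartitions;
    }
}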

From source file:avro.mr.MapReduceColorCount.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}
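
With AvroKeyValueOutputFormat, the reducer writes AvroKey/AvroValue wrappers matching the schemas registered through AvroJob; a sketch of a compatible ColorCountReducer (the body is an assumption modeled on the standard Avro color-count example, not this project's source):

import java.io.IOException;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public static class ColorCountReducer
        extends Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // Emit wrappers matching the STRING/INT schemas set via AvroJob.
        context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
    }
}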

From source file:avro.mr.MapReduceColorCountSeqFile.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceColorCountSeqFile.class);
    job.setJobName("Color Count 2");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroSequenceFileInputFormat.class);
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.LONG));
    AvroJob.setInputValueSchema(job, User.getClassSchema());

    job.setMapperClass(ColorCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroSequenceFileOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:azkaban.jobtype.examples.java.WordCount2.java

License:Apache License

@Override
public void run() throws Exception {
    logger.info(String.format("Starting %s", getClass().getSimpleName()));
    //getConf() and set
    Job job = createJob(MapperClass.class, ReducerClass.class);
    job.setJarByClass(WordCount2.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    super.run();

}

From source file:basic.PartitionGraph.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(RANGE, "use range partitioner"));

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions")
            .create(NUM_PARTITIONS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(NUM_PARTITIONS)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inPath = cmdline.getOptionValue(INPUT);
    String outPath = cmdline.getOptionValue(OUTPUT);
    int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS));
    boolean useRange = cmdline.hasOption(RANGE);

    LOG.info("Tool name: " + PartitionGraph.class.getSimpleName());
    LOG.info(" - input dir: " + inPath);
    LOG.info(" - output dir: " + outPath);
    LOG.info(" - num partitions: " + numParts);
    LOG.info(" - node cnt: " + nodeCount);
    LOG.info(" - use range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.setInt("NodeCount", nodeCount);

    Job job = Job.getInstance(conf);
    job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath);
    job.setJarByClass(PartitionGraph.class);

    job.setNumReduceTasks(numParts);

    FileInputFormat.setInputPaths(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);

    job.waitForCompletion(true);

    return 0;
}
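
NonSplitableSequenceFileInputFormat is a project-local class; such input formats are typically written by disabling splitting, as in this sketch (an assumption, not the tool's actual source):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;

public class NonSplitableSequenceFileInputFormat<K, V> extends SequenceFileInputFormat<K, V> {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        // Keep each sequence file in a single map task.
        return false;
    }
}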

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

@SuppressWarnings("rawtypes")
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {
    Job job = new Job(new Configuration());

    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
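
A usage sketch for the helper above (MyMapper and MyReducer are hypothetical placeholder classes, not part of this library):

    Job job = Tools.prepareJob(new Path("/data/in"), new Path("/data/out"),
            TextInputFormat.class, MyMapper.class, Text.class, IntWritable.class,
            MyReducer.class, Text.class, IntWritable.class, TextOutputFormat.class);
    job.waitForCompletion(true);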