Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

This page collects usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
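A minimal, runnable driver sketch showing where setOutputFormatClass fits. It uses the identity Mapper and Reducer so no extra classes are needed; note that the method must be called while the job is still being defined, otherwise it throws IllegalStateException:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputFormatDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "output-format-demo");
        job.setJarByClass(OutputFormatDemo.class);
        job.setMapperClass(Mapper.class);    // identity mapper
        job.setReducerClass(Reducer.class);  // identity reducer
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        // Must be called before the job is submitted; afterwards the job
        // leaves the DEFINE state and this call throws IllegalStateException.
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}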

Usage

From source file:AllLab_Skeleton.Lab6.ReduceSideJoin.java

public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf, "ReduceSideJoin");
    job.setJarByClass(ReduceSideJoin.class);

    // Use MultipleInputs to set which input uses what mapper
    // This will keep parsing of each data set separate from a logical
    // standpoint
    // The first two elements of the args array are the two inputs
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, UserJoinMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentJoinMapper.class);
    job.getConfiguration().set("join.type", "leftouter");
    //job.setNumReduceTasks(0);
    job.setReducerClass(UserJoinReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[2]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.waitForCompletion(true);
}

From source file:alluxio.examples.keyvalue.hadoop.CloneStoreMapReduce.java

License:Apache License

/**
 * @param args two parameters, the first is the input key-value store path, the second is the
 *    output key-value store path
 * @throws Exception if any exception happens
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // NOTE(binfan): we are using the deprecated constructor of Job instance to compile with
    // hadoop-1.0. If this is not a concern, a better way is
    //     Job job = Job.getInstance(conf);
    Job job = new Job(conf);

    job.setJobName("CloneStoreMapReduce");
    job.setJarByClass(CloneStoreMapReduce.class);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setMapperClass(CloneStoreMapper.class);
    job.setReducerClass(CloneStoreReducer.class);

    job.setInputFormatClass(KeyValueInputFormat.class);
    job.setOutputFormatClass(KeyValueOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
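
On Hadoop 2.x and later, the deprecated constructor that the NOTE above mentions can be replaced with the factory method; a minimal sketch of the modern equivalent:

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJobName("CloneStoreMapReduce");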

From source file:ar.edu.ungs.garules.CensusJob.java

License:Apache License

/**
 * Main - runs the process
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {

    long time = System.currentTimeMillis();
    Individual<BitSet> bestInd = null;
    if (args.length != 2)
        args = DEFAULT_ARGS;

    // GA setup
    // --------------------------------------------------------------------------------------------------------------
    Set<Individual<BitSet>> bestIndividuals = new HashSet<Individual<BitSet>>();
    List<Gene> genes = new ArrayList<Gene>();
    genes.add(genCondicionACampo);
    genes.add(genCondicionAOperador);
    genes.add(genCondicionAValor);
    genes.add(genCondicionBPresente);
    genes.add(genCondicionBCampo);
    genes.add(genCondicionBOperador);
    genes.add(genCondicionBValor);
    genes.add(genCondicionCPresente);
    genes.add(genCondicionCCampo);
    genes.add(genCondicionCOperador);
    genes.add(genCondicionCValor);
    genes.add(genPrediccionCampo);
    genes.add(genPrediccionValor);

    Map<Gene, Ribosome<BitSet>> translators = new HashMap<Gene, Ribosome<BitSet>>();
    for (Gene gene : genes)
        translators.put(gene, new BitSetToIntegerRibosome(0));

    Genome<BitSet> genome = new BitSetGenome("Chromosome 1", genes, translators);

    Parameter<BitSet> par = new Parameter<BitSet>(0.035, 0.9, 200, new DescendantAcceptEvaluator<BitSet>(),
            new CensusFitnessEvaluator(), new BitSetOnePointCrossover(), new BitSetFlipMutator(), null,
            new BitSetRandomPopulationInitializer(), null, new ProbabilisticRouletteSelector(),
            new GlobalSinglePopulation<BitSet>(genome), 500, 100d, new BitSetMorphogenesisAgent(), genome);

    ParallelFitnessEvaluationGA<BitSet> ga = new ParallelFitnessEvaluationGA<BitSet>(par);
    ga.init();
    // --------------------------------------------------------------------------------------------------------------
    // End of GA setup

    // Iterate up to the maximum number of generations allowed
    for (int i = 0; i < par.getMaxGenerations(); i++) {
        ga.initGeneration();
        Configuration conf = new Configuration();

        // Debug
        //showPopulation(ga.getPopulation());
        //System.out.println((System.currentTimeMillis() - time) / 1000 + "s elapsed since the start");

        // Pass the conditions to evaluate as job parameters
        Iterator<Individual<BitSet>> ite = ga.getPopulation().iterator();
        int contador = 0;
        Set<String> expUnicas = new HashSet<String>();
        while (ite.hasNext()) {
            Individual<BitSet> ind = ite.next();
            String rep = RuleStringAdaptor.adapt(RuleAdaptor.adapt(ind));
            expUnicas.add(rep);
        }
        for (String rep : expUnicas)
            if (ocurrencias.get(rep) == null) {
                conf.set(String.valueOf(contador), rep);
                contador++;
            }

        // Configure job i
        Job job = new Job(conf, "GA rules - Generation " + i);
        job.setJarByClass(CensusJob.class);
        job.setMapperClass(CensusMapper.class);
        job.setCombinerClass(CensusReducer.class);
        job.setReducerClass(CensusReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1] + "g" + i));

        // Run the MapReduce job representing generation i
        job.waitForCompletion(true);

        // Compute the fitness from the job output; if there is a better individual, add it to the set of best individuals
        llenarOcurrencias(conf, args[1] + "g" + i);

        // Run the GA for this generation.
        Individual<BitSet> winnerGen = ga.run(new CensusFitnessEvaluator(ocurrencias));

        // Keep the best individuals
        if (bestInd == null) {
            bestInd = winnerGen;
            bestIndividuals.add(winnerGen);
        } else if (winnerGen.getFitness() > bestInd.getFitness()) {
            bestInd = winnerGen;
            bestIndividuals.add(winnerGen);
        }

        // Debug
        System.out.println("Mejor Individuo Generacion " + i + " => " + RuleAdaptor.adapt(bestInd)
                + " => Fitness = " + bestInd.getFitness());

    }

    // Sort and display the best individuals
    List<Individual<BitSet>> bestIndList = new ArrayList<Individual<BitSet>>(bestIndividuals);
    Collections.sort(bestIndList, new Comparator<Individual<BitSet>>() {
        public int compare(Individual<BitSet> o1, Individual<BitSet> o2) {
            return (o1.getFitness() > o2.getFitness() ? -1 : (o1.getFitness() == o2.getFitness() ? 0 : 1));
        }
    });
    showPopulation(bestIndList);
    System.out.println("Tiempo total de corrida " + (System.currentTimeMillis() - time) / 1000 + "s");

}
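
The helper llenarOcurrencias is not shown in this source; a sketch of how such a method could read back the (Text, IntWritable) pairs that SequenceFileOutputFormat wrote for a generation. The single part file name part-r-00000 is an assumption; real output directories may contain several part files, and the older SequenceFile.Reader constructor is used to match the pre-2.x API style of the surrounding code:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

static Map<String, Integer> readCounts(Configuration conf, String dir) throws IOException {
    Map<String, Integer> counts = new HashMap<String, Integer>();
    // Assumption: a single reducer, hence a single part file.
    Path part = new Path(dir, "part-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), part, conf);
    try {
        Text key = new Text();
        IntWritable value = new IntWritable();
        while (reader.next(key, value)) {
            counts.put(key.toString(), value.get());
        }
    } finally {
        reader.close();
    }
    return counts;
}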

From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println(
                "USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>");
        return;
    }
    System.out.println(Arrays.toString(args));
    Configuration conf = new Configuration();
    conf.setInt("hama.num.vertices", Integer.parseInt(args[0]));
    conf.setInt("hama.num.partitions", Integer.parseInt(args[2]));
    conf.setInt("number.edges", Integer.parseInt(args[1]));
    Job job = new Job(conf);

    Path generated = new Path(new Path(args[3]).getParent(), "generated");
    FileOutputFormat.setOutputPath(job, generated);
    FileSystem.get(conf).delete(generated, true);

    job.setJobName("RangeWriter");

    job.setJarByClass(SortGenMapper.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setInputFormatClass(RangeInputFormat.class);

    job.waitForCompletion(true);
    conf.setInt("max.id", Integer.valueOf(args[0]));
    job = new Job(conf);

    FileOutputFormat.setOutputPath(job, new Path(args[3]));
    FileSystem.get(conf).delete(new Path(args[3]), true);

    job.setJobName("Random Vertex Writer");

    FileInputFormat.addInputPath(job, generated);

    job.setJarByClass(RandomMapper.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2));
    job.setPartitionerClass(HashPartitioner.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.waitForCompletion(true);
}
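
Because the second job consumes the first job's output, a more defensive variant (a sketch, not part of the original) would check the boolean returned by waitForCompletion before starting the next stage:

    if (!job.waitForCompletion(true)) {
        System.err.println("RangeWriter stage failed; aborting");
        return;
    }
    // ...only then configure and launch the "Random Vertex Writer" job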

From source file:AverageProj.AveragePrice.java

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "Avg");
    job.setJarByClass(AveragePrice.class);
    //final File f = new File(AveragePrice.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(AvMapper.class);
    job.setMapOutputKeyClass(YearPrice.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setGroupingComparatorClass(YearSymComparator.class);
    // job.setCombinerClass(AvReducer.class);
    job.setReducerClass(AvReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(YearPrice.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setPartitionerClass(AvgPartitioner.class);
    job.setNumReduceTasks(7);
    return job.waitForCompletion(true) ? 0 : 1;
}
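
AvgPartitioner is referenced but not shown above; a minimal sketch of what such a custom Partitioner could look like, assuming (hypothetically) that the composite key YearPrice exposes a getYear() accessor:

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class AvgPartitioner extends Partitioner<YearPrice, DoubleWritable> {
    @Override
    public int getPartition(YearPrice key, DoubleWritable value, int numPartitions) {
        // Partition by year only (getYear() is a hypothetical accessor) so the
        // grouping comparator sees all records for a year in a single reducer.
        return (key.getYear() & Integer.MAX_VALUE) % numPartitions;
    }
}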

From source file:avro.mr.MapReduceColorCount.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}
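
With AvroKeyValueOutputFormat, the reducer writes AvroKey/AvroValue wrappers matching the schemas registered through AvroJob; a sketch of a compatible ColorCountReducer (the body is an assumption modeled on the standard Avro color-count example, not this project's source):

import java.io.IOException;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public static class ColorCountReducer
        extends Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // Emit wrappers matching the STRING/INT schemas set via AvroJob.
        context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
    }
}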

From source file:avro.mr.MapReduceColorCountSeqFile.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceColorCountSeqFile.class);
    job.setJobName("Color Count 2");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroSequenceFileInputFormat.class);
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.LONG));
    AvroJob.setInputValueSchema(job, User.getClassSchema());

    job.setMapperClass(ColorCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroSequenceFileOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:azkaban.jobtype.examples.java.WordCount2.java

License:Apache License

@Override
public void run() throws Exception {
    logger.info(String.format("Starting %s", getClass().getSimpleName()));
    //getConf() and set
    Job job = createJob(MapperClass.class, ReducerClass.class);
    job.setJarByClass(WordCount2.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    super.run();

}

From source file:basic.PartitionGraph.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(RANGE, "use range partitioner"));

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions")
            .create(NUM_PARTITIONS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(NUM_PARTITIONS)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inPath = cmdline.getOptionValue(INPUT);
    String outPath = cmdline.getOptionValue(OUTPUT);
    int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS));
    boolean useRange = cmdline.hasOption(RANGE);

    LOG.info("Tool name: " + PartitionGraph.class.getSimpleName());
    LOG.info(" - input dir: " + inPath);
    LOG.info(" - output dir: " + outPath);
    LOG.info(" - num partitions: " + numParts);
    LOG.info(" - node cnt: " + nodeCount);
    LOG.info(" - use range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.setInt("NodeCount", nodeCount);

    Job job = Job.getInstance(conf);
    job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath);
    job.setJarByClass(PartitionGraph.class);

    job.setNumReduceTasks(numParts);

    FileInputFormat.setInputPaths(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);

    job.waitForCompletion(true);

    return 0;
}
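
NonSplitableSequenceFileInputFormat is a project-local class; such input formats are typically written by disabling splitting, as in this sketch (an assumption, not the tool's actual source):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;

public class NonSplitableSequenceFileInputFormat<K, V> extends SequenceFileInputFormat<K, V> {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        // Keep each sequence file in a single map task.
        return false;
    }
}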

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

@SuppressWarnings("rawtypes")
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {
    Job job = new Job(new Configuration());

    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
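
A usage sketch for the helper above (MyMapper and MyReducer are hypothetical placeholder classes, not part of this library):

    Job job = Tools.prepareJob(new Path("/data/in"), new Path("/data/out"),
            TextInputFormat.class, MyMapper.class, Text.class, IntWritable.class,
            MyReducer.class, Text.class, IntWritable.class, TextOutputFormat.class);
    job.waitForCompletion(true);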