List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
From source file:AllLab_Skeleton.Lab6.ReduceSideJoin.java
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "ReduceSideJoin"); job.setJarByClass(ReduceSideJoin.class); // Use MultipleInputs to set which input uses what mapper // This will keep parsing of each data set separate from a logical // standpoint // The first two elements of the args array are the two inputs MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, UserJoinMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentJoinMapper.class); job.getConfiguration().set("join.type", "leftouter"); //job.setNumReduceTasks(0); job.setReducerClass(UserJoinReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[2])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.waitForCompletion(true);// ww w .ja v a2s . c om }
From source file:alluxio.examples.keyvalue.hadoop.CloneStoreMapReduce.java
License:Apache License
/** * @param args two parameters, the first is the input key-value store path, the second is the * output key-value store path//www. ja va 2 s.com * @throws Exception if any exception happens */ public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // NOTE(binfan): we are using the deprecated constructor of Job instance to compile with // hadoop-1.0. If this is not a concern, a better way is // Job job = Job.getInstance(conf); Job job = new Job(conf); job.setJobName("CloneStoreMapReduce"); job.setJarByClass(CloneStoreMapReduce.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setMapperClass(CloneStoreMapper.class); job.setReducerClass(CloneStoreReducer.class); job.setInputFormatClass(KeyValueInputFormat.class); job.setOutputFormatClass(KeyValueOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:ar.edu.ungs.garules.CensusJob.java
License:Apache License
/** * Main -> Ejecucion del proceso//from ww w . j a v a 2 s . c o m * @param args * @throws Exception */ public static void main(String[] args) throws Exception { long time = System.currentTimeMillis(); Individual<BitSet> bestInd = null; if (args.length != 2) args = DEFAULT_ARGS; // Preparacion del GA // -------------------------------------------------------------------------------------------------------------- Set<Individual<BitSet>> bestIndividuals = new HashSet<Individual<BitSet>>(); List<Gene> genes = new ArrayList<Gene>(); genes.add(genCondicionACampo); genes.add(genCondicionAOperador); genes.add(genCondicionAValor); genes.add(genCondicionBPresente); genes.add(genCondicionBCampo); genes.add(genCondicionBOperador); genes.add(genCondicionBValor); genes.add(genCondicionCPresente); genes.add(genCondicionCCampo); genes.add(genCondicionCOperador); genes.add(genCondicionCValor); genes.add(genPrediccionCampo); genes.add(genPrediccionValor); Map<Gene, Ribosome<BitSet>> translators = new HashMap<Gene, Ribosome<BitSet>>(); for (Gene gene : genes) translators.put(gene, new BitSetToIntegerRibosome(0)); Genome<BitSet> genome = new BitSetGenome("Chromosome 1", genes, translators); Parameter<BitSet> par = new Parameter<BitSet>(0.035, 0.9, 200, new DescendantAcceptEvaluator<BitSet>(), new CensusFitnessEvaluator(), new BitSetOnePointCrossover(), new BitSetFlipMutator(), null, new BitSetRandomPopulationInitializer(), null, new ProbabilisticRouletteSelector(), new GlobalSinglePopulation<BitSet>(genome), 500, 100d, new BitSetMorphogenesisAgent(), genome); ParallelFitnessEvaluationGA<BitSet> ga = new ParallelFitnessEvaluationGA<BitSet>(par); ga.init(); // -------------------------------------------------------------------------------------------------------------- // Fin de Preparacion del GA // Itera hasta el maximo de generaciones permitidas for (int i = 0; i < par.getMaxGenerations(); i++) { ga.initGeneration(); Configuration conf = new Configuration(); // Debug //showPopulation(ga.getPopulation()); //System.out.println((System.currentTimeMillis()-time)/1000 + "s transcurridos desde el inicio"); // Pasamos como parmetro las condiciones a evaluar Iterator<Individual<BitSet>> ite = ga.getPopulation().iterator(); int contador = 0; Set<String> expUnicas = new HashSet<String>(); while (ite.hasNext()) { Individual<BitSet> ind = ite.next(); String rep = RuleStringAdaptor.adapt(RuleAdaptor.adapt(ind)); expUnicas.add(rep); } for (String rep : expUnicas) if (ocurrencias.get(rep) == null) { conf.set(String.valueOf(contador), rep); contador++; } // Configuracion del job i Job job = new Job(conf, "GA rules - Generation " + i); job.setJarByClass(CensusJob.class); job.setMapperClass(CensusMapper.class); job.setCombinerClass(CensusReducer.class); job.setReducerClass(CensusReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); SequenceFileOutputFormat.setOutputPath(job, new Path(args[1] + "g" + i)); // Corrida del trabajo map-reduce representando a la generacion i job.waitForCompletion(true); // Aca calculamos el fitness en base a lo que arrojo el job y si hay un mejor individuo lo agregamos al set de mejores individuos.... llenarOcurrencias(conf, args[1] + "g" + i); // Corremos GA para la generacion. Individual<BitSet> winnerGen = ga.run(new CensusFitnessEvaluator(ocurrencias)); // Mantenemos los mejores individuos if (bestInd == null) { bestInd = winnerGen; bestIndividuals.add(winnerGen); } else if (winnerGen.getFitness() > bestInd.getFitness()) { bestInd = winnerGen; bestIndividuals.add(winnerGen); } // Debug System.out.println("Mejor Individuo Generacion " + i + " => " + RuleAdaptor.adapt(bestInd) + " => Fitness = " + bestInd.getFitness()); } // Ordenamos y mostramos los mejores individuos List<Individual<BitSet>> bestIndList = new ArrayList<Individual<BitSet>>(bestIndividuals); Collections.sort(bestIndList, new Comparator<Individual<BitSet>>() { public int compare(Individual<BitSet> o1, Individual<BitSet> o2) { return (o1.getFitness() > o2.getFitness() ? -1 : (o1.getFitness() == o2.getFitness() ? 0 : 1)); } }); showPopulation(bestIndList); System.out.println("Tiempo total de corrida " + (System.currentTimeMillis() - time) / 1000 + "s"); }
From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 4) { System.out.println(// ww w. ja v a 2 s. c om "USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>"); return; } System.out.println(Arrays.toString(args)); Configuration conf = new Configuration(); conf.setInt("hama.num.vertices", Integer.parseInt(args[0])); conf.setInt("hama.num.partitions", Integer.parseInt(args[2])); conf.setInt("number.edges", Integer.parseInt(args[1])); Job job = new Job(conf); Path generated = new Path(new Path(args[3]).getParent(), "generated"); FileOutputFormat.setOutputPath(job, generated); FileSystem.get(conf).delete(generated, true); job.setJobName("RangeWriter"); job.setJarByClass(SortGenMapper.class); job.setMapperClass(SortGenMapper.class); job.setNumReduceTasks(0); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); job.setInputFormatClass(RangeInputFormat.class); job.waitForCompletion(true); conf.setInt("max.id", Integer.valueOf(args[0])); job = new Job(conf); FileOutputFormat.setOutputPath(job, new Path(args[3])); FileSystem.get(conf).delete(new Path(args[3]), true); job.setJobName("Random Vertex Writer"); FileInputFormat.addInputPath(job, generated); job.setJarByClass(RandomMapper.class); job.setMapperClass(RandomMapper.class); job.setReducerClass(Reducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2)); job.setPartitionerClass(HashPartitioner.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.waitForCompletion(true); }
From source file:AverageProj.AveragePrice.java
public int run(String[] args) throws Exception, ClassNotFoundException { Configuration conf = getConf(); Job job = new Job(conf, "Avg"); job.setJarByClass(AveragePrice.class); //final File f = new File(AveragePrice.class.getProtectionDomain().getCodeSource().getLocation().getPath()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setMapperClass(AvMapper.class); job.setMapOutputKeyClass(YearPrice.class); job.setMapOutputValueClass(DoubleWritable.class); job.setGroupingComparatorClass(YearSymComparator.class); // job.setCombinerClass(AvReducer.class); job.setReducerClass(AvReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(YearPrice.class); job.setOutputValueClass(DoubleWritable.class); job.setPartitionerClass(AvgPartitioner.class); job.setNumReduceTasks(7);// w w w . ja v a 2 s.c o m System.exit(job.waitForCompletion(true) ? 0 : 1); return 0; }
From source file:avro.mr.MapReduceColorCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }//from w ww .ja va 2 s. com Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCount.class); job.setJobName("Color Count"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroKeyInputFormat.class); job.setMapperClass(ColorCountMapper.class); AvroJob.setInputKeySchema(job, User.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:avro.mr.MapReduceColorCountSeqFile.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }// ww w .ja va2 s .co m Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCountSeqFile.class); job.setJobName("Color Count 2"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroSequenceFileInputFormat.class); AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.LONG)); AvroJob.setInputValueSchema(job, User.getClassSchema()); job.setMapperClass(ColorCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroSequenceFileOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:azkaban.jobtype.examples.java.WordCount2.java
License:Apache License
@Override public void run() throws Exception { logger.info(String.format("Starting %s", getClass().getSimpleName())); //getConf() and set Job job = createJob(MapperClass.class, ReducerClass.class); job.setJarByClass(WordCount2.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(inputPath)); super.run();//from w ww.j a v a 2 s .co m }
From source file:basic.PartitionGraph.java
License:Apache License
/** * Runs this tool./* w w w .j a va 2 s . com*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(RANGE, "use range partitioner")); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions") .create(NUM_PARTITIONS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES) || !cmdline.hasOption(NUM_PARTITIONS)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inPath = cmdline.getOptionValue(INPUT); String outPath = cmdline.getOptionValue(OUTPUT); int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS)); boolean useRange = cmdline.hasOption(RANGE); LOG.info("Tool name: " + PartitionGraph.class.getSimpleName()); LOG.info(" - input dir: " + inPath); LOG.info(" - output dir: " + outPath); LOG.info(" - num partitions: " + numParts); LOG.info(" - node cnt: " + nodeCount); LOG.info(" - use range partitioner: " + useRange); Configuration conf = getConf(); conf.setInt("NodeCount", nodeCount); Job job = Job.getInstance(conf); job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath); job.setJarByClass(PartitionGraph.class); job.setNumReduceTasks(numParts); FileInputFormat.setInputPaths(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); if (useRange) { job.setPartitionerClass(RangePartitioner.class); } FileSystem.get(conf).delete(new Path(outPath), true); job.waitForCompletion(true); return 0; }
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
@SuppressWarnings("rawtypes") public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? extends OutputFormat> outputFormat) throws IOException { Job job = new Job(new Configuration()); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }//from w w w . ja v a 2 s .c o m job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }