List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf) throws IOException
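All of the examples on this page follow the same pattern: build a Configuration, pass it to the Job constructor, configure the mapper, reducer, key/value classes, and input/output paths, then wait for completion. The following is a minimal, self-contained sketch of that pattern; it is not taken from any of the source files below and uses the identity Mapper and Reducer. Note that this constructor is deprecated in current Hadoop releases in favor of Job.getInstance(Configuration).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobSetup {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job(Configuration) is deprecated in recent Hadoop releases;
        // Job.getInstance(conf) is the preferred replacement.
        Job job = new Job(conf);
        job.setJobName("minimal-example");
        job.setJarByClass(MinimalJobSetup.class);
        // Identity mapper and reducer: records pass through unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        // With the default TextInputFormat, keys are byte offsets and values are lines.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}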
From source file:ComRoughSetApproInputSampler.java
License:Apache License
/**
 * Driver for InputSampler from the command line.
 * Configures a JobConf instance and calls {@link #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                job.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-inFormat".equals(args[i])) {
                job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
            } else if ("-keyClass".equals(args[i])) {
                job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
            } else if ("-splitSample".equals(args[i])) {
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new SplitSampler<K, V>(numSamples, maxSplits);
            } else if ("-splitRandom".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else if ("-splitInterval".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (job.getNumReduceTasks() <= 1) {
        System.err.println("Sampler requires more than one reducer");
        return printUsage();
    }
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    if (null == sampler) {
        sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }
    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
        FileInputFormat.addInputPath(job, new Path(s));
    }
    ComRoughSetApproInputSampler.<K, V>writePartitionFile(job, sampler);
    return 0;
}
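The partition file written by this driver is normally consumed by a later sorting job through TotalOrderPartitioner. A minimal sketch of that follow-up wiring is shown below; the helper class, method name, and the assumption that the same partition path is reused are illustrative and not part of the example above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSortSketch {
    // Wires a job to read the partition file written by an InputSampler-style driver.
    public static Job createSortJob(Configuration conf, Path partitionFile, int numReducers)
            throws Exception {
        Job sortJob = new Job(conf);
        sortJob.setJobName("total-order-sort");
        // The partitioner routes keys to reducers using the sampled cut points.
        sortJob.setPartitionerClass(TotalOrderPartitioner.class);
        // partitionFile must be the same path that was passed to setPartitionFile when sampling.
        TotalOrderPartitioner.setPartitionFile(sortJob.getConfiguration(), partitionFile);
        sortJob.setNumReduceTasks(numReducers);
        return sortJob;
    }
}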
From source file:MapReduce3.java
public static void main(String[] args) throws Exception {
    // Input data on HDFS.
    String dst = "hdfs://localhost:9000/data/2006a.csv";
    // String dstOut = "hdfs://localhost:9000/mapreduce/result3/1";
    String dstOut = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/1";
    String outFiles = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/2";
    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    Job job = new Job(hadoopConfig);
    Job job2 = new Job(hadoopConfig);
    FileInputFormat.addInputPath(job, new Path(dst));
    FileOutputFormat.setOutputPath(job, new Path(dstOut));
    FileInputFormat.addInputPath(job2, new Path(dstOut));
    FileOutputFormat.setOutputPath(job2, new Path(outFiles));

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job, TempMapper.class, LongWritable.class, Text.class, CompositeKey_wd.class,
            IntWritable.class, map1Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job, TempReducer.class, CompositeKey_wd.class, IntWritable.class,
            CompositeKey_wd.class, IntWritable.class, reduceConf);
    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job2, TempMapper2.class, LongWritable.class, Text.class, IntWritable.class,
            CompositeKey_wd.class, map2Conf);
    JobConf map3Conf = new JobConf(false);
    ChainReducer.setReducer(job2, TempReduce2.class, IntWritable.class, CompositeKey_wd.class, Text.class,
            IntWritable.class, map3Conf);

    // JobClient.runJob(job);
    // Mapper and Reducer are wired through ChainMapper/ChainReducer above:
    // job.setMapperClass(TempMapper.class);
    // job.setReducerClass(TempReducer.class);

    // Output key/value classes.
    job.setOutputKeyClass(CompositeKey_wd.class);
    job.setOutputValueClass(IntWritable.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(CompositeKey_wd.class);
    // job2.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // Run the two jobs in sequence.
    job.waitForCompletion(true);
    System.out.println("Finished1");
    job2.waitForCompletion(true);
    System.out.println("Finished2");
}
From source file:PerformanceEvaluation.java
License:Apache License
private void doMapReduce(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());

    Job job = new Job(this.conf);
    job.setJarByClass(PerformanceEvaluation.class);
    job.setJobName("HBase Performance Evaluation");

    job.setInputFormatClass(PeInputFormat.class);
    PeInputFormat.setInputPaths(job, inputDir);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(EvaluationMapTask.class);
    job.setReducerClass(LongSumReducer.class);
    job.setNumReduceTasks(1);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));

    TableMapReduceUtil.addDependencyJars(job);
    job.waitForCompletion(true);
}
From source file:HiddenMarkovModelBuilder.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "HMM model builder";
    job.setJobName(jobName);
    job.setJarByClass(HiddenMarkovModelBuilder.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");
    job.setMapperClass(HiddenMarkovModelBuilder.StateTransitionMapper.class);
    job.setReducerClass(HiddenMarkovModelBuilder.StateTransitionReducer.class);
    job.setCombinerClass(MarkovStateTransitionModel.StateTransitionCombiner.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
From source file:MarkovStateTransitionModel.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Markov state transition model";
    job.setJobName(jobName);
    job.setJarByClass(MarkovStateTransitionModel.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");
    job.setMapperClass(MarkovStateTransitionModel.StateTransitionMapper.class);
    job.setReducerClass(MarkovStateTransitionModel.StateTransitionReducer.class);
    job.setCombinerClass(MarkovStateTransitionModel.StateTransitionCombiner.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
From source file:TestBAM.java
License:Open Source License
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    conf.set(MyOutputFormat.HEADER_FROM_FILE, args[0]);
    DistributedCache.addFileToClassPath(
            new Path("hdfs:///libjars/hadoop-bam-7.0.0-jar-with-dependencies.jar"), conf);

    final Job job = new Job(conf);
    job.setJarByClass(TestBAM.class);
    job.setMapperClass(TestBAMMapper.class);
    job.setReducerClass(TestBAMReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setInputFormatClass(AnySAMInputFormat.class);
    job.setOutputFormatClass(TestBAM.MyOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.submit();
    if (!job.waitForCompletion(true)) {
        System.err.println("sort :: Job failed.");
        return 1;
    }
    return 0;
}
From source file:Top20AZRestaurants.java
@Override
public int run(String[] args) throws Exception {
    Job job1 = new Job(getConf());
    job1.setSortComparatorClass(MyDecreasingDoubleComparator.class);
    job1.setJobName("Top20 AZ Restaurants ChainJob");
    job1.setJarByClass(Top20AZRestaurants.class);

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20Mapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            map1Conf);
    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20MapperRedo.class, Text.class, Text.class, DoubleWritable.class,
            Text.class, map2Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job1, Top20ReducerRedo.class, DoubleWritable.class, Text.class, Text.class,
            DoubleWritable.class, reduceConf);

    FileInputFormat.setInputPaths(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    boolean success = job1.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:a.b.c.MultiFileWordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }

    Job job = new Job(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    // Set the InputFormat of the job to our InputFormat.
    job.setInputFormatClass(MyInputFormat.class);

    // The keys are words (strings).
    job.setOutputKeyClass(Text.class);
    // The values are counts (ints).
    job.setOutputValueClass(IntWritable.class);

    // Use the defined mapper.
    job.setMapperClass(MapClass.class);
    // Use the WordCount Reducer.
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:alluxio.examples.keyvalue.hadoop.CloneStoreMapReduce.java
License:Apache License
/**
 * @param args two parameters, the first is the input key-value store path, the second is the
 *        output key-value store path
 * @throws Exception if any exception happens
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // NOTE(binfan): we are using the deprecated constructor of Job instance to compile with
    // hadoop-1.0. If this is not a concern, a better way is
    // Job job = Job.getInstance(conf);
    Job job = new Job(conf);
    job.setJobName("CloneStoreMapReduce");
    job.setJarByClass(CloneStoreMapReduce.class);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setMapperClass(CloneStoreMapper.class);
    job.setReducerClass(CloneStoreReducer.class);

    job.setInputFormatClass(KeyValueInputFormat.class);
    job.setOutputFormatClass(KeyValueOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.MatrixMultiplicationBenchmark.java
License:Apache License
public long doBenchmark() {
    try {
        Configuration conf = null;
        switch (type) {
        // case JAVA:
        //     m_matrixA.multiplyJava(m_matrixB, m_matrixC);
        //     break;
        case CPU:
            conf = MatrixMultiplicationCpu.createMatrixMultiplicationCpuConf(m_conf, m_transposedMatrixAPath,
                    m_matrixBPath, m_matrixCPath, Integer.MAX_VALUE, false);
            break;
        case GPU:
            conf = MatrixMultiplicationGpu.createMatrixMultiplicationGpuConf(m_conf, m_transposedMatrixAPath,
                    m_matrixBPath, m_matrixCPath, Integer.MAX_VALUE, TILE_WIDTH, false);
            break;
        default:
            break;
        }

        Job job = new Job(conf);
        job.setJobName("MatrixMultiplication on " + type + " with n=" + n);

        long startTime = System.currentTimeMillis();
        boolean status = job.waitForCompletion(false);
        long endTime = System.currentTimeMillis() - startTime;

        System.out.println("MatrixMultiplication on " + type + " with size: " + n + " finished in "
                + (endTime / 1000.0) + " seconds with return: " + status);
        return endTime;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}