List of usage examples for org.apache.hadoop.mapreduce.Job#setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
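setCombinerClass registers a Reducer that the framework may run over each mapper's local output before the shuffle, shrinking the data sent across the network. Because the combiner may be invoked zero, one, or several times per key, it only gives correct results for associative and commutative operations (such as summing counts), and its input and output key/value types must match the map output types. The call throws IllegalStateException if the job has already been submitted. A minimal sketch of the common pattern, assuming the standard WordCount TokenizerMapper and IntSumReducer that appear in the examples below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);  // WordCount is the assumed driver class
job.setMapperClass(TokenizerMapper.class);
// Reuse the reducer as the combiner: summing is associative and commutative,
// and IntSumReducer's input types equal its output types (Text, IntWritable).
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);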
From source file:hadoop.twitter.mapreduce.Main.java
public static void preProcess(String name, String input, String output) {
    try {
        deleteFolder(output);
        Job job = Job.getInstance(conf, "(feryandi) " + name);
        job.setJarByClass(Preprocess.class);
        job.setMapperClass(UserMapper.class);
        job.setCombinerClass(UserReducer.class);
        job.setReducerClass(UserReducer.class);
        //job.setNumReduceTasks(8);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(UserWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        // waitForCompletion blocks until the job finishes; it must not be called
        // twice on the same Job instance (that throws IllegalStateException).
        job.waitForCompletion(true);
    } catch (Exception ex) {
        Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:hadoop.twitter.mapreduce.Main.java
public static void ranking(String name, String input, String output) {
    try {
        deleteFolder(output);
        Job job = Job.getInstance(conf, "(feryandi) " + name);
        job.setJarByClass(Ranking.class);
        job.setMapperClass(RankingMapper.class);
        job.setCombinerClass(RankingReducer.class);
        job.setReducerClass(RankingReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(UserWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        // waitForCompletion blocks until the job finishes; it must not be called
        // twice on the same Job instance (that throws IllegalStateException).
        job.waitForCompletion(true);
    } catch (Exception ex) {
        Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:hadoop.wordcount.partitioner.combiner.WordCountAddPartitioner.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setNumReduceTasks(5);
    job.setJarByClass(WordCountAddPartitioner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hadoop.wordcount.prefix.WordCountAddPrefix.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountAddPrefix.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hadoop.WordCountDataLuogoMeteoArrivo.java
License:Apache License
public static void main(String[] args) throws Exception {
    long start = System.nanoTime();
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountDataLuogoMeteoArrivo.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("NewDatasetMobility.txt"));
    FileSystem fs;
    try {
        // Delete a previous output directory, if any, so the job can run again.
        fs = FileSystem.get(new Configuration());
        if (fs.exists(new Path("resultsHADOOP"))) {
            fs.delete(new Path("resultsHADOOP"), true);
            System.out.println("DELETED");
        }
    } catch (Exception e) {
        System.out.println("ERROR");
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, new Path("resultsHADOOP"));
    if (job.waitForCompletion(true)) {
        System.out.println("DONE");
        long end = System.nanoTime();
        long microseconds = (end - start) / 1000;
        double seconds = (double) microseconds / 1000000;
        double minutes = seconds / 60;
        System.out.println("microseconds: " + microseconds);
        System.out.println("seconds: " + seconds);
        System.out.println("minutes: " + minutes);
        System.exit(0);
    } else {
        System.exit(1);
    }
}
From source file:hdfsBasicOper.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // The combiner runs a local reduce over each mapper's output before the shuffle.
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        // FileInputFormat.addInputPath adds an input path to the job. The default
        // TextInputFormat splits the input, and its LineRecordReader turns each
        // line into a <key, value> pair for the mapper.
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.out.println("over");
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hd_knn.HD_KNN.java
public static void main(String[] args) throws Exception {
    // Arguments:
    //   knn_type:    KNN variant - 0 standard, 1 mean, 2 inverse of the distance
    //   distance:    distance to use - 0 Euclidean, 1 Manhattan, 2 Chebyshev
    //   k
    //   test_file
    //   input_path
    //   output_path
    if (args.length != 6) {
        System.out.println("Arguments: knn_type distance k test_file input_path output_path");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    conf.set("knn_method", args[0]);
    conf.set("distance", args[1]);
    conf.setInt("k", Integer.parseInt(args[2]));
    conf.set("test_file", args[3]);
    Job job = Job.getInstance(conf, "KNN");
    job.setJarByClass(HD_KNN.class);
    job.setMapperClass(DistanceCalculatorMapper.class);
    job.setCombinerClass(PredictClassReducer.class);
    job.setReducerClass(PredictClassReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DistanceClassOutput.class);
    FileInputFormat.addInputPath(job, new Path(args[4]));
    FileOutputFormat.setOutputPath(job, new Path(args[5]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:idv.takeshi.miao.examples.WordCount.java
License:Apache License
/**
 * Extracted into its own method so it can be tested easily.
 * @param conf
 * @param otherArgs
 * @return the configured word count job
 * @throws IOException
 * @author scott_miao
 */
public static Job createJob(Configuration conf, String[] otherArgs) throws IOException {
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    return job;
}
From source file:ilps.hadoop.bin.CountAssessments.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    String in = null;
    String out = null;
    String assessmentsfile = null;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-i".equals(args[i])) {
                in = args[++i];
            } else if ("-o".equals(args[i])) {
                out = args[++i];
            } else if ("-q".equals(args[i])) {
                assessmentsfile = args[++i];
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (other_args.size() > 0) {
        return printUsage();
    }
    if (in == null || out == null || assessmentsfile == null)
        return printUsage();

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + in);
    LOG.info(" - output path: " + out);
    LOG.info(" - assessments path: " + assessmentsfile);

    Configuration conf = getConf();
    conf.set("ASSESSMENTS_HDFS", assessmentsfile);
    Job job = new Job(conf, "Count assessments");
    job.setJarByClass(CountAssessments.class);
    // some weird issues with Thrift classes in the Hadoop distro.
    job.setUserClassesTakesPrecedence(true);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapperClass(MyMapper.class);
    FileInputFormat.addInputPath(job, new Path(in));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StringLongPair.class);
    job.setCombinerClass(CountReducer.class);
    job.setReducerClass(CountReducer.class);
    job.setNumReduceTasks(1);
    FileSystem.get(conf).delete(new Path(out), true);
    TextOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringLongPair.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:ilps.hadoop.bin.CountGenres.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    String in = null;
    String out = null;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-i".equals(args[i])) {
                in = args[++i];
            } else if ("-o".equals(args[i])) {
                out = args[++i];
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (other_args.size() > 0) {
        return printUsage();
    }
    if (in == null || out == null)
        return printUsage();

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + in);
    LOG.info(" - output path: " + out);

    Configuration conf = getConf();
    Job job = new Job(conf, "Count genres");
    job.setJarByClass(CountGenres.class);
    // some weird issues with Thrift classes in the Hadoop distro.
    job.setUserClassesTakesPrecedence(true);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapperClass(MyMapper.class);
    FileInputFormat.addInputPath(job, new Path(in));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    FileSystem.get(conf).delete(new Path(out), true);
    TextOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    return job.waitForCompletion(true) ? 0 : 1;
}