List of usage examples for org.apache.hadoop.mapred.JobConf.setCombinerClass
public void setCombinerClass(Class<? extends Reducer> theClass)
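The combiner runs on map output before it is shuffled to the reducers, so it cuts network traffic but may be invoked zero, one, or many times per key. A reducer can double as a combiner only when its function is commutative and associative and its input and output types match the map output types, as in the minimal sketch below (assuming a WordCount-style job whose Reduce class sums IntWritable counts):

    JobConf conf = new JobConf(WordCount.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    // Reuse the reducer as the combiner: safe here because summing counts is
    // commutative and associative, and the combiner's input and output
    // key/value types both match the map output types (Text, IntWritable).
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);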
From source file:proiectps.ProiectPS.java
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), MaxTemp.class);
    conf.setJobName("maxtemp");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
    return 0;
}
From source file:redpoll.clusterer.kmeans.KMeansDriver.java
License:Apache License
/**
 * Run the job using supplied arguments.
 *
 * @param input the directory pathname for input points
 * @param clustersIn the directory pathname for input clusters
 * @param clustersOut the directory pathname for output clusters
 * @param measureClass the classname of the DistanceMeasure
 * @param convergenceDelta the convergence delta value
 * @return true if the iteration successfully runs
 */
private static boolean runIteration(String input, String clustersIn, String clustersOut, String measureClass,
        String convergenceDelta) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(KMeansDriver.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(WritableVector.class);
    FileInputFormat.setInputPaths(conf, new Path(input));
    Path outPath = new Path(clustersOut);
    FileOutputFormat.setOutputPath(conf, outPath);
    conf.setMapperClass(KMeansMapper.class);
    conf.setCombinerClass(KMeansCombiner.class);
    conf.setReducerClass(KMeansReducer.class);
    conf.setNumReduceTasks(1);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set(Cluster.CLUSTER_PATH_KEY, clustersIn);
    conf.set(Cluster.DISTANCE_MEASURE_KEY, measureClass);
    conf.set(Cluster.CLUSTER_CONVERGENCE_KEY, convergenceDelta);
    conf.set("io.serializations",
            "org.apache.hadoop.io.serializer.JavaSerialization,"
                    + "org.apache.hadoop.io.serializer.WritableSerialization");
    try {
        FileSystem fs = FileSystem.get(conf);
        loadClusters(clustersIn + "/part-00000", conf, fs);
        client.setConf(conf);
        JobClient.runJob(conf);
        return isConverged(clustersOut + "/part-00000", conf, fs);
    } catch (Exception e) {
        log.warn(e.toString(), e);
        return true;
    }
}
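A combiner is a natural fit for k-means: partial sums of the points assigned to each cluster can be folded together map-side, leaving the single reducer only the work of computing new centroids. This assumes KMeansCombiner performs that partial aggregation, as is typical for MapReduce implementations of k-means.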
From source file:reverseIndexer.reverseIndexer.java
public static void startJob(String[] args) {
    try {
        /*
        // Alternative using the new (org.apache.hadoop.mapreduce) API:
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: ReverseIndexer <output> <input file(s)>");
            System.exit(2);
        }
        Job job = new Job(conf, "reverse indexer");
        job.setJarByClass(this.getClass());
        job.setMapperClass(IndexerMapper.class);
        job.setReducerClass(IndexerReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LineRecWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        for (int i = 1; i < otherArgs.length; i++) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
        */
        JobConf conf = new JobConf(reverseIndexer.class);
        conf.setJobName("wordcount");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        // Class literals avoid instantiating the mapper/reducer just to call
        // getClass(), and need no unchecked casts.
        conf.setMapperClass(reverseIndexMapper.class);
        conf.setCombinerClass(reverseIndexReducer.class);
        conf.setReducerClass(reverseIndexReducer.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(args[1]));
        // Delete any stale output directory before the job runs.
        Path outputDir = new Path(args[2]);
        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileOutputFormat.setOutputPath(conf, outputDir);
        JobClient.runJob(conf);
    } catch (Exception exp) {
        exp.printStackTrace();
    }
}
From source file:temp.WordCount.java
License:Apache License
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCount.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    // JobConf.setInputPath/setOutputPath are gone from the mapred API;
    // use the FileInputFormat/FileOutputFormat helpers instead.
    FileInputFormat.setInputPaths(conf, new Path(otherArgs.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(otherArgs.get(1)));

    JobClient.runJob(conf);
    return 0;
}
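Note that setNumMapTasks is only a hint to the framework; the actual number of map tasks is determined by the InputFormat's splits, whereas setNumReduceTasks is honored exactly.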
From source file:U.CC.SpeciesIterDriver2.java
public static void main(String[] args) {
    if (args.length < 2) {
        System.out.println("Usage: SpeciesIterDriver2 <input path> <output path>");
        System.exit(0);
    }
    int i = 0;
    while (i < 10) {
        JobClient client = new JobClient();
        JobConf conf = new JobConf(SpeciesIterDriver2.class);
        conf.setJobName("Species Iter");
        conf.setNumReduceTasks(5);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // Each iteration reads the previous iteration's output directory
        // and writes to the next one.
        FileInputFormat.setInputPaths(conf, "output" + i);
        int newFileVal = i + 1;
        FileOutputFormat.setOutputPath(conf, new Path("output" + newFileVal));

        conf.setMapperClass(SpeciesIterMapper2.class);
        conf.setReducerClass(SpeciesIterReducer2.class);
        conf.setCombinerClass(SpeciesIterReducer2.class);

        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
        i++;
    }
}
From source file:ucsc.hadoop.mapreduce.apache.Grep.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[2]);
        if (args.length == 4) {
            grepJob.set("mapred.mapper.regex.group", args[3]);
        }

        // LongSumReducer is commutative and associative, so it can serve as
        // both combiner and reducer.
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormat(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        JobClient.runJob(grepJob);

        // second job
        JobConf sortJob = new JobConf(getConf(), Grep.class);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormat(SequenceFileInputFormat.class);
        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class);

        JobClient.runJob(sortJob);
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
From source file:uk.bl.wa.hadoop.hosts.HostsReport.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), HostsReport.class);
    log.info("Adding logs...");
    String line;
    BufferedReader br = new BufferedReader(new FileReader(args[0]));
    while ((line = br.readLine()) != null) {
        log.info("Adding " + line);
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    conf.setJarByClass(HostsReport.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapperClass(HostsReportMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCombinerClass(HostsReportReducer.class);
    conf.setReducerClass(HostsReportReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);
    JobClient.runJob(conf);
    return 0;
}
From source file:validatenacluster.ValidateNacluster.java
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(ValidateNacluster.class);
    conf.setJobName("Partition for Machine Count");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
}
From source file:voldemort.store.readonly.mr.azkaban.AbstractHadoopJob.java
License:Apache License
public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
        Class<? extends Reducer> combinerClass) throws IOException, URISyntaxException {
    JobConf conf = createJobConf(mapperClass, reducerClass);
    conf.setCombinerClass(combinerClass);
    return conf;
}
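A call site for this helper might look like the following sketch; WordCountMapper and WordCountReducer are hypothetical classes standing in for whatever Mapper and Reducer the job actually uses, and passing the reducer as the combiner assumes its input and output types match the map output types:

    // Hypothetical call site: the reducer doubles as the combiner.
    JobConf conf = createJobConf(WordCountMapper.class, WordCountReducer.class, WordCountReducer.class);
    RunningJob job = JobClient.runJob(conf);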
From source file:yangqi.hadoop.sample.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
}