List of usage examples for org.apache.hadoop.mapred JobConf setCombinerClass
public void setCombinerClass(Class<? extends Reducer> theClass)
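A combiner is an optional map-side pre-reducer: it aggregates map output locally before the shuffle, cutting the volume of data sent over the network. Because the framework may run it zero, one, or several times, the operation must be associative and commutative, and the combiner's input and output key/value types must both match the map output types. That is why the examples below typically reuse the reducer class as the combiner. The following is a minimal, self-contained sketch of that pattern; the class names CombinerExample, TokenMap, and SumReduce are illustrative, not taken from any example on this page.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class CombinerExample {

    public static class TokenMap extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                output.collect(word, ONE);
            }
        }
    }

    // Summing is associative and commutative, so the same class can serve as
    // both combiner and reducer; its input and output types (Text, IntWritable)
    // both match the map output types, as setCombinerClass requires.
    public static class SumReduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(CombinerExample.class);
        conf.setJobName("combiner-example");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        conf.setMapperClass(TokenMap.class);
        conf.setCombinerClass(SumReduce.class); // runs on map-side partial output
        conf.setReducerClass(SumReduce.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));  // input path supplied by caller
        FileOutputFormat.setOutputPath(conf, new Path(args[1])); // output path supplied by caller
        JobClient.runJob(conf);
    }
}

For operations that are not associative and commutative, such as computing an average, the reducer cannot be reused this way; a separate combiner that emits partial aggregates (for example, sum and count pairs) would be needed.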
From source file:org.pentaho.hadoop.mapreduce.test.TransMapReduceJobTestFIXME.java
License:Open Source License
@Test
public void submitJob() throws Exception {
    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    KettleEnvironment.init();

    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = new TransMeta("./test-res/wordcount-mapper.ktr");
    TransConfiguration transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-map-xml", transConfig.getXML());

    transMeta = new TransMeta("./test-res/wordcount-reducer.ktr");
    transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-reduce-xml", transConfig.getXML());

    conf.set("transformation-map-input-stepname", "Injector");
    conf.set("transformation-map-output-stepname", "Output");
    conf.set("transformation-reduce-input-stepname", "Injector");
    conf.set("transformation-reduce-output-stepname", "Output");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });
    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransMap"));
    conf.setCombinerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));
    conf.setReducerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName());
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals(
            "Bye\t4\nGood\t2\nGoodbye\t1\nHadoop\t2\nHello\t5\nThis\t1\nWorld\t5\nand\t1\ncounting\t1\nextra\t1\nfor\t1\nis\t1\nsome\t1\ntext\t1\nwords\t1\n",
            output);
}
From source file:org.pentaho.wordcount.LocalWordCount.java
License:Open Source License
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException when there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), LocalWordCount.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (longs)
    conf.setOutputValueClass(LongWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(GenericTransReduce.class);
    conf.setReducerClass(GenericTransReduce.class);

    // these are set so the job runs in the same JVM as the debugger;
    // we are not submitting to an MR node.
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "local");

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    JobClient.runJob(conf);
    return 0;
}
From source file:org.sf.xrime.algorithms.layout.radialtree.Raw2OutLabeledAdjVertexTransformer.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, Raw2OutLabeledAdjVertexTransformer.class);
    jobConf.setJobName("Smth - OutLabeledAdjVertexesList");

    jobConf.setMapperClass(MapClass.class);
    jobConf.setCombinerClass(ReduceClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // the keys are author names (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes (Writable)
    jobConf.setOutputValueClass(LabeledAdjVertex.class);

    // makes the file format suitable for machine processing.
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.bi.EdgeSetMinorExpand.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, EdgeSetMinorExpand.class);
    conf.setJobName("EdgeSetMinorExpand");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(EdgeSet.class);
    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.bi.EdgeSetMinorJoin.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, EdgeSetMinorJoin.class);
    conf.setJobName("EdgeSetMinorJoin");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(EdgeSet.class);
    conf.setMapperClass(MapClass.class);
    // Combiner is permitted here.
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.bi.EdgeSetSummarize.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, EdgeSetSummarize.class);
    conf.setJobName("EdgeSetSummarize");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(EdgeSet.class);
    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.strongly.LabelPropagation.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, LabelPropagation.class);
    conf.setJobName("LabelPropagation");

    // the keys are vertex identifiers (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    conf.setOutputValueClass(LabeledAdjBiSetVertex.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.weakly.alg_1.VertexSetMinorExpand.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, VertexSetMinorExpand.class);
    conf.setJobName("VertexSetMinorExpand");

    // the keys are vertex identifiers (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are ultimate vertex set identifiers/labels (Writable)
    conf.setOutputValueClass(VertexSet.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.weakly.alg_1.VertexSetMinorJoin.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, VertexSetMinorJoin.class);
    conf.setJobName("VertexSetMinorJoin");

    // the keys are vertex identifiers (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are vertex sets (Writable)
    conf.setOutputValueClass(VertexSet.class);

    conf.setMapperClass(MapClass.class);
    // Combiner is permitted here.
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.weakly.alg_1.VertexSetSummarize.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, VertexSetSummarize.class);
    conf.setJobName("VertexSetSummarize");

    // the keys are vertex labels (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are vertex sets (Writable)
    conf.setOutputValueClass(VertexSet.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}