List of usage examples for org.apache.hadoop.mapred JobConf setReducerClass
public void setReducerClass(Class<? extends Reducer> theClass)
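For orientation, here is a minimal, self-contained sketch of where setReducerClass sits in a classic org.apache.hadoop.mapred word-count job. The TokenMapper and SumReducer classes are illustrative names defined in the sketch itself, not taken from any of the source files that follow.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

public class WordCountDriver {

    // Tokenizes each input line and emits (word, 1).
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();
        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> out, Reporter reporter) throws IOException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                out.collect(word, ONE);
            }
        }
    }

    // Sums the counts emitted for each word.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> out, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            out.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCountDriver.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(TokenMapper.class);
        conf.setCombinerClass(SumReducer.class); // optional local aggregation
        conf.setReducerClass(SumReducer.class);  // Class<? extends Reducer>

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf); // submits the job and waits for completion
    }
}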
From source file: org.acacia.partitioner.java.EdgeDistributor.java
License: Apache License

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    String dir1 = "/user/miyuru/input";
    String dir2 = "/user/miyuru/edgedistributed-out";

    // We first delete the temporary directories if they exist on the HDFS.
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    // First job scans through the edge list and splits the edges into
    // separate files based on the partitioned vertex files.
    JobConf conf = new JobConf(EdgeDistributor.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[0]);
    conf.set("org.acacia.partitioner.hbase.table", args[1]);
    conf.set("org.acacia.partitioner.index.contacthost", args[2]);
    conf.set("vert-count", args[3]);
    conf.set("initpartition-id", args[4]);
    conf.set("zero-flag", args[5]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(FileMapper.class);
    conf.setReducerClass(FileReducer.class);
    //conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // Need to specify the number of reduce tasks explicitly;
    // otherwise only one reduce task is created.
    conf.setNumReduceTasks(96);

    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));
    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class, NullWritable.class,
            Text.class);

    // The old-API JobConf is wrapped in a new-API Job before submission.
    Job job = new Job(conf, "EdgeDistributor");
    job.waitForCompletion(true);
    System.out.println("Done job EdgeDistribution");
}
From source file: org.acacia.partitioner.java.EdgelistPartitioner.java
License: Apache License

@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(EdgelistPartitioner.class);
    if (conf == null) {
        return;
    }

    String dir1 = "/user/miyuru/merged";
    String dir2 = "/user/miyuru/merged-out";

    // We first delete the temporary directories if they exist on the HDFS;
    // only delete dir2 because dir1 is uploaded externally.
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    conf.setInputFormat(WholeFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SequenceFileMapper.class);
    conf.setReducerClass(MultipleOutputsInvertedReducer.class);
    conf.setOutputFormat(NullOutputFormat.class); // overrides the TextOutputFormat set above

    conf.setJobName("EdgelistPartitioner");

    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class, NullWritable.class,
            Text.class);

    JobClient.runJob(conf);
}
From source file: org.acacia.partitioner.java.NoptSplitter.java
License: Apache License

/**
 * @param args
 */
public static void main(String[] args) {
    if (!validArgs(args)) {
        printUsage();
        return;
    }

    // These are the temp paths that are created on HDFS
    String dir1 = "/user/miyuru/edgedistributed-out/nopt";
    String dir2 = "/user/miyuru/nopt-distributed";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1;
    try {
        fs1 = FileSystem.get(new JobConf());
        System.out.println("Deleting the dir : " + dir2);
        if (fs1.exists(new Path(dir2))) {
            fs1.delete(new Path(dir2), true);
        }

        // Path notinPath = new Path(dir2);
        //
        // if (!fs1.exists(notinPath)) {
        //     fs1.create(notinPath);
        // }

        JobConf conf = new JobConf(NoptSplitter.class);
        // conf.setOutputKeyClass(Text.class);
        // conf.setOutputValueClass(Text.class);
        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);
        // conf.setInputFormat(TextInputFormat.class);
        // conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(dir1));
        FileOutputFormat.setOutputPath(conf, new Path(dir2));

        Job job1 = new Job(conf, "nopt_splitter");
        // The most important point in this job: the reduce-task count is taken from the command line.
        job1.setNumReduceTasks(Integer.parseInt(args[0]));
        job1.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
}
From source file: org.ahanna.DoubleConversionMapper.java
License: Apache License

public static void main(String[] args) {
    JobConf conf = new JobConf(DoubleConversion.class);
    conf.setJobName("DoubleConversation");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(DoubleConversionMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    // KeyValueTextInputFormat treats each line as an input record,
    // and splits the line by the tab character to separate it into key and value
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        // do nothing
    }
}
From source file: org.apache.ambari.servicemonitor.jobs.FileUsingJobRunner.java
License: Apache License

public int run(String[] args) throws Exception {
    // Configuration processed by ToolRunner
    Configuration conf = getConf();
    CommandLine commandLine = getCommandLine();

    // Create a JobConf using the processed conf
    JobConf jobConf = new JobConf(conf, FileUsingJobRunner.class);

    // tune the config
    if (jobConf.get(JobKeys.RANGEINPUTFORMAT_ROWS) == null) {
        jobConf.setInt(JobKeys.RANGEINPUTFORMAT_ROWS, 1);
    }

    // Process custom command-line options
    String name = OptionHelper.getStringOption(commandLine, "n", "File Using Job");
    if (commandLine.hasOption('x')) {
        // delete the output directory
        String destDir = jobConf.get(JobKeys.MAPRED_OUTPUT_DIR);
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(destDir), true);
    }

    // Specify various job-specific parameters
    jobConf.setMapperClass(FileUsingMapper.class);
    jobConf.setReducerClass(FileUsingReducer.class);
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setInputFormat(RangeInputFormat.class);
    //jobConf.setPartitionerClass(SleepJob.class);
    jobConf.setSpeculativeExecution(false);
    jobConf.setJobName(name);
    jobConf.setJarByClass(this.getClass());
    FileInputFormat.addInputPath(jobConf, new Path("ignored"));

    // Submit the job, then poll for progress until the job is complete
    RunningJob runningJob = JobClient.runJob(jobConf);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : 1;
}
From source file: org.apache.avro.mapred.AvroJob.java
License: Apache License

private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    // If the job has not set its own reducer, install the HadoopReducer
    // adapter, which drives the configured AvroReducer.
    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}
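For context, a hedged sketch of how this branch is typically reached: when a job is configured through the AvroJob helpers, the user's reducer is registered as an AvroReducer property rather than through JobConf.setReducerClass, so getReducerClass() still reports IdentityReducer and the adapter is installed. WordCountMapper and WordCountReducer below are hypothetical AvroMapper/AvroReducer implementations, and the schemas are illustrative.

JobConf job = new JobConf();
AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
AvroJob.setMapperClass(job, WordCountMapper.class);   // hypothetical AvroMapper subclass
AvroJob.setReducerClass(job, WordCountReducer.class); // hypothetical AvroReducer subclass
// Setting the output schema routes through configureAvroOutput; since no reducer
// was set directly on the JobConf, HadoopReducer is installed as the adapter.
AvroJob.setOutputSchema(job, Pair.getPairSchema(
        Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)));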
From source file: org.apache.avro.mapred.TestSequenceFileReader.java
License: Apache License

@Test
public void testNonAvroReducer() throws Exception {
    JobConf job = new JobConf();
    Path output = new Path(System.getProperty("test.dir", ".") + "/seq-out");
    output.getFileSystem(job).delete(output);

    // configure input for Avro from sequence file
    AvroJob.setInputSequenceFile(job);
    AvroJob.setInputSchema(job, SCHEMA);
    FileInputFormat.setInputPaths(job, FILE.toURI().toString());

    // mapper is default, identity
    // use a hadoop reducer that consumes Avro input
    AvroJob.setMapOutputSchema(job, SCHEMA);
    job.setReducerClass(NonAvroReducer.class);

    // configure output for non-Avro SequenceFile
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, output);

    // output key/value classes are default, LongWritable/Text
    JobClient.runJob(job);

    checkFile(new SequenceFileReader<Long, CharSequence>(new File(output.toString() + "/part-00000")));
}
From source file: org.apache.avro.mapred.TestWordCountGeneric.java
License: Apache License

@Test
@SuppressWarnings("deprecation")
public void testJob() throws Exception {
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");
    JobConf job = new JobConf();
    try {
        WordCountUtil.writeLinesFile();

        job.setJobName("wordcount");

        AvroJob.setInputGeneric(job, Schema.create(Schema.Type.STRING));
        AvroJob.setOutputGeneric(job, WordCount.SCHEMA$);

        job.setMapperClass(MapImpl.class);
        job.setCombinerClass(ReduceImpl.class);
        job.setReducerClass(ReduceImpl.class);

        FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
        FileOutputFormat.setOutputPath(job, outputPath);
        FileOutputFormat.setCompressOutput(job, true);

        JobClient.runJob(job);

        WordCountUtil.validateCountsFile();
    } finally {
        outputPath.getFileSystem(job).delete(outputPath);
    }
}
From source file: org.apache.avro.mapred.TestWordCountSpecific.java
License: Apache License

@Test
@SuppressWarnings("deprecation")
public void testJob() throws Exception {
    JobConf job = new JobConf();
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");
    try {
        WordCountUtil.writeLinesFile();

        job.setJobName("wordcount");

        AvroJob.setInputSpecific(job, Schema.create(Schema.Type.STRING));
        AvroJob.setOutputSpecific(job, WordCount.SCHEMA$);

        job.setMapperClass(MapImpl.class);
        job.setCombinerClass(ReduceImpl.class);
        job.setReducerClass(ReduceImpl.class);

        FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
        FileOutputFormat.setOutputPath(job, outputPath);
        FileOutputFormat.setCompressOutput(job, true);

        JobClient.runJob(job);

        WordCountUtil.validateCountsFile();
    } finally {
        outputPath.getFileSystem(job).delete(outputPath);
    }
}
From source file: org.apache.avro.mapred.tether.TetherJob.java
License: Apache License

private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);

    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}
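setupTetherJob is private, so callers reach it through TetherJob's public entry points. A hedged usage sketch, assuming TetherJob.setExecutable and TetherJob.runJob as that public surface; the paths are placeholders.

JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/user/example/in"));    // placeholder input path
FileOutputFormat.setOutputPath(job, new Path("/user/example/out"));  // placeholder output path
TetherJob.setExecutable(job, new File("/path/to/tethered-program")); // program speaking the tether protocol
RunningJob running = TetherJob.runJob(job); // applies setupTetherJob before submitting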