List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
From source file:csc555.ebratt.depaul.edu.GildPercentDriverPass1.java
License:Open Source License
/** * /*from w w w. j av a 2s . c om*/ * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [2] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the GildPercentReducerPass1.class as the * combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String groupBy = getConf().get("groupBy"); StringBuffer sb = new StringBuffer(); sb.append("gild percent of: "); sb.append(groupBy); job.setJobName(sb.toString()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // testing -- ensure each node gets 2 reducers JobConf jobConf = new JobConf(getConf(), GildPercentDriverPass1.class); JobClient jobClient = new JobClient(jobConf); ClusterStatus cluster = jobClient.getClusterStatus(); job.setNumReduceTasks(cluster.getTaskTrackers() * 2); // Mapper and Reducer Classes to use job.setMapperClass(GildPercentMapperPass1.class); job.setReducerClass(GildPercentReducerPass1.class); // Mapper output classes job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(DoubleWritable.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[2].equals("yes")) { job.setCombinerClass(GildPercentReducerPass1.class); } // The Jar file to run job.setJarByClass(GildPercentDriverPass1.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }
From source file:csc555.ebratt.depaul.edu.GildPercentDriverPass2.java
License:Open Source License
/** * /*from w ww .j ava2 s. c o m*/ * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [2] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the GildPercentReducerPass2.class as * the combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); StringBuffer sb = new StringBuffer(); sb.append("sorted gild percent"); job.setJobName(sb.toString()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // to ensure output is sorted job.setNumReduceTasks(1); // Mapper and Reducer Classes to use job.setMapperClass(GildPercentMapperPass2.class); job.setReducerClass(GildPercentReducerPass2.class); // Mapper output classes job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(Text.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(DoubleWritable.class); job.setOutputValueClass(Text.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[2].equals("yes")) { job.setCombinerClass(GildPercentReducerPass2.class); } // sort in descending order job.setSortComparatorClass(DoubleWritableDescendingComparator.class); // The Jar file to run job.setJarByClass(GildPercentDriverPass2.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }
From source file:csc555.ebratt.depaul.edu.RCTop10Driver.java
License:Open Source License
/** * //from w ww. ja va 2s . co m * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @throws Exception * if there is an issue with any of the arguments * */ public int run(String[] args) throws Exception { Job job = new Job(getConf(), "Top 10 Reddit"); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // ensure 1 reduce tasks for ranking job.setNumReduceTasks(1); // Mapper and Reducer Classes to use job.setMapperClass(RCTop10Mapper.class); job.setReducerClass(RCTop10Reducer.class); // Mapper output classes job.setMapOutputKeyClass(GroupByCountPair.class); job.setMapOutputValueClass(Text.class); // set custom partitioner job.setPartitionerClass(GroupByCountPairPartitioner.class); // set custom grouping comparator job.setGroupingComparatorClass(GroupByGroupingComparator.class); // input class job.setInputFormatClass(KeyValueTextInputFormat.class); // Reducer output classes job.setOutputKeyClass(GroupByCountPair.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); // The Jar file to run job.setJarByClass(RCTop10Driver.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }
From source file:csc555.ebratt.depaul.edu.RCWordCountAcronymsDriver.java
License:Open Source License
/**
 * Runs the driver by creating a new hadoop Job based on the configuration.
 * Defines the path in/out based on the first two arguments. Allows for an
 * optional combiner based on the fourth argument.
 *
 * @param args [0] the input directory on HDFS;
 *             [1] the output directory on HDFS;
 *             [3] "yes" to use RCWordCountReducer.class as the combiner
 * @return 0 if the job succeeds, 1 if it fails
 * @throws Exception if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String aggregate = getConf().get("aggregate");
    String groupBy = getConf().get("groupBy");
    StringBuilder sb = new StringBuilder();
    sb.append("count of acronyms in: ");
    sb.append(aggregate);
    sb.append("; grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // Size the reduce phase to the cluster: two reducers per task tracker.
    JobConf jobConf = new JobConf(getConf(), RCWordCountAcronymsDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer classes to use
    job.setMapperClass(RCWordCountMapper.class);
    job.setReducerClass(RCWordCountReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Optional combiner
    if (args[3].equals("yes")) {
        job.setCombinerClass(RCWordCountReducer.class);
    }

    // The jar file to run
    job.setJarByClass(RCWordCountAcronymsDriver.class);

    boolean success = job.waitForCompletion(true);
    // BUG FIX: the original called System.exit() here, making "return 0"
    // unreachable and preventing in-process reuse; return the code instead.
    return success ? 0 : 1;
}
From source file:csc555.ebratt.depaul.edu.RCWordCountDriver.java
License:Open Source License
/** * /* ww w. j av a 2 s. c om*/ * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [3] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the RCWordCountReducer.class as the * combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String aggregate = getConf().get("aggregate"); String groupBy = getConf().get("groupBy"); StringBuffer sb = new StringBuffer(); sb.append("count of: "); sb.append(aggregate); sb.append("; grouped by: "); sb.append(groupBy); job.setJobName(sb.toString()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // testing -- ensure each node gets 2 reducers JobConf jobConf = new JobConf(getConf(), RCWordCountDriver.class); JobClient jobClient = new JobClient(jobConf); ClusterStatus cluster = jobClient.getClusterStatus(); job.setNumReduceTasks(cluster.getTaskTrackers() * 5); // Mapper and Reducer Classes to use job.setMapperClass(RCWordCountMapper.class); job.setReducerClass(RCWordCountReducer.class); // Mapper output classes job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[3].equals("yes")) { job.setCombinerClass(RCWordCountReducer.class); } // The Jar file to run job.setJarByClass(RCWordCountDriver.class); boolean success = job.waitForCompletion(true); 
System.exit(success ? 0 : 1); return 0; }
From source file:csc555.ebratt.depaul.edu.VoteCountDriver.java
License:Open Source License
/** * /*from w w w .ja va2 s . c om*/ * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [2] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the VoteCountReducer.class as the * combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String groupBy = getConf().get("groupBy"); StringBuffer sb = new StringBuffer(); sb.append("count of votes grouped by: "); sb.append(groupBy); job.setJobName(sb.toString()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // testing -- ensure each node gets 2 reducers JobConf jobConf = new JobConf(getConf(), VoteCountDriver.class); JobClient jobClient = new JobClient(jobConf); ClusterStatus cluster = jobClient.getClusterStatus(); job.setNumReduceTasks(cluster.getTaskTrackers() * 2); // Mapper and Reducer Classes to use job.setMapperClass(VoteCountMapper.class); job.setReducerClass(LongSumReducer.class); // Mapper output classes job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[2].equals("yes")) { job.setCombinerClass(LongSumReducer.class); } // The Jar file to run job.setJarByClass(VoteCountDriver.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }
From source file:csc555.ebratt.depaul.edu.VoteSorterDriver.java
License:Open Source License
/** * /*from w w w . j a v a2s. c o m*/ * Runs the driver by creating a new hadoop Job based on the configuration. * Defines the path in/out based on the first two arguments. Allows for an * optional combiner based on the 4th argument. * * @param args * [0] the input directory on HDFS * @param args * [1] the output directory on HDFS * @param args * [2] tells the system whether or not to use a combiner ("yes") * and, if so, it will use the VoteSorterReducer.class as the * combiner. * @throws Exception * if there is an issue with any of the arguments * */ @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); StringBuffer sb = new StringBuffer(); sb.append("sorted vote counts"); job.setJobName(sb.toString()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // to ensure output is sorted job.setNumReduceTasks(1); // Mapper and Reducer Classes to use job.setMapperClass(VoteSorterMapper.class); job.setReducerClass(VoteSorterReducer.class); // Mapper output classes job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); // Input format class job.setInputFormatClass(TextInputFormat.class); // Reducer output classes job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); // Output format class job.setOutputFormatClass(TextOutputFormat.class); // Combiner if (args[2].equals("yes")) { job.setCombinerClass(VoteSorterReducer.class); } // sort in descending order job.setSortComparatorClass(LongWritable.DecreasingComparator.class); // The Jar file to run job.setJarByClass(VoteSorterDriver.class); boolean success = job.waitForCompletion(true); System.exit(success ? 0 : 1); return 0; }
From source file:DAAL.CovarianceDense.java
License:Open Source License
/**
 * Configures and runs the dense-covariance MapReduce job: a step-1 mapper
 * feeding a step-2 reducer, writing (IntWritable, WriteableData) pairs to a
 * SequenceFile.
 *
 * @param args [0] the input path on HDFS; [1] the output path on HDFS
 * @return 0 if the job succeeds, 1 if it fails
 * @throws Exception on job-setup or submission failure
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    // Ship the native DAAL/TBB/OpenMP libraries to every node; the "#name"
    // URI fragment names the symlink created in each task's working dir.
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job covarianceJob = new Job(conf, "CovarianceDense Job");

    FileInputFormat.setInputPaths(covarianceJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(covarianceJob, new Path(args[1]));

    covarianceJob.setJarByClass(CovarianceDense.class);
    covarianceJob.setMapperClass(CovarianceDenseStep1Mapper.class);
    covarianceJob.setReducerClass(CovarianceDenseStep2Reducer.class);

    covarianceJob.setInputFormatClass(TextInputFormat.class);
    covarianceJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    covarianceJob.setOutputKeyClass(IntWritable.class);
    covarianceJob.setOutputValueClass(WriteableData.class);

    return covarianceJob.waitForCompletion(true) ? 0 : 1;
}
From source file:DAAL.CovarianceSparse.java
License:Open Source License
/**
 * Configures and runs the sparse-covariance MapReduce job: a step-1 mapper
 * feeding a step-2 reducer, writing (IntWritable, WriteableData) pairs to a
 * SequenceFile.
 *
 * @param args [0] the input path on HDFS; [1] the output path on HDFS
 * @return 0 if the job succeeds, 1 if it fails
 * @throws Exception on job-setup or submission failure
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    // Ship the native DAAL/TBB/OpenMP libraries to every node; the "#name"
    // URI fragment names the symlink created in each task's working dir.
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job covarianceJob = new Job(conf, "CovarianceSparse Job");

    FileInputFormat.setInputPaths(covarianceJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(covarianceJob, new Path(args[1]));

    covarianceJob.setJarByClass(CovarianceSparse.class);
    covarianceJob.setMapperClass(CovarianceSparseStep1Mapper.class);
    covarianceJob.setReducerClass(CovarianceSparseStep2Reducer.class);

    covarianceJob.setInputFormatClass(TextInputFormat.class);
    covarianceJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    covarianceJob.setOutputKeyClass(IntWritable.class);
    covarianceJob.setOutputValueClass(WriteableData.class);

    return covarianceJob.waitForCompletion(true) ? 0 : 1;
}
From source file:DAAL.Kmeans.java
License:Open Source License
/**
 * Runs K-means as a chain of MapReduce jobs: one initialization job that
 * writes starting centroids to /Hadoop/Kmeans/initResults, followed by
 * nIterations compute jobs. Intermediate iterations write to per-iteration
 * scratch directories; the final iteration writes to the user-supplied
 * output path.
 *
 * @param args [0] the input path on HDFS; [1] the final output path on HDFS
 * @return 0 if every job succeeds, 1 as soon as any job fails
 * @throws Exception on job-setup or submission failure
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    // Ship the native DAAL/TBB/OpenMP libraries to every node via the
    // distributed cache (the "#name" fragment names the task-dir symlink).
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job initJob = new Job(conf, "K-means Init Job");
    FileInputFormat.setInputPaths(initJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(initJob, new Path("/Hadoop/Kmeans/initResults"));
    initJob.setMapperClass(KmeansInitStep1Mapper.class);
    initJob.setReducerClass(KmeansInitStep2Reducer.class);
    initJob.setInputFormatClass(TextInputFormat.class);
    initJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    initJob.setOutputKeyClass(IntWritable.class);
    initJob.setOutputValueClass(WriteableData.class);
    initJob.setJarByClass(Kmeans.class);

    // BUG FIX: the original ignored the init job's result and ran the
    // compute iterations even when initialization had failed.
    if (!initJob.waitForCompletion(true)) {
        return 1;
    }

    for (int i = 0; i < nIterations; i++) {
        Job job = new Job(conf, "K-means compute Job");
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // The last iteration writes to the caller's output directory;
        // earlier iterations write to per-iteration scratch paths.
        if (i == nIterations - 1) {
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
        } else {
            FileOutputFormat.setOutputPath(job, new Path("/Hadoop/Kmeans/ResultsIter" + i));
        }
        job.setMapperClass(KmeansStep1Mapper.class);
        job.setReducerClass(KmeansStep2Reducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(WriteableData.class);
        job.setJarByClass(Kmeans.class);

        // BUG FIX: fail fast on a failed iteration instead of continuing to
        // iterate; the original only reported the LAST iteration's status,
        // so earlier failures were silently masked.
        if (!job.waitForCompletion(true)) {
            return 1;
        }
    }
    return 0;
}