List of usage examples for org.apache.hadoop.mapred JobConf setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass)
From source file:mapreducejava.SpeciesDriver.java
public static void main(String[] args) throws Exception { JobClient client = new JobClient(); JobConf conf = new JobConf(SpeciesDriver.class); conf.setJobName("Page-rank Species Graph Builder"); final File f = new File(SpeciesDriver.class.getProtectionDomain().getCodeSource().getLocation().getPath()); String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/InputFiles/species_medium.txt"; String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result"; FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); //conf.setOutputKeyClass(Text.class); //conf.setOutputValueClass(Text.class); conf.setMapperClass(SpeciesGraphBuilderMapper.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); conf.setReducerClass(SpeciesGraphBuilderReducer.class); //conf.setCombinerClass(SpeciesGraphBuilderReducer.class); //conf.setInputPath(new Path("graph1")); //conf.setOutputPath(new Path("graph2")); // take the input and output from the command line FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); client.setConf(conf);/*from www . ja v a 2s .co m*/ try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result/part-00000"; for (int i = 0; i < 25; i++) { client = new JobClient(); conf = new JobConf(SpeciesDriver.class); conf.setJobName("Species Iter"); int count = i + 1; outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result" + count; conf.setNumReduceTasks(5); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); conf.setMapperClass(SpeciesIterMapper2.class); conf.setReducerClass(SpeciesIterReducer2.class); conf.setCombinerClass(SpeciesIterReducer2.class); client.setConf(conf); try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } inFiles = outFiles; } //Viewer client = new JobClient(); conf = new JobConf(SpeciesDriver.class); conf.setJobName("Species Viewer"); conf.setOutputKeyClass(FloatWritable.class); conf.setOutputValueClass(Text.class); inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result25/part-00000"; outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/ResultFinal"; FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); conf.setMapperClass(SpeciesViewerMapper.class); conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class); client.setConf(conf); try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } }
From source file:map_reduce.MapReduce_OptimizedBrandesAdditions_DO_JUNG.java
License:Open Source License
@SuppressWarnings("deprecation") @Override/*from w w w .j a v a2s . c om*/ public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("Usage:\n"); System.exit(1); } // Job job = new Job(super.getConf()); // READ IN ALL COMMAND LINE ARGUMENTS // EXAMPLE: // hadoop jar MapReduce_OptimizedBrandesAdditions_DO_JUNG.jar // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar // -Dmapred.job.map.memory.mb=4096 // -Dmapred.job.reduce.memory.mb=4096 // -Dmapred.child.java.opts=-Xmx3500m // -Dmapreduce.task.timeout=60000000 // -Dmapreduce.job.queuename=QUEUENAME // input_iterbrandes_additions_nocomb_10k_1 output_iterbrandes_additions_nocomb_10k_1 // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/ int m = -1; // input path to use on hdfs Path inputPath = new Path(args[++m]); // output path to use on hdfs Path outputPath = new Path(args[++m]); // number of Mappers to split the sources: e.g., 1, 10, 100 etc. // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number. int numOfMaps = Integer.parseInt(args[++m]); // number of Reducers to collect the output int numOfReduce = Integer.parseInt(args[++m]); // Number of vertices in graph int N = Integer.parseInt(args[++m]); // Number of edges in graph int M = Integer.parseInt(args[++m]); // Graph file (edge list, tab delimited) (full path) String graph = args[++m]; // File with edges to be added (tab delimited) (full path) // Note: this version handles only edges between existing vertices in the graph. String random_edges = args[++m]; // Number of random edges added int re = Integer.parseInt(args[++m]); // Experiment iteration (in case of multiple experiments) int iter = Integer.parseInt(args[++m]); // Use combiner or not (true/false) Boolean comb = Boolean.valueOf(args[++m]); // Output path for file with stats String statsoutputpath = args[++m]; // Output path for file with final betweenness values String betoutputpath = args[++m]; // BEGIN INITIALIZATION JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesAdditions_DO_JUNG.class); FileSystem fs = FileSystem.get(conf); String setup = "_additions_edges" + re + "_maps" + numOfMaps + "_comb" + comb; conf.setJobName("OptimizedBrandesAdditionsDOJung_" + graph + setup + "_" + iter); conf.set("HDFS_GRAPH", graph + setup); conf.set("HDFS_Random_Edges", random_edges + setup); conf.set("output", outputPath.getName()); conf.set("setup", setup); // CREATE INPUT FILES FOR MAPPERS int numOfTasksperMap = (int) Math.ceil(N / numOfMaps); //generate an input file for each map task for (int i = 0; i < numOfMaps - 1; i++) { Path file = new Path(inputPath, "part-r-" + i); IntWritable start = new IntWritable(i * numOfTasksperMap); IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end); } // last mapper takes what is left Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1)); IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap); IntWritable end = new IntWritable(N - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end); // COPY FILES TO MAPPERS System.out.println("Copying graph to cache"); String LOCAL_GRAPH = graph; Path hdfsPath = new Path(graph + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); System.out.println("Copying random edges to cache"); String LOCAL_Random_Edges = random_edges; hdfsPath = new Path(random_edges + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(IterBrandesMapper.class); conf.setNumMapTasks(numOfMaps); if (comb) conf.setCombinerClass(IterBrandesReducer.class); conf.setReducerClass(IterBrandesReducer.class); conf.setNumReduceTasks(numOfReduce); // turn off speculative execution, because DFS doesn't handle multiple writers to the same file. conf.setSpeculativeExecution(false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); // conf.set("mapred.job.name", "APS-" + outputPath.getName()); conf.setNumTasksToExecutePerJvm(-1); // JVM reuse System.out.println("Starting the execution...! Pray!! \n"); long time1 = System.nanoTime(); RunningJob rj = JobClient.runJob(conf); long time2 = System.nanoTime(); // READ OUTPUT FILES System.out.println("\nFinished and now reading/writing Betweenness Output...\n"); // Assuming 1 reducer. Path inFile = new Path(outputPath, "part-00000"); IntWritable id = new IntWritable(); DoubleWritable betweenness = new DoubleWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf); FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter)); try { int i = 0; for (; i < (N + M + re); i++) { reader.next(id, betweenness); fw.write(id + "\t" + betweenness + "\n"); fw.flush(); } } finally { reader.close(); fw.close(); } System.out.println("\nWriting times Output...\n"); fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter)); fw.write("Total-time:\t" + (time2 - time1) + "\n"); fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_MAPS") + "\n"); fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_REDUCES") + "\n"); fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("CPU_MILLISECONDS") + "\n"); fw.write("total-gc-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS") + "\n"); fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("PHYSICAL_MEMORY_BYTES") + "\n"); fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("VIRTUAL_MEMORY_BYTES") + "\n"); fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes") + "\n"); fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n"); fw.flush(); try { Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator(); while (counters.hasNext()) { Counter cc = counters.next(); fw.write(cc.getName() + "\t" + cc.getCounter() + "\n"); fw.flush(); } } finally { fw.close(); } return 0; }
From source file:map_reduce.MapReduce_OptimizedBrandesDeletions_DO_JUNG.java
License:Open Source License
@SuppressWarnings("deprecation") @Override/*w w w. ja v a 2 s. c o m*/ public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("Usage:\n"); System.exit(1); } // Job job = new Job(super.getConf()); // READ IN ALL COMMAND LINE ARGUMENTS // EXAMPLE: // hadoop jar MapReduce_OptimizedBrandesDeletions_DO_JUNG.jar // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar // -Dmapred.job.map.memory.mb=4096 // -Dmapred.job.reduce.memory.mb=4096 // -Dmapred.child.java.opts=-Xmx3500m // -Dmapreduce.task.timeout=60000000 // -Dmapreduce.job.queuename=QUEUENAME // input_iterbrandes_deletions_nocomb_10k_1 output_iterbrandes_deletions_nocomb_10k_1 // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/ int m = -1; // input path to use on hdfs Path inputPath = new Path(args[++m]); // output path to use on hdfs Path outputPath = new Path(args[++m]); // number of Mappers to split the sources: e.g., 1, 10, 100 etc. // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number. int numOfMaps = Integer.parseInt(args[++m]); // number of Reducers to collect the output int numOfReduce = Integer.parseInt(args[++m]); // Number of vertices in graph int N = Integer.parseInt(args[++m]); // Number of edges in graph int M = Integer.parseInt(args[++m]); // Graph file (edge list, tab delimited) (full path) String graph = args[++m]; // File with edges to be added (tab delimited) (full path) // Note: this version handles only edges between existing vertices in the graph. String random_edges = args[++m]; // Number of random edges added int re = Integer.parseInt(args[++m]); // Experiment iteration (in case of multiple experiments) int iter = Integer.parseInt(args[++m]); // Use combiner or not (true/false) Boolean comb = Boolean.valueOf(args[++m]); // Output path for file with stats String statsoutputpath = args[++m]; // Output path for file with final betweenness values String betoutputpath = args[++m]; // BEGIN INITIALIZATION JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesDeletions_DO_JUNG.class); FileSystem fs = FileSystem.get(conf); String setup = "_deletions_edges" + re + "_maps" + numOfMaps + "_comb" + comb; conf.setJobName("OptimizedBrandesDeletionsDOJung_" + graph + setup + "_" + iter); conf.set("HDFS_GRAPH", graph + setup); conf.set("HDFS_Random_Edges", random_edges + setup); conf.set("output", outputPath.getName()); conf.set("setup", setup); // CREATE INPUT FILES FOR MAPPERS int numOfTasksperMap = (int) Math.ceil(N / numOfMaps); //generate an input file for each map task for (int i = 0; i < numOfMaps - 1; i++) { Path file = new Path(inputPath, "part-r-" + i); IntWritable start = new IntWritable(i * numOfTasksperMap); IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end); } // last mapper takes what is left Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1)); IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap); IntWritable end = new IntWritable(N - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end); // COPY FILES TO MAPPERS System.out.println("Copying graph to cache"); String LOCAL_GRAPH = graph; Path hdfsPath = new Path(graph + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); System.out.println("Copying random edges to cache"); String LOCAL_Random_Edges = random_edges; hdfsPath = new Path(random_edges + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(IterBrandesMapper.class); conf.setNumMapTasks(numOfMaps); if (comb) conf.setCombinerClass(IterBrandesReducer.class); conf.setReducerClass(IterBrandesReducer.class); conf.setNumReduceTasks(numOfReduce); // turn off speculative execution, because DFS doesn't handle multiple writers to the same file. conf.setSpeculativeExecution(false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); // conf.set("mapred.job.name", "APS-" + outputPath.getName()); conf.setNumTasksToExecutePerJvm(-1); // JVM reuse System.out.println("Starting the execution...! Pray!! \n"); long time1 = System.nanoTime(); RunningJob rj = JobClient.runJob(conf); long time2 = System.nanoTime(); // READ OUTPUT FILES System.out.println("\nFinished and now reading/writing Betweenness Output...\n"); // Assuming 1 reducer. Path inFile = new Path(outputPath, "part-00000"); IntWritable id = new IntWritable(); DoubleWritable betweenness = new DoubleWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf); FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter)); try { int i = 0; for (; i < (N + (M - re)); i++) { reader.next(id, betweenness); fw.write(id + "\t" + betweenness + "\n"); fw.flush(); } } finally { reader.close(); fw.close(); } System.out.println("\nWriting times Output...\n"); fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter)); fw.write("Total-time:\t" + (time2 - time1) + "\n"); fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_MAPS") + "\n"); fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_REDUCES") + "\n"); fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("CPU_MILLISECONDS") + "\n"); fw.write("total-gc-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS") + "\n"); fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("PHYSICAL_MEMORY_BYTES") + "\n"); fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("VIRTUAL_MEMORY_BYTES") + "\n"); fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes") + "\n"); fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n"); fw.flush(); try { Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator(); while (counters.hasNext()) { Counter cc = counters.next(); fw.write(cc.getName() + "\t" + cc.getCounter() + "\n"); fw.flush(); } } finally { fw.close(); } return 0; }
From source file:me.tingri.graphs.cc.block.ConnectedComponentsBlock.java
License:Apache License
protected RunningJob join(short blockWidth, int recurDiagonalMult, Path edgePath, Path curVectorPath, Path tempVectorPath, int numOfReducers, String makeSymmetric, FileSystem fs) throws Exception { Utility.deleteIfExists(fs, tempVectorPath); JobConf conf = new JobConf(getConf(), ConnectedComponentsBlock.class); conf.set(BLOCK_WIDTH, "" + blockWidth); conf.set(RECURSIVE_DIAG_MULT, "" + recurDiagonalMult); conf.set(FIELD_SEPARATOR, DEFAULT_FIELD_SEPARATOR); conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR); conf.set(SEPARATOR_WITHIN_VALUE, SPACE); conf.set(MAKE_SYMMETRIC, makeSymmetric); conf.setJobName("CCBlock_join"); conf.setMapperClass(JoinMapper.class); conf.setReducerClass(JoinReducer.class); FileInputFormat.setInputPaths(conf, edgePath, curVectorPath); FileOutputFormat.setOutputPath(conf, tempVectorPath); conf.setNumReduceTasks(numOfReducers); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); return JobClient.runJob(conf); }
From source file:me.tingri.graphs.cc.block.ConnectedComponentsBlock.java
License:Apache License
protected RunningJob merge(short blockWidth, Path tempVectorPath, Path nextVectorPath, int numOfReducers, FileSystem fs) throws Exception { Utility.deleteIfExists(fs, nextVectorPath); JobConf conf = new JobConf(getConf(), ConnectedComponentsBlock.class); conf.set(BLOCK_WIDTH, "" + blockWidth); conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR); conf.setJobName("CCBlock_reduce"); conf.setMapperClass(MergeMapper.class); conf.setReducerClass(MergeReducer.class); FileInputFormat.setInputPaths(conf, tempVectorPath); FileOutputFormat.setOutputPath(conf, nextVectorPath); conf.setNumReduceTasks(numOfReducers); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); return JobClient.runJob(conf); }
From source file:me.tingri.graphs.cc.ConnectedComponents.java
License:Apache License
protected RunningJob join(FileSystem fs, Path edgePath, Path vecPath, Path tempVectorPath, String makeSymmetric, int numOfReducers) throws Exception { Utility.deleteIfExists(fs, tempVectorPath); JobConf conf = new JobConf(getConf(), ConnectedComponents.class); conf.set(FIELD_SEPARATOR, DEFAULT_FIELD_SEPARATOR); conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR); conf.set(MAKE_SYMMETRIC, makeSymmetric); conf.setJobName("ConnectedComponents_Join"); conf.setMapperClass(JoinMapper.class); conf.setReducerClass(JoinReducer.class); FileInputFormat.setInputPaths(conf, edgePath, vecPath); FileOutputFormat.setOutputPath(conf, tempVectorPath); conf.setNumReduceTasks(numOfReducers); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); return JobClient.runJob(conf); }
From source file:me.tingri.graphs.cc.ConnectedComponents.java
License:Apache License
protected RunningJob merge(FileSystem fs, Path tempVectorPath, Path nextVectorPath, int numOfReducers) throws Exception { Utility.deleteIfExists(fs, nextVectorPath); JobConf conf = new JobConf(getConf(), ConnectedComponents.class); conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR); conf.setJobName("ConnectedComponents_Merge"); conf.setMapperClass(MergeMapper.class); conf.setReducerClass(MergeReducer.class); FileInputFormat.setInputPaths(conf, tempVectorPath); FileOutputFormat.setOutputPath(conf, nextVectorPath); conf.setNumReduceTasks(numOfReducers); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); return JobClient.runJob(conf); }
From source file:mr.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception { Properties properties = new Properties(); AppProps.addApplicationTag(properties, "tutorials"); AppProps.addApplicationTag(properties, "cluster:development"); AppProps.setApplicationName(properties, "cascading-mapreduce-flow"); JobConf conf = new JobConf(WordCount.class); conf.setJobName("casading-mapreduce-flow"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); MapReduceFlow flow = new MapReduceFlow("wordcount", conf, true); // JobClient.runJob(conf); flow.complete();/* w ww. j a v a 2 s .c om*/ }
From source file:name.abhijitsarkar.hadoop.citation.CitationCombiner.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), getClass()); conf.setJobName("citation-combiner"); /* This is to set the separator byte for KeyValueTextInputFormat */ conf.set("key.value.separator.in.input.line", ","); conf.setMapperClass(CitationMapper.class); conf.setReducerClass(CitationReducer.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*w w w .j av a2 s .com*/ return 0; }