List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat
public void setOutputFormat(Class<? extends OutputFormat> theClass)
From source file:junto.algorithm.parallel.LP_ZGL_Hadoop.java
License:Apache License
public static void main(String[] args) throws Exception { Hashtable config = ConfigReader.read_config(args); String baseInputFilePat = Defaults.GetValueOrDie(config, "hdfs_input_pattern"); String baseOutputFilePat = Defaults.GetValueOrDie(config, "hdfs_output_base"); int numIterations = Integer.parseInt(Defaults.GetValueOrDie(config, "iters")); String currInputFilePat = baseInputFilePat; String currOutputFilePat = ""; for (int iter = 1; iter <= numIterations; ++iter) { JobConf conf = new JobConf(LP_ZGL_Hadoop.class); conf.setJobName("lp_zgl_hadoop"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(LP_ZGL_Map.class); // conf.setCombinerClass(LP_ZGL_Reduce.class); conf.setReducerClass(LP_ZGL_Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); // hyperparameters conf.set("mu1", Defaults.GetValueOrDie(config, "mu1")); conf.set("mu2", Defaults.GetValueOrDie(config, "mu2")); conf.set("keepTopKLabels", Defaults.GetValueOrDefault((String) config.get("keep_top_k_labels"), Integer.toString(Integer.MAX_VALUE))); if (iter > 1) { // output from last iteration is the input for current iteration currInputFilePat = currOutputFilePat + "/*"; }/*from ww w .ja v a 2s .c o m*/ FileInputFormat.setInputPaths(conf, new Path(currInputFilePat)); currOutputFilePat = baseOutputFilePat + "_" + iter; FileOutputFormat.setOutputPath(conf, new Path(currOutputFilePat)); JobClient.runJob(conf); } }
From source file:junto.algorithm.parallel.MADHadoop.java
License:Apache License
public static void main(String[] args) throws Exception { Hashtable config = ConfigReader.read_config(args); String baseInputFilePat = Defaults.GetValueOrDie(config, "hdfs_input_pattern"); String baseOutputFilePat = Defaults.GetValueOrDie(config, "hdfs_output_base"); int numIterations = Integer.parseInt(Defaults.GetValueOrDie(config, "iters")); int numReducers = Defaults.GetValueOrDefault((String) config.get("num_reducers"), 10); String currInputFilePat = baseInputFilePat; String currOutputFilePat = ""; for (int iter = 1; iter <= numIterations; ++iter) { JobConf conf = new JobConf(MADHadoop.class); conf.setJobName("mad_hadoop"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(MADHadoopMap.class); // conf.setCombinerClass(MADHadoopReduce.class); conf.setReducerClass(MADHadoopReduce.class); conf.setNumReduceTasks(numReducers); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); // hyperparameters conf.set("mu1", Defaults.GetValueOrDie(config, "mu1")); conf.set("mu2", Defaults.GetValueOrDie(config, "mu2")); conf.set("mu3", Defaults.GetValueOrDie(config, "mu3")); conf.set("keepTopKLabels", Defaults.GetValueOrDefault((String) config.get("keep_top_k_labels"), Integer.toString(Integer.MAX_VALUE))); if (iter > 1) { // output from last iteration is the input for current iteration currInputFilePat = currOutputFilePat + "/*"; }//from w ww. j a v a 2 s. co m FileInputFormat.setInputPaths(conf, new Path(currInputFilePat)); currOutputFilePat = baseOutputFilePat + "_iter_" + iter; FileOutputFormat.setOutputPath(conf, new Path(currOutputFilePat)); JobClient.runJob(conf); } }
From source file:kafka.etl.impl.SimpleKafkaETLJob.java
License:Apache License
protected JobConf createJobConf() throws Exception { JobConf jobConf = KafkaETLJob.createJobConf("SimpleKafakETL", _topic, _props, getClass()); jobConf.setMapperClass(SimpleKafkaETLMapper.class); KafkaETLInputFormat.setInputPaths(jobConf, new Path(_input)); jobConf.setOutputKeyClass(LongWritable.class); jobConf.setOutputValueClass(Text.class); jobConf.setOutputFormat(TextOutputFormat.class); TextOutputFormat.setCompressOutput(jobConf, false); Path output = new Path(_output); FileSystem fs = output.getFileSystem(jobConf); if (fs.exists(output)) fs.delete(output);/*from ww w .ja v a 2s . co m*/ TextOutputFormat.setOutputPath(jobConf, output); jobConf.setNumReduceTasks(0); return jobConf; }
From source file:mapreduce.DosAttack.java
License:Apache License
private void issue() throws IOException { LOG.info("Starting DOS on url[{}] with clients[{}]", wsURL, numMappers); DosMapper.init(wsURL);//from w ww. j a v a 2 s. c o m JobConf job = new JobConf(DosAttack.class); job.setJarByClass(DosAttack.class); job.setJobName("DOS Attack"); job.setNumReduceTasks(0); job.setInputFormat(NullInputFormat.class); job.setOutputFormat(NullOutputFormat.class); job.setMapperClass(DosMapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); job.setNumMapTasks(numMappers); job.setInt(NUM_MAPPERS_KEY, numMappers); job.setInt(NUM_REQUESTS_KEY, numRequests); job.set(TARGET_URL_KEY, wsURL); JobClient.runJob(job); }
From source file:map_reduce.MapReduce_OptimizedBrandesAdditions_DO_JUNG.java
License:Open Source License
@SuppressWarnings("deprecation") @Override/*from w w w. jav a2 s . c o m*/ public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("Usage:\n"); System.exit(1); } // Job job = new Job(super.getConf()); // READ IN ALL COMMAND LINE ARGUMENTS // EXAMPLE: // hadoop jar MapReduce_OptimizedBrandesAdditions_DO_JUNG.jar // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar // -Dmapred.job.map.memory.mb=4096 // -Dmapred.job.reduce.memory.mb=4096 // -Dmapred.child.java.opts=-Xmx3500m // -Dmapreduce.task.timeout=60000000 // -Dmapreduce.job.queuename=QUEUENAME // input_iterbrandes_additions_nocomb_10k_1 output_iterbrandes_additions_nocomb_10k_1 // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/ int m = -1; // input path to use on hdfs Path inputPath = new Path(args[++m]); // output path to use on hdfs Path outputPath = new Path(args[++m]); // number of Mappers to split the sources: e.g., 1, 10, 100 etc. // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number. int numOfMaps = Integer.parseInt(args[++m]); // number of Reducers to collect the output int numOfReduce = Integer.parseInt(args[++m]); // Number of vertices in graph int N = Integer.parseInt(args[++m]); // Number of edges in graph int M = Integer.parseInt(args[++m]); // Graph file (edge list, tab delimited) (full path) String graph = args[++m]; // File with edges to be added (tab delimited) (full path) // Note: this version handles only edges between existing vertices in the graph. String random_edges = args[++m]; // Number of random edges added int re = Integer.parseInt(args[++m]); // Experiment iteration (in case of multiple experiments) int iter = Integer.parseInt(args[++m]); // Use combiner or not (true/false) Boolean comb = Boolean.valueOf(args[++m]); // Output path for file with stats String statsoutputpath = args[++m]; // Output path for file with final betweenness values String betoutputpath = args[++m]; // BEGIN INITIALIZATION JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesAdditions_DO_JUNG.class); FileSystem fs = FileSystem.get(conf); String setup = "_additions_edges" + re + "_maps" + numOfMaps + "_comb" + comb; conf.setJobName("OptimizedBrandesAdditionsDOJung_" + graph + setup + "_" + iter); conf.set("HDFS_GRAPH", graph + setup); conf.set("HDFS_Random_Edges", random_edges + setup); conf.set("output", outputPath.getName()); conf.set("setup", setup); // CREATE INPUT FILES FOR MAPPERS int numOfTasksperMap = (int) Math.ceil(N / numOfMaps); //generate an input file for each map task for (int i = 0; i < numOfMaps - 1; i++) { Path file = new Path(inputPath, "part-r-" + i); IntWritable start = new IntWritable(i * numOfTasksperMap); IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end); } // last mapper takes what is left Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1)); IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap); IntWritable end = new IntWritable(N - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end); // COPY FILES TO MAPPERS System.out.println("Copying graph to cache"); String LOCAL_GRAPH = graph; Path hdfsPath = new Path(graph + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); System.out.println("Copying random edges to cache"); String LOCAL_Random_Edges = random_edges; hdfsPath = new Path(random_edges + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(IterBrandesMapper.class); conf.setNumMapTasks(numOfMaps); if (comb) conf.setCombinerClass(IterBrandesReducer.class); conf.setReducerClass(IterBrandesReducer.class); conf.setNumReduceTasks(numOfReduce); // turn off speculative execution, because DFS doesn't handle multiple writers to the same file. conf.setSpeculativeExecution(false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); // conf.set("mapred.job.name", "APS-" + outputPath.getName()); conf.setNumTasksToExecutePerJvm(-1); // JVM reuse System.out.println("Starting the execution...! Pray!! \n"); long time1 = System.nanoTime(); RunningJob rj = JobClient.runJob(conf); long time2 = System.nanoTime(); // READ OUTPUT FILES System.out.println("\nFinished and now reading/writing Betweenness Output...\n"); // Assuming 1 reducer. Path inFile = new Path(outputPath, "part-00000"); IntWritable id = new IntWritable(); DoubleWritable betweenness = new DoubleWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf); FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter)); try { int i = 0; for (; i < (N + M + re); i++) { reader.next(id, betweenness); fw.write(id + "\t" + betweenness + "\n"); fw.flush(); } } finally { reader.close(); fw.close(); } System.out.println("\nWriting times Output...\n"); fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter)); fw.write("Total-time:\t" + (time2 - time1) + "\n"); fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_MAPS") + "\n"); fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_REDUCES") + "\n"); fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("CPU_MILLISECONDS") + "\n"); fw.write("total-gc-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS") + "\n"); fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("PHYSICAL_MEMORY_BYTES") + "\n"); fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("VIRTUAL_MEMORY_BYTES") + "\n"); fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes") + "\n"); fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n"); fw.flush(); try { Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator(); while (counters.hasNext()) { Counter cc = counters.next(); fw.write(cc.getName() + "\t" + cc.getCounter() + "\n"); fw.flush(); } } finally { fw.close(); } return 0; }
From source file:map_reduce.MapReduce_OptimizedBrandesDeletions_DO_JUNG.java
License:Open Source License
@SuppressWarnings("deprecation") @Override//from w w w . ja v a 2s .c o m public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("Usage:\n"); System.exit(1); } // Job job = new Job(super.getConf()); // READ IN ALL COMMAND LINE ARGUMENTS // EXAMPLE: // hadoop jar MapReduce_OptimizedBrandesDeletions_DO_JUNG.jar // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar // -Dmapred.job.map.memory.mb=4096 // -Dmapred.job.reduce.memory.mb=4096 // -Dmapred.child.java.opts=-Xmx3500m // -Dmapreduce.task.timeout=60000000 // -Dmapreduce.job.queuename=QUEUENAME // input_iterbrandes_deletions_nocomb_10k_1 output_iterbrandes_deletions_nocomb_10k_1 // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/ int m = -1; // input path to use on hdfs Path inputPath = new Path(args[++m]); // output path to use on hdfs Path outputPath = new Path(args[++m]); // number of Mappers to split the sources: e.g., 1, 10, 100 etc. // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number. int numOfMaps = Integer.parseInt(args[++m]); // number of Reducers to collect the output int numOfReduce = Integer.parseInt(args[++m]); // Number of vertices in graph int N = Integer.parseInt(args[++m]); // Number of edges in graph int M = Integer.parseInt(args[++m]); // Graph file (edge list, tab delimited) (full path) String graph = args[++m]; // File with edges to be added (tab delimited) (full path) // Note: this version handles only edges between existing vertices in the graph. String random_edges = args[++m]; // Number of random edges added int re = Integer.parseInt(args[++m]); // Experiment iteration (in case of multiple experiments) int iter = Integer.parseInt(args[++m]); // Use combiner or not (true/false) Boolean comb = Boolean.valueOf(args[++m]); // Output path for file with stats String statsoutputpath = args[++m]; // Output path for file with final betweenness values String betoutputpath = args[++m]; // BEGIN INITIALIZATION JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesDeletions_DO_JUNG.class); FileSystem fs = FileSystem.get(conf); String setup = "_deletions_edges" + re + "_maps" + numOfMaps + "_comb" + comb; conf.setJobName("OptimizedBrandesDeletionsDOJung_" + graph + setup + "_" + iter); conf.set("HDFS_GRAPH", graph + setup); conf.set("HDFS_Random_Edges", random_edges + setup); conf.set("output", outputPath.getName()); conf.set("setup", setup); // CREATE INPUT FILES FOR MAPPERS int numOfTasksperMap = (int) Math.ceil(N / numOfMaps); //generate an input file for each map task for (int i = 0; i < numOfMaps - 1; i++) { Path file = new Path(inputPath, "part-r-" + i); IntWritable start = new IntWritable(i * numOfTasksperMap); IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end); } // last mapper takes what is left Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1)); IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap); IntWritable end = new IntWritable(N - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end); // COPY FILES TO MAPPERS System.out.println("Copying graph to cache"); String LOCAL_GRAPH = graph; Path hdfsPath = new Path(graph + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); System.out.println("Copying random edges to cache"); String LOCAL_Random_Edges = random_edges; hdfsPath = new Path(random_edges + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(IterBrandesMapper.class); conf.setNumMapTasks(numOfMaps); if (comb) conf.setCombinerClass(IterBrandesReducer.class); conf.setReducerClass(IterBrandesReducer.class); conf.setNumReduceTasks(numOfReduce); // turn off speculative execution, because DFS doesn't handle multiple writers to the same file. conf.setSpeculativeExecution(false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); // conf.set("mapred.job.name", "APS-" + outputPath.getName()); conf.setNumTasksToExecutePerJvm(-1); // JVM reuse System.out.println("Starting the execution...! Pray!! \n"); long time1 = System.nanoTime(); RunningJob rj = JobClient.runJob(conf); long time2 = System.nanoTime(); // READ OUTPUT FILES System.out.println("\nFinished and now reading/writing Betweenness Output...\n"); // Assuming 1 reducer. Path inFile = new Path(outputPath, "part-00000"); IntWritable id = new IntWritable(); DoubleWritable betweenness = new DoubleWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf); FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter)); try { int i = 0; for (; i < (N + (M - re)); i++) { reader.next(id, betweenness); fw.write(id + "\t" + betweenness + "\n"); fw.flush(); } } finally { reader.close(); fw.close(); } System.out.println("\nWriting times Output...\n"); fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter)); fw.write("Total-time:\t" + (time2 - time1) + "\n"); fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_MAPS") + "\n"); fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_REDUCES") + "\n"); fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("CPU_MILLISECONDS") + "\n"); fw.write("total-gc-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS") + "\n"); fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("PHYSICAL_MEMORY_BYTES") + "\n"); fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("VIRTUAL_MEMORY_BYTES") + "\n"); fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes") + "\n"); fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n"); fw.flush(); try { Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator(); while (counters.hasNext()) { Counter cc = counters.next(); fw.write(cc.getName() + "\t" + cc.getCounter() + "\n"); fw.flush(); } } finally { fw.close(); } return 0; }
From source file:mr.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception { Properties properties = new Properties(); AppProps.addApplicationTag(properties, "tutorials"); AppProps.addApplicationTag(properties, "cluster:development"); AppProps.setApplicationName(properties, "cascading-mapreduce-flow"); JobConf conf = new JobConf(WordCount.class); conf.setJobName("casading-mapreduce-flow"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); MapReduceFlow flow = new MapReduceFlow("wordcount", conf, true); // JobClient.runJob(conf); flow.complete();// w w w .jav a 2 s . c o m }
From source file:name.abhijitsarkar.hadoop.citation.CitationCombiner.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), getClass()); conf.setJobName("citation-combiner"); /* This is to set the separator byte for KeyValueTextInputFormat */ conf.set("key.value.separator.in.input.line", ","); conf.setMapperClass(CitationMapper.class); conf.setReducerClass(CitationReducer.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*from w w w . jav a 2s . c o m*/ return 0; }
From source file:name.abhijitsarkar.hadoop.citation.CitationCombinerWithChaining.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), getClass()); conf.setJobName("citation-combiner-with-chaining"); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobConf map1Conf = new JobConf(false); ChainMapper.addMapper(conf, CitationInputSplitMapper.class, LongWritable.class, Text.class, Text.class, Text.class, true, map1Conf); JobConf map2Conf = new JobConf(false); ChainMapper.addMapper(conf, CitationHeaderStripMapper.class, Text.class, Text.class, Text.class, Text.class, true, map2Conf);/*from w w w. j ava 2 s . c o m*/ JobConf red1Conf = new JobConf(false); ChainReducer.setReducer(conf, CitationReducer.class, Text.class, Text.class, Text.class, Text.class, true, red1Conf); JobClient.runJob(conf); return 0; }
From source file:NCDSearch.DistributedNCDSearch.java
public int run(String args[]) throws Exception { String inputpath = args[1];/*from w ww .ja v a2 s . c o m*/ String outputpath = args[2]; JobConf conf = new JobConf(getConf(), ChunkyFileInputFormat.class); //Add the target file to a cache so all nodes can have a copy. DistributedCache.addCacheFile(new URI(args[0]), conf); FileOutputFormat.setOutputPath(conf, new Path(outputpath)); FileInputFormat.setInputPaths(conf, new Path(inputpath)); conf.setJobName("NCDSearch"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(FloatWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setInputFormat(ChunkyFileInputFormat.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); JobClient.runJob(conf); return 0; }