List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath
public static void setOutputPath(Job job, Path outputDir)
From source file:cienciaCelularMR.Main.java
@Override public int run(String[] args) throws Exception { for (int i = 0; i < args.length; i++) { System.out.println("Hadoop - arg[" + i + "] es: " + args[i]); }//from w w w . j a v a 2s .c o m //Configuracin de memoria de YARN Configuration conf = new Configuration(); conf.set("mapreduce.map.memory.mb", "1400"); conf.set("mapreduce.reduce.memory.mb", "2800"); conf.set("mapreduce.map.java.opts", "-Xmx1120m"); conf.set("mapreduce.reduce.java.opts", "-Xmx2240m"); conf.set("yarn.app.mapreduce.am.resource.mb", "2800"); conf.set("yarn.app.mapreduce.am.command-opts", "-Xmx2240m"); conf.set("yarn.nodemanager.resource.memory-mb", "5040"); conf.set("yarn.scheduler.minimum-allocation-mb", "1400"); conf.set("yarn.scheduler.maximum-allocation-mb", "5040"); conf.set("mapreduce.task.timeout", "18000000");//5 horas //Creacin del Job Job job = Job.getInstance(conf); job.setInputFormatClass(WholeFileInputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[5])); FileOutputFormat.setOutputPath(job, new Path(args[6])); //Salidas alternativas de Mapper para brindar informacin MultipleOutputs.addNamedOutput(job, "controloutput", TextOutputFormat.class, KeyMcell.class, Text.class); MultipleOutputs.addNamedOutput(job, "errormcell", TextOutputFormat.class, KeyMcell.class, Text.class); //Archivos copiados a cache de los nodos job.addCacheFile(new Path("wasb:///mcell.exe").toUri()); job.addCacheFile(new Path("wasb:///fernet.exe").toUri()); job.addCacheFile(new Path("wasb:///fernet.cfg").toUri()); job.addCacheFile(new Path("wasb:///libconfig_d.dll").toUri()); job.addCacheFile(new Path("wasb:///libtiff3.dll").toUri()); job.addCacheFile(new Path("wasb:///jpeg62.dll").toUri()); job.addCacheFile(new Path("wasb:///zlib1.dll").toUri()); job.addCacheFile(new Path("wasb:///msvcr100d.dll").toUri()); job.setJarByClass(Main.class); Configuration mapAConf = new Configuration(false); ChainMapper.addMapper(job, McellMapper.class, KeyMcell.class, BytesWritable.class, KeyMcell.class, Text.class, mapAConf); Configuration mapBConf = new Configuration(false); ChainMapper.addMapper(job, FernetMapper.class, KeyMcell.class, Text.class, KeyMcell.class, FernetOutput.class, mapBConf); job.setReducerClass(ResultReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(BytesWritable.class); job.submit(); return 0; }
From source file:cityhubpartitioningcountry.CityHubPartitioning.java
/** * @param args the command line arguments *//*from w ww . j av a 2 s.co m*/ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "partitioner"); job.setJarByClass(CityHubPartitioning.class); job.setMapperClass(PartitionMonthMapper.class); job.setReducerClass(countryReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setPartitionerClass(PartitionCountryPartitioner.class); job.setNumReduceTasks(27); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:cityhubtopten.CityHubTopTen.java
/** * @param args the command line arguments *//*ww w. j a va2s . c o m*/ public static void main(String[] args) { try { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Top10"); job.setJarByClass(CityHubTopTen.class); job.setMapperClass(Top10Mapper.class); job.setReducerClass(Top10Reducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } catch (IOException | InterruptedException | ClassNotFoundException ex) { System.out.println("Erorr Message" + ex.getMessage()); } }
From source file:cloud9.ComputeCooccurrenceMatrixStripesOOM.java
License:Apache License
/** * Runs this tool.//from www .j a va2 s . co m */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int window = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: ComputeCooccurrenceMatrixStripes"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - window: " + window); sLogger.info(" - number of reducers: " + reduceTasks); Job job = new Job(getConf(), "CooccurrenceMatrixStripes"); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job.getConfiguration().setInt("window", window); job.setJarByClass(ComputeCooccurrenceMatrixStripesOOM.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setOutputKeyClass(Text.class); job.setOutputValueClass(String2IntOpenHashMapWritable.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); job.getConfiguration().setInt("io.sort.mb", 400); job.getConfiguration().set("mapred.child.java.opts", "-Xmx1000m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps"); //job.getConfiguration().set("mapred.child.java.opts", "-Xmx1000m"); job.getConfiguration().setInt("child.monitor.jstat.seconds", 2); job.getConfiguration().set("fs.default.name", "hdfs://master:9000"); job.getConfiguration().set("mapred.job.tracker", "master:9001"); //conf.set("user.name", "xulijie"); job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", 1); //job.getConfiguration().setFloat("io.sort.record.percent", 0.2f); //job.getConfiguration().setFloat("io.sort.spill.percent", 0.95f); // conf.setFloat("mapred.job.shuffle.input.buffer.percent", 0.9f); // conf.setFloat("mapred.job.shuffle.merge.percent", 0.9f); //conf.setFloat("mapred.job.reduce.input.buffer.percent", 0.4f); //conf.set("mapred.job.tracker", "local"); //conf.set("fs.default.name", "file:///"); job.getConfiguration().setLong("mapred.min.split.size", 512 * 1024 * 1024L); job.getConfiguration().setLong("mapred.max.split.size", 512 * 1024 * 1024L); job.getConfiguration().setInt("mapred.map.max.attempts", 0); job.getConfiguration().setInt("mapred.reduce.max.attempts", 0); //job.getConfiguration().set("heapdump.reduce.input.groups", "3,897,853[5]"); //job.getConfiguration().set("heapdump.reduce.input.records", "8407734;8407737;8407740;8407743;8407746;8407749;8407750"); //job.getConfiguration().set("omit.reduce.input.records", "8407733;8407750"); //job.getConfiguration().set("heapdump.reduce.input.records", "8407751"); //job.getConfiguration().set("heapdump.reduce.output.records", "3897853"); job.getConfiguration().set("heapdump.task.attempt.ids", "attempt_201404281552_0001_r_000000_0"); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:clustering.init.Driver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 2) { System.err.printf("usage: %s input_dir output_dir [column_splitter] [dict_path]\n", this.getClass().getSimpleName()); System.exit(1);/*from w w w . j av a2s . co m*/ } Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); if (args.length > 2) { conf.set("column.splitter", args[2]); } else { conf.set("column.splitter", ","); } if (args.length > 3) { conf.set("dict.path", args[3]); } else { conf.set("dict.path", "./dicts"); } Job job = Job.getInstance(conf, "Initialization job"); job.setJarByClass(Driver.class); FileInputFormat.addInputPath(job, new Path(args[0])); job.setMapperClass(WordSepMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); long starttime = System.currentTimeMillis(); boolean complete = job.waitForCompletion(true); long endtime = System.currentTimeMillis(); System.out.println("Initialization job finished in: " + (endtime - starttime) / 1000 + " seconds"); return complete ? 0 : 1; }
From source file:clustering.inverted_index.Driver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 2) { System.err.printf("usage: %s tf_idf_result_dir output_dir" + "[decimal_number] [pruning_threshold]\n", getClass().getSimpleName()); System.exit(1);/*from ww w . ja va2s .co m*/ } Path normDir = new Path(args[1] + "/normed"); Path resultDir = new Path(args[1] + "/result"); Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); if (args.length > 2) { conf.setInt("deci.number", Integer.valueOf(args[2])); } else { conf.setInt("deci.number", 4); } if (args.length > 3) { conf.setBoolean("pruning", true); conf.setDouble("pruning.threshold", Double.valueOf(args[3])); } else { conf.setBoolean("pruning", false); } JobControl jobControl = new JobControl("inverted-index jobs"); /* step 1, normalize the vector lenth of each document */ Job job1 = Job.getInstance(conf, "tf idf normalizer job"); job1.setJarByClass(Driver.class); FileInputFormat.addInputPath(job1, new Path(args[0])); job1.setInputFormatClass(KeyValueTextInputFormat.class); job1.setMapperClass(Mapper.class); job1.setReducerClass(NormalizerReducer.class); job1.setOutputKeyClass(Text.class); job1.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job1, normDir); ControlledJob controlledJob1 = new ControlledJob(conf); controlledJob1.setJob(job1); jobControl.addJob(controlledJob1); /* step 2, calculate inverted index */ Job job2 = Job.getInstance(conf, "inverted index job"); job2.setJarByClass(Driver.class); FileInputFormat.addInputPath(job2, normDir); job2.setInputFormatClass(KeyValueTextInputFormat.class); job2.setMapperClass(Mapper.class); job2.setReducerClass(InvertedIndexReducer.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job2, resultDir); ControlledJob controlledJob2 = new ControlledJob(conf); controlledJob2.setJob(job2); controlledJob2.addDependingJob(controlledJob1); jobControl.addJob(controlledJob2); MapReduceUtils.runJobs(jobControl); return job2.waitForCompletion(true) ? 0 : 1; }
From source file:clustering.link_back.pre.Driver.java
License:Apache License
public Job configJob(String[] args) throws Exception { if (args.length < 2) { System.err.printf("usage: %s input_dir output_dir\n", getClass().getSimpleName()); System.exit(1);//from w ww . j a va2 s . c om } Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); Job job = Job.getInstance(conf, "linkback pre step"); job.setJarByClass(Driver.class); FileInputFormat.addInputPath(job, new Path(args[0])); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setMapperClass(AttachMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job; }
From source file:clustering.link_back.step1.Driver.java
License:Apache License
public Job configJob(String[] args) throws Exception { if (args.length < 3) { System.err.printf("usage: %s mst_result_dir simhash_result_file output_dir\n", getClass().getSimpleName()); System.exit(1);//from w w w . ja v a 2 s .com } Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); Job job = Job.getInstance(conf, "link back step 1 job"); job.setJarByClass(Driver.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileInputFormat.addInputPath(job, new Path(args[1])); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setMapperClass(SetKeyMapper.class); job.setMapOutputKeyClass(Step1KeyWritable.class); job.setMapOutputValueClass(Text.class); job.setPartitionerClass(JoinPartitioner.class); job.setGroupingComparatorClass(Step1GroupComparator.class); job.setReducerClass(JoinReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(job, new Path(args[2])); return job; }
From source file:clustering.link_back.step2.Driver.java
License:Apache License
public Job configJob(String[] args) throws Exception { if (args.length < 3) { System.err.printf("usage: %s pre_step_result_dir step1_result_dir output_dir\n", getClass().getSimpleName()); System.exit(1);//from w ww . ja v a 2 s .c o m } Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); Job job = Job.getInstance(conf, "link back step 2 job"); job.setJarByClass(Driver.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileInputFormat.addInputPath(job, new Path(args[1])); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setMapperClass(clustering.link_back.step2.SetKeyMapper.class); job.setMapOutputKeyClass(Step2KeyWritable.class); job.setMapOutputValueClass(Text.class); job.setPartitionerClass(JoinPartitioner.class); job.setGroupingComparatorClass(Step2GroupComparator.class); job.setReducerClass(clustering.link_back.step2.JoinReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(job, new Path(args[2])); return job; }
From source file:clustering.mst.Driver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 3) { System.err.printf("usage: %s similarity_result_dir document_count_file output_dir " + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName()); System.exit(1);// w w w .j ava 2 s . c om } Path step1_OutputDir = new Path(args[2] + "/step1"); Path resultDir = new Path(args[2] + "/result"); URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt"); Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); if (args.length > 3) { conf.setDouble("final.threshold", Double.valueOf(args[3])); } else { conf.setDouble("final.threshold", 0.2d); } if (args.length > 4) { conf.setInt("reduce.task.num", Integer.valueOf(args[4])); } else { conf.setInt("reduce.task.num", 5); } JobControl jobControl = new JobControl("mst jobs"); /* step 1, split and calculate the child msts */ Job childJob = Job.getInstance(conf, "mst child job"); childJob.setJarByClass(Driver.class); childJob.addCacheFile(docCntFile); if (args.length > 5 && args[5].equals("0")) { FileInputFormat.addInputPath(childJob, new Path(args[0])); childJob.setInputFormatClass(KeyValueTextInputFormat.class); } else { SequenceFileInputFormat.addInputPath(childJob, new Path(args[0])); childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class); } FileOutputFormat.setOutputPath(childJob, step1_OutputDir); childJob.setMapperClass(ChildMapper.class); childJob.setMapOutputKeyClass(DoubleWritable.class); childJob.setMapOutputValueClass(Text.class); childJob.setPartitionerClass(ChildPartitioner.class); childJob.setReducerClass(ChildReducer.class); childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1)); childJob.setOutputKeyClass(DoubleWritable.class); childJob.setOutputValueClass(Text.class); ControlledJob controlledChildJob = new ControlledJob(conf); controlledChildJob.setJob(childJob); jobControl.addJob(controlledChildJob); /* step 2, merge step 1's output and calculate final mst */ Job finalJob = Job.getInstance(conf, "mst final job"); finalJob.setJarByClass(FinalReducer.class); finalJob.addCacheFile(docCntFile); FileInputFormat.addInputPath(finalJob, step1_OutputDir); finalJob.setInputFormatClass(KeyValueTextInputFormat.class); finalJob.setMapperClass(FinalMapper.class); finalJob.setMapOutputKeyClass(DoubleWritable.class); finalJob.setMapOutputValueClass(Text.class); finalJob.setReducerClass(FinalReducer.class); finalJob.setOutputKeyClass(IntWritable.class); finalJob.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(finalJob, resultDir); ControlledJob finalControlledJob = new ControlledJob(conf); finalControlledJob.setJob(finalJob); finalControlledJob.addDependingJob(controlledChildJob); jobControl.addJob(finalControlledJob); // run jobs MapReduceUtils.runJobs(jobControl); return finalJob.waitForCompletion(true) ? 0 : 1; }