List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath
public static void setOutputPath(Job job, Path outputDir)
From source file:RunPageRankBasic.java
License:Apache License
private float phase1(int i, int j, String basePath, int numNodes, boolean useCombiner, boolean useInMapperCombiner) throws Exception { Job job = Job.getInstance(getConf()); job.setJobName("PageRank:Basic:iteration" + j + ":Phase1"); job.setJarByClass(RunPageRankBasic.class); String in = basePath + "/iter" + formatter.format(i); String out = basePath + "/iter" + formatter.format(j) + "t"; String outm = out + "-mass"; // We need to actually count the number of part files to get the number of partitions (because // the directory might contain _log). int numPartitions = 0; for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(in))) { if (s.getPath().getName().contains("part-")) numPartitions++;/* w w w . j ava2 s.com*/ } LOG.info("PageRank: iteration " + j + ": Phase1"); LOG.info(" - input: " + in); LOG.info(" - output: " + out); LOG.info(" - nodeCnt: " + numNodes); LOG.info(" - useCombiner: " + useCombiner); LOG.info(" - useInmapCombiner: " + useInMapperCombiner); LOG.info("computed number of partitions: " + numPartitions); int numReduceTasks = numPartitions; job.getConfiguration().setInt("NodeCount", numNodes); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); //job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m"); job.getConfiguration().set("PageRankMassPath", outm); job.setNumReduceTasks(numReduceTasks); FileInputFormat.setInputPaths(job, new Path(in)); FileOutputFormat.setOutputPath(job, new Path(out)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); job.setMapperClass(useInMapperCombiner ? 
MapWithInMapperCombiningClass.class : MapClass.class); if (useCombiner) { job.setCombinerClass(CombineClass.class); } job.setReducerClass(ReduceClass.class); FileSystem.get(getConf()).delete(new Path(out), true); FileSystem.get(getConf()).delete(new Path(outm), true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); float mass = Float.NEGATIVE_INFINITY; FileSystem fs = FileSystem.get(getConf()); for (FileStatus f : fs.listStatus(new Path(outm))) { FSDataInputStream fin = fs.open(f.getPath()); mass = sumLogProbs(mass, fin.readFloat()); fin.close(); } return mass; }
From source file:RunPageRankBasic.java
License:Apache License
/**
 * Runs phase 2 of PageRank iteration {@code j} as a map-only job (zero reduce tasks).
 *
 * <p>The job reads {@code basePath + "/iter" + formatter.format(j) + "t"} and writes
 * {@code basePath + "/iter" + formatter.format(j)}; the output location is deleted before the
 * job starts. {@code missing} and {@code numNodes} are handed to the tasks through the
 * {@code MissingMass} and {@code NodeCount} configuration properties.
 *
 * @param i not used by this method
 * @param j iteration being computed
 * @param missing value stored under {@code MissingMass}
 * @param basePath directory that holds the per-iteration sub-directories
 * @param numNodes value stored under {@code NodeCount}
 * @throws Exception if the job or any file system operation fails
 */
private void phase2(int i, int j, float missing, String basePath, int numNodes) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName("PageRank:Basic:iteration" + j + ":Phase2");
  job.setJarByClass(RunPageRankBasic.class);

  LOG.info("missing PageRank mass: " + missing);
  LOG.info("number of nodes: " + numNodes);

  String inputDir = basePath + "/iter" + formatter.format(j) + "t";
  String outputDir = basePath + "/iter" + formatter.format(j);

  LOG.info("PageRank: iteration " + j + ": Phase2");
  LOG.info(" - input: " + inputDir);
  LOG.info(" - output: " + outputDir);

  // Values the map tasks read back from the configuration.
  job.getConfiguration().setInt("NodeCount", numNodes);
  job.getConfiguration().setFloat("MissingMass", missing);
  job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
  job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);

  // Job wiring: only a mapper is set, and the reduce count is zero.
  job.setMapperClass(MapPageRankMassDistributionClass.class);
  job.setNumReduceTasks(0);

  job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(PageRankNode.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(PageRankNode.class);

  Path inputPath = new Path(inputDir);
  Path outputPath = new Path(outputDir);
  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  // Remove any previous output for this iteration before launching.
  FileSystem.get(getConf()).delete(outputPath, true);

  long startTime = System.currentTimeMillis();
  job.waitForCompletion(true);
  double elapsedSeconds = (System.currentTimeMillis() - startTime) / 1000.0;
  System.out.println("Job Finished in " + elapsedSeconds + " seconds");
}
From source file:lab2_2.java
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); FileSystem.get(conf).delete(new Path(args[1]), true); Job job = Job.getInstance(conf, "drive time lab 2.1"); job.setJarByClass(lab2_1.class); job.setMapperClass(PartitioningMapper.class); job.setPartitionerClass(TypePartitioner.class); job.setReducerClass(IdentityReducer.class); job.setNumReduceTasks(6);//from w w w . j a v a 2s . c o m job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:TweetCategorizer.java
License:Apache License
/**
 * Driver: loads the filter definitions from {@code ../www/json/filters.json}, registers every
 * filter with {@code TokenizerMapper} and runs the "categorize tweets" job.
 *
 * <p>After generic Hadoop options are stripped, exactly two arguments are expected: the input
 * path and the output path. The JVM exits with 2 on a usage error, otherwise with 0 or 1
 * depending on whether the job succeeded.
 *
 * @param args command-line arguments (generic options, then input and output path)
 * @throws Exception if the filter file cannot be read or parsed, or the job reports an error
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: TweetCategorizer <in> <out>");
    System.exit(2);
  }

  // ----------------------------------------------------------
  // READ FILTER FILE
  // ----------------------------------------------------------
  // The file is read from the local file system; lines are concatenated without their
  // line terminators. StringBuilder avoids re-copying the text on every line, and
  // try-with-resources closes the reader on every path.
  StringBuilder json = new StringBuilder();
  try (BufferedReader br = new BufferedReader(new FileReader("../www/json/filters.json"))) {
    String l;
    while ((l = br.readLine()) != null) {
      json.append(l);
    }
  }

  // ----------------------------------------------------------
  // PARSE JSON
  // http://stackoverflow.com/questions/6697147/json-iterate-through-jsonarray
  // http://juliusdavies.ca/json-simple-1.1.1-javadocs/org/json/simple/JSONObject.html
  // ----------------------------------------------------------
  JSONParser parser = new JSONParser();
  JSONObject jsonObject = (JSONObject) parser.parse(json.toString());
  Set<String> filters = jsonObject.keySet();

  // Inside each object there is a "name" field; its lower-cased value becomes the keyword
  // and the object's key becomes the key name.
  // NOTE(review): these static lists are filled in the driver JVM. Map tasks that run in a
  // different JVM will presumably not see them - confirm how TokenizerMapper gets the filters
  // when the job runs on a cluster.
  for (String i : filters) {
    JSONObject objects = (JSONObject) jsonObject.get(i);
    String keyword = ((String) objects.get("name")).toLowerCase();
    TokenizerMapper.keyname_list.add(i);
    TokenizerMapper.keyword_list.add(keyword);
  }

  // ----------------------------------------------------------
  Job job = new Job(conf, "categorize tweets");
  job.setJarByClass(TweetCategorizer.class);
  job.setMapperClass(TokenizerMapper.class);
  // No combiner or reducer class is configured for this job.
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:WordLines.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 3) { System.err.println("Usage: wordlines <in> [<in>...] <SearchTerm> <out>"); System.exit(2);//from w w w. j a va 2 s. c o m } conf.set("searchWord", otherArgs[otherArgs.length - 2]); Job job = new Job(conf, "word lines"); job.setJarByClass(WordLines.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); for (int i = 0; i < otherArgs.length - 2; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:BigramRelativeFrequency.java
License:Apache License
/** * Runs this tool.//from w ww .jav a 2 s . c om */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? 
Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool name: " + BigramRelativeFrequency.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BigramRelativeFrequency.class.getSimpleName()); job.setJarByClass(BigramRelativeFrequency.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(PairOfStrings.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(PairOfStrings.class); job.setOutputValueClass(FloatWritable.class); //job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:CategoriesInvertedIndex.java
/**
 * Driver for the "Inverted Index" job, which uses {@code CategoriesMapper} as the mapper and
 * {@code CategoriesReducer} as both combiner and reducer.
 *
 * <p>{@code args[0]} is the input path and {@code args[1]} the output path. The JVM exits with
 * 0 when the job succeeds, 1 when it fails and 2 when too few arguments are given.
 *
 * @param args input path followed by output path
 * @throws Exception if the job reports an error
 */
public static void main(String[] args) throws Exception {
  // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
  if (args.length < 2) {
    System.err.println("Usage: CategoriesInvertedIndex <in> <out>");
    System.exit(2);
  }

  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "Inverted Index");
  job.setJarByClass(CategoriesInvertedIndex.class);

  job.setMapperClass(CategoriesMapper.class);
  job.setReducerClass(CategoriesReducer.class);
  job.setCombinerClass(CategoriesReducer.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:First.java
License:Apache License
/**
 * Runs one job per column name from "2000" to "2011", reading from the Cassandra column family
 * {@code COLUMN_FAMILY} in {@code KEYSPACE}.
 *
 * <p>When the first argument starts with {@code OUTPUT_REDUCER_VAR} and has the form
 * {@code name=value}, {@code value} selects the reducer output. The value "filesystem"
 * (case-insensitive, also the default) writes each job's output to
 * {@code OUTPUT_PATH_PREFIX + year}; any other value configures {@code ReducerToCassandra} with
 * {@code ColumnFamilyOutputFormat}.
 *
 * @param args optional command-line arguments; may be null or empty
 * @return always 0
 * @throws Exception if a job reports an error
 */
public int run(String[] args) throws Exception {
  String outputReducerType = "filesystem";
  // Guard against an empty argument array as well as null before reading args[0].
  if (args != null && args.length > 0 && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
    String[] s = args[0].split("=");
    if (s.length == 2) {
      outputReducerType = s[1];
    }
  }
  logger.info("output reducer type: " + outputReducerType);

  for (int i = 2000; i < 2012; i++) {
    String columnName = Integer.toString(i);
    // Set before the Job is created so the job's configuration carries this column name.
    getConf().set(CONF_COLUMN_NAME, columnName);

    Job job = new Job(getConf(), "app");
    job.setJarByClass(First.class);
    job.setMapperClass(TokenizerMapper.class);

    if (outputReducerType.equalsIgnoreCase("filesystem")) {
      job.setCombinerClass(ReducerToFilesystem.class);
      job.setReducerClass(ReducerToFilesystem.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
    } else {
      job.setReducerClass(ReducerToCassandra.class);

      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(IntWritable.class);
      job.setOutputKeyClass(ByteBuffer.class);
      job.setOutputValueClass(List.class);

      job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

      ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
    }

    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

    // Restrict the input slice to the single column named after the current year.
    SlicePredicate predicate = new SlicePredicate()
        .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);
  }
  return 0;
}
From source file:MapReduce3.java
public static void main(String[] args) throws Exception { ////w ww . j a v a 2 s . c o m String dst = "hdfs://localhost:9000/data/2006a.csv"; //?? // String dstOut = "hdfs://localhost:9000/mapreduce/result3/1"; String dstOut = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/1"; String outFiles = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/2"; Configuration hadoopConfig = new Configuration(); hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); Job job = new Job(hadoopConfig); Job job2 = new Job(hadoopConfig); FileInputFormat.addInputPath(job, new Path(dst)); FileOutputFormat.setOutputPath(job, new Path(dstOut)); FileInputFormat.addInputPath(job2, new Path(dstOut)); FileOutputFormat.setOutputPath(job2, new Path(outFiles)); JobConf map1Conf = new JobConf(false); ChainMapper.addMapper(job, TempMapper.class, LongWritable.class, Text.class, CompositeKey_wd.class, IntWritable.class, map1Conf); JobConf reduceConf = new JobConf(false); ChainReducer.setReducer(job, TempReducer.class, CompositeKey_wd.class, IntWritable.class, CompositeKey_wd.class, IntWritable.class, reduceConf); JobConf map2Conf = new JobConf(false); ChainMapper.addMapper(job2, TempMapper2.class, LongWritable.class, Text.class, IntWritable.class, CompositeKey_wd.class, map2Conf); JobConf map3Conf = new JobConf(false); ChainReducer.setReducer(job2, TempReduce2.class, IntWritable.class, CompositeKey_wd.class, Text.class, IntWritable.class, map3Conf); // // JobClient.runJob(job); //MapperReducer? 
// job.setMapperClass(TempMapper.class); // // job.setReducerClass(TempReducer.class); //?KeyValue job.setOutputKeyClass(CompositeKey_wd.class); job.setOutputValueClass(IntWritable.class); job2.setMapOutputKeyClass(IntWritable.class); job2.setMapOutputValueClass(CompositeKey_wd.class); // job2.setSortComparatorClass(LongWritable.DecreasingComparator.class); //job? job.waitForCompletion(true); System.out.println("Finished1"); job2.waitForCompletion(true); System.out.println("Finished2"); }
From source file:TorrentWeb.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = this.getConf(); Job job = Job.getInstance(conf, "Torrent Web"); job.setJarByClass(TorrentWeb.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(TorrentWebExtracter.class); job.setReducerClass(TorrentWebReducer.class); job.setInputFormatClass(WarcInputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Execute job and return status return job.waitForCompletion(true) ? 0 : 1; }