List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath
public static void setOutputPath(Job job, Path outputDir)
From source file:com.cloudera.recordservice.avro.mapreduce.ColorCount.java
License:Apache License
/** * Run the MR2 color count with generic records, and return a map of favorite colors to * the number of users./*from w ww . ja v a 2 s. c o m*/ */ public static java.util.Map<String, Integer> countColors() throws IOException, ClassNotFoundException, InterruptedException { String output = TestUtil.getTempDirectory(); Path outputPath = new Path(output); JobConf conf = new JobConf(ColorCount.class); conf.setInt("mapreduce.job.reduces", 1); Job job = Job.getInstance(conf); job.setJarByClass(ColorCount.class); job.setJobName("MR2 Color Count With Generic Records"); RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users"); job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); job.setMapperClass(Map.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(Reduce.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); job.waitForCompletion(false); // Read the result and return it. Since we set the number of reducers to 1, // there is always just one file containing the value. SeekableInput input = new FsInput(new Path(output + "/part-r-00000.avro"), conf); DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(); FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader); java.util.Map<String, Integer> colorMap = new HashMap<String, Integer>(); for (GenericRecord datum : fileReader) { colorMap.put(datum.get(0).toString(), Integer.parseInt(datum.get(1).toString())); } return colorMap; }
From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceAgeCount.java
License:Apache License
public int run(String[] args) throws Exception { org.apache.log4j.BasicConfigurator.configure(); if (args.length != 2) { System.err.println("Usage: MapReduceAgeCount <input path> <output path>"); return -1; }// w w w .ja v a 2s .com Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceAgeCount.class); job.setJobName("Age Count"); // RECORDSERVICE: // To read from a table instead of a path, comment out // FileInputFormat.setInputPaths() and instead use: // FileInputFormat.setInputPaths(job, new Path(args[0])); RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); // RECORDSERVICE: // Use the RecordService version of the AvroKeyValueInputFormat job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyValueInputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(AgeCountMapper.class); // Set schema for input key and value. AvroJob.setInputKeySchema(job, UserKey.getClassSchema()); AvroJob.setInputValueSchema(job, UserValue.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(AgeCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceColorCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { org.apache.log4j.BasicConfigurator.configure(); if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }/*from www. j a va2 s . c om*/ Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCount.class); job.setJobName("Color Count"); // RECORDSERVICE: // To read from a table instead of a path, comment out // FileInputFormat.setInputPaths() and instead use: //FileInputFormat.setInputPaths(job, new Path(args[0])); RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users"); // RECORDSERVICE: // Use the RecordService version of the AvroKeyInputFormat job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class); //job.setInputFormatClass(AvroKeyInputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(ColorCountMapper.class); AvroJob.setInputKeySchema(job, User.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java
License:Apache License
/**
 * Configures and runs the "recordcount" job over the result of a RecordService
 * query, writing text output with a single reducer.
 *
 * <p>Any existing output directory is deleted recursively before the job runs.
 *
 * @param args exactly two arguments: the input query and the output path
 * @return 0 when the job succeeds, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: RecordCount <input_query> <output_path>");
    System.exit(1);
  }
  String inputQuery = args[0];
  String output = args[1];

  Job job = Job.getInstance(getConf());
  job.setJobName("recordcount");
  job.setJarByClass(RecordCount.class);

  job.setMapperClass(Map.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  job.setNumReduceTasks(1);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(LongWritable.class);

  RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery);
  job.setInputFormatClass(RecordServiceInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // Resolve the filesystem from the output path itself rather than taking the
  // default filesystem, so an output path on a different filesystem is
  // checked and deleted on the filesystem it actually lives on.
  Path outputPath = new Path(output);
  FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(job, outputPath);

  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.cloudera.recordservice.examples.terasort.TeraChecksum.java
License:Apache License
@Override public int run(String[] args) throws Exception { boolean useRecordService = false; Job job = Job.getInstance(getConf()); if (args.length != 2 && args.length != 3) { usage();//from w w w .j a va 2s . c om return 2; } if (args.length == 3) { useRecordService = Boolean.parseBoolean(args[2]); } FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraSum"); job.setJarByClass(TeraChecksum.class); job.setMapperClass(ChecksumMapper.class); job.setReducerClass(ChecksumReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Unsigned16.class); // force a single reducer job.setNumReduceTasks(1); if (useRecordService) { RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); job.setInputFormatClass(RecordServiceTeraInputFormat.class); } else { TeraInputFormat.setInputPaths(job, new Path(args[0])); job.setInputFormatClass(TeraInputFormat.class); } return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.recordservice.examples.terasort.TeraGen.java
License:Apache License
/** * @param args the cli arguments/*from w w w . j ava 2 s . c o m*/ */ @Override public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Job job = Job.getInstance(getConf()); if (args.length != 2) { usage(); return 2; } setNumberOfRows(job, parseHumanLong(args[0])); Path outputDir = new Path(args[1]); if (outputDir.getFileSystem(getConf()).exists(outputDir)) { throw new IOException("Output directory " + outputDir + " already exists."); } FileOutputFormat.setOutputPath(job, outputDir); job.setJobName("TeraGen"); job.setJarByClass(TeraGen.class); job.setMapperClass(SortGenMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(RangeInputFormat.class); job.setOutputFormatClass(TeraOutputFormat.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.recordservice.examples.terasort.TeraSort.java
License:Apache License
/**
 * Configures and runs the "TeraSort" job.
 *
 * <p>When RecordService input is enabled the simple partitioner is always
 * used. Otherwise, unless the simple partitioner is configured, a partition
 * file is written into the output directory and added to the job's cache
 * files for the total-order partitioner.
 *
 * @param args two or three arguments: input (a path, or a table when
 *     RecordService is enabled), output directory, and an optional boolean
 *     that enables RecordService input
 * @return 1 when the argument count is wrong, -1 when the partition file
 *     cannot be written, otherwise 0 when the job succeeds and 1 when it fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
  boolean useRecordService = false;
  if (args.length != 2 && args.length != 3) {
    usage();
    return 1;
  }
  if (args.length == 3) {
    useRecordService = Boolean.parseBoolean(args[2]);
  }

  LOG.info("starting");
  Job job = Job.getInstance(getConf());
  boolean useSimplePartitioner = getUseSimplePartitioner(job);
  if (useRecordService) {
    RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
    job.setInputFormatClass(RecordServiceTeraInputFormat.class);
    // RecordService input always uses the simple partitioner.
    useSimplePartitioner = true;
  } else {
    Path inputDir = new Path(args[0]);
    TeraInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TeraInputFormat.class);
  }

  Path outputDir = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TeraOutputFormat.class);

  if (useSimplePartitioner) {
    job.setPartitionerClass(SimplePartitioner.class);
  } else {
    long start = System.currentTimeMillis();
    Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
    // The URI fragment carries the partition file name for the cache entry.
    URI partitionUri =
        new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
    try {
      TeraInputFormat.writePartitionFile(job, partitionFile);
    } catch (Throwable e) {
      // Pass the throwable itself so the stack trace is logged, not just the
      // (possibly null) message.
      LOG.error(e.getMessage(), e);
      return -1;
    }
    job.addCacheFile(partitionUri);
    long end = System.currentTimeMillis();
    System.out.println("Spent " + (end - start) + "ms computing partitions.");
    job.setPartitionerClass(TotalOrderPartitioner.class);
  }

  job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
  TeraOutputFormat.setFinalSync(job, true);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  LOG.info("done");
  return ret;
}
From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java
License:Apache License
@Override public int run(String[] args) throws Exception { boolean useRecordService = false; if (args.length != 2 && args.length != 3) { usage();/*from w ww .j a v a 2 s . c o m*/ return 1; } if (args.length == 3) { useRecordService = Boolean.parseBoolean(args[2]); } Job job = Job.getInstance(getConf()); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraValidate"); job.setJarByClass(TeraValidate.class); job.setMapperClass(ValidateMapper.class); job.setReducerClass(ValidateReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // force a single reducer job.setNumReduceTasks(1); // force a single split FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE); if (useRecordService) { RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); job.setInputFormatClass(RecordServiceTeraInputFormat.class); } else { TeraInputFormat.setInputPaths(job, new Path(args[0])); job.setInputFormatClass(TeraInputFormat.class); } return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.sa.giraph.examples.componentisation.Job.java
License:Apache License
/**
 * Command-line entry point for the componentisation Giraph job.
 *
 * <p>Prints a help message and returns when the argument count is not three.
 *
 * @param args number of workers, input location, output location
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    System.out.println("Componentisation Help:");
    System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation>");
    System.out.println("Example: 1 inputFolder outputFolder");
    return;
  }

  String workerArg = args[0];
  Path inputPath = new Path(args[1]);
  Path outputPath = new Path(args[2]);

  GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());

  bspJob.getConfiguration().setVertexClass(ComponentisationVertex.class);
  bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class);
  GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), inputPath);
  bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

  // The same worker count is used as both the minimum and the maximum.
  int workers = Integer.parseInt(workerArg);
  bspJob.getConfiguration().setWorkerConfiguration(workers, workers, 100.0f);

  FileOutputFormat.setOutputPath(bspJob.getInternalJob(), outputPath);

  // Run verbosely and report the outcome.
  boolean succeeded = bspJob.run(true);
  System.out.println(succeeded ? "Ended well" : "Ended with Failure");
}
From source file:com.cloudera.sa.giraph.examples.kcore.Job.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 3) { System.out.println("KCore Help:"); System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation>"); System.out.println("Example: 1 inputFolder outputFolder"); return;// www. ja va 2 s . c o m } String numberOfWorkers = args[0]; String inputLocation = args[1]; String outputLocation = args[2]; GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName()); bspJob.getConfiguration().setVertexClass(KCoreVertex.class); bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class); GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), new Path(inputLocation)); bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class); int minWorkers = Integer.parseInt(numberOfWorkers); int maxWorkers = Integer.parseInt(numberOfWorkers); bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f); FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation)); boolean verbose = true; if (bspJob.run(verbose)) { System.out.println("Ended well"); } else { System.out.println("Ended with Failure"); } }