List of usage examples for org.apache.hadoop.mapreduce.Job.setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException

Sets the number of reduce tasks for the job. A value of 0 produces a map-only job whose mapper output goes straight to the output format. The method must be called before the job is submitted; calling it on a job that is already running throws IllegalStateException.
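Before the project-specific examples, here is a minimal, self-contained sketch of the typical call pattern. The class name and the pass-through setup are illustrative only, not taken from any of the projects below; with no mapper or reducer set, Hadoop falls back to its identity Mapper and Reducer, so with the default TextInputFormat the job emits (LongWritable, Text) pairs unchanged.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetNumReduceTasksSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "pass-through");
        job.setJarByClass(SetNumReduceTasksSketch.class);

        // No mapper/reducer set: the identity Mapper and Reducer are used,
        // so the declared output types must match the input format's types.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Must be called before the job is submitted; afterwards the call
        // throws IllegalStateException. 0 would make the job map-only.
        job.setNumReduceTasks(4);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Four reducers yield four output partitions under the output path; setting the count to 0 skips the shuffle entirely, a pattern several of the examples below rely on.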
From source file: com.phantom.hadoop.examples.terasort.TeraChecksum.java
License: Apache License
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.phantom.hadoop.examples.terasort.TeraGen.java
License: Apache License
/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    // map-only job: generated rows are written directly by the mappers
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.phantom.hadoop.examples.terasort.TeraValidate.java
License: Apache License
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 1;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.pivotal.gfxd.demo.mapreduce.LoadAverage.java
License: Open Source License
/**
 * This method assumes fs.default.name is args[0].
 *
 * @param args
 * @return
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("Starting MapReduce Job");
    GfxdDataSerializable.initTypes();
    Configuration conf = new Configuration();
    //Configuration conf = getConf();

    Path outputPath = new Path("/output");
    String hdfsHomeDir = "/sensorStore"; //args[1];
    String tableName = "RAW_SENSOR";
    String outTableName = "LOAD_AVERAGES_SHADOW";
    String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");

    // conf.set("fs.default.name", args[0]);
    String hdfsUrl = conf.get("fs.defaultFS");

    FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

    // Retrieve last run timestamp
    long now = System.currentTimeMillis();
    long lastStart = getLastStart(hdfs);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
    conf.setLong(RowInputFormat.END_TIME_MILLIS, now);

    conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
    conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

    // print config to troubleshoot possible issues
    // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

    Job job = Job.getInstance(conf, "LoadAverage");

    job.setNumReduceTasks(1);

    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setJarByClass(LoadAverage.class);
    job.setMapperClass(LoadAverageMapper.class);
    job.setReducerClass(LoadAverageReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LoadKey.class);

    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(RowOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(LoadAverageModel.class);

    boolean jobSuccess = job.waitForCompletion(true);
    if (jobSuccess) {
        writeLastStart(hdfs, now);
    }

    return jobSuccess ? 0 : 1;
}
From source file: com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        printUsage();
    }
    String dbURL = args[0];
    String tableName = args[1];
    String outputPath = args[2];
    String username = (args.length >= 4) ? args[3] : null;
    String password = (args.length >= 5) ? args[4] : null;

    Job job = new Job(getConf(), "HAWQInputFormatDemo");
    job.setJarByClass(HAWQInputFormatDemo.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), dbURL, username, password, tableName);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}
From source file: com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo2.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }
    String metadataFile = args[0];
    String outputPath = args[1];

    Job job = new Job(getConf(), "HAWQInputFormatDemo2");
    job.setJarByClass(HAWQInputFormatDemo2.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}
From source file: com.pivotal.hawq.mapreduce.parquet.HAWQParquetOutputDriver.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "HAWQParquetOutputFormat");
    job.setJarByClass(HAWQParquetOutputDriver.class);

    job.setOutputFormatClass(HAWQParquetOutputFormat.class);

    /*
    // int2 int4 int8
    HAWQSchema schema = new HAWQSchema("t_int",
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT2, "col_short"),
            HAWQSchema.optional_field(HAWQPrimitiveField.PrimitiveType.INT4, "col_int"),
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT8, "col_long")
    );
    job.setMapperClass(WriteIntMapper.class);
    */

    /*
    // varchar
    HAWQSchema schema = new HAWQSchema("t_varchar",
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.VARCHAR, "col_varchar")
    );
    job.setMapperClass(WriteVarcharMapper.class);
    */

    /*
    // float4 float8
    HAWQSchema schema = new HAWQSchema("t_floating",
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT4, "col_float"),
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT8, "col_long")
    );
    job.setMapperClass(WriteFloatingNumberMapper.class);
    */

    // boolean
    // HAWQSchema schema = new HAWQSchema("t_boolean",
    //         HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BOOL, "col_bool"));
    // job.setMapperClass(WriteBooleanMapper.class);

    // byte array
    HAWQSchema schema = new HAWQSchema("t_bytea",
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BYTEA, "col_bytea"));
    job.setMapperClass(WriteByteArrayMapper.class);

    HAWQParquetOutputFormat.setSchema(job, schema);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    HAWQParquetOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Void.class);
    job.setMapOutputValueClass(HAWQRecord.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.pivotal.hawq.mapreduce.pt.HAWQInputFormatPerformanceTest_TPCH.java
License: Apache License
private int runMapReduceJob() throws Exception {
    Path outputPath = new Path("/output");
    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(HAWQInputFormatPerformanceTest_TPCH.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    long startTime = System.currentTimeMillis();
    HAWQInputFormat.setInput(job.getConfiguration(), MRFormatConfiguration.TEST_DB_URL, null, null, tableName);
    metadataExtractTime = System.currentTimeMillis() - startTime;

    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(TPCHTableMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Void.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.placeiq.piqconnect.BlocksBuilder.java
License: Apache License
protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    Runner.setCompression(job);

    return job;
}
From source file: com.placeiq.piqconnect.InitialVectorGenerator.java
License: Apache License
private Job buildJob() throws Exception {
    Configuration conf = getConf();
    conf.setLong("numberOfNodes", numberOfNodes);

    Job job = new Job(conf, "data-piqid.piqconnect.ConCmptIVGen_Stage1");
    job.setJarByClass(InitialVectorGenerator.class);
    job.setMapperClass(_Mapper.class);
    job.setReducerClass(_Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, pathBitmask);
    FileOutputFormat.setOutputPath(job, pathVector);
    FileOutputFormat.setCompressOutput(job, true);

    return job;
}