Example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setNumReduceTasks.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException 

Document

Set the number of reduce tasks for the job. The method must be called before the job is submitted; calling it afterwards results in the declared IllegalStateException.
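
A minimal driver sketch is shown below for orientation before the full examples. The class name NumReduceTasksSketch, the identity Mapper/Reducer, and the hard-coded reducer count are placeholders and are not taken from any of the examples; the sketch only illustrates the two common uses of the call: a fixed reducer count, or zero reducers for a map-only job.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NumReduceTasksSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "num-reduce-tasks-sketch");
        job.setJarByClass(NumReduceTasksSketch.class);

        // Identity mapper and reducer stand in for real implementations.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Must be called before the job is submitted, otherwise IllegalStateException is thrown.
        job.setNumReduceTasks(4);   // run the reduce phase with four reduce tasks
        // job.setNumReduceTasks(0); // alternative: skip the reduce phase entirely (map-only job)

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}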

Usage

From source file:com.phantom.hadoop.examples.terasort.TeraChecksum.java

License:Apache License

public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.phantom.hadoop.examples.terasort.TeraGen.java

License:Apache License

/**
 * @param args
 *            the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
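    // map-only job: with zero reduce tasks the mapper output is written directly by the output format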
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.phantom.hadoop.examples.terasort.TeraValidate.java

License:Apache License

public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 1;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pivotal.gfxd.demo.mapreduce.LoadAverage.java

License:Open Source License

/**
 * This method assumes that fs.default.name is passed as args[0]
 *
 * @param args
 * @return
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("Starting MapReduce Job");
    GfxdDataSerializable.initTypes();
    Configuration conf = new Configuration();
    //Configuration conf = getConf();

    Path outputPath = new Path("/output");
    String hdfsHomeDir = "/sensorStore"; //args[1];
    String tableName = "RAW_SENSOR";
    String outTableName = "LOAD_AVERAGES_SHADOW";
    String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");

    // conf.set("fs.default.name", args[0]);
    String hdfsUrl = conf.get("fs.defaultFS");

    FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

    // Retrieve last run timestamp
    long now = System.currentTimeMillis();
    long lastStart = getLastStart(hdfs);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
    conf.setLong(RowInputFormat.END_TIME_MILLIS, now);

    conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
    conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

    // print config to troubleshoot possible issues
    // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

    Job job = Job.getInstance(conf, "LoadAverage");

    job.setNumReduceTasks(1);

    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setJarByClass(LoadAverage.class);
    job.setMapperClass(LoadAverageMapper.class);
    job.setReducerClass(LoadAverageReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LoadKey.class);

    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(RowOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(LoadAverageModel.class);

    boolean jobSuccess = job.waitForCompletion(true);
    if (jobSuccess) {
        writeLastStart(hdfs, now);
    }

    return jobSuccess ? 0 : 1;
}

From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        printUsage();
    }

    String dbURL = args[0];
    String tableName = args[1];
    String outputPath = args[2];
    String username = (args.length >= 4) ? args[3] : null;
    String password = (args.length >= 5) ? args[4] : null;

    Job job = new Job(getConf(), "HAWQInputFormatDemo");
    job.setJarByClass(HAWQInputFormatDemo.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), dbURL, username, password, tableName);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
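    // map-only job: the echo mapper's records go straight to the output path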
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}

From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo2.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }

    String metadataFile = args[0];
    String outputPath = args[1];

    Job job = new Job(getConf(), "HAWQInputFormatDemo2");
    job.setJarByClass(HAWQInputFormatDemo2.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}

From source file:com.pivotal.hawq.mapreduce.parquet.HAWQParquetOutputDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "HAWQParquetOutputFormat");
    job.setJarByClass(HAWQParquetOutputDriver.class);

    job.setOutputFormatClass(HAWQParquetOutputFormat.class);

    /*
    // int2 int4 int8
    HAWQSchema schema = new HAWQSchema("t_int",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT2, "col_short"),
    HAWQSchema.optional_field(HAWQPrimitiveField.PrimitiveType.INT4, "col_int"),
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT8, "col_long")
    );
    job.setMapperClass(WriteIntMapper.class);
    */

    /*
    // varchar
    HAWQSchema schema = new HAWQSchema("t_varchar",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.VARCHAR, "col_varchar")
    );
    job.setMapperClass(WriteVarcharMapper.class);
    */

    /*
    // float4 float8
    HAWQSchema schema = new HAWQSchema("t_floating",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT4, "col_float"),
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT8, "col_long")
    );
    job.setMapperClass(WriteFloatingNumberMapper.class);
    */

    // boolean
    //      HAWQSchema schema = new HAWQSchema("t_boolean",
    //            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BOOL, "col_bool"));
    //      job.setMapperClass(WriteBooleanMapper.class);

    // byte array
    HAWQSchema schema = new HAWQSchema("t_bytea",
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BYTEA, "col_bytea"));
    job.setMapperClass(WriteByteArrayMapper.class);

    HAWQParquetOutputFormat.setSchema(job, schema);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    HAWQParquetOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Void.class);
    job.setMapOutputValueClass(HAWQRecord.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pivotal.hawq.mapreduce.pt.HAWQInputFormatPerformanceTest_TPCH.java

License:Apache License

private int runMapReduceJob() throws Exception {
    Path outputPath = new Path("/output");
    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(HAWQInputFormatPerformanceTest_TPCH.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    long startTime = System.currentTimeMillis();
    HAWQInputFormat.setInput(job.getConfiguration(), MRFormatConfiguration.TEST_DB_URL, null, null, tableName);
    metadataExtractTime = System.currentTimeMillis() - startTime;

    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(TPCHTableMapper.class);
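    // no reduce phase; the scanned rows are emitted directly by the mapper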
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Void.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.placeiq.piqconnect.BlocksBuilder.java

License:Apache License

protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
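    // number of reduce tasks for this stage is configurable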
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    Runner.setCompression(job);

    return job;
}

From source file:com.placeiq.piqconnect.InitialVectorGenerator.java

License:Apache License

private Job buildJob() throws Exception {
    Configuration conf = getConf();
    conf.setLong("numberOfNodes", numberOfNodes);

    Job job = new Job(conf, "data-piqid.piqconnect.ConCmptIVGen_Stage1");
    job.setJarByClass(InitialVectorGenerator.class);
    job.setMapperClass(_Mapper.class);
    job.setReducerClass(_Reducer.class);
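    // reducer count is configurable via numberOfReducers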
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, pathBitmask);
    FileOutputFormat.setOutputPath(job, pathVector);
    FileOutputFormat.setCompressOutput(job, true);

    return job;
}