Example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setNumReduceTasks.

Prototype

public void setNumReduceTasks(int n) 

Document

Set the requisite number of reduce tasks for this job.

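Before the project examples, here is a minimal, self-contained sketch (the class name, paths, and job name are illustrative, not taken from the examples below). Passing 0 makes the job map-only, as most of the examples on this page do; a positive value requests that many reduce tasks.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class NumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf(NumReduceTasksExample.class);
        jobConf.setJobName("setNumReduceTasks example");

        //no mapper is set, so the old API's default IdentityMapper passes records through
        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.addInputPath(jobConf, new Path(args[0]));
        FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));

        //0 reduce tasks: the shuffle/sort phase is skipped and each map task
        //writes its output directly through the OutputFormat
        jobConf.setNumReduceTasks(0);
        //a positive value would request that many reduce tasks (and output files) instead,
        //e.g. jobConf.setNumReduceTasks(4)

        JobClient.runJob(jobConf);
    }
}
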
Usage

From source file:org.cloudata.core.tablet.backup.RestoreBinaryJob.java

License:Apache License

/**
 * @param tableName name of the table to restore
 * @param columnNames columns of the restored table
 * @param numOfVersion number of cell versions to keep in the table
 * @param inputPath path of the backup data to read
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(BackupJob.class);

    FileSystem fs = FileSystem.get(partitionJob);

    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path not exists:" + inputPath);
    }

    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists" + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(jobName);

    partitionJob.setMapperClass(RestoreBinaryPartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));

    //map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);

    //delete temp output dir
    fs.delete(new Path(tempDir), true);

    ////////////////////////////////////////////////////////////////
    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(tableName + " restore");

    jobConf.setMapperClass(RestoreBinaryMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
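    //fail fast: do not retry failed map tasks, since a retry would re-insert rows already written to the table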
    jobConf.setMaxMapAttempts(0);
    //map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);

    //delete temp output dir
    fs.delete(new Path(tempDir), true);
}

From source file:org.cloudata.core.tablet.backup.RestoreJob.java

License:Apache License

/**
 * @param tableName name of the table to restore
 * @param columnNames columns of the restored table
 * @param numOfVersion number of cell versions to keep in the table
 * @param inputPath path of the backup data to read
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(RestoreJob.class);

    FileSystem fs = FileSystem.get(partitionJob);

    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path not exists:" + inputPath);
    }

    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists" + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(jobName + "_partition");

    partitionJob.setMapperClass(RestorePartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreTextInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));

    //map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);

    fs.delete(new Path(tempDir), true);
    ////////////////////////////////////////////////////////////

    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(jobName);

    jobConf.setMapperClass(RestoreMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreTextInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
    jobConf.setMaxMapAttempts(0);

    //map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);
    //delete temp output dir
    fs.delete(new Path(tempDir), true);
}

From source file:org.cloudata.core.testjob.performance.ManyTableJob.java

License:Apache License

public static void getData(CloudataConf conf, Path keyPath) throws IOException {
    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", conf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());

    jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");

    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //<MAP>
    jobConf.setMapperClass(ManyTableGetMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, keyPath);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file:org.cloudata.core.testjob.performance.ManyTableJob.java

License:Apache License

public static Path putData() throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(ManyTableJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("ManyTableJob_Put" + "(" + new Date() + ")");

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path("ManyTableJob_KEY_" + System.currentTimeMillis());

    FileOutputFormat.setOutputPath(jobConf, outputPath);

    //<MAP>
    jobConf.setMapperClass(ManyTablePutMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
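    //one map task per table to be created (numOfTables is defined elsewhere in ManyTableJob)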
    jobConf.setNumMapTasks(numOfTables);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        //the output path holds the generated keys and is returned to the caller,
        //so only the MapReduce library dir is cleaned up here
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file:org.cloudata.core.testjob.performance.TestMultiThreadCTable.java

License:Apache License

public static Path putData(String outputDir) throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TestMultiThreadCTable.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("TestMultiThreadNTable_" + "(" + new Date() + ")");

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path(outputDir);

    FileOutputFormat.setOutputPath(jobConf, outputPath);

    JobClient jobClient = new JobClient();

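    //spread a fixed total of about 100,000 rows evenly across the cluster's map slots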
    int numOfRowPerMap = 100000 / jobClient.getClusterStatus().getMaxMapTasks();
    jobConf.setInt("numOfRowPerMap", numOfRowPerMap);
    //<MAP>
    jobConf.setMapperClass(PutDataMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(jobClient.getClusterStatus().getMaxMapTasks());
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        //clean up the MapReduce library dir; the output path is returned to the caller
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file:org.cloudata.core.testjob.tera.TeraJob.java

License:Apache License

public void runJob(String tableName, int numOfTablets, int dataLength, int totalGb, String keyOutputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableInfo = new TableSchema(tableName, "Test");
        tableInfo.addColumn(new ColumnInfo("Col1"));
        tableInfo.addColumn(new ColumnInfo("Col2", TableSchema.CACHE_TYPE));
        tableInfo.addColumn(new ColumnInfo("Col3"));
        CTable.createTable(nconf, tableInfo);
    }
    jobConf.setJobName("TeraOnlineJob" + "(" + new Date() + ")");

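    //rows each of the numOfTablets map tasks must write so the job produces totalGb GB of dataLength-byte rows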
    long rowsPerTask = ((((long) totalGb) * 1024L * 1024L * 1024L) / ((long) dataLength)) / (long) numOfTablets;

    jobConf.setInt("teraJob.dataLength", dataLength);
    jobConf.setLong("teraJob.rowsPerTask", rowsPerTask);

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    FileOutputFormat.setOutputPath(jobConf, new Path(keyOutputPath));

    //<MAP>
    jobConf.setMapperClass(TeraOnlineMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.setNumMapTasks(numOfTablets);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //the key output path is kept (it is consumed later, e.g. by TeraReadJob);
        //only the MapReduce library dir is cleaned up here
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file:org.cloudata.core.testjob.tera.TeraReadJob.java

License:Apache License

public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java TeraReadJob <table name> <keyOutputPath>");
        System.exit(0);
    }

    String tableName = args[0];
    String keyOutputPath = args[1];

    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("Error: No table " + tableName);
        System.exit(0);
    }
    Path tempOutputPath = new Path("TeraReadJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraReadJob" + "(" + new Date() + ")");
    jobConf.set("TeraReadJob.tableName", tableName);

    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //<MAP>
    jobConf.setMapperClass(TeraReadMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, new Path(keyOutputPath));
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file:org.cloudata.core.testjob.tera.TeraScanJob.java

License:Apache License

public void runJob(String tableName) throws IOException {
    JobConf jobConf = new JobConf(TeraScanJob.class);

    CloudataConf nconf = new CloudataConf();

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("No table:" + tableName);
        System.exit(0);
    }
    Path tempOutputPath = new Path("TeraScanJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraScanJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TeraScanMap.class);
    jobConf.setInputFormat(TeraScanJobTabletInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, tableName);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    //    jobConf.setReducerClass(DocFreqReduce.class);
    //    jobConf.setOutputKeyClass(Text.class);
    //    jobConf.setOutputValueClass(Text.class);    
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    FileUtil.delete(fs, tempOutputPath, true);
}

From source file:org.cloudata.examples.first.FirstMapReduce.java

License:Apache License

public static void main(String[] args) throws Exception {
    //Create the output table if it does not already exist
    CloudataConf conf = new CloudataConf();
    String outputTableName = "InvertedTable";
    TableSchema outputTableSchema = new TableSchema(outputTableName);
    outputTableSchema.addColumn("InvertedColumn");
    if (!CTable.existsTable(conf, outputTableName)) {
        CTable.createTable(conf, outputTableSchema);
    }

    JobConf jobConf = new JobConf(FirstMapReduce.class);
    jobConf.setJobName("FirstMapReduce");
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    //<Mapper>
    //Set the mapper class
    jobConf.setMapperClass(FirstMapReduceMapper.class);
    //Use a tablet-based InputFormat to read the input table
    jobConf.setInputFormat(FirstMapReduceInputFormat.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</Mapper>

    //<Reducer>
    String outputPath = "temp/FirstMapReduce";
    FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
    //Set the reducer class
    jobConf.setReducerClass(FirstMapReduceReducer.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    //Match the number of reduce tasks to the number of tablets in the target table
    CTable ctable = CTable.openTable(conf, "SampleTable1");
    TabletInfo[] tabletInfos = ctable.listTabletInfos();
    jobConf.setNumReduceTasks(tabletInfos.length);
    //Each reduce task loads data into a single tablet; set max attempts to 0
    //so that a failed task is not retried, which would insert duplicate records
    jobConf.setMaxReduceAttempts(0);
    //</Reducer>

    try {
        //Run the job
        JobClient.runJob(jobConf);
    } finally {
        //Delete the temp output directory
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(outputPath), true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file:org.cloudata.examples.first.HdfsToCloudataMapReduce.java

License:Apache License

public void run(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java HdfsToCloudataMapReduce <input path> <table name>");
        System.exit(0);
    }

    Path inputPath = new Path(args[0]);
    String tableName = args[1];

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableSchema = new TableSchema(tableName);
        tableSchema.addColumn("col1");

        CTable.createTable(nconf, tableSchema);
    }

    JobConf jobConf = new JobConf(HdfsToCloudataMapReduce.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    // <MAP>
    FileInputFormat.addInputPath(jobConf, inputPath);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(HdfsToCloudataMappper.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    // </MAP>

    // <REDUCE>
    // Map Only
    FileOutputFormat.setOutputPath(jobConf, new Path("HdfsToCloudataMapReduce_" + System.currentTimeMillis()));
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        JobClient.runJob(jobConf);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(FileOutputFormat.getOutputPath(jobConf), true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}