List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks
public void setNumReduceTasks(int n)
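Before the project-specific examples below, here is a minimal, self-contained sketch of how setNumReduceTasks is typically called when configuring a job with the old org.apache.hadoop.mapred API. It is illustrative only: the class name, the "input"/"output" paths, and the choice of four reduce tasks are assumptions, and Hadoop's IdentityMapper/IdentityReducer stand in for real map and reduce logic so the sketch compiles on its own.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetNumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf(SetNumReduceTasksExample.class);
        jobConf.setJobName("setNumReduceTasks-example");

        // Illustrative paths; replace with real HDFS locations.
        FileInputFormat.addInputPath(jobConf, new Path("input"));
        FileOutputFormat.setOutputPath(jobConf, new Path("output"));

        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);

        // Pass-through mapper/reducer shipped with Hadoop, used here only
        // so the sketch has no project-specific dependencies.
        jobConf.setMapperClass(IdentityMapper.class);
        jobConf.setReducerClass(IdentityReducer.class);

        // TextInputFormat produces LongWritable/Text pairs and the identity
        // classes pass them through unchanged.
        jobConf.setOutputKeyClass(LongWritable.class);
        jobConf.setOutputValueClass(Text.class);

        // Request four reduce tasks; 0 would make this a map-only job.
        jobConf.setNumReduceTasks(4);

        JobClient.runJob(jobConf);
    }
}

The count passed to setNumReduceTasks controls how many reduce tasks, and therefore output partitions, the job produces; passing 0 turns the job into a map-only job, as in the WebTableJob example further down.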
From source file:org.cloudata.examples.upload.partitionjob.PartitionJob.java
License:Apache License
public boolean runJob(String inputPath, String tableName, int numOfTablets) throws IOException {
    JobConf jobConf = new JobConf(PartitionJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    FileSystem fs = FileSystem.get(jobConf);
    // remove any previous log-count output for this table
    FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);

    jobConf.setJobName("PartitionJob_" + tableName + "(" + new Date() + ")");
    jobConf.set("cloudata.numOfTablets", String.valueOf(numOfTablets));
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    String clientOpt = jobConf.get("mapred.child.java.opts");
    if (clientOpt == null) {
        clientOpt = "";
    }
    jobConf.set("mapred.child.java.opts", clientOpt + " -Duser.name=" + System.getProperty("user.name"));

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(PartitionMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/partitionJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(PartitionReducer.class);
    // single reduce task
    jobConf.setNumReduceTasks(1);
    //</Reduce>

    try {
        RunningJob job = JobClient.runJob(jobConf);
        return job.isSuccessful();
    } finally {
        FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.examples.upload.partitionjob.UploadJob.java
License:Apache License
public void runJob(String inputPath, String tableName) throws IOException {
    JobConf jobConf = new JobConf(UploadJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);
    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    // KeyRangePartitioner reads the target table name from AbstractTabletInputFormat.OUTPUT_TABLE
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    CloudataConf conf = new CloudataConf();
    CTable ctable = CTable.openTable(conf, tableName);
    TabletInfo[] tabletInfos = ctable.listTabletInfos();

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(UploadReducer.class);
    jobConf.setReduceSpeculativeExecution(false);
    jobConf.setMaxReduceAttempts(0);
    // one reduce task per tablet
    jobConf.setNumReduceTasks(tabletInfos.length);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.examples.upload.SimpleUploaderMapReduce.java
License:Apache License
public void run(String[] args) throws IOException {
    if (args.length < 3) {
        System.out.println("Usage: java SimpleUploaderMapReduce <input path> <table name> <# reduce>");
        System.exit(0);
    }

    Path inputPath = new Path(args[0]);
    String tableName = args[1];

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableSchema = new TableSchema(tableName);
        tableSchema.addColumn("Col1");

        Row.Key[] rowKeys = new Row.Key[20];
        for (int i = 0; i < 10; i++) {
            rowKeys[i] = new Row.Key("-0" + i);
        }
        for (int i = 1; i < 10; i++) {
            rowKeys[9 + i] = new Row.Key("0" + i);
        }
        rowKeys[19] = Row.Key.MAX_KEY;

        CTable.createTable(nconf, tableSchema, rowKeys);
    }

    JobConf jobConf = new JobConf(HdfsToCloudataMapReduce.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    // <MAP>
    FileInputFormat.addInputPath(jobConf, inputPath);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(SimpleUploaderMapper.class);
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    // </MAP>

    // <REDUCE>
    FileOutputFormat.setOutputPath(jobConf, new Path("SimpleUploaderMapReduce_" + System.currentTimeMillis()));
    jobConf.setReducerClass(SimpleUploaderReducer.class);
    jobConf.setNumReduceTasks(Integer.parseInt(args[2]));
    jobConf.setMaxReduceAttempts(0);
    // </REDUCE>

    try {
        JobClient.runJob(jobConf);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(FileOutputFormat.getOutputPath(jobConf), true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.examples.web.DocFreqJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java DocFreqJob <num of repeats> docFreq [#reduce]");
        System.exit(0);
    }

    JobConf jobConf = new JobConf(DocFreqJob.class);
    JobClient jobClinet = new JobClient(jobConf);
    int maxReduce = jobClinet.getClusterStatus().getMaxReduceTasks() * 2;
    if (options.length > 0) {
        maxReduce = Integer.parseInt(options[0]);
    }

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, TermUploadJob.TERM_TABLE)) {
        TableSchema temrTableInfo = new TableSchema(TermUploadJob.TERM_TABLE, "Test",
                TermUploadJob.TERM_TABLE_COLUMNS);
        CTable.createTable(nconf, temrTableInfo);
    }

    Path tempOutputPath = new Path("DocFreqJob_" + System.currentTimeMillis());
    jobConf.setJobName("DocFreqJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(DocFreqMap.class);
    jobConf.setInputFormat(AbstractTabletInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, WebTableJob.WEB_TABLE);
    jobConf.set(AbstractTabletInputFormat.INPUT_COLUMN_LIST,
            WebTableJob.WEB_TABLE_COLUMNS[1] + "," + WebTableJob.WEB_TABLE_COLUMNS[2]);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    // jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(DocFreqReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(maxReduce);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}
From source file:org.cloudata.examples.web.TermGlobalJob.java
License:Apache License
public void exec() throws Exception {
    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, GLOBAL_TABLE)) {
        TableSchema globalTableInfo = new TableSchema(GLOBAL_TABLE, "Test", GLOBAL_TABLE_COLUMNS);
        CTable.createTable(nconf, globalTableInfo);
    }

    Path tempOutputPath = new Path("globalTableInfo" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(WebTableJob.class);
    jobConf.setJobName("TermGlobalJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TermGlobalMap.class);
    jobConf.setInputFormat(DefaultTabletInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.INPUT_TABLE, WebTableJob.WEB_TABLE);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, WebTableJob.WEB_TABLE_COLUMNS[2]);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermGlobalReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setNumReduceTasks(1);
    jobConf.setMaxReduceAttempts(0);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}
From source file:org.cloudata.examples.web.TermUploadJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TermUploadJob <num of repeats> termUpload <inputPath> [#reduce]");
        System.exit(0);
    }

    JobConf jobConf = new JobConf(TermUploadJob.class);
    JobClient jobClinet = new JobClient(jobConf);
    int maxReduce = jobClinet.getClusterStatus().getMaxReduceTasks() * 2;
    if (options.length > 1) {
        maxReduce = Integer.parseInt(options[1]);
    }

    jobConf.setInt("mapred.task.timeout", 60 * 60 * 1000);

    FileSystem fs = FileSystem.get(jobConf);

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, TERM_TABLE)) {
        // create TERM_TABLE, deriving tablet boundaries from the pre-computed partition info
        Path path = new Path("blogdata/tmp/weight");
        FileStatus[] paths = fs.listStatus(path);
        if (paths == null || paths.length == 0) {
            LOG.error("No Partition info:" + path);
            return;
        }

        SortedSet<Text> terms = new TreeSet<Text>();
        Text text = new Text();
        for (FileStatus eachPath : paths) {
            CloudataLineReader reader = new CloudataLineReader(fs.open(eachPath.getPath()));
            while (true) {
                int length = reader.readLine(text);
                if (length <= 0) {
                    break;
                }
                terms.add(new Text(text));
            }
        }

        int temrsPerTablet = terms.size() / (maxReduce - 1);
        int count = 0;
        List<Row.Key> rowKeys = new ArrayList<Row.Key>();
        for (Text term : terms) {
            count++;
            if (count == temrsPerTablet) {
                rowKeys.add(new Row.Key(term.getBytes()));
                count = 0;
            }
        }
        rowKeys.add(Row.Key.MAX_KEY);

        TableSchema temrTableInfo = new TableSchema(TERM_TABLE, "Test", TERM_TABLE_COLUMNS);
        CTable.createTable(nconf, temrTableInfo, rowKeys.toArray(new Row.Key[] {}));
    }

    CTable termTable = CTable.openTable(nconf, TERM_TABLE);
    TabletInfo[] tabletInfos = termTable.listTabletInfos();

    Path tempOutputPath = new Path("WebTableJob_" + System.currentTimeMillis());
    jobConf.setJobName("TermUploadJob" + "(" + new Date() + ")");

    FileInputFormat.addInputPath(jobConf, new Path(options[0]));

    //<MAP>
    jobConf.setMapperClass(TermUploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, TERM_TABLE);
    jobConf.setPartitionerClass(WebKeyRangePartitioner.class);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermUploadReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setNumReduceTasks(tabletInfos.length);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    // note: this second call overrides the tabletInfos.length value set just above
    jobConf.setNumReduceTasks(maxReduce);
    jobConf.setMaxReduceAttempts(0);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    fs.delete(tempOutputPath);
}
From source file:org.cloudata.examples.web.TermWeightJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TermWeightJob <num of repeats> termWeight <outputPath> [noGlobal]");
        System.exit(0);
    }

    if (options.length == 1 || !options[1].equals("noGlobal")) {
        TermGlobalJob termGlobalJob = new TermGlobalJob();
        termGlobalJob.exec();
    }

    Path outputPath = new Path(options[0]);

    JobConf jobConf = new JobConf(WebTableJob.class);
    jobConf.setJobName("TermWeightJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TermWeightMap.class);
    jobConf.setInputFormat(DefaultTabletInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.INPUT_TABLE, WebTableJob.WEB_TABLE);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST,
            WebTableJob.WEB_TABLE_COLUMNS[1] + "," + WebTableJob.WEB_TABLE_COLUMNS[2]);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    // jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermWeightReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobConf, outputPath);
    //jobConf.setMaxReduceAttempts(0);
    JobClient jobClinet = new JobClient(jobConf);
    int maxReduce = jobClinet.getClusterStatus().getMaxReduceTasks();
    jobConf.setNumReduceTasks(maxReduce);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    //FileSystem fs = FileSystem.get(jobConf);
    //fs.delete(outputPath);
}
From source file:org.cloudata.examples.web.TermWeightJobOnline.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TermWeightJobOnline <num of repeats> termWeightOnline [noGlobal]");
        System.exit(0);
    }

    if (options.length == 1 || !options[1].equals("noGlobal")) {
        TermGlobalJob termGlobalJob = new TermGlobalJob();
        termGlobalJob.exec();
    }

    Path outputPath = new Path("TermWeightJobOnline_" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(TermWeightJobOnline.class);
    jobConf.setJobName("TermWeightJobOnline" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TermWeightMap.class);
    jobConf.setInputFormat(DefaultTabletInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.INPUT_TABLE, WebTableJob.WEB_TABLE);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST,
            WebTableJob.WEB_TABLE_COLUMNS[1] + "," + WebTableJob.WEB_TABLE_COLUMNS[2]);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    // jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermWeightReduceOnline.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobConf, outputPath);
    //jobConf.setMaxReduceAttempts(0);
    JobClient jobClinet = new JobClient(jobConf);
    int maxReduce = jobClinet.getClusterStatus().getMaxReduceTasks();
    jobConf.setNumReduceTasks(maxReduce);
    //</REDUCE>

    //Run Job
    try {
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(outputPath, true);
    }
}
From source file:org.cloudata.examples.web.WebTableJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TestWebPage <num of repeats> webtable <inputPath>");
        System.exit(0);
    }

    // create WEB_TABLE if it does not already exist
    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, WEB_TABLE)) {
        TableSchema webTableInfo = new TableSchema(WEB_TABLE, "Test", WEB_TABLE_COLUMNS);
        webTableInfo.setNumOfVersion(2);
        CTable.createTable(nconf, webTableInfo);
    }

    Path tempOutputPath = new Path("WebTableJob_" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(WebTableJob.class);
    jobConf.setJobName("WebTableJob" + "(" + new Date() + ")");

    FileInputFormat.addInputPath(jobConf, new Path(options[0]));

    //<MAP>
    jobConf.setMapperClass(WebTableMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    // map-only job: no reduce phase
    jobConf.setNumReduceTasks(0);

    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}
From source file:org.cloudata.examples.weblink.ScanJob.java
License:Apache License
public void run(String[] args) throws IOException {
    if (args.length < 1) {
        System.out.println("Usage: java ScanJob <table name>");
        System.exit(0);
    }

    String tableName = args[0];

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("No table: " + tableName);
        System.exit(0);
    }

    JobConf jobConf = new JobConf(UploadJob.class);
    jobConf.setJobName("CloudataExamles.weblink.ScanJob_" + new Date());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    // <MAP>
    jobConf.setInputFormat(DefaultTabletInputFormat.class);
    jobConf.setMapperClass(ScanJobMapper.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, tableName);
    jobConf.set(AbstractTabletInputFormat.INPUT_COLUMN_LIST, "url");
    // </MAP>

    // <REDUCE>
    FileOutputFormat.setOutputPath(jobConf, new Path("CloudataExamles_WebScanJob_" + System.currentTimeMillis()));
    jobConf.setReducerClass(ScanJobReducer.class);
    jobConf.setNumReduceTasks(1);
    // </REDUCE>

    try {
        JobClient.runJob(jobConf);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}