Example usage for org.apache.hadoop.mapred JobConf setMaxMapAttempts

List of usage examples for org.apache.hadoop.mapred JobConf setMaxMapAttempts

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf setMaxMapAttempts.

Prototype

public void setMaxMapAttempts(int n) 

Source Link

Document

Expert: Set the maximum number of attempts that will be made to run a map task.

Usage

From source file:org.cloudata.core.testjob.tera.TeraScanJob.java

License:Apache License

/**
 * Configures and runs a map-only MapReduce job ({@code TeraScanMap}) over the given table.
 *
 * <p>If the table does not exist, a message is printed and the JVM exits with status 0.
 * The job writes to a uniquely named temporary output path, which is deleted once the
 * job has finished, whether it succeeded or failed.
 *
 * @param tableName name of the table passed to the job as its input table
 * @throws IOException if running the job or deleting the temporary output fails
 */
public void runJob(String tableName) throws IOException {
    JobConf jobConf = new JobConf(TeraScanJob.class);

    CloudataConf nconf = new CloudataConf();

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("No table:" + tableName);
        System.exit(0);
    }
    // The timestamp suffix keeps concurrent runs from sharing an output directory.
    Path tempOutputPath = new Path("TeraScanJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraScanJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TeraScanMap.class);
    jobConf.setInputFormat(TeraScanJobTabletInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, tableName);
    jobConf.setMapSpeculativeExecution(false);
    // NOTE(review): the attempt limit is set to 0; confirm how the target Hadoop version treats 0.
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    // Map-only job: no reducer is configured and the reduce task count is 0.
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        JobClient.runJob(jobConf);
    } finally {
        // Remove the temporary output even when the job fails, so failed runs leave nothing behind.
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
    }
}

From source file:org.cloudata.examples.upload.partitionjob.UploadJob.java

License:Apache License

/**
 * Configures and runs the "UploadJob" MapReduce job for the given input and table.
 *
 * <p>The job reads text input from {@code inputPath}, maps it with {@code UploadMap},
 * partitions with {@code KeyRangePartitioner} and reduces with {@code UploadReducer},
 * using one reduce task per tablet reported for {@code tableName}. The temporary reducer
 * output directory and the MapReduce library directory are cleaned up afterwards,
 * regardless of the job's outcome.
 *
 * @param inputPath path of the text input to process
 * @param tableName name of the table set as the job's output table
 * @throws IOException if the table lookup, the job, or the cleanup fails
 */
public void runJob(String inputPath, String tableName) throws IOException {
    final JobConf job = new JobConf(UploadJob.class);
    final String mapReduceLibDir = CloudataMapReduceUtil.initMapReduce(job);

    final String jobName = "UploadJob_" + tableName + "(" + new Date() + ")";
    job.setJobName(jobName);

    // Output table name for the job; presumably read by KeyRangePartitioner - confirm.
    job.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    // One reduce task is scheduled per tablet of the target table.
    final CloudataConf cloudataConf = new CloudataConf();
    final CTable targetTable = CTable.openTable(cloudataConf, tableName);
    final int tabletCount = targetTable.listTabletInfos().length;

    // ---- map side ----
    final Path source = new Path(inputPath);
    FileInputFormat.addInputPath(job, source);
    job.setInputFormat(TextInputFormat.class);
    job.setMapperClass(UploadMap.class);
    job.setPartitionerClass(KeyRangePartitioner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setMaxMapAttempts(0);

    // ---- reduce side ----
    final Path reducerOutput = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(job, reducerOutput);
    job.setReducerClass(UploadReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setReduceSpeculativeExecution(false);
    job.setMaxReduceAttempts(0);
    job.setNumReduceTasks(tabletCount);

    try {
        JobClient.runJob(job);
    } finally {
        // Always discard the temporary reducer output and the library directory.
        final FileSystem fileSystem = FileSystem.get(job);
        FileUtil.delete(fileSystem, reducerOutput, true);
        CloudataMapReduceUtil.clearMapReduce(mapReduceLibDir);
    }
}

From source file:org.cloudata.examples.web.TermUploadJob.java

License:Apache License

/**
 * Configures and runs the "TermUploadJob" MapReduce job.
 *
 * <p>When {@code TERM_TABLE} does not exist yet, it is created first: the terms found in
 * the files under {@code blogdata/tmp/weight} are sorted and every N-th term becomes a
 * tablet split key, where N is derived from the reduce task count. If no such files are
 * found, an error is logged and the method returns without running a job.
 *
 * <p>The job's temporary output path is deleted once the job has finished, whether it
 * succeeded or failed.
 *
 * @param options {@code options[0]} is the input path; the optional {@code options[1]}
 *                overrides the number of reduce tasks (default: twice the cluster's
 *                maximum reduce tasks). With no options a usage message is printed and
 *                the JVM exits with status 0.
 * @throws Exception if table setup, the job, or the cleanup fails
 */
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TermUploadJob <num of repeats> termUpload <inputPath> [#redcue]");
        System.exit(0);
    }
    JobConf jobConf = new JobConf(TermUploadJob.class);
    JobClient jobClient = new JobClient(jobConf);
    int maxReduce = jobClient.getClusterStatus().getMaxReduceTasks() * 2;
    if (options.length > 1) {
        maxReduce = Integer.parseInt(options[1]);
    }

    // Task timeout of one hour, in milliseconds.
    jobConf.setInt("mapred.task.timeout", 60 * 60 * 1000);

    FileSystem fs = FileSystem.get(jobConf);

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, TERM_TABLE)) {
        // The table is missing: derive its tablet split keys from the partition files.
        Path path = new Path("blogdata/tmp/weight");
        FileStatus[] paths = fs.listStatus(path);
        if (paths == null || paths.length == 0) {
            LOG.error("No Partition info:" + path);
            return;
        }
        SortedSet<Text> terms = new TreeSet<Text>();
        Text text = new Text();
        for (FileStatus eachPath : paths) {
            CloudataLineReader reader = new CloudataLineReader(fs.open(eachPath.getPath()));
            try {
                // readLine reuses 'text', so each term is copied before it is stored.
                while (reader.readLine(text) > 0) {
                    terms.add(new Text(text));
                }
            } finally {
                // Release the underlying file stream even if reading fails.
                reader.close();
            }
        }

        // Guard the divisor: maxReduce == 1 used to cause a division by zero. With no
        // positive split count, no split keys are emitted and only MAX_KEY is used.
        int splitCount = maxReduce - 1;
        int termsPerTablet = splitCount > 0 ? terms.size() / splitCount : 0;
        int count = 0;
        List<Row.Key> rowKeys = new ArrayList<Row.Key>();
        for (Text term : terms) {
            count++;
            if (count == termsPerTablet) {
                rowKeys.add(new Row.Key(term.getBytes()));
                count = 0;
            }
        }
        rowKeys.add(Row.Key.MAX_KEY);

        TableSchema termTableInfo = new TableSchema(TERM_TABLE, "Test", TERM_TABLE_COLUMNS);
        CTable.createTable(nconf, termTableInfo, rowKeys.toArray(new Row.Key[] {}));
    }
    CTable termTable = CTable.openTable(nconf, TERM_TABLE);
    TabletInfo[] tabletInfos = termTable.listTabletInfos();

    Path tempOutputPath = new Path("WebTableJob_" + System.currentTimeMillis());

    jobConf.setJobName("TermUploadJob" + "(" + new Date() + ")");
    FileInputFormat.addInputPath(jobConf, new Path(options[0]));

    //<MAP>
    jobConf.setMapperClass(TermUploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, TERM_TABLE);
    jobConf.setPartitionerClass(WebKeyRangePartitioner.class);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermUploadReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    // NOTE(review): this tablet-count value is overwritten by setNumReduceTasks(maxReduce)
    // below, so maxReduce is what takes effect - confirm which of the two is intended.
    jobConf.setNumReduceTasks(tabletInfos.length);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(maxReduce);
    jobConf.setMaxReduceAttempts(0);
    //</REDUCE>

    try {
        JobClient.runJob(jobConf);
    } finally {
        // Remove the temporary output even when the job fails; the explicit recursive
        // flag replaces the deprecated single-argument delete.
        fs.delete(tempOutputPath, true);
    }
}

From source file:org.cloudata.examples.web.WebTableJob.java

License:Apache License

/**
 * Configures and runs the map-only "WebTableJob" MapReduce job.
 *
 * <p>{@code WEB_TABLE} is created first if it does not exist, with its number of
 * versions set to 2. The job reads text input from {@code options[0]}, maps it with
 * {@code WebTableMap}, and writes to a temporary output path that is deleted once the
 * job has finished, whether it succeeded or failed.
 *
 * @param options {@code options[0]} is the input path; with no options a usage message
 *                is printed and the JVM exits with status 0
 * @throws Exception if table setup, the job, or the cleanup fails
 */
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TestWebPage <num of repeats> webtable <inputPath>");
        System.exit(0);
    }
    // Create the web table on first use.
    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, WEB_TABLE)) {
        TableSchema webTableInfo = new TableSchema(WEB_TABLE, "Test", WEB_TABLE_COLUMNS);
        webTableInfo.setNumOfVersion(2);
        CTable.createTable(nconf, webTableInfo);
    }

    // The timestamp suffix keeps concurrent runs from sharing an output directory.
    Path tempOutputPath = new Path("WebTableJob_" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(WebTableJob.class);
    jobConf.setJobName("WebTableJob" + "(" + new Date() + ")");
    FileInputFormat.addInputPath(jobConf, new Path(options[0]));

    //<MAP>
    jobConf.setMapperClass(WebTableMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //Map Only
    jobConf.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);

    try {
        JobClient.runJob(jobConf);
    } finally {
        // Remove the temporary output even when the job fails, so failed runs leave nothing behind.
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(tempOutputPath, true);
    }
}

From source file:org.cloudata.util.upload.UploadUtil.java

License:Apache License

/**
 * Configures and runs the map-only "UploadJob" MapReduce job for this instance's
 * {@code inputPath} and {@code tableName}.
 *
 * <p>The delimiter, column names, field numbers and key/value-pair flag are handed to
 * the job through {@code uploadJob.*} configuration entries; columns and field numbers
 * are serialized as comma-terminated lists. The temporary output directory and the
 * MapReduce library directory are cleaned up afterwards, regardless of the job's outcome.
 *
 * @param conf configuration used to check that the target table exists
 * @throws IOException if the table does not exist, or the job or the cleanup fails
 */
private void doHadoopUpload(CloudataConf conf) throws IOException {
    final boolean tableExists = CTable.existsTable(conf, tableName);
    if (!tableExists) {
        throw new IOException("No table:" + tableName);
    }

    final JobConf job = new JobConf(UploadUtil.class);
    final String mapReduceLibDir = CloudataMapReduceUtil.initMapReduce(job);

    job.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    // Output table name for the job.
    job.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    // ---- map side ----
    FileInputFormat.addInputPath(job, new Path(inputPath));
    job.setInputFormat(TextInputFormat.class);
    job.set("uploadJob.delim", delim);

    // Each entry is followed by a comma, including the last one.
    final StringBuilder columnList = new StringBuilder();
    for (String column : columns) {
        columnList.append(column).append(",");
    }
    job.set("uploadJob.columns", columnList.toString());

    final StringBuilder fieldNumList = new StringBuilder();
    for (int fieldNum : fieldNums) {
        fieldNumList.append(fieldNum).append(",");
    }
    job.set("uploadJob.fieldNums", fieldNumList.toString());

    job.setBoolean("uploadJob.keyValuePair", keyValuePair);
    job.setMapperClass(UploadMap.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setMaxMapAttempts(0);

    // ---- reduce side: none, the job is map-only ----
    final Path jobOutput = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(job, jobOutput);
    job.setNumReduceTasks(0);

    try {
        JobClient.runJob(job);
    } finally {
        // Always discard the temporary output and the library directory.
        final FileSystem fileSystem = FileSystem.get(job);
        FileUtil.delete(fileSystem, jobOutput, true);
        CloudataMapReduceUtil.clearMapReduce(mapReduceLibDir);
    }
}

From source file:org.terrier.utility.io.TestHadoopPlugin.java

License:Mozilla Public License

/**
 * Checks that two {@code JobConf} instances do not share state: a property and a
 * max-map-attempts value written to {@code jc1} must not be visible through {@code jc2}.
 *
 * @param jc1 the configuration that is modified
 * @param jc2 the configuration that must remain unaffected
 */
protected void checkTwoJC(JobConf jc1, JobConf jc2) {
    jc1.set(RANDOM_PROPERTY, "notnull");
    jc1.setMaxMapAttempts(3014);

    assertNull(jc2.get(RANDOM_PROPERTY, null));
    // Compare by value. The previous assertNotSame(3014, ...) compared the identity of
    // two freshly boxed Integers, which always differ, so the check could never fail.
    if (jc2.getMaxMapAttempts() == 3014) {
        throw new AssertionError("max map attempts set on jc1 leaked into jc2: 3014");
    }
}