Example usage for org.apache.hadoop.mapred JobConf setBoolean

List of usage examples for org.apache.hadoop.mapred JobConf setBoolean

Introduction

This page collects example usages of org.apache.hadoop.mapred JobConf.setBoolean, drawn from the source files listed below.

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
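Before the project examples, here is a minimal self-contained sketch. The property name "example.job.verbose" is made up for illustration; the point is that a flag stored with setBoolean is later read back with getBoolean, which takes a default value.

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Store a boolean flag under a (hypothetical) property name.
        conf.setBoolean("example.job.verbose", true);

        // Task-side code typically reads the flag back with getBoolean,
        // supplying a default in case the property was never set.
        boolean verbose = conf.getBoolean("example.job.verbose", false);
        System.out.println("verbose = " + verbose);
    }
}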

Usage

From source file:org.archive.nutchwax.PageRankDbMerger.java

License:Apache License

public static JobConf createMergeJob(Configuration config, Path pageRankDb, boolean normalize, boolean filter) {
    Path newPageRankDb = new Path(
            "pagerankdb-merge-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf job = new NutchJob(config);
    job.setJobName("pagerankdb merge " + pageRankDb);

    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(PageRankDbFilter.class);
    job.setBoolean(LinkDbFilter.URL_NORMALIZING, normalize);
    job.setBoolean(LinkDbFilter.URL_FILTERING, filter);
    job.setReducerClass(PageRankDbMerger.class);

    FileOutputFormat.setOutputPath(job, newPageRankDb);
    job.setOutputFormat(MapFileOutputFormat.class);
    job.setBoolean("mapred.output.compress", true);
    job.setOutputKeyClass(Text.class);

    // DIFF: Use IntWritable instead of Inlinks as the output value type.
    job.setOutputValueClass(IntWritable.class);

    return job;
}
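In this merge job, setBoolean passes the normalize and filter options through the job configuration to the PageRankDbFilter mapper, which presumably reads them back under LinkDbFilter.URL_NORMALIZING and LinkDbFilter.URL_FILTERING; "mapred.output.compress" is the standard old-API property that turns on compression for the job's output files.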

From source file:org.cloudata.util.matrix.AbstractMatrix.java

License:Apache License

public void mutiply(AbstractMatrix targetMatrix, AbstractMatrix resultMatrix) throws IOException {
    Path tempOutputPath = new Path("temp/Matrix_" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(AbstractMatrix.class);
    jobConf.setJobName("Matrix_Mutiply_Job" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(MatrixMutiplyMap.class);
    jobConf.setInputFormat(MatrixInputFormat.class);
    jobConf.set(MatrixInputFormat.MATRIX_INPUT_TABLE, ctable.getTableName());
    jobConf.set(MatrixInputFormat.MATRIX_INPUT_COLUMN, columnName);
    jobConf.set(MatrixInputFormat.MATRIX_TARGET_TABLE, targetMatrix.ctable.getTableName());
    jobConf.set(MatrixInputFormat.MATRIX_TARGET_COLUMN, targetMatrix.columnName);
    jobConf.setBoolean(MatrixInputFormat.MATRIX_TARGET_SPARSE, targetMatrix.isSparse());
    jobConf.setMapOutputKeyClass(MatrixItem.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</MAP>

    //<REDUCE>
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, resultMatrix.ctable.getTableName());
    jobConf.setReducerClass(MatrixMutiplyReduce.class);
    jobConf.set(MatrixInputFormat.MATRIX_RESULT_TABLE, resultMatrix.ctable.getTableName());
    jobConf.set(MatrixInputFormat.MATRIX_RESULT_COLUMN, resultMatrix.columnName);
    jobConf.setBoolean(MatrixInputFormat.MATRIX_RESULT_SPARSE, resultMatrix.isSparse());
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    TabletInfo[] tabletInfos = resultMatrix.ctable.listTabletInfos();

    jobConf.setNumReduceTasks(tabletInfos.length);
    jobConf.setMaxReduceAttempts(0);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}
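Here the two setBoolean calls record whether the target and result matrices are sparse (MatrixInputFormat.MATRIX_TARGET_SPARSE and MATRIX_RESULT_SPARSE), presumably so the map and reduce sides can handle sparse and dense representations with the same job code.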

From source file:org.cloudata.util.upload.UploadUtil.java

License:Apache License

private void doHadoopUpload(CloudataConf conf) throws IOException {
    if (!CTable.existsTable(conf, tableName)) {
        throw new IOException("No table:" + tableName);
    }

    JobConf jobConf = new JobConf(UploadUtil.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    // The KeyRangePartitioner uses AbstractTabletInputFormat.OUTPUT_TABLE
    // to locate the tablets of the table being written.
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.set("uploadJob.delim", delim);
    String columnStr = "";
    for (String eachColumn : columns) {
        columnStr += eachColumn + ",";
    }
    jobConf.set("uploadJob.columns", columnStr);

    String fieldNumStr = "";
    for (int eachField : fieldNums) {
        fieldNumStr += eachField + ",";
    }
    jobConf.set("uploadJob.fieldNums", fieldNumStr);
    jobConf.setBoolean("uploadJob.keyValuePair", keyValuePair);
    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
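The uploadJob.keyValuePair flag tells the UploadMap mapper whether each input line already carries an explicit key/value pair (a reading inferred from the name); setNumReduceTasks(0) then makes this a map-only job.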

From source file:org.elasticsearch.hadoop.integration.HdpBootstrap.java

License:Apache License

public static JobConf hadoopConfig() {
    if (!hackVerified) {
        hackVerified = true;
        // check local execution
        if ("local".equals(TestSettings.TESTING_PROPS.get("mapred.job.tracker"))) {
            hackHadoopStagingOnWin();
        }
        // damn HADOOP-9123
        System.setProperty("path.separator", ":");
    }

    JobConf conf = addProperties(new JobConf(), TestSettings.TESTING_PROPS);
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    // provision if not local
    if (!isJtLocal(conf)) {
        Provisioner.provision(conf);
        HdfsUtils.rmr(conf, ".staging");
    }

    return conf;
}
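Setting "mapred.used.genericoptionsparser" to true is a common trick to suppress the "Use GenericOptionsParser for parsing the arguments" warning that JobClient prints for jobs not launched through ToolRunner; the same call appears in several of the test jobs below.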

From source file:org.elasticsearch.hadoop.integration.mr.AbstractMROldApiSearchTest.java

License:Apache License

@Test
public void testSearchNonExistingIndex() throws Exception {
    JobConf conf = createJobConf();
    conf.setBoolean(ConfigurationOptions.ES_INDEX_READ_MISSING_AS_EMPTY, true);
    conf.set(ConfigurationOptions.ES_RESOURCE, "foobar/save");

    JobClient.runJob(conf);
}

From source file:org.elasticsearch.hadoop.integration.mr.AbstractMROldApiSearchTest.java

License:Apache License

@Test(expected = EsHadoopIllegalArgumentException.class)
public void testSearchUpdatedWithoutUpsertMeaningNonExistingIndex() throws Exception {
    JobConf conf = createJobConf();
    conf.setBoolean(ConfigurationOptions.ES_INDEX_READ_MISSING_AS_EMPTY, false);
    conf.set(ConfigurationOptions.ES_RESOURCE, indexPrefix + "mroldapi/updatewoupsert");

    JobClient.runJob(conf);
}
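These two tests exercise the same flag in both directions: with ES_INDEX_READ_MISSING_AS_EMPTY set to true, a query against the non-existing foobar index yields an empty result set and the job succeeds; with it set to false, the read fails with the expected EsHadoopIllegalArgumentException.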

From source file:org.elasticsearch.hadoop.integration.mr.MROldApiSaveTest.java

License:Apache License

@Test
public void testBasicSave() throws Exception {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(ESOutputFormat.class);
    conf.setMapOutputValueClass(MapWritable.class);
    conf.setMapperClass(JsonMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    FileInputFormat.setInputPaths(conf, new Path("src/test/resources/artists.dat"));
    conf.set("es.resource", "mroldapi/save");

    JobClient.runJob(conf);
}

From source file:org.elasticsearch.hadoop.integration.mr.MROldApiSaveTest.java

License:Apache License

@Test(expected = IllegalArgumentException.class)
public void testIndexAutoCreateDisabled() throws Exception {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(ESOutputFormat.class);
    conf.setMapOutputValueClass(MapWritable.class);
    conf.setMapperClass(JsonMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    FileInputFormat.setInputPaths(conf, new Path("src/test/resources/artists.dat"));
    conf.set(ConfigurationOptions.ES_RESOURCE, "mroldapi/non-existing");
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, "no");

    JobClient.runJob(conf);
}
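This is the counterpart to the basic save: with ES_INDEX_AUTO_CREATE set to "no", writing to the non-existing mroldapi/non-existing resource is rejected instead of creating the index on the fly, which is exactly what the expected IllegalArgumentException asserts.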

From source file:org.elasticsearch.hadoop.integration.mr.MROldApiSearchTest.java

License:Apache License

@Test
public void testBasicSearch() throws Exception {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(ESInputFormat.class);
    conf.setOutputFormat(PrintStreamOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(MapWritable.class);
    conf.setBoolean("mapred.used.genericoptionsparser", true);
    conf.set("es.resource", "mroldapi/save/_search?q=*");

    // un-comment to print results to the console (works only in local mode)
    //PrintStreamOutputFormat.stream(conf, Stream.OUT);

    JobClient.runJob(conf);
}

From source file:org.elasticsearch.hadoop.integration.mr.MROldApiSearchTest.java

License:Apache License

@Test
public void testSearchNonExistingIndex() throws Exception {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(ESInputFormat.class);
    conf.setOutputFormat(PrintStreamOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(MapWritable.class);
    conf.setBoolean("mapred.used.genericoptionsparser", true);
    conf.setBoolean(ConfigurationOptions.ES_INDEX_READ_MISSING_AS_EMPTY, true);
    conf.set("es.resource", "foobar/save/_search?q=*");

    // un-comment to print results to the console (works only in local mode)
    //PrintStreamOutputFormat.stream(conf, Stream.OUT);

    JobClient.runJob(conf);
}