Example usage for org.apache.hadoop.mapred JobConf getBoolean

List of usage examples for org.apache.hadoop.mapred JobConf getBoolean

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Document

Get the value of the name property as a boolean. If the property is not set, or if its value is not a valid boolean, then defaultValue is returned.
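As a brief illustration of this behavior (the property names below are made up for the example), getBoolean returns the stored value when the property is set and falls back to the supplied default otherwise:

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.setBoolean("example.flag", true);

        // Property is set: the stored value is returned.
        boolean set = conf.getBoolean("example.flag", false);     // true

        // Property is absent: the default value is returned.
        boolean unset = conf.getBoolean("example.missing", true); // true

        System.out.println(set + " " + unset);
    }
}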

Usage

From source file: org.apache.sysml.runtime.matrix.sort.PickFromCompactInputFormat.java

License: Apache License

@Override
public RecordReader<MatrixIndexes, MatrixCell> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    if (job.getBoolean(INPUT_IS_VECTOR, true))
        return new PickRecordReader(job, (FileSplit) split);
    else
        return new RangePickRecordReader(job, (FileSplit) split);
}
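For context, the flag read above is typically published by the job driver before submission. A minimal sketch of that pattern (the literal key string here is an assumption; real driver code would reference the class's INPUT_IS_VECTOR constant instead):

import org.apache.hadoop.mapred.JobConf;

public class DriverSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Hypothetical key string; use PickFromCompactInputFormat.INPUT_IS_VECTOR
        // in real code.
        job.setBoolean("input.is.vector", false);
        // With the flag set to false, getRecordReader() above would return a
        // RangePickRecordReader; configure formats and submit the job as usual.
    }
}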

From source file: org.apache.sysml.runtime.matrix.sort.ValueSortReducer.java

License: Apache License

@Override
public void configure(JobConf job) {
    taskID = MapReduceTool.getUniqueKeyPerTask(job, false);
    valueIsWeight = job.getBoolean(SortMR.VALUE_IS_WEIGHT, false);
}

From source file: org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java

License: Apache License

@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
    JobConf jobConf = new JobConf();

    jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
    jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000l);
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
    jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
    jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
    jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);

    MRHelpers.translateMRConfToTez(jobConf);

    assertEquals(0.4f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f),
            0.01f);
    assertEquals(20000l, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
    assertEquals(2000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
    assertEquals(0.55f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0),
            0.01f);
    assertEquals(0.60f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0),
            0.01f);
    assertEquals(0.22f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
    assertEquals(0.33f,
            jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0), 0.01f);
}

From source file: org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java

License: Apache License

@Test(timeout = 5000)
/**
 * Set of keys that can be overridden at Tez runtime.
 */
public void verifyTezOverridenKeys() {
    JobConf jobConf = new JobConf();
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
    jobConf.setInt(MRJobConfig.IO_SORT_MB, 100);
    jobConf.setInt(MRJobConfig.COUNTERS_MAX_KEY, 100);

    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 1000);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 200);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, true);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, 20);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT, 0.2f);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 20);
    jobConf.setInt(Constants.TEZ_RUNTIME_TASK_MEMORY, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, true);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, true);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 10.0f);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 10.0f);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 10.0f);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 10.0f);
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, "DefaultSorter");
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_GROUP_COMPARATOR_CLASS, "groupComparator");
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS, "SecondaryComparator");

    jobConf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, true);

    MRHelpers.translateMRConfToTez(jobConf);

    assertEquals(1000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
    assertEquals(200, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 100));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, false));
    assertEquals(20, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 0));
    assertEquals(20, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 0));
    assertEquals(10, jobConf.getInt(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, false));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, false));
    assertEquals(10.0f,
            jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.0f), 0.0f);
    assertEquals(10.0f,
            jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0.0f), 0.0f);
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0.0f),
            0.0f);
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
    assertEquals(10.0f,
            jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.0f), 0.0f);
    assertEquals("DefaultSorter", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, ""));
    assertEquals("groupComparator",
            jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_GROUP_COMPARATOR_CLASS, ""));
    assertEquals("SecondaryComparator",
            jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS, ""));
    assertEquals("DefaultSorter", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, ""));
    assertTrue(jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false));

    assertNull(jobConf.get(MRConfig.MAPRED_IFILE_READAHEAD));
    assertNull(jobConf.get(MRConfig.MAPRED_IFILE_READAHEAD_BYTES));
    assertNull(jobConf.get(MRJobConfig.RECORDS_BEFORE_PROGRESS));
    assertNull(jobConf.get(MRJobConfig.IO_SORT_FACTOR));
    assertNull(jobConf.get(MRJobConfig.IO_SORT_MB));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_READ_TIMEOUT));
    assertNull(jobConf.get(MRJobConfig.INDEX_CACHE_MEMORY_LIMIT));
    assertNull(jobConf.get(MRJobConfig.MAP_COMBINE_MIN_SPILLS));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_PARALLEL_COPIES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_FETCH_FAILURES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_NOTIFY_READERROR));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_READ_TIMEOUT));
    assertNull(jobConf.get(MRConfig.SHUFFLE_SSL_ENABLED_KEY));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMTOMEM_ENABLED));
    assertNull(jobConf.get(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT));
    assertNull(jobConf.get(MRJobConfig.GROUP_COMPARATOR_CLASS));
    assertNull(jobConf.get(MRJobConfig.GROUP_COMPARATOR_CLASS));
    assertNull(jobConf.get("map.sort.class"));
}

From source file: org.archive.access.nutch.jobs.ImportArcs.java

License: LGPL

public void configure(final JobConf job) {
    setConf(job);
    this.indexAll = job.getBoolean("wax.index.all", false);

    this.contentLimit = job.getInt("http.content.limit", 1024 * 100);
    final int pdfMultiplicand = job.getInt("wax.pdf.size.multiplicand", 10);
    this.pdfContentLimit = (this.contentLimit == -1) ? this.contentLimit : pdfMultiplicand * this.contentLimit;
    this.mimeTypes = MimeTypes.get(job.get("mime.types.file"));
    this.segmentName = job.get(Nutch.SEGMENT_NAME_KEY);

    // Get the rsync protocol handler into the mix.
    System.setProperty("java.protocol.handler.pkgs", "org.archive.net");

    // Format numbers output by parse rate logging.
    this.numberFormatter.setMaximumFractionDigits(2);
    this.numberFormatter.setMinimumFractionDigits(2);
    this.parseThreshold = job.getInt("wax.parse.rate.threshold", -1);

    this.indexRedirects = job.getBoolean("wax.index.redirects", false);

    this.sha1 = job.getBoolean("wax.digest.sha1", false);

    this.urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_FETCHER);
    this.filters = new URLFilters(job);

    this.parseUtil = new ParseUtil(job);

    this.collectionName = job.get(ImportArcs.WAX_SUFFIX + ImportArcs.ARCCOLLECTION_KEY);

    // Get ARCName by reading first record in ARC?  Otherwise, we parse
    // the name of the file we've been passed to find an ARC name.
    this.arcNameFromFirstRecord = job.getBoolean("wax.arcname.from.first.record", true);

    this.collectionType = job.get(Global.COLLECTION_TYPE);
    this.timeoutIndexingDocument = job.getInt(Global.TIMEOUT_INDEXING_DOCUMENT, -1);

    LOG.info("ImportArcs collectionType: " + collectionType);
}

From source file: org.archive.jbs.lucene.LuceneOutputFormat.java

License: Apache License

/**
 * Factory method which constructs the LuceneDocumentWriter.  Much
 * of the configuration can be controlled via the Hadoop JobConf.
 */
protected LuceneDocumentWriter buildDocumentWriter(JobConf job, IndexWriter indexer) throws IOException {
    CustomAnalyzer analyzer = new CustomAnalyzer(
            job.getBoolean("jbs.lucene.analyzer.custom.omitNonAlpha", true), new HashSet<String>(
                    Arrays.asList(job.get("jbs.lucene.analyzer.stopWords", "").trim().split("\\s+"))));

    LuceneDocumentWriter writer = new LuceneDocumentWriter(indexer, analyzer);

    IDNHelper idnHelper = buildIDNHelper(job);
    TypeNormalizer normalizer = buildTypeNormalizer(job);
    TypeFilter typeFilter = buildTypeFilter(job, normalizer);

    writer.setFilter("reqFields", new RequiredFieldsFilter());
    writer.setFilter("type", typeFilter);
    writer.setFilter("robots", new RobotsFilter());
    writer.setFilter("http", new HTTPStatusCodeFilter(job.get("jbs.httpStatusCodeFilter")));

    int textMaxLength = job.getInt("jbs.lucene.text.maxlength", TextHandler.MAX_LENGTH);

    Map<String, FieldHandler> handlers = new HashMap<String, FieldHandler>();
    handlers.put("url", new SimpleFieldHandler("url", Field.Store.YES, Field.Index.ANALYZED));
    handlers.put("digest", new SimpleFieldHandler("digest", Field.Store.YES, Field.Index.NO));
    handlers.put("title", new SimpleFieldHandler("title", Field.Store.YES, Field.Index.ANALYZED));
    handlers.put("keywords", new SimpleFieldHandler("keywords", Field.Store.YES, Field.Index.ANALYZED));
    handlers.put("description", new SimpleFieldHandler("description", Field.Store.YES, Field.Index.ANALYZED));
    handlers.put("length", new SimpleFieldHandler("length", Field.Store.YES, Field.Index.NO));
    handlers.put("collection",
            new SimpleFieldHandler("collection", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    handlers.put("code", new SimpleFieldHandler("code", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    handlers.put("content", new TextHandler("content", textMaxLength));
    handlers.put("boiled", new TextHandler("boiled", textMaxLength));
    handlers.put("date", new DateHandler());
    handlers.put("site", new SiteHandler(idnHelper));
    handlers.put("type", new TypeHandler(normalizer));
    handlers.put("boost", new BoostHandler());

    writer.setHandlers(handlers);

    return writer;
}

From source file: org.archive.jbs.lucene.LuceneOutputFormat.java

License: Apache License

/**
 * Build a TypeNormalizer object using configuration information in the JobConf.
 */
protected TypeNormalizer buildTypeNormalizer(JobConf job) {
    TypeNormalizer normalizer = new TypeNormalizer();

    Map<String, String> aliases = normalizer.parseAliases(job.get("jbs.typeNormalizer.aliases", ""));

    if (job.getBoolean("jbs.typeNormalizer.useDefaults", true)) {
        Map<String, String> defaults = normalizer.getDefaultAliases();
        defaults.putAll(aliases);

        aliases = defaults;
    }
    normalizer.setAliases(aliases);

    return normalizer;
}

From source file: org.archive.jbs.lucene.LuceneOutputFormat.java

License: Apache License

/**
 * Build a TypeFilter object using configuration information in the JobConf.
 */
protected TypeFilter buildTypeFilter(JobConf job, TypeNormalizer normalizer) {
    TypeFilter typeFilter = new TypeFilter();

    Set<String> allowedTypes = typeFilter.parse(job.get("jbs.typeFilter.allowed", ""));

    if (job.getBoolean("jbs.typeFilter.useDefaults", true)) {
        Set<String> defaults = typeFilter.getDefaultAllowed();
        defaults.addAll(allowedTypes);

        allowedTypes = defaults;
    }
    typeFilter.setAllowed(allowedTypes);
    typeFilter.setTypeNormalizer(normalizer);

    return typeFilter;
}

From source file: org.archive.jbs.lucene.LuceneOutputFormat.java

License: Apache License

/**
 * Build an IDNHelper object using configuration information in the JobConf.
 */
protected IDNHelper buildIDNHelper(JobConf job) throws IOException {
    IDNHelper helper = new IDNHelper();

    if (job.getBoolean("jbs.idnHelper.useDefaults", true)) {
        InputStream is = SiteHandler.class.getClassLoader().getResourceAsStream("effective_tld_names.dat");

        if (is == null) {
            throw new RuntimeException("Cannot load default tld rules: effective_tld_names.dat");
        }

        Reader reader = new InputStreamReader(is, "utf-8");

        helper.addRules(reader);
    }

    String moreRules = job.get("jbs.idnHelper.moreRules", "");

    if (moreRules.length() > 0) {
        helper.addRules(new StringReader(moreRules));
    }

    return helper;
}

From source file: org.archive.jbs.misc.PageRank.java

License: Apache License

public static IDNHelper buildIDNHelper(JobConf job) throws IOException {
    IDNHelper helper = new IDNHelper();

    if (job.getBoolean("jbs.idnHelper.useDefaults", true)) {
        InputStream is = PageRank.class.getClassLoader().getResourceAsStream("effective_tld_names.dat");

        if (is == null) {
            throw new RuntimeException("Cannot load default tld rules: effective_tld_names.dat");
        }

        Reader reader = new InputStreamReader(is, "utf-8");

        helper.addRules(reader);
    }

    String moreRules = job.get("jbs.idnHelper.moreRules", "");

    if (moreRules.length() > 0) {
        helper.addRules(new StringReader(moreRules));
    }

    return helper;
}