List of usage examples for org.apache.hadoop.mapred.JobConf.getBoolean
public boolean getBoolean(String name, boolean defaultValue)

Gets the value of the name property as a boolean. If no such property exists, or if the stored value is not a valid boolean, then defaultValue is returned.

Parameters:
  name - the property name
  defaultValue - the value to return when the property is unset or invalid

Returns: the property value as a boolean, or defaultValue.
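Before the harvested examples below, here is a minimal, self-contained sketch of the call in isolation. The property key "my.job.verbose" and the demo class are invented for illustration:

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // The key is unset, so the supplied default is returned.
        boolean verbose = conf.getBoolean("my.job.verbose", false);
        System.out.println(verbose); // false

        // Once the key is set, getBoolean returns the stored value
        // and the default is ignored.
        conf.setBoolean("my.job.verbose", true);
        System.out.println(conf.getBoolean("my.job.verbose", false)); // true
    }
}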
From source file: org.apache.sysml.runtime.matrix.sort.PickFromCompactInputFormat.java
License: Apache License

@Override
public RecordReader<MatrixIndexes, MatrixCell> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    if (job.getBoolean(INPUT_IS_VECTOR, true))
        return new PickRecordReader(job, (FileSplit) split);
    else
        return new RangePickRecordReader(job, (FileSplit) split);
}
From source file: org.apache.sysml.runtime.matrix.sort.ValueSortReducer.java
License: Apache License

@Override
public void configure(JobConf job) {
    taskID = MapReduceTool.getUniqueKeyPerTask(job, false);
    valueIsWeight = job.getBoolean(SortMR.VALUE_IS_WEIGHT, false);
}
From source file: org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java
License: Apache License

@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
    JobConf jobConf = new JobConf();
    jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
    jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
    jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
    jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
    jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);

    MRHelpers.translateMRConfToTez(jobConf);

    assertEquals(0.4f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f), 0.01f);
    assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
    assertEquals(2000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
    assertEquals(0.55f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
    assertEquals(0.60f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0), 0.01f);
    assertEquals(0.22f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
    assertEquals(0.33f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0), 0.01f);
}
From source file: org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java
License: Apache License

/**
 * Set of keys that can be overridden at Tez runtime.
 */
@Test(timeout = 5000)
public void verifyTezOverridenKeys() {
    JobConf jobConf = new JobConf();
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
    jobConf.setInt(MRJobConfig.IO_SORT_MB, 100);
    jobConf.setInt(MRJobConfig.COUNTERS_MAX_KEY, 100);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 1000);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 200);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, true);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, 20);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SORT_SPILL_PERCENT, 0.2f);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 20);
    jobConf.setInt(Constants.TEZ_RUNTIME_TASK_MEMORY, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, true);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 10);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, true);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 10.0f);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 10.0f);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 10.0f);
    jobConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 10);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
    jobConf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 10.0f);
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, "DefaultSorter");
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_GROUP_COMPARATOR_CLASS, "groupComparator");
    jobConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS, "SecondaryComparator");
    jobConf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false);
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, true);

    MRHelpers.translateMRConfToTez(jobConf);

    assertEquals(1000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
    assertEquals(200, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 100));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, false));
    assertEquals(20, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 0));
    assertEquals(20, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 0));
    assertEquals(10, jobConf.getInt(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, false));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 0));
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, false));
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.0f), 0.0f);
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0.0f), 0.0f);
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0.0f), 0.0f);
    assertEquals(10, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0));
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
    assertEquals(10.0f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.0f), 0.0f);
    assertEquals("DefaultSorter", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, ""));
    assertEquals("groupComparator", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_GROUP_COMPARATOR_CLASS, ""));
    assertEquals("SecondaryComparator", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS, ""));
    assertEquals("DefaultSorter", jobConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, ""));
    assertTrue(jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false));
    assertNull(jobConf.get(MRConfig.MAPRED_IFILE_READAHEAD));
    assertNull(jobConf.get(MRConfig.MAPRED_IFILE_READAHEAD_BYTES));
    assertNull(jobConf.get(MRJobConfig.RECORDS_BEFORE_PROGRESS));
    assertNull(jobConf.get(MRJobConfig.IO_SORT_FACTOR));
    assertNull(jobConf.get(MRJobConfig.IO_SORT_MB));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_READ_TIMEOUT));
    assertNull(jobConf.get(MRJobConfig.INDEX_CACHE_MEMORY_LIMIT));
    assertNull(jobConf.get(MRJobConfig.MAP_COMBINE_MIN_SPILLS));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_PARALLEL_COPIES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_FETCH_FAILURES));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_NOTIFY_READERROR));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_READ_TIMEOUT));
    assertNull(jobConf.get(MRConfig.SHUFFLE_SSL_ENABLED_KEY));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT));
    assertNull(jobConf.get(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD));
    assertNull(jobConf.get(MRJobConfig.REDUCE_MEMTOMEM_ENABLED));
    assertNull(jobConf.get(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT));
    assertNull(jobConf.get(MRJobConfig.GROUP_COMPARATOR_CLASS));
    assertNull(jobConf.get(MRJobConfig.GROUP_COMPARATOR_CLASS));
    assertNull(jobConf.get("map.sort.class"));
}
From source file: org.archive.access.nutch.jobs.ImportArcs.java
License: LGPL

public void configure(final JobConf job) {
    setConf(job);
    this.indexAll = job.getBoolean("wax.index.all", false);
    this.contentLimit = job.getInt("http.content.limit", 1024 * 100);
    final int pdfMultiplicand = job.getInt("wax.pdf.size.multiplicand", 10);
    this.pdfContentLimit = (this.contentLimit == -1) ? this.contentLimit : pdfMultiplicand * this.contentLimit;
    this.mimeTypes = MimeTypes.get(job.get("mime.types.file"));
    this.segmentName = job.get(Nutch.SEGMENT_NAME_KEY);
    // Get the rsync protocol handler into the mix.
    System.setProperty("java.protocol.handler.pkgs", "org.archive.net");
    // Format numbers output by parse rate logging.
    this.numberFormatter.setMaximumFractionDigits(2);
    this.numberFormatter.setMinimumFractionDigits(2);
    this.parseThreshold = job.getInt("wax.parse.rate.threshold", -1);
    this.indexRedirects = job.getBoolean("wax.index.redirects", false);
    this.sha1 = job.getBoolean("wax.digest.sha1", false);
    this.urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_FETCHER);
    this.filters = new URLFilters(job);
    this.parseUtil = new ParseUtil(job);
    this.collectionName = job.get(ImportArcs.WAX_SUFFIX + ImportArcs.ARCCOLLECTION_KEY);
    // Get ARCName by reading first record in ARC? Otherwise, we parse
    // the name of the file we've been passed to find an ARC name.
    this.arcNameFromFirstRecord = job.getBoolean("wax.arcname.from.first.record", true);
    this.collectionType = job.get(Global.COLLECTION_TYPE);
    this.timeoutIndexingDocument = job.getInt(Global.TIMEOUT_INDEXING_DOCUMENT, -1);
    LOG.info("ImportArcs collectionType: " + collectionType);
}
From source file: org.archive.jbs.lucene.LuceneOutputFormat.java
License: Apache License

/**
 * Factory method which constructs the LuceneDocumentWriter. Much
 * of the configuration can be controlled via the Hadoop JobConf.
 */
protected LuceneDocumentWriter buildDocumentWriter(JobConf job, IndexWriter indexer) throws IOException {
    CustomAnalyzer analyzer = new CustomAnalyzer(
            job.getBoolean("jbs.lucene.analyzer.custom.omitNonAlpha", true),
            new HashSet<String>(Arrays.asList(job.get("jbs.lucene.analyzer.stopWords", "").trim().split("\\s+"))));

    LuceneDocumentWriter writer = new LuceneDocumentWriter(indexer, analyzer);

    IDNHelper idnHelper = buildIDNHelper(job);
    TypeNormalizer normalizer = buildTypeNormalizer(job);
    TypeFilter typeFilter = buildTypeFilter(job, normalizer);

    writer.setFilter("reqFields", new RequiredFieldsFilter());
    writer.setFilter("type", typeFilter);
    writer.setFilter("robots", new RobotsFilter());
    writer.setFilter("http", new HTTPStatusCodeFilter(job.get("jbs.httpStatusCodeFilter")));

    int textMaxLength = job.getInt("jbs.lucene.text.maxlength", TextHandler.MAX_LENGTH);

    Map<String, FieldHandler> handlers = new HashMap<String, FieldHandler>();
    handlers.put("url", new SimpleFieldHandler("url", Field.Store.YES, Field.Index.ANALYZED));
    handlers.put("digest", new SimpleFieldHandler("digest", Field.Store.YES, Field.Index.NO));
    handlers.put("title", new SimpleFieldHandler("title", Field.Store.YES, Field.Index.ANALYZED));
    handlers.put("keywords", new SimpleFieldHandler("keywords", Field.Store.YES, Field.Index.ANALYZED));
    handlers.put("description", new SimpleFieldHandler("description", Field.Store.YES, Field.Index.ANALYZED));
    handlers.put("length", new SimpleFieldHandler("length", Field.Store.YES, Field.Index.NO));
    handlers.put("collection", new SimpleFieldHandler("collection", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    handlers.put("code", new SimpleFieldHandler("code", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    handlers.put("content", new TextHandler("content", textMaxLength));
    handlers.put("boiled", new TextHandler("boiled", textMaxLength));
    handlers.put("date", new DateHandler());
    handlers.put("site", new SiteHandler(idnHelper));
    handlers.put("type", new TypeHandler(normalizer));
    handlers.put("boost", new BoostHandler());

    writer.setHandlers(handlers);

    return writer;
}
From source file: org.archive.jbs.lucene.LuceneOutputFormat.java
License: Apache License

/**
 * Build a TypeNormalizer object using configuration information in the JobConf.
 */
protected TypeNormalizer buildTypeNormalizer(JobConf job) {
    TypeNormalizer normalizer = new TypeNormalizer();

    Map<String, String> aliases = normalizer.parseAliases(job.get("jbs.typeNormalizer.aliases", ""));

    if (job.getBoolean("jbs.typeNormalizer.useDefaults", true)) {
        Map<String, String> defaults = normalizer.getDefaultAliases();
        defaults.putAll(aliases);
        aliases = defaults;
    }
    normalizer.setAliases(aliases);

    return normalizer;
}
From source file: org.archive.jbs.lucene.LuceneOutputFormat.java
License: Apache License

/**
 * Build a TypeFilter object using configuration information in the JobConf.
 */
protected TypeFilter buildTypeFilter(JobConf job, TypeNormalizer normalizer) {
    TypeFilter typeFilter = new TypeFilter();

    Set<String> allowedTypes = typeFilter.parse(job.get("jbs.typeFilter.allowed", ""));

    if (job.getBoolean("jbs.typeFilter.useDefaults", true)) {
        Set<String> defaults = typeFilter.getDefaultAllowed();
        defaults.addAll(allowedTypes);
        allowedTypes = defaults;
    }
    typeFilter.setAllowed(allowedTypes);
    typeFilter.setTypeNormalizer(normalizer);

    return typeFilter;
}
From source file: org.archive.jbs.lucene.LuceneOutputFormat.java
License: Apache License

/**
 * Build an IDNHelper object using configuration information in the JobConf.
 */
protected IDNHelper buildIDNHelper(JobConf job) throws IOException {
    IDNHelper helper = new IDNHelper();

    if (job.getBoolean("jbs.idnHelper.useDefaults", true)) {
        InputStream is = SiteHandler.class.getClassLoader().getResourceAsStream("effective_tld_names.dat");
        if (is == null) {
            throw new RuntimeException("Cannot load default tld rules: effective_tld_names.dat");
        }
        Reader reader = new InputStreamReader(is, "utf-8");
        helper.addRules(reader);
    }

    String moreRules = job.get("jbs.idnHelper.moreRules", "");
    if (moreRules.length() > 0) {
        helper.addRules(new StringReader(moreRules));
    }

    return helper;
}
From source file: org.archive.jbs.misc.PageRank.java
License: Apache License

public static IDNHelper buildIDNHelper(JobConf job) throws IOException {
    IDNHelper helper = new IDNHelper();

    if (job.getBoolean("jbs.idnHelper.useDefaults", true)) {
        InputStream is = PageRank.class.getClassLoader().getResourceAsStream("effective_tld_names.dat");
        if (is == null) {
            throw new RuntimeException("Cannot load default tld rules: effective_tld_names.dat");
        }
        Reader reader = new InputStreamReader(is, "utf-8");
        helper.addRules(reader);
    }

    String moreRules = job.get("jbs.idnHelper.moreRules", "");
    if (moreRules.length() > 0) {
        helper.addRules(new StringReader(moreRules));
    }

    return helper;
}