Usage examples for org.apache.hadoop.mapred.JobConf.getBoolean

public boolean getBoolean(String name, boolean defaultValue)

Parameters:
name - the property name
defaultValue - the value to return if the property is not set
Returns:
the value of the named property as a boolean; if no such property exists, or if the stored value is not a valid boolean, defaultValue is returned
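Before the examples, a minimal self-contained sketch of the typical pattern: a flag written with setBoolean is read back with getBoolean, and an unset property falls back to the supplied default. The class name GetBooleanExample and the property names example.feature.enabled / example.other.flag are made up for illustration only.

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Store a boolean property (hypothetical property name).
        conf.setBoolean("example.feature.enabled", true);

        // Returns true: the property was set explicitly above.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);

        // Returns false: the property is absent, so the supplied default is returned.
        boolean missing = conf.getBoolean("example.other.flag", false);

        System.out.println(enabled + " " + missing);
    }
}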
From source file:com.TCG.Nutch_DNS.HostDbFilter.java
License:Apache License
public void configure(JobConf job) {
    urlFiltering = job.getBoolean(URL_FILTERING, false);
    urlNormalizers = job.getBoolean(URL_NORMALIZING, false);
    url404Purging = job.getBoolean(HostDb.CRAWLDB_PURGE_404, false);
    if (urlFiltering) {
        filters = new URLFilters(job);
    }
    if (urlNormalizers) {
        scope = job.get(URL_NORMALIZING_SCOPE, URLNormalizers.SCOPE_CRAWLDB);
        normalizers = new URLNormalizers(job, scope);
    }
}
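For context, the flags read above are normally written by the job driver before submission. A hedged sketch of that side, assuming URL_FILTERING, URL_NORMALIZING, and CRAWLDB_PURGE_404 are public constants on HostDbFilter/HostDb (as in the Nutch classes this code mirrors); the boolean values are arbitrary illustration:

// Driver-side sketch (assumed constants, illustrative values).
JobConf job = new JobConf(HostDbFilter.class);
job.setBoolean(HostDbFilter.URL_FILTERING, true);    // configure() above will then build URLFilters
job.setBoolean(HostDbFilter.URL_NORMALIZING, false);  // leave URL normalization off
job.setBoolean(HostDb.CRAWLDB_PURGE_404, true);       // purge 404 records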
From source file:com.TCG.Nutch_DNS.HostDbReducer.java
License:Apache License
public void configure(JobConf job) {
    retryMax = job.getInt("db.fetch.retry.max", 3);
    scfilters = new ScoringFilters(job);
    additionsAllowed = job.getBoolean(HostDb.CRAWLDB_ADDITIONS_ALLOWED, true);
    maxInterval = job.getInt("db.fetch.interval.max", 0);
    schedule = FetchScheduleFactory.getFetchSchedule(job);
    int maxLinks = job.getInt("db.update.max.inlinks", 10000);
    linked = new InlinkPriorityQueue(maxLinks);
}
From source file:com.yolodata.tbana.hadoop.mapred.csv.CSVLineRecordReader.java
License:Open Source License
public void init(InputStream is, JobConf conf) throws IOException {
    this.isZipFile = conf.getBoolean(IS_ZIPFILE, DEFAULT_ZIP);
    if (isZipFile) {
        @SuppressWarnings("resource")
        ZipInputStream zis = new ZipInputStream(new BufferedInputStream(is));
        zis.getNextEntry();
        is = zis;
    }
    this.is = is;
    this.reader = new CSVReader(new BufferedReader(new InputStreamReader(is)));
}
From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java
License:Apache License
public JobInfo getInfo(Submission submission, RunningJob job, JobConf conf)
        throws NotFoundException, InternalException {
    JobInfo info = new JobInfo();
    info.setNativeID(submission.getHadoopID());
    info.setName(job.getJobName());
    info.setTest(false);

    if (conf == null)
        // Can't proceed any further if configuration is unavailable
        return info;

    info.setRequestedMapTasks(conf.getNumMapTasks());
    info.setRequestedReduceTasks(conf.getNumReduceTasks());
    info.setMapper(conf.get(CONF_MAPPER));
    info.setReducer(conf.get(CONF_REDUCER));
    info.setNumericSort(conf.getBoolean(CONF_NUMERIC, false));
    info.setInputPath(
            JobServiceHandler.relativizePath(_homeDir, FileInputFormat.getInputPaths(conf)[0]).toString());
    info.setOutputPath(
            JobServiceHandler.relativizePath(_homeDir, FileOutputFormat.getOutputPath(conf)).toString());

    return info;
}
From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapper.java
License:Apache License
public void configure(JobConf job) {
    super.configure(job);
    // disable the auto increment of the counter. For streaming, no of
    // processed records could be different (equal or less) than the no of
    // records input.
    SkipBadRecords.setAutoIncrMapperProcCount(job, false);
    skipping = job.getBoolean("mapred.skip.on", false);
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class)
            .getCanonicalName();
    ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());

    try {
        mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
        mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
        numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
    }
}
From source file:edu.stolaf.cs.wmrserver.streaming.PipeReducer.java
License:Apache License
public void configure(JobConf job) {
    super.configure(job);
    // disable the auto increment of the counter. For streaming, no of
    // processed records could be different (equal or less) than the no of
    // records input.
    SkipBadRecords.setAutoIncrReducerProcCount(job, false);
    skipping = job.getBoolean("mapred.skip.on", false);

    try {
        // job_ is the JobConf retained by the PipeMapRed base class (set in super.configure(job)).
        reduceOutFieldSeparator = job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
        reduceInputFieldSeparator = job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
        this.numOfReduceOutputKeyFields = job_.getInt("stream.num.reduce.output.key.fields", 1);
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
    }
}
From source file:edu.stolaf.cs.wmrserver.streaming.StreamUtil.java
License:Apache License
public static TaskId getTaskInfo(JobConf job) {
    TaskId res = new TaskId();

    String id = job.get("mapred.task.id");
    if (isLocalJobTracker(job)) {
        // it uses difft naming
        res.mapTask = job.getBoolean("mapred.task.is.map", true);
        res.jobid = "0";
        res.taskid = 0;
        res.execid = 0;
    } else {
        String[] e = id.split("_");
        res.mapTask = e[3].equals("m");
        res.jobid = e[1] + "_" + e[2];
        res.taskid = Integer.parseInt(e[4]);
        res.execid = Integer.parseInt(e[5]);
    }
    return res;
}
From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.ppjoin.MapJoin.java
License:Apache License
@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}
From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.ppjoin.MapSelfJoin.java
License:Apache License
@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
}
From source file:edu.uci.ics.fuzzyjoin.hadoop.ridrecordpairs.ppjoin.MapJoin.java
License:Apache License
@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}