List of usage examples for org.apache.hadoop.mapred JobConf get
public String get(String name)
Returns the value of the name property, or null if no such property exists.
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
/**
 * Looks up the {@code BLOOM_FILTER_PARTS_DIR} property in the given job configuration.
 *
 * @param conf job configuration to read from
 * @return the configured value, or {@code null} if the property is not set
 */
public static String getBloomFilterPartsDir(JobConf conf) {
  final String partsDir = conf.get(BLOOM_FILTER_PARTS_DIR);
  return partsDir;
}
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
/**
 * Looks up the {@code BLOOM_KEYS_COUNTS_DIR} property in the given job configuration.
 *
 * @param conf job configuration to read from
 * @return the configured value, or {@code null} if the property is not set
 */
public static String getApproxCountsDir(JobConf conf) {
  final String countsDir = conf.get(BLOOM_KEYS_COUNTS_DIR);
  return countsDir;
}
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
/**
 * Reads the {@code HLL_ERR} property from the job configuration and parses it as a double.
 *
 * @param conf job configuration to read from
 * @return the parsed value of the property
 * @throws NullPointerException if the property is not set
 * @throws NumberFormatException if the value is not a parsable double
 */
public static double getHllErr(JobConf conf) {
  final String raw = conf.get(HLL_ERR);
  if (raw == null) {
    // Double.parseDouble(null) would also throw NullPointerException, but without naming
    // the property; keep the exception type and say which key is missing.
    throw new NullPointerException("Required property is not set: " + HLL_ERR);
  }
  return Double.parseDouble(raw);
}
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
/**
 * Reads the {@code KEY_SAMPLE_RATE} property from the job configuration and parses it as a
 * double.
 *
 * @param conf job configuration to read from
 * @return the parsed value of the property
 * @throws NullPointerException if the property is not set
 * @throws NumberFormatException if the value is not a parsable double
 */
public static double getKeySampleRate(JobConf conf) {
  final String raw = conf.get(KEY_SAMPLE_RATE);
  if (raw == null) {
    // Double.parseDouble(null) would also throw NullPointerException, but without naming
    // the property; keep the exception type and say which key is missing.
    throw new NullPointerException("Required property is not set: " + KEY_SAMPLE_RATE);
  }
  return Double.parseDouble(raw);
}
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
/**
 * Reads the {@code IO_SORT_PERCENT} property from the job configuration and parses it as a
 * double.
 *
 * @param conf job configuration to read from
 * @return the parsed value of the property
 * @throws NullPointerException if the property is not set
 * @throws NumberFormatException if the value is not a parsable double
 */
public static double getIOSortPercent(JobConf conf) {
  final String raw = conf.get(IO_SORT_PERCENT);
  if (raw == null) {
    // Double.parseDouble(null) would also throw NullPointerException, but without naming
    // the property; keep the exception type and say which key is missing.
    throw new NullPointerException("Required property is not set: " + IO_SORT_PERCENT);
  }
  return Double.parseDouble(raw);
}
From source file:com.liveramp.cascading_ext.bloom.BloomUtil.java
License:Apache License
public static void writeFilterToHdfs(JobConf stepConf, String bloomTargetPath) throws IOException, CardinalityMergeException { String bloomPartsDir = stepConf.get(BloomProps.BLOOM_FILTER_PARTS_DIR); LOG.info("Bloom filter parts located in: " + bloomPartsDir); int maxHashes = BloomProps.getMaxBloomHashes(stepConf); int minHashes = BloomProps.getMinBloomHashes(stepConf); long bloomFilterBits = BloomProps.getNumBloomBits(stepConf); int numSplits = BloomProps.getNumSplits(stepConf); // This is the side bucket that the HyperLogLog writes to LOG.info("Getting key counts from: " + stepConf.get(BloomProps.BLOOM_KEYS_COUNTS_DIR)); long prevJobTuples = getApproxDistinctKeysCount(stepConf, stepConf.get(BloomProps.BLOOM_KEYS_COUNTS_DIR)); Pair<Double, Integer> optimal = getOptimalFalsePositiveRateAndNumHashes(bloomFilterBits, prevJobTuples, minHashes, maxHashes);//from w w w . jav a2s .c o m LOG.info("Counted about " + prevJobTuples + " distinct keys"); LOG.info("Using " + bloomFilterBits + " bits in the bloom filter"); LOG.info("Found a false positive rate of: " + optimal.getLhs()); LOG.info("Will use " + optimal.getRhs() + " bloom hashes"); long splitSize = getSplitSize(bloomFilterBits, numSplits); int numBloomHashes = optimal.getRhs(); synchronized (BF_LOAD_LOCK) { // Load bloom filter parts and merge them. String path = bloomPartsDir + "/" + numBloomHashes; BloomFilter filter = mergeBloomParts(path, bloomFilterBits, splitSize, numBloomHashes, prevJobTuples); // Write merged bloom filter to HDFS LOG.info("Writing created bloom filter to FS: " + bloomTargetPath); filter.writeOut(FileSystemHelper.getFS(), new Path(bloomTargetPath)); } }
From source file:com.liveramp.hank.cascading.DomainBuilderTap.java
License:Apache License
public void sinkConfInit(FlowProcess<JobConf> process, JobConf conf) { super.sinkConfInit(process, conf); // Output Format conf.setOutputFormat(this.outputFormatClass); // Output Committer conf.setOutputCommitter(DomainBuilderOutputCommitter.class); // Set this tap's Domain name locally in the conf if (conf.get(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME) != null) { throw new RuntimeException("Trying to set domain name configuration parameter to " + domainName + " but it was previously set to " + conf.get(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME)); } else {/* w w w . ja v a2 s .c om*/ conf.set(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME, domainName); } }
From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java
License:Apache License
protected static String getTaskAttemptOutputPath(JobConf conf) { String outputPath = conf.get("mapred.work.output.dir"); if (outputPath == null) { throw new RuntimeException("Path was not set in mapred.work.output.dir"); }//from w w w . j ava 2 s . c o m return outputPath; }
From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java
License:Apache License
protected String getJobOutputPath(JobConf conf) { String outputPath = conf.get("mapred.output.dir"); if (outputPath == null) { throw new RuntimeException("Path was not set in mapred.output.dir"); }//from ww w. jav a2s.com return outputPath; }
From source file:com.m6d.filecrush.crush.CrushOptionParsingTest.java
License:Apache License
/**
 * Parses arguments consisting only of an input dir, an output dir and a timestamp, then checks
 * the resulting job configuration values and the max file blocks setting.
 */
@Test
public void defaults() throws Exception {
  final String inputDir = tmp.newFolder("in").getAbsolutePath();
  final String outputDir = tmp.newFolder("out").getAbsolutePath();

  crush.createJobConfAndParseArgs(inputDir, outputDir, "20101116123015");

  final JobConf conf = crush.getJob();

  // Reducer count and output compression settings
  assertThat(conf.get("mapreduce.job.reduces"), equalTo("20"));
  assertThat(conf.get("mapreduce.output.fileoutputformat.compress"), equalTo("true"));
  assertThat(conf.get("mapreduce.output.fileoutputformat.compress.type"), equalTo("BLOCK"));
  assertThat(conf.get("mapreduce.output.fileoutputformat.compress.codec"),
      equalTo("org.apache.hadoop.io.compress.DefaultCodec"));

  assertThat(crush.getMaxFileBlocks(), equalTo(8));

  // Crush-specific properties
  assertThat(conf.get("crush.timestamp"), equalTo("20101116123015"));
  assertThat(conf.get("crush.num.specs"), equalTo("1"));
  assertThat(conf.get("crush.0.regex"), equalTo(".+"));
  assertThat(conf.get("crush.0.regex.replacement"),
      equalTo("crushed_file-20101116123015-${crush.task.num}-${crush.file.num}"));
  assertThat(conf.get("crush.0.input.format"),
      equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
  assertThat(conf.get("crush.0.output.format"),
      equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}