Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

On this page you can find example usages of the get method of org.apache.hadoop.mapred.JobConf.

Prototype

public String get(String name) 

Source Link

Document

Returns the value of the property called name, or null if no such property exists.

Usage

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

/**
 * Looks up the value stored under {@code BLOOM_FILTER_PARTS_DIR} in the given job configuration.
 *
 * @param conf job configuration to read from
 * @return the configured value, or {@code null} when the property is not set
 */
public static String getBloomFilterPartsDir(JobConf conf) {
    final String partsDir = conf.get(BLOOM_FILTER_PARTS_DIR);
    return partsDir;
}

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

/**
 * Looks up the value stored under {@code BLOOM_KEYS_COUNTS_DIR} in the given job configuration.
 *
 * @param conf job configuration to read from
 * @return the configured value, or {@code null} when the property is not set
 */
public static String getApproxCountsDir(JobConf conf) {
    final String countsDir = conf.get(BLOOM_KEYS_COUNTS_DIR);
    return countsDir;
}

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

/**
 * Reads the {@code HLL_ERR} property from the job configuration and parses it as a double.
 *
 * @param conf job configuration to read from
 * @return the parsed property value
 * @throws NumberFormatException if the stored value is not a parsable double
 * @throws NullPointerException if the property is not set, since {@code Double.parseDouble}
 *         rejects {@code null}
 */
public static double getHllErr(JobConf conf) {
    final String rawValue = conf.get(HLL_ERR);
    return Double.parseDouble(rawValue);
}

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

/**
 * Reads the {@code KEY_SAMPLE_RATE} property from the job configuration and parses it as a double.
 *
 * @param conf job configuration to read from
 * @return the parsed property value
 * @throws NumberFormatException if the stored value is not a parsable double
 * @throws NullPointerException if the property is not set, since {@code Double.parseDouble}
 *         rejects {@code null}
 */
public static double getKeySampleRate(JobConf conf) {
    final String rawValue = conf.get(KEY_SAMPLE_RATE);
    return Double.parseDouble(rawValue);
}

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

/**
 * Reads the {@code IO_SORT_PERCENT} property from the job configuration and parses it as a double.
 *
 * @param conf job configuration to read from
 * @return the parsed property value
 * @throws NumberFormatException if the stored value is not a parsable double
 * @throws NullPointerException if the property is not set, since {@code Double.parseDouble}
 *         rejects {@code null}
 */
public static double getIOSortPercent(JobConf conf) {
    final String rawValue = conf.get(IO_SORT_PERCENT);
    return Double.parseDouble(rawValue);
}

From source file:com.liveramp.cascading_ext.bloom.BloomUtil.java

License:Apache License

/**
 * Merges previously written bloom filter parts into a single filter and writes it to
 * {@code bloomTargetPath} on the file system returned by {@code FileSystemHelper.getFS()}.
 *
 * <p>The number of hashes is chosen by {@code getOptimalFalsePositiveRateAndNumHashes} from the
 * configured filter size, the approximate distinct key count, and the configured min/max hash
 * bounds. The parts are then read from the sub-directory of the parts dir named after that count.
 *
 * @param stepConf job configuration holding the bloom filter properties
 * @param bloomTargetPath destination path for the merged bloom filter
 * @throws IOException declared by this method; presumably raised by the file-system reads and
 *         writes performed in the helpers it calls
 * @throws CardinalityMergeException declared by this method; presumably raised while combining
 *         the approximate key counts
 */
public static void writeFilterToHdfs(JobConf stepConf, String bloomTargetPath)
        throws IOException, CardinalityMergeException {
    final String partsDir = stepConf.get(BloomProps.BLOOM_FILTER_PARTS_DIR);
    LOG.info("Bloom filter parts located in: " + partsDir);

    final int maxHashes = BloomProps.getMaxBloomHashes(stepConf);
    final int minHashes = BloomProps.getMinBloomHashes(stepConf);
    final long filterBits = BloomProps.getNumBloomBits(stepConf);
    final int splitCount = BloomProps.getNumSplits(stepConf);

    // Side bucket that the HyperLogLog writes its key counts to.
    final String keyCountsDir = stepConf.get(BloomProps.BLOOM_KEYS_COUNTS_DIR);
    LOG.info("Getting key counts from: " + keyCountsDir);

    final long distinctKeys = getApproxDistinctKeysCount(stepConf, keyCountsDir);

    // Lhs carries the false positive rate, rhs the number of hashes to use.
    final Pair<Double, Integer> rateAndHashes =
            getOptimalFalsePositiveRateAndNumHashes(filterBits, distinctKeys, minHashes, maxHashes);
    LOG.info("Counted about " + distinctKeys + " distinct keys");
    LOG.info("Using " + filterBits + " bits in the bloom filter");
    LOG.info("Found a false positive rate of: " + rateAndHashes.getLhs());
    LOG.info("Will use " + rateAndHashes.getRhs() + " bloom hashes");

    final long splitSize = getSplitSize(filterBits, splitCount);
    final int hashCount = rateAndHashes.getRhs();

    // Loading, merging and writing all happen while holding BF_LOAD_LOCK.
    synchronized (BF_LOAD_LOCK) {
        // Parts for a given hash count live in a sub-directory named after that count.
        final String hashPartsPath = partsDir + "/" + hashCount;
        final BloomFilter merged =
                mergeBloomParts(hashPartsPath, filterBits, splitSize, hashCount, distinctKeys);

        LOG.info("Writing created bloom filter to FS: " + bloomTargetPath);
        merged.writeOut(FileSystemHelper.getFS(), new Path(bloomTargetPath));
    }
}

From source file:com.liveramp.hank.cascading.DomainBuilderTap.java

License:Apache License

/**
 * Initializes the sink configuration for this tap: delegates to the superclass, installs the
 * output format and output committer, and records this tap's domain name in the configuration.
 *
 * @param process flow process handed through to the superclass
 * @param conf job configuration to populate
 * @throws RuntimeException if a domain name is already present in {@code conf}
 */
public void sinkConfInit(FlowProcess<JobConf> process, JobConf conf) {
    super.sinkConfInit(process, conf);

    conf.setOutputFormat(this.outputFormatClass);
    conf.setOutputCommitter(DomainBuilderOutputCommitter.class);

    // The domain name may only be set once per configuration; refuse to overwrite it.
    final String domainNameKey = DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME;
    final String existingDomainName = conf.get(domainNameKey);
    if (existingDomainName != null) {
        throw new RuntimeException("Trying to set domain name configuration parameter to " + domainName
                + " but it was previously set to "
                + existingDomainName);
    }
    conf.set(domainNameKey, domainName);
}

From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java

License:Apache License

/**
 * Returns the value of the {@code mapred.work.output.dir} property.
 *
 * @param conf job configuration to read from
 * @return the configured path, never {@code null}
 * @throws RuntimeException if the property is not set
 */
protected static String getTaskAttemptOutputPath(JobConf conf) {
    final String workOutputDir = conf.get("mapred.work.output.dir");
    if (workOutputDir != null) {
        return workOutputDir;
    }
    throw new RuntimeException("Path was not set in mapred.work.output.dir");
}

From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java

License:Apache License

/**
 * Returns the value of the {@code mapred.output.dir} property.
 *
 * @param conf job configuration to read from
 * @return the configured path, never {@code null}
 * @throws RuntimeException if the property is not set
 */
protected String getJobOutputPath(JobConf conf) {
    final String jobOutputDir = conf.get("mapred.output.dir");
    if (jobOutputDir != null) {
        return jobOutputDir;
    }
    throw new RuntimeException("Path was not set in mapred.output.dir");
}

From source file:com.m6d.filecrush.crush.CrushOptionParsingTest.java

License:Apache License

/**
 * Checks the job configuration produced when only the input directory, output directory and
 * timestamp arguments are supplied.
 */
@Test
public void defaults() throws Exception {
    final String inputDir = tmp.newFolder("in").getAbsolutePath();
    final String outputDir = tmp.newFolder("out").getAbsolutePath();
    crush.createJobConfAndParseArgs(inputDir, outputDir, "20101116123015");

    final JobConf conf = crush.getJob();

    // Reducer count and output compression settings.
    assertThat(conf.get("mapreduce.job.reduces"), equalTo("20"));
    assertThat(conf.get("mapreduce.output.fileoutputformat.compress"), equalTo("true"));
    assertThat(conf.get("mapreduce.output.fileoutputformat.compress.type"), equalTo("BLOCK"));
    assertThat(conf.get("mapreduce.output.fileoutputformat.compress.codec"),
            equalTo("org.apache.hadoop.io.compress.DefaultCodec"));

    assertThat(crush.getMaxFileBlocks(), equalTo(8));

    // The timestamp argument is stored verbatim.
    assertThat(conf.get("crush.timestamp"), equalTo("20101116123015"));

    // Exactly one crush spec is expected, stored under index 0.
    assertThat(conf.get("crush.num.specs"), equalTo("1"));

    assertThat(conf.get("crush.0.regex"), equalTo(".+"));
    assertThat(conf.get("crush.0.regex.replacement"),
            equalTo("crushed_file-20101116123015-${crush.task.num}-${crush.file.num}"));
    assertThat(conf.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(conf.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}