Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf.get.

Prototype

public String get(String name) 

Document

Get the value of the name property, or null if no such property exists.
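
A minimal, self-contained sketch of the behavior described above; the property names (example.property, example.missing) are illustrative and not taken from the examples on this page:

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Set a property, then read it back with get(String).
        conf.set("example.property", "hello");
        System.out.println(conf.get("example.property")); // prints "hello"

        // get(String) returns null for a property that was never set.
        System.out.println(conf.get("example.missing")); // prints "null"

        // The two-argument overload returns a default instead of null.
        System.out.println(conf.get("example.missing", "fallback")); // prints "fallback"
    }
}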

Usage

From source file: com.m6d.filecrush.crush.CrushOptionParsingTest.java

License: Apache License

@Test
public void disableCompression() throws Exception {
    crush.createJobConfAndParseArgs("--compress=none", tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "20101116123015");

    JobConf job = crush.getJob();

    assertThat(job.get("mapreduce.job.reduces"), equalTo("20"));
    assertThat(job.get("mapreduce.output.fileoutputformat.compress"), equalTo("false"));

    assertThat(crush.getMaxFileBlocks(), equalTo(8));

    assertThat(job.get("crush.timestamp"), equalTo("20101116123015"));

    assertThat(job.get("crush.num.specs"), equalTo("1"));

    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement"),
            equalTo("crushed_file-20101116123015-${crush.task.num}-${crush.file.num}"));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}

From source file: com.m6d.filecrush.crush.CrushOptionParsingTest.java

License: Apache License

@Test
public void parse() throws Exception {
    crush.createJobConfAndParseArgs("--regex", ".+/ads/.+", "--replacement", "foo", "--input-format",
            "org.apache.hadoop.mapred.TextInputFormat", "--output-format",
            "org.apache.hadoop.mapred.TextOutputFormat", "--regex", ".+/act/.+", "--replacement", "bar",
            "--input-format", "org.apache.hadoop.mapred.TextInputFormat", "--output-format",
            "org.apache.hadoop.mapred.SequenceFileOutputFormat", "--regex", ".+/bid/.+", "--replacement",
            "hello", "--input-format", "org.apache.hadoop.mapred.SequenceFileInputFormat", "--output-format",
            "org.apache.hadoop.mapred.TextOutputFormat", "--threshold", "0.5", "--max-file-blocks", "100",
            "--compress", "org.apache.hadoop.io.compress.DefaultCodec",

            tmp.newFolder("in").getAbsolutePath(), tmp.newFolder("out").getAbsolutePath(), "20101116123015");

    JobConf job = crush.getJob();

    assertThat(job.get("mapreduce.job.reduces"), equalTo("20"));
    assertThat(job.get("mapreduce.output.fileoutputformat.compress"), equalTo("true"));
    assertThat(job.get("mapreduce.output.fileoutputformat.compress.codec"),
            equalTo("org.apache.hadoop.io.compress.DefaultCodec"));

    assertThat(crush.getMaxFileBlocks(), equalTo(100));

    assertThat(job.get("crush.timestamp"), equalTo("20101116123015"));

    assertThat(job.get("crush.num.specs"), equalTo("3"));

    assertThat(job.get("crush.0.regex"), equalTo(".+/ads/.+"));
    assertThat(job.get("crush.0.regex.replacement"), equalTo("foo"));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.TextInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.TextOutputFormat"));

    assertThat(job.get("crush.1.regex"), equalTo(".+/act/.+"));
    assertThat(job.get("crush.1.regex.replacement"), equalTo("bar"));
    assertThat(job.get("crush.1.input.format"), equalTo("org.apache.hadoop.mapred.TextInputFormat"));
    assertThat(job.get("crush.1.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));

    assertThat(job.get("crush.2.regex"), equalTo(".+/bid/.+"));
    assertThat(job.get("crush.2.regex.replacement"), equalTo("hello"));
    assertThat(job.get("crush.2.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.2.output.format"), equalTo("org.apache.hadoop.mapred.TextOutputFormat"));
}

From source file: com.m6d.filecrush.crush.CrushOptionParsingTest.java

License: Apache License

@Test
public void parseOldNoType() throws Exception {
    long millis = currentTimeMillis();

    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "80");

    JobConf job = crush.getJob();

    assertThat(job.get("mapreduce.job.reduces"), equalTo("80"));
    assertThat(Long.parseLong(job.get("crush.timestamp")), greaterThanOrEqualTo(millis));
    assertThat(job.get("crush.num.specs"), equalTo("1"));

    assertThat(crush.getMaxFileBlocks(), equalTo(Integer.MAX_VALUE));

    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement")
            .matches("crushed_file-\\d+-\\$\\{crush.task.num\\}-\\$\\{crush.file.num\\}"), is(true));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}

From source file: com.m6d.filecrush.crush.CrushOptionParsingTest.java

License: Apache License

@Test
public void parseOldSequence() throws Exception {
    long millis = currentTimeMillis();

    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "80", "SEQUENCE");

    JobConf job = crush.getJob();

    assertThat(job.get("mapreduce.job.reduces"), equalTo("80"));
    assertThat(Long.parseLong(job.get("crush.timestamp")), greaterThanOrEqualTo(millis));
    assertThat(job.get("crush.num.specs"), equalTo("1"));

    assertThat(crush.getMaxFileBlocks(), equalTo(Integer.MAX_VALUE));

    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement")
            .matches("crushed_file-\\d+-\\$\\{crush.task.num\\}-\\$\\{crush.file.num\\}"), is(true));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}

From source file: com.m6d.filecrush.crush.CrushOptionParsingTest.java

License: Apache License

@Test
public void parseOldText() throws Exception {
    long millis = currentTimeMillis();

    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "80", "TEXT");

    JobConf job = crush.getJob();

    assertThat(job.get("mapreduce.job.reduces"), equalTo("80"));
    assertThat(Long.parseLong(job.get("crush.timestamp")), greaterThanOrEqualTo(millis));
    assertThat(job.get("crush.num.specs"), equalTo("1"));

    assertThat(crush.getMaxFileBlocks(), equalTo(Integer.MAX_VALUE));

    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement")
            .matches("crushed_file-\\d+-\\$\\{crush.task.num\\}-\\$\\{crush.file.num\\}"), is(true));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.TextInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.TextOutputFormat"));
}

From source file: com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinMapperBase.java

License: Apache License

public void configure(JobConf job) {
    super.configure(job);
    this.job = job;
    this.inputFile = job.get("map.input.file");
    this.inputTag = generateInputTag(this.inputFile);
    if (isInputSmaller(this.inputFile)) {
        smaller = new BooleanWritable(true);
        outputKey.setOrder(0);
    } else {
        smaller = new BooleanWritable(false);
        outputKey.setOrder(1);
    }
}

From source file: com.mongodb.hadoop.hive.input.HiveMongoInputFormat.java

License: Apache License

@Override
public FileSplit[] getSplits(final JobConf conf, final int numSplits) throws IOException {

    try {
        MongoSplitter splitterImpl = MongoSplitterFactory.getSplitter(conf);
        final List<org.apache.hadoop.mapreduce.InputSplit> splits = splitterImpl.calculateSplits();
        InputSplit[] splitIns = splits.toArray(new InputSplit[splits.size()]);

        // wrap InputSplits in FileSplits so that 'getPath' 
        // doesn't produce an error (Hive bug)
        FileSplit[] wrappers = new FileSplit[splitIns.length];
        Path path = new Path(conf.get(MongoStorageHandler.TABLE_LOCATION));
        for (int i = 0; i < wrappers.length; i++) {
            wrappers[i] = new MongoHiveInputSplit(splitIns[i], path);
        }

        return wrappers;
    } catch (SplitFailedException spfe) {
        // split failed because no namespace found 
        // (so the corresponding collection doesn't exist)
        LOG.error(spfe.getMessage(), spfe);
        throw new IOException(spfe.getMessage(), spfe);
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file: com.pinterest.hdfsbackup.distcp.DistCp.java

License: Apache License

/**
 * Driver to copy srcPath to destPath depending on required protocol.
 * @param args arguments
 */
static void copy(final Configuration conf, final Arguments args) throws IOException {
    LOG.info("srcPaths=" + args.srcs);
    LOG.info("destPath=" + args.dst);
    checkSrcPath(conf, args.srcs);

    JobConf job = createJobConf(conf);
    if (args.preservedAttributes != null) {
        job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
    }
    if (args.mapredSslConf != null) {
        job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
    }

    //Initialize the mapper
    try {
        setup(conf, job, args);
        JobClient.runJob(job);
        finalize(conf, job, args.dst, args.preservedAttributes);
    } finally {
        //delete tmp
        fullyDelete(job.get(TMP_DIR_LABEL), job);
        //delete jobDirectory
        fullyDelete(job.get(JOB_DIR_LABEL), job);
    }
}

From source file: com.pinterest.hdfsbackup.distcp.DistCp.java

License: Apache License

static private void finalize(Configuration conf, JobConf jobconf, final Path destPath,
        String presevedAttributes) throws IOException {
    if (presevedAttributes == null) {
        return;
    }
    EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
    if (!preseved.contains(FileAttribute.USER) && !preseved.contains(FileAttribute.GROUP)
            && !preseved.contains(FileAttribute.PERMISSION)) {
        return;
    }

    FileSystem dstfs = destPath.getFileSystem(conf);
    Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    SequenceFile.Reader in = null;
    try {
        in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
        Text dsttext = new Text();
        FilePair pair = new FilePair();
        for (; in.next(dsttext, pair);) {
            Path absdst = new Path(destPath, pair.output);
            updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs);
        }
    } finally {
        checkAndClose(in);
    }
}

From source file: com.rapleaf.hank.cascading.DomainBuilderTap.java

License: Apache License

@Override
public void sinkInit(JobConf conf) throws IOException {
    super.sinkInit(conf);
    // Output Format
    conf.setOutputFormat(this.outputFormatClass);
    // Output Committer
    conf.setOutputCommitter(DomainBuilderOutputCommitter.class);
    // Set this tap's Domain name locally in the conf
    if (conf.get(DomainBuilderOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME) != null) {
        throw new RuntimeException("Trying to set domain name configuration parameter to " + domainName
                + " but it was previously set to "
                + conf.get(DomainBuilderOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME));
    } else {
        conf.set(DomainBuilderOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME, domainName);
    }
}