Example usage for org.apache.hadoop.mapred JobConf getJobName

List of usage examples for org.apache.hadoop.mapred JobConf getJobName

Introduction

This page collects usage examples for the getJobName method of org.apache.hadoop.mapred.JobConf.

Prototype

public String getJobName() 

Source Link

Document

Get the user-specified job name.

Usage

From source file:babel.prep.corpus.CorpusGenerator.java

License:Apache License

/**
 * Command-line entry point for the corpus generator.
 *
 * <p>Accepts two or three arguments. The first two are handed to
 * {@code createJobConf}; the third, when present, is compared against
 * {@code PARAM_XML} and the outcome is passed as the boolean third argument.
 * Any other argument count calls {@code usage()} and returns without
 * configuring or running a job.
 *
 * @param args command-line arguments (two or three expected)
 * @throws Exception propagated from job configuration or execution
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2 || args.length > 3) {
        usage();
        return;
    }

    CorpusGenerator gen = new CorpusGenerator();
    // The flag is true only when a third argument exists and equals PARAM_XML.
    JobConf job = gen.createJobConf(args[0], args[1], (args.length == 3) && PARAM_XML.equals(args[2]));

    // Log under this class's own name so its output is not confused with
    // that of DatedCorpusGenerator.
    if (LOG.isInfoEnabled()) {
        LOG.info("CorpusGenerator: " + job.getJobName());
    }

    gen.runPrepStep(job);

    if (LOG.isInfoEnabled()) {
        LOG.info(Stats.dumpStats() + "\n");
        LOG.info("Output: " + FileOutputFormat.getOutputPath(job));
        LOG.info("CorpusGenerator: done");
    }
}

From source file:babel.prep.datedcorpus.DatedCorpusGenerator.java

License:Apache License

/**
 * Command-line entry point for the dated corpus generator.
 *
 * <p>Two or three arguments are accepted; only the first two are read here and
 * forwarded to {@code createJobConf}. Any other argument count calls
 * {@code usage()} and returns.
 *
 * @param args command-line arguments
 * @throws Exception propagated from job configuration or execution
 */
public static void main(String[] args) throws Exception {
    final int argCount = args.length;
    if (!(argCount == 2 || argCount == 3)) {
        usage();
        return;
    }

    final DatedCorpusGenerator generator = new DatedCorpusGenerator();
    final JobConf conf = generator.createJobConf(args[0], args[1]);

    // Announce the job before it starts.
    if (LOG.isInfoEnabled()) {
        final String jobName = conf.getJobName();
        LOG.info("DatedCorpusGenerator: " + jobName);
    }

    generator.runPrepStep(conf);

    // Report statistics and the output location once the step has returned.
    if (LOG.isInfoEnabled()) {
        LOG.info(Stats.dumpStats() + "\n");
        LOG.info("Output: " + FileOutputFormat.getOutputPath(conf));
        LOG.info("DatedCorpusGenerator: done");
    }
}

From source file:babel.prep.extract.NutchPageExtractor.java

License:Apache License

/**
 * Command-line entry point for the Nutch page extractor.
 *
 * <p>Exactly one argument is expected and is forwarded to
 * {@code createJobConf}. Any other argument count calls {@code usage()} and
 * returns.
 *
 * @param args command-line arguments
 * @throws Exception propagated from job configuration or execution
 */
public static void main(String[] args) throws Exception {
    final boolean hasSingleArgument = (args.length == 1);
    if (!hasSingleArgument) {
        usage();
        return;
    }

    final NutchPageExtractor pageExtractor = new NutchPageExtractor();
    final JobConf conf = pageExtractor.createJobConf(args[0]);

    // Announce the job before it starts.
    if (LOG.isInfoEnabled()) {
        final String jobName = conf.getJobName();
        LOG.info("NutchPageExtractor: " + jobName);
    }

    pageExtractor.runPrepStep(conf);

    // Report statistics and the output location once the step has returned.
    if (LOG.isInfoEnabled()) {
        LOG.info(Stats.dumpStats() + "\n");
        LOG.info("Output: " + FileOutputFormat.getOutputPath(conf));
        LOG.info("NutchPageExtractor: done");
    }
}

From source file:babel.prep.langid.LangIdentifier.java

License:Apache License

/**
 * Command-line entry point for the language identifier.
 *
 * <p>Exactly three arguments are expected and are forwarded, in order, to
 * {@code createJobConf}. Any other argument count calls {@code usage()} and
 * returns.
 *
 * @param args command-line arguments
 * @throws Exception propagated from job configuration or execution
 */
public static void main(String[] args) throws Exception {
    final boolean hasThreeArguments = (args.length == 3);
    if (!hasThreeArguments) {
        usage();
        return;
    }

    final LangIdentifier langIdentifier = new LangIdentifier();
    final JobConf conf = langIdentifier.createJobConf(args[0], args[1], args[2]);

    // Announce the job before it starts.
    if (LOG.isInfoEnabled()) {
        final String jobName = conf.getJobName();
        LOG.info("LangIdentifier: " + jobName);
    }

    langIdentifier.runPrepStep(conf);

    // Report statistics and the output location once the step has returned.
    if (LOG.isInfoEnabled()) {
        LOG.info(Stats.dumpStats() + "\n");
        LOG.info("Output: " + FileOutputFormat.getOutputPath(conf));
        LOG.info("LangIdentifier: done");
    }
}

From source file:babel.prep.langidtime.LangAndTimeExtractor.java

License:Apache License

/**
 * Command-line entry point for the language-and-time extractor.
 *
 * <p>Exactly three arguments are expected and are forwarded, in order, to
 * {@code createJobConf}. Any other argument count calls {@code usage()} and
 * returns.
 *
 * @param args command-line arguments
 * @throws Exception propagated from job configuration or execution
 */
public static void main(String[] args) throws Exception {
    final boolean hasThreeArguments = (args.length == 3);
    if (!hasThreeArguments) {
        usage();
        return;
    }

    final LangAndTimeExtractor extractor = new LangAndTimeExtractor();
    final JobConf conf = extractor.createJobConf(args[0], args[1], args[2]);

    // Announce the job before it starts.
    if (LOG.isInfoEnabled()) {
        final String jobName = conf.getJobName();
        LOG.info("LangAndTimeExtractor: " + jobName);
    }

    extractor.runPrepStep(conf);

    // Report statistics and the output location once the step has returned.
    if (LOG.isInfoEnabled()) {
        LOG.info(Stats.dumpStats() + "\n");
        LOG.info("Output: " + FileOutputFormat.getOutputPath(conf));
        LOG.info("LangAndTimeExtractor: done");
    }
}

From source file:babel.prep.merge.PageMerger.java

License:Apache License

/**
 * Command-line entry point for the page merger.
 *
 * <p>Exactly three arguments are expected and are forwarded, in order, to
 * {@code createJobConf}. Any other argument count calls {@code usage()} and
 * returns.
 *
 * @param args command-line arguments
 * @throws Exception propagated from job configuration or execution
 */
public static void main(String[] args) throws Exception {
    final boolean hasThreeArguments = (args.length == 3);
    if (!hasThreeArguments) {
        usage();
        return;
    }

    final PageMerger pageMerger = new PageMerger();
    final JobConf conf = pageMerger.createJobConf(args[0], args[1], args[2]);

    // Announce the job before it starts.
    if (LOG.isInfoEnabled()) {
        final String jobName = conf.getJobName();
        LOG.info("PageMerger: " + jobName);
    }

    pageMerger.runPrepStep(conf);

    // Report statistics and the output location once the step has returned.
    if (LOG.isInfoEnabled()) {
        LOG.info(Stats.dumpStats() + "\n");
        LOG.info("Output: " + FileOutputFormat.getOutputPath(conf));
        LOG.info("PageMerger: done");
    }
}

From source file:cascading.flow.hadoop.MapReduceFlow.java

License:Open Source License

/**
 * Constructor MapReduceFlow creates a new MapReduceFlow instance.
 *
 * <p>Delegates to the three-argument constructor, using the name reported by
 * {@code jobConf.getJobName()} as the first argument and {@code false} as the
 * last.
 *
 * @param jobConf of type JobConf
 */
@ConstructorProperties({ "jobConf" })
public MapReduceFlow(JobConf jobConf) {
    this(jobConf.getJobName(), jobConf, false);
}

From source file:cascading.flow.hadoop.MapReduceFlow.java

License:Open Source License

/**
 * Constructor MapReduceFlow creates a new MapReduceFlow instance.
 *
 * <p>Delegates to the three-argument constructor, using the name reported by
 * {@code jobConf.getJobName()} as the first argument.
 *
 * @param jobConf          of type JobConf
 * @param deleteSinkOnInit of type boolean
 */
@ConstructorProperties({ "jobConf", "deleteSinkOnInit" })
public MapReduceFlow(JobConf jobConf, boolean deleteSinkOnInit) {
    this(jobConf.getJobName(), jobConf, deleteSinkOnInit);
}

From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java

License:Apache License

/**
 * Points the job's output at a randomly suffixed directory under {@code /tmp}.
 *
 * <p>The path has the form {@code /tmp/<jobName>_<n>}, where {@code n} comes
 * from {@code rand_.nextInt(10000000)}. The chosen path is echoed to standard
 * output before it is applied to the configuration.
 *
 * @param conf job configuration whose name is read and whose output path is set
 */
public static void setRandomOutputDir(JobConf conf) {
    // Draw the suffix first, before the job name is read.
    final int randomSuffix = rand_.nextInt(10000000);

    final StringBuilder pathBuilder = new StringBuilder();
    pathBuilder.append("/tmp/");
    pathBuilder.append(conf.getJobName());
    pathBuilder.append("_");
    pathBuilder.append(randomSuffix);
    final String outputDir = pathBuilder.toString();

    System.out.println("outdir: " + outputDir);
    FileOutputFormat.setOutputPath(conf, new Path(outputDir));
}

From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java

License:Apache License

/**
 * Submits this job, first switching it to the sampling-based
 * {@link com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner} when requested.
 *
 * <p>The even partitioner is configured only when all of the following hold:
 * {@code mapred.job.tracker} is not {@code "local"}, the job has a non-zero
 * number of reduce tasks, and {@code mobius.partitioner} is {@code "even"}.
 * In that case the method writes a partition file from sampled keys, registers
 * it in the distributed cache, sets the partitioner class, and makes sure
 * {@code -XX:-UseParallelOldGC} is present in {@code mapred.child.java.opts}.
 * Otherwise the configuration is left untouched and the job is submitted as is.
 *
 * @throws RuntimeException if the partition file location is not a valid URI
 */
@Override
protected synchronized void submit() {
    final JobConf conf = this.getJobConf();

    final String jobTracker = conf.get("mapred.job.tracker", "local");
    final boolean runsLocally = jobTracker.equals("local");

    // "default" selects the hash-based
    // {@link com.ebay.erl.mobius.core.datajoin.DataJoinKeyPartitioner};
    // "even" selects the sampling-based
    // {@link com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner}, which
    // attempts to balance the load across reducers.
    final String partitionerKind = conf.get("mobius.partitioner", "default");

    final boolean useEvenPartitioner = !runsLocally && conf.getNumReduceTasks() != 0
            && partitionerKind.equals("even");

    if (useEvenPartitioner) {
        // The job has reducers: sample the keys so that the reducer load is
        // distributed almost evenly.
        final double freq = conf.getFloat("mobius.sampler.freq", 0.1F);
        final int numSamples = conf.getInt("mobius.sampler.num.samples", 50000);
        final int maxSplits = conf.getInt("mobius.sampler.max.slipts.sampled", 5);

        // Surface the sampling parameters so the user can see what was applied.
        final StringBuilder samplingInfo = new StringBuilder("Sampling parameters { ");
        samplingInfo.append("mobius.sampler.freq:").append(format.format(freq)).append(", ");
        samplingInfo.append("mobius.sampler.num.samples:").append(numSamples).append(", ");
        samplingInfo.append("mobius.sampler.max.slipts.sampled:").append(maxSplits).append("}");
        LOGGER.info(samplingInfo.toString());

        final InputSampler.Sampler<?, ?> keySampler = new MobiusInputSampler(freq, numSamples, maxSplits);
        writePartitionFile(conf, keySampler);

        // Ship the partition file to the tasks through the distributed cache.
        try {
            final String partitionLocation = TotalOrderPartitioner.getPartitionFile(conf) + "#_partitions";
            final URI partitionUri = new URI(partitionLocation);
            LOGGER.info("Adding partition uri to distributed cache:" + partitionUri.toString());

            DistributedCache.addCacheFile(partitionUri, conf);
            DistributedCache.createSymlink(conf);
            conf.setPartitionerClass(EvenlyPartitioner.class);

            LOGGER.info("Using " + EvenlyPartitioner.class.getCanonicalName()
                    + " to partiton the keys evenly among reducers.");
        } catch (URISyntaxException badUri) {
            LOGGER.error(badUri.getMessage(), badUri);
            throw new RuntimeException(badUri);
        }

        // Add -XX:-UseParallelOldGC to the child JVM options unless it is
        // already there (per the original author, this also implies
        // -XX:-UseParallelGC according to Oracle's specification).
        String childJvmOpts = conf.get("mapred.child.java.opts", "");
        if (childJvmOpts.isEmpty()) {
            childJvmOpts = "-XX:-UseParallelOldGC";
        } else if (!childJvmOpts.contains("-XX:-UseParallelOldGC")) {
            // Strip double quotes before appending the flag.
            childJvmOpts = childJvmOpts.replace("\"", "") + " -XX:-UseParallelOldGC";
        }
        conf.set("mapred.child.java.opts", childJvmOpts);

        this.setJobConf(conf);
    }

    LOGGER.info("Submiting job:" + conf.getJobName());
    super.submit();
}