List of usage examples for org.apache.hadoop.mapred JobConf getJobName
public String getJobName()
From source file:babel.prep.corpus.CorpusGenerator.java
License:Apache License
/**
 * Command-line entry point for the corpus generator.
 *
 * <p>Builds a job configuration from the arguments, runs the prep step, and
 * (when INFO logging is enabled) logs the job name, the collected statistics
 * and the job's output path.
 *
 * @param args two or three arguments; the first two are handed to
 *             {@code createJobConf}, and the optional third turns on the
 *             boolean flag of {@code createJobConf} when it equals
 *             {@code PARAM_XML}
 * @throws Exception anything thrown while creating or running the job is
 *                   propagated to the caller
 */
public static void main(String[] args) throws Exception {
    // Wrong argument count: print usage and stop without running anything.
    if (args.length < 2 || args.length > 3) {
        usage();
        return;
    }

    CorpusGenerator gen = new CorpusGenerator();
    JobConf job = gen.createJobConf(args[0], args[1], (args.length == 3) && PARAM_XML.equals(args[2]));

    if (LOG.isInfoEnabled()) {
        // Log under this class's own name (previously mislabeled as the
        // sibling DatedCorpusGenerator, which made the two tools' logs
        // indistinguishable).
        LOG.info("CorpusGenerator: " + job.getJobName());
    }

    gen.runPrepStep(job);

    if (LOG.isInfoEnabled()) {
        LOG.info(Stats.dumpStats() + "\n");
        LOG.info("Output: " + FileOutputFormat.getOutputPath(job));
        LOG.info("CorpusGenerator: done");
    }
}
From source file:babel.prep.datedcorpus.DatedCorpusGenerator.java
License:Apache License
/**
 * Command-line entry point for the dated corpus generator.
 *
 * <p>Accepts two or three arguments; only the first two are read here and
 * passed to {@code createJobConf}. After the prep step has run, statistics
 * and the output path are logged at INFO level.
 *
 * @param args command-line arguments (two or three)
 * @throws Exception anything thrown while creating or running the job
 */
public static void main(String[] args) throws Exception {
    final boolean argCountOk = args.length >= 2 && args.length <= 3;
    if (!argCountOk) {
        usage();
        return;
    }

    final DatedCorpusGenerator generator = new DatedCorpusGenerator();
    final JobConf conf = generator.createJobConf(args[0], args[1]);

    if (LOG.isInfoEnabled()) {
        final String jobName = conf.getJobName();
        LOG.info("DatedCorpusGenerator: " + jobName);
    }

    generator.runPrepStep(conf);

    if (LOG.isInfoEnabled()) {
        LOG.info(Stats.dumpStats() + "\n");
        LOG.info("Output: " + FileOutputFormat.getOutputPath(conf));
        LOG.info("DatedCorpusGenerator: done");
    }
}
From source file:babel.prep.extract.NutchPageExtractor.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { usage();//from w w w . jav a 2s . com return; } NutchPageExtractor extractor = new NutchPageExtractor(); JobConf job = extractor.createJobConf(args[0]); if (LOG.isInfoEnabled()) { LOG.info("NutchPageExtractor: " + job.getJobName()); } extractor.runPrepStep(job); if (LOG.isInfoEnabled()) { LOG.info(Stats.dumpStats() + "\n"); LOG.info("Output: " + FileOutputFormat.getOutputPath(job)); LOG.info("NutchPageExtractor: done"); } }
From source file:babel.prep.langid.LangIdentifier.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 3) { usage();//w w w . j a v a2 s . c o m return; } LangIdentifier identifier = new LangIdentifier(); JobConf job = identifier.createJobConf(args[0], args[1], args[2]); if (LOG.isInfoEnabled()) { LOG.info("LangIdentifier: " + job.getJobName()); } identifier.runPrepStep(job); if (LOG.isInfoEnabled()) { LOG.info(Stats.dumpStats() + "\n"); LOG.info("Output: " + FileOutputFormat.getOutputPath(job)); LOG.info("LangIdentifier: done"); } }
From source file:babel.prep.langidtime.LangAndTimeExtractor.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 3) { usage();//w w w. j ava 2 s. c om return; } LangAndTimeExtractor identifier = new LangAndTimeExtractor(); JobConf job = identifier.createJobConf(args[0], args[1], args[2]); if (LOG.isInfoEnabled()) { LOG.info("LangAndTimeExtractor: " + job.getJobName()); } identifier.runPrepStep(job); if (LOG.isInfoEnabled()) { LOG.info(Stats.dumpStats() + "\n"); LOG.info("Output: " + FileOutputFormat.getOutputPath(job)); LOG.info("LangAndTimeExtractor: done"); } }
From source file:babel.prep.merge.PageMerger.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 3) { usage();//from w w w . j av a 2s . c o m return; } PageMerger merger = new PageMerger(); JobConf job = merger.createJobConf(args[0], args[1], args[2]); if (LOG.isInfoEnabled()) { LOG.info("PageMerger: " + job.getJobName()); } merger.runPrepStep(job); if (LOG.isInfoEnabled()) { LOG.info(Stats.dumpStats() + "\n"); LOG.info("Output: " + FileOutputFormat.getOutputPath(job)); LOG.info("PageMerger: done"); } }
From source file:cascading.flow.hadoop.MapReduceFlow.java
License:Open Source License
/**
 * Creates a new MapReduceFlow instance named after the job name stored in
 * the given configuration.
 *
 * <p>Delegates to the three-argument constructor, passing {@code false} as
 * its final argument.
 *
 * @param jobConf of type JobConf; also supplies the flow name via
 *                {@code getJobName()}
 */
@ConstructorProperties({ "jobConf" })
public MapReduceFlow(JobConf jobConf) {
    this(
        jobConf.getJobName(),
        jobConf,
        false);
}
From source file:cascading.flow.hadoop.MapReduceFlow.java
License:Open Source License
/** * Constructor MapReduceFlow creates a new MapReduceFlow instance. * * @param jobConf of type JobConf * @param deleteSinkOnInit of type boolean *///from w ww .j a v a 2s.c o m @ConstructorProperties({ "jobConf", "deleteSinkOnInit" }) public MapReduceFlow(JobConf jobConf, boolean deleteSinkOnInit) { this(jobConf.getJobName(), jobConf, deleteSinkOnInit); }
From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java
License:Apache License
/**
 * Points the job's output at a randomly suffixed directory under
 * {@code /tmp}.
 *
 * <p>The directory name is the job name, an underscore, and a value drawn
 * from {@code rand_.nextInt(10000000)}. The chosen path is echoed to standard
 * output before being set on the configuration.
 *
 * @param conf job configuration whose job name is read and whose output path
 *             is set
 */
public static void setRandomOutputDir(JobConf conf) {
    int randomSuffix = rand_.nextInt(10000000);
    String outDir = "/tmp/" + conf.getJobName() + "_" + randomSuffix;

    System.out.println("outdir: " + outDir);

    Path outputPath = new Path(outDir);
    FileOutputFormat.setOutputPath(conf, outputPath);
}
From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java
License:Apache License
@Override protected synchronized void submit() { JobConf jobConf = this.getJobConf(); boolean isLocalHadoop = jobConf.get("mapred.job.tracker", "local").equals("local"); // the default partitioner is {@link com.ebay.erl.mobius.core.datajoin.DataJoinKeyPartitioner} // which is hash based. ///*from w ww . j a va 2s . c o m*/ // If user choose to use even partitioner, Mobius will use // {@link com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner} which // is sampling based partitioner of attempting to balance the load // for each reducer. String partitioner = jobConf.get("mobius.partitioner", "default"); if (!isLocalHadoop && jobConf.getNumReduceTasks() != 0 && partitioner.equals("even")) { // this job needs reducer, perform sampling on the keys to // make load on reducers are almost evenly distributed. double freq = jobConf.getFloat("mobius.sampler.freq", 0.1F); int numSamples = jobConf.getInt("mobius.sampler.num.samples", 50000); int maxSplits = jobConf.getInt("mobius.sampler.max.slipts.sampled", 5); // log sampling parameters so that user knows. 
LOGGER.info("Sampling parameters { " + "mobius.sampler.freq:" + format.format(freq) + ", " + "mobius.sampler.num.samples:" + numSamples + ", " + "mobius.sampler.max.slipts.sampled:" + maxSplits + "}"); InputSampler.Sampler<?, ?> sampler = new MobiusInputSampler(freq, numSamples, maxSplits); writePartitionFile(jobConf, sampler); // add to distributed cache try { URI partitionUri = new URI(TotalOrderPartitioner.getPartitionFile(jobConf) + "#_partitions"); LOGGER.info("Adding partition uri to distributed cache:" + partitionUri.toString()); DistributedCache.addCacheFile(partitionUri, jobConf); DistributedCache.createSymlink(jobConf); jobConf.setPartitionerClass(EvenlyPartitioner.class); LOGGER.info("Using " + EvenlyPartitioner.class.getCanonicalName() + " to partiton the keys evenly among reducers."); } catch (URISyntaxException e) { LOGGER.error(e.getMessage(), e); throw new RuntimeException(e); } // adding -XX:-UseParallelOldGC, this will automatically set -XX:-UseParallelGC // according to Oracle's specification String jvmOpts = jobConf.get("mapred.child.java.opts", ""); if (jvmOpts.isEmpty()) { jvmOpts = "-XX:-UseParallelOldGC"; } else { if (jvmOpts.indexOf("-XX:-UseParallelOldGC") < 0) { // remove " jvmOpts = jvmOpts.replaceAll("\"", ""); jvmOpts = jvmOpts.concat(" -XX:-UseParallelOldGC"); } } jobConf.set("mapred.child.java.opts", jvmOpts); this.setJobConf(jobConf); } LOGGER.info("Submiting job:" + jobConf.getJobName()); super.submit(); }