List of usage examples for org.apache.hadoop.mapreduce.Job.getInstance
public static Job getInstance() throws IOException
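In its no-argument form, getInstance() creates a Job backed by a fresh Configuration, which the examples below then populate. A minimal driver sketch follows; the MyDriver, MyMapper, and MyReducer names and the paths are illustrative, not taken from the examples:

// Minimal sketch: class names and paths are illustrative.
Job job = Job.getInstance();
job.setJarByClass(MyDriver.class);
job.setJobName("example");
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, new Path("in"));
FileOutputFormat.setOutputPath(job, new Path("out"));
System.exit(job.waitForCompletion(true) ? 0 : 1);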
From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java
License:Apache License
private void runMR(File inputDir, File outputDir, long startTime, long endTime, long splitSize, long ttl)
        throws Exception {
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();

    StreamInputFormat.setTTL(conf, ttl);
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat.setTimeRange(conf, startTime, endTime);
    StreamInputFormat.setMaxSplitSize(conf, splitSize);
    job.setInputFormatClass(TestStreamInputFormat.class);

    TextOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setJarByClass(StreamInputFormatTest.class);
    job.setMapperClass(TokenizeMapper.class);
    job.setReducerClass(AggregateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.waitForCompletion(true);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultipleInputsTest.java
License:Apache License
@Test
public void testConfigurations() throws IOException, ClassNotFoundException {
    Job job = Job.getInstance();

    String inputName1 = "inputName1";
    String inputFormatClass1 = TextInputFormat.class.getName();
    Map<String, String> inputFormatConfigs1 = ImmutableMap.of("key1", "val1", "key2", "val2");
    MultipleInputs.addInput(job, inputName1, inputFormatClass1, inputFormatConfigs1, job.getMapperClass());

    Map<String, MultipleInputs.MapperInput> map = MultipleInputs.getInputMap(job.getConfiguration());
    Assert.assertEquals(1, map.size());
    Assert.assertEquals(inputName1, Iterables.getOnlyElement(map.keySet()));
    Assert.assertEquals(inputFormatClass1, Iterables.getOnlyElement(map.values()).getInputFormatClassName());
    Assert.assertEquals(inputFormatConfigs1, Iterables.getOnlyElement(map.values()).getInputFormatConfiguration());
    Assert.assertEquals(job.getMapperClass().getName(),
            Iterables.getOnlyElement(map.values()).getMapperClassName());
    Assert.assertEquals(DelegatingInputFormat.class, job.getInputFormatClass());

    // now, test with two inputs in the configuration
    String inputName2 = "inputName2";
    String inputFormatClass2 = TextInputFormat.class.getName();
    Map<String, String> inputFormatConfigs2 = ImmutableMap.of("some_key1", "some_val1", "some_key2", "some_val2");
    MultipleInputs.addInput(job, inputName2, inputFormatClass2, inputFormatConfigs2, CustomMapper.class);

    map = MultipleInputs.getInputMap(job.getConfiguration());
    Assert.assertEquals(2, map.size());

    MultipleInputs.MapperInput mapperInput1 = map.get(inputName1);
    Assert.assertEquals(inputFormatClass1, mapperInput1.getInputFormatClassName());
    Assert.assertEquals(inputFormatConfigs1, mapperInput1.getInputFormatConfiguration());
    Assert.assertEquals(job.getMapperClass().getName(), mapperInput1.getMapperClassName());

    MultipleInputs.MapperInput mapperInput2 = map.get(inputName2);
    Assert.assertEquals(inputFormatClass2, mapperInput2.getInputFormatClassName());
    Assert.assertEquals(inputFormatConfigs2, mapperInput2.getInputFormatConfiguration());
    Assert.assertEquals(CustomMapper.class,
            job.getConfiguration().getClassByName(mapperInput2.getMapperClassName()));
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsTest.java
License:Apache License
@Test
public void testInvalidInputName() throws IOException {
    Job job = Job.getInstance();
    try {
        // the other parameters don't matter, because it fails just checking the name
        MultipleOutputs.addNamedOutput(job, "name.with.dots", null, null, null, null);
        Assert.fail("Expected not to be able to add an output with a '.' in the name");
    } catch (IllegalArgumentException expected) {
        // just check that it's not some other IllegalArgumentException that happened
        Assert.assertTrue(expected.getMessage().contains("must consist only of ASCII letters, numbers,"));
    }
}
From source file:co.cask.hydrator.common.batch.JobUtils.java
License:Apache License
/**
 * Creates a new instance of {@link Job}. Note that the job created is not meant for actual MR
 * submission; it is only used for setting up configurations.
 */
public static Job createInstance() throws IOException {
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    conf.clear();

    if (UserGroupInformation.isSecurityEnabled()) {
        // When running in a secure cluster, this program runner runs inside a YARN container
        // and is therefore unable to authenticate with the job history server.
        conf.unset("mapreduce.jobhistory.address");
        conf.setBoolean(Job.JOB_AM_ACCESS_DISABLED, false);

        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        job.getCredentials().addAll(credentials);
    }

    return job;
}
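A sketch of how such a helper might be consumed; this usage is assumed, and the property key and path are illustrative, not from the source:

// Hypothetical caller: the Job is used only as a Configuration holder, never submitted.
Job job = JobUtils.createInstance();
Configuration conf = job.getConfiguration();
// Because createInstance() cleared the defaults, only explicitly set values remain.
conf.set("plugin.input.path", "/tmp/events");   // illustrative key and path
// conf is then handed to whatever runtime actually performs the read.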
From source file:com.cloudera.dataflow.spark.TransformTranslator.java
License:Open Source License
private static <T> TransformEvaluator<AvroIO.Write.Bound<T>> writeAvro() {
    return new TransformEvaluator<AvroIO.Write.Bound<T>>() {
        @Override
        public void evaluate(AvroIO.Write.Bound<T> transform, EvaluationContext context) {
            Job job;
            try {
                job = Job.getInstance();
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
            AvroJob.setOutputKeySchema(job, transform.getSchema());
            @SuppressWarnings("unchecked")
            JavaPairRDD<AvroKey<T>, NullWritable> last = ((JavaRDDLike<WindowedValue<T>, ?>) context
                    .getInputRDD(transform)).map(WindowingHelpers.<T>unwindowFunction())
                    .mapToPair(new PairFunction<T, AvroKey<T>, NullWritable>() {
                        @Override
                        public Tuple2<AvroKey<T>, NullWritable> call(T t) throws Exception {
                            return new Tuple2<>(new AvroKey<>(t), NullWritable.get());
                        }
                    });
            ShardTemplateInformation shardTemplateInfo = new ShardTemplateInformation(transform.getNumShards(),
                    transform.getShardTemplate(), transform.getFilenamePrefix(), transform.getFilenameSuffix());
            writeHadoopFile(last, job.getConfiguration(), shardTemplateInfo, AvroKey.class, NullWritable.class,
                    TemplatedAvroKeyOutputFormat.class);
        }
    };
}
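Note the design choice here: the Job created by Job.getInstance() is never submitted. It serves only as a holder for the Configuration that AvroJob.setOutputKeySchema mutates, and that configuration is then passed on to writeHadoopFile.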
From source file:com.ema.hadoop.bestclient.BestClient.java
public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: BestClient <input path> <output path> <date start> <date end>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(BestClient.class);
    job.setJobName("Best client job");

    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.setStrings("dates", args[2], args[3]);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(BCMapper.class);
    job.setReducerClass(BCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
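The date range travels to the tasks through the job configuration. BCMapper's body is not shown in the source, so the following read-back sketch is an assumption:

// Hypothetical mapper-side setup; the real BCMapper is not shown.
@Override
protected void setup(Context context) {
    String[] dates = context.getConfiguration().getStrings("dates");
    // dates[0] and dates[1] hold the start and end dates passed on the command line.
}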
From source file:com.ema.hadoop.wordcount.WordCount.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(WordCount.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper.class);
    job.setReducerClass(WCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ema.hadoop.wordcount.WordCount_cache.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: WordCount_cache <input path> <output path>");
        System.exit(-1);
    }

    // First we write the stop word list;
    // it could also be a file manually loaded into HDFS.
    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/stop_words.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    for (String w : stopwords) {
        br.write(w + "\n");
    }
    br.close();
    hdfs.close();

    Job job = Job.getInstance();
    job.addCacheFile(new Path("hdfs://localhost:9000/user/student/stop_words.txt").toUri());
    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
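WCMapper_cache is not shown in the source; a sketch of how a mapper typically reads the distributed cache file registered above (a hypothetical implementation, assuming a stop-word set built in setup()):

// Hypothetical setup() for WCMapper_cache; the real class is not shown.
@Override
protected void setup(Context context) throws IOException {
    Set<String> stopwords = new HashSet<>();
    URI[] cacheFiles = context.getCacheFiles();   // files registered via job.addCacheFile(...)
    Path path = new Path(cacheFiles[0]);
    FileSystem fs = FileSystem.get(context.getConfiguration());
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(path), "UTF-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            stopwords.add(line.trim());   // e.g. "the", "a"
        }
    }
}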
From source file:com.google.cloud.dataflow.contrib.hadoop.HadoopFileSink.java
License:Apache License
private Job jobInstance() throws IOException {
    Job job = Job.getInstance();
    // deserialize map to conf
    Configuration conf = job.getConfiguration();
    for (Map.Entry<String, String> entry : map.entrySet()) {
        conf.set(entry.getKey(), entry.getValue());
    }
    return job;
}
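The map exists because Hadoop's Configuration is not Java-serializable, so the sink carries its settings as a plain Map<String, String>. The inverse operation that presumably populates that map is not shown in the source; a sketch under that assumption:

// Hypothetical inverse: flatten a Configuration into a serializable map.
private static Map<String, String> toMap(Configuration conf) {
    Map<String, String> map = new HashMap<>();
    // Configuration implements Iterable<Map.Entry<String, String>>.
    for (Map.Entry<String, String> entry : conf) {
        map.put(entry.getKey(), entry.getValue());
    }
    return map;
}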
From source file:com.google.cloud.dataflow.contrib.hadoop.HadoopFileSource.java
License:Apache License
private List<InputSplit> computeSplits(long desiredBundleSizeBytes)
        throws IOException, IllegalAccessException, InstantiationException {
    Job job = Job.getInstance();
    FileInputFormat.setMinInputSplitSize(job, desiredBundleSizeBytes);
    FileInputFormat.setMaxInputSplitSize(job, desiredBundleSizeBytes);
    return createFormat(job).getSplits(job);
}
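Setting the minimum and maximum split sizes to the same value pins Hadoop's split computation to the desired bundle size, so each InputSplit returned by getSplits corresponds to roughly one Dataflow bundle.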