Example usage for org.apache.hadoop.mapreduce.Job.getInstance()

List of usage examples for org.apache.hadoop.mapreduce.Job.getInstance()

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.Job.getInstance().

Prototype

public static Job getInstance() throws IOException 

Document

Creates a new Job with no particular Cluster.
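
As a quick orientation before the real-world examples below, here is a minimal sketch of the typical pattern: create a Job backed by the default Configuration, describe the mapper, reducer, and output types, point the job at its input and output paths, and submit it. MyDriver, WordMapper, and SumReducer are placeholder class names, not classes from the examples that follow.

public static void main(String[] args) throws Exception {
    // Job.getInstance() returns a Job backed by a fresh default Configuration
    Job job = Job.getInstance();
    job.setJarByClass(MyDriver.class);      // placeholder driver class
    job.setMapperClass(WordMapper.class);   // placeholder mapper
    job.setReducerClass(SumReducer.class);  // placeholder reducer
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}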

Usage

From source file: co.cask.cdap.data.stream.StreamInputFormatTest.java

License: Apache License

private void runMR(File inputDir, File outputDir, long startTime, long endTime, long splitSize, long ttl)
        throws Exception {

    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();

    StreamInputFormat.setTTL(conf, ttl);
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat.setTimeRange(conf, startTime, endTime);
    StreamInputFormat.setMaxSplitSize(conf, splitSize);
    job.setInputFormatClass(TestStreamInputFormat.class);

    TextOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setJarByClass(StreamInputFormatTest.class);
    job.setMapperClass(TokenizeMapper.class);
    job.setReducerClass(AggregateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.waitForCompletion(true);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.MultipleInputsTest.java

License: Apache License

@Test
public void testConfigurations() throws IOException, ClassNotFoundException {
    Job job = Job.getInstance();

    String inputName1 = "inputName1";
    String inputFormatClass1 = TextInputFormat.class.getName();
    Map<String, String> inputFormatConfigs1 = ImmutableMap.of("key1", "val1", "key2", "val2");
    MultipleInputs.addInput(job, inputName1, inputFormatClass1, inputFormatConfigs1, job.getMapperClass());

    Map<String, MultipleInputs.MapperInput> map = MultipleInputs.getInputMap(job.getConfiguration());

    Assert.assertEquals(1, map.size());
    Assert.assertEquals(inputName1, Iterables.getOnlyElement(map.keySet()));
    Assert.assertEquals(inputFormatClass1, Iterables.getOnlyElement(map.values()).getInputFormatClassName());
    Assert.assertEquals(inputFormatConfigs1,
            Iterables.getOnlyElement(map.values()).getInputFormatConfiguration());
    Assert.assertEquals(job.getMapperClass().getName(),
            Iterables.getOnlyElement(map.values()).getMapperClassName());

    Assert.assertEquals(DelegatingInputFormat.class, job.getInputFormatClass());

    // now, test with two inputs in the configuration
    String inputName2 = "inputName2";
    String inputFormatClass2 = TextInputFormat.class.getName();
    Map<String, String> inputFormatConfigs2 = ImmutableMap.of("some_key1", "some_val1", "some_key2",
            "some_val2");
    MultipleInputs.addInput(job, inputName2, inputFormatClass2, inputFormatConfigs2, CustomMapper.class);

    map = MultipleInputs.getInputMap(job.getConfiguration());

    Assert.assertEquals(2, map.size());

    MultipleInputs.MapperInput mapperInput1 = map.get(inputName1);
    Assert.assertEquals(inputFormatClass1, mapperInput1.getInputFormatClassName());
    Assert.assertEquals(inputFormatConfigs1, mapperInput1.getInputFormatConfiguration());
    Assert.assertEquals(job.getMapperClass().getName(), mapperInput1.getMapperClassName());

    MultipleInputs.MapperInput mapperInput2 = map.get(inputName2);
    Assert.assertEquals(inputFormatClass2, mapperInput2.getInputFormatClassName());
    Assert.assertEquals(inputFormatConfigs2, mapperInput2.getInputFormatConfiguration());
    Assert.assertEquals(CustomMapper.class,
            job.getConfiguration().getClassByName(mapperInput2.getMapperClassName()));
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsTest.java

License: Apache License

@Test
public void testInvalidInputName() throws IOException {
    Job job = Job.getInstance();
    try {
        // the other parameters don't matter, because the call fails while validating the name
        MultipleOutputs.addNamedOutput(job, "name.with.dots", null, null, null, null);
        Assert.fail("Expected not to be able to add an output with a '.' in the name");
    } catch (IllegalArgumentException expected) {
        // just check that it's not some other IllegalArgumentException that happened
        Assert.assertTrue(expected.getMessage().contains("must consist only of ASCII letters, numbers,"));
    }
}

From source file: co.cask.hydrator.common.batch.JobUtils.java

License: Apache License

/**
 * Creates a new instance of {@link Job}. Note that the job created is not meant for actual MR
 * submission. It's just for setting up configurations.
 */
public static Job createInstance() throws IOException {
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    conf.clear();

    if (UserGroupInformation.isSecurityEnabled()) {
        // When running in a secure cluster, this program runner executes inside a YARN container
        // and therefore cannot authenticate with the job history server.
        conf.unset("mapreduce.jobhistory.address");
        conf.setBoolean(Job.JOB_AM_ACCESS_DISABLED, false);

        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        job.getCredentials().addAll(credentials);
    }

    return job;
}

From source file: com.cloudera.dataflow.spark.TransformTranslator.java

License: Open Source License

private static <T> TransformEvaluator<AvroIO.Write.Bound<T>> writeAvro() {
    return new TransformEvaluator<AvroIO.Write.Bound<T>>() {
        @Override
        public void evaluate(AvroIO.Write.Bound<T> transform, EvaluationContext context) {
            Job job;
            try {
                job = Job.getInstance();
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
            AvroJob.setOutputKeySchema(job, transform.getSchema());
            @SuppressWarnings("unchecked")
            JavaPairRDD<AvroKey<T>, NullWritable> last = ((JavaRDDLike<WindowedValue<T>, ?>) context
                    .getInputRDD(transform)).map(WindowingHelpers.<T>unwindowFunction())
                            .mapToPair(new PairFunction<T, AvroKey<T>, NullWritable>() {
                                @Override
                                public Tuple2<AvroKey<T>, NullWritable> call(T t) throws Exception {
                                    return new Tuple2<>(new AvroKey<>(t), NullWritable.get());
                                }
                            });
            ShardTemplateInformation shardTemplateInfo = new ShardTemplateInformation(transform.getNumShards(),
                    transform.getShardTemplate(), transform.getFilenamePrefix(), transform.getFilenameSuffix());
            writeHadoopFile(last, job.getConfiguration(), shardTemplateInfo, AvroKey.class, NullWritable.class,
                    TemplatedAvroKeyOutputFormat.class);
        }
    };
}

From source file: com.ema.hadoop.bestclient.BestClient.java

public static void main(String[] args) throws Exception {

    if (args.length != 4) {
        System.err.println("Usage: BestClient <input path> <output path> <date start> <date end>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(BestClient.class);
    job.setJobName("Best client job");

    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.setStrings("dates", args[2], args[3]);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(BCMapper.class);
    job.setReducerClass(BCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.ema.hadoop.wordcount.WordCount.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(WordCount.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper.class);
    job.setReducerClass(WCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.ema.hadoop.wordcount.WordCount_cache.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    // First we write the stop word list
    // it could also be a file manually loaded into HDFS

    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/stop_words.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    for (String w : stopwords) {
        br.write(w + "\n");
    }

    br.close();
    hdfs.close();

    Job job = Job.getInstance();
    job.addCacheFile(new Path("hdfs://localhost:9000/user/student/stop_words.txt").toUri());

    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.google.cloud.dataflow.contrib.hadoop.HadoopFileSink.java

License: Apache License

private Job jobInstance() throws IOException {
    Job job = Job.getInstance();
    // deserialize map to conf
    Configuration conf = job.getConfiguration();
    for (Map.Entry<String, String> entry : map.entrySet()) {
        conf.set(entry.getKey(), entry.getValue());
    }
    return job;
}

From source file: com.google.cloud.dataflow.contrib.hadoop.HadoopFileSource.java

License: Apache License

private List<InputSplit> computeSplits(long desiredBundleSizeBytes)
        throws IOException, IllegalAccessException, InstantiationException {
    Job job = Job.getInstance();
    FileInputFormat.setMinInputSplitSize(job, desiredBundleSizeBytes);
    FileInputFormat.setMaxInputSplitSize(job, desiredBundleSizeBytes);
    return createFormat(job).getSplits(job);
}