Example usage for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce.Job#getConfiguration(), drawn from several Apache-licensed open-source projects.

Prototype

public Configuration getConfiguration() 

Document

Return the configuration for the job.
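
Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects) of the typical pattern: obtain the job's Configuration through getConfiguration() and use it to pass application settings to tasks. The class name, job name and property key are made up for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        // Job.getInstance is the non-deprecated replacement for the new Job(conf)
        // constructor used in several of the examples below.
        Job job = Job.getInstance(new Configuration(), "get-configuration-sketch");
        // Write an application-specific property; "example.threshold" is a made-up key.
        job.getConfiguration().setInt("example.threshold", 10);
        // Read it back; map and reduce tasks see the same value through the
        // Configuration handed to them by the framework.
        int threshold = job.getConfiguration().getInt("example.threshold", 0);
        System.out.println("threshold = " + threshold);
    }
}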

Usage

From source file: com.datasalt.pangool.tuplemr.TupleMRBuilder.java

License: Apache License

public Job createJob() throws IOException, TupleMRException {

    failIfNull(tupleReducer, "Need to set a group handler");
    failIfEmpty(multipleInputs.getMultiInputs(), "Need to add at least one input");
    failIfNull(outputFormat, "Need to set output format");
    failIfNull(outputKeyClass, "Need to set outputKeyClass");
    failIfNull(outputValueClass, "Need to set outputValueClass");
    failIfNull(outputPath, "Need to set outputPath");

    // perform a deep copy of the Configuration
    this.conf = new Configuration(this.conf);

    TupleMRConfig tupleMRConf = buildConf();
    // Serialize PangoolConf in Hadoop Configuration
    instanceFilesCreated.addAll(TupleMRConfig.set(tupleMRConf, conf));
    Job job = (jobName == null) ? new Job(conf) : new Job(conf, jobName);
    if (tupleMRConf.getRollupFrom() != null) {
        job.setReducerClass(RollupReducer.class);
    } else {
        job.setReducerClass(SimpleReducer.class);
    }

    if (tupleCombiner != null) {
        job.setCombinerClass(SimpleCombiner.class); // not rollup by now
        // Set Combiner Handler
        String uniqueName = UUID.randomUUID().toString() + '.' + "combiner-handler.dat";
        try {
            InstancesDistributor.distribute(tupleCombiner, uniqueName, job.getConfiguration());
            instanceFilesCreated.add(uniqueName);
            job.getConfiguration().set(SimpleCombiner.CONF_COMBINER_HANDLER, uniqueName);
        } catch (URISyntaxException e1) {
            throw new TupleMRException(e1);
        }
    }

    // Set Tuple Reducer
    try {
        String uniqueName = UUID.randomUUID().toString() + '.' + "group-handler.dat";
        InstancesDistributor.distribute(tupleReducer, uniqueName, job.getConfiguration());
        instanceFilesCreated.add(uniqueName);
        job.getConfiguration().set(SimpleReducer.CONF_REDUCER_HANDLER, uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }

    // Enabling serialization
    TupleSerialization.enableSerialization(job.getConfiguration());

    job.setJarByClass((jarByClass != null) ? jarByClass : tupleReducer.getClass());
    job.setMapOutputKeyClass(DatumWrapper.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setPartitionerClass(TupleHashPartitioner.class);
    job.setGroupingComparatorClass(GroupComparator.class);
    job.setSortComparatorClass(SortComparator.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    FileOutputFormat.setOutputPath(job, outputPath);
    instanceFilesCreated.addAll(multipleInputs.configureJob(job));
    instanceFilesCreated.addAll(namedOutputs.configureJob(job));
    // Configure a {@link ProxyOutputFormat} for Pangool's Multiple Outputs to
    // work: {@link PangoolMultipleOutput}
    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        InstancesDistributor.distribute(outputFormat, uniqueName, conf);
        instanceFilesCreated.add(uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }
    job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
    job.setOutputFormatClass(ProxyOutputFormat.class);

    return job;
}
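
A hypothetical caller (not part of the Pangool sources shown here) illustrating how the Job returned by createJob() exposes, through getConfiguration(), the handler file name that was serialized above. The builder is assumed to be fully configured elsewhere.

public boolean runPangoolJob(TupleMRBuilder builder) throws Exception {
    Job job = builder.createJob();
    // The instance file name written via job.getConfiguration().set(...) in
    // createJob() is readable again from the same Configuration.
    String reducerHandler = job.getConfiguration().get(SimpleReducer.CONF_REDUCER_HANDLER);
    System.out.println("Reducer handler instance file: " + reducerHandler);
    return job.waitForCompletion(true);
}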

From source file: com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary.java

License: Apache License

public void assertRun(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    HadoopUtils.deleteIfExists(fs, FileOutputFormat.getOutputPath(job));
    // Close input writers first
    for (Map.Entry<String, Object> entry : inputs.entrySet()) {
        Object in = entry.getValue();
        if (in instanceof SequenceFile.Writer) {
            ((SequenceFile.Writer) in).close();
        } else if (in instanceof TupleFile.Writer) {
            ((TupleFile.Writer) in).close();
        }
    }
    job.waitForCompletion(true);
    Assert.assertTrue(job.isSuccessful());

}

From source file: com.datasalt.utils.mapred.counter.MapRedCounter.java

License: Apache License

protected static Job buildMapRedCounterJobWithoutCombiner(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> outputFormat, String outPath,
        Configuration conf) throws IOException {

    Job job = new Job(conf, name);

    Path output = new Path(outPath);
    HadoopUtils.deleteIfExists(FileSystem.get(conf), output);
    job.setJarByClass(MapRedCounter.class);

    job.setReducerClass(MapRedCountReducer.class);
    job.setMapOutputKeyClass(CounterKey.class);
    job.setMapOutputValueClass(CounterValue.class);
    job.setOutputFormatClass(outputFormat);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // Secondary sorting configuration.
    job.setGroupingComparatorClass(CounterKey.IdGroupComparator.class);
    job.setPartitionerClass(CounterKey.IdGroupPartitioner.class);

    FileOutputFormat.setOutputPath(job, output);

    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        DCUtils.serializeToDC(new HadoopOutputFormat(SequenceFileOutputFormat.class), uniqueName, conf);
        job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
        job.setOutputFormatClass(ProxyOutputFormat.class);
        // Multioutput configuration
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterKey.class, LongWritable.class);
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTDISTINCTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterDistinctKey.class,
                LongPairWritable.class);
    } catch (URISyntaxException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
    return job;
}

From source file: com.datasalt.utils.mapred.crossproduct.CrossProductMapRed.java

License: Apache License

public Job getJob() throws IOException {

    if (job == null) {
        MultiJoiner multiJoiner = new MultiJoiner(name, conf);
        multiJoiner.setReducer(CrossProductReducer.class);
        multiJoiner.setOutputKeyClass(CrossProductPair.class);
        multiJoiner.setOutputValueClass(NullWritable.class);
        multiJoiner.setOutputFormat(outputFormat);
        multiJoiner.setOutputPath(outputPath);
        multiJoiner.setJarByClass((jarByClass != null) ? jarByClass : leftInputMapper);

        Job job = multiJoiner
                .addChanneledInput(SECOND_CHANNEL_IN_REDUCER, leftInputPath, Object.class, leftInputFormat,
                        leftInputMapper)
                .addChanneledInput(FIRST_CHANNEL_IN_REDUCER, rightInputPath, Object.class, rightInputFormat,
                        rightInputMapper)
                .getJob();

        /*
         * Outputs
         */
        String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
        try {
            DCUtils.serializeToDC(new HadoopOutputFormat(SequenceFileOutputFormat.class), uniqueName, conf);
            job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
            job.setOutputFormatClass(ProxyOutputFormat.class);
            PangoolMultipleOutputs.addNamedOutput(job, EXTRA_OUTPUT,
                    new HadoopOutputFormat(SequenceFileOutputFormat.class), CrossProductExtraKey.class,
                    CrossProductPair.class);
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
        this.job = job;
    }
    return job;

}

From source file: com.david.mos.out.FileOutputFormat.java

License: Apache License

/**
 * Set the {@link Path} of the output directory for the map-reduce job.
 *
 * @param job The job to modify
 * @param outputDir the {@link Path} of the output directory for 
 * the map-reduce job.
 */
public static void setOutputPath(Job job, Path outputDir) {
    try {
        outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir);
    } catch (IOException e) {
        // Throw the IOException as a RuntimeException to be compatible with MR1
        throw new RuntimeException(e);
    }
    job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString());
}
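
A short sketch using the stock org.apache.hadoop.mapreduce.lib.output.FileOutputFormat (the snippet above is a project-local copy of the same method); the output directory set here is stored in, and read back from, job.getConfiguration(). The path is made up.

public static void configureOutput(Job job) {
    FileOutputFormat.setOutputPath(job, new Path("/tmp/example-output"));
    // getOutputPath() resolves the value from job.getConfiguration().
    Path out = FileOutputFormat.getOutputPath(job);
    System.out.println("Output dir from configuration: " + out);
}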

From source file: com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License: Apache License

/**
 * Set a PathFilter to be applied to the input paths for the map-reduce job.
 * @param job the job to modify
 * @param filter the PathFilter class use for filtering the input paths.
 */
public static void setInputPathFilter(Job job, Class<? extends PathFilter> filter) {
    job.getConfiguration().setClass(PATHFILTER_CLASS, filter, PathFilter.class);
}
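
A sketch of how the filter registered above might be supplied, written against the stock org.apache.hadoop.mapreduce.lib.input.FileInputFormat; the filter class and its rule are invented for illustration.

public static class TmpFileFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        // Skip temporary files; purely illustrative.
        return !path.getName().endsWith(".tmp");
    }
}

public static void configureFilter(Job job) {
    // Stores the filter class name in the job's Configuration (PATHFILTER_CLASS above).
    FileInputFormat.setInputPathFilter(job, TmpFileFilter.class);
}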

From source file: com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License: Apache License

/**
 * Set the minimum input split size
 * @param job the job to modify
 * @param size the minimum size
 */
public static void setMinInputSplitSize(Job job, long size) {
    job.getConfiguration().setLong(SPLIT_MINSIZE, size);
}

From source file: com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License: Apache License

/**
 * Set the maximum split size
 * @param job the job to modify
 * @param size the maximum split size
 */
public static void setMaxInputSplitSize(Job job, long size) {
    job.getConfiguration().setLong(SPLIT_MAXSIZE, size);
}
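
A combined sketch for the two split-size setters above, using the stock org.apache.hadoop.mapreduce.lib.input.FileInputFormat; the sizes are arbitrary, and the property names shown in the comment are the ones used by Hadoop 2.x.

public static void configureSplitSizes(Job job) {
    FileInputFormat.setMinInputSplitSize(job, 64L * 1024 * 1024);  // 64 MB lower bound
    FileInputFormat.setMaxInputSplitSize(job, 256L * 1024 * 1024); // 256 MB upper bound
    // Equivalent direct reads from the Configuration (Hadoop 2.x property names):
    long min = job.getConfiguration().getLong("mapreduce.input.fileinputformat.split.minsize", 1L);
    long max = job.getConfiguration().getLong("mapreduce.input.fileinputformat.split.maxsize", Long.MAX_VALUE);
    System.out.println("split sizes: " + min + " .. " + max);
}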

From source file: com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License: Apache License

/**
 * Set the array of {@link Path}s as the list of inputs
 * for the map-reduce job.
 * 
 * @param job The job to modify 
 * @param inputPaths the {@link Path}s of the input directories/files 
 * for the map-reduce job.
 */
public static void setInputPaths(Job job, Path... inputPaths) throws IOException {
    Configuration conf = job.getConfiguration();
    Path path = inputPaths[0].getFileSystem(conf).makeQualified(inputPaths[0]);
    StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
    for (int i = 1; i < inputPaths.length; i++) {
        str.append(StringUtils.COMMA_STR);
        path = inputPaths[i].getFileSystem(conf).makeQualified(inputPaths[i]);
        str.append(StringUtils.escapeString(path.toString()));
    }
    conf.set(INPUT_DIR, str.toString());
}

From source file: com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License: Apache License

/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job} to modify
 * @param path {@link Path} to be added to the list of inputs for 
 *            the map-reduce job.
 */
public static void addInputPath(Job job, Path path) throws IOException {
    Configuration conf = job.getConfiguration();
    path = path.getFileSystem(conf).makeQualified(path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get(INPUT_DIR);
    conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
}
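
A closing sketch using the stock org.apache.hadoop.mapreduce.lib.input.FileInputFormat: each addInputPath() call appends an escaped path to the comma-separated list kept in job.getConfiguration(), and getInputPaths() parses it back out. The paths are made up.

public static void configureInputs(Job job) throws IOException {
    FileInputFormat.addInputPath(job, new Path("/data/part1"));
    FileInputFormat.addInputPath(job, new Path("/data/part2"));
    // Parses the comma-separated input-directory property back out of
    // job.getConfiguration().
    for (Path p : FileInputFormat.getInputPaths(job)) {
        System.out.println("input: " + p);
    }
}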