Example usage for org.apache.hadoop.conf Configuration setClass

Introduction

On this page you can find usage examples for org.apache.hadoop.conf.Configuration.setClass.

Prototype

public void setClass(String name, Class<?> theClass, Class<?> xface) 

Document

Sets the value of the name property to the name of theClass, which must implement the given interface xface.

Usage

From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java

License:Apache License

private Configuration createConfiguration(int... numberOfLines) {
    Configuration conf = new Configuration();
    if (numberOfLines.length > 0) {
        conf.setLong(NUMBER_OF_LINES_KEY, numberOfLines[0]);
    }
    conf.set("fs.default.name", "file:///");
    conf.setBoolean("fs.file.impl.disable.cache", false);
    conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
    return conf;
}

From source file:boa.runtime.BoaRunner.java

License:Apache License

/**
 * Create a {@link Job} describing the work to be done by this Boa job.
 *
 * @param ins
 *            An array of {@link Path} containing the locations of the input
 *            files
 * 
 * @param out
 *            A {@link Path} containing the location of the output file
 * 
 * @param robust
 *            A boolean representing whether the job should ignore most
 *            exceptions
 * 
 * @return A {@link Job} describing the work to be done by this Boa job
 * @throws IOException
 */
public Job job(final Path[] ins, final Path out, final boolean robust) throws IOException {
    final Configuration configuration = getConf();

    configuration.setBoolean("boa.runtime.robust", robust);

    // faster local reads
    configuration.setBoolean("dfs.client.read.shortcircuit", true);
    configuration.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true);

    // by default our MapFiles index every key, which takes up
    // a lot of memory - this lets you skip keys in the index and
    // control the memory requirements (at the cost of slower gets)
    //configuration.setLong("io.map.index.skip", 128);

    // map output compression
    configuration.setBoolean("mapred.compress.map.output", true);
    configuration.set("mapred.map.output.compression.type", "BLOCK");
    configuration.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    configuration.setBoolean("mapred.map.tasks.speculative.execution", false);
    configuration.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    configuration.setLong("mapred.job.reuse.jvm.num.tasks", -1);

    final Job job = new Job(configuration);

    if (ins != null)
        for (final Path in : ins)
            FileInputFormat.addInputPath(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(BoaPartitioner.class);

    job.setMapOutputKeyClass(EmitKey.class);
    job.setMapOutputValueClass(EmitValue.class);

    job.setOutputFormatClass(BoaOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    return job;
}

From source file:cascading.platform.tez.Hadoop2TezPlatform.java

License:Open Source License

protected boolean setTimelineStore(Configuration configuration) {
    try {
        // try hadoop 2.6
        Class<?> target = Util.loadClass("org.apache.hadoop.yarn.server.timeline.TimelineStore");
        Class<?> type = Util.loadClass("org.apache.hadoop.yarn.server.timeline.MemoryTimelineStore");

        configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, type, target);

        try {
            // hadoop 2.5 has the above classes, but this one is also necessary for the timeline service with acls to function.
            Util.loadClass("org.apache.hadoop.yarn.api.records.timeline.TimelineDomain");
        } catch (CascadingException exception) {
            configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);
        }

        return true;
    } catch (CascadingException exception) {
        try {
            // try hadoop 2.4
            Class<?> target = Util.loadClass(
                    "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.TimelineStore");
            Class<?> type = Util.loadClass(
                    "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.MemoryTimelineStore");

            configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, type, target);
            configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);

            return true;
        } catch (CascadingException ignore) {
            return false;
        }
    }
}

From source file:cascading.scheme.hadoop.SequenceFile.java

License:Open Source License

@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", SequenceFileInputFormat.class, InputFormat.class);
}

From source file:cascading.scheme.hadoop.SequenceFile.java

License:Open Source License

@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.output.key.class", Tuple.class, Object.class);
    conf.setClass("mapred.output.value.class", Tuple.class, Object.class);
    conf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
}

From source file:cascading.scheme.hadoop.TextLine.java

License:Open Source License

@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (hasZippedFiles(FileInputFormat.getInputPaths(asJobConfInstance(conf))))
        throw new IllegalStateException("cannot read zip files: "
                + Arrays.toString(FileInputFormat.getInputPaths(asJobConfInstance(conf))));

    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
}

From source file:cascading.scheme.hadoop.TextLine.java

License:Open Source License

@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (tap.getFullIdentifier(conf).endsWith(".zip"))
        throw new IllegalStateException("cannot write zip files: " + HadoopUtil.getOutputPath(conf));

    conf.setBoolean("mapred.mapper.new-api", false);

    if (getSinkCompression() == Compress.DISABLE)
        conf.setBoolean("mapred.output.compress", false);
    else if (getSinkCompression() == Compress.ENABLE)
        conf.setBoolean("mapred.output.compress", true);

    conf.setClass("mapred.output.key.class", Text.class, Object.class);
    conf.setClass("mapred.output.value.class", Text.class, Object.class);
    conf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
}

From source file:cascading.scheme.hadoop.WritableSequenceFile.java

License:Open Source License

@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (keyType != null)
        conf.setClass("mapred.output.key.class", keyType, Object.class);
    else
        conf.setClass("mapred.output.key.class", NullWritable.class, Object.class);

    if (valueType != null)
        conf.setClass("mapred.output.value.class", valueType, Object.class);
    else
        conf.setClass("mapred.output.value.class", NullWritable.class, Object.class);

    conf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
}

From source file:cascading.tap.hadoop.Hfs.java

License:Open Source License

/**
 * Based on the configuration, handles and sets {@link CombineFileInputFormat} as the input
 * format.
 */
private void handleCombineFileInputFormat(Configuration conf) {
    // if combining files, override the configuration to use CombineFileInputFormat
    if (!getUseCombinedInput(conf))
        return;

    // get the prescribed individual input format from the underlying scheme so it can be used by CombinedInputFormat
    String individualInputFormat = conf.get("mapred.input.format.class");

    if (individualInputFormat == null)
        throw new TapException("input format is missing from the underlying scheme");

    if (individualInputFormat.equals(CombinedInputFormat.class.getName())
            && conf.get(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT) == null)
        throw new TapException(
                "the input format class is already the combined input format but the underlying input format is missing");

    // if safe mode is on (default) throw an exception if the InputFormat is not a FileInputFormat, otherwise log a
    // warning and don't use the CombineFileInputFormat
    boolean safeMode = getCombinedInputSafeMode(conf);

    if (!FileInputFormat.class.isAssignableFrom(conf.getClass("mapred.input.format.class", null))) {
        if (safeMode)
            throw new TapException(
                    "input format must be of type org.apache.hadoop.mapred.FileInputFormat, got: "
                            + individualInputFormat);
        else
            LOG.warn(
                    "not combining input splits with CombineFileInputFormat, {} is not of type org.apache.hadoop.mapred.FileInputFormat.",
                    individualInputFormat);
    } else {
        // set the underlying individual input format
        conf.set(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT, individualInputFormat);

        // override the input format class
        conf.setClass("mapred.input.format.class", CombinedInputFormat.class, InputFormat.class);
    }
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Adds a named output for the job.
 *
 * @param job               job to add the named output
 * @param namedOutput       named output name; it must be alphanumeric
 *                          (letters and digits only)
 * @param outputFormatClass name of the OutputFormat class.
 * @param keyClass          key class
 * @param valueClass        value class
 * @param outputConfigs     configurations for the output
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput, String outputFormatClass, Class<?> keyClass,
        Class<?> valueClass, Map<String, String> outputConfigs) {
    assertValidName(namedOutput);
    checkNamedOutputName(namedOutput, getNamedOutputsList(job), false);
    Configuration conf = job.getConfiguration();
    conf.set(MULTIPLE_OUTPUTS, conf.get(MULTIPLE_OUTPUTS, "") + " " + namedOutput);
    conf.set(MO_PREFIX + namedOutput + FORMAT, outputFormatClass);
    conf.setClass(MO_PREFIX + namedOutput + KEY, keyClass, Object.class);
    conf.setClass(MO_PREFIX + namedOutput + VALUE, valueClass, Object.class);
    ConfigurationUtil.setNamedConfigurations(conf, computePrefixName(namedOutput), outputConfigs);
}