Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

This page collects example usages of the org.apache.hadoop.mapred JobConf no-argument constructor, JobConf().

Prototype

public JobConf() 

Document

Construct a map/reduce job configuration.
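
The no-argument constructor loads Hadoop's default configuration resources; callers then customize the instance, as the examples below do. A minimal sketch (the job name and task counts are illustrative values, not taken from the examples):

import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // Construct a map/reduce job configuration with Hadoop's defaults.
        JobConf conf = new JobConf();

        // Illustrative settings; any configuration property can be set the same way.
        conf.setJobName("example-job");
        conf.setNumMapTasks(4);
        conf.setNumReduceTasks(1);
    }
}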

Usage

From source file:cascading.avro.AvroSchemeTest.java

License:Apache License
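
The test below writes a tuple holding a map of lists and a map of maps through an Avro-backed Lfs tap, creating a default JobConf for both the write and read HadoopFlowProcess, then reads the records back and verifies the values.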

@Test
public void listOrMapInsideMapTest() throws Exception {
    final Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("test3.avsc"));
    final AvroScheme scheme = new AvroScheme(schema);

    final Fields fields = new Fields("aMapToListOfInt", "aMapToMapToLong");

    final Lfs lfs = new Lfs(scheme, tempDir.getRoot().toString());
    HadoopFlowProcess writeProcess = new HadoopFlowProcess(new JobConf());
    final TupleEntryCollector collector = lfs.openForWrite(writeProcess);

    Map<String, Map<String, Long>> aMapToMapToLong = new HashMap<String, Map<String, Long>>();
    Map<String, Long> aMapToLong = new HashMap<String, Long>();
    aMapToLong.put("one", 1L);
    aMapToLong.put("two", 2L);
    aMapToMapToLong.put("key", aMapToLong);

    Map<String, List<Integer>> aMapToListOfInt = new HashMap<String, List<Integer>>();
    List<Integer> aListOfInt = new LinkedList<Integer>();
    aListOfInt.add(0);
    aListOfInt.add(1);
    aMapToListOfInt.put("key", aListOfInt);

    write(scheme, collector, new TupleEntry(fields, new Tuple(aMapToListOfInt, aMapToMapToLong)));
    collector.close();

    HadoopFlowProcess readProcess = new HadoopFlowProcess(new JobConf());
    final TupleEntryIterator iterator = lfs.openForRead(readProcess);
    assertTrue(iterator.hasNext());
    final TupleEntry readEntry1 = iterator.next();

    List<Integer> outListOfInt = (List) ((Map) readEntry1.getObject("aMapToListOfInt")).get("key");
    Map<String, Long> outMapToLong = (Map) ((Map) readEntry1.getObject("aMapToMapToLong")).get("key");

    assertEquals(Integer.valueOf(0), outListOfInt.get(0));
    assertEquals(Integer.valueOf(1), outListOfInt.get(1));
    assertEquals(Long.valueOf(1L), outMapToLong.get("one"));
    assertEquals(Long.valueOf(2L), outMapToLong.get("two"));
    assertTrue(!iterator.hasNext());

}

From source file:cascading.ClusterTestCase.java

License:Open Source License
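
This setup method falls back to a plain new JobConf() when the in-process cluster is disabled; otherwise the configuration comes from a MiniMRCluster backed by a MiniDFSCluster.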

public void setUp() throws IOException {
    if (jobConf != null)
        return;

    if (!enableCluster) {
        jobConf = new JobConf();
    } else {
        System.setProperty("test.build.data", "build");
        new File("build/test/log").mkdirs();
        System.setProperty("hadoop.log.dir", "build/test/log");
        Configuration conf = new Configuration();

        dfs = new MiniDFSCluster(conf, 4, true, null);
        fileSys = dfs.getFileSystem();
        mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1);

        jobConf = mr.createJobConf();

        jobConf.set("mapred.child.java.opts", "-Xmx512m");
        jobConf.setMapSpeculativeExecution(false);
        jobConf.setReduceSpeculativeExecution(false);
    }

    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setNumReduceTasks(numReduceTasks);

    if (logger != null)
        properties.put("log4j.logger", logger);

    Flow.setJobPollingInterval(properties, 500); // should speed up tests
    MultiMapReducePlanner.setJobConf(properties, jobConf);
}

From source file:cascading.flow.Flow.java

License:Open Source License
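
A lazy accessor: the first call installs a default JobConf if none has been set.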

/**
 * Method getJobConf returns the jobConf of this Flow object.
 *
 * @return the jobConf (type JobConf) of this Flow object.
 */
public JobConf getJobConf() {
    if (jobConf == null)
        setJobConf(new JobConf());

    return jobConf;
}

From source file:cascading.flow.FlowStep.java

License:Open Source License
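
This method builds a step's configuration, starting from the parent configuration when one is given and from a default JobConf otherwise, then wires in Cascading's mapper, reducer, serializations, comparators, and partitioners.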

protected JobConf getJobConf(JobConf parentConf) throws IOException {
    JobConf conf = parentConf == null ? new JobConf() : new JobConf(parentConf);

    // set values first so they can't break things downstream
    if (hasProperties()) {
        for (Map.Entry entry : getProperties().entrySet())
            conf.set(entry.getKey().toString(), entry.getValue().toString());
    }

    // disable warning
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    conf.setJobName(getStepName());

    conf.setOutputKeyClass(Tuple.class);
    conf.setOutputValueClass(Tuple.class);

    conf.setMapperClass(FlowMapper.class);
    conf.setReducerClass(FlowReducer.class);

    // set for use by the shuffling phase
    TupleSerialization.setSerializations(conf);

    initFromSources(conf);

    initFromSink(conf);

    initFromTraps(conf);

    if (sink.getScheme().getNumSinkParts() != 0) {
        // if no reducer, set num map tasks to control parts
        if (getGroup() != null)
            conf.setNumReduceTasks(sink.getScheme().getNumSinkParts());
        else
            conf.setNumMapTasks(sink.getScheme().getNumSinkParts());
    }

    conf.setOutputKeyComparatorClass(TupleComparator.class);

    if (getGroup() == null) {
        conf.setNumReduceTasks(0); // disable reducers
    } else {
        // must set map output defaults when performing a reduce
        conf.setMapOutputKeyClass(Tuple.class);
        conf.setMapOutputValueClass(Tuple.class);

        // handles the case the groupby sort should be reversed
        if (getGroup().isSortReversed())
            conf.setOutputKeyComparatorClass(ReverseTupleComparator.class);

        addComparators(conf, "cascading.group.comparator", getGroup().getGroupingSelectors());

        if (getGroup().isGroupBy())
            addComparators(conf, "cascading.sort.comparator", getGroup().getSortingSelectors());

        if (!getGroup().isGroupBy()) {
            conf.setPartitionerClass(CoGroupingPartitioner.class);
            conf.setMapOutputKeyClass(IndexTuple.class); // allows groups to be sorted by index
            conf.setMapOutputValueClass(IndexTuple.class);
            conf.setOutputKeyComparatorClass(IndexTupleCoGroupingComparator.class); // sorts by group, then by index
            conf.setOutputValueGroupingComparator(CoGroupingComparator.class);
        }

        if (getGroup().isSorted()) {
            conf.setPartitionerClass(GroupingPartitioner.class);
            conf.setMapOutputKeyClass(TuplePair.class);

            if (getGroup().isSortReversed())
                conf.setOutputKeyComparatorClass(ReverseGroupingSortingComparator.class);
            else
                conf.setOutputKeyComparatorClass(GroupingSortingComparator.class);

            // no need to supply a reverse comparator, only equality is checked
            conf.setOutputValueGroupingComparator(GroupingComparator.class);
        }
    }

    // perform last so init above will pass to tasks
    conf.setInt("cascading.flow.step.id", id);
    conf.set("cascading.flow.step", Util.serializeBase64(this));

    return conf;
}

From source file:cascading.flow.hadoop.HadoopFlow.java

License:Open Source License
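
Creates a fresh configuration, defaulting to an empty JobConf when no default configuration is supplied.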

@Override
protected JobConf newConfig(JobConf defaultConfig) {
    return defaultConfig == null ? new JobConf() : HadoopUtil.copyJobConf(defaultConfig);
}

From source file:cascading.flow.hadoop.HadoopFlow.java

License:Open Source License
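
Another lazy accessor, initializing the flow's configuration from a default JobConf on first use.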

@ProcessConfiguration
@Override
public JobConf getConfig() {
    if (jobConf == null)
        initConfig(null, new JobConf());

    return jobConf;
}

From source file:cascading.flow.hadoop.HadoopFlowStep.java

License:Open Source License
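
A later variant of the step-configuration logic shown above, which additionally resolves gather partitions, stamps the Cascading version, and packs flow-node state either inline or into the distributed cache.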

public JobConf createInitializedConfig(FlowProcess<JobConf> flowProcess, JobConf parentConfig) {
    JobConf conf = parentConfig == null ? new JobConf() : HadoopUtil.copyJobConf(parentConfig);

    // disable warning
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    conf.setJobName(getStepDisplayName(conf.getInt("cascading.display.id.truncate", Util.ID_LENGTH)));

    conf.setOutputKeyClass(Tuple.class);
    conf.setOutputValueClass(Tuple.class);

    conf.setMapRunnerClass(FlowMapper.class);
    conf.setReducerClass(FlowReducer.class);

    // set for use by the shuffling phase
    TupleSerialization.setSerializations(conf);

    initFromSources(flowProcess, conf);

    initFromSink(flowProcess, conf);

    initFromTraps(flowProcess, conf);

    initFromStepConfigDef(conf);

    int numSinkParts = getSink().getScheme().getNumSinkParts();

    if (numSinkParts != 0) {
        // if no reducer, set num map tasks to control parts
        if (getGroup() != null)
            conf.setNumReduceTasks(numSinkParts);
        else
            conf.setNumMapTasks(numSinkParts);
    } else if (getGroup() != null) {
        int gatherPartitions = conf.getNumReduceTasks();

        if (gatherPartitions == 0)
            gatherPartitions = conf.getInt(FlowRuntimeProps.GATHER_PARTITIONS, 0);

        if (gatherPartitions == 0)
            throw new FlowException(getName(),
                    "a default number of gather partitions must be set, see FlowRuntimeProps");

        conf.setNumReduceTasks(gatherPartitions);
    }

    conf.setOutputKeyComparatorClass(TupleComparator.class);

    if (getGroup() == null) {
        conf.setNumReduceTasks(0); // disable reducers
    } else {
        // must set map output defaults when performing a reduce
        conf.setMapOutputKeyClass(Tuple.class);
        conf.setMapOutputValueClass(Tuple.class);
        conf.setPartitionerClass(GroupingPartitioner.class);

        // handles the case the groupby sort should be reversed
        if (getGroup().isSortReversed())
            conf.setOutputKeyComparatorClass(ReverseTupleComparator.class);

        addComparators(conf, "cascading.group.comparator", getGroup().getKeySelectors(), this, getGroup());

        if (getGroup().isGroupBy())
            addComparators(conf, "cascading.sort.comparator", getGroup().getSortingSelectors(), this,
                    getGroup());

        if (!getGroup().isGroupBy()) {
            conf.setPartitionerClass(CoGroupingPartitioner.class);
            conf.setMapOutputKeyClass(IndexTuple.class); // allows groups to be sorted by index
            conf.setMapOutputValueClass(IndexTuple.class);
            conf.setOutputKeyComparatorClass(IndexTupleCoGroupingComparator.class); // sorts by group, then by index
            conf.setOutputValueGroupingComparator(CoGroupingComparator.class);
        }

        if (getGroup().isSorted()) {
            conf.setPartitionerClass(GroupingSortingPartitioner.class);
            conf.setMapOutputKeyClass(TuplePair.class);

            if (getGroup().isSortReversed())
                conf.setOutputKeyComparatorClass(ReverseGroupingSortingComparator.class);
            else
                conf.setOutputKeyComparatorClass(GroupingSortingComparator.class);

            // no need to supply a reverse comparator, only equality is checked
            conf.setOutputValueGroupingComparator(GroupingComparator.class);
        }
    }

    // perform last so init above will pass to tasks
    String versionString = Version.getRelease();

    if (versionString != null)
        conf.set("cascading.version", versionString);

    conf.set(CASCADING_FLOW_STEP_ID, getID());
    conf.set("cascading.flow.step.num", Integer.toString(getOrdinal()));

    HadoopUtil.setIsInflow(conf);

    Iterator<FlowNode> iterator = getFlowNodeGraph().getTopologicalIterator();

    String mapState = pack(iterator.next(), conf);
    String reduceState = pack(iterator.hasNext() ? iterator.next() : null, conf);

    // hadoop 20.2 doesn't like dist cache when using local mode
    int maxSize = Short.MAX_VALUE;

    int length = mapState.length() + reduceState.length();

    if (isHadoopLocalMode(conf) || length < maxSize) // seems safe
    {
        conf.set("cascading.flow.step.node.map", mapState);

        if (!Util.isEmpty(reduceState))
            conf.set("cascading.flow.step.node.reduce", reduceState);
    } else {
        conf.set("cascading.flow.step.node.map.path",
                HadoopMRUtil.writeStateToDistCache(conf, getID(), "map", mapState));

        if (!Util.isEmpty(reduceState))
            conf.set("cascading.flow.step.node.reduce.path",
                    HadoopMRUtil.writeStateToDistCache(conf, getID(), "reduce", reduceState));
    }

    return conf;
}

From source file:cascading.flow.hadoop.HadoopUtil.java

License:Open Source License
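
This helper copies String-convertible properties into a new or copied JobConf, skipping null values and refusing Class or JobConf values.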

public static JobConf createJobConf(Map<Object, Object> properties, JobConf defaultJobconf) {
    JobConf jobConf = defaultJobconf == null ? new JobConf() : new JobConf(defaultJobconf);

    if (properties == null)
        return jobConf;

    Set<Object> keys = new HashSet<Object>(properties.keySet());

    // keys will only be grabbed if both key/value are String, so keep orig keys
    if (properties instanceof Properties)
        keys.addAll(((Properties) properties).stringPropertyNames());

    for (Object key : keys) {
        Object value = properties.get(key);

        if (value == null && properties instanceof Properties && key instanceof String)
            value = ((Properties) properties).getProperty((String) key);

        if (value == null) // don't stuff null values
            continue;

        // don't let these objects pass, even though toString is called below.
        if (value instanceof Class || value instanceof JobConf)
            continue;

        jobConf.set(key.toString(), value.toString());
    }

    return jobConf;
}

From source file:cascading.flow.hadoop.MapReduceFlowStep.java

License:Open Source License
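
Here a throwaway default JobConf is passed to the sink's sinkConfInit before the step's own jobConf is returned.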

@Override
public JobConf createInitializedConfig(FlowProcess<JobConf> flowProcess, JobConf parentConfig) {
    // allow the sink to be deleted
    getSink().sinkConfInit(flowProcess, new JobConf());

    return jobConf;
}

From source file:cascading.flow.hadoop.planner.HadoopPlanner.java

License:Open Source License

/**
 * Method createJobConf returns a new JobConf instance using the values in the given properties argument.
 *
 * @param properties of type Map
 * @return a JobConf instance
 */
public static JobConf createJobConf(Map<Object, Object> properties) {
    JobConf conf = new JobConf();

    copyProperties(conf, properties);

    return conf;
}
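
A hypothetical call site for the helper above (the property key and value are assumptions for illustration; copyProperties is expected to transfer String-convertible entries into the JobConf):

Map<Object, Object> properties = new HashMap<Object, Object>();
properties.put("mapred.child.java.opts", "-Xmx512m");

JobConf conf = HadoopPlanner.createJobConf(properties);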