Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

This page collects example usages of the org.apache.hadoop.mapred JobConf no-argument constructor, JobConf().

Prototype

public JobConf() 

Document

Construct a map/reduce job configuration.
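
The no-argument constructor loads Hadoop's default configuration resources; callers then customize the instance, as the examples below do. A minimal sketch (the job name and task counts are illustrative values, not taken from the examples):

import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // Construct a map/reduce job configuration with Hadoop's defaults.
        JobConf conf = new JobConf();

        // Illustrative settings; any configuration property can be set the same way.
        conf.setJobName("example-job");
        conf.setNumMapTasks(4);
        conf.setNumReduceTasks(1);
    }
}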

Usage

From source file:cascading.avro.AvroSchemeTest.java

License:Apache License
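
The test below writes a tuple holding a map of lists and a map of maps through an Avro-backed Lfs tap, creating a default JobConf for both the write and read HadoopFlowProcess, then reads the records back and verifies the values.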

@Test
public void listOrMapInsideMapTest() throws Exception {
    final Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("test3.avsc"));
    final AvroScheme scheme = new AvroScheme(schema);

    final Fields fields = new Fields("aMapToListOfInt", "aMapToMapToLong");

    final Lfs lfs = new Lfs(scheme, tempDir.getRoot().toString());
    HadoopFlowProcess writeProcess = new HadoopFlowProcess(new JobConf());
    final TupleEntryCollector collector = lfs.openForWrite(writeProcess);

    Map<String, Map<String, Long>> aMapToMapToLong = new HashMap<String, Map<String, Long>>();
    Map<String, Long> aMapToLong = new HashMap<String, Long>();
    aMapToLong.put("one", 1L);
    aMapToLong.put("two", 2L);
    aMapToMapToLong.put("key", aMapToLong);

    Map<String, List<Integer>> aMapToListOfInt = new HashMap<String, List<Integer>>();
    List<Integer> aListOfInt = new LinkedList<Integer>();
    aListOfInt.add(0);
    aListOfInt.add(1);
    aMapToListOfInt.put("key", aListOfInt);

    write(scheme, collector, new TupleEntry(fields, new Tuple(aMapToListOfInt, aMapToMapToLong)));
    collector.close();

    HadoopFlowProcess readProcess = new HadoopFlowProcess(new JobConf());
    final TupleEntryIterator iterator = lfs.openForRead(readProcess);
    assertTrue(iterator.hasNext());
    final TupleEntry readEntry1 = iterator.next();

    List<Integer> outListOfInt = (List) ((Map) readEntry1.getObject("aMapToListOfInt")).get("key");
    Map<String, Long> outMapToLong = (Map) ((Map) readEntry1.getObject("aMapToMapToLong")).get("key");

    assertEquals(Integer.valueOf(0), outListOfInt.get(0));
    assertEquals(Integer.valueOf(1), outListOfInt.get(1));
    assertEquals(Long.valueOf(1L), outMapToLong.get("one"));
    assertEquals(Long.valueOf(2L), outMapToLong.get("two"));
    assertTrue(!iterator.hasNext());

}

From source file:cascading.ClusterTestCase.java

License:Open Source License
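
This setup method falls back to a plain new JobConf() when the in-process cluster is disabled; otherwise the configuration comes from a MiniMRCluster backed by a MiniDFSCluster.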

public void setUp() throws IOException {
    if (jobConf != null)
        return;

    if (!enableCluster) {
        jobConf = new JobConf();
    } else {
        System.setProperty("test.build.data", "build");
        new File("build/test/log").mkdirs();
        System.setProperty("hadoop.log.dir", "build/test/log");
        Configuration conf = new Configuration();

        dfs = new MiniDFSCluster(conf, 4, true, null);
        fileSys = dfs.getFileSystem();
        mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1);

        jobConf = mr.createJobConf();

        jobConf.set("mapred.child.java.opts", "-Xmx512m");
        jobConf.setMapSpeculativeExecution(false);
        jobConf.setReduceSpeculativeExecution(false);
    }

    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setNumReduceTasks(numReduceTasks);

    if (logger != null)
        properties.put("log4j.logger", logger);

    Flow.setJobPollingInterval(properties, 500); // should speed up tests
    MultiMapReducePlanner.setJobConf(properties, jobConf);
}

From source file:cascading.flow.Flow.java

License:Open Source License
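
A lazy accessor: the first call installs a default JobConf if none has been set.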

/**
 * Method getJobConf returns the jobConf of this Flow object.
 *
 * @return the jobConf (type JobConf) of this Flow object.
 */
public JobConf getJobConf() {
    if (jobConf == null)
        setJobConf(new JobConf());

    return jobConf;
}

From source file:cascading.flow.FlowStep.java

License:Open Source License
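
This method builds a step's configuration, starting from the parent configuration when one is given and from a default JobConf otherwise, then wires in Cascading's mapper, reducer, serializations, comparators, and partitioners.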

protected JobConf getJobConf(JobConf parentConf) throws IOException {
    JobConf conf = parentConf == null ? new JobConf() : new JobConf(parentConf);

    // set values first so they can't break things downstream
    if (hasProperties()) {
        for (Map.Entry entry : getProperties().entrySet())
            conf.set(entry.getKey().toString(), entry.getValue().toString());
    }

    // disable warning
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    conf.setJobName(getStepName());

    conf.setOutputKeyClass(Tuple.class);
    conf.setOutputValueClass(Tuple.class);

    conf.setMapperClass(FlowMapper.class);
    conf.setReducerClass(FlowReducer.class);

    // set for use by the shuffling phase
    TupleSerialization.setSerializations(conf);

    initFromSources(conf);

    initFromSink(conf);

    initFromTraps(conf);

    if (sink.getScheme().getNumSinkParts() != 0) {
        // if no reducer, set num map tasks to control parts
        if (getGroup() != null)
            conf.setNumReduceTasks(sink.getScheme().getNumSinkParts());
        else
            conf.setNumMapTasks(sink.getScheme().getNumSinkParts());
    }

    conf.setOutputKeyComparatorClass(TupleComparator.class);

    if (getGroup() == null) {
        conf.setNumReduceTasks(0); // disable reducers
    } else {
        // must set map output defaults when performing a reduce
        conf.setMapOutputKeyClass(Tuple.class);
        conf.setMapOutputValueClass(Tuple.class);

        // handles the case the groupby sort should be reversed
        if (getGroup().isSortReversed())
            conf.setOutputKeyComparatorClass(ReverseTupleComparator.class);

        addComparators(conf, "cascading.group.comparator", getGroup().getGroupingSelectors());

        if (getGroup().isGroupBy())
            addComparators(conf, "cascading.sort.comparator", getGroup().getSortingSelectors());

        if (!getGroup().isGroupBy()) {
            conf.setPartitionerClass(CoGroupingPartitioner.class);
            conf.setMapOutputKeyClass(IndexTuple.class); // allows groups to be sorted by index
            conf.setMapOutputValueClass(IndexTuple.class);
            conf.setOutputKeyComparatorClass(IndexTupleCoGroupingComparator.class); // sorts by group, then by index
            conf.setOutputValueGroupingComparator(CoGroupingComparator.class);
        }

        if (getGroup().isSorted()) {
            conf.setPartitionerClass(GroupingPartitioner.class);
            conf.setMapOutputKeyClass(TuplePair.class);

            if (getGroup().isSortReversed())
                conf.setOutputKeyComparatorClass(ReverseGroupingSortingComparator.class);
            else
                conf.setOutputKeyComparatorClass(GroupingSortingComparator.class);

            // no need to supply a reverse comparator, only equality is checked
            conf.setOutputValueGroupingComparator(GroupingComparator.class);
        }
    }

    // perform last so init above will pass to tasks
    conf.setInt("cascading.flow.step.id", id);
    conf.set("cascading.flow.step", Util.serializeBase64(this));

    return conf;
}

From source file:cascading.flow.hadoop.HadoopFlow.java

License:Open Source License
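
Creates a fresh configuration, defaulting to an empty JobConf when no default configuration is supplied.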

@Override
protected JobConf newConfig(JobConf defaultConfig) {
    return defaultConfig == null ? new JobConf() : HadoopUtil.copyJobConf(defaultConfig);
}

From source file:cascading.flow.hadoop.HadoopFlow.java

License:Open Source License
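
Another lazy accessor, initializing the flow's configuration from a default JobConf on first use.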

@ProcessConfiguration
@Override
public JobConf getConfig() {
    if (jobConf == null)
        initConfig(null, new JobConf());

    return jobConf;
}

From source file:cascading.flow.hadoop.HadoopFlowStep.java

License:Open Source License
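
A later variant of the step-configuration logic shown above, which additionally resolves gather partitions, stamps the Cascading version, and packs flow-node state either inline or into the distributed cache.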

public JobConf createInitializedConfig(FlowProcess<JobConf> flowProcess, JobConf parentConfig) {
    JobConf conf = parentConfig == null ? new JobConf() : HadoopUtil.copyJobConf(parentConfig);

    // disable warning
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    conf.setJobName(getStepDisplayName(conf.getInt("cascading.display.id.truncate", Util.ID_LENGTH)));

    conf.setOutputKeyClass(Tuple.class);
    conf.setOutputValueClass(Tuple.class);

    conf.setMapRunnerClass(FlowMapper.class);
    conf.setReducerClass(FlowReducer.class);

    // set for use by the shuffling phase
    TupleSerialization.setSerializations(conf);

    initFromSources(flowProcess, conf);

    initFromSink(flowProcess, conf);

    initFromTraps(flowProcess, conf);

    initFromStepConfigDef(conf);

    int numSinkParts = getSink().getScheme().getNumSinkParts();

    if (numSinkParts != 0) {
        // if no reducer, set num map tasks to control parts
        if (getGroup() != null)
            conf.setNumReduceTasks(numSinkParts);
        else
            conf.setNumMapTasks(numSinkParts);
    } else if (getGroup() != null) {
        int gatherPartitions = conf.getNumReduceTasks();

        if (gatherPartitions == 0)
            gatherPartitions = conf.getInt(FlowRuntimeProps.GATHER_PARTITIONS, 0);

        if (gatherPartitions == 0)
            throw new FlowException(getName(),
                    "a default number of gather partitions must be set, see FlowRuntimeProps");

        conf.setNumReduceTasks(gatherPartitions);
    }

    conf.setOutputKeyComparatorClass(TupleComparator.class);

    if (getGroup() == null) {
        conf.setNumReduceTasks(0); // disable reducers
    } else {
        // must set map output defaults when performing a reduce
        conf.setMapOutputKeyClass(Tuple.class);
        conf.setMapOutputValueClass(Tuple.class);
        conf.setPartitionerClass(GroupingPartitioner.class);

        // handles the case the groupby sort should be reversed
        if (getGroup().isSortReversed())
            conf.setOutputKeyComparatorClass(ReverseTupleComparator.class);

        addComparators(conf, "cascading.group.comparator", getGroup().getKeySelectors(), this, getGroup());

        if (getGroup().isGroupBy())
            addComparators(conf, "cascading.sort.comparator", getGroup().getSortingSelectors(), this,
                    getGroup());

        if (!getGroup().isGroupBy()) {
            conf.setPartitionerClass(CoGroupingPartitioner.class);
            conf.setMapOutputKeyClass(IndexTuple.class); // allows groups to be sorted by index
            conf.setMapOutputValueClass(IndexTuple.class);
            conf.setOutputKeyComparatorClass(IndexTupleCoGroupingComparator.class); // sorts by group, then by index
            conf.setOutputValueGroupingComparator(CoGroupingComparator.class);
        }

        if (getGroup().isSorted()) {
            conf.setPartitionerClass(GroupingSortingPartitioner.class);
            conf.setMapOutputKeyClass(TuplePair.class);

            if (getGroup().isSortReversed())
                conf.setOutputKeyComparatorClass(ReverseGroupingSortingComparator.class);
            else
                conf.setOutputKeyComparatorClass(GroupingSortingComparator.class);

            // no need to supply a reverse comparator, only equality is checked
            conf.setOutputValueGroupingComparator(GroupingComparator.class);
        }
    }

    // perform last so init above will pass to tasks
    String versionString = Version.getRelease();

    if (versionString != null)
        conf.set("cascading.version", versionString);

    conf.set(CASCADING_FLOW_STEP_ID, getID());
    conf.set("cascading.flow.step.num", Integer.toString(getOrdinal()));

    HadoopUtil.setIsInflow(conf);

    Iterator<FlowNode> iterator = getFlowNodeGraph().getTopologicalIterator();

    String mapState = pack(iterator.next(), conf);
    String reduceState = pack(iterator.hasNext() ? iterator.next() : null, conf);

    // hadoop 20.2 doesn't like dist cache when using local mode
    int maxSize = Short.MAX_VALUE;

    int length = mapState.length() + reduceState.length();

    if (isHadoopLocalMode(conf) || length < maxSize) // seems safe
    {
        conf.set("cascading.flow.step.node.map", mapState);

        if (!Util.isEmpty(reduceState))
            conf.set("cascading.flow.step.node.reduce", reduceState);
    } else {
        conf.set("cascading.flow.step.node.map.path",
                HadoopMRUtil.writeStateToDistCache(conf, getID(), "map", mapState));

        if (!Util.isEmpty(reduceState))
            conf.set("cascading.flow.step.node.reduce.path",
                    HadoopMRUtil.writeStateToDistCache(conf, getID(), "reduce", reduceState));
    }

    return conf;
}

From source file:cascading.flow.hadoop.HadoopUtil.java

License:Open Source License
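
This helper copies String-convertible properties into a new or copied JobConf, skipping null values and refusing Class or JobConf values.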

public static JobConf createJobConf(Map<Object, Object> properties, JobConf defaultJobconf) {
    JobConf jobConf = defaultJobconf == null ? new JobConf() : new JobConf(defaultJobconf);

    if (properties == null)
        return jobConf;

    Set<Object> keys = new HashSet<Object>(properties.keySet());

    // keys will only be grabbed if both key/value are String, so keep orig keys
    if (properties instanceof Properties)
        keys.addAll(((Properties) properties).stringPropertyNames());

    for (Object key : keys) {
        Object value = properties.get(key);

        if (value == null && properties instanceof Properties && key instanceof String)
            value = ((Properties) properties).getProperty((String) key);

        if (value == null) // don't stuff null values
            continue;

        // don't let these objects pass, even though toString is called below.
        if (value instanceof Class || value instanceof JobConf)
            continue;

        jobConf.set(key.toString(), value.toString());
    }

    return jobConf;
}

From source file:cascading.flow.hadoop.MapReduceFlowStep.java

License:Open Source License
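
Here a throwaway default JobConf is passed to the sink's sinkConfInit before the step's own jobConf is returned.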

@Override
public JobConf createInitializedConfig(FlowProcess<JobConf> flowProcess, JobConf parentConfig) {
    // allow the sink to be deleted
    getSink().sinkConfInit(flowProcess, new JobConf());

    return jobConf;
}

From source file:cascading.flow.hadoop.planner.HadoopPlanner.java

License:Open Source License

/**
 * Method createJobConf returns a new JobConf instance using the values in the given properties argument.
 *
 * @param properties of type Map
 * @return a JobConf instance
 */
public static JobConf createJobConf(Map<Object, Object> properties) {
    JobConf conf = new JobConf();

    copyProperties(conf, properties);

    return conf;
}
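
A hypothetical call site for the helper above (the property key and value are assumptions for illustration; copyProperties is expected to transfer String-convertible entries into the JobConf):

Map<Object, Object> properties = new HashMap<Object, Object>();
properties.put("mapred.child.java.opts", "-Xmx512m");

JobConf conf = HadoopPlanner.createJobConf(properties);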