List of usage examples for org.apache.hadoop.mapred.JobConf: the JobConf() constructor
public JobConf()
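The examples below are taken from real projects. As a quick orientation first, here is a minimal, self-contained sketch of the no-argument constructor; the class name and property values are illustrative only and do not come from the source files that follow.

import org.apache.hadoop.mapred.JobConf;

// Minimal sketch: JobConf() builds an empty map/reduce configuration that
// picks up the default Hadoop resources on the classpath and can then be
// tuned programmatically before a job is submitted.
public class JobConfDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.setJobName("demo-job");                    // human-readable job name
        conf.set("mapred.child.java.opts", "-Xmx512m"); // arbitrary key/value setting
        conf.setNumReduceTasks(1);                      // shape the reduce phase

        System.out.println(conf.getJobName());          // prints "demo-job"
    }
}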
From source file:cascading.avro.AvroSchemeTest.java
License:Apache License
@Test
public void listOrMapInsideMapTest() throws Exception {
    final Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("test3.avsc"));
    final AvroScheme scheme = new AvroScheme(schema);
    final Fields fields = new Fields("aMapToListOfInt", "aMapToMapToLong");
    final Lfs lfs = new Lfs(scheme, tempDir.getRoot().toString());

    HadoopFlowProcess writeProcess = new HadoopFlowProcess(new JobConf());
    final TupleEntryCollector collector = lfs.openForWrite(writeProcess);

    Map<String, Map<String, Long>> aMapToMapToLong = new HashMap<String, Map<String, Long>>();
    Map<String, Long> aMapToLong = new HashMap<String, Long>();
    aMapToLong.put("one", 1L);
    aMapToLong.put("two", 2L);
    aMapToMapToLong.put("key", aMapToLong);

    Map<String, List<Integer>> aMapToListOfInt = new HashMap<String, List<Integer>>();
    List<Integer> aListOfInt = new LinkedList<Integer>();
    aListOfInt.add(0);
    aListOfInt.add(1);
    aMapToListOfInt.put("key", aListOfInt);

    write(scheme, collector, new TupleEntry(fields, new Tuple(aMapToListOfInt, aMapToMapToLong)));
    collector.close();

    HadoopFlowProcess readProcess = new HadoopFlowProcess(new JobConf());
    final TupleEntryIterator iterator = lfs.openForRead(readProcess);

    assertTrue(iterator.hasNext());
    final TupleEntry readEntry1 = iterator.next();

    List<Integer> outListOfInt = (List) ((Map) readEntry1.getObject("aMapToListOfInt")).get("key");
    Map<String, Long> outMapToLong = (Map) ((Map) readEntry1.getObject("aMapToMapToLong")).get("key");

    assertEquals(Integer.valueOf(0), outListOfInt.get(0));
    assertEquals(Integer.valueOf(1), outListOfInt.get(1));
    assertEquals(Long.valueOf(1L), outMapToLong.get("one"));
    assertEquals(Long.valueOf(2L), outMapToLong.get("two"));
    assertTrue(!iterator.hasNext());
}
From source file:cascading.ClusterTestCase.java
License:Open Source License
public void setUp() throws IOException {
    if (jobConf != null)
        return;

    if (!enableCluster) {
        jobConf = new JobConf();
    } else {
        System.setProperty("test.build.data", "build");
        new File("build/test/log").mkdirs();
        System.setProperty("hadoop.log.dir", "build/test/log");

        Configuration conf = new Configuration();
        dfs = new MiniDFSCluster(conf, 4, true, null);
        fileSys = dfs.getFileSystem();
        mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1);

        jobConf = mr.createJobConf();
        jobConf.set("mapred.child.java.opts", "-Xmx512m");
        jobConf.setMapSpeculativeExecution(false);
        jobConf.setReduceSpeculativeExecution(false);
    }

    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setNumReduceTasks(numReduceTasks);

    if (logger != null)
        properties.put("log4j.logger", logger);

    Flow.setJobPollingInterval(properties, 500); // should speed up tests

    MultiMapReducePlanner.setJobConf(properties, jobConf);
}
From source file:cascading.flow.Flow.java
License:Open Source License
/**
 * Method getJobConf returns the jobConf of this Flow object.
 *
 * @return the jobConf (type JobConf) of this Flow object.
 */
public JobConf getJobConf() {
    if (jobConf == null)
        setJobConf(new JobConf());

    return jobConf;
}
From source file:cascading.flow.FlowStep.java
License:Open Source License
protected JobConf getJobConf(JobConf parentConf) throws IOException {
    JobConf conf = parentConf == null ? new JobConf() : new JobConf(parentConf);

    // set values first so they can't break things downstream
    if (hasProperties()) {
        for (Map.Entry entry : getProperties().entrySet())
            conf.set(entry.getKey().toString(), entry.getValue().toString());
    }

    // disable warning
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    conf.setJobName(getStepName());

    conf.setOutputKeyClass(Tuple.class);
    conf.setOutputValueClass(Tuple.class);

    conf.setMapperClass(FlowMapper.class);
    conf.setReducerClass(FlowReducer.class);

    // set for use by the shuffling phase
    TupleSerialization.setSerializations(conf);

    initFromSources(conf);
    initFromSink(conf);
    initFromTraps(conf);

    if (sink.getScheme().getNumSinkParts() != 0) {
        // if no reducer, set num map tasks to control parts
        if (getGroup() != null)
            conf.setNumReduceTasks(sink.getScheme().getNumSinkParts());
        else
            conf.setNumMapTasks(sink.getScheme().getNumSinkParts());
    }

    conf.setOutputKeyComparatorClass(TupleComparator.class);

    if (getGroup() == null) {
        conf.setNumReduceTasks(0); // disable reducers
    } else {
        // must set map output defaults when performing a reduce
        conf.setMapOutputKeyClass(Tuple.class);
        conf.setMapOutputValueClass(Tuple.class);

        // handles the case the groupby sort should be reversed
        if (getGroup().isSortReversed())
            conf.setOutputKeyComparatorClass(ReverseTupleComparator.class);

        addComparators(conf, "cascading.group.comparator", getGroup().getGroupingSelectors());

        if (getGroup().isGroupBy())
            addComparators(conf, "cascading.sort.comparator", getGroup().getSortingSelectors());

        if (!getGroup().isGroupBy()) {
            conf.setPartitionerClass(CoGroupingPartitioner.class);
            conf.setMapOutputKeyClass(IndexTuple.class); // allows groups to be sorted by index
            conf.setMapOutputValueClass(IndexTuple.class);
            conf.setOutputKeyComparatorClass(IndexTupleCoGroupingComparator.class); // sorts by group, then by index
            conf.setOutputValueGroupingComparator(CoGroupingComparator.class);
        }

        if (getGroup().isSorted()) {
            conf.setPartitionerClass(GroupingPartitioner.class);
            conf.setMapOutputKeyClass(TuplePair.class);

            if (getGroup().isSortReversed())
                conf.setOutputKeyComparatorClass(ReverseGroupingSortingComparator.class);
            else
                conf.setOutputKeyComparatorClass(GroupingSortingComparator.class);

            // no need to supply a reverse comparator, only equality is checked
            conf.setOutputValueGroupingComparator(GroupingComparator.class);
        }
    }

    // perform last so init above will pass to tasks
    conf.setInt("cascading.flow.step.id", id);
    conf.set("cascading.flow.step", Util.serializeBase64(this));

    return conf;
}
From source file:cascading.flow.hadoop.HadoopFlow.java
License:Open Source License
@Override
protected JobConf newConfig(JobConf defaultConfig) {
    return defaultConfig == null ? new JobConf() : HadoopUtil.copyJobConf(defaultConfig);
}
From source file:cascading.flow.hadoop.HadoopFlow.java
License:Open Source License
@ProcessConfiguration
@Override
public JobConf getConfig() {
    if (jobConf == null)
        initConfig(null, new JobConf());

    return jobConf;
}
From source file:cascading.flow.hadoop.HadoopFlowStep.java
License:Open Source License
public JobConf createInitializedConfig(FlowProcess<JobConf> flowProcess, JobConf parentConfig) {
    JobConf conf = parentConfig == null ? new JobConf() : HadoopUtil.copyJobConf(parentConfig);

    // disable warning
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    conf.setJobName(getStepDisplayName(conf.getInt("cascading.display.id.truncate", Util.ID_LENGTH)));

    conf.setOutputKeyClass(Tuple.class);
    conf.setOutputValueClass(Tuple.class);

    conf.setMapRunnerClass(FlowMapper.class);
    conf.setReducerClass(FlowReducer.class);

    // set for use by the shuffling phase
    TupleSerialization.setSerializations(conf);

    initFromSources(flowProcess, conf);
    initFromSink(flowProcess, conf);
    initFromTraps(flowProcess, conf);

    initFromStepConfigDef(conf);

    int numSinkParts = getSink().getScheme().getNumSinkParts();

    if (numSinkParts != 0) {
        // if no reducer, set num map tasks to control parts
        if (getGroup() != null)
            conf.setNumReduceTasks(numSinkParts);
        else
            conf.setNumMapTasks(numSinkParts);
    } else if (getGroup() != null) {
        int gatherPartitions = conf.getNumReduceTasks();

        if (gatherPartitions == 0)
            gatherPartitions = conf.getInt(FlowRuntimeProps.GATHER_PARTITIONS, 0);

        if (gatherPartitions == 0)
            throw new FlowException(getName(),
                    "a default number of gather partitions must be set, see FlowRuntimeProps");

        conf.setNumReduceTasks(gatherPartitions);
    }

    conf.setOutputKeyComparatorClass(TupleComparator.class);

    if (getGroup() == null) {
        conf.setNumReduceTasks(0); // disable reducers
    } else {
        // must set map output defaults when performing a reduce
        conf.setMapOutputKeyClass(Tuple.class);
        conf.setMapOutputValueClass(Tuple.class);
        conf.setPartitionerClass(GroupingPartitioner.class);

        // handles the case the groupby sort should be reversed
        if (getGroup().isSortReversed())
            conf.setOutputKeyComparatorClass(ReverseTupleComparator.class);

        addComparators(conf, "cascading.group.comparator", getGroup().getKeySelectors(), this, getGroup());

        if (getGroup().isGroupBy())
            addComparators(conf, "cascading.sort.comparator", getGroup().getSortingSelectors(), this, getGroup());

        if (!getGroup().isGroupBy()) {
            conf.setPartitionerClass(CoGroupingPartitioner.class);
            conf.setMapOutputKeyClass(IndexTuple.class); // allows groups to be sorted by index
            conf.setMapOutputValueClass(IndexTuple.class);
            conf.setOutputKeyComparatorClass(IndexTupleCoGroupingComparator.class); // sorts by group, then by index
            conf.setOutputValueGroupingComparator(CoGroupingComparator.class);
        }

        if (getGroup().isSorted()) {
            conf.setPartitionerClass(GroupingSortingPartitioner.class);
            conf.setMapOutputKeyClass(TuplePair.class);

            if (getGroup().isSortReversed())
                conf.setOutputKeyComparatorClass(ReverseGroupingSortingComparator.class);
            else
                conf.setOutputKeyComparatorClass(GroupingSortingComparator.class);

            // no need to supply a reverse comparator, only equality is checked
            conf.setOutputValueGroupingComparator(GroupingComparator.class);
        }
    }

    // perform last so init above will pass to tasks
    String versionString = Version.getRelease();

    if (versionString != null)
        conf.set("cascading.version", versionString);

    conf.set(CASCADING_FLOW_STEP_ID, getID());
    conf.set("cascading.flow.step.num", Integer.toString(getOrdinal()));

    HadoopUtil.setIsInflow(conf);

    Iterator<FlowNode> iterator = getFlowNodeGraph().getTopologicalIterator();

    String mapState = pack(iterator.next(), conf);
    String reduceState = pack(iterator.hasNext() ? iterator.next() : null, conf);

    // hadoop 20.2 doesn't like dist cache when using local mode
    int maxSize = Short.MAX_VALUE;
    int length = mapState.length() + reduceState.length();

    if (isHadoopLocalMode(conf) || length < maxSize) { // seems safe
        conf.set("cascading.flow.step.node.map", mapState);

        if (!Util.isEmpty(reduceState))
            conf.set("cascading.flow.step.node.reduce", reduceState);
    } else {
        conf.set("cascading.flow.step.node.map.path",
                HadoopMRUtil.writeStateToDistCache(conf, getID(), "map", mapState));

        if (!Util.isEmpty(reduceState))
            conf.set("cascading.flow.step.node.reduce.path",
                    HadoopMRUtil.writeStateToDistCache(conf, getID(), "reduce", reduceState));
    }

    return conf;
}
From source file:cascading.flow.hadoop.HadoopUtil.java
License:Open Source License
public static JobConf createJobConf(Map<Object, Object> properties, JobConf defaultJobconf) {
    JobConf jobConf = defaultJobconf == null ? new JobConf() : new JobConf(defaultJobconf);

    if (properties == null)
        return jobConf;

    Set<Object> keys = new HashSet<Object>(properties.keySet());

    // keys will only be grabbed if both key/value are String, so keep orig keys
    if (properties instanceof Properties)
        keys.addAll(((Properties) properties).stringPropertyNames());

    for (Object key : keys) {
        Object value = properties.get(key);

        if (value == null && properties instanceof Properties && key instanceof String)
            value = ((Properties) properties).getProperty((String) key);

        if (value == null) // don't stuff null values
            continue;

        // don't let these objects pass, even though toString is called below.
        if (value instanceof Class || value instanceof JobConf)
            continue;

        jobConf.set(key.toString(), value.toString());
    }

    return jobConf;
}
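A hypothetical call site for the helper above, not taken from the project sources; the class name and property keys are illustrative. It shows why the helper consults stringPropertyNames() in addition to keySet(): defaults supplied to java.util.Properties are only reachable through getProperty().

import java.util.Properties;
import org.apache.hadoop.mapred.JobConf;
import cascading.flow.hadoop.HadoopUtil;

// Hypothetical usage: both the explicit entry and the inherited default
// end up as string settings on the resulting JobConf.
public class CreateJobConfExample {
    public static JobConf build() {
        Properties defaults = new Properties();
        defaults.setProperty("cascading.app.name", "example-app"); // only visible via getProperty()

        Properties properties = new Properties(defaults);
        properties.setProperty("mapred.reduce.tasks", "2");

        return HadoopUtil.createJobConf(properties, new JobConf());
    }
}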
From source file:cascading.flow.hadoop.MapReduceFlowStep.java
License:Open Source License
@Override
public JobConf createInitializedConfig(FlowProcess<JobConf> flowProcess, JobConf parentConfig) {
    // allow to delete
    getSink().sinkConfInit(flowProcess, new JobConf());

    return jobConf;
}
From source file:cascading.flow.hadoop.planner.HadoopPlanner.java
License:Open Source License
/**
 * Method createJobConf returns a new JobConf instance using the values in the given properties argument.
 *
 * @param properties of type Map
 * @return a JobConf instance
 */
public static JobConf createJobConf(Map<Object, Object> properties) {
    JobConf conf = new JobConf();

    copyProperties(conf, properties);

    return conf;
}