List of usage examples for org.apache.hadoop.mapred JobConf getStringCollection
public Collection<String> getStringCollection(String name)
Get the comma delimited values of the name property as a collection of Strings.
From source file:cascading.avro.AvroScheme.java
License:Apache License
/**
 * Registers the Avro serializations in the job's {@code io.serializations} list.
 *
 * <p>Each serialization is checked on its own, so a configuration that already
 * lists {@code AvroSerialization} (for instance because another component
 * registered it first) still receives {@code AvroSpecificRecordSerialization}.
 * Previously the second entry was only added together with the first one.
 *
 * <p>The property is rewritten on every call, whether or not anything was added.
 *
 * @param conf job configuration whose {@code io.serializations} property is updated in place
 */
private void addAvroSerializations(JobConf conf) {
    Collection<String> serializations = conf.getStringCollection("io.serializations");

    // Order matters for appended entries: generic first, then specific-record.
    String[] required = {
        AvroSerialization.class.getName(),
        AvroSpecificRecordSerialization.class.getName()
    };

    for (String serializationName : required) {
        if (!serializations.contains(serializationName)) {
            serializations.add(serializationName);
        }
    }

    conf.setStrings("io.serializations", serializations.toArray(new String[serializations.size()]));
}
From source file:cascading.flow.tez.Hadoop2TezFlowStep.java
License:Open Source License
protected Map<FlowElement, Configuration> initFromSources(FlowNode flowNode, FlowProcess<TezConfiguration> flowProcess, Configuration conf, Map<String, LocalResource> taskLocalResources) { Set<? extends FlowElement> accumulatedSources = flowNode.getSourceElements(StreamMode.Accumulated); for (FlowElement element : accumulatedSources) { if (element instanceof Tap) { JobConf current = new JobConf(conf); Tap tap = (Tap) element;//from ww w .j a v a 2 s .co m if (tap.getIdentifier() == null) throw new IllegalStateException("tap may not have null identifier: " + tap.toString()); tap.sourceConfInit(flowProcess, current); Collection<String> paths = current.getStringCollection(CASCADING_LOCAL_RESOURCES + Tap.id(tap)); if (!paths.isEmpty()) { String flowStagingPath = ((Hadoop2TezFlow) getFlow()).getFlowStagingPath(); String resourceSubPath = Tap.id(tap); Map<Path, Path> pathMap = TezUtil.addToClassPath(current, flowStagingPath, resourceSubPath, paths, LocalResourceType.FILE, taskLocalResources, null); current.setStrings(CASCADING_REMOTE_RESOURCES + Tap.id(tap), taskLocalResources.keySet().toArray(new String[taskLocalResources.size()])); allLocalResources.putAll(taskLocalResources); syncPaths.putAll(pathMap); } Map<String, String> map = flowProcess.diffConfigIntoMap(new TezConfiguration(conf), new TezConfiguration(current)); conf.set("cascading.node.accumulated.source.conf." 
+ Tap.id(tap), pack(map, conf)); setLocalMode(conf, current, tap); } } Set<FlowElement> sources = new HashSet<>(flowNode.getSourceElements()); sources.removeAll(accumulatedSources); if (sources.isEmpty()) throw new IllegalStateException("all sources marked as accumulated"); Map<FlowElement, Configuration> configs = new HashMap<>(); for (FlowElement element : sources) { JobConf current = new JobConf(conf); String id = FlowElements.id(element); current.set("cascading.node.source", id); if (element instanceof Tap) { Tap tap = (Tap) element; if (tap.getIdentifier() == null) throw new IllegalStateException("tap may not have null identifier: " + tap.toString()); tap.sourceConfInit(flowProcess, current); setLocalMode(conf, current, tap); } configs.put(element, current); } return configs; }
From source file:com.github.dryangkun.hbase.tidx.hive.HBaseStorageHandler.java
License:Apache License
@Override public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { try {/*from w w w . j a va 2 s . c o m*/ HBaseSerDe.configureJobConf(tableDesc, jobConf); /* * HIVE-6356 * The following code change is only needed for hbase-0.96.0 due to HBASE-9165, and * will not be required once Hive bumps up its hbase version). At that time , we will * only need TableMapReduceUtil.addDependencyJars(jobConf) here. */ if (counterClass != null) { TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class, TableInputFormatBase.class, counterClass); } else { TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class, TableInputFormatBase.class); } if (HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_SNAPSHOT_NAME) != null) { // There is an extra dependency on MetricsRegistry for snapshot IF. TableMapReduceUtil.addDependencyJars(jobConf, MetricsRegistry.class); } Set<String> merged = new LinkedHashSet<String>(jobConf.getStringCollection("tmpjars")); Job copy = new Job(jobConf); TableMapReduceUtil.addDependencyJars(copy); merged.addAll(copy.getConfiguration().getStringCollection("tmpjars")); jobConf.set("tmpjars", StringUtils.arrayToString(merged.toArray(new String[0]))); // Get credentials using the configuration instance which has HBase properties JobConf hbaseJobConf = new JobConf(getConf()); org.apache.hadoop.hbase.mapred.TableMapReduceUtil.initCredentials(hbaseJobConf); ShimLoader.getHadoopShims().mergeCredentials(jobConf, hbaseJobConf); } catch (Exception e) { throw new RuntimeException(e); } }
From source file:com.maxpoint.cascading.avro.AvroScheme.java
License:Open Source License
private void addAvroSerialization(JobConf conf) { // add AvroSerialization to io.serializations final Collection<String> serializations = conf.getStringCollection("io.serializations"); if (!serializations.contains(AvroSerialization.class.getName())) { serializations.add(AvroSerialization.class.getName()); conf.setStrings("io.serializations", serializations.toArray(new String[serializations.size()])); }// w ww . j a va2 s .c o m }
From source file:org.apache.avro.mapred.AvroJob.java
License:Apache License
private static void configureAvroShuffle(JobConf job) { job.setOutputKeyComparatorClass(AvroKeyComparator.class); job.setMapOutputKeyClass(AvroKey.class); job.setMapOutputValueClass(AvroValue.class); // add AvroSerialization to io.serializations Collection<String> serializations = job.getStringCollection("io.serializations"); if (!serializations.contains(AvroSerialization.class.getName())) { serializations.add(AvroSerialization.class.getName()); job.setStrings("io.serializations", serializations.toArray(new String[0])); }/*from w w w .ja va 2 s .c o m*/ }
From source file:org.apache.avro.mapred.tether.TetherJob.java
License:Apache License
private static void setupTetherJob(JobConf job) throws IOException { job.setMapRunnerClass(TetherMapRunner.class); job.setPartitionerClass(TetherPartitioner.class); job.setReducerClass(TetherReducer.class); job.setInputFormat(TetherInputFormat.class); job.setOutputFormat(TetherOutputFormat.class); job.setOutputKeyClass(TetherData.class); job.setOutputKeyComparatorClass(TetherKeyComparator.class); job.setMapOutputValueClass(NullWritable.class); // set the map output key class to TetherData job.setMapOutputKeyClass(TetherData.class); // add TetherKeySerialization to io.serializations Collection<String> serializations = job.getStringCollection("io.serializations"); if (!serializations.contains(TetherKeySerialization.class.getName())) { serializations.add(TetherKeySerialization.class.getName()); job.setStrings("io.serializations", serializations.toArray(new String[0])); }/* w w w .ja v a 2 s. c om*/ // determine whether the executable should be added to the cache. if (job.getBoolean(TETHER_EXEC_CACHED, false)) { DistributedCache.addCacheFile(getExecutable(job), job); } }