Example usage for org.apache.hadoop.mapred JobConf getStringCollection

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf#getStringCollection, drawn from open source projects.

Prototype

public Collection<String> getStringCollection(String name) 

Document

Get the comma-delimited values of the name property as a collection of Strings.
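
A minimal sketch of the read-modify-write pattern shared by the examples below, assuming a JobConf is already in hand (the class and helper names here are hypothetical). Note that the returned collection is a copy of the property value, not a live view of the configuration, so the updated list must be written back with setStrings:

import java.util.Collection;
import org.apache.hadoop.mapred.JobConf;

public class GetStringCollectionExample {
    // Appends a serialization class to io.serializations if it is not already present.
    static void addSerialization(JobConf conf, String className) {
        // getStringCollection splits the comma-delimited property value into strings
        Collection<String> serializations = conf.getStringCollection("io.serializations");
        if (!serializations.contains(className)) {
            serializations.add(className);
            // write the updated list back as a single comma-delimited value
            conf.setStrings("io.serializations", serializations.toArray(new String[0]));
        }
    }
}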

Usage

From source file:cascading.avro.AvroScheme.java

License:Apache License
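
This scheme reads the current io.serializations list and appends the Avro serializers only if they are not already registered.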

private void addAvroSerializations(JobConf conf) {
    Collection<String> serializations = conf.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
        serializations.add(AvroSerialization.class.getName());
        serializations.add(AvroSpecificRecordSerialization.class.getName());
    }

    conf.setStrings("io.serializations", serializations.toArray(new String[serializations.size()]));
}

From source file:cascading.flow.tez.Hadoop2TezFlowStep.java

License:Open Source License
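
Here getStringCollection retrieves the local resource paths that each accumulated source tap registered under its CASCADING_LOCAL_RESOURCES key, so they can be added to the Tez classpath.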

protected Map<FlowElement, Configuration> initFromSources(FlowNode flowNode,
        FlowProcess<TezConfiguration> flowProcess, Configuration conf,
        Map<String, LocalResource> taskLocalResources) {
    Set<? extends FlowElement> accumulatedSources = flowNode.getSourceElements(StreamMode.Accumulated);

    for (FlowElement element : accumulatedSources) {
        if (element instanceof Tap) {
            JobConf current = new JobConf(conf);
            Tap tap = (Tap) element;

            if (tap.getIdentifier() == null)
                throw new IllegalStateException("tap may not have null identifier: " + tap.toString());

            tap.sourceConfInit(flowProcess, current);

            Collection<String> paths = current.getStringCollection(CASCADING_LOCAL_RESOURCES + Tap.id(tap));

            if (!paths.isEmpty()) {
                String flowStagingPath = ((Hadoop2TezFlow) getFlow()).getFlowStagingPath();
                String resourceSubPath = Tap.id(tap);
                Map<Path, Path> pathMap = TezUtil.addToClassPath(current, flowStagingPath, resourceSubPath,
                        paths, LocalResourceType.FILE, taskLocalResources, null);

                current.setStrings(CASCADING_REMOTE_RESOURCES + Tap.id(tap),
                        taskLocalResources.keySet().toArray(new String[taskLocalResources.size()]));

                allLocalResources.putAll(taskLocalResources);
                syncPaths.putAll(pathMap);
            }

            Map<String, String> map = flowProcess.diffConfigIntoMap(new TezConfiguration(conf),
                    new TezConfiguration(current));
            conf.set("cascading.node.accumulated.source.conf." + Tap.id(tap), pack(map, conf));

            setLocalMode(conf, current, tap);
        }
    }

    Set<FlowElement> sources = new HashSet<>(flowNode.getSourceElements());

    sources.removeAll(accumulatedSources);

    if (sources.isEmpty())
        throw new IllegalStateException("all sources marked as accumulated");

    Map<FlowElement, Configuration> configs = new HashMap<>();

    for (FlowElement element : sources) {
        JobConf current = new JobConf(conf);

        String id = FlowElements.id(element);

        current.set("cascading.node.source", id);

        if (element instanceof Tap) {
            Tap tap = (Tap) element;

            if (tap.getIdentifier() == null)
                throw new IllegalStateException("tap may not have null identifier: " + tap.toString());

            tap.sourceConfInit(flowProcess, current);

            setLocalMode(conf, current, tap);
        }

        configs.put(element, current);
    }

    return configs;
}

From source file:com.github.dryangkun.hbase.tidx.hive.HBaseStorageHandler.java

License:Apache License
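
This storage handler reads the existing tmpjars entries into a LinkedHashSet so the HBase dependency jars can be merged in without duplicates.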

@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    try {
        HBaseSerDe.configureJobConf(tableDesc, jobConf);
        /*
         * HIVE-6356
         * The following code change is only needed for hbase-0.96.0 due to HBASE-9165, and
         * will not be required once Hive bumps up its hbase version. At that time, we will
         * only need TableMapReduceUtil.addDependencyJars(jobConf) here.
         */
        if (counterClass != null) {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class, TableInputFormatBase.class,
                    counterClass);
        } else {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class,
                    TableInputFormatBase.class);
        }
        if (HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_SNAPSHOT_NAME) != null) {
            // There is an extra dependency on MetricsRegistry for snapshot IF.
            TableMapReduceUtil.addDependencyJars(jobConf, MetricsRegistry.class);
        }
        Set<String> merged = new LinkedHashSet<String>(jobConf.getStringCollection("tmpjars"));

        Job copy = new Job(jobConf);
        TableMapReduceUtil.addDependencyJars(copy);
        merged.addAll(copy.getConfiguration().getStringCollection("tmpjars"));
        jobConf.set("tmpjars", StringUtils.arrayToString(merged.toArray(new String[0])));

        // Get credentials using the configuration instance which has HBase properties
        JobConf hbaseJobConf = new JobConf(getConf());
        org.apache.hadoop.hbase.mapred.TableMapReduceUtil.initCredentials(hbaseJobConf);
        ShimLoader.getHadoopShims().mergeCredentials(jobConf, hbaseJobConf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.maxpoint.cascading.avro.AvroScheme.java

License:Open Source License
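
A variant of the AvroScheme example above: AvroSerialization is appended to io.serializations only when it is missing.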

private void addAvroSerialization(JobConf conf) {
    // add AvroSerialization to io.serializations
    final Collection<String> serializations = conf.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
        serializations.add(AvroSerialization.class.getName());
        conf.setStrings("io.serializations", serializations.toArray(new String[serializations.size()]));
    }

}

From source file:org.apache.avro.mapred.AvroJob.java

License:Apache License
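
configureAvroShuffle registers AvroSerialization in io.serializations so Avro keys and values can be serialized during the shuffle.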

private static void configureAvroShuffle(JobConf job) {
    job.setOutputKeyComparatorClass(AvroKeyComparator.class);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(AvroValue.class);

    // add AvroSerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
        serializations.add(AvroSerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }
}

From source file:org.apache.avro.mapred.tether.TetherJob.java

License:Apache License
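
setupTetherJob reads the current io.serializations list and appends TetherKeySerialization before optionally caching the tether executable.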

private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);

    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}