Example usage for org.apache.hadoop.mapred JobConf getStringCollection

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf#getStringCollection, drawn from open source projects.

Prototype

public Collection<String> getStringCollection(String name) 

Document

Get the comma-delimited values of the name property as a collection of Strings.
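
A minimal sketch of the read-modify-write pattern shared by the examples below, assuming a JobConf is already in hand (the class and helper names here are hypothetical). Note that the returned collection is a copy of the property value, not a live view of the configuration, so the updated list must be written back with setStrings:

import java.util.Collection;
import org.apache.hadoop.mapred.JobConf;

public class GetStringCollectionExample {
    // Appends a serialization class to io.serializations if it is not already present.
    static void addSerialization(JobConf conf, String className) {
        // getStringCollection splits the comma-delimited property value into strings
        Collection<String> serializations = conf.getStringCollection("io.serializations");
        if (!serializations.contains(className)) {
            serializations.add(className);
            // write the updated list back as a single comma-delimited value
            conf.setStrings("io.serializations", serializations.toArray(new String[0]));
        }
    }
}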

Usage

From source file:cascading.avro.AvroScheme.java

License:Apache License
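
This scheme reads the current io.serializations list and appends the Avro serializers only if they are not already registered.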

private void addAvroSerializations(JobConf conf) {
    Collection<String> serializations = conf.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
        serializations.add(AvroSerialization.class.getName());
        serializations.add(AvroSpecificRecordSerialization.class.getName());
    }

    conf.setStrings("io.serializations", serializations.toArray(new String[serializations.size()]));
}

From source file:cascading.flow.tez.Hadoop2TezFlowStep.java

License:Open Source License
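
Here getStringCollection retrieves the local resource paths that each accumulated source tap registered under its CASCADING_LOCAL_RESOURCES key, so they can be added to the Tez classpath.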

protected Map<FlowElement, Configuration> initFromSources(FlowNode flowNode,
        FlowProcess<TezConfiguration> flowProcess, Configuration conf,
        Map<String, LocalResource> taskLocalResources) {
    Set<? extends FlowElement> accumulatedSources = flowNode.getSourceElements(StreamMode.Accumulated);

    for (FlowElement element : accumulatedSources) {
        if (element instanceof Tap) {
            JobConf current = new JobConf(conf);
            Tap tap = (Tap) element;

            if (tap.getIdentifier() == null)
                throw new IllegalStateException("tap may not have null identifier: " + tap.toString());

            tap.sourceConfInit(flowProcess, current);

            Collection<String> paths = current.getStringCollection(CASCADING_LOCAL_RESOURCES + Tap.id(tap));

            if (!paths.isEmpty()) {
                String flowStagingPath = ((Hadoop2TezFlow) getFlow()).getFlowStagingPath();
                String resourceSubPath = Tap.id(tap);
                Map<Path, Path> pathMap = TezUtil.addToClassPath(current, flowStagingPath, resourceSubPath,
                        paths, LocalResourceType.FILE, taskLocalResources, null);

                current.setStrings(CASCADING_REMOTE_RESOURCES + Tap.id(tap),
                        taskLocalResources.keySet().toArray(new String[taskLocalResources.size()]));

                allLocalResources.putAll(taskLocalResources);
                syncPaths.putAll(pathMap);
            }

            Map<String, String> map = flowProcess.diffConfigIntoMap(new TezConfiguration(conf),
                    new TezConfiguration(current));
            conf.set("cascading.node.accumulated.source.conf." + Tap.id(tap), pack(map, conf));

            setLocalMode(conf, current, tap);
        }
    }

    Set<FlowElement> sources = new HashSet<>(flowNode.getSourceElements());

    sources.removeAll(accumulatedSources);

    if (sources.isEmpty())
        throw new IllegalStateException("all sources marked as accumulated");

    Map<FlowElement, Configuration> configs = new HashMap<>();

    for (FlowElement element : sources) {
        JobConf current = new JobConf(conf);

        String id = FlowElements.id(element);

        current.set("cascading.node.source", id);

        if (element instanceof Tap) {
            Tap tap = (Tap) element;

            if (tap.getIdentifier() == null)
                throw new IllegalStateException("tap may not have null identifier: " + tap.toString());

            tap.sourceConfInit(flowProcess, current);

            setLocalMode(conf, current, tap);
        }

        configs.put(element, current);
    }

    return configs;
}

From source file:com.github.dryangkun.hbase.tidx.hive.HBaseStorageHandler.java

License:Apache License
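
This storage handler reads the existing tmpjars entries into a LinkedHashSet so the HBase dependency jars can be merged in without duplicates.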

@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    try {
        HBaseSerDe.configureJobConf(tableDesc, jobConf);
        /*
         * HIVE-6356
         * The following code change is only needed for hbase-0.96.0 due to HBASE-9165, and
         * will not be required once Hive bumps up its hbase version. At that time, we will
         * only need TableMapReduceUtil.addDependencyJars(jobConf) here.
         */
        if (counterClass != null) {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class, TableInputFormatBase.class,
                    counterClass);
        } else {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class,
                    TableInputFormatBase.class);
        }
        if (HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_SNAPSHOT_NAME) != null) {
            // There is an extra dependency on MetricsRegistry for snapshot IF.
            TableMapReduceUtil.addDependencyJars(jobConf, MetricsRegistry.class);
        }
        Set<String> merged = new LinkedHashSet<String>(jobConf.getStringCollection("tmpjars"));

        Job copy = new Job(jobConf);
        TableMapReduceUtil.addDependencyJars(copy);
        merged.addAll(copy.getConfiguration().getStringCollection("tmpjars"));
        jobConf.set("tmpjars", StringUtils.arrayToString(merged.toArray(new String[0])));

        // Get credentials using the configuration instance which has HBase properties
        JobConf hbaseJobConf = new JobConf(getConf());
        org.apache.hadoop.hbase.mapred.TableMapReduceUtil.initCredentials(hbaseJobConf);
        ShimLoader.getHadoopShims().mergeCredentials(jobConf, hbaseJobConf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.maxpoint.cascading.avro.AvroScheme.java

License:Open Source License
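
A variant of the AvroScheme example above: AvroSerialization is appended to io.serializations only when it is missing.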

private void addAvroSerialization(JobConf conf) {
    // add AvroSerialization to io.serializations
    final Collection<String> serializations = conf.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
        serializations.add(AvroSerialization.class.getName());
        conf.setStrings("io.serializations", serializations.toArray(new String[serializations.size()]));
    }

}

From source file:org.apache.avro.mapred.AvroJob.java

License:Apache License
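
configureAvroShuffle registers AvroSerialization in io.serializations so Avro keys and values can be serialized during the shuffle.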

private static void configureAvroShuffle(JobConf job) {
    job.setOutputKeyComparatorClass(AvroKeyComparator.class);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(AvroValue.class);

    // add AvroSerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
        serializations.add(AvroSerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }
}

From source file:org.apache.avro.mapred.tether.TetherJob.java

License:Apache License
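
setupTetherJob reads the current io.serializations list and appends TetherKeySerialization before optionally caching the tether executable.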

private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);

    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}