Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

On this page you can find example usage of org.apache.hadoop.mapred JobConf get.

Prototype

public String get(String name) 

Document

Get the value of the name property, or null if no such property exists.
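
For reference, a minimal sketch of reading a property with JobConf get. The property name "example.prop" and its value are hypothetical, used only to illustrate the null-vs-value behavior described above:

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Set a property so the first lookup succeeds; the key is made up for this sketch.
        conf.set("example.prop", "hello");

        String present = conf.get("example.prop");     // "hello"
        String missing = conf.get("no.such.property"); // null, since the property was never set

        System.out.println(present);
        System.out.println(missing);
    }
}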

Usage

From source file:com.intel.hadoop.graphbuilder.preprocess.mapreduce.EdgeTransformReducer.java

License:Open Source License

@Override
public void configure(JobConf job) {
    super.configure(job);
    this.reduceEndPoint = job.getBoolean("reduceEndPoint", EdgeTransformMR.SOURCE);
    try {
        this.reduceFunc = (Functional) Class.forName(job.get("ReduceFunc")).newInstance();
        this.applyFunc = (Functional) Class.forName(job.get("ApplyFunc")).newInstance();
        this.reduceFunc.configure(job);
        this.applyFunc.configure(job);
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
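
The configure() method above resolves functional classes from names stored in the job configuration. As a complement, here is a hedged sketch of the driver side that would store those names; the class names and the boolean value for reduceEndPoint are placeholders, not GraphBuilder's actual values:

import org.apache.hadoop.mapred.JobConf;

public class EdgeTransformDriverSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Store fully qualified class names that configure() will read back
        // via job.get("ReduceFunc") / job.get("ApplyFunc") and load with Class.forName.
        job.set("ReduceFunc", "com.example.MyReduceFunctional"); // placeholder class name
        job.set("ApplyFunc", "com.example.MyApplyFunctional");   // placeholder class name
        job.setBoolean("reduceEndPoint", true);                  // placeholder value
    }
}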

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Driver to exec srcPath to destPath depending on required protocol.
 *
 * @param args arguments
 */
static void execution(final Configuration conf, final Arguments args) throws IOException {
    LOG.info("srcPaths=" + args.srcs);
    LOG.info("destPath=" + args.dst);
    LOG.info("execCmd=" + args.execCmd);

    JobConf job = createJobConf(conf);

    checkSrcPath(job, args.srcs);

    //Initialize the mapper
    try {
        if (setup(conf, job, args)) {
            JobClient.runJob(job);
        }
    } finally {
        //delete tmp
        fullyDelete(job.get(TMP_DIR_LABEL), job);
        //delete jobDirectory
        fullyDelete(job.get(JOB_DIR_LABEL), job);
    }
}

From source file:com.linkedin.haivvreo.AvroGenericRecordReader.java

License:Apache License

/**
 * Attempt to retrieve the reader schema.  Haivvreo has a couple of opportunities
 * to provide this, depending on whether or not we're just selecting data
 * or running with an MR job.
 * @return  Reader schema for the Avro object, or null if it has not been provided.
 * @throws HaivvreoException
 */
private Schema getSchema(JobConf job, FileSplit split) throws HaivvreoException, IOException {
    FileSystem fs = split.getPath().getFileSystem(job);
    // Inside of a MR job, we can pull out the actual properties
    if (HaivvreoUtils.insideMRJob(job)) {
        MapredWork mapRedWork = Utilities.getMapRedWork(job);

        // Iterate over the Path -> Partition descriptions to find the partition
        // that matches our input split.
        for (Map.Entry<String, PartitionDesc> pathsAndParts : mapRedWork.getPathToPartitionInfo().entrySet()) {
            String partitionPath = pathsAndParts.getKey();
            if (pathIsInPartition(split.getPath().makeQualified(fs), partitionPath)) {
                if (LOG.isInfoEnabled())
                    LOG.info("Matching partition " + partitionPath + " with input split " + split);

                Properties props = pathsAndParts.getValue().getProperties();
                if (props.containsKey(HaivvreoUtils.SCHEMA_LITERAL)
                        || props.containsKey(HaivvreoUtils.SCHEMA_URL)) {
                    return HaivvreoUtils.determineSchemaOrThrowException(props);
                } else
                    return null; // If it's not in this property, it won't be in any others
            }
        }
        if (LOG.isInfoEnabled())
            LOG.info("Unable to match filesplit " + split + " with a partition.");
    }

    // In "select * from table" situations (non-MR), Haivvreo can add things to the job
    // It's safe to add this to the job since it's not *actually* a mapred job.
    // Here the global state is confined to just this process.
    String s = job.get(AvroSerDe.HAIVVREO_SCHEMA);
    if (s != null) {
        LOG.info("Found the avro schema in the job: " + s);
        return Schema.parse(s);
    }
    // No more places to get the schema from. Give up.  May have to re-encode later.
    return null;
}

From source file:com.liveramp.cascading_ext.bloom.BloomAssemblyStrategy.java

License:Apache License

@Override
public void apply(Flow<JobConf> flow, List<FlowStep<JobConf>> predecessorSteps, FlowStep<JobConf> flowStep) {
    JobConf conf = flowStep.getConfig();

    String targetBloomID = conf.get(BloomProps.TARGET_BLOOM_FILTER_ID);
    if (targetBloomID != null) {
        prepareBloomFilterBuilder(flowStep);
    }
    //  the job is the filter which needs to use the bloom filter
    String sourceBloomID = conf.get(BloomProps.SOURCE_BLOOM_FILTER_ID);
    if (sourceBloomID != null) {
        buildBloomfilter(sourceBloomID, flowStep, predecessorSteps);
    }

}

From source file:com.liveramp.cascading_ext.bloom.BloomAssemblyStrategy.java

License:Apache License

/**
 * Merges bloom filter parts created across multiple splits of the keys and puts the result in the distributed cache.
 */
private void buildBloomfilter(String bloomID, FlowStep<JobConf> currentStep,
        List<FlowStep<JobConf>> predecessorSteps) {
    try {
        JobConf currentStepConf = currentStep.getConfig();
        currentStepConf.set("io.sort.mb", Integer.toString(BloomProps.getBufferSize(currentStepConf)));
        currentStepConf.set("mapred.job.reuse.jvm.num.tasks", "-1");

        String requiredBloomPath = currentStepConf.get(BloomProps.REQUIRED_BLOOM_FILTER_PATH);

        for (FlowStep<JobConf> step : predecessorSteps) {
            JobConf prevStepConf = step.getConfig();
            String targetBloomID = prevStepConf.get(BloomProps.TARGET_BLOOM_FILTER_ID);

            if (bloomID.equals(targetBloomID)) {
                LOG.info("Found step generating required bloom filter: " + targetBloomID);

                // Extract the counters from the previous job to approximate the average key/tuple size
                FlowStepStats stats = ((BaseFlowStep) step).getFlowStepStats();

                // Collect some of the stats gathered. This will help configure the bloom filter
                long numSampled = Counters.get(stats, CreateBloomFilter.StatsCounters.TOTAL_SAMPLED_TUPLES);
                long keySizeSum = Counters.get(stats, CreateBloomFilter.StatsCounters.KEY_SIZE_SUM);
                long matchSizeSum = Counters.get(stats, CreateBloomFilter.StatsCounters.TUPLE_SIZE_SUM);

                int avgKeySize = 0;
                int avgMatchSize = 0;

                if (numSampled != 0) {
                    avgKeySize = (int) (keySizeSum / numSampled);
                    avgMatchSize = (int) (matchSizeSum / numSampled);
                }

                LOG.info("Avg key size ~= " + avgKeySize);
                LOG.info("Avg match size ~= " + avgMatchSize);
                for (Map.Entry<String, String> entry : BloomUtil
                        .getPropertiesForBloomFilter(avgMatchSize, avgKeySize).entrySet()) {
                    currentStepConf.set(entry.getKey(), entry.getValue());
                }

                // Put merged result in distributed cache
                LOG.info("Adding dist cache properties to config:");
                for (Map.Entry<String, String> prop : BloomUtil.getPropertiesForDistCache(requiredBloomPath)
                        .entrySet()) {
                    LOG.info(prop.getKey() + " = " + prop.getValue());
                    String previousProperty = currentStepConf.get(prop.getKey());
                    if (previousProperty != null) {
                        LOG.info("found already existing value for key: " + prop.getKey() + ", found "
                                + previousProperty + ".  Appending.");
                        currentStepConf.set(prop.getKey(), previousProperty + "," + prop.getValue());
                    } else {
                        currentStepConf.set(prop.getKey(), prop.getValue());
                    }
                }

                BloomUtil.writeFilterToHdfs(prevStepConf, requiredBloomPath);
            }
        }
    } catch (Exception e) {
        throw new RuntimeException("Failed to create bloom filter!", e);
    }
}

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

public static long getNumBloomBits(JobConf conf) {
    return Long.parseLong(conf.get(NUM_BLOOM_BITS));
}

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

public static int getMaxBloomHashes(JobConf conf) {
    return Integer.parseInt(conf.get(MAX_BLOOM_HASHES));
}

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

public static int getMinBloomHashes(JobConf conf) {
    return Integer.parseInt(conf.get(MIN_BLOOM_HASHES));
}

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

public static int getNumSplits(JobConf conf) {
    return Integer.parseInt(conf.get(NUM_SPLITS));
}

From source file:com.liveramp.cascading_ext.bloom.BloomProps.java

License:Apache License

public static int getBufferSize(JobConf conf) {
    return Integer.parseInt(conf.get(BUFFER_SIZE));
}
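
The BloomProps getters above pass the raw result of conf.get straight to Integer.parseInt or Long.parseLong, so they fail if the property has not been set. When a fallback is acceptable, Configuration (which JobConf extends) also provides get(name, defaultValue) and typed getters with defaults. A small sketch; the property names and default values here are hypothetical:

import org.apache.hadoop.mapred.JobConf;

public class JobConfDefaultsSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Two-argument get returns the default instead of null when the key is absent.
        String label = conf.get("example.label", "unlabeled");

        // Typed getters parse the stored value and fall back to the supplied default.
        long numBits = conf.getLong("example.num.bloom.bits", 1024L);
        int numSplits = conf.getInt("example.num.splits", 1);

        System.out.println(label + " " + numBits + " " + numSplits);
    }
}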