List of usage examples for org.apache.hadoop.mapred JobConf get

public String get(String name)

Parameter: name - the property name.
Returns: the value of the name property, or null if no such property exists.
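The examples below all read job properties through this call. A minimal sketch of typical usage (the property names and values here are hypothetical, not taken from any of the projects below):

// Hypothetical property names; get(name) returns the stored value,
// get(name) on an unset key returns null, and the two-argument
// overload get(name, defaultValue) falls back to a default.
JobConf job = new JobConf();
job.set("my.app.codec", "snappy");

String codec = job.get("my.app.codec");                    // "snappy"
String missing = job.get("my.app.never.set");              // null
String withDefault = job.get("my.app.never.set", "none");  // "none"

if (missing == null) {
    // callers must guard against absent properties before parsing them
}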
From source file:com.intel.hadoop.graphbuilder.preprocess.mapreduce.EdgeTransformReducer.java
License:Open Source License
@Override
public void configure(JobConf job) {
    super.configure(job);
    this.reduceEndPoint = job.getBoolean("reduceEndPoint", EdgeTransformMR.SOURCE);
    try {
        this.reduceFunc = (Functional) Class.forName(job.get("ReduceFunc")).newInstance();
        this.applyFunc = (Functional) Class.forName(job.get("ApplyFunc")).newInstance();
        this.reduceFunc.configure(job);
        this.applyFunc.configure(job);
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
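The reducer above expects "ReduceFunc" and "ApplyFunc" to hold fully qualified class names that it resolves with Class.forName. A minimal sketch of how the driver side might set those properties before submitting the job (MyReduceFunctional and MyApplyFunctional are hypothetical implementations of Functional):

// Hypothetical driver-side setup: store implementation class names in the
// JobConf so the reducer's configure() can read them back with get().
JobConf job = new JobConf(EdgeTransformMR.class);
job.set("ReduceFunc", MyReduceFunctional.class.getName());
job.set("ApplyFunc", MyApplyFunctional.class.getName());
job.setBoolean("reduceEndPoint", EdgeTransformMR.SOURCE);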
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/**
 * Driver to exec srcPath to destPath depending on required protocol.
 * @param args arguments
 */
static void execution(final Configuration conf, final Arguments args) throws IOException {
    LOG.info("srcPaths=" + args.srcs);
    LOG.info("destPath=" + args.dst);
    LOG.info("execCmd=" + args.execCmd);

    JobConf job = createJobConf(conf);
    checkSrcPath(job, args.srcs);

    // Initialize the mapper
    try {
        if (setup(conf, job, args)) {
            JobClient.runJob(job);
        }
    } finally {
        // delete tmp
        fullyDelete(job.get(TMP_DIR_LABEL), job);
        // delete jobDirectory
        fullyDelete(job.get(JOB_DIR_LABEL), job);
    }
}
From source file:com.linkedin.haivvreo.AvroGenericRecordReader.java
License:Apache License
/**
 * Attempt to retrieve the reader schema. Haivvreo has a couple opportunities
 * to provide this, depending on whether or not we're just selecting data
 * or running with a MR job.
 * @return Reader schema for the Avro object, or null if it has not been provided.
 * @throws HaivvreoException
 */
private Schema getSchema(JobConf job, FileSplit split) throws HaivvreoException, IOException {
    FileSystem fs = split.getPath().getFileSystem(job);

    // Inside of a MR job, we can pull out the actual properties
    if (HaivvreoUtils.insideMRJob(job)) {
        MapredWork mapRedWork = Utilities.getMapRedWork(job);

        // Iterate over the Path -> Partition descriptions to find the partition
        // that matches our input split.
        for (Map.Entry<String, PartitionDesc> pathsAndParts : mapRedWork.getPathToPartitionInfo().entrySet()) {
            String partitionPath = pathsAndParts.getKey();
            if (pathIsInPartition(split.getPath().makeQualified(fs), partitionPath)) {
                if (LOG.isInfoEnabled())
                    LOG.info("Matching partition " + partitionPath + " with input split " + split);

                Properties props = pathsAndParts.getValue().getProperties();
                if (props.containsKey(HaivvreoUtils.SCHEMA_LITERAL) || props.containsKey(HaivvreoUtils.SCHEMA_URL)) {
                    return HaivvreoUtils.determineSchemaOrThrowException(props);
                } else
                    return null; // If it's not in this property, it won't be in any others
            }
        }
        if (LOG.isInfoEnabled())
            LOG.info("Unable to match filesplit " + split + " with a partition.");
    }

    // In "select * from table" situations (non-MR), Haivvreo can add things to the job.
    // It's safe to add this to the job since it's not *actually* a mapred job.
    // Here the global state is confined to just this process.
    String s = job.get(AvroSerDe.HAIVVREO_SCHEMA);
    if (s != null) {
        LOG.info("Found the avro schema in the job: " + s);
        return Schema.parse(s);
    }

    // No more places to get the schema from. Give up. May have to re-encode later.
    return null;
}
From source file:com.liveramp.cascading_ext.bloom.BloomAssemblyStrategy.java
License:Apache License
@Override
public void apply(Flow<JobConf> flow, List<FlowStep<JobConf>> predecessorSteps, FlowStep<JobConf> flowStep) {
    JobConf conf = flowStep.getConfig();

    String targetBloomID = conf.get(BloomProps.TARGET_BLOOM_FILTER_ID);
    if (targetBloomID != null) {
        prepareBloomFilterBuilder(flowStep);
    }

    // the job is the filter which needs to use the bloom filter
    String sourceBloomID = conf.get(BloomProps.SOURCE_BLOOM_FILTER_ID);
    if (sourceBloomID != null) {
        buildBloomfilter(sourceBloomID, flowStep, predecessorSteps);
    }
}
From source file:com.liveramp.cascading_ext.bloom.BloomAssemblyStrategy.java
License:Apache License
/**
 * Merges bloom filter parts created across multiple splits of the keys and puts the result in the distributed cache.
 */
private void buildBloomfilter(String bloomID, FlowStep<JobConf> currentStep, List<FlowStep<JobConf>> predecessorSteps) {
    try {
        JobConf currentStepConf = currentStep.getConfig();
        currentStepConf.set("io.sort.mb", Integer.toString(BloomProps.getBufferSize(currentStepConf)));
        currentStepConf.set("mapred.job.reuse.jvm.num.tasks", "-1");

        String requiredBloomPath = currentStepConf.get(BloomProps.REQUIRED_BLOOM_FILTER_PATH);

        for (FlowStep<JobConf> step : predecessorSteps) {
            JobConf prevStepConf = step.getConfig();
            String targetBloomID = prevStepConf.get(BloomProps.TARGET_BLOOM_FILTER_ID);

            if (bloomID.equals(targetBloomID)) {
                LOG.info("Found step generating required bloom filter: " + targetBloomID);

                // Extract the counters from the previous job to approximate the average key/tuple size
                FlowStepStats stats = ((BaseFlowStep) step).getFlowStepStats();

                // Collect some of the stats gathered. This will help configure the bloom filter
                long numSampled = Counters.get(stats, CreateBloomFilter.StatsCounters.TOTAL_SAMPLED_TUPLES);
                long keySizeSum = Counters.get(stats, CreateBloomFilter.StatsCounters.KEY_SIZE_SUM);
                long matchSizeSum = Counters.get(stats, CreateBloomFilter.StatsCounters.TUPLE_SIZE_SUM);

                int avgKeySize = 0;
                int avgMatchSize = 0;
                if (numSampled != 0) {
                    avgKeySize = (int) (keySizeSum / numSampled);
                    avgMatchSize = (int) (matchSizeSum / numSampled);
                }

                LOG.info("Avg key size ~= " + avgKeySize);
                LOG.info("Avg match size ~= " + avgMatchSize);

                for (Map.Entry<String, String> entry : BloomUtil.getPropertiesForBloomFilter(avgMatchSize, avgKeySize).entrySet()) {
                    currentStepConf.set(entry.getKey(), entry.getValue());
                }

                // Put merged result in distributed cache
                LOG.info("Adding dist cache properties to config:");
                for (Map.Entry<String, String> prop : BloomUtil.getPropertiesForDistCache(requiredBloomPath).entrySet()) {
                    LOG.info(prop.getKey() + " = " + prop.getValue());
                    String previousProperty = currentStepConf.get(prop.getKey());
                    if (previousProperty != null) {
                        LOG.info("found already existing value for key: " + prop.getKey() + ", found "
                            + previousProperty + ". Appending.");
                        currentStepConf.set(prop.getKey(), previousProperty + "," + prop.getValue());
                    } else {
                        currentStepConf.set(prop.getKey(), prop.getValue());
                    }
                }

                BloomUtil.writeFilterToHdfs(prevStepConf, requiredBloomPath);
            }
        }
    } catch (Exception e) {
        throw new RuntimeException("Failed to create bloom filter!", e);
    }
}
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
public static long getNumBloomBits(JobConf conf) { return Long.parseLong(conf.get(NUM_BLOOM_BITS)); }
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
public static int getMaxBloomHashes(JobConf conf) { return Integer.parseInt(conf.get(MAX_BLOOM_HASHES)); }
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
public static int getMinBloomHashes(JobConf conf) { return Integer.parseInt(conf.get(MIN_BLOOM_HASHES)); }
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
public static int getNumSplits(JobConf conf) { return Integer.parseInt(conf.get(NUM_SPLITS)); }
From source file:com.liveramp.cascading_ext.bloom.BloomProps.java
License:Apache License
public static int getBufferSize(JobConf conf) { return Integer.parseInt(conf.get(BUFFER_SIZE)); }
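Each of these getters assumes the corresponding property was already placed on the JobConf: get returns null for an unset key, so Long.parseLong and Integer.parseInt would throw a NumberFormatException if the BloomProps values were never set. A minimal sketch of the expected round trip (the literal values are hypothetical, and the key constants are assumed to be visible to the caller):

// Hypothetical setup: write the bloom properties before the getters run.
// If a key were missing, conf.get(key) would return null and the parse
// calls in the getters above would fail.
JobConf conf = new JobConf();
conf.set(BloomProps.NUM_BLOOM_BITS, "1000000");
conf.set(BloomProps.MAX_BLOOM_HASHES, "4");

long bits = BloomProps.getNumBloomBits(conf);    // 1000000L
int hashes = BloomProps.getMaxBloomHashes(conf); // 4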