List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
public Configuration getConfiguration();
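Most of the examples below share one pattern: a static helper receives the read-only JobContext, calls getConfiguration(), and reads (or writes) a named property. A minimal sketch of that pattern, assuming a hypothetical key example.max.records used purely for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;

public class ConfigurationLookupSketch {

    // Hypothetical property key, used only to illustrate the lookup pattern.
    private static final String MAX_RECORDS = "example.max.records";

    /** Read an int setting from the job's configuration, falling back to 1000. */
    public static int getMaxRecords(JobContext job) {
        Configuration conf = job.getConfiguration();
        return conf.getInt(MAX_RECORDS, 1000);
    }
}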
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Is the job output compressed?
 *
 * @param job the Job to look in
 * @return <code>true</code> if the job output should be compressed,
 *         <code>false</code> otherwise
 */
public static boolean getCompressOutput(JobContext job) {
    return job.getConfiguration().getBoolean(FileOutputFormat.COMPRESS, false);
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Get the {@link CompressionCodec} for compressing the job outputs.
 *
 * @param job the {@link Job} to look in
 * @param defaultValue the {@link CompressionCodec} to return if not set
 * @return the {@link CompressionCodec} to be used to compress the job outputs
 * @throws IllegalArgumentException if the class was specified, but not found
 */
public static Class<? extends CompressionCodec> getOutputCompressorClass(JobContext job,
        Class<? extends CompressionCodec> defaultValue) {
    Class<? extends CompressionCodec> codecClass = defaultValue;
    Configuration conf = job.getConfiguration();
    String name = conf.get(FileOutputFormat.COMPRESS_CODEC);
    if (name != null) {
        try {
            codecClass = conf.getClassByName(name).asSubclass(CompressionCodec.class);
        } catch (ClassNotFoundException e) {
            throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
        }
    }
    return codecClass;
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    }

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());

    // if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    //     throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
    // }
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Get the {@link Path} to the output directory for the map-reduce job.
 *
 * @return the {@link Path} to the output directory for the map-reduce job.
 * @see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext)
 */
public static Path getOutputPath(JobContext job) {
    String name = job.getConfiguration().get(FileOutputFormat.OUTDIR);
    return name == null ? null : new Path(name);
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Get the base output name for the output file.
 */
protected static String getOutputName(JobContext job) {
    return job.getConfiguration().get(BASE_OUTPUT_NAME, PART);
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Set the base output name for the output file to be created.
 */
protected static void setOutputName(JobContext job, String name) {
    job.getConfiguration().set(BASE_OUTPUT_NAME, name);
}
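The getters and setters above are read back at runtime from values configured on the driver side; Job implements JobContext, so the same getConfiguration() call is available there. A hedged driver-side sketch (job name and output path are placeholders) that sets the compression keys these helpers read:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CompressionDriverSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "compression-driver-sketch");

        // Job implements JobContext, so getConfiguration() returns the same Configuration
        // that getCompressOutput() and getOutputCompressorClass() read later.
        Configuration conf = job.getConfiguration();
        conf.setBoolean(FileOutputFormat.COMPRESS, true);
        conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);

        FileOutputFormat.setOutputPath(job, new Path("/tmp/compressed-output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

In practice the stock FileOutputFormat.setCompressOutput(job, true) and setOutputCompressorClass(job, GzipCodec.class) helpers wrap these same configuration calls.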
From source file:com.buzzinate.dm.cassandra.ColumnFamilyOutputFormat.java
License:Apache License
/**
 * Check for validity of the output-specification for the job.
 *
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context) {
    checkOutputSpecs(context.getConfiguration());
}
From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License:Apache License
/**
 * Get a PathFilter instance of the filter set for the input paths.
 *
 * @param context A read-only view of the job that is provided to
 *                the tasks while they are running.
 * @return the PathFilter instance set for the job, NULL if none has been set.
 */
public static PathFilter getInputPathFilter(JobContext context) {
    Configuration conf = context.getConfiguration();
    Class<?> filterClass = conf.getClass("mapred.input.pathFilter.class", null, PathFilter.class);
    return (filterClass != null) ? (PathFilter) ReflectionUtils.newInstance(filterClass, conf) : null;
}
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = ConfigHelper.getInputKeyspace(context.getConfiguration());
    cfName = ConfigHelper.getInputColumnFamily(context.getConfiguration());
    partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<InputSplit>>> splitfutures = new ArrayList<Future<List<InputSplit>>>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner.preservesOrder()
                    : "ConfigHelper.setInputKeyRange(..) can only be used with an order-preserving partitioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
From source file:com.cloudera.bigdata.analysis.dataload.io.SplitableInputFormat.java
License:Apache License
@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}