List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
public Configuration getConfiguration();
From source file:com.conductor.hadoop.WritableValueInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException { final Configuration conf = context.getConfiguration(); // init the reader final String filePath = conf.get(INPUT_FILE_LOCATION_CONF); checkArgument(!Strings.isNullOrEmpty(filePath), "Missing property: " + INPUT_FILE_LOCATION_CONF); final FileSystem fs = getFileSystem(conf); final Path path = fs.makeQualified(new Path(filePath)); final SequenceFile.Reader reader = getReader(conf, path); // create the splits by looping through the values of the input file int totalInputs = 0; int maxInputsPerSplit = conf.getInt(INPUTS_PER_SPLIT_CONF, DEFAULT_INPUTS_PER_SPLIT); long pos = 0L; long last = 0L; long lengthRemaining = fs.getFileStatus(path).getLen(); final List<InputSplit> splits = Lists.newArrayList(); final V value = getV(conf); for (final NullWritable key = NullWritable.get(); reader.next(key, value); last = reader.getPosition()) { if (++totalInputs % maxInputsPerSplit == 0) { long splitSize = last - pos; splits.add(new FileSplit(path, pos, splitSize, null)); lengthRemaining -= splitSize; pos = last;/*from w w w . j av a2 s.c om*/ } } // create the last split if there is data remaining if (lengthRemaining != 0) { splits.add(new FileSplit(path, pos, lengthRemaining, null)); } return splits; }
From source file:com.conductor.s3.S3OptimizedFileInputFormat.java
License:Apache License
@Override protected List<FileStatus> listStatus(final JobContext job) throws IOException { final Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); }//from w ww . j a va2s. c om final long blockSize = job.getConfiguration().getLong(S3NativeFileSystemConfigKeys.S3_NATIVE_BLOCK_SIZE_KEY, S3NativeFileSystemConfigKeys.S3_NATIVE_BLOCK_SIZE_DEFAULT); final AmazonS3 s3Client = S3HadoopUtils.getS3Client(job.getConfiguration()); return S3InputFormatUtils.getFileStatuses(s3Client, blockSize, dirs); }
From source file:com.conductor.s3.S3TextInputFormat.java
License:Apache License
/**
 * Decides whether the given file may be split: it may when no compression codec
 * is resolved for it, or when the resolved codec is a
 * {@link SplittableCompressionCodec}.
 *
 * @param context the job context supplying the configuration for codec lookup
 * @param file the file being considered
 * @return {@code true} if the file can be split, {@code false} otherwise
 */
@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
    final CompressionCodec resolved = codecFactory.getCodec(file);

    // No codec resolved for this file: always splittable.
    if (resolved == null) {
        return true;
    }
    return resolved instanceof SplittableCompressionCodec;
}
From source file:com.conversantmedia.mapreduce.output.BloomFilterOutputFormat.java
License:Apache License
/**
 * Records the expected number of Bloom filter insertions in the job
 * configuration under {@code CONF_KEY_EXPECTED_INSERTIONS}, so the filter can be
 * sized adequately.
 *
 * @param job the job context whose configuration is updated
 * @param size the expected number of insertions to store
 */
public static void setExpectedInsertions(JobContext job, int size) {
    job.getConfiguration().setInt(CONF_KEY_EXPECTED_INSERTIONS, size);
}
From source file:com.couchbase.sqoop.mapreduce.db.CouchbaseOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(final JobContext context) throws IOException, InterruptedException { this.jobContext = context; final Configuration conf = context.getConfiguration(); // Sanity check all the configuration values we need. if (null == conf.get(DBConfiguration.URL_PROPERTY)) { throw new IOException("Database connection URL is not set."); }/*from w w w. j a v a 2 s . co m*/ }
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingInputFormat.java
License:Apache License
/**
 * Builds the splits for every configured input path: for each path in the input
 * format map, the matching {@link InputFormat} instance is loaded, asked for its
 * splits, and each split is wrapped in a {@link TaggedInputSplit} that carries the
 * input format value and the mapper value registered for that path.
 *
 * @param job the job context describing the inputs
 * @return all tagged splits, in the iteration order of the input format map
 * @throws IOException if a delegate input format fails to compute its splits
 * @throws InterruptedException if a delegate input format is interrupted
 */
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    final Configuration jobConf = job.getConfiguration();
    // Scratch job whose input path is re-pointed at one path per iteration.
    final Job scratchJob = new Job(jobConf);
    final List<InputSplit> taggedSplits = new ArrayList<InputSplit>();

    final Map<Path, String> formatsByPath = PangoolMultipleInputs.getInputFormatMap(job);
    final Map<Path, String> mappersByPath = PangoolMultipleInputs.getInputProcessorFileMap(job);

    for (Map.Entry<Path, String> mapping : formatsByPath.entrySet()) {
        final Path inputPath = mapping.getKey();
        final String formatSpec = mapping.getValue();

        FileInputFormat.setInputPaths(scratchJob, inputPath);
        InputFormat delegate = InstancesDistributor.loadInstance(jobConf, InputFormat.class, formatSpec, true);
        PangoolMultipleInputs.setSpecificInputContext(scratchJob.getConfiguration(), formatSpec);

        // The mapper value is per path, so look it up once rather than once per split.
        final String mapperSpec = mappersByPath.get(inputPath);
        List<InputSplit> delegateSplits = delegate.getSplits(scratchJob);
        for (InputSplit delegateSplit : delegateSplits) {
            taggedSplits.add(new TaggedInputSplit(delegateSplit, jobConf, formatSpec, mapperSpec));
        }
    }
    return taggedSplits;
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java
License:Apache License
/**
 * Parses the {@code PANGOOL_INPUT_DIR_FORMATS_CONF} property into a map from input
 * {@link Path} to the input format value registered for it. The property is a
 * comma-separated list of {@code path;format} entries.
 *
 * @param job the {@link JobContext} whose configuration is read
 * @return a mutable map of input paths to their input format values
 * @throws IllegalStateException if the property is not set
 * @throws IllegalArgumentException if an entry does not have a {@code ;}-separated
 *         path and format
 */
static Map<Path, String> getInputFormatMap(JobContext job) {
    Configuration conf = job.getConfiguration();
    String rawMappings = conf.get(PANGOOL_INPUT_DIR_FORMATS_CONF);
    // Fail with a message naming the property instead of a bare NullPointerException.
    if (rawMappings == null) {
        throw new IllegalStateException("Missing property: " + PANGOOL_INPUT_DIR_FORMATS_CONF);
    }

    Map<Path, String> m = new HashMap<Path, String>();
    for (String pathMapping : rawMappings.split(",")) {
        String[] split = pathMapping.split(";");
        // Report the offending entry instead of a bare ArrayIndexOutOfBoundsException.
        if (split.length < 2) {
            throw new IllegalArgumentException("Malformed entry '" + pathMapping + "' in property "
                    + PANGOOL_INPUT_DIR_FORMATS_CONF + "; expected <path>;<format>");
        }
        m.put(new Path(split[0]), split[1]);
    }
    return m;
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java
License:Apache License
/** * Retrieves a map of {@link Path}s to the serialized {@link TupleMapper} that should be used for them. * /* w ww .j a v a 2s . c o m*/ * @param job * The {@link JobContext} * @return A map of paths to InputProcessor instances for the job */ static Map<Path, String> getInputProcessorFileMap(JobContext job) { Configuration conf = job.getConfiguration(); if (conf.get(PANGOOL_INPUT_DIR_MAPPERS_CONF) == null) { return Collections.emptyMap(); } Map<Path, String> m = new HashMap<Path, String>(); String[] pathMappings = conf.get(PANGOOL_INPUT_DIR_MAPPERS_CONF).split(","); for (String pathMapping : pathMappings) { String[] split = pathMapping.split(";"); String inputProcessorFile = split[1]; m.put(new Path(split[0]), inputProcessorFile); } return m; }
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java
License:Apache License
/**
 * Returns the named outputs listed in the {@code MULTIPLE_OUTPUTS} property, which
 * holds the names separated by spaces. An unset property yields an empty list.
 *
 * @param job the job context whose configuration is read
 * @return the named outputs in the order they appear in the property
 */
private static List<String> getNamedOutputsList(JobContext job) {
    final String spaceSeparated = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
    final List<String> namedOutputs = new ArrayList<String>();
    for (StringTokenizer tokens = new StringTokenizer(spaceSeparated, " "); tokens.hasMoreTokens();) {
        namedOutputs.add(tokens.nextToken());
    }
    return namedOutputs;
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java
License:Apache License
/**
 * Looks up the format instance file configured for a named output. The property
 * name is {@code MO_PREFIX + namedOutput + FORMAT_INSTANCE_FILE}.
 *
 * @param job the job context whose configuration is read
 * @param namedOutput the named output to look up
 * @return the configured value, or {@code null} when the property is not set
 */
private static String getNamedOutputFormatInstanceFile(JobContext job, String namedOutput) {
    final String propertyName = MO_PREFIX + namedOutput + FORMAT_INSTANCE_FILE;
    return job.getConfiguration().get(propertyName, null);
}