Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext#getConfiguration.

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.conductor.hadoop.WritableValueInputFormat.java

License:Apache License

/**
 * Builds the input splits for the sequence file named by the
 * {@code INPUT_FILE_LOCATION_CONF} property of the job configuration.
 * <p>
 * The file is read record by record; each time the running record count
 * reaches a multiple of the {@code INPUTS_PER_SPLIT_CONF} value (default
 * {@code DEFAULT_INPUTS_PER_SPLIT}) a {@link FileSplit} is emitted. Whatever
 * length of the file has not been assigned to a split once the file is
 * exhausted becomes one final split.
 * <p>
 * The reader used for the scan is always closed before this method returns,
 * whether it completes normally or with an exception.
 *
 * @param context the job context supplying the configuration
 * @return the splits covering the input file
 * @throws IOException if the file cannot be opened, read or closed
 * @throws InterruptedException declared by the overridden method
 * @throws IllegalArgumentException if the input file location property is missing or empty
 */
@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException {
    final Configuration conf = context.getConfiguration();

    // init the reader
    final String filePath = conf.get(INPUT_FILE_LOCATION_CONF);
    checkArgument(!Strings.isNullOrEmpty(filePath), "Missing property: " + INPUT_FILE_LOCATION_CONF);

    final FileSystem fs = getFileSystem(conf);
    final Path path = fs.makeQualified(new Path(filePath));
    final SequenceFile.Reader reader = getReader(conf, path);

    final List<InputSplit> splits = Lists.newArrayList();
    try {
        // create the splits by looping through the values of the input file
        int totalInputs = 0;
        final int maxInputsPerSplit = conf.getInt(INPUTS_PER_SPLIT_CONF, DEFAULT_INPUTS_PER_SPLIT);
        long pos = 0L;
        long last = 0L;
        long lengthRemaining = fs.getFileStatus(path).getLen();
        final V value = getV(conf);
        // `last` is refreshed in the loop's update clause, i.e. after the body has run, so inside
        // the body it still holds the reader position recorded before the current record was read.
        for (final NullWritable key = NullWritable.get(); reader.next(key, value); last = reader.getPosition()) {
            if (++totalInputs % maxInputsPerSplit == 0) {
                final long splitSize = last - pos;
                splits.add(new FileSplit(path, pos, splitSize, null));
                lengthRemaining -= splitSize;
                pos = last;
            }
        }
        // create the last split if there is data remaining
        if (lengthRemaining != 0) {
            splits.add(new FileSplit(path, pos, lengthRemaining, null));
        }
    } finally {
        // the reader holds an open stream on the input file; release it on every exit path
        reader.close();
    }
    return splits;
}

From source file:com.conductor.s3.S3OptimizedFileInputFormat.java

License:Apache License

/**
 * Lists the input files of the job.
 * <p>
 * The job's input paths, the S3 native block size read from the job
 * configuration and an S3 client built from that same configuration are
 * handed to {@code S3InputFormatUtils.getFileStatuses}, whose result is
 * returned unchanged.
 *
 * @param job the job context
 * @return the file statuses reported by {@code S3InputFormatUtils}
 * @throws IOException if the job has no input paths, or if the lookup fails
 */
@Override
protected List<FileStatus> listStatus(final JobContext job) throws IOException {
    final Path[] inputPaths = getInputPaths(job);
    if (inputPaths.length < 1) {
        throw new IOException("No input paths specified in job");
    }

    final long s3BlockSize = job.getConfiguration().getLong(
            S3NativeFileSystemConfigKeys.S3_NATIVE_BLOCK_SIZE_KEY,
            S3NativeFileSystemConfigKeys.S3_NATIVE_BLOCK_SIZE_DEFAULT);

    return S3InputFormatUtils.getFileStatuses(
            S3HadoopUtils.getS3Client(job.getConfiguration()), s3BlockSize, inputPaths);
}

From source file:com.conductor.s3.S3TextInputFormat.java

License:Apache License

/**
 * Decides whether the given file may be divided into several splits.
 *
 * @param context the job context whose configuration is used to look up the codec
 * @param file the file to inspect
 * @return {@code true} when no compression codec is found for the file, or
 *         when the codec found is a {@link SplittableCompressionCodec};
 *         {@code false} otherwise
 */
@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
    final CompressionCodec codec = codecFactory.getCodec(file);
    if (codec == null) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}

From source file:com.conversantmedia.mapreduce.output.BloomFilterOutputFormat.java

License:Apache License

/**
 * Sets the number of insertions expected for our Bloom filter to
 * ensure it's adequately sized.
 * <p>
 * The value is stored as an int in the job's configuration under
 * {@code CONF_KEY_EXPECTED_INSERTIONS}.
 *
 * @param job   the context whose configuration receives the value
 * @param size   the size to set
 */
public static void setExpectedInsertions(JobContext job, int size) {
    job.getConfiguration().setInt(CONF_KEY_EXPECTED_INSERTIONS, size);
}

From source file:com.couchbase.sqoop.mapreduce.db.CouchbaseOutputFormat.java

License:Apache License

/**
 * Remembers the job context on this instance and verifies that the
 * configuration values needed by this output format are present.
 *
 * @param context the job context to store and validate
 * @throws IOException if {@code DBConfiguration.URL_PROPERTY} is not set in the configuration
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void checkOutputSpecs(final JobContext context) throws IOException, InterruptedException {
    this.jobContext = context;

    // Sanity check: the connection URL is the one setting that must be present.
    final String connectionUrl = context.getConfiguration().get(DBConfiguration.URL_PROPERTY);
    if (connectionUrl == null) {
        throw new IOException("Database connection URL is not set.");
    }
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingInputFormat.java

License:Apache License

/**
 * Collects the splits of every input path registered through
 * {@code PangoolMultipleInputs}.
 * <p>
 * For each path in the input-format map, the path is set as the only input
 * of a copy of the job, the mapped input format is loaded through
 * {@code InstancesDistributor}, and the splits it produces are each wrapped
 * in a {@code TaggedInputSplit} carrying the format entry and the value the
 * processor map holds for that path.
 *
 * @param job the job context
 * @return the tagged splits of all registered input paths
 * @throws IOException if the job copy cannot be created or a delegate fails
 * @throws InterruptedException if a delegate input format is interrupted
 */
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    final Configuration jobConf = job.getConfiguration();
    // Work on a copy so that setting per-path inputs does not touch the caller's job.
    final Job delegateJob = new Job(jobConf);
    final List<InputSplit> taggedSplits = new ArrayList<InputSplit>();

    final Map<Path, String> formatsByPath = PangoolMultipleInputs.getInputFormatMap(job);
    final Map<Path, String> processorsByPath = PangoolMultipleInputs.getInputProcessorFileMap(job);

    for (Map.Entry<Path, String> mapping : formatsByPath.entrySet()) {
        final Path inputPath = mapping.getKey();
        final String formatSpec = mapping.getValue();

        FileInputFormat.setInputPaths(delegateJob, inputPath);
        InputFormat delegate = InstancesDistributor.loadInstance(jobConf, InputFormat.class, formatSpec, true);
        PangoolMultipleInputs.setSpecificInputContext(delegateJob.getConfiguration(), formatSpec);

        final String processorSpec = processorsByPath.get(inputPath);
        List<InputSplit> delegateSplits = delegate.getSplits(delegateJob);
        for (InputSplit delegateSplit : delegateSplits) {
            taggedSplits.add(new TaggedInputSplit(delegateSplit, jobConf, formatSpec, processorSpec));
        }
    }

    return taggedSplits;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java

License:Apache License

/**
 * Parses the {@code PANGOOL_INPUT_DIR_FORMATS_CONF} property of the job
 * configuration into a map from input path to its associated format entry.
 * <p>
 * The property is a comma-separated list of {@code path;value} pairs.
 * The property value is dereferenced without a null check, and each entry
 * is indexed at positions 0 and 1 without a length check, so the property
 * is expected to be set and well formed.
 *
 * @param job the job context whose configuration is read
 * @return a new mutable map from each configured path to the value paired with it
 */
static Map<Path, String> getInputFormatMap(JobContext job) {
    final String rawMappings = job.getConfiguration().get(PANGOOL_INPUT_DIR_FORMATS_CONF);
    final Map<Path, String> formatsByPath = new HashMap<Path, String>();
    for (String mapping : rawMappings.split(",")) {
        final String[] parts = mapping.split(";");
        formatsByPath.put(new Path(parts[0]), parts[1]);
    }
    return formatsByPath;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java

License:Apache License

/**
 * Retrieves a map of {@link Path}s to the serialized {@link TupleMapper} that should be used for them.
 * /* w  ww  .j  a  v a 2s  .  c o m*/
 * @param job
 *          The {@link JobContext}
 * @return A map of paths to InputProcessor instances for the job
 */
static Map<Path, String> getInputProcessorFileMap(JobContext job) {
    Configuration conf = job.getConfiguration();
    if (conf.get(PANGOOL_INPUT_DIR_MAPPERS_CONF) == null) {
        return Collections.emptyMap();
    }
    Map<Path, String> m = new HashMap<Path, String>();
    String[] pathMappings = conf.get(PANGOOL_INPUT_DIR_MAPPERS_CONF).split(",");
    for (String pathMapping : pathMappings) {
        String[] split = pathMapping.split(";");
        String inputProcessorFile = split[1];
        m.put(new Path(split[0]), inputProcessorFile);
    }
    return m;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

/**
 * Returns the named outputs stored under the {@code MULTIPLE_OUTPUTS}
 * property of the job configuration.
 * <p>
 * The property is read with an empty-string default and tokenized on
 * single spaces, so an unset property yields an empty list.
 *
 * @param job the job context whose configuration is read
 * @return a new list with one element per space-separated token, in order
 */
private static List<String> getNamedOutputsList(JobContext job) {
    final String configured = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
    final List<String> outputNames = new ArrayList<String>();
    for (StringTokenizer tokens = new StringTokenizer(configured, " "); tokens.hasMoreTokens();) {
        outputNames.add(tokens.nextToken());
    }
    return outputNames;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

/**
 * Looks up the configuration entry whose key is {@code MO_PREFIX},
 * the named output and {@code FORMAT_INSTANCE_FILE} concatenated in that order.
 *
 * @param job the job context whose configuration is read
 * @param namedOutput the named output to look up
 * @return the configured value, or {@code null} when the key is not set
 */
private static String getNamedOutputFormatInstanceFile(JobContext job, String namedOutput) {
    final String propertyKey = MO_PREFIX + namedOutput + FORMAT_INSTANCE_FILE;
    return job.getConfiguration().get(propertyKey, null);
}