Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

This page lists usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
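A minimal sketch of the typical call site; the class name and the property key "my.example.property" below are illustrative, not part of the Hadoop API:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;

public class ConfigurationExample {

    // JobContext exposes the job's Configuration; job-scoped settings are read from it.
    static String readProperty(JobContext context) {
        Configuration conf = context.getConfiguration();
        return conf.get("my.example.property", "default-value");
    }
}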

Usage

From source file: com.conductor.hadoop.WritableValueInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException {
    final Configuration conf = context.getConfiguration();

    // init the reader
    final String filePath = conf.get(INPUT_FILE_LOCATION_CONF);
    checkArgument(!Strings.isNullOrEmpty(filePath), "Missing property: " + INPUT_FILE_LOCATION_CONF);

    final FileSystem fs = getFileSystem(conf);
    final Path path = fs.makeQualified(new Path(filePath));
    final SequenceFile.Reader reader = getReader(conf, path);

    // create the splits by looping through the values of the input file
    int totalInputs = 0;
    int maxInputsPerSplit = conf.getInt(INPUTS_PER_SPLIT_CONF, DEFAULT_INPUTS_PER_SPLIT);
    long pos = 0L;
    long last = 0L;
    long lengthRemaining = fs.getFileStatus(path).getLen();
    final List<InputSplit> splits = Lists.newArrayList();
    final V value = getV(conf);
    for (final NullWritable key = NullWritable.get(); reader.next(key, value); last = reader.getPosition()) {
        if (++totalInputs % maxInputsPerSplit == 0) {
            long splitSize = last - pos;
            splits.add(new FileSplit(path, pos, splitSize, null));
            lengthRemaining -= splitSize;
            pos = last;
        }
    }
    // create the last split if there is data remaining
    if (lengthRemaining != 0) {
        splits.add(new FileSplit(path, pos, lengthRemaining, null));
    }
    return splits;
}
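A hedged driver-side sketch of how this format might be wired up. The literal key strings below are placeholders; the real values are the INPUT_FILE_LOCATION_CONF and INPUTS_PER_SPLIT_CONF constants referenced in the snippet above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.conductor.hadoop.WritableValueInputFormat;

public class WritableValueDriver {

    public static Job createJob() throws Exception {
        Configuration conf = new Configuration();
        // Placeholder keys; substitute the format's actual configuration constants.
        conf.set("input.file.location", "/data/values.seq");
        conf.setInt("inputs.per.split", 10000);
        Job job = Job.getInstance(conf, "writable-value-example");
        job.setInputFormatClass(WritableValueInputFormat.class);
        return job;
    }
}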

From source file: com.conductor.s3.S3OptimizedFileInputFormat.java

License: Apache License

@Override
protected List<FileStatus> listStatus(final JobContext job) throws IOException {
    final Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    final long blockSize = job.getConfiguration().getLong(S3NativeFileSystemConfigKeys.S3_NATIVE_BLOCK_SIZE_KEY,
            S3NativeFileSystemConfigKeys.S3_NATIVE_BLOCK_SIZE_DEFAULT);
    final AmazonS3 s3Client = S3HadoopUtils.getS3Client(job.getConfiguration());
    return S3InputFormatUtils.getFileStatuses(s3Client, blockSize, dirs);
}

From source file: com.conductor.s3.S3TextInputFormat.java

License: Apache License

@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    return null == codec || codec instanceof SplittableCompressionCodec;
}
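The same codec check can be exercised standalone; a small sketch using only stock Hadoop codecs (the file names are illustrative). Gzip resolves to a non-splittable codec, while bzip2 implements SplittableCompressionCodec:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

public class SplitabilityCheck {

    public static void main(String[] args) {
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        // Codecs are resolved by file extension.
        CompressionCodec gzip = factory.getCodec(new Path("logs/part-00000.gz"));
        CompressionCodec bzip2 = factory.getCodec(new Path("logs/part-00000.bz2"));
        System.out.println(gzip instanceof SplittableCompressionCodec);  // false
        System.out.println(bzip2 instanceof SplittableCompressionCodec); // true
    }
}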

From source file: com.conversantmedia.mapreduce.output.BloomFilterOutputFormat.java

License: Apache License

/**
 * Sets the number of insertions expected for our Bloom filter to
 * ensure it's adequately sized.
 * @param job    the context
 * @param size   the expected number of insertions
 */
public static void setExpectedInsertions(JobContext job, int size) {
    job.getConfiguration().setInt(CONF_KEY_EXPECTED_INSERTIONS, size);
}
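A typical driver-side call, sizing the filter before the job is submitted (the insertion count here is illustrative):

import org.apache.hadoop.mapreduce.Job;

import com.conversantmedia.mapreduce.output.BloomFilterOutputFormat;

public class BloomFilterDriver {

    public static void configure(Job job) {
        // Size the Bloom filter for roughly one million expected insertions.
        BloomFilterOutputFormat.setExpectedInsertions(job, 1000000);
        job.setOutputFormatClass(BloomFilterOutputFormat.class);
    }
}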

From source file: com.couchbase.sqoop.mapreduce.db.CouchbaseOutputFormat.java

License: Apache License

@Override
public void checkOutputSpecs(final JobContext context) throws IOException, InterruptedException {
    this.jobContext = context;
    final Configuration conf = context.getConfiguration();

    // Sanity check all the configuration values we need.
    if (null == conf.get(DBConfiguration.URL_PROPERTY)) {
        throw new IOException("Database connection URL is not set.");
    }
}
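One way the required URL typically gets set, assuming the stock org.apache.hadoop.mapreduce.lib.db.DBConfiguration (the snippet may use Sqoop's equivalent class instead); the driver class and URL below are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;

public class ConnectionSetup {

    public static void configure(Configuration conf) {
        // configureDB() populates DBConfiguration.URL_PROPERTY, which the
        // checkOutputSpecs() above verifies is present.
        DBConfiguration.configureDB(conf, "com.example.PlaceholderDriver",
                "http://localhost:8091/pools");
    }
}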

From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingInputFormat.java

License: Apache License

@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    Job jobCopy = new Job(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    Map<Path, String> formatMap = PangoolMultipleInputs.getInputFormatMap(job);
    Map<Path, String> mapperMap = PangoolMultipleInputs.getInputProcessorFileMap(job);

    for (Map.Entry<Path, String> entry : formatMap.entrySet()) {
        FileInputFormat.setInputPaths(jobCopy, entry.getKey());
        InputFormat inputFormat = InstancesDistributor.loadInstance(conf, InputFormat.class, entry.getValue(),
                true);
        PangoolMultipleInputs.setSpecificInputContext(jobCopy.getConfiguration(), entry.getValue());
        List<InputSplit> pathSplits = inputFormat.getSplits(jobCopy);
        for (InputSplit pathSplit : pathSplits) {
            splits.add(new TaggedInputSplit(pathSplit, conf, entry.getValue(), mapperMap.get(entry.getKey())));
        }
    }

    return splits;
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java

License: Apache License

static Map<Path, String> getInputFormatMap(JobContext job) {
    Map<Path, String> m = new HashMap<Path, String>();
    Configuration conf = job.getConfiguration();
    String[] pathMappings = conf.get(PANGOOL_INPUT_DIR_FORMATS_CONF).split(",");
    for (String pathMapping : pathMappings) {
        String[] split = pathMapping.split(";");
        m.put(new Path(split[0]), split[1]);
    }
    return m;
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java

License: Apache License

/**
 * Retrieves a map of {@link Path}s to the serialized {@link TupleMapper} that should be used for them.
 *
 * @param job
 *          The {@link JobContext}
 * @return A map of paths to InputProcessor instances for the job
 */
static Map<Path, String> getInputProcessorFileMap(JobContext job) {
    Configuration conf = job.getConfiguration();
    if (conf.get(PANGOOL_INPUT_DIR_MAPPERS_CONF) == null) {
        return Collections.emptyMap();
    }
    Map<Path, String> m = new HashMap<Path, String>();
    String[] pathMappings = conf.get(PANGOOL_INPUT_DIR_MAPPERS_CONF).split(",");
    for (String pathMapping : pathMappings) {
        String[] split = pathMapping.split(";");
        String inputProcessorFile = split[1];
        m.put(new Path(split[0]), inputProcessorFile);
    }
    return m;
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License: Apache License

private static List<String> getNamedOutputsList(JobContext job) {
    List<String> names = new ArrayList<String>();
    StringTokenizer st = new StringTokenizer(job.getConfiguration().get(MULTIPLE_OUTPUTS, ""), " ");
    while (st.hasMoreTokens()) {
        names.add(st.nextToken());
    }
    return names;
}
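The value being tokenized is a space-separated list of output names; a small sketch of the expected shape (the key below is a placeholder for the MULTIPLE_OUTPUTS constant):

import org.apache.hadoop.conf.Configuration;

public class NamedOutputsSketch {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Three named outputs, space-separated, as getNamedOutputsList() expects.
        conf.set("pangool.multiple.outputs.placeholder", "stats errors audit");
    }
}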

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License: Apache License

private static String getNamedOutputFormatInstanceFile(JobContext job, String namedOutput) {
    return job.getConfiguration().get(MO_PREFIX + namedOutput + FORMAT_INSTANCE_FILE, null);
}