Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
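
Before the examples, a minimal orientation sketch of the two sides of this API: the driver writes values into the job's Configuration, and any JobContext (a Job, a TaskAttemptContext, a Mapper.Context, ...) reads them back through getConfiguration(). This is an illustrative sketch, assuming a Hadoop 2.x driver; the property name "example.lines.per.split" is made up for the example and is not a real Hadoop key.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        // Driver side: write a job-scoped setting before submission.
        Job job = Job.getInstance(new Configuration(), "getConfiguration example");
        job.getConfiguration().setInt("example.lines.per.split", 4);

        // Job implements JobContext, so the same read pattern used by the
        // InputFormats below also works here.
        readSetting(job);
    }

    static void readSetting(JobContext context) {
        // getConfiguration() returns the job's Configuration; unknown keys
        // fall back to the supplied default (1 here).
        int linesPerSplit = context.getConfiguration().getInt("example.lines.per.split", 1);
        System.out.println("lines per split = " + linesPerSplit);
    }
}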

Usage

From source file: bucket_sort.NLineInputFormat.java

License: Apache License

/** 
 * Logically splits the set of input files for the job, splits N lines
 * of the input as one split.
 * 
 * @see FileInputFormat#getSplits(JobContext)
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    int numLinesPerSplit = getNumLinesPerSplit(job);
    for (FileStatus status : listStatus(job)) {
        splits.addAll(getSplitsForFile(status, job.getConfiguration(), numLinesPerSplit));
    }
    return splits;
}

From source file: bucket_sort.NLineInputFormat.java

License: Apache License

/**
 * Get the number of lines per split
 * @param job the job
 * @return the number of lines per split
 */
public static int getNumLinesPerSplit(JobContext job) {
    return job.getConfiguration().getInt(LINES_PER_MAP, 1);
}
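
The value read above has to be written by the driver under the same key. A hedged driver-side sketch, assuming this bucket_sort.NLineInputFormat exposes its LINES_PER_MAP constant publicly (the value 4 is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class NLineDriverSketch {
    public static Job configure() throws Exception {
        Job job = Job.getInstance(new Configuration(), "bucket sort");
        // Writes the key that getNumLinesPerSplit(JobContext) reads back (default 1);
        // LINES_PER_MAP is assumed to be a public constant on bucket_sort.NLineInputFormat.
        job.getConfiguration().setInt(bucket_sort.NLineInputFormat.LINES_PER_MAP, 4);
        job.setInputFormatClass(bucket_sort.NLineInputFormat.class);
        return job;
    }
}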

From source file: ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java

License: Open Source License

private List<InputSplit> getInputSplits(JobContext jobContext, String inputFormatClass, Path path)
        throws ClassNotFoundException, IOException {
    Configuration conf = jobContext.getConfiguration();
    FileInputFormat inputFormat = (FileInputFormat) ReflectionUtils.newInstance(Class.forName(inputFormatClass),
            conf);

    // Point INPUT_DIR at the data set being split; this helper is called
    // once for the left input and once for the right input
    path = path.getFileSystem(conf).makeQualified(path);
    String dirStr = StringUtils.escapeString(path.toString());
    conf.set(INPUT_DIR, dirStr);
    return inputFormat.getSplits(jobContext);
}

From source file: ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java

License: Open Source License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {

    try {
        // Get the input splits from both the left and right data sets
        Configuration conf = job.getConfiguration();
        List<InputSplit> leftSplits = getInputSplits(job, conf.get(LEFT_INPUT_FORMAT),
                new Path(conf.get(LEFT_INPUT_PATH)));
        List<InputSplit> rightSplits = getInputSplits(job, conf.get(RIGHT_INPUT_FORMAT),
                new Path(conf.get(RIGHT_INPUT_PATH)));

        // Create our CompositeInputSplits; the total count equals
        // leftSplits.size() * rightSplits.size()
        List<InputSplit> compositeInputSplits = new ArrayList<InputSplit>();

        // For each of the left input splits
        for (InputSplit left : leftSplits) {
            // For each of the right input splits
            for (InputSplit right : rightSplits) {
                // Create a new composite input split pairing the two
                CompositeInputSplit compositeSplit = new CompositeInputSplit(2);
                compositeSplit.add(left);
                compositeSplit.add(right);
                compositeInputSplits.add(compositeSplit);
            }
        }

        // Return the composite splits
        LOG.info("Total CompositeSplits to process: " + compositeInputSplits.size());
        return compositeInputSplits;
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new IOException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
}
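
This getSplits relies on four keys being present in the Configuration before it runs. A driver-side sketch, assuming the LEFT_/RIGHT_ constants are publicly visible on CartesianInputFormat; the input paths and the choice of TextInputFormat are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat;

public class CartesianDriverSketch {
    public static Job configure() throws Exception {
        Job job = Job.getInstance(new Configuration(), "cartesian product");
        Configuration conf = job.getConfiguration();
        // The same keys that getSplits(JobContext) reads back above.
        conf.set(CartesianInputFormat.LEFT_INPUT_FORMAT, TextInputFormat.class.getCanonicalName());
        conf.set(CartesianInputFormat.LEFT_INPUT_PATH, "/data/left");
        conf.set(CartesianInputFormat.RIGHT_INPUT_FORMAT, TextInputFormat.class.getCanonicalName());
        conf.set(CartesianInputFormat.RIGHT_INPUT_PATH, "/data/right");
        job.setInputFormatClass(CartesianInputFormat.class);
        return job;
    }
}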

From source file: cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java

License: Apache License

protected static Instance getInstance(JobContext job) {
    Configuration conf = job.getConfiguration();
    if (isMock(conf)) {
        return new MockInstanceShim(conf.get(INSTANCE_NAME));
    }
    return CloudbaseOutputFormat.getInstance(job);
}

From source file: cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java

License: Apache License

public static void setMockInstance(JobContext job, String instanceName) {
    Configuration conf = job.getConfiguration();
    conf.setBoolean(INSTANCE_HAS_BEEN_SET, true);
    conf.setBoolean(MOCK, true);
    conf.set(INSTANCE_NAME, instanceName);
}
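
A possible caller, for example in a test driver; the instance name "test-instance" is illustrative, and a Hadoop 2.x-style Job.getInstance driver is assumed:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim;

public class MockInstanceSketch {
    public static Job configure() throws Exception {
        Job job = Job.getInstance(new Configuration(), "mock cloudbase output");
        // Marks the job as using a mock instance; getInstance(JobContext) above will
        // then return a MockInstanceShim instead of contacting a real Cloudbase instance.
        CloudbaseOutputFormatShim.setMockInstance(job, "test-instance");
        return job;
    }
}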

From source file: cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java

License: Apache License

@Override
public void checkOutputSpecs(JobContext job) throws IOException {
    if (isMock(job.getConfiguration())) {
        try {
            Connector c = CloudbaseOutputFormatShim.getInstance(job).getConnector(getUsername(job),
                    getPassword(job));
            if (!c.securityOperations().authenticateUser(getUsername(job), getPassword(job)))
                throw new IOException("Unable to authenticate user");
        } catch (CBException e) {
            throw new IOException(e);
        } catch (CBSecurityException e) {
            throw new IOException(e);
        }
    } else {
        super.checkOutputSpecs(job);
    }
}

From source file: cn.uc.hadoop.mapreduce.lib.input.FileNameTextInputFormat.java

License: Apache License

@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}

From source file: co.cask.cdap.data.stream.AbstractStreamInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    long ttl = conf.getLong(STREAM_TTL, Long.MAX_VALUE);
    long endTime = conf.getLong(EVENT_END_TIME, Long.MAX_VALUE);
    long startTime = Math.max(conf.getLong(EVENT_START_TIME, 0L), getCurrentTime() - ttl);
    long maxSplitSize = conf.getLong(MAX_SPLIT_SIZE, Long.MAX_VALUE);
    long minSplitSize = Math.min(conf.getLong(MIN_SPLIT_SIZE, 1L), maxSplitSize);
    StreamInputSplitFinder<InputSplit> splitFinder = StreamInputSplitFinder
            .builder(URI.create(conf.get(STREAM_PATH))).setStartTime(startTime).setEndTime(endTime)
            .setMinSplitSize(minSplitSize).setMaxSplitSize(maxSplitSize).build(splitFactory);
    return splitFinder.getSplits(conf);
}
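
All of the split parameters above come out of the Configuration, so the caller has to seed them first. A hedged sketch of those settings, assuming the named constants are accessible on AbstractStreamInputFormat; the stream URI, times, and sizes are placeholders only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import co.cask.cdap.data.stream.AbstractStreamInputFormat;

public class StreamSplitConfigSketch {
    public static void configure(Job job) {
        Configuration conf = job.getConfiguration();
        // The same keys getSplits(JobContext) reads above; each has a fallback default there.
        conf.set(AbstractStreamInputFormat.STREAM_PATH, "file:///streams/events");
        conf.setLong(AbstractStreamInputFormat.EVENT_START_TIME, 0L);
        conf.setLong(AbstractStreamInputFormat.EVENT_END_TIME, System.currentTimeMillis());
        conf.setLong(AbstractStreamInputFormat.MAX_SPLIT_SIZE, 128L * 1024 * 1024);
        conf.setLong(AbstractStreamInputFormat.MIN_SPLIT_SIZE, 1L);
    }
}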

From source file: co.cask.cdap.etl.batch.mapreduce.MapReduceTransformExecutorFactory.java

License: Apache License

public MapReduceTransformExecutorFactory(MapReduceTaskContext taskContext,
        PipelinePluginInstantiator pluginInstantiator, Metrics metrics,
        Map<String, Map<String, String>> pluginRuntimeArgs) {
    super(pluginInstantiator, metrics);
    this.taskContext = taskContext;
    this.pluginRuntimeArgs = pluginRuntimeArgs;
    JobContext hadoopContext = (JobContext) taskContext.getHadoopContext();
    Configuration hConf = hadoopContext.getConfiguration();
    this.mapOutputKeyClassName = hConf.get(ETLMapReduce.GROUP_KEY_CLASS);
    this.mapOutputValClassName = hConf.get(ETLMapReduce.GROUP_VAL_CLASS);
    this.isMapper = hadoopContext instanceof Mapper.Context;
}