Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
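
Before the examples, a minimal orientation sketch of the two sides of this API: the driver writes values into the job's Configuration, and any JobContext (a Job, a TaskAttemptContext, a Mapper.Context, ...) reads them back through getConfiguration(). This is an illustrative sketch, assuming a Hadoop 2.x driver; the property name "example.lines.per.split" is made up for the example and is not a real Hadoop key.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        // Driver side: write a job-scoped setting before submission.
        Job job = Job.getInstance(new Configuration(), "getConfiguration example");
        job.getConfiguration().setInt("example.lines.per.split", 4);

        // Job implements JobContext, so the same read pattern used by the
        // InputFormats below also works here.
        readSetting(job);
    }

    static void readSetting(JobContext context) {
        // getConfiguration() returns the job's Configuration; unknown keys
        // fall back to the supplied default (1 here).
        int linesPerSplit = context.getConfiguration().getInt("example.lines.per.split", 1);
        System.out.println("lines per split = " + linesPerSplit);
    }
}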

Usage

From source file: bucket_sort.NLineInputFormat.java

License: Apache License

/** 
 * Logically splits the set of input files for the job, splits N lines
 * of the input as one split.
 * 
 * @see FileInputFormat#getSplits(JobContext)
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    int numLinesPerSplit = getNumLinesPerSplit(job);
    for (FileStatus status : listStatus(job)) {
        splits.addAll(getSplitsForFile(status, job.getConfiguration(), numLinesPerSplit));
    }
    return splits;
}

From source file: bucket_sort.NLineInputFormat.java

License: Apache License

/**
 * Get the number of lines per split
 * @param job the job
 * @return the number of lines per split
 */
public static int getNumLinesPerSplit(JobContext job) {
    return job.getConfiguration().getInt(LINES_PER_MAP, 1);
}
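
The value read above has to be written by the driver under the same key. A hedged driver-side sketch, assuming this bucket_sort.NLineInputFormat exposes its LINES_PER_MAP constant publicly (the value 4 is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class NLineDriverSketch {
    public static Job configure() throws Exception {
        Job job = Job.getInstance(new Configuration(), "bucket sort");
        // Writes the key that getNumLinesPerSplit(JobContext) reads back (default 1);
        // LINES_PER_MAP is assumed to be a public constant on bucket_sort.NLineInputFormat.
        job.getConfiguration().setInt(bucket_sort.NLineInputFormat.LINES_PER_MAP, 4);
        job.setInputFormatClass(bucket_sort.NLineInputFormat.class);
        return job;
    }
}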

From source file: ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java

License: Open Source License

private List<InputSplit> getInputSplits(JobContext jobContext, String inputFormatClass, Path path)
        throws ClassNotFoundException, IOException {
    Configuration conf = jobContext.getConfiguration();
    FileInputFormat inputFormat = (FileInputFormat) ReflectionUtils.newInstance(Class.forName(inputFormatClass),
            conf);

    // Point INPUT_DIR at the data set being split; this helper is called
    // once for the left input and once for the right input
    path = path.getFileSystem(conf).makeQualified(path);
    String dirStr = StringUtils.escapeString(path.toString());
    conf.set(INPUT_DIR, dirStr);
    return inputFormat.getSplits(jobContext);
}

From source file: ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java

License: Open Source License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {

    try {
        // Get the input splits from both the left and right data sets
        Configuration conf = job.getConfiguration();
        List<InputSplit> leftSplits = getInputSplits(job, conf.get(LEFT_INPUT_FORMAT),
                new Path(conf.get(LEFT_INPUT_PATH)));
        List<InputSplit> rightSplits = getInputSplits(job, conf.get(RIGHT_INPUT_FORMAT),
                new Path(conf.get(RIGHT_INPUT_PATH)));

        // Create our CompositeInputSplits; the total count equals
        // leftSplits.size() * rightSplits.size()
        List<InputSplit> compositeInputSplits = new ArrayList<InputSplit>();

        // For each of the left input splits
        for (InputSplit left : leftSplits) {
            // For each of the right input splits
            for (InputSplit right : rightSplits) {
                // Create a new composite input split pairing the two
                CompositeInputSplit compositeSplit = new CompositeInputSplit(2);
                compositeSplit.add(left);
                compositeSplit.add(right);
                compositeInputSplits.add(compositeSplit);
            }
        }

        // Return the composite splits
        LOG.info("Total CompositeSplits to process: " + compositeInputSplits.size());
        return compositeInputSplits;
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new IOException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
}
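
This getSplits relies on four keys being present in the Configuration before it runs. A driver-side sketch, assuming the LEFT_/RIGHT_ constants are publicly visible on CartesianInputFormat; the input paths and the choice of TextInputFormat are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat;

public class CartesianDriverSketch {
    public static Job configure() throws Exception {
        Job job = Job.getInstance(new Configuration(), "cartesian product");
        Configuration conf = job.getConfiguration();
        // The same keys that getSplits(JobContext) reads back above.
        conf.set(CartesianInputFormat.LEFT_INPUT_FORMAT, TextInputFormat.class.getCanonicalName());
        conf.set(CartesianInputFormat.LEFT_INPUT_PATH, "/data/left");
        conf.set(CartesianInputFormat.RIGHT_INPUT_FORMAT, TextInputFormat.class.getCanonicalName());
        conf.set(CartesianInputFormat.RIGHT_INPUT_PATH, "/data/right");
        job.setInputFormatClass(CartesianInputFormat.class);
        return job;
    }
}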

From source file: cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java

License: Apache License

protected static Instance getInstance(JobContext job) {
    Configuration conf = job.getConfiguration();
    if (isMock(conf)) {
        return new MockInstanceShim(conf.get(INSTANCE_NAME));
    }
    return CloudbaseOutputFormat.getInstance(job);
}

From source file: cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java

License: Apache License

public static void setMockInstance(JobContext job, String instanceName) {
    Configuration conf = job.getConfiguration();
    conf.setBoolean(INSTANCE_HAS_BEEN_SET, true);
    conf.setBoolean(MOCK, true);
    conf.set(INSTANCE_NAME, instanceName);
}
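
A possible caller, for example in a test driver; the instance name "test-instance" is illustrative, and a Hadoop 2.x-style Job.getInstance driver is assumed:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim;

public class MockInstanceSketch {
    public static Job configure() throws Exception {
        Job job = Job.getInstance(new Configuration(), "mock cloudbase output");
        // Marks the job as using a mock instance; getInstance(JobContext) above will
        // then return a MockInstanceShim instead of contacting a real Cloudbase instance.
        CloudbaseOutputFormatShim.setMockInstance(job, "test-instance");
        return job;
    }
}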

From source file: cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java

License: Apache License

@Override
public void checkOutputSpecs(JobContext job) throws IOException {
    if (isMock(job.getConfiguration())) {
        try {
            Connector c = CloudbaseOutputFormatShim.getInstance(job).getConnector(getUsername(job),
                    getPassword(job));
            if (!c.securityOperations().authenticateUser(getUsername(job), getPassword(job)))
                throw new IOException("Unable to authenticate user");
        } catch (CBException e) {
            throw new IOException(e);
        } catch (CBSecurityException e) {
            throw new IOException(e);
        }
    } else {
        super.checkOutputSpecs(job);
    }
}

From source file: cn.uc.hadoop.mapreduce.lib.input.FileNameTextInputFormat.java

License: Apache License

@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}

From source file: co.cask.cdap.data.stream.AbstractStreamInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    long ttl = conf.getLong(STREAM_TTL, Long.MAX_VALUE);
    long endTime = conf.getLong(EVENT_END_TIME, Long.MAX_VALUE);
    long startTime = Math.max(conf.getLong(EVENT_START_TIME, 0L), getCurrentTime() - ttl);
    long maxSplitSize = conf.getLong(MAX_SPLIT_SIZE, Long.MAX_VALUE);
    long minSplitSize = Math.min(conf.getLong(MIN_SPLIT_SIZE, 1L), maxSplitSize);
    StreamInputSplitFinder<InputSplit> splitFinder = StreamInputSplitFinder
            .builder(URI.create(conf.get(STREAM_PATH))).setStartTime(startTime).setEndTime(endTime)
            .setMinSplitSize(minSplitSize).setMaxSplitSize(maxSplitSize).build(splitFactory);
    return splitFinder.getSplits(conf);
}
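
All of the split parameters above come out of the Configuration, so the caller has to seed them first. A hedged sketch of those settings, assuming the named constants are accessible on AbstractStreamInputFormat; the stream URI, times, and sizes are placeholders only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import co.cask.cdap.data.stream.AbstractStreamInputFormat;

public class StreamSplitConfigSketch {
    public static void configure(Job job) {
        Configuration conf = job.getConfiguration();
        // The same keys getSplits(JobContext) reads above; each has a fallback default there.
        conf.set(AbstractStreamInputFormat.STREAM_PATH, "file:///streams/events");
        conf.setLong(AbstractStreamInputFormat.EVENT_START_TIME, 0L);
        conf.setLong(AbstractStreamInputFormat.EVENT_END_TIME, System.currentTimeMillis());
        conf.setLong(AbstractStreamInputFormat.MAX_SPLIT_SIZE, 128L * 1024 * 1024);
        conf.setLong(AbstractStreamInputFormat.MIN_SPLIT_SIZE, 1L);
    }
}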

From source file: co.cask.cdap.etl.batch.mapreduce.MapReduceTransformExecutorFactory.java

License: Apache License

public MapReduceTransformExecutorFactory(MapReduceTaskContext taskContext,
        PipelinePluginInstantiator pluginInstantiator, Metrics metrics,
        Map<String, Map<String, String>> pluginRuntimeArgs) {
    super(pluginInstantiator, metrics);
    this.taskContext = taskContext;
    this.pluginRuntimeArgs = pluginRuntimeArgs;
    JobContext hadoopContext = (JobContext) taskContext.getHadoopContext();
    Configuration hConf = hadoopContext.getConfiguration();
    this.mapOutputKeyClassName = hConf.get(ETLMapReduce.GROUP_KEY_CLASS);
    this.mapOutputValClassName = hConf.get(ETLMapReduce.GROUP_VAL_CLASS);
    this.isMapper = hadoopContext instanceof Mapper.Context;
}