Example usage for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find usage examples for the org.apache.hadoop.mapreduce Job method getConfiguration.

Prototype

public Configuration getConfiguration() 

Document

Return the configuration for the job.
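
The returned Configuration is the job's live configuration: properties set through it before submission take effect for the job. As a minimal sketch of the typical pattern (the custom key below is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example-job");
        // getConfiguration() returns the configuration backing the job,
        // so properties set here are visible when the job is submitted.
        Configuration conf = job.getConfiguration();
        conf.setInt("mapreduce.task.timeout", 600000); // standard Hadoop property
        conf.set("example.custom.key", "value");       // hypothetical application key
        System.out.println(conf.get("example.custom.key"));
    }
}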

Usage

From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License: Apache License

private void runJob(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    assert job.getJobID() != null;
    TaskID taskId = newMapTaskId(job.getJobID(), 0);
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    OutputCommitter committer = output
            .getOutputCommitter(newTaskAttemptContext(conf, newTaskAttemptId(taskId, 0)));
    boolean succeed = false;
    committer.setupJob(job);
    try {
        if (job.getNumReduceTasks() == 0) {
            runMap(job, null);
        } else {
            try (KeyValueSorter<?, ?> sorter = createSorter(job, job.getMapOutputKeyClass(),
                    job.getMapOutputValueClass())) {
                runMap(job, sorter);
                runReduce(job, sorter);
            }
        }
        committer.commitJob(job);
        succeed = true;
    } finally {
        if (succeed == false) {
            try {
                committer.abortJob(job, State.FAILED);
            } catch (IOException e) {
                LOG.error(MessageFormat.format("error occurred while aborting job: {0} ({1})", job.getJobID(),
                        job.getJobName()), e);
            }
        }
    }
}

From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License: Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
private void runMap(Job job, KeyValueSorter<?, ?> sorter)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    int serial = 1;
    for (InputSplit split : splits) {
        TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0);
        Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$
                    mapper.getClass().getName(), id, split.getLength()));
        }
        TaskAttemptContext context = newTaskAttemptContext(conf, id);
        // we always obtain a new OutputFormat object, since OutputFormat.getOutputCommitter() may be cached
        OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
        OutputCommitter committer = output.getOutputCommitter(context);
        committer.setupTask(context);
        boolean succeed = false;
        try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) {
            RecordWriter<?, ?> writer;
            if (sorter != null) {
                writer = new ShuffleWriter(sorter);
            } else {
                writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            }
            try {
                Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split);
                reader.initialize(split, c);
                mapper.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
            doCommitTask(context, committer);
            succeed = true;
        } finally {
            if (succeed == false) {
                doAbortTask(context, committer);
            }
        }
    }
}

From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License: Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
private void runReduce(Job job, KeyValueSorter<?, ?> sorter)
        throws ClassNotFoundException, IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0);
    Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$
                reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes()));
    }
    TaskAttemptContext context = newTaskAttemptContext(conf, id);
    OutputCommitter committer = output.getOutputCommitter(context);
    committer.setupTask(context);
    boolean succeed = false;
    try {
        ShuffleReader reader = new ShuffleReader(sorter, new Progress());
        try {
            RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            try {
                Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(),
                        sorter.getValueClass(), writer, committer, (RawComparator) job.getGroupingComparator());
                reducer.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("error occurred while closing reducer input: {0} ({1})", id,
                        job.getJobName()), e);
            }
        }
        doCommitTask(context, committer);
        succeed = true;
    } finally {
        if (succeed == false) {
            doAbortTask(context, committer);
        }
    }
}

From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License: Apache License

private <K, V> KeyValueSorter<?, ?> createSorter(Job job, Class<K> key, Class<V> value) {
    KeyValueSorter.Options options = getSorterOptions(job.getConfiguration());
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format(
                "shuffle buffer size: {1}bytes/page, {2}bytes/block, compression:{3} ({0})", //$NON-NLS-1$
                job.getJobName(), options.getPageSize(), options.getBlockSize(), options.isCompressBlock()));
    }
    return new KeyValueSorter<>(new SerializationFactory(job.getConfiguration()), key, value,
            job.getSortComparator(), options);
}

From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License: Apache License

private Job newJob() throws IOException {
    Job job = JobCompatibility.newJob(new Configuration());
    job.getConfiguration().setInt(SimpleJobRunner.KEY_BUFFER_SIZE, 16 * 1024 * 1024);
    job.getConfiguration().set(SimpleJobRunner.KEY_TEMPORARY_LOCATION,
            new File(folder.getRoot(), "spill-out").getAbsolutePath());
    return job;
}

From source file: com.asakusafw.runtime.stage.AbstractStageClient.java

License: Apache License

/**
 * Configures the {@link Job} object for this stage.
 * @param job the target job
 * @param variables current variable table
 * @throws IOException if failed to configure the job
 * @throws InterruptedException if interrupted while configuring {@link Job} object
 */
protected void configureStage(Job job, VariableTable variables) throws IOException, InterruptedException {
    ClassLoader loader = job.getConfiguration().getClassLoader();
    for (StageConfigurator configurator : ServiceLoader.load(StageConfigurator.class, loader)) {
        configurator.configure(job);
    }
}
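
configureStage discovers StageConfigurator services through the class loader attached to the job's configuration, so applications can contribute configuration steps without changing the stage client. The following is a rough sketch of such a hook; it assumes StageConfigurator is an abstract class with a single configure(Job) method (as the loop above suggests), and the class name and the properties it sets are hypothetical:

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;

// Hypothetical configurator, registered via
// META-INF/services/com.asakusafw.runtime.stage.StageConfigurator.
public class DisableSpeculationConfigurator extends StageConfigurator {
    @Override
    public void configure(Job job) throws IOException, InterruptedException {
        // Tune the live job configuration before submission.
        job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
        job.getConfiguration().setBoolean("mapreduce.reduce.speculative", false);
    }
}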

From source file: com.asakusafw.runtime.stage.AbstractStageClient.java

License: Apache License

/**
 * Creates a new job.
 * @param conf asakusa job configuration
 * @return the created job
 * @throws IOException if failed to create a new job
 * @throws InterruptedException if interrupted while creating {@link Job} object
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public Job createJob(Configuration conf) throws IOException, InterruptedException {
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    Job job = JobCompatibility.newJob(conf);
    VariableTable variables = getPathParser(job.getConfiguration());
    configureJobInfo(job, variables);
    configureStageInput(job, variables);
    configureStageOutput(job, variables);
    configureShuffle(job, variables);
    configureStageResource(job, variables);
    configureStage(job, variables);
    return job;
}

From source file: com.asakusafw.runtime.stage.AbstractStageClient.java

License: Apache License

private int submit(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    String jobRunnerClassName = job.getConfiguration().get(StageConstants.PROP_JOB_RUNNER);
    JobRunner runner = DefaultJobRunner.INSTANCE;
    if (jobRunnerClassName != null) {
        Class<?> jobRunnerClass = job.getConfiguration().getClassByName(jobRunnerClassName);
        runner = (JobRunner) ReflectionUtils.newInstance(jobRunnerClass, job.getConfiguration());
    }
    LOG.info(MessageFormat.format("Submitting Job: {0} (runner: {1})", job.getJobName(), runner));
    long start = System.currentTimeMillis();
    boolean succeed;
    if (RuntimeContext.get().isSimulation()) {
        LOG.info(MessageFormat.format(
                "Job is skipped because current execution status is in simulation mode: name={0}",
                job.getJobName()));
        succeed = true;
    } else {
        succeed = runner.run(job);
    }
    long end = System.currentTimeMillis();
    LOG.info(MessageFormat.format("Job Finished: elapsed=[{3}]ms, succeed={2}, id={0}, name={1}",
            job.getJobID(), job.getJobName(), succeed, String.valueOf(end - start)));

    return succeed ? ToolLauncher.JOB_SUCCEEDED : ToolLauncher.JOB_FAILED;
}
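
Because submit() resolves the runner class from the configuration, callers can swap in an alternative runner, such as the in-process SimpleJobRunner shown earlier, purely through the job's configuration. A minimal sketch, assuming SimpleJobRunner implements JobRunner (which its use as a runner suggests):

private Job newJobWithSimpleRunner(Configuration conf) throws IOException, InterruptedException {
    Job job = createJob(conf);
    // StageConstants.PROP_JOB_RUNNER is the key read back in submit() above.
    job.getConfiguration().set(StageConstants.PROP_JOB_RUNNER,
            SimpleJobRunner.class.getName());
    return job;
}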

From source file: com.asakusafw.runtime.stage.AbstractStageClient.java

License: Apache License

private void configureJobInfo(Job job, VariableTable variables) {
    Class<?> clientClass = getClass();
    String operationId = getOperationId();

    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Hadoop Job Client: {0}", clientClass.getName())); //$NON-NLS-1$
    }
    String jar = job.getConfiguration().get(PROP_APPLICATION_JAR);
    if (jar == null || (job.getConfiguration() instanceof JobConf) == false) {
        job.setJarByClass(clientClass);
    } else {
        ((JobConf) job.getConfiguration()).setJar(jar);
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Hadoop Job Name: {0}", operationId)); //$NON-NLS-1$
    }
    job.setJobName(operationId);
}

From source file: com.asakusafw.runtime.stage.AbstractStageClient.java

License: Apache License

private void configureStageOutput(Job job, VariableTable variables) throws IOException {
    String outputPath = variables.parse(getStageOutputPath());
    List<StageOutput> outputList = new ArrayList<>();
    for (StageOutput output : getStageOutputs()) {
        String name = output.getName();
        Class<?> keyClass = output.getKeyClass();
        Class<?> valueClass = output.getValueClass();
        Class<? extends OutputFormat<?, ?>> formatClass = output.getFormatClass();
        Map<String, String> attributes = output.getAttributes();
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Output: path={0}/{1}-*, format={2}, key={3}, value={4}, attributes={5}", //$NON-NLS-1$
                    outputPath, name, formatClass.getName(), keyClass.getName(), valueClass.getName(),
                    attributes));
        }
        outputList.add(new StageOutput(name, keyClass, valueClass, formatClass, attributes));
    }
    StageOutputDriver.set(job, outputPath, outputList);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(StageOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", //$NON-NLS-1$
            LegacyBridgeOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class);
}