List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
private void runJob(Job job) throws ClassNotFoundException, IOException, InterruptedException { assert job.getJobID() != null; TaskID taskId = newMapTaskId(job.getJobID(), 0); Configuration conf = job.getConfiguration(); OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); OutputCommitter committer = output//from ww w . j av a 2s. com .getOutputCommitter(newTaskAttemptContext(conf, newTaskAttemptId(taskId, 0))); boolean succeed = false; committer.setupJob(job); try { if (job.getNumReduceTasks() == 0) { runMap(job, null); } else { try (KeyValueSorter<?, ?> sorter = createSorter(job, job.getMapOutputKeyClass(), job.getMapOutputValueClass())) { runMap(job, sorter); runReduce(job, sorter); } } committer.commitJob(job); succeed = true; } finally { if (succeed == false) { try { committer.abortJob(job, State.FAILED); } catch (IOException e) { LOG.error(MessageFormat.format("error occurred while aborting job: {0} ({1})", job.getJobID(), job.getJobName()), e); } } } }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) private void runMap(Job job, KeyValueSorter<?, ?> sorter) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = job.getConfiguration(); InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); List<InputSplit> splits = input.getSplits(job); int serial = 1; for (InputSplit split : splits) { TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0); Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$ mapper.getClass().getName(), id, split.getLength())); }/*from ww w . j a v a2 s . c o m*/ TaskAttemptContext context = newTaskAttemptContext(conf, id); // we always obtain a new OutputFormat object / OutputFormat.getOutputCommiter() may be cached OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) { RecordWriter<?, ?> writer; if (sorter != null) { writer = new ShuffleWriter(sorter); } else { writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); } try { Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split); reader.initialize(split, c); mapper.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } } }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) private void runReduce(Job job, KeyValueSorter<?, ?> sorter) throws ClassNotFoundException, IOException, InterruptedException { Configuration conf = job.getConfiguration(); OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0); Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$ reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes())); }/*w w w . j av a2 s .c om*/ TaskAttemptContext context = newTaskAttemptContext(conf, id); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try { ShuffleReader reader = new ShuffleReader(sorter, new Progress()); try { RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); try { Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(), sorter.getValueClass(), writer, committer, (RawComparator) job.getGroupingComparator()); reducer.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } } finally { try { reader.close(); } catch (IOException e) { LOG.warn(MessageFormat.format("error occurred while reducer mapper input: {0} ({1})", id, job.getJobName()), e); } } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
private <K, V> KeyValueSorter<?, ?> createSorter(Job job, Class<K> key, Class<V> value) { KeyValueSorter.Options options = getSorterOptions(job.getConfiguration()); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format( "shuffle buffer size: {1}bytes/page, {2}bytes/block, compression:{3} ({0})", //$NON-NLS-1$ job.getJobName(), options.getPageSize(), options.getBlockSize(), options.isCompressBlock())); }//w w w .ja v a2 s . com return new KeyValueSorter<>(new SerializationFactory(job.getConfiguration()), key, value, job.getSortComparator(), options); }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java
License:Apache License
private Job newJob() throws IOException { Job job = JobCompatibility.newJob(new Configuration()); job.getConfiguration().setInt(SimpleJobRunner.KEY_BUFFER_SIZE, 16 * 1024 * 1024); job.getConfiguration().set(SimpleJobRunner.KEY_TEMPORARY_LOCATION, new File(folder.getRoot(), "spill-out").getAbsolutePath()); return job;//from www. j a va 2 s .co m }
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
/**
 * Applies every available {@link StageConfigurator} to the {@link Job} object of this stage.
 * The configurators are discovered with {@link ServiceLoader}, using the class loader
 * of the job configuration.
 * @param job the target job
 * @param variables current variable table
 * @throws IOException if failed to configure the job
 * @throws InterruptedException if interrupted while configuring {@link Job} object
 */
protected void configureStage(Job job, VariableTable variables) throws IOException, InterruptedException {
    ServiceLoader<StageConfigurator> configurators = ServiceLoader.load(
            StageConfigurator.class,
            job.getConfiguration().getClassLoader());
    for (StageConfigurator each : configurators) {
        each.configure(job);
    }
}
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
/** * Creates a new job./* w ww.j a v a 2s . c o m*/ * @param conf asakusa job configuration * @return the created job * @throws IOException if failed to create a new job * @throws InterruptedException if interrupted while creating {@link Job} object * @throws IllegalArgumentException if some parameters were {@code null} */ public Job createJob(Configuration conf) throws IOException, InterruptedException { if (conf == null) { throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$ } Job job = JobCompatibility.newJob(conf); VariableTable variables = getPathParser(job.getConfiguration()); configureJobInfo(job, variables); configureStageInput(job, variables); configureStageOutput(job, variables); configureShuffle(job, variables); configureStageResource(job, variables); configureStage(job, variables); return job; }
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
/**
 * Submits the job through a {@link JobRunner} and reports the result as an exit status.
 * The runner is {@code DefaultJobRunner.INSTANCE} unless the job configuration names
 * another runner class under {@code StageConstants.PROP_JOB_RUNNER}.
 * When the current runtime context is in simulation mode, the job is not run at all
 * and is treated as succeeded.
 * @param job the job to submit
 * @return {@code ToolLauncher.JOB_SUCCEEDED} if the job succeeded (or was skipped),
 *     otherwise {@code ToolLauncher.JOB_FAILED}
 * @throws IOException if an I/O error occurred while running the job
 * @throws InterruptedException if interrupted while running the job
 * @throws ClassNotFoundException if a required class could not be loaded
 */
private int submit(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    String runnerName = conf.get(StageConstants.PROP_JOB_RUNNER);
    JobRunner runner = DefaultJobRunner.INSTANCE;
    if (runnerName != null) {
        Class<?> runnerClass = conf.getClassByName(runnerName);
        runner = (JobRunner) ReflectionUtils.newInstance(runnerClass, conf);
    }
    LOG.info(MessageFormat.format("Submitting Job: {0} (runner: {1})", job.getJobName(), runner));

    long start = System.currentTimeMillis();
    boolean succeed;
    if (!RuntimeContext.get().isSimulation()) {
        succeed = runner.run(job);
    } else {
        LOG.info(MessageFormat.format(
                "Job is skipped because current execution status is in simulation mode: name={0}",
                job.getJobName()));
        succeed = true;
    }
    long end = System.currentTimeMillis();

    // the elapsed time is passed as a string so that it is inserted into the message as-is
    String elapsed = String.valueOf(end - start);
    LOG.info(MessageFormat.format("Job Finished: elapsed=[{3}]ms, succeed={2}, id={0}, name={1}",
            job.getJobID(), job.getJobName(), succeed, elapsed));
    if (succeed) {
        return ToolLauncher.JOB_SUCCEEDED;
    }
    return ToolLauncher.JOB_FAILED;
}
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
private void configureJobInfo(Job job, VariableTable variables) { Class<?> clientClass = getClass(); String operationId = getOperationId(); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Hadoop Job Client: {0}", clientClass.getName())); //$NON-NLS-1$ }// w w w . j a va2s .c o m String jar = job.getConfiguration().get(PROP_APPLICATION_JAR); if (jar == null || (job.getConfiguration() instanceof JobConf) == false) { job.setJarByClass(clientClass); } else { ((JobConf) job.getConfiguration()).setJar(jar); } if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Hadoop Job Name: {0}", operationId)); //$NON-NLS-1$ } job.setJobName(operationId); }
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
private void configureStageOutput(Job job, VariableTable variables) throws IOException { String outputPath = variables.parse(getStageOutputPath()); List<StageOutput> outputList = new ArrayList<>(); for (StageOutput output : getStageOutputs()) { String name = output.getName(); Class<?> keyClass = output.getKeyClass(); Class<?> valueClass = output.getValueClass(); Class<? extends OutputFormat<?, ?>> formatClass = output.getFormatClass(); Map<String, String> attributes = output.getAttributes(); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format( "Output: path={0}/{1}-*, format={2}, key={3}, value={4}, attributes={5}", //$NON-NLS-1$ outputPath, name, formatClass.getName(), keyClass.getName(), valueClass.getName(), attributes));// w ww .ja va 2 s . c o m } outputList.add(new StageOutput(name, keyClass, valueClass, formatClass, attributes)); } StageOutputDriver.set(job, outputPath, outputList); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); job.setOutputFormatClass(StageOutputFormat.class); job.getConfiguration().setClass("mapred.output.committer.class", //$NON-NLS-1$ LegacyBridgeOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class); }