Example usage for org.apache.hadoop.mapreduce OutputCommitter setupJob

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce OutputCommitter setupJob.

Prototype

public abstract void setupJob(JobContext jobContext) throws IOException;

Document

For the framework to set up the job output during initialization.
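
Before the real-world usages below, a minimal driver-style sketch can illustrate how a committer is typically obtained and setupJob is invoked. The sketch is illustrative only: the class name SetupJobSketch, the output path, the choice of TextOutputFormat, and the throwaway TaskAttemptID are assumptions, not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class SetupJobSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Illustrative output path; FileOutputFormat-based formats require one to be set.
        TextOutputFormat.setOutputPath(job, new Path("/tmp/setupjob-demo"));

        // The committer is obtained from the OutputFormat via a task attempt context.
        OutputFormat<Object, Object> outputFormat = new TextOutputFormat<Object, Object>();
        TaskAttemptContext taskCtx =
                new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        OutputCommitter committer = outputFormat.getOutputCommitter(taskCtx);

        // setupJob() runs once per job attempt, before any task output is written;
        // for FileOutputCommitter it creates the job's temporary output directory.
        committer.setupJob(job);
    }
}

In a real MapReduce run the framework (the application master) performs this call; the sketch only mimics that step so the effect of setupJob can be seen in isolation.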

Usage

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2SetupTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("ConstantConditions")
@Override
protected void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    try {
        JobContextImpl jobCtx = taskCtx.jobContext();

        OutputFormat outputFormat = getOutputFormat(jobCtx);

        outputFormat.checkOutputSpecs(jobCtx);

        OutputCommitter committer = outputFormat.getOutputCommitter(hadoopContext());

        if (committer != null)
            committer.setupJob(jobCtx);
    } catch (ClassNotFoundException | IOException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2SetupTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("ConstantConditions")
@Override
protected void run0(GridHadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    try {
        JobContextImpl jobCtx = taskCtx.jobContext();

        OutputFormat outputFormat = getOutputFormat(jobCtx);

        outputFormat.checkOutputSpecs(jobCtx);

        OutputCommitter committer = outputFormat.getOutputCommitter(hadoopContext());

        if (committer != null)
            committer.setupJob(jobCtx);
    } catch (ClassNotFoundException | IOException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}

From source file: org.apache.pig.impl.io.PigFile.java

License: Apache License

public void store(DataBag data, FuncSpec storeFuncSpec, PigContext pigContext) throws IOException {
    Configuration conf = ConfigurationUtil.toConfiguration(pigContext.getProperties());
    // create a simulated JobContext
    JobContext jc = HadoopShims.createJobContext(conf, new JobID());
    StoreFuncInterface sfunc = (StoreFuncInterface) PigContext.instantiateFuncFromSpec(storeFuncSpec);
    OutputFormat<?, ?> of = sfunc.getOutputFormat();

    POStore store = new POStore(new OperatorKey());
    store.setSFile(new FileSpec(file, storeFuncSpec));
    PigOutputFormat.setLocation(jc, store);
    OutputCommitter oc;
    // create a simulated TaskAttemptContext

    TaskAttemptContext tac = HadoopShims.createTaskAttemptContext(conf, HadoopShims.getNewTaskAttemptID());
    PigOutputFormat.setLocation(tac, store);
    RecordWriter<?, ?> rw;
    try {
        of.checkOutputSpecs(jc);
        oc = of.getOutputCommitter(tac);
        oc.setupJob(jc);
        oc.setupTask(tac);
        rw = of.getRecordWriter(tac);
        sfunc.prepareToWrite(rw);

        for (Iterator<Tuple> it = data.iterator(); it.hasNext();) {
            Tuple row = it.next();
            sfunc.putNext(row);
        }
        rw.close(tac);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    if (oc.needsTaskCommit(tac)) {
        oc.commitTask(tac);
    }
    HadoopShims.commitOrCleanup(oc, jc);
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2SetupTask.java

License: Open Source License

/** {@inheritDoc} */
@SuppressWarnings("ConstantConditions")
@Override
protected void run0(GridHadoopV2TaskContext taskCtx) throws GridException {
    try {
        JobContextImpl jobCtx = taskCtx.jobContext();

        OutputFormat outputFormat = getOutputFormat(jobCtx);

        outputFormat.checkOutputSpecs(jobCtx);

        OutputCommitter committer = outputFormat.getOutputCommitter(hadoopContext());

        if (committer != null)
            committer.setupJob(jobCtx);
    } catch (ClassNotFoundException | IOException e) {
        throw new GridException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new GridInterruptedException(e);
    }
}

From source file: org.mrgeo.hadoop.multipleoutputs.DirectoryMultipleOutputsCommitter.java

License: Apache License

@Override
public void setupJob(final JobContext jobContext) throws IOException {
    for (final OutputCommitter that : committers) {
        that.setupJob(jobContext);
    }
}

From source file: org.tensorflow.hadoop.io.TFRecordFileTest.java

License: Open Source License

@Test
public void testInputOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "tfr-test");

    TFRecordFileOutputFormat.setOutputPath(job, outdir);

    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, NullWritable> outputFormat = new TFRecordFileOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, NullWritable> writer = outputFormat.getRecordWriter(context);

    // Write Example with random numbers
    Random rand = new Random();
    Map<Long, Long> records = new TreeMap<Long, Long>();
    try {
        for (int i = 0; i < RECORDS; ++i) {
            long randValue = rand.nextLong();
            records.put((long) i, randValue);
            Int64List data = Int64List.newBuilder().addValue(i).addValue(randValue).build();
            Feature feature = Feature.newBuilder().setInt64List(data).build();
            Features features = Features.newBuilder().putFeature("data", feature).build();
            Example example = Example.newBuilder().setFeatures(features).build();
            BytesWritable key = new BytesWritable(example.toByteArray());
            writer.write(key, NullWritable.get());
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    // Read and compare
    TFRecordFileInputFormat.setInputPaths(job, outdir);
    InputFormat<BytesWritable, NullWritable> inputFormat = new TFRecordFileInputFormat();
    for (InputSplit split : inputFormat.getSplits(job)) {
        RecordReader<BytesWritable, NullWritable> reader = inputFormat.createRecordReader(split, context);
        MapContext<BytesWritable, NullWritable, BytesWritable, NullWritable> mcontext = new MapContextImpl<BytesWritable, NullWritable, BytesWritable, NullWritable>(
                job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            while (reader.nextKeyValue()) {
                BytesWritable bytes = reader.getCurrentKey();
                Example example = Example.parseFrom(bytes.getBytes());
                Int64List data = example.getFeatures().getFeatureMap().get("data").getInt64List();
                Long key = data.getValue(0);
                Long value = data.getValue(1);
                assertEquals(records.get(key), value);
                records.remove(key);
            }
        } finally {
            reader.close();
        }
    }
    assertEquals(0, records.size());
}