Example usage for org.apache.hadoop.mapreduce OutputCommitter commitTask

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce OutputCommitter commitTask.

Prototype

public abstract void commitTask(TaskAttemptContext taskContext) throws IOException;

Document

To promote the task's temporary output to the final output location.
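
Before the per-project examples below, here is a minimal, self-contained sketch of the typical commit sequence using the stock TextOutputFormat and its FileOutputCommitter. The standalone driver class, output path, and fabricated task-attempt ID are illustrative assumptions, not taken from any of the sources that follow; the point is the order of calls: setupJob, setupTask, write and close a RecordWriter, check needsTaskCommit, call commitTask, and finally commitJob.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class CommitTaskSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Illustrative output path -- adjust for your environment.
        FileOutputFormat.setOutputPath(job, new Path("/tmp/commit-task-demo"));

        TextOutputFormat<LongWritable, Text> outputFormat = new TextOutputFormat<>();
        // Fabricated task attempt id, only to build a TaskAttemptContext outside a real MR job.
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0));

        OutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupJob(job);          // once per job
        committer.setupTask(taskContext); // once per task attempt

        RecordWriter<LongWritable, Text> writer = outputFormat.getRecordWriter(taskContext);
        writer.write(new LongWritable(1), new Text("hello"));
        writer.close(taskContext);

        // commitTask promotes this attempt's temporary output; needsTaskCommit lets
        // committers that wrote nothing (or need no promotion) skip the call.
        if (committer.needsTaskCommit(taskContext)) {
            committer.commitTask(taskContext);
        }
        committer.commitJob(job); // finally, publish the committed task output
    }
}

Note that commitTask only promotes this attempt's temporary output into the job's pending output; with the classic FileOutputCommitter algorithm (version 1) the results become visible under the final output directory only after commitJob runs.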

Usage

From source file:org.apache.giraph.worker.BspServiceWorkerForCohesiveSubgraph.java

License:Apache License

/**
 * Save the vertices using the user-defined VertexOutputFormat from our
 * vertexArray based on the split.
 *
 * @param numLocalVertices Number of local vertices
 * @throws IOException
 * @throws InterruptedException
 */
private void saveVertices(long numLocalVertices) throws IOException, InterruptedException {
    if (getConfiguration().getVertexOutputFormatClass() == null) {
        LOG.warn("saveVertices: " + GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS
                + " not specified -- there will be no saved output");
        return;
    }
    if (getConfiguration().doOutputDuringComputation()) {
        if (LOG.isInfoEnabled()) {
            LOG.info("saveVertices: The option for doing output during "
                    + "computation is selected, so there will be no saving of the "
                    + "output in the end of application");
        }
        return;
    }

    int numThreads = Math.min(getConfiguration().getNumOutputThreads(), getPartitionStore().getNumPartitions());
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Starting to save "
            + numLocalVertices + " vertices " + "using " + numThreads + " threads");
    final VertexOutputFormat<I, V, E> vertexOutputFormat = getConfiguration().createVertexOutputFormat();
    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    VertexWriter<I, V, E> vertexWriter = vertexOutputFormat.createVertexWriter(getContext());
                    vertexWriter.setConf(
                            (ImmutableClassesGiraphConfiguration<I, V, E, Writable>) getConfiguration());
                    vertexWriter.initialize(getContext());
                    long verticesWritten = 0;
                    long nextPrintVertices = 0;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();
                    for (Integer partitionId : getPartitionStore().getPartitionIds()) {
                        Partition<I, V, E, M> partition = getPartitionStore().getPartition(partitionId);

                        if (getConfiguration().getBoolean("giraph.ktruss.subgraph", false)) {
                            /**
                             * Special for the ktruss output
                             */
                            BasicGraphStoreInterface gs = (BasicGraphStoreInterface) (partition);
                            for (BasicVertex rv : gs.getLocalVertex()) {
                                Vertex<I, V, E, M> vertex = getConfiguration().createVertex();
                                List<Edge<I, E>> edges = Lists.newLinkedList();
                                for (BasicEdge nb : rv.getNeighbors()) {
                                    edges.add(EdgeFactory.create(((I) new IntWritable(nb.getTargetId())),
                                            ((E) NullWritable.get())));
                                }
                                vertex.initialize((I) (new IntWritable(rv.getId())), ((V) new IntWritable(0)),
                                        edges);

                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex + " out of "
                                                    + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        } else {
                            for (Vertex<I, V, E, M> vertex : partition) {
                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex + " out of "
                                                    + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        }
                        ++partitionIndex;
                    }
                    vertexWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-vertices-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Done saving vertices.");
    // YARN: we must commit the "task" output ourselves here; Hadoop MR is not around to do it for us.
    if (getConfiguration().isPureYarnJob() && getConfiguration().getVertexOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter = vertexOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                        "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain " + "OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has " + "FAILED.", ioe);
        }
    }
}

From source file:org.apache.hcatalog.data.transfer.impl.HCatOutputFormatWriter.java

License:Apache License

@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {

    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = HCatHadoopShims.Instance.get().createTaskAttemptContext(conf,
            new TaskAttemptID(HCatHadoopShims.Instance.get().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}

From source file:org.apache.hcatalog.mapreduce.FileRecordWriterContainer.java

License:Apache License

@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    if (dynamicPartitioningUsed) {
        for (org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters
                .values()) {
            // We are in RecordWriter.close(), so it makes sense that the context would be TaskInputOutput.
            bwriter.close(reporter);
        }
        for (Map.Entry<String, org.apache.hadoop.mapred.OutputCommitter> entry : baseDynamicCommitters
                .entrySet()) {
            org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey());
            OutputCommitter baseOutputCommitter = entry.getValue();
            if (baseOutputCommitter.needsTaskCommit(currContext)) {
                baseOutputCommitter.commitTask(currContext);
            }
        }
    } else {
        getBaseRecordWriter().close(reporter);
    }
}

From source file:org.apache.hcatalog.pig.TestE2EScenarios.java

License:Apache License

private void copyTable(String in, String out) throws IOException, InterruptedException {
    Job ijob = new Job();
    Job ojob = new Job();
    HCatInputFormat inpy = new HCatInputFormat();
    inpy.setInput(ijob, null, in);
    HCatOutputFormat oupy = new HCatOutputFormat();
    oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));

    // Test HCatContext

    System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
    if (HCatContext.INSTANCE.getConf().isPresent()) {
        System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get()
                .getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
                        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
    }

    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
    System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
    oupy.setSchema(ojob, tableSchema);
    oupy.checkOutputSpecs(ojob);
    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
    oc.setupJob(ojob);

    for (InputSplit split : inpy.getSplits(ijob)) {

        TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
        TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());

        RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
        rr.initialize(split, rtaskContext);

        OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
        taskOc.setupTask(wtaskContext);
        RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);

        while (rr.nextKeyValue()) {
            rw.write(rr.getCurrentKey(), rr.getCurrentValue());
        }
        rw.close(wtaskContext);
        taskOc.commitTask(wtaskContext);
        rr.close();
    }

    oc.commitJob(ojob);
}

From source file:org.apache.hive.hcatalog.data.transfer.impl.HCatOutputFormatWriter.java

License:Apache License

@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {

    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,
            new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}

From source file:org.apache.hive.hcatalog.mapreduce.DynamicPartitionFileRecordWriterContainer.java

License:Apache License

@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    for (RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters.values()) {
        // We are in RecordWriter.close(), so it makes sense that the context
        // would be TaskInputOutput.
        bwriter.close(reporter);
    }

    TaskCommitContextRegistry.getInstance().register(context,
            new TaskCommitContextRegistry.TaskCommitterProxy() {
                @Override
                public void abortTask(TaskAttemptContext context) throws IOException {
                    for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo
                            .entrySet()) {
                        String dynKey = outputJobInfoEntry.getKey();
                        OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                        LOG.info("Aborting task-attempt for " + outputJobInfo.getLocation());
                        baseDynamicCommitters.get(dynKey).abortTask(dynamicContexts.get(dynKey));
                    }
                }

                @Override
                public void commitTask(TaskAttemptContext context) throws IOException {
                    for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo
                            .entrySet()) {
                        String dynKey = outputJobInfoEntry.getKey();
                        OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                        LOG.info("Committing task-attempt for " + outputJobInfo.getLocation());
                        TaskAttemptContext dynContext = dynamicContexts.get(dynKey);
                        OutputCommitter dynCommitter = baseDynamicCommitters.get(dynKey);
                        if (dynCommitter.needsTaskCommit(dynContext)) {
                            dynCommitter.commitTask(dynContext);
                        } else {
                            LOG.info("Skipping commitTask() for " + outputJobInfo.getLocation());
                        }
                    }
                }
            });
}

From source file:org.apache.hive.hcatalog.mapreduce.TestHCatOutputFormat.java

License:Apache License

public void publishTest(Job job) throws Exception {
    HCatOutputFormat hcof = new HCatOutputFormat();
    TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID());
    OutputCommitter committer = hcof.getOutputCommitter(tac);
    committer.setupJob(job);
    committer.setupTask(tac);
    committer.commitTask(tac);
    committer.commitJob(job);

    Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1"));
    assertNotNull(part);

    StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters());
    assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue");
    assertTrue(part.getSd().getLocation().indexOf("p1") != -1);
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Task.java

License:Apache License

/**
 * Commit task.
 *
 * @param outputFormat Output format.
 * @throws IgniteCheckedException In case of Grid exception.
 * @throws IOException In case of IO exception.
 * @throws InterruptedException In case of interrupt.
 */
protected void commit(@Nullable OutputFormat outputFormat)
        throws IgniteCheckedException, IOException, InterruptedException {
    if (hadoopCtx.writer() != null) {
        assert outputFormat != null;

        OutputCommitter outputCommitter = outputFormat.getOutputCommitter(hadoopCtx);

        if (outputCommitter.needsTaskCommit(hadoopCtx))
            outputCommitter.commitTask(hadoopCtx);
    }
}

From source file:org.apache.parquet.pig.PerfTest2.java

License:Apache License

public static void write(String out) throws IOException, ParserException, InterruptedException, ExecException {
    {
        StringBuilder schemaString = new StringBuilder("a0: chararray");
        for (int i = 1; i < COLUMN_COUNT; i++) {
            schemaString.append(", a" + i + ": chararray");
        }

        String location = out;
        String schema = schemaString.toString();

        StoreFuncInterface storer = new ParquetStorer();
        Job job = new Job(conf);
        storer.setStoreFuncUDFContextSignature("sig");
        String absPath = storer.relToAbsPathForStoreLocation(location,
                new Path(new File(".").getAbsoluteFile().toURI()));
        storer.setStoreLocation(absPath, job);
        storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));
        @SuppressWarnings("unchecked") // that's how the base class is defined
        OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
        // it's ContextUtil.getConfiguration(job), not just conf!
        JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
                new JobID("jt", jobid++));
        outputFormat.checkOutputSpecs(jobContext);
        if (schema != null) {
            ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
            storer.checkSchema(resourceSchema);
            if (storer instanceof StoreMetadata) {
                ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
            }
        }
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
        RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
        storer.prepareToWrite(recordWriter);

        for (int i = 0; i < ROW_COUNT; i++) {
            Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
            for (int j = 0; j < COLUMN_COUNT; j++) {
                tuple.set(j, "a" + i + "_" + j);
            }
            storer.putNext(tuple);
        }

        recordWriter.close(taskAttemptContext);
        OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
        outputCommitter.commitTask(taskAttemptContext);
        outputCommitter.commitJob(jobContext);

    }
}

From source file:org.apache.pig.impl.io.PigFile.java

License:Apache License

public void store(DataBag data, FuncSpec storeFuncSpec, PigContext pigContext) throws IOException {
    Configuration conf = ConfigurationUtil.toConfiguration(pigContext.getProperties());
    // create a simulated JobContext
    JobContext jc = HadoopShims.createJobContext(conf, new JobID());
    StoreFuncInterface sfunc = (StoreFuncInterface) PigContext.instantiateFuncFromSpec(storeFuncSpec);
    OutputFormat<?, ?> of = sfunc.getOutputFormat();

    POStore store = new POStore(new OperatorKey());
    store.setSFile(new FileSpec(file, storeFuncSpec));
    PigOutputFormat.setLocation(jc, store);
    OutputCommitter oc;
    // create a simulated TaskAttemptContext

    TaskAttemptContext tac = HadoopShims.createTaskAttemptContext(conf, HadoopShims.getNewTaskAttemptID());
    PigOutputFormat.setLocation(tac, store);
    RecordWriter<?, ?> rw;
    try {
        of.checkOutputSpecs(jc);
        oc = of.getOutputCommitter(tac);
        oc.setupJob(jc);
        oc.setupTask(tac);
        rw = of.getRecordWriter(tac);
        sfunc.prepareToWrite(rw);

        for (Iterator<Tuple> it = data.iterator(); it.hasNext();) {
            Tuple row = it.next();
            sfunc.putNext(row);
        }
        rw.close(tac);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    if (oc.needsTaskCommit(tac)) {
        oc.commitTask(tac);
    }
    HadoopShims.commitOrCleanup(oc, jc);
}