Example usage for org.apache.hadoop.mapreduce OutputCommitter commitTask

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce OutputCommitter commitTask.

Prototype

public abstract void commitTask(TaskAttemptContext taskContext) throws IOException;

Document

To promote the task's temporary output to the final output location.
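
Before the per-project examples below, here is a minimal, self-contained sketch of the typical commit sequence using the stock TextOutputFormat and its FileOutputCommitter. The standalone driver class, output path, and fabricated task-attempt ID are illustrative assumptions, not taken from any of the sources that follow; the point is the order of calls: setupJob, setupTask, write and close a RecordWriter, check needsTaskCommit, call commitTask, and finally commitJob.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class CommitTaskSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Illustrative output path -- adjust for your environment.
        FileOutputFormat.setOutputPath(job, new Path("/tmp/commit-task-demo"));

        TextOutputFormat<LongWritable, Text> outputFormat = new TextOutputFormat<>();
        // Fabricated task attempt id, only to build a TaskAttemptContext outside a real MR job.
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0));

        OutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupJob(job);          // once per job
        committer.setupTask(taskContext); // once per task attempt

        RecordWriter<LongWritable, Text> writer = outputFormat.getRecordWriter(taskContext);
        writer.write(new LongWritable(1), new Text("hello"));
        writer.close(taskContext);

        // commitTask promotes this attempt's temporary output; needsTaskCommit lets
        // committers that wrote nothing (or need no promotion) skip the call.
        if (committer.needsTaskCommit(taskContext)) {
            committer.commitTask(taskContext);
        }
        committer.commitJob(job); // finally, publish the committed task output
    }
}

Note that commitTask only promotes this attempt's temporary output into the job's pending output; with the classic FileOutputCommitter algorithm (version 1) the results become visible under the final output directory only after commitJob runs.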

Usage

From source file:org.apache.giraph.worker.BspServiceWorkerForCohesiveSubgraph.java

License:Apache License

/**
 * Save the vertices using the user-defined VertexOutputFormat from our
 * vertexArray based on the split.
 *
 * @param numLocalVertices Number of local vertices
 * @throws IOException
 * @throws InterruptedException
 */
private void saveVertices(long numLocalVertices) throws IOException, InterruptedException {
    if (getConfiguration().getVertexOutputFormatClass() == null) {
        LOG.warn("saveVertices: " + GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS
                + " not specified -- there will be no saved output");
        return;
    }
    if (getConfiguration().doOutputDuringComputation()) {
        if (LOG.isInfoEnabled()) {
            LOG.info("saveVertices: The option for doing output during "
                    + "computation is selected, so there will be no saving of the "
                    + "output in the end of application");
        }
        return;
    }

    int numThreads = Math.min(getConfiguration().getNumOutputThreads(), getPartitionStore().getNumPartitions());
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Starting to save "
            + numLocalVertices + " vertices " + "using " + numThreads + " threads");
    final VertexOutputFormat<I, V, E> vertexOutputFormat = getConfiguration().createVertexOutputFormat();
    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    VertexWriter<I, V, E> vertexWriter = vertexOutputFormat.createVertexWriter(getContext());
                    vertexWriter.setConf(
                            (ImmutableClassesGiraphConfiguration<I, V, E, Writable>) getConfiguration());
                    vertexWriter.initialize(getContext());
                    long verticesWritten = 0;
                    long nextPrintVertices = 0;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();
                    for (Integer partitionId : getPartitionStore().getPartitionIds()) {
                        Partition<I, V, E, M> partition = getPartitionStore().getPartition(partitionId);

                        if (getConfiguration().getBoolean("giraph.ktruss.subgraph", false)) {
                            /**
                             * Special for the ktruss output
                             */
                            BasicGraphStoreInterface gs = (BasicGraphStoreInterface) (partition);
                            for (BasicVertex rv : gs.getLocalVertex()) {
                                Vertex<I, V, E, M> vertex = getConfiguration().createVertex();
                                List<Edge<I, E>> edges = Lists.newLinkedList();
                                for (BasicEdge nb : rv.getNeighbors()) {
                                    edges.add(EdgeFactory.create(((I) new IntWritable(nb.getTargetId())),
                                            ((E) NullWritable.get())));
                                }
                                vertex.initialize((I) (new IntWritable(rv.getId())), ((V) new IntWritable(0)),
                                        edges);

                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex + " out of "
                                                    + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        } else {
                            for (Vertex<I, V, E, M> vertex : partition) {
                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex + " out of "
                                                    + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        }
                        ++partitionIndex;
                    }
                    vertexWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-vertices-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Done saving vertices.");
    // YARN: we must commit the "task" output ourselves here; Hadoop MR is not around to do it for us.
    if (getConfiguration().isPureYarnJob() && getConfiguration().getVertexOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter = vertexOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                        "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain " + "OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has " + "FAILED.", ioe);
        }
    }
}

From source file:org.apache.hcatalog.data.transfer.impl.HCatOutputFormatWriter.java

License:Apache License

@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {

    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = HCatHadoopShims.Instance.get().createTaskAttemptContext(conf,
            new TaskAttemptID(HCatHadoopShims.Instance.get().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}

From source file:org.apache.hcatalog.mapreduce.FileRecordWriterContainer.java

License:Apache License

@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    if (dynamicPartitioningUsed) {
        for (org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters
                .values()) {
            // We are in RecordWriter.close(), so it makes sense that the context would be TaskInputOutput.
            bwriter.close(reporter);
        }
        for (Map.Entry<String, org.apache.hadoop.mapred.OutputCommitter> entry : baseDynamicCommitters
                .entrySet()) {
            org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey());
            OutputCommitter baseOutputCommitter = entry.getValue();
            if (baseOutputCommitter.needsTaskCommit(currContext)) {
                baseOutputCommitter.commitTask(currContext);
            }
        }
    } else {
        getBaseRecordWriter().close(reporter);
    }
}

From source file:org.apache.hcatalog.pig.TestE2EScenarios.java

License:Apache License

private void copyTable(String in, String out) throws IOException, InterruptedException {
    Job ijob = new Job();
    Job ojob = new Job();
    HCatInputFormat inpy = new HCatInputFormat();
    inpy.setInput(ijob, null, in);
    HCatOutputFormat oupy = new HCatOutputFormat();
    oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));

    // Test HCatContext

    System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
    if (HCatContext.INSTANCE.getConf().isPresent()) {
        System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get()
                .getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
                        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
    }

    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
    System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
    oupy.setSchema(ojob, tableSchema);
    oupy.checkOutputSpecs(ojob);
    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
    oc.setupJob(ojob);

    for (InputSplit split : inpy.getSplits(ijob)) {

        TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
        TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());

        RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
        rr.initialize(split, rtaskContext);

        OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
        taskOc.setupTask(wtaskContext);
        RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);

        while (rr.nextKeyValue()) {
            rw.write(rr.getCurrentKey(), rr.getCurrentValue());
        }
        rw.close(wtaskContext);
        taskOc.commitTask(wtaskContext);
        rr.close();
    }

    oc.commitJob(ojob);
}

From source file:org.apache.hive.hcatalog.data.transfer.impl.HCatOutputFormatWriter.java

License:Apache License

@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {

    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,
            new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}

From source file:org.apache.hive.hcatalog.mapreduce.DynamicPartitionFileRecordWriterContainer.java

License:Apache License

@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    for (RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters.values()) {
        // We are in RecordWriter.close(), so it makes sense that the context
        // would be TaskInputOutput.
        bwriter.close(reporter);
    }

    TaskCommitContextRegistry.getInstance().register(context,
            new TaskCommitContextRegistry.TaskCommitterProxy() {
                @Override
                public void abortTask(TaskAttemptContext context) throws IOException {
                    for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo
                            .entrySet()) {
                        String dynKey = outputJobInfoEntry.getKey();
                        OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                        LOG.info("Aborting task-attempt for " + outputJobInfo.getLocation());
                        baseDynamicCommitters.get(dynKey).abortTask(dynamicContexts.get(dynKey));
                    }
                }

                @Override
                public void commitTask(TaskAttemptContext context) throws IOException {
                    for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo
                            .entrySet()) {
                        String dynKey = outputJobInfoEntry.getKey();
                        OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                        LOG.info("Committing task-attempt for " + outputJobInfo.getLocation());
                        TaskAttemptContext dynContext = dynamicContexts.get(dynKey);
                        OutputCommitter dynCommitter = baseDynamicCommitters.get(dynKey);
                        if (dynCommitter.needsTaskCommit(dynContext)) {
                            dynCommitter.commitTask(dynContext);
                        } else {
                            LOG.info("Skipping commitTask() for " + outputJobInfo.getLocation());
                        }
                    }
                }
            });
}

From source file:org.apache.hive.hcatalog.mapreduce.TestHCatOutputFormat.java

License:Apache License

public void publishTest(Job job) throws Exception {
    HCatOutputFormat hcof = new HCatOutputFormat();
    TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID());
    OutputCommitter committer = hcof.getOutputCommitter(tac);
    committer.setupJob(job);
    committer.setupTask(tac);
    committer.commitTask(tac);
    committer.commitJob(job);

    Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1"));
    assertNotNull(part);

    StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters());
    assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue");
    assertTrue(part.getSd().getLocation().indexOf("p1") != -1);
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Task.java

License:Apache License

/**
 * Commit task.
 *
 * @param outputFormat Output format.
 * @throws IgniteCheckedException In case of Grid exception.
 * @throws IOException In case of IO exception.
 * @throws InterruptedException In case of interrupt.
 */
protected void commit(@Nullable OutputFormat outputFormat)
        throws IgniteCheckedException, IOException, InterruptedException {
    if (hadoopCtx.writer() != null) {
        assert outputFormat != null;

        OutputCommitter outputCommitter = outputFormat.getOutputCommitter(hadoopCtx);

        if (outputCommitter.needsTaskCommit(hadoopCtx))
            outputCommitter.commitTask(hadoopCtx);
    }
}

From source file:org.apache.parquet.pig.PerfTest2.java

License:Apache License

public static void write(String out) throws IOException, ParserException, InterruptedException, ExecException {
    {
        StringBuilder schemaString = new StringBuilder("a0: chararray");
        for (int i = 1; i < COLUMN_COUNT; i++) {
            schemaString.append(", a" + i + ": chararray");
        }

        String location = out;
        String schema = schemaString.toString();

        StoreFuncInterface storer = new ParquetStorer();
        Job job = new Job(conf);
        storer.setStoreFuncUDFContextSignature("sig");
        String absPath = storer.relToAbsPathForStoreLocation(location,
                new Path(new File(".").getAbsoluteFile().toURI()));
        storer.setStoreLocation(absPath, job);
        storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));
        @SuppressWarnings("unchecked") // that's how the base class is defined
        OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
        // it's ContextUtil.getConfiguration(job), not just conf!
        JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
                new JobID("jt", jobid++));
        outputFormat.checkOutputSpecs(jobContext);
        if (schema != null) {
            ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
            storer.checkSchema(resourceSchema);
            if (storer instanceof StoreMetadata) {
                ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
            }
        }
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
        RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
        storer.prepareToWrite(recordWriter);

        for (int i = 0; i < ROW_COUNT; i++) {
            Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
            for (int j = 0; j < COLUMN_COUNT; j++) {
                tuple.set(j, "a" + i + "_" + j);
            }
            storer.putNext(tuple);
        }

        recordWriter.close(taskAttemptContext);
        OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
        outputCommitter.commitTask(taskAttemptContext);
        outputCommitter.commitJob(jobContext);

    }
}

From source file:org.apache.pig.impl.io.PigFile.java

License:Apache License

public void store(DataBag data, FuncSpec storeFuncSpec, PigContext pigContext) throws IOException {
    Configuration conf = ConfigurationUtil.toConfiguration(pigContext.getProperties());
    // create a simulated JobContext
    JobContext jc = HadoopShims.createJobContext(conf, new JobID());
    StoreFuncInterface sfunc = (StoreFuncInterface) PigContext.instantiateFuncFromSpec(storeFuncSpec);
    OutputFormat<?, ?> of = sfunc.getOutputFormat();

    POStore store = new POStore(new OperatorKey());
    store.setSFile(new FileSpec(file, storeFuncSpec));
    PigOutputFormat.setLocation(jc, store);
    OutputCommitter oc;
    // create a simulated TaskAttemptContext

    TaskAttemptContext tac = HadoopShims.createTaskAttemptContext(conf, HadoopShims.getNewTaskAttemptID());
    PigOutputFormat.setLocation(tac, store);
    RecordWriter<?, ?> rw;
    try {
        of.checkOutputSpecs(jc);
        oc = of.getOutputCommitter(tac);
        oc.setupJob(jc);
        oc.setupTask(tac);
        rw = of.getRecordWriter(tac);
        sfunc.prepareToWrite(rw);

        for (Iterator<Tuple> it = data.iterator(); it.hasNext();) {
            Tuple row = it.next();
            sfunc.putNext(row);
        }
        rw.close(tac);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    if (oc.needsTaskCommit(tac)) {
        oc.commitTask(tac);
    }
    HadoopShims.commitOrCleanup(oc, jc);
}