Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getOutputFormatClass

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getOutputFormatClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getOutputFormatClass().

Prototype

public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException;

Source Link

Document

Get the OutputFormat class for the job.

Usage

From source file:org.goldenorb.OrbPartition.java

License:Apache License

/**
 * Writes every vertex in {@code vertices} to the output path built from the orb
 * configuration's name node and file output path, using a {@link TextOutputFormat}
 * record writer, and then commits the task output.
 *
 * <p>The whole write-and-commit sequence is attempted up to 15 times. An attempt is
 * retried when it throws, or when the committer reports that no task commit is needed.
 * Between two attempts the method waits one second on this object's monitor. If the
 * thread is interrupted, its interrupt flag is restored and no further attempt is made.
 *
 * <p>Failures are reported with {@code printStackTrace()} and are not propagated to the
 * caller.
 */
private void dumpData() {
    final int maxAttempts = 15;

    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;

    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < maxAttempts) {
        count++;
        tryAgain = false;

        TaskAttemptContext tao = null;
        RecordWriter rw = null;
        // Tracks whether close() has already been attempted so that the finally block
        // never closes the same writer twice.
        boolean writerClosed = false;
        try {
            // The job and its context survive across attempts; only build them once.
            if (job == null) {
                job = new Job(conf);
                job.setOutputFormatClass(TextOutputFormat.class);
                FileOutputFormat.setOutputPath(job,
                        new Path(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath()));
            }
            if (jobContext == null) {
                jobContext = new JobContext(job.getConfiguration(), new JobID());
            }

            System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));

            tao = new TaskAttemptContext(jobContext.getConfiguration(),
                    new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
            FileOutputFormat outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
            rw = outputFormat.getRecordWriter(tao);
            VertexWriter vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
            for (Vertex v : vertices.values()) {
                OrbContext oc = vw.vertexWrite(v);
                rw.write(oc.getKey(), oc.getValue());
            }
            writerClosed = true;
            rw.close(tao);

            FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
            boolean needsCommit = cm.needsTaskCommit(tao);
            if (needsCommit) {
                cm.commitTask(tao);
            }
            cm.cleanupJob(jobContext);
            if (!needsCommit) {
                // Nothing was available to commit: treat the attempt as failed and retry.
                tryAgain = true;
            }
        } catch (IOException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InstantiationException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Honour the interruption instead of retrying: restore the flag and stop.
            Thread.currentThread().interrupt();
            tryAgain = false;
            e.printStackTrace();
        } finally {
            // A failure before the regular close() would otherwise leak the writer.
            if (rw != null && !writerClosed) {
                try {
                    rw.close(tao);
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    tryAgain = false;
                    e.printStackTrace();
                }
            }
        }

        // Back off before the next attempt; skipped when no further attempt will follow.
        if (tryAgain && count < maxAttempts) {
            synchronized (this) {
                try {
                    wait(1000);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    tryAgain = false;
                    e.printStackTrace();
                }
            }
        }
    }
}

From source file:org.mrgeo.hadoop.multipleoutputs.DirectoryMultipleOutputs.java

License:Apache License

/**
 * Returns the record writer registered under {@code baseFileName}, creating and caching
 * one on first use.
 *
 * <p>A new writer is obtained from an instance of the task's configured output format.
 * When {@code countersEnabled} is set, the new writer is wrapped in a
 * {@code RecordWriterWithCounter} before it is cached.
 *
 * @param taskContext  task attempt context supplying the configuration and the output
 *                     format class
 * @param baseFileName cache key for the writer
 * @return the cached or newly created record writer
 * @throws IOException          if the writer cannot be created, or if the output format
 *                              class cannot be found (wrapped as the cause)
 * @throws InterruptedException if creating the writer is interrupted
 */
@SuppressWarnings("rawtypes")
private synchronized RecordWriter getRecordWriter(final TaskAttemptContext taskContext,
        final String baseFileName) throws IOException, InterruptedException {

    // Cache hit: hand back the existing writer straight away.
    final RecordWriter cached = recordWriters.get(baseFileName);
    if (cached != null) {
        return cached;
    }

    // MultipleOutputs calls FileOutputFormat.setOutputName(taskContext, baseFileName) at
    // this point. That method is package-visible, so it cannot be called from here, and
    // the inlined equivalent
    //     taskContext.getConfiguration().set(BASE_OUTPUT_NAME, baseFileName);
    // (which would replace the default "part" file prefix with the directory name) is
    // intentionally left disabled.

    RecordWriter created;
    try {
        final Configuration configuration = taskContext.getConfiguration();
        final Class<? extends OutputFormat<?, ?>> formatClass = taskContext.getOutputFormatClass();
        final OutputFormat outputFormat = ReflectionUtils.newInstance(formatClass, configuration);
        created = outputFormat.getRecordWriter(taskContext);
    } catch (final ClassNotFoundException e) {
        throw new IOException(e);
    }

    // With counters enabled, decorate the writer so that writes increment counters.
    if (countersEnabled) {
        created = new RecordWriterWithCounter(created, baseFileName, context);
    }

    recordWriters.put(baseFileName, created);
    return created;
}