List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getOutputFormatClass()
public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException;
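Before the full examples, here is a minimal sketch of the usual pattern: resolve the configured OutputFormat class from the context, instantiate it through ReflectionUtils so it picks up the job Configuration, and ask it for a RecordWriter. The helper name openWriter and the error handling are illustrative assumptions, not part of the Hadoop API.

import java.io.IOException;

import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.ReflectionUtils;

public final class OutputFormatHelper {

    // Illustrative helper: resolve the job's configured OutputFormat and open a
    // RecordWriter for the given task attempt.
    static RecordWriter<?, ?> openWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        try {
            // getOutputFormatClass() reads the class name from the job configuration,
            // so it can fail with ClassNotFoundException at runtime.
            Class<? extends OutputFormat<?, ?>> formatClass = context.getOutputFormatClass();
            // ReflectionUtils.newInstance also injects the Configuration when the
            // class implements Configurable.
            OutputFormat<?, ?> format =
                    ReflectionUtils.newInstance(formatClass, context.getConfiguration());
            return format.getRecordWriter(context);
        } catch (ClassNotFoundException e) {
            // Common convention (see the examples below): wrap in an IOException.
            throw new IOException(e);
        }
    }
}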
From source file: org.goldenorb.OrbPartition.java
License: Apache License
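This example resolves the configured OutputFormat reflectively via getOutputFormatClass() and uses its RecordWriter, together with a FileOutputCommitter, to dump a partition's vertices to HDFS, retrying up to 15 times on failure.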
private void dumpData() {
    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;
    TaskAttemptContext tao = null;
    RecordWriter rw;
    VertexWriter vw;
    FileOutputFormat outputFormat;

    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < 15) {
        try {
            count++;
            tryAgain = false;
            if (job == null) {
                job = new Job(conf);
                job.setOutputFormatClass(TextOutputFormat.class);
                FileOutputFormat.setOutputPath(job,
                        new Path(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath()));
            }
            if (jobContext == null) {
                jobContext = new JobContext(job.getConfiguration(), new JobID());
            }
            System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
            tao = new TaskAttemptContext(jobContext.getConfiguration(),
                    new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
            // Resolve the configured OutputFormat class and instantiate it for this attempt.
            outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
            rw = outputFormat.getRecordWriter(tao);
            vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
            // Serialize every vertex in this partition through the vertex writer.
            for (Vertex v : vertices.values()) {
                OrbContext oc = vw.vertexWrite(v);
                rw.write(oc.getKey(), oc.getValue());
                // orbLogger.info("Partition: " + Integer.toString(partitionId) + " writing: "
                //     + oc.getKey().toString() + ", " + oc.getValue().toString());
            }
            rw.close(tao);
            // Commit the task output; if nothing needed committing, try the dump again.
            FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
            if (cm.needsTaskCommit(tao)) {
                cm.commitTask(tao);
                cm.cleanupJob(jobContext);
            } else {
                cm.cleanupJob(jobContext);
                tryAgain = true;
            }
        } catch (IOException | InstantiationException | IllegalAccessException
                | ClassNotFoundException | InterruptedException e) {
            tryAgain = true;
            e.printStackTrace();
        }
    }
    if (tryAgain) {
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file: org.mrgeo.hadoop.multipleoutputs.DirectoryMultipleOutputs.java
License: Apache License
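This example backs a multiple-outputs writer cache: on a cache miss it resolves the OutputFormat class from the TaskAttemptContext, instantiates it with ReflectionUtils, and caches the resulting RecordWriter keyed by base file name.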
@SuppressWarnings("rawtypes")
private synchronized RecordWriter getRecordWriter(final TaskAttemptContext taskContext,
        final String baseFileName) throws IOException, InterruptedException {
    // Look for a record writer in the cache.
    RecordWriter writer = recordWriters.get(baseFileName);

    // If not in the cache, create a new one.
    if (writer == null) {
        // In MultipleOutputs, the following commented-out line of code was used here:
        //
        //   FileOutputFormat.setOutputName(taskContext, baseFileName);
        //
        // We can't do that because that method has package visibility, but we can do
        // something even worse and inline that code. This makes the output file have the
        // same prefix as the directory, instead of the default "part".
        //taskContext.getConfiguration().set(BASE_OUTPUT_NAME, baseFileName);
        try {
            Configuration conf = taskContext.getConfiguration();
            // Resolve the job's OutputFormat class and instantiate it with the job configuration.
            Class<? extends OutputFormat<?, ?>> format = taskContext.getOutputFormatClass();
            OutputFormat of = ReflectionUtils.newInstance(format, conf);
            writer = of.getRecordWriter(taskContext);
        } catch (final ClassNotFoundException e) {
            throw new IOException(e);
        }

        // If counters are enabled, wrap the writer with the context to increment counters.
        if (countersEnabled) {
            writer = new RecordWriterWithCounter(writer, baseFileName, context);
        }

        // Add the record writer to the cache.
        recordWriters.put(baseFileName, writer);
    }
    return writer;
}