List of usage examples for org.apache.hadoop.mapreduce RecordWriter close
public abstract void close(TaskAttemptContext context) throws IOException, InterruptedException;
Close this RecordWriter to future operations.
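The examples below all follow the same pattern: obtain a RecordWriter from an OutputFormat, write records, then call close(TaskAttemptContext) so buffered output is flushed and resources are released before the task commits. Here is a minimal, hypothetical sketch of that pattern; the class name, output directory, and use of TextOutputFormat are illustrative assumptions, not taken from any example on this page.

// Minimal sketch, assuming a local Hadoop client and a throw-away output directory.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordWriterCloseSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileOutputFormat needs an output directory before it can hand out a writer.
        conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp/recordwriter-close-demo");
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        TextOutputFormat<NullWritable, Text> outputFormat = new TextOutputFormat<>();
        RecordWriter<NullWritable, Text> writer = outputFormat.getRecordWriter(context);
        try {
            writer.write(NullWritable.get(), new Text("hello"));
        } finally {
            // close(TaskAttemptContext) flushes and releases the writer; it must not be
            // used again afterwards, and it is normally called before the task is committed.
            writer.close(context);
        }
    }
}

The Tez, TinkerPop, GoldenOrb, Kiji, and Kudu examples that follow apply the same pattern to their own OutputFormat and RecordWriter implementations.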
From source file:org.apache.tez.mapreduce.output.MultiMROutput.java
License:Apache License
/**
 * Call this in the processor before finishing to ensure outputs that
 * outputs have been flushed. Must be called before commit.
 * @throws IOException
 */
@Override
public void flush() throws IOException {
    if (flushed.getAndSet(true)) {
        return;
    }
    try {
        if (useNewApi) {
            for (RecordWriter writer : newRecordWriters.values()) {
                writer.close(newApiTaskAttemptContext);
            }
        } else {
            for (org.apache.hadoop.mapred.RecordWriter writer : oldRecordWriters.values()) {
                writer.close(null);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException("Interrupted while closing record writer", e);
    }
}
From source file:org.apache.tez.mapreduce.processor.map.MapProcessor.java
License:Apache License
private void runNewMapper(final JobConf job, MRTaskReporter reporter, final MRInputLegacy in, KeyValueWriter out)
        throws IOException, InterruptedException {

    // Initialize input in-line since it sets parameters which may be used by the processor.
    // Done only for MRInput.
    // TODO use new method in MRInput to get required info
    //in.initialize(job, master);

    // make a task context so we can get the classes
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext();

    // make a mapper
    org.apache.hadoop.mapreduce.Mapper mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper) ReflectionUtils.newInstance(taskContext.getMapperClass(), job);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }

    org.apache.hadoop.mapreduce.RecordReader input = new NewRecordReader(in);

    org.apache.hadoop.mapreduce.RecordWriter output = new NewOutputCollector(out);

    org.apache.hadoop.mapreduce.InputSplit split = in.getNewInputSplit();

    updateJobWithSplit(job, split);

    org.apache.hadoop.mapreduce.MapContext mapContext = new MapContextImpl(job, taskAttemptId, input, output,
            committer, processorContext, split, reporter);

    org.apache.hadoop.mapreduce.Mapper.Context mapperContext = new WrappedMapper().getMapContext(mapContext);

    input.initialize(split, mapperContext);
    mapper.run(mapperContext);

    // Set progress to 1.0f if there was no exception,
    reporter.setProgress(1.0f);

    this.statusUpdate();
    input.close();
    output.close(mapperContext);
}
From source file:org.apache.tez.mapreduce.processor.reduce.ReduceProcessor.java
License:Apache License
void runNewReducer(JobConf job, final MRTaskReporter reporter, OrderedGroupedInputLegacy input,
        RawComparator comparator, Class keyClass, Class valueClass, final KeyValueWriter out)
        throws IOException, InterruptedException, ClassNotFoundException, TezException {

    // make a task context so we can get the classes
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext();

    // make a reducer
    org.apache.hadoop.mapreduce.Reducer reducer = (org.apache.hadoop.mapreduce.Reducer) ReflectionUtils
            .newInstance(taskContext.getReducerClass(), job);

    // wrap value iterator to report progress.
    final TezRawKeyValueIterator rawIter = input.getIterator();
    TezRawKeyValueIterator rIter = new TezRawKeyValueIterator() {
        public void close() throws IOException {
            rawIter.close();
        }

        public DataInputBuffer getKey() throws IOException {
            return rawIter.getKey();
        }

        public Progress getProgress() {
            return rawIter.getProgress();
        }

        @Override
        public boolean isSameKey() throws IOException {
            return rawIter.isSameKey();
        }

        public DataInputBuffer getValue() throws IOException {
            return rawIter.getValue();
        }

        public boolean next() throws IOException {
            boolean ret = rawIter.next();
            reporter.setProgress(rawIter.getProgress().getProgress());
            return ret;
        }
    };

    org.apache.hadoop.mapreduce.RecordWriter trackedRW = new org.apache.hadoop.mapreduce.RecordWriter() {
        @Override
        public void write(Object key, Object value) throws IOException, InterruptedException {
            out.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        }
    };

    org.apache.hadoop.mapreduce.Reducer.Context reducerContext = createReduceContext(reducer, job, taskAttemptId,
            rIter, reduceInputKeyCounter, reduceInputValueCounter, trackedRW, committer, reporter, comparator,
            keyClass, valueClass);

    reducer.run(reducerContext);

    // Set progress to 1.0f if there was no exception,
    reporter.setProgress(1.0f);

    trackedRW.close(reducerContext);
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.AbstractIoRegistryCheck.java
License:Apache License
private void validateIoRegistryGraph(final HadoopGraph graph,
        final Class<? extends GraphComputer> graphComputerClass,
        final RecordWriter<NullWritable, VertexWritable> writer) throws Exception {

    for (int i = 0; i < NUMBER_OF_VERTICES; i++) {
        final StarGraph starGraph = StarGraph.open();
        Vertex vertex = starGraph.addVertex(T.label, "place", T.id, i, "point", new ToyPoint(i, i * 10),
                "message", "I'm " + i, "triangle", new ToyTriangle(i, i * 10, i * 100));
        vertex.addEdge("connection", starGraph.addVertex(T.id, i > 0 ? i - 1 : NUMBER_OF_VERTICES - 1));
        writer.write(NullWritable.get(), new VertexWritable(starGraph.getStarVertex()));
    }
    writer.close(new TaskAttemptContextImpl(ConfUtil.makeHadoopConfiguration(graph.configuration()),
            new TaskAttemptID()));

    // OLAP TESTING //
    validatePointTriangles(graph.traversal().withComputer(graphComputerClass).V().project("point", "triangle")
            .by("point").by("triangle").toList());
    validatePointTriangles(graph.traversal().withComputer(graphComputerClass).V().out()
            .project("point", "triangle").by("point").by("triangle").toList());
    validatePointTriangles(graph.traversal().withComputer(graphComputerClass).V().out().out()
            .project("point", "triangle").by("point").by("triangle").toList());

    // OLTP TESTING //
    validatePointTriangles(
            graph.traversal().V().project("point", "triangle").by("point").by("triangle").toList());

    // HDFS TESTING //
    /*validatePointTriangles(IteratorUtils.<Map<String, Object>>asList(IteratorUtils.<Vertex, Map<String, Object>>map(
            FileSystemStorage.open(ConfUtil.makeHadoopConfiguration(graph.configuration())).head(
                    graph.configuration().getInputLocation(), graph.configuration().getGraphReader()),
            vertex -> {
                return new HashMap<String, Object>() {{
                    put("point", vertex.value("point"));
                    put("triangle", vertex.value("triangle"));
                }};
            })));*/
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java
License:Apache License
private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass) throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {}", split.getPath().getName() + " ({}",
                split.getStart() + "..." + (split.getStart() + split.getLength()), "{} {} bytes)");
        final RecordReader reader = inputFormat.createRecordReader(split, job);
        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer) writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}
From source file:org.goldenorb.OrbPartition.java
License:Apache License
private void dumpData() {
    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;
    TaskAttemptContext tao = null;
    RecordWriter rw;
    VertexWriter vw;
    FileOutputFormat outputFormat;

    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < 15)
        try {
            count++;
            tryAgain = false;
            if (job == null) {
                job = new Job(conf);
                job.setOutputFormatClass(TextOutputFormat.class);
                FileOutputFormat.setOutputPath(job,
                        new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
            }
            if (jobContext == null) {
                jobContext = new JobContext(job.getConfiguration(), new JobID());
            }

            System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));

            tao = new TaskAttemptContext(jobContext.getConfiguration(),
                    new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
            outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
            rw = outputFormat.getRecordWriter(tao);
            vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
            for (Vertex v : vertices.values()) {
                OrbContext oc = vw.vertexWrite(v);
                rw.write(oc.getKey(), oc.getValue());
                // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
                // oc.getKey().toString() + ", " + oc.getValue().toString());
            }
            rw.close(tao);

            FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
            if (cm.needsTaskCommit(tao)) {
                cm.commitTask(tao);
                cm.cleanupJob(jobContext);
            } else {
                cm.cleanupJob(jobContext);
                tryAgain = true;
            }
        } catch (IOException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InstantiationException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InterruptedException e) {
            tryAgain = true;
            e.printStackTrace();
        }
    if (tryAgain) {
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(!fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kududb.mapreduce.TestKuduTableOutputFormat.java
License:Apache License
@Test
public void test() throws Exception {
    createTable(TABLE_NAME, getBasicSchema(), new CreateTableOptions());

    KuduTableOutputFormat output = new KuduTableOutputFormat();
    Configuration conf = new Configuration();
    conf.set(KuduTableOutputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
    conf.set(KuduTableOutputFormat.OUTPUT_TABLE_KEY, TABLE_NAME);
    output.setConf(conf);

    String multitonKey = conf.get(KuduTableOutputFormat.MULTITON_KEY);
    KuduTable table = KuduTableOutputFormat.getKuduTable(multitonKey);
    assertNotNull(table);

    Insert insert = table.newInsert();
    PartialRow row = insert.getRow();
    row.addInt(0, 1);
    row.addInt(1, 2);
    row.addInt(2, 3);
    row.addString(3, "a string");
    row.addBoolean(4, true);

    RecordWriter<NullWritable, Operation> rw = output.getRecordWriter(null);
    rw.write(NullWritable.get(), insert);
    rw.close(null);

    AsyncKuduScanner.AsyncKuduScannerBuilder builder = client.newScannerBuilder(table);
    assertEquals(1, countRowsInScan(builder.build()));
}