List of usage examples for org.apache.hadoop.mapreduce RecordWriter close
public abstract void close(TaskAttemptContext context) throws IOException, InterruptedException;
Close this RecordWriter to future operations.
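The examples below all follow the same pattern: obtain a RecordWriter from an OutputFormat, write records, then call close(TaskAttemptContext) so buffered output is flushed and resources are released before the task commits. Here is a minimal, hypothetical sketch of that pattern; the class name, output directory, and use of TextOutputFormat are illustrative assumptions, not taken from any example on this page.

// Minimal sketch, assuming a local Hadoop client and a throw-away output directory.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordWriterCloseSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileOutputFormat needs an output directory before it can hand out a writer.
        conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp/recordwriter-close-demo");
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        TextOutputFormat<NullWritable, Text> outputFormat = new TextOutputFormat<>();
        RecordWriter<NullWritable, Text> writer = outputFormat.getRecordWriter(context);
        try {
            writer.write(NullWritable.get(), new Text("hello"));
        } finally {
            // close(TaskAttemptContext) flushes and releases the writer; it must not be
            // used again afterwards, and it is normally called before the task is committed.
            writer.close(context);
        }
    }
}

The Tez, TinkerPop, GoldenOrb, Kiji, and Kudu examples that follow apply the same pattern to their own OutputFormat and RecordWriter implementations.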
From source file:org.apache.tez.mapreduce.output.MultiMROutput.java
License:Apache License
/**
 * Call this in the processor before finishing to ensure outputs that
 * outputs have been flushed. Must be called before commit.
 * @throws IOException
 */
@Override
public void flush() throws IOException {
    if (flushed.getAndSet(true)) {
        return;
    }
    try {
        if (useNewApi) {
            for (RecordWriter writer : newRecordWriters.values()) {
                writer.close(newApiTaskAttemptContext);
            }
        } else {
            for (org.apache.hadoop.mapred.RecordWriter writer : oldRecordWriters.values()) {
                writer.close(null);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException("Interrupted while closing record writer", e);
    }
}
From source file:org.apache.tez.mapreduce.processor.map.MapProcessor.java
License:Apache License
private void runNewMapper(final JobConf job, MRTaskReporter reporter, final MRInputLegacy in, KeyValueWriter out)
        throws IOException, InterruptedException {

    // Initialize input in-line since it sets parameters which may be used by the processor.
    // Done only for MRInput.
    // TODO use new method in MRInput to get required info
    //in.initialize(job, master);

    // make a task context so we can get the classes
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext();

    // make a mapper
    org.apache.hadoop.mapreduce.Mapper mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper) ReflectionUtils.newInstance(taskContext.getMapperClass(), job);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }

    org.apache.hadoop.mapreduce.RecordReader input = new NewRecordReader(in);

    org.apache.hadoop.mapreduce.RecordWriter output = new NewOutputCollector(out);

    org.apache.hadoop.mapreduce.InputSplit split = in.getNewInputSplit();

    updateJobWithSplit(job, split);

    org.apache.hadoop.mapreduce.MapContext mapContext = new MapContextImpl(job, taskAttemptId, input, output,
            committer, processorContext, split, reporter);

    org.apache.hadoop.mapreduce.Mapper.Context mapperContext = new WrappedMapper().getMapContext(mapContext);

    input.initialize(split, mapperContext);
    mapper.run(mapperContext);

    // Set progress to 1.0f if there was no exception,
    reporter.setProgress(1.0f);

    this.statusUpdate();
    input.close();
    output.close(mapperContext);
}
From source file:org.apache.tez.mapreduce.processor.reduce.ReduceProcessor.java
License:Apache License
void runNewReducer(JobConf job, final MRTaskReporter reporter, OrderedGroupedInputLegacy input,
        RawComparator comparator, Class keyClass, Class valueClass, final KeyValueWriter out)
        throws IOException, InterruptedException, ClassNotFoundException, TezException {

    // make a task context so we can get the classes
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext();

    // make a reducer
    org.apache.hadoop.mapreduce.Reducer reducer = (org.apache.hadoop.mapreduce.Reducer) ReflectionUtils
            .newInstance(taskContext.getReducerClass(), job);

    // wrap value iterator to report progress.
    final TezRawKeyValueIterator rawIter = input.getIterator();
    TezRawKeyValueIterator rIter = new TezRawKeyValueIterator() {
        public void close() throws IOException {
            rawIter.close();
        }

        public DataInputBuffer getKey() throws IOException {
            return rawIter.getKey();
        }

        public Progress getProgress() {
            return rawIter.getProgress();
        }

        @Override
        public boolean isSameKey() throws IOException {
            return rawIter.isSameKey();
        }

        public DataInputBuffer getValue() throws IOException {
            return rawIter.getValue();
        }

        public boolean next() throws IOException {
            boolean ret = rawIter.next();
            reporter.setProgress(rawIter.getProgress().getProgress());
            return ret;
        }
    };

    org.apache.hadoop.mapreduce.RecordWriter trackedRW = new org.apache.hadoop.mapreduce.RecordWriter() {
        @Override
        public void write(Object key, Object value) throws IOException, InterruptedException {
            out.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        }
    };

    org.apache.hadoop.mapreduce.Reducer.Context reducerContext = createReduceContext(reducer, job, taskAttemptId,
            rIter, reduceInputKeyCounter, reduceInputValueCounter, trackedRW, committer, reporter, comparator,
            keyClass, valueClass);

    reducer.run(reducerContext);

    // Set progress to 1.0f if there was no exception,
    reporter.setProgress(1.0f);

    trackedRW.close(reducerContext);
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.AbstractIoRegistryCheck.java
License:Apache License
private void validateIoRegistryGraph(final HadoopGraph graph,
        final Class<? extends GraphComputer> graphComputerClass,
        final RecordWriter<NullWritable, VertexWritable> writer) throws Exception {

    for (int i = 0; i < NUMBER_OF_VERTICES; i++) {
        final StarGraph starGraph = StarGraph.open();
        Vertex vertex = starGraph.addVertex(T.label, "place", T.id, i, "point", new ToyPoint(i, i * 10),
                "message", "I'm " + i, "triangle", new ToyTriangle(i, i * 10, i * 100));
        vertex.addEdge("connection", starGraph.addVertex(T.id, i > 0 ? i - 1 : NUMBER_OF_VERTICES - 1));
        writer.write(NullWritable.get(), new VertexWritable(starGraph.getStarVertex()));
    }
    writer.close(new TaskAttemptContextImpl(ConfUtil.makeHadoopConfiguration(graph.configuration()),
            new TaskAttemptID()));

    // OLAP TESTING //
    validatePointTriangles(graph.traversal().withComputer(graphComputerClass).V().project("point", "triangle")
            .by("point").by("triangle").toList());
    validatePointTriangles(graph.traversal().withComputer(graphComputerClass).V().out()
            .project("point", "triangle").by("point").by("triangle").toList());
    validatePointTriangles(graph.traversal().withComputer(graphComputerClass).V().out().out()
            .project("point", "triangle").by("point").by("triangle").toList());

    // OLTP TESTING //
    validatePointTriangles(
            graph.traversal().V().project("point", "triangle").by("point").by("triangle").toList());

    // HDFS TESTING //
    /*validatePointTriangles(IteratorUtils.<Map<String, Object>>asList(IteratorUtils.<Vertex, Map<String, Object>>map(
            FileSystemStorage.open(ConfUtil.makeHadoopConfiguration(graph.configuration())).head(
                    graph.configuration().getInputLocation(), graph.configuration().getGraphReader()),
            vertex -> {
                return new HashMap<String, Object>() {{
                    put("point", vertex.value("point"));
                    put("triangle", vertex.value("triangle"));
                }};
            })));*/
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java
License:Apache License
private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass) throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {}", split.getPath().getName() + " ({}",
                split.getStart() + "..." + (split.getStart() + split.getLength()), "{} {} bytes)");
        final RecordReader reader = inputFormat.createRecordReader(split, job);
        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer) writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}
From source file:org.goldenorb.OrbPartition.java
License:Apache License
private void dumpData() {
    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;
    TaskAttemptContext tao = null;
    RecordWriter rw;
    VertexWriter vw;
    FileOutputFormat outputFormat;

    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < 15)
        try {
            count++;
            tryAgain = false;
            if (job == null) {
                job = new Job(conf);
                job.setOutputFormatClass(TextOutputFormat.class);
                FileOutputFormat.setOutputPath(job,
                        new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
            }
            if (jobContext == null) {
                jobContext = new JobContext(job.getConfiguration(), new JobID());
            }

            System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));

            tao = new TaskAttemptContext(jobContext.getConfiguration(),
                    new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
            outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
            rw = outputFormat.getRecordWriter(tao);
            vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
            for (Vertex v : vertices.values()) {
                OrbContext oc = vw.vertexWrite(v);
                rw.write(oc.getKey(), oc.getValue());
                // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
                // oc.getKey().toString() + ", " + oc.getValue().toString());
            }
            rw.close(tao);

            FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
            if (cm.needsTaskCommit(tao)) {
                cm.commitTask(tao);
                cm.cleanupJob(jobContext);
            } else {
                cm.cleanupJob(jobContext);
                tryAgain = true;
            }
        } catch (IOException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InstantiationException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InterruptedException e) {
            tryAgain = true;
            e.printStackTrace();
        }
    if (tryAgain) {
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(!fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kududb.mapreduce.TestKuduTableOutputFormat.java
License:Apache License
@Test
public void test() throws Exception {
    createTable(TABLE_NAME, getBasicSchema(), new CreateTableOptions());

    KuduTableOutputFormat output = new KuduTableOutputFormat();
    Configuration conf = new Configuration();
    conf.set(KuduTableOutputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
    conf.set(KuduTableOutputFormat.OUTPUT_TABLE_KEY, TABLE_NAME);
    output.setConf(conf);

    String multitonKey = conf.get(KuduTableOutputFormat.MULTITON_KEY);
    KuduTable table = KuduTableOutputFormat.getKuduTable(multitonKey);
    assertNotNull(table);

    Insert insert = table.newInsert();
    PartialRow row = insert.getRow();
    row.addInt(0, 1);
    row.addInt(1, 2);
    row.addInt(2, 3);
    row.addString(3, "a string");
    row.addBoolean(4, true);

    RecordWriter<NullWritable, Operation> rw = output.getRecordWriter(null);
    rw.write(NullWritable.get(), insert);
    rw.close(null);

    AsyncKuduScanner.AsyncKuduScannerBuilder builder = client.newScannerBuilder(table);
    assertEquals(1, countRowsInScan(builder.build()));
}