Example usage for org.apache.hadoop.mapreduce RecordWriter close

List of usage examples for org.apache.hadoop.mapreduce RecordWriter close

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce RecordWriter close.

Prototype

public abstract void close(TaskAttemptContext context) throws IOException, InterruptedException;

Document

Close this RecordWriter to future operations.
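Before the examples, here is a minimal sketch (not taken from the sources below) of the usual call pattern: obtain a writer from an OutputFormat, write records, and close it with the same TaskAttemptContext in a finally block so the output is finalized even if a write fails. The class and method names are illustrative only.

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class RecordWriterCloseSketch {

    /**
     * Writes each line through the supplied OutputFormat and always closes the
     * RecordWriter with the same TaskAttemptContext, even if a write fails.
     * After close() the writer must not be used again.
     */
    static void writeAndClose(OutputFormat<NullWritable, Text> outputFormat,
            TaskAttemptContext context, Iterable<String> lines)
            throws IOException, InterruptedException {
        RecordWriter<NullWritable, Text> writer = outputFormat.getRecordWriter(context);
        try {
            for (String line : lines) {
                writer.write(NullWritable.get(), new Text(line));
            }
        } finally {
            // Release the writer; no further writes are allowed after this call.
            writer.close(context);
        }
    }
}

The same pattern appears, with variations, in the real-world examples below.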

Usage

From source file: org.apache.tez.mapreduce.output.MultiMROutput.java

License: Apache License

/**
 * Call this in the processor before finishing to ensure that all
 * outputs have been flushed. Must be called before commit.
 * @throws IOException
 */
@Override
public void flush() throws IOException {
    if (flushed.getAndSet(true)) {
        return;
    }
    try {
        if (useNewApi) {
            for (RecordWriter writer : newRecordWriters.values()) {
                writer.close(newApiTaskAttemptContext);
            }
        } else {
            for (org.apache.hadoop.mapred.RecordWriter writer : oldRecordWriters.values()) {
                writer.close(null);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException("Interrupted while closing record writer", e);
    }
}

From source file: org.apache.tez.mapreduce.processor.map.MapProcessor.java

License: Apache License

private void runNewMapper(final JobConf job, MRTaskReporter reporter, final MRInputLegacy in,
        KeyValueWriter out) throws IOException, InterruptedException {

    // Initialize input in-line since it sets parameters which may be used by the processor.
    // Done only for MRInput.
    // TODO use new method in MRInput to get required info
    //in.initialize(job, master);

    // make a task context so we can get the classes
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext();

    // make a mapper
    org.apache.hadoop.mapreduce.Mapper mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper) ReflectionUtils.newInstance(taskContext.getMapperClass(),
                job);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }

    org.apache.hadoop.mapreduce.RecordReader input = new NewRecordReader(in);

    org.apache.hadoop.mapreduce.RecordWriter output = new NewOutputCollector(out);

    org.apache.hadoop.mapreduce.InputSplit split = in.getNewInputSplit();

    updateJobWithSplit(job, split);

    org.apache.hadoop.mapreduce.MapContext mapContext = new MapContextImpl(job, taskAttemptId, input, output,
            committer, processorContext, split, reporter);

    org.apache.hadoop.mapreduce.Mapper.Context mapperContext = new WrappedMapper().getMapContext(mapContext);

    input.initialize(split, mapperContext);
    mapper.run(mapperContext);
    // Set progress to 1.0f if there was no exception,
    reporter.setProgress(1.0f);

    this.statusUpdate();
    input.close();
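    // Close the new-API output once the mapper has finished; close() takes the mapper's TaskAttemptContext.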
    output.close(mapperContext);
}

From source file: org.apache.tez.mapreduce.processor.reduce.ReduceProcessor.java

License: Apache License

void runNewReducer(JobConf job, final MRTaskReporter reporter, OrderedGroupedInputLegacy input,
        RawComparator comparator, Class keyClass, Class valueClass, final KeyValueWriter out)
        throws IOException, InterruptedException, ClassNotFoundException, TezException {

    // make a task context so we can get the classes
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext();

    // make a reducer
    org.apache.hadoop.mapreduce.Reducer reducer = (org.apache.hadoop.mapreduce.Reducer) ReflectionUtils
            .newInstance(taskContext.getReducerClass(), job);

    // wrap value iterator to report progress.
    final TezRawKeyValueIterator rawIter = input.getIterator();
    TezRawKeyValueIterator rIter = new TezRawKeyValueIterator() {
        public void close() throws IOException {
            rawIter.close();
        }

        public DataInputBuffer getKey() throws IOException {
            return rawIter.getKey();
        }

        public Progress getProgress() {
            return rawIter.getProgress();
        }

        @Override
        public boolean isSameKey() throws IOException {
            return rawIter.isSameKey();
        }

        public DataInputBuffer getValue() throws IOException {
            return rawIter.getValue();
        }

        public boolean next() throws IOException {
            boolean ret = rawIter.next();
            reporter.setProgress(rawIter.getProgress().getProgress());
            return ret;
        }
    };

    org.apache.hadoop.mapreduce.RecordWriter trackedRW = new org.apache.hadoop.mapreduce.RecordWriter() {

        @Override
        public void write(Object key, Object value) throws IOException, InterruptedException {
            out.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        }
    };

    org.apache.hadoop.mapreduce.Reducer.Context reducerContext = createReduceContext(reducer, job,
            taskAttemptId, rIter, reduceInputKeyCounter, reduceInputValueCounter, trackedRW, committer,
            reporter, comparator, keyClass, valueClass);

    reducer.run(reducerContext);

    // Set progress to 1.0f if there was no exception,
    reporter.setProgress(1.0f);

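    // trackedRW's close(context) is a no-op here, but calling it keeps the RecordWriter contract.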
    trackedRW.close(reducerContext);
}

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.io.AbstractIoRegistryCheck.java

License: Apache License

private void validateIoRegistryGraph(final HadoopGraph graph,
        final Class<? extends GraphComputer> graphComputerClass,
        final RecordWriter<NullWritable, VertexWritable> writer) throws Exception {

    for (int i = 0; i < NUMBER_OF_VERTICES; i++) {
        final StarGraph starGraph = StarGraph.open();
        Vertex vertex = starGraph.addVertex(T.label, "place", T.id, i, "point", new ToyPoint(i, i * 10),
                "message", "I'm " + i, "triangle", new ToyTriangle(i, i * 10, i * 100));
        vertex.addEdge("connection", starGraph.addVertex(T.id, i > 0 ? i - 1 : NUMBER_OF_VERTICES - 1));
        writer.write(NullWritable.get(), new VertexWritable(starGraph.getStarVertex()));
    }
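    // close() requires a TaskAttemptContext, so one is built from the graph's Hadoop configuration.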
    writer.close(new TaskAttemptContextImpl(ConfUtil.makeHadoopConfiguration(graph.configuration()),
            new TaskAttemptID()));

    // OLAP TESTING //
    validatePointTriangles(graph.traversal().withComputer(graphComputerClass).V().project("point", "triangle")
            .by("point").by("triangle").toList());
    validatePointTriangles(graph.traversal().withComputer(graphComputerClass).V().out()
            .project("point", "triangle").by("point").by("triangle").toList());
    validatePointTriangles(graph.traversal().withComputer(graphComputerClass).V().out().out()
            .project("point", "triangle").by("point").by("triangle").toList());
    // OLTP TESTING //
    validatePointTriangles(
            graph.traversal().V().project("point", "triangle").by("point").by("triangle").toList());
    // HDFS TESTING //
    /*validatePointTriangles(IteratorUtils.<Map<String, Object>>asList(IteratorUtils.<Vertex, Map<String, Object>>map(FileSystemStorage.open(ConfUtil.makeHadoopConfiguration(graph.configuration())).head(graph.configuration().getInputLocation(), graph.configuration().getGraphReader()),
        vertex -> {
            return new HashMap<String, Object>() {{
                put("point", vertex.value("point"));
                put("triangle", vertex.value("triangle"));
            }};
        })));*/
}

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java

License: Apache License

private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {}", split.getPath().getName() + " ({}",
                split.getStart() + "..." + (split.getStart() + split.getLength()), "{} {} bytes)");
        final RecordReader reader = inputFormat.createRecordReader(split, job);

        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}

From source file: org.goldenorb.OrbPartition.java

License: Apache License

private void dumpData() {
    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;
    TaskAttemptContext tao = null;
    RecordWriter rw;
    VertexWriter vw;
    FileOutputFormat outputFormat;

    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < 15)
        try {
            count++;
            tryAgain = false;
            if (job == null) {
                job = new Job(conf);
                job.setOutputFormatClass(TextOutputFormat.class);
                FileOutputFormat.setOutputPath(job,
                        new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
            }
            if (jobContext == null) {
                jobContext = new JobContext(job.getConfiguration(), new JobID());
            }

            System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));

            tao = new TaskAttemptContext(jobContext.getConfiguration(),
                    new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
            outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
            rw = outputFormat.getRecordWriter(tao);
            vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
            for (Vertex v : vertices.values()) {
                OrbContext oc = vw.vertexWrite(v);
                rw.write(oc.getKey(), oc.getValue());
                // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
                // oc.getKey().toString() + ", " + oc.getValue().toString());
            }
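            // Close the record writer before asking the committer whether the task output needs a commit.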
            rw.close(tao);

            FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
            if (cm.needsTaskCommit(tao)) {
                cm.commitTask(tao);
                cm.cleanupJob(jobContext);
            } else {
                cm.cleanupJob(jobContext);
                tryAgain = true;
            }

        } catch (IOException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InstantiationException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InterruptedException e) {
            tryAgain = true;
            e.printStackTrace();
        }
    if (tryAgain) {
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file: org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java

License: Apache License

@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(!fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java

License: Apache License

@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java

License: Apache License

@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: org.kududb.mapreduce.TestKuduTableOutputFormat.java

License: Apache License

@Test
public void test() throws Exception {
    createTable(TABLE_NAME, getBasicSchema(), new CreateTableOptions());

    KuduTableOutputFormat output = new KuduTableOutputFormat();
    Configuration conf = new Configuration();
    conf.set(KuduTableOutputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
    conf.set(KuduTableOutputFormat.OUTPUT_TABLE_KEY, TABLE_NAME);
    output.setConf(conf);

    String multitonKey = conf.get(KuduTableOutputFormat.MULTITON_KEY);
    KuduTable table = KuduTableOutputFormat.getKuduTable(multitonKey);
    assertNotNull(table);

    Insert insert = table.newInsert();
    PartialRow row = insert.getRow();
    row.addInt(0, 1);
    row.addInt(1, 2);
    row.addInt(2, 3);
    row.addString(3, "a string");
    row.addBoolean(4, true);

    RecordWriter<NullWritable, Operation> rw = output.getRecordWriter(null);
    rw.write(NullWritable.get(), insert);
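    // null is accepted here because this writer's close() does not use the TaskAttemptContext.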
    rw.close(null);
    AsyncKuduScanner.AsyncKuduScannerBuilder builder = client.newScannerBuilder(table);
    assertEquals(1, countRowsInScan(builder.build()));
}