Example usage for org.apache.hadoop.mapreduce RecordReader getProgress

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.RecordReader#getProgress.

Prototype

public abstract float getProgress() throws IOException, InterruptedException;

Document

The current progress of the record reader through its data: a number between 0.0 and 1.0 that is the fraction of the data read.
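
Before the real-world examples below, here is a minimal, self-contained sketch of driving a reader by hand and polling getProgress between records. It uses Hadoop's own LineRecordReader over a single whole-file split; the input path argument is an assumption and should point at a local text file.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class GetProgressDemo {
    public static void main(String[] args) throws IOException, InterruptedException {
        final Configuration conf = new Configuration();
        final Path input = new Path(args[0]); // assumed: path to a local text file
        final long length = FileSystem.getLocal(conf).getFileStatus(input).getLen();

        // One split covering the whole file; a real job would ask the InputFormat for splits.
        final InputSplit split = new FileSplit(input, 0, length, null);
        final RecordReader<LongWritable, Text> reader = new LineRecordReader();
        reader.initialize(split, new TaskAttemptContextImpl(conf, new TaskAttemptID()));
        try {
            while (reader.nextKeyValue()) {
                // getProgress() reports the fraction of the split consumed so far, in [0.0, 1.0]
                System.out.printf("progress=%.3f offset=%s%n", reader.getProgress(), reader.getCurrentKey());
            }
            // A well-behaved reader reports 1.0 once it is exhausted
            System.out.printf("final progress=%.3f%n", reader.getProgress());
        } finally {
            reader.close();
        }
    }
}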

Usage

From source file: org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests.java

License: Apache License

protected final int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException {
    int count = 0;

    // Check initial progress
    LOG.info(String.format("Initial Reported Progress %f", reader.getProgress()));
    float progress = reader.getProgress();
    if (Float.compare(0.0f, progress) == 0) {
        Assert.assertEquals(0.0d, reader.getProgress(), 0.0d);
    } else if (Float.compare(1.0f, progress) == 0) {
        // If reader is reported 1.0 straight away then we expect there to
        // be no key values
        Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);
        Assert.assertFalse(reader.nextKeyValue());
    } else {
        Assert.fail(String.format(
                "Expected progress of 0.0 or 1.0 before reader has been accessed for first time but got %f",
                progress));
    }

    // Count tuples
    boolean debug = LOG.isDebugEnabled();
    while (reader.nextKeyValue()) {
        count++;
        progress = reader.getProgress();
        if (debug)
            LOG.debug(String.format("Current Reported Progress %f", progress));
        Assert.assertTrue(String.format("Progress should be in the range 0.0 < p <= 1.0 but got %f", progress),
                progress > 0.0f && progress <= 1.0f);
    }
    reader.close();
    LOG.info(String.format("Got %d tuples from this record reader", count));

    // Check final progress
    LOG.info(String.format("Final Reported Progress %f", reader.getProgress()));
    Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);

    return count;
}
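
The invariants this test asserts (0.0 or 1.0 before the first record, a value in (0.0, 1.0] while records are being returned, and exactly 1.0 once the reader is exhausted) are what a byte-offset-based reader naturally produces. Here is a sketch of such an implementation, modeled on Hadoop's LineRecordReader; the start, end, and pos fields are assumed to hold the split's byte boundaries and the current read position.

// Fraction of the split consumed so far, clamped to [0.0, 1.0]
// ('start', 'end', and 'pos' are assumed fields of the custom reader)
@Override
public float getProgress() throws IOException {
    if (start == end) {
        return 0.0f; // empty split: nothing to read
    }
    return Math.min(1.0f, (pos - start) / (float) (end - start));
}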

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java

License: Apache License

private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {} ({}...{} bytes)", split.getPath().getName(), split.getStart(),
                split.getStart() + split.getLength());
        final RecordReader reader = inputFormat.createRecordReader(split, job);

        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}
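
One caveat this test illustrates: getProgress() is scoped to the reader's own split, so each new RecordReader starts back near 0.0. A driver that walks several FileSplits in sequence, as validateFileSplits does, has to weight the per-split values itself if it wants one overall figure. A hypothetical helper sketch, assuming the splits are read in list order and using java.util.List and org.apache.hadoop.mapreduce.lib.input.FileSplit:

// Hypothetical helper: overall progress across several splits, weighting each
// reader's per-split progress by its split length
static float overallProgress(final List<FileSplit> splits, final int currentSplit,
        final RecordReader<?, ?> reader) throws IOException, InterruptedException {
    long done = 0;
    long total = 0;
    for (int i = 0; i < splits.size(); i++) {
        final long length = splits.get(i).getLength();
        total += length;
        if (i < currentSplit)
            done += length; // splits already read completely
        else if (i == currentSplit)
            done += (long) (length * reader.getProgress()); // current split, partially read
    }
    return total == 0 ? 1.0f : (float) done / total;
}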