Example usage for org.apache.hadoop.mapreduce RecordReader getProgress

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.RecordReader#getProgress.

Prototype

public abstract float getProgress() throws IOException, InterruptedException;

Document

The current progress of the record reader through its data: a number between 0.0 and 1.0 that is the fraction of the data read.
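
Before the real-world examples below, here is a minimal, self-contained sketch of driving a reader by hand and polling getProgress between records. It uses Hadoop's own LineRecordReader over a single whole-file split; the input path argument is an assumption and should point at a local text file.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class GetProgressDemo {
    public static void main(String[] args) throws IOException, InterruptedException {
        final Configuration conf = new Configuration();
        final Path input = new Path(args[0]); // assumed: path to a local text file
        final long length = FileSystem.getLocal(conf).getFileStatus(input).getLen();

        // One split covering the whole file; a real job would ask the InputFormat for splits.
        final InputSplit split = new FileSplit(input, 0, length, null);
        final RecordReader<LongWritable, Text> reader = new LineRecordReader();
        reader.initialize(split, new TaskAttemptContextImpl(conf, new TaskAttemptID()));
        try {
            while (reader.nextKeyValue()) {
                // getProgress() reports the fraction of the split consumed so far, in [0.0, 1.0]
                System.out.printf("progress=%.3f offset=%s%n", reader.getProgress(), reader.getCurrentKey());
            }
            // A well-behaved reader reports 1.0 once it is exhausted
            System.out.printf("final progress=%.3f%n", reader.getProgress());
        } finally {
            reader.close();
        }
    }
}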

Usage

From source file: org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests.java

License: Apache License

protected final int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException {
    int count = 0;

    // Check initial progress
    LOG.info(String.format("Initial Reported Progress %f", reader.getProgress()));
    float progress = reader.getProgress();
    if (Float.compare(0.0f, progress) == 0) {
        Assert.assertEquals(0.0d, reader.getProgress(), 0.0d);
    } else if (Float.compare(1.0f, progress) == 0) {
        // If reader is reported 1.0 straight away then we expect there to
        // be no key values
        Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);
        Assert.assertFalse(reader.nextKeyValue());
    } else {
        Assert.fail(String.format(
                "Expected progress of 0.0 or 1.0 before reader has been accessed for first time but got %f",
                progress));
    }

    // Count tuples
    boolean debug = LOG.isDebugEnabled();
    while (reader.nextKeyValue()) {
        count++;
        progress = reader.getProgress();
        if (debug)
            LOG.debug(String.format("Current Reported Progress %f", progress));
        Assert.assertTrue(String.format("Progress should be in the range 0.0 < p <= 1.0 but got %f", progress),
                progress > 0.0f && progress <= 1.0f);
    }
    reader.close();
    LOG.info(String.format("Got %d tuples from this record reader", count));

    // Check final progress
    LOG.info(String.format("Final Reported Progress %f", reader.getProgress()));
    Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);

    return count;
}
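
The invariants this test asserts (0.0 or 1.0 before the first record, a value in (0.0, 1.0] while records are being returned, and exactly 1.0 once the reader is exhausted) are what a byte-offset-based reader naturally produces. Here is a sketch of such an implementation, modeled on Hadoop's LineRecordReader; the start, end, and pos fields are assumed to hold the split's byte boundaries and the current read position.

// Fraction of the split consumed so far, clamped to [0.0, 1.0]
// ('start', 'end', and 'pos' are assumed fields of the custom reader)
@Override
public float getProgress() throws IOException {
    if (start == end) {
        return 0.0f; // empty split: nothing to read
    }
    return Math.min(1.0f, (pos - start) / (float) (end - start));
}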

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java

License: Apache License

private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {} ({}...{} bytes)", split.getPath().getName(), split.getStart(),
                split.getStart() + split.getLength());
        final RecordReader reader = inputFormat.createRecordReader(split, job);

        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}
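
One caveat this test illustrates: getProgress() is scoped to the reader's own split, so each new RecordReader starts back near 0.0. A driver that walks several FileSplits in sequence, as validateFileSplits does, has to weight the per-split values itself if it wants one overall figure. A hypothetical helper sketch, assuming the splits are read in list order and using java.util.List and org.apache.hadoop.mapreduce.lib.input.FileSplit:

// Hypothetical helper: overall progress across several splits, weighting each
// reader's per-split progress by its split length
static float overallProgress(final List<FileSplit> splits, final int currentSplit,
        final RecordReader<?, ?> reader) throws IOException, InterruptedException {
    long done = 0;
    long total = 0;
    for (int i = 0; i < splits.size(); i++) {
        final long length = splits.get(i).getLength();
        total += length;
        if (i < currentSplit)
            done += length; // splits already read completely
        else if (i == currentSplit)
            done += (long) (length * reader.getProgress()); // current split, partially read
    }
    return total == 0 ? 1.0f : (float) done / total;
}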