List of usage examples for org.apache.hadoop.mapreduce RecordReader getProgress
/**
 * Reports the current progress of this record reader.
 *
 * <p>NOTE(review): the usage examples in this listing treat the value as a fraction between 0.0
 * and 1.0 that does not decrease as records are read — confirm against the Hadoop API
 * documentation before relying on it.
 *
 * @return the current progress as a {@code float}
 * @throws IOException declared by the signature; the triggering conditions are implementation-specific
 * @throws InterruptedException declared by the signature; the triggering conditions are implementation-specific
 */
public abstract float getProgress() throws IOException, InterruptedException;
From source file:org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests.java
License:Apache License
protected final int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException { int count = 0; // Check initial progress LOG.info(String.format("Initial Reported Progress %f", reader.getProgress())); float progress = reader.getProgress(); if (Float.compare(0.0f, progress) == 0) { Assert.assertEquals(0.0d, reader.getProgress(), 0.0d); } else if (Float.compare(1.0f, progress) == 0) { // If reader is reported 1.0 straight away then we expect there to // be no key values Assert.assertEquals(1.0d, reader.getProgress(), 0.0d); Assert.assertFalse(reader.nextKeyValue()); } else {//from w w w . j ava2 s. com Assert.fail(String.format( "Expected progress of 0.0 or 1.0 before reader has been accessed for first time but got %f", progress)); } // Count tuples boolean debug = LOG.isDebugEnabled(); while (reader.nextKeyValue()) { count++; progress = reader.getProgress(); if (debug) LOG.debug(String.format("Current Reported Progress %f", progress)); Assert.assertTrue(String.format("Progress should be in the range 0.0 < p <= 1.0 but got %f", progress), progress > 0.0f && progress <= 1.0f); } reader.close(); LOG.info(String.format("Got %d tuples from this record reader", count)); // Check final progress LOG.info(String.format("Final Reported Progress %f", reader.getProgress())); Assert.assertEquals(1.0d, reader.getProgress(), 0.0d); return count; }
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java
License:Apache License
private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration, final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass, final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass) throws Exception { final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration); final TaskAttemptContext job = new TaskAttemptContextImpl(configuration, new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0)); int vertexCount = 0; int outEdgeCount = 0; int inEdgeCount = 0; final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent() ? ReflectionUtils.newInstance(outFormatClass.get(), configuration) : null;// w w w . j a v a2 s . co m final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null : outputFormat.getRecordWriter(job); boolean foundKeyValue = false; for (final FileSplit split : fileSplits) { logger.info("\treading file split {}", split.getPath().getName() + " ({}", split.getStart() + "..." 
+ (split.getStart() + split.getLength()), "{} {} bytes)"); final RecordReader reader = inputFormat.createRecordReader(split, job); float lastProgress = -1f; while (reader.nextKeyValue()) { //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue()); final float progress = reader.getProgress(); assertTrue(progress >= lastProgress); assertEquals(NullWritable.class, reader.getCurrentKey().getClass()); final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue(); if (null != writer) writer.write(NullWritable.get(), vertexWritable); vertexCount++; outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT)); inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN)); // final Vertex vertex = vertexWritable.get(); assertEquals(Integer.class, vertex.id().getClass()); if (vertex.value("name").equals("SUGAR MAGNOLIA")) { foundKeyValue = true; assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT))); assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN))); } lastProgress = progress; } } assertEquals(8049, outEdgeCount); assertEquals(8049, inEdgeCount); assertEquals(outEdgeCount, inEdgeCount); assertEquals(808, vertexCount); assertTrue(foundKeyValue); if (null != writer) { writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID())); for (int i = 1; i < 10; i++) { final File outputDirectory = new File( new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI()); final List<FileSplit> splits = generateFileSplits( new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/" + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0" + "/part-m-00000"), i); validateFileSplits(splits, configuration, inputFormatClass, Optional.empty()); } } }