Example usage for org.apache.hadoop.mapreduce RecordReader getCurrentValue

List of usage examples for org.apache.hadoop.mapreduce RecordReader getCurrentValue

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce RecordReader getCurrentValue.

Prototype

public abstract VALUEIN getCurrentValue() throws IOException, InterruptedException;

Document

Get the current value.
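
A minimal sketch of the typical calling pattern, assuming an InputFormat, InputSplit and TaskAttemptContext have already been constructed; the names below (dumpSplit, inputFormat, split, context) are illustrative and not taken from the examples on this page. getCurrentValue() returns the value of the current key/value pair and is only meaningful after nextKeyValue() has returned true:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public static void dumpSplit(TextInputFormat inputFormat, InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
    reader.initialize(split, context);
    try {
        while (reader.nextKeyValue()) {
            // The reader may reuse the value object, so copy it if it is kept past this iteration.
            Text value = reader.getCurrentValue();
            System.out.println(reader.getCurrentKey() + "\t" + value);
        }
    } finally {
        reader.close();
    }
}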

Usage

From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java

License:Apache License

@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);
    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);

        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java

License:Apache License

public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    //Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split,
                taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus fileStatus[] = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap || i == splits.size() - 1);

        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}

From source file:com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java

License:Apache License

/**
 * Read the given split.
 * @return List of read messages
 */
private List<Message> readSplit(DatabusInputFormat format, org.apache.hadoop.mapreduce.InputSplit split,
        JobConf job) throws IOException, InterruptedException {
    List<Message> result = new ArrayList<Message>();
    RecordReader<LongWritable, Message> reader = format
            .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) split, context);
    ((DatabusRecordReader) reader).initialize(split, context);
    while (reader.nextKeyValue()) {
        result.add(reader.getCurrentValue());
    }
    reader.close();
    return result;
}

From source file:com.marcolotz.lung.debug.InputTester.java

License:Creative Commons License

/**
 * Method used for locally testing the record reader and the input format. It
 * generates an input split from a local file system file.
 *
 * @param filePath
 */
public void localTest(String filePath) {
    DICOM image;
    Configuration testConf = new Configuration(false);

    /* Reads the local file system */
    testConf.set("fs.default.name", "file:///");

    File testFile = new File(filePath);

    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<NullWritable, BytesWritable> inputFormat = ReflectionUtils
            .newInstance(WholeFileInputFormat.class, testConf);
    TaskAttemptContext context = new TaskAttemptContextImpl(testConf, new TaskAttemptID());

    try {
        RecordReader<NullWritable, BytesWritable> reader = inputFormat.createRecordReader(split, context);
        while (reader.nextKeyValue()) {
            /* get the bytes array */
            BytesWritable inputBytesWritable = (BytesWritable) reader.getCurrentValue();
            byte[] inputContent = inputBytesWritable.getBytes();

            /* Check for Correct value */
            // generateLocalOutput("path/to/output");

            InputStream is = new ByteArrayInputStream(inputContent);

            image = new DICOM(is);
            image.run("Dicom Test");

            /* Prints the bytes as an ImagePlus image */
            ImageViewer debug = new ImageViewer();
            debug.setImage(image);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java

License:Apache License

@Test
public void testReadFile() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java

License:Apache License

@Test
public void testReadFileNoMetadata() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();
    inputFormat.setBuilder(Testing.TestItem.newBuilder());

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}

From source file:com.splicemachine.derby.impl.io.WholeTextInputFormatTest.java

License:Apache License

private long collectRecords(Set<String> fileNames, RecordReader<String, InputStream> recordReader)
        throws IOException, InterruptedException {
    long count = 0L;
    while (recordReader.nextKeyValue()) {
        String key = recordReader.getCurrentKey();
        key = key.replaceAll("/+", "/"); // some platforms add more "/" at the beginning, coalesce them for equality check
        Assert.assertTrue("Seen the same file twice!", fileNames.add(key));

        InputStream is = recordReader.getCurrentValue();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
            String n;
            while ((n = br.readLine()) != null) {
                count++;
            }
        }
    }
    return count;
}

From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHdfsSource.java

License:Apache License

private List<Map.Entry> previewTextBatch(FileStatus fileStatus, int batchSize)
        throws IOException, InterruptedException {
    TextInputFormat textInputFormat = new TextInputFormat();
    InputSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), null);
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(hadoopConf,
            TaskAttemptID.forName("attempt_1439420318532_0011_m_000000_0"));
    RecordReader<LongWritable, Text> recordReader = textInputFormat.createRecordReader(fileSplit,
            taskAttemptContext);
    recordReader.initialize(fileSplit, taskAttemptContext);
    boolean hasNext = recordReader.nextKeyValue();
    List<Map.Entry> batch = new ArrayList<>();
    while (hasNext && batch.size() < batchSize) {
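        // String.valueOf(...) copies the value; the Text object returned by getCurrentValue() may be reused between calls.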
        batch.add(new Pair(fileStatus.getPath().toUri().getPath() + "::" + recordReader.getCurrentKey(),
                String.valueOf(recordReader.getCurrentValue())));
        hasNext = recordReader.nextKeyValue(); // not like iterator.hasNext, actually advances
    }
    return batch;
}

From source file:edu.uci.ics.hyracks.hdfs2.dataflow.HDFSReadOperatorDescriptor.java

License:Apache License

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final List<FileSplit> inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
        private ContextFactory ctxFactory = new ContextFactory();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                Job job = confFactory.getConf();
                job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                writer.open();
                InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(),
                        job.getConfiguration());
                int size = inputSplits.size();
                for (int i = 0; i < size; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read synchronize among
                         * simultaneous partitions in the same machine
                         */
                        synchronized (executed) {
                            if (executed[i] == false) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }

                        /**
                         * read the split
                         */
                        TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
                        context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                        RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                        reader.initialize(inputSplits.get(i), context);
                        while (reader.nextKeyValue() == true) {
                            parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer,
                                    inputSplits.get(i).toString());
                        }
                    }
                }
                parser.close(writer);
                writer.close();
            } catch (Exception e) {
                throw new HyracksDataException(e);
            } finally {
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

public static long spatialJoinLocal(Path[] inFiles, Path outFile, OperationsParams params)
        throws IOException, InterruptedException {
    // Read the inputs and store them in memory
    List<Shape>[] datasets = new List[inFiles.length];
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (int i = 0; i < inFiles.length; i++) {
        datasets[i] = new ArrayList<Shape>();
        FileSystem inFs = inFiles[i].getFileSystem(params);
        Job job = Job.getInstance(params);
        SpatialInputFormat3.addInputPath(job, inFiles[i]);
        for (InputSplit split : inputFormat.getSplits(job)) {
            FileSplit fsplit = (FileSplit) split;
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
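                    // Clone each shape; the reader may reuse the returned objects between calls.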
                    datasets[i].add(shape.clone());
                }
            }
            reader.close();
        }
    }

    // Apply the spatial join algorithm
    ResultCollector2<Shape, Shape> output = null;
    PrintStream out = null;
    if (outFile != null) {
        FileSystem outFS = outFile.getFileSystem(params);
        out = new PrintStream(outFS.create(outFile));
        final PrintStream outout = out;
        output = new ResultCollector2<Shape, Shape>() {
            @Override
            public void collect(Shape r, Shape s) {
                outout.println(r.toText(new Text()) + "," + s.toText(new Text()));
            }
        };
    }
    long resultCount = SpatialJoin_planeSweep(datasets[0], datasets[1], output, null);

    if (out != null)
        out.close();

    return resultCount;
}