Example usage for org.apache.hadoop.mapreduce InputFormat createRecordReader

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce InputFormat createRecordReader.

Prototype

public abstract RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException;

Document

Create a record reader for a given split.
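
Before the examples, a minimal sketch of the typical calling pattern, for orientation (not taken from the sources below; TextInputFormat is used only as a concrete stand-in, and the split and context variables are assumed to be an InputSplit and TaskAttemptContext obtained elsewhere). Note that the returned RecordReader must be initialized before use:

InputFormat<LongWritable, Text> format = new TextInputFormat();
RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
reader.initialize(split, context); // the framework normally does this; standalone callers must
try {
    while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        Text value = reader.getCurrentValue();
        // process one record...
    }
} finally {
    reader.close();
}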

Usage

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingRecordReader.java

License: Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param taggedInputSplit TaggedInputSplit object
 * @param context TaskAttemptContext object
 *
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(TaggedInputSplit taggedInputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the TaggedInputSplit.
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    InputSplit inputSplit = taggedInputSplit.getInputSplit();
    originalRR = inputFormat.createRecordReader(inputSplit, context);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java

License: Apache License

@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    MultiInputTaggedSplit taggedInputSplit = (MultiInputTaggedSplit) split;
    ConfigurationUtil.setAll(taggedInputSplit.getInputConfigs(), context.getConfiguration());
    // Find the InputFormat and then the RecordReader from the MultiInputTaggedSplit.
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    InputSplit inputSplit = taggedInputSplit.getInputSplit();
    // We can't simply create the underlying RecordReader and return it, because we need to override
    // its initialize method so that it initializes the underlying RecordReader with the underlying InputSplit.
    return new DelegatingRecordReader<>(inputFormat.createRecordReader(inputSplit, context));
}

From source file: com.ambiata.ivory.operation.hadoop.DelegatingRecordReader.java

License: Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param split TaggedInputSplit object
 * @param context TaskAttemptContext object
 *
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the
    // TaggedInputSplit.
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    originalRR = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}

From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License: Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
private void runMap(Job job, KeyValueSorter<?, ?> sorter)
        throws IOException, InterruptedException, ClassNotFoundException {
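    // Emulates the map phase locally: for each input split, build a task context,
    // create and initialize a RecordReader, run the mapper, and commit the task
    // output on success (or abort it on failure).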
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    int serial = 1;
    for (InputSplit split : splits) {
        TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0);
        Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$
                    mapper.getClass().getName(), id, split.getLength()));
        }
        TaskAttemptContext context = newTaskAttemptContext(conf, id);
        // we always obtain a new OutputFormat object, since OutputFormat.getOutputCommitter() may be cached
        OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
        OutputCommitter committer = output.getOutputCommitter(context);
        committer.setupTask(context);
        boolean succeed = false;
        try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) {
            RecordWriter<?, ?> writer;
            if (sorter != null) {
                writer = new ShuffleWriter(sorter);
            } else {
                writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            }
            try {
                Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split);
                reader.initialize(split, c);
                mapper.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
            doCommitTask(context, committer);
            succeed = true;
        } finally {
            if (!succeed) {
                doAbortTask(context, committer);
            }
        }
    }
}

From source file: com.asakusafw.runtime.stage.input.StageInputRecordReader.java

License: Apache License

private void prepare() throws IOException, InterruptedException {
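    // Close the reader for the exhausted source (if any), then open and initialize
    // a reader for the next source, or mark end-of-input when none remain.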
    if (current != null) {
        baseProgress += progressPerSource;
        current.close();
    }
    if (sources.hasNext()) {
        Source next = sources.next();
        InputFormat<?, ?> format = ReflectionUtils.newInstance(next.getFormatClass(),
                context.getConfiguration());
        current = format.createRecordReader(next.getSplit(), context);
        current.initialize(next.getSplit(), context);
    } else {
        eof = true;
        current = VOID;
    }
}

From source file: com.baynote.hadoop.DelegatingRecordReader.java

License: Apache License

@SuppressWarnings("unchecked")
public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the TaggedInputSplit.
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    originalRR = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}

From source file: com.cloudera.crunch.impl.mr.run.CrunchRecordReader.java

License: Apache License

public CrunchRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
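    // Unwrap the CrunchInputSplit, instantiate the InputFormat it names, and
    // delegate to that format's RecordReader for the wrapped split.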
    CrunchInputSplit crunchSplit = (CrunchInputSplit) inputSplit;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(crunchSplit.getInputFormatClass(), context.getConfiguration());
    this.delegate = inputFormat.createRecordReader(crunchSplit.getInputSplit(), context);
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingRecordReader.java

License: Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param split
 *          TaggedInputSplit object
 * @param context
 *          TaskAttemptContext object
 * 
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the
    // TaggedInputSplit.
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) InstancesDistributor.loadInstance(
            context.getConfiguration(), InputFormat.class, taggedInputSplit.getInputFormatFile(), true);
    PangoolMultipleInputs.setSpecificInputContext(context.getConfiguration(),
            taggedInputSplit.getInputFormatFile());
    originalRecordReader = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}

From source file: com.linkedin.cubert.io.CubertInputFormat.java

License: Open Source License

@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    ConfigurationDiff confDiff = new ConfigurationDiff(conf);

    MultiMapperSplit mmSplit = (MultiMapperSplit) split;
    int multiMapperIndex = mmSplit.getMultiMapperIndex();

    // reset the conf to multiMapperIndex
    confDiff.applyDiff(multiMapperIndex);

    // get the actual input split
    InputSplit actualSplit = mmSplit.getActualSplit();

    // get the actual input format class
    InputFormat<K, V> actualInputFormat = getActualInputFormat(context);

    RecordReader<K, V> reader = null;

    if (actualSplit instanceof CombineFileSplit) {
        reader = new CombinedFileRecordReader<K, V>(actualInputFormat, (CombineFileSplit) actualSplit, context);
    } else {/*ww w .j  av a2  s.c o m*/
        reader = actualInputFormat.createRecordReader(actualSplit, context);
    }

    // confDiff.undoDiff(multiMapperIndex);

    return new MultiMapperRecordReader<K, V>(reader);
}

From source file: com.marcolotz.lung.debug.InputTester.java

License: Creative Commons License

/**
 * Method used for locally testing the record reader and the input format. It
 * generates an input split from a file on the local file system.
 *
 * @param filePath path to the local file to read
 */
public void localTest(String filePath) {
    DICOM image;
    Configuration testConf = new Configuration(false);

    /* Reads the local file system */
    testConf.set("fs.default.name", "file:///");

    File testFile = new File(filePath);

    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<NullWritable, BytesWritable> inputFormat = ReflectionUtils
            .newInstance(WholeFileInputFormat.class, testConf);
    TaskAttemptContext context = new TaskAttemptContextImpl(testConf, new TaskAttemptID());

    try {
        RecordReader<NullWritable, BytesWritable> reader = inputFormat.createRecordReader(split, context);
        while (reader.nextKeyValue()) {
            /* get the bytes array */
            BytesWritable inputBytesWritable = reader.getCurrentValue();
            byte[] inputContent = inputBytesWritable.getBytes();

            /* Check for Correct value */
            // generateLocalOutput("path/to/output");

            InputStream is = new ByteArrayInputStream(inputContent);

            image = new DICOM(is);
            image.run("Dicom Test");

            /* Prints the bytes as an ImagePlus image */
            ImageViewer debug = new ImageViewer();
            debug.setImage(image);
        }
    } catch (Exception e) {
        // Report failures instead of swallowing them silently.
        e.printStackTrace();
    }
}