Example usage for org.apache.hadoop.mapreduce InputFormat createRecordReader

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce InputFormat createRecordReader.

Prototype

public abstract RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException;

Document

Create a record reader for a given split.
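
Before the examples, a minimal sketch of the typical calling pattern, for orientation (not taken from the sources below; TextInputFormat is used only as a concrete stand-in, and the split and context variables are assumed to be an InputSplit and TaskAttemptContext obtained elsewhere). Note that the returned RecordReader must be initialized before use:

InputFormat<LongWritable, Text> format = new TextInputFormat();
RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
reader.initialize(split, context); // the framework normally does this; standalone callers must
try {
    while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        Text value = reader.getCurrentValue();
        // process one record...
    }
} finally {
    reader.close();
}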

Usage

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingRecordReader.java

License: Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param taggedInputSplit TaggedInputSplit object
 * @param context TaskAttemptContext object
 *
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(TaggedInputSplit taggedInputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the TaggedInputSplit.
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    InputSplit inputSplit = taggedInputSplit.getInputSplit();
    originalRR = inputFormat.createRecordReader(inputSplit, context);
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java

License: Apache License

@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    MultiInputTaggedSplit taggedInputSplit = (MultiInputTaggedSplit) split;
    ConfigurationUtil.setAll(taggedInputSplit.getInputConfigs(), context.getConfiguration());
    // Find the InputFormat and then the RecordReader from the MultiInputTaggedSplit.
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    InputSplit inputSplit = taggedInputSplit.getInputSplit();
    // We can't simply create the underlying RecordReader and return it, because we need to override
    // its initialize method so that it initializes the underlying RecordReader with the underlying InputSplit.
    return new DelegatingRecordReader<>(inputFormat.createRecordReader(inputSplit, context));
}

From source file: com.ambiata.ivory.operation.hadoop.DelegatingRecordReader.java

License: Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param split TaggedInputSplit object
 * @param context TaskAttemptContext object
 *
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the
    // TaggedInputSplit.
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    originalRR = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}

From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License: Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
private void runMap(Job job, KeyValueSorter<?, ?> sorter)
        throws IOException, InterruptedException, ClassNotFoundException {
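    // Emulates the map phase locally: for each input split, build a task context,
    // create and initialize a RecordReader, run the mapper, and commit the task
    // output on success (or abort it on failure).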
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    int serial = 1;
    for (InputSplit split : splits) {
        TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0);
        Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$
                    mapper.getClass().getName(), id, split.getLength()));
        }
        TaskAttemptContext context = newTaskAttemptContext(conf, id);
        // we always obtain a new OutputFormat object, since OutputFormat.getOutputCommitter() may be cached
        OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
        OutputCommitter committer = output.getOutputCommitter(context);
        committer.setupTask(context);
        boolean succeed = false;
        try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) {
            RecordWriter<?, ?> writer;
            if (sorter != null) {
                writer = new ShuffleWriter(sorter);
            } else {
                writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            }
            try {
                Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split);
                reader.initialize(split, c);
                mapper.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
            doCommitTask(context, committer);
            succeed = true;
        } finally {
            if (!succeed) {
                doAbortTask(context, committer);
            }
        }
    }
}

From source file: com.asakusafw.runtime.stage.input.StageInputRecordReader.java

License: Apache License

private void prepare() throws IOException, InterruptedException {
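    // Close the reader for the exhausted source (if any), then open and initialize
    // a reader for the next source, or mark end-of-input when none remain.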
    if (current != null) {
        baseProgress += progressPerSource;
        current.close();
    }
    if (sources.hasNext()) {
        Source next = sources.next();
        InputFormat<?, ?> format = ReflectionUtils.newInstance(next.getFormatClass(),
                context.getConfiguration());
        current = format.createRecordReader(next.getSplit(), context);
        current.initialize(next.getSplit(), context);
    } else {
        eof = true;
        current = VOID;
    }
}

From source file: com.baynote.hadoop.DelegatingRecordReader.java

License: Apache License

@SuppressWarnings("unchecked")
public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the TaggedInputSplit.
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration());
    originalRR = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}

From source file: com.cloudera.crunch.impl.mr.run.CrunchRecordReader.java

License: Apache License

public CrunchRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
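    // Unwrap the CrunchInputSplit, instantiate the InputFormat it names, and
    // delegate to that format's RecordReader for the wrapped split.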
    CrunchInputSplit crunchSplit = (CrunchInputSplit) inputSplit;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(crunchSplit.getInputFormatClass(), context.getConfiguration());
    this.delegate = inputFormat.createRecordReader(crunchSplit.getInputSplit(), context);
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingRecordReader.java

License: Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param split
 *          TaggedInputSplit object
 * @param context
 *          TaskAttemptContext object
 * 
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the
    // TaggedInputSplit.
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) InstancesDistributor.loadInstance(
            context.getConfiguration(), InputFormat.class, taggedInputSplit.getInputFormatFile(), true);
    PangoolMultipleInputs.setSpecificInputContext(context.getConfiguration(),
            taggedInputSplit.getInputFormatFile());
    originalRecordReader = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}

From source file: com.linkedin.cubert.io.CubertInputFormat.java

License: Open Source License

@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    ConfigurationDiff confDiff = new ConfigurationDiff(conf);

    MultiMapperSplit mmSplit = (MultiMapperSplit) split;
    int multiMapperIndex = mmSplit.getMultiMapperIndex();

    // reset the conf to multiMapperIndex
    confDiff.applyDiff(multiMapperIndex);

    // get the actual input split
    InputSplit actualSplit = mmSplit.getActualSplit();

    // get the actual input format class
    InputFormat<K, V> actualInputFormat = getActualInputFormat(context);

    RecordReader<K, V> reader = null;

    if (actualSplit instanceof CombineFileSplit) {
        reader = new CombinedFileRecordReader<K, V>(actualInputFormat, (CombineFileSplit) actualSplit, context);
    } else {/*ww w .j  av a2  s.c o m*/
        reader = actualInputFormat.createRecordReader(actualSplit, context);
    }

    // confDiff.undoDiff(multiMapperIndex);

    return new MultiMapperRecordReader<K, V>(reader);
}

From source file: com.marcolotz.lung.debug.InputTester.java

License: Creative Commons License

/**
 * Method used for locally testing the record reader and the input format. It
 * generates an input split from a file on the local file system.
 *
 * @param filePath path to the local file to read
 */
public void localTest(String filePath) {
    DICOM image;
    Configuration testConf = new Configuration(false);

    /* Reads the local file system */
    testConf.set("fs.default.name", "file:///");

    File testFile = new File(filePath);

    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<NullWritable, BytesWritable> inputFormat = ReflectionUtils
            .newInstance(WholeFileInputFormat.class, testConf);
    TaskAttemptContext context = new TaskAttemptContextImpl(testConf, new TaskAttemptID());

    try {
        RecordReader<NullWritable, BytesWritable> reader = inputFormat.createRecordReader(split, context);
        while (reader.nextKeyValue()) {
            /* get the bytes array */
            BytesWritable inputBytesWritable = reader.getCurrentValue();
            byte[] inputContent = inputBytesWritable.getBytes();

            /* Check for Correct value */
            // generateLocalOutput("path/to/output");

            InputStream is = new ByteArrayInputStream(inputContent);

            image = new DICOM(is);
            image.run("Dicom Test");

            /* Prints the bytes as an ImagePlus image */
            ImageViewer debug = new ImageViewer();
            debug.setImage(image);
        }
    } catch (Exception e) {
        // Report failures instead of swallowing them silently.
        e.printStackTrace();
    }
}