List of usage examples for org.apache.hadoop.mapreduce InputFormat createRecordReader
/**
 * Creates a record reader for the given input split.
 *
 * @param split the input split the returned reader is created for
 * @param context the task attempt context the reader is created under
 * @return a {@link RecordReader} yielding keys of type {@code K} and values of type {@code V}
 * @throws IOException declared so implementations may report I/O failures
 * @throws InterruptedException declared so implementations may report interruption
 */
public abstract RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException;
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.DelegatingRecordReader.java
License:Apache License
/** * Constructs the DelegatingRecordReader. * * @param taggedInputSplit TaggedInputSplit object * @param context TaskAttemptContext object * * @throws IOException/* ww w. j ava2 s . co m*/ * @throws InterruptedException */ @SuppressWarnings("unchecked") public DelegatingRecordReader(TaggedInputSplit taggedInputSplit, TaskAttemptContext context) throws IOException, InterruptedException { // Find the InputFormat and then the RecordReader from the TaggedInputSplit. InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration()); InputSplit inputSplit = taggedInputSplit.getInputSplit(); originalRR = inputFormat.createRecordReader(inputSplit, context); }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputFormat.java
License:Apache License
@Override public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { MultiInputTaggedSplit taggedInputSplit = (MultiInputTaggedSplit) split; ConfigurationUtil.setAll((taggedInputSplit).getInputConfigs(), context.getConfiguration()); InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration()); InputSplit inputSplit = taggedInputSplit.getInputSplit(); // we can't simply compute the underlying RecordReader and return it, because we need to override its // initialize method in order to initialize the underlying RecordReader with the underlying InputSplit // Find the InputFormat and then the RecordReader from the MultiInputTaggedSplit. return new DelegatingRecordReader<>(inputFormat.createRecordReader(inputSplit, context)); }
From source file:com.ambiata.ivory.operation.hadoop.DelegatingRecordReader.java
License:Apache License
/** * Constructs the DelegatingRecordReader. * * @param split TaggegInputSplit object//from w w w. j av a 2 s .com * @param context TaskAttemptContext object * * @throws IOException * @throws InterruptedException */ @SuppressWarnings("unchecked") public DelegatingRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { // Find the InputFormat and then the RecordReader from the // TaggedInputSplit. TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split; InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration()); originalRR = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context); }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) private void runMap(Job job, KeyValueSorter<?, ?> sorter) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = job.getConfiguration(); InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); List<InputSplit> splits = input.getSplits(job); int serial = 1; for (InputSplit split : splits) { TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0); Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$ mapper.getClass().getName(), id, split.getLength())); }/*from ww w . ja v a 2s.co m*/ TaskAttemptContext context = newTaskAttemptContext(conf, id); // we always obtain a new OutputFormat object / OutputFormat.getOutputCommiter() may be cached OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) { RecordWriter<?, ?> writer; if (sorter != null) { writer = new ShuffleWriter(sorter); } else { writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); } try { Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split); reader.initialize(split, c); mapper.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } } }
From source file:com.asakusafw.runtime.stage.input.StageInputRecordReader.java
License:Apache License
private void prepare() throws IOException, InterruptedException { if (current != null) { baseProgress += progressPerSource; current.close();//ww w .j a v a2s .c om } if (sources.hasNext()) { Source next = sources.next(); InputFormat<?, ?> format = ReflectionUtils.newInstance(next.getFormatClass(), context.getConfiguration()); current = format.createRecordReader(next.getSplit(), context); current.initialize(next.getSplit(), context); } else { eof = true; current = VOID; } }
From source file:com.baynote.hadoop.DelegatingRecordReader.java
License:Apache License
@SuppressWarnings("unchecked") public DelegatingRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { // Find the InputFormat and then the RecordReader from the TaggedInputSplit. TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split; InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils .newInstance(taggedInputSplit.getInputFormatClass(), context.getConfiguration()); originalRR = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context); }
From source file:com.cloudera.crunch.impl.mr.run.CrunchRecordReader.java
License:Apache License
/**
 * Creates a reader that delegates to the record reader of the input format named by a
 * {@code CrunchInputSplit}.
 *
 * @param inputSplit the split to read; cast to {@code CrunchInputSplit}
 * @param context the task attempt context supplying the configuration
 * @throws IOException if the underlying input format fails to create its record reader
 * @throws InterruptedException if creating the underlying record reader is interrupted
 */
public CrunchRecordReader(InputSplit inputSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CrunchInputSplit wrapper = (CrunchInputSplit) inputSplit;
  // Instantiate the format the wrapper split was produced by.
  InputFormat<K, V> wrappedFormat = (InputFormat<K, V>) ReflectionUtils.newInstance(
      wrapper.getInputFormatClass(), context.getConfiguration());
  InputSplit wrappedSplit = wrapper.getInputSplit();
  this.delegate = wrappedFormat.createRecordReader(wrappedSplit, context);
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingRecordReader.java
License:Apache License
/** * Constructs the DelegatingRecordReader. * //from w w w . ja va2s. c om * @param split * TaggegInputSplit object * @param context * TaskAttemptContext object * * @throws IOException * @throws InterruptedException */ @SuppressWarnings("unchecked") public DelegatingRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { // Find the InputFormat and then the RecordReader from the // TaggedInputSplit. TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split; InputFormat<K, V> inputFormat = (InputFormat<K, V>) InstancesDistributor.loadInstance( context.getConfiguration(), InputFormat.class, taggedInputSplit.getInputFormatFile(), true); PangoolMultipleInputs.setSpecificInputContext(context.getConfiguration(), taggedInputSplit.getInputFormatFile()); originalRecordReader = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context); }
From source file:com.linkedin.cubert.io.CubertInputFormat.java
License:Open Source License
@Override public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); ConfigurationDiff confDiff = new ConfigurationDiff(conf); MultiMapperSplit mmSplit = (MultiMapperSplit) split; int multiMapperIndex = mmSplit.getMultiMapperIndex(); confDiff.applyDiff(multiMapperIndex); // reset the conf to multiMapperIndex InputSplit actualSplit = mmSplit.getActualSplit(); // get the actual input format class InputFormat<K, V> actualInputFormat = getActualInputFormat(context); RecordReader<K, V> reader = null; if (actualSplit instanceof CombineFileSplit) { reader = new CombinedFileRecordReader<K, V>(actualInputFormat, (CombineFileSplit) actualSplit, context); } else {/*ww w .j av a2 s.c o m*/ reader = actualInputFormat.createRecordReader(actualSplit, context); } // confDiff.undoDiff(multiMapperIndex); return new MultiMapperRecordReader<K, V>(reader); }
From source file:com.marcolotz.lung.debug.InputTester.java
License:Creative Commons License
/*** * Method used for local testing the record reader and the Input format. It * generates an input split from the local file system file. * //w w w.j a va 2s. co m * @param filePath */ public void localTest(String filePath) { DICOM image; Configuration testConf = new Configuration(false); /* Reads the local file system */ testConf.set("fs.default.name", "file:///"); File testFile = new File(filePath); Path path = new Path(testFile.getAbsoluteFile().toURI()); FileSplit split = new FileSplit(path, 0, testFile.length(), null); InputFormat<NullWritable, BytesWritable> inputFormat = ReflectionUtils .newInstance(WholeFileInputFormat.class, testConf); TaskAttemptContext context = new TaskAttemptContextImpl(testConf, new TaskAttemptID()); try { RecordReader<NullWritable, BytesWritable> reader = inputFormat.createRecordReader(split, context); while (reader.nextKeyValue()) { /* get the bytes array */ BytesWritable inputBytesWritable = (BytesWritable) reader.getCurrentValue(); byte[] inputContent = inputBytesWritable.getBytes(); /* Check for Correct value */ // generateLocalOutput("path/to/output"); InputStream is = new ByteArrayInputStream(inputContent); image = new DICOM(is); image.run("Dicom Test"); /* Prints the bytes as an ImagePlus image */ ImageViewer debug = new ImageViewer(); debug.setImage(image); } } catch (Exception e) { } }