List of usage examples for org.apache.hadoop.mapreduce.RecordReader
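Every example below instantiates the abstract org.apache.hadoop.mapreduce.RecordReader as an anonymous subclass inside InputFormat.createRecordReader and implements its six abstract methods: initialize, nextKeyValue, getCurrentKey, getCurrentValue, getProgress, and close. The following stripped-down sketch shows that shared pattern; the LongWritable/Text key and value types and the placeholder iterator are illustrative assumptions, not taken from any of the projects listed here.

import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical skeleton: the backing iterator stands in for whatever source the split describes.
public class SkeletonRecordReader extends RecordReader<LongWritable, Text> {
    private Iterator<Text> values = Collections.emptyIterator();
    private long position = 0;
    private Text currentValue;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Open the resource identified by the split (file, scan, RDD partition, ...) here.
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        // Advance to the next record; return false when the source is exhausted.
        if (!values.hasNext()) {
            return false;
        }
        currentValue = values.next();
        position++;
        return true;
    }

    @Override
    public LongWritable getCurrentKey() throws IOException, InterruptedException {
        return new LongWritable(position);
    }

    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        return currentValue;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return 0.0f; // Several of the examples below also return a constant here.
    }

    @Override
    public void close() throws IOException {
        // Release whatever initialize() opened.
    }
}

The concrete examples differ mainly in what initialize() opens (a DataMap, a Fluo transaction, an RFile, an HBase/Bigtable scan, a Spark RDD, or a TFRecord stream) and in how nextKeyValue() walks that source.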
From source file:org.apache.carbondata.core.datamap.DistributableDataMapFormat.java
License:Apache License
@Override
public RecordReader<Void, ExtendedBlocklet> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  return new RecordReader<Void, ExtendedBlocklet>() {
    private Iterator<ExtendedBlocklet> blockletIterator;
    private ExtendedBlocklet currBlocklet;
    private List<DataMap> dataMaps;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      distributable = (DataMapDistributableWrapper) inputSplit;
      // clear the segmentMap and from cache in executor when there are invalid segments
      if (invalidSegments.size() > 0) {
        DataMapStoreManager.getInstance().clearInvalidSegments(table, invalidSegments);
      }
      TableDataMap tableDataMap = DataMapStoreManager.getInstance()
          .getDataMap(table, distributable.getDistributable().getDataMapSchema());
      if (isJobToClearDataMaps) {
        // if job is to clear datamaps just clear datamaps from cache and return
        DataMapStoreManager.getInstance()
            .clearDataMaps(table.getCarbonTableIdentifier().getTableUniqueName());
        // clear the segment properties cache from executor
        SegmentPropertiesAndSchemaHolder.getInstance().invalidate(table.getAbsoluteTableIdentifier());
        blockletIterator = Collections.emptyIterator();
        return;
      }
      dataMaps = tableDataMap.getTableDataMaps(distributable.getDistributable());
      List<ExtendedBlocklet> blocklets = tableDataMap.prune(dataMaps,
          distributable.getDistributable(),
          dataMapExprWrapper.getFilterResolverIntf(distributable.getUniqueId()), partitions);
      for (ExtendedBlocklet blocklet : blocklets) {
        blocklet.setDataMapUniqueId(distributable.getUniqueId());
      }
      blockletIterator = blocklets.iterator();
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      boolean hasNext = blockletIterator.hasNext();
      if (hasNext) {
        currBlocklet = blockletIterator.next();
      } else {
        // close all resources when all the results are returned
        close();
      }
      return hasNext;
    }

    @Override
    public Void getCurrentKey() throws IOException, InterruptedException {
      return null;
    }

    @Override
    public ExtendedBlocklet getCurrentValue() throws IOException, InterruptedException {
      return currBlocklet;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public void close() throws IOException {
      if (null != dataMaps) {
        for (DataMap dataMap : dataMaps) {
          dataMap.finish();
        }
      }
    }
  };
}
From source file:org.apache.carbondata.hadoop.api.DistributableDataMapFormat.java
License:Apache License
@Override
public RecordReader<Void, Blocklet> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  return new RecordReader<Void, Blocklet>() {
    private Iterator<Blocklet> blockletIterator;
    private Blocklet currBlocklet;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      DataMapDistributable distributable = (DataMapDistributable) inputSplit;
      AbsoluteTableIdentifier identifier = AbsoluteTableIdentifier
          .fromTablePath(distributable.getTablePath());
      TableDataMap dataMap = DataMapStoreManager.getInstance().getDataMap(identifier,
          distributable.getDataMapName(), distributable.getDataMapFactoryClass());
      blockletIterator = dataMap
          .prune(distributable, getFilterExp(taskAttemptContext.getConfiguration()))
          .iterator();
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      boolean hasNext = blockletIterator.hasNext();
      if (hasNext) {
        currBlocklet = blockletIterator.next();
      }
      return hasNext;
    }

    @Override
    public Void getCurrentKey() throws IOException, InterruptedException {
      return null;
    }

    @Override
    public Blocklet getCurrentValue() throws IOException, InterruptedException {
      return currBlocklet;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public void close() throws IOException {
    }
  };
}
From source file:org.apache.fluo.mapreduce.FluoEntryInputFormat.java
License:Apache License
@Override
public RecordReader<RowColumn, Bytes> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new RecordReader<RowColumn, Bytes>() {
    private RowColumn rowCol;
    private Bytes val;
    private RowIterator rowIter;
    private Bytes row;
    private ColumnIterator colIter = null;
    private Environment env = null;
    private TransactionImpl ti = null;

    @Override
    public void close() throws IOException {
      if (ti != null) {
        ti.close();
      }
      if (env != null) {
        env.close();
      }
    }

    @Override
    public RowColumn getCurrentKey() throws IOException, InterruptedException {
      return rowCol;
    }

    @Override
    public Bytes getCurrentValue() throws IOException, InterruptedException {
      return val;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      // TODO Auto-generated method stub
      return 0;
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      try {
        ByteArrayInputStream bais = new ByteArrayInputStream(
            context.getConfiguration().get(PROPS_CONF_KEY).getBytes(StandardCharsets.UTF_8));
        env = new Environment(new FluoConfiguration(bais));
        ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));

        // TODO this uses non public Accumulo API!
        RangeInputSplit ris = (RangeInputSplit) split;
        Span span = SpanUtil.toSpan(ris.getRange());

        ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);
        for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0])) {
          sc.fetchColumnFamily(Bytes.of(fam));
        }

        rowIter = ti.get(sc);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      while (true) {
        if ((colIter != null) && (colIter.hasNext())) {
          Entry<Column, Bytes> colEntry = colIter.next();
          rowCol = new RowColumn(row, colEntry.getKey());
          val = colEntry.getValue();
          return true;
        } else if (rowIter.hasNext()) {
          Entry<Bytes, ColumnIterator> rowEntry = rowIter.next();
          row = rowEntry.getKey();
          colIter = rowEntry.getValue();
        } else {
          return false;
        }
      }
    }
  };
}
From source file:org.apache.fluo.mapreduce.FluoRowInputFormat.java
License:Apache License
@Override
public RecordReader<Bytes, ColumnIterator> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new RecordReader<Bytes, ColumnIterator>() {
    private Entry<Bytes, ColumnIterator> entry;
    private RowIterator rowIter;
    private Environment env = null;
    private TransactionImpl ti = null;

    @Override
    public void close() throws IOException {
      if (ti != null) {
        ti.close();
      }
      if (env != null) {
        env.close();
      }
    }

    @Override
    public Bytes getCurrentKey() throws IOException, InterruptedException {
      return entry.getKey();
    }

    @Override
    public ColumnIterator getCurrentValue() throws IOException, InterruptedException {
      return entry.getValue();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      // TODO Auto-generated method stub
      return 0;
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      try {
        ByteArrayInputStream bais = new ByteArrayInputStream(
            context.getConfiguration().get(PROPS_CONF_KEY).getBytes(StandardCharsets.UTF_8));
        env = new Environment(new FluoConfiguration(bais));
        ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));

        // TODO this uses non public Accumulo API!
        RangeInputSplit ris = (RangeInputSplit) split;
        Span span = SpanUtil.toSpan(ris.getRange());

        ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);
        for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0])) {
          sc.fetchColumnFamily(Bytes.of(fam));
        }

        rowIter = ti.get(sc);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (rowIter.hasNext()) {
        entry = rowIter.next();
        return true;
      }
      return false;
    }
  };
}
From source file:org.apache.pig.builtin.TrevniStorage.java
License:Apache License
@Override
public InputFormat<NullWritable, GenericData.Record> getInputFormat() throws IOException {
  class TrevniStorageInputFormat extends PigFileInputFormat<NullWritable, GenericData.Record> {

    @Override
    protected boolean isSplitable(JobContext jc, Path p) {
      return false;
    }

    @Override
    protected List<FileStatus> listStatus(final JobContext job) throws IOException {
      List<FileStatus> results = Lists.newArrayList();
      job.getConfiguration().setBoolean(MRConfiguration.INPUT_DIR_RECURSIVE, true);
      for (FileStatus file : super.listStatus(job)) {
        if (Utils.VISIBLE_FILES.accept(file.getPath())) {
          results.add(file);
        }
      }
      return results;
    }

    @Override
    public RecordReader<NullWritable, GenericData.Record> createRecordReader(final InputSplit is,
        final TaskAttemptContext tc) throws IOException, InterruptedException {
      RecordReader<NullWritable, GenericData.Record> rr =
          new RecordReader<NullWritable, GenericData.Record>() {
            private FileSplit fsplit;
            private AvroColumnReader.Params params;
            private AvroColumnReader<GenericData.Record> reader;
            private float rows;
            private long row = 0;
            private GenericData.Record currentRecord = null;

            @Override
            public void close() throws IOException {
              reader.close();
            }

            @Override
            public NullWritable getCurrentKey() throws IOException, InterruptedException {
              return NullWritable.get();
            }

            @Override
            public Record getCurrentValue() throws IOException, InterruptedException {
              return currentRecord;
            }

            @Override
            public float getProgress() throws IOException, InterruptedException {
              return row / rows;
            }

            @Override
            public void initialize(final InputSplit isplit, final TaskAttemptContext tac)
                throws IOException, InterruptedException {
              fsplit = (FileSplit) isplit;
              params = new AvroColumnReader.Params(
                  new HadoopInput(fsplit.getPath(), tac.getConfiguration()));
              Schema inputSchema = getInputAvroSchema();
              params.setSchema(inputSchema);
              reader = new AvroColumnReader<GenericData.Record>(params);
              rows = reader.getRowCount();
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
              if (reader.hasNext()) {
                currentRecord = reader.next();
                row++;
                return true;
              } else {
                return false;
              }
            }
          };
      // rr.initialize(is, tc);
      tc.setStatus(is.toString());
      return rr;
    }
  }

  return new TrevniStorageInputFormat();
}
From source file:org.apache.rya.accumulo.mr.AccumuloHDFSFileInputFormat.java
License:Apache License
@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  return new RecordReader<Key, Value>() {
    private FileSKVIterator fileSKVIterator;
    private boolean started = false;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      FileSplit split = (FileSplit) inputSplit;
      Configuration job = taskAttemptContext.getConfiguration();
      Path file = split.getPath();
      FileSystem fs = file.getFileSystem(job);
      Instance instance = MRUtils.AccumuloProps.getInstance(taskAttemptContext);

      fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE,
          new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (started) {
        fileSKVIterator.next();
      } else {
        started = true; // don't move past the first record yet
      }
      return fileSKVIterator.hasTop();
    }

    @Override
    public Key getCurrentKey() throws IOException, InterruptedException {
      return fileSKVIterator.getTopKey();
    }

    @Override
    public Value getCurrentValue() throws IOException, InterruptedException {
      return fileSKVIterator.getTopValue();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public void close() throws IOException {
    }
  };
}
From source file:org.apache.tinkerpop.gremlin.spark.structure.io.InputRDDFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, VertexWritable> createRecordReader(final InputSplit inputSplit,
    final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  try {
    final org.apache.hadoop.conf.Configuration hadoopConfiguration = taskAttemptContext.getConfiguration();
    final SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(UUID.randomUUID().toString());
    hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
    final InputRDD inputRDD = (InputRDD) Class
        .forName(sparkConfiguration.get(Constants.GREMLIN_HADOOP_GRAPH_READER)).newInstance();
    final JavaSparkContext javaSparkContext =
        new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    Spark.create(javaSparkContext.sc());
    final Iterator<Tuple2<Object, VertexWritable>> iterator = inputRDD
        .readGraphRDD(ConfUtil.makeApacheConfiguration(taskAttemptContext.getConfiguration()),
            javaSparkContext)
        .toLocalIterator();
    return new RecordReader<NullWritable, VertexWritable>() {
      @Override
      public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext)
          throws IOException, InterruptedException {
      }

      @Override
      public boolean nextKeyValue() throws IOException, InterruptedException {
        return iterator.hasNext();
      }

      @Override
      public NullWritable getCurrentKey() throws IOException, InterruptedException {
        return NullWritable.get();
      }

      @Override
      public VertexWritable getCurrentValue() throws IOException, InterruptedException {
        return iterator.next()._2();
      }

      @Override
      public float getProgress() throws IOException, InterruptedException {
        return 1.0f; // TODO: make this dynamic (how? its an iterator.)
      }

      @Override
      public void close() throws IOException {
      }
    };
  } catch (final ClassNotFoundException | InstantiationException | IllegalAccessException e) {
    throw new IOException(e.getMessage(), e);
  }
}
From source file:org.locationtech.geomesa.bigtable.spark.BigtableInputFormatBase.java
License:Open Source License
/**
 * Builds a TableRecordReader. If no TableRecordReader was provided, uses the default.
 *
 * @param split The split to work with.
 * @param context The current context.
 * @return The newly created record reader.
 * @throws IOException When creating the reader fails.
 * @throws InterruptedException when record reader initialization fails
 * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
 *      org.apache.hadoop.mapreduce.InputSplit,
 *      org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public RecordReader<ImmutableBytesWritable, Result> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  BigtableExtendedScanSplit tSplit = (BigtableExtendedScanSplit) split;
  LOG.info(MessageFormat.format("Input split length: {0} bytes.", tSplit.getLength()));

  if (tSplit.name == null) {
    throw new IOException("Cannot create a record reader because of a"
        + " previous error. Please look at the previous logs lines from"
        + " the task's full log for more details.");
  }
  final Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
  Table table = connection.getTable(tSplit.name);

  if (this.tableRecordReader == null) {
    this.tableRecordReader = new BigtableTableRecordReader();
  }
  final BigtableTableRecordReader trr = this.tableRecordReader;

  BigtableExtendedScan sc = tSplit.scan;
  trr.setHTable(table);
  trr.setScan(sc);

  return new RecordReader<ImmutableBytesWritable, Result>() {
    @Override
    public void close() throws IOException {
      trr.close();
      connection.close();
    }

    @Override
    public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
      return trr.getCurrentKey();
    }

    @Override
    public Result getCurrentValue() throws IOException, InterruptedException {
      return trr.getCurrentValue();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return trr.getProgress();
    }

    @Override
    public void initialize(InputSplit inputsplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
      trr.initialize(inputsplit, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      return trr.nextKeyValue();
    }
  };
}
From source file:org.tensorflow.hadoop.io.TFRecordFileInputFormat.java
License:Open Source License
@Override
public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit inputSplit,
    final TaskAttemptContext context) throws IOException, InterruptedException {
  return new RecordReader<BytesWritable, NullWritable>() {
    private FSDataInputStream fsdis;
    private TFRecordReader reader;
    private long length;
    private long begin;
    private byte[] current;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
      FileSplit fileSplit = (FileSplit) split;
      length = fileSplit.getLength();
      begin = fileSplit.getStart();

      final Path file = fileSplit.getPath();
      FileSystem fs = file.getFileSystem(conf);
      fsdis = fs.open(file, TFRecordIOConf.getBufferSize(conf));
      reader = new TFRecordReader(fsdis, TFRecordIOConf.getDoCrc32Check(conf));
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      current = reader.read();
      return current != null;
    }

    @Override
    public BytesWritable getCurrentKey() throws IOException, InterruptedException {
      return new BytesWritable(current);
    }

    @Override
    public NullWritable getCurrentValue() throws IOException, InterruptedException {
      return NullWritable.get();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return (fsdis.getPos() - begin) / (length + 1e-6f);
    }

    @Override
    public void close() throws IOException {
      fsdis.close();
    }
  };
}