List of usage examples for org.apache.hadoop.mapreduce.RecordReader
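Every example below instantiates the abstract org.apache.hadoop.mapreduce.RecordReader as an anonymous subclass inside InputFormat.createRecordReader and implements its six abstract methods: initialize, nextKeyValue, getCurrentKey, getCurrentValue, getProgress, and close. The following stripped-down sketch shows that shared pattern; the LongWritable/Text key and value types and the placeholder iterator are illustrative assumptions, not taken from any of the projects listed here.

import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical skeleton: the backing iterator stands in for whatever source the split describes.
public class SkeletonRecordReader extends RecordReader<LongWritable, Text> {
    private Iterator<Text> values = Collections.emptyIterator();
    private long position = 0;
    private Text currentValue;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Open the resource identified by the split (file, scan, RDD partition, ...) here.
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        // Advance to the next record; return false when the source is exhausted.
        if (!values.hasNext()) {
            return false;
        }
        currentValue = values.next();
        position++;
        return true;
    }

    @Override
    public LongWritable getCurrentKey() throws IOException, InterruptedException {
        return new LongWritable(position);
    }

    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        return currentValue;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return 0.0f; // Several of the examples below also return a constant here.
    }

    @Override
    public void close() throws IOException {
        // Release whatever initialize() opened.
    }
}

The concrete examples differ mainly in what initialize() opens (a DataMap, a Fluo transaction, an RFile, an HBase/Bigtable scan, a Spark RDD, or a TFRecord stream) and in how nextKeyValue() walks that source.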
From source file:org.apache.carbondata.core.datamap.DistributableDataMapFormat.java
License:Apache License
@Override
public RecordReader<Void, ExtendedBlocklet> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  return new RecordReader<Void, ExtendedBlocklet>() {
    private Iterator<ExtendedBlocklet> blockletIterator;
    private ExtendedBlocklet currBlocklet;
    private List<DataMap> dataMaps;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      distributable = (DataMapDistributableWrapper) inputSplit;
      // clear the segmentMap and from cache in executor when there are invalid segments
      if (invalidSegments.size() > 0) {
        DataMapStoreManager.getInstance().clearInvalidSegments(table, invalidSegments);
      }
      TableDataMap tableDataMap = DataMapStoreManager.getInstance()
          .getDataMap(table, distributable.getDistributable().getDataMapSchema());
      if (isJobToClearDataMaps) {
        // if job is to clear datamaps just clear datamaps from cache and return
        DataMapStoreManager.getInstance()
            .clearDataMaps(table.getCarbonTableIdentifier().getTableUniqueName());
        // clear the segment properties cache from executor
        SegmentPropertiesAndSchemaHolder.getInstance().invalidate(table.getAbsoluteTableIdentifier());
        blockletIterator = Collections.emptyIterator();
        return;
      }
      dataMaps = tableDataMap.getTableDataMaps(distributable.getDistributable());
      List<ExtendedBlocklet> blocklets = tableDataMap.prune(dataMaps,
          distributable.getDistributable(),
          dataMapExprWrapper.getFilterResolverIntf(distributable.getUniqueId()), partitions);
      for (ExtendedBlocklet blocklet : blocklets) {
        blocklet.setDataMapUniqueId(distributable.getUniqueId());
      }
      blockletIterator = blocklets.iterator();
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      boolean hasNext = blockletIterator.hasNext();
      if (hasNext) {
        currBlocklet = blockletIterator.next();
      } else {
        // close all resources when all the results are returned
        close();
      }
      return hasNext;
    }

    @Override
    public Void getCurrentKey() throws IOException, InterruptedException {
      return null;
    }

    @Override
    public ExtendedBlocklet getCurrentValue() throws IOException, InterruptedException {
      return currBlocklet;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public void close() throws IOException {
      if (null != dataMaps) {
        for (DataMap dataMap : dataMaps) {
          dataMap.finish();
        }
      }
    }
  };
}
From source file:org.apache.carbondata.hadoop.api.DistributableDataMapFormat.java
License:Apache License
@Override
public RecordReader<Void, Blocklet> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  return new RecordReader<Void, Blocklet>() {
    private Iterator<Blocklet> blockletIterator;
    private Blocklet currBlocklet;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      DataMapDistributable distributable = (DataMapDistributable) inputSplit;
      AbsoluteTableIdentifier identifier = AbsoluteTableIdentifier
          .fromTablePath(distributable.getTablePath());
      TableDataMap dataMap = DataMapStoreManager.getInstance().getDataMap(identifier,
          distributable.getDataMapName(), distributable.getDataMapFactoryClass());
      blockletIterator = dataMap
          .prune(distributable, getFilterExp(taskAttemptContext.getConfiguration()))
          .iterator();
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      boolean hasNext = blockletIterator.hasNext();
      if (hasNext) {
        currBlocklet = blockletIterator.next();
      }
      return hasNext;
    }

    @Override
    public Void getCurrentKey() throws IOException, InterruptedException {
      return null;
    }

    @Override
    public Blocklet getCurrentValue() throws IOException, InterruptedException {
      return currBlocklet;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public void close() throws IOException {
    }
  };
}
From source file:org.apache.fluo.mapreduce.FluoEntryInputFormat.java
License:Apache License
@Override
public RecordReader<RowColumn, Bytes> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new RecordReader<RowColumn, Bytes>() {
    private RowColumn rowCol;
    private Bytes val;
    private RowIterator rowIter;
    private Bytes row;
    private ColumnIterator colIter = null;
    private Environment env = null;
    private TransactionImpl ti = null;

    @Override
    public void close() throws IOException {
      if (ti != null) {
        ti.close();
      }
      if (env != null) {
        env.close();
      }
    }

    @Override
    public RowColumn getCurrentKey() throws IOException, InterruptedException {
      return rowCol;
    }

    @Override
    public Bytes getCurrentValue() throws IOException, InterruptedException {
      return val;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      // TODO Auto-generated method stub
      return 0;
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      try {
        ByteArrayInputStream bais = new ByteArrayInputStream(
            context.getConfiguration().get(PROPS_CONF_KEY).getBytes(StandardCharsets.UTF_8));
        env = new Environment(new FluoConfiguration(bais));
        ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));

        // TODO this uses non public Accumulo API!
        RangeInputSplit ris = (RangeInputSplit) split;
        Span span = SpanUtil.toSpan(ris.getRange());

        ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);
        for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0])) {
          sc.fetchColumnFamily(Bytes.of(fam));
        }

        rowIter = ti.get(sc);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      while (true) {
        if ((colIter != null) && (colIter.hasNext())) {
          Entry<Column, Bytes> colEntry = colIter.next();
          rowCol = new RowColumn(row, colEntry.getKey());
          val = colEntry.getValue();
          return true;
        } else if (rowIter.hasNext()) {
          Entry<Bytes, ColumnIterator> rowEntry = rowIter.next();
          row = rowEntry.getKey();
          colIter = rowEntry.getValue();
        } else {
          return false;
        }
      }
    }
  };
}
From source file:org.apache.fluo.mapreduce.FluoRowInputFormat.java
License:Apache License
@Override
public RecordReader<Bytes, ColumnIterator> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new RecordReader<Bytes, ColumnIterator>() {
    private Entry<Bytes, ColumnIterator> entry;
    private RowIterator rowIter;
    private Environment env = null;
    private TransactionImpl ti = null;

    @Override
    public void close() throws IOException {
      if (ti != null) {
        ti.close();
      }
      if (env != null) {
        env.close();
      }
    }

    @Override
    public Bytes getCurrentKey() throws IOException, InterruptedException {
      return entry.getKey();
    }

    @Override
    public ColumnIterator getCurrentValue() throws IOException, InterruptedException {
      return entry.getValue();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      // TODO Auto-generated method stub
      return 0;
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      try {
        ByteArrayInputStream bais = new ByteArrayInputStream(
            context.getConfiguration().get(PROPS_CONF_KEY).getBytes(StandardCharsets.UTF_8));
        env = new Environment(new FluoConfiguration(bais));
        ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));

        // TODO this uses non public Accumulo API!
        RangeInputSplit ris = (RangeInputSplit) split;
        Span span = SpanUtil.toSpan(ris.getRange());

        ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);
        for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0])) {
          sc.fetchColumnFamily(Bytes.of(fam));
        }

        rowIter = ti.get(sc);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (rowIter.hasNext()) {
        entry = rowIter.next();
        return true;
      }
      return false;
    }
  };
}
From source file:org.apache.pig.builtin.TrevniStorage.java
License:Apache License
@Override
public InputFormat<NullWritable, GenericData.Record> getInputFormat() throws IOException {
  class TrevniStorageInputFormat extends PigFileInputFormat<NullWritable, GenericData.Record> {

    @Override
    protected boolean isSplitable(JobContext jc, Path p) {
      return false;
    }

    @Override
    protected List<FileStatus> listStatus(final JobContext job) throws IOException {
      List<FileStatus> results = Lists.newArrayList();
      job.getConfiguration().setBoolean(MRConfiguration.INPUT_DIR_RECURSIVE, true);
      for (FileStatus file : super.listStatus(job)) {
        if (Utils.VISIBLE_FILES.accept(file.getPath())) {
          results.add(file);
        }
      }
      return results;
    }

    @Override
    public RecordReader<NullWritable, GenericData.Record> createRecordReader(final InputSplit is,
        final TaskAttemptContext tc) throws IOException, InterruptedException {
      RecordReader<NullWritable, GenericData.Record> rr =
          new RecordReader<NullWritable, GenericData.Record>() {
            private FileSplit fsplit;
            private AvroColumnReader.Params params;
            private AvroColumnReader<GenericData.Record> reader;
            private float rows;
            private long row = 0;
            private GenericData.Record currentRecord = null;

            @Override
            public void close() throws IOException {
              reader.close();
            }

            @Override
            public NullWritable getCurrentKey() throws IOException, InterruptedException {
              return NullWritable.get();
            }

            @Override
            public Record getCurrentValue() throws IOException, InterruptedException {
              return currentRecord;
            }

            @Override
            public float getProgress() throws IOException, InterruptedException {
              return row / rows;
            }

            @Override
            public void initialize(final InputSplit isplit, final TaskAttemptContext tac)
                throws IOException, InterruptedException {
              fsplit = (FileSplit) isplit;
              params = new AvroColumnReader.Params(
                  new HadoopInput(fsplit.getPath(), tac.getConfiguration()));
              Schema inputSchema = getInputAvroSchema();
              params.setSchema(inputSchema);
              reader = new AvroColumnReader<GenericData.Record>(params);
              rows = reader.getRowCount();
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
              if (reader.hasNext()) {
                currentRecord = reader.next();
                row++;
                return true;
              } else {
                return false;
              }
            }
          };
      // rr.initialize(is, tc);
      tc.setStatus(is.toString());
      return rr;
    }
  }

  return new TrevniStorageInputFormat();
}
From source file:org.apache.rya.accumulo.mr.AccumuloHDFSFileInputFormat.java
License:Apache License
@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  return new RecordReader<Key, Value>() {
    private FileSKVIterator fileSKVIterator;
    private boolean started = false;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      FileSplit split = (FileSplit) inputSplit;
      Configuration job = taskAttemptContext.getConfiguration();
      Path file = split.getPath();
      FileSystem fs = file.getFileSystem(job);
      Instance instance = MRUtils.AccumuloProps.getInstance(taskAttemptContext);

      fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE,
          new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (started) {
        fileSKVIterator.next();
      } else {
        started = true; // don't move past the first record yet
      }
      return fileSKVIterator.hasTop();
    }

    @Override
    public Key getCurrentKey() throws IOException, InterruptedException {
      return fileSKVIterator.getTopKey();
    }

    @Override
    public Value getCurrentValue() throws IOException, InterruptedException {
      return fileSKVIterator.getTopValue();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public void close() throws IOException {
    }
  };
}
From source file:org.apache.tinkerpop.gremlin.spark.structure.io.InputRDDFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, VertexWritable> createRecordReader(final InputSplit inputSplit,
    final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  try {
    final org.apache.hadoop.conf.Configuration hadoopConfiguration = taskAttemptContext.getConfiguration();
    final SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(UUID.randomUUID().toString());
    hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
    final InputRDD inputRDD = (InputRDD) Class
        .forName(sparkConfiguration.get(Constants.GREMLIN_HADOOP_GRAPH_READER)).newInstance();
    final JavaSparkContext javaSparkContext =
        new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    Spark.create(javaSparkContext.sc());
    final Iterator<Tuple2<Object, VertexWritable>> iterator = inputRDD
        .readGraphRDD(ConfUtil.makeApacheConfiguration(taskAttemptContext.getConfiguration()),
            javaSparkContext)
        .toLocalIterator();
    return new RecordReader<NullWritable, VertexWritable>() {
      @Override
      public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext)
          throws IOException, InterruptedException {
      }

      @Override
      public boolean nextKeyValue() throws IOException, InterruptedException {
        return iterator.hasNext();
      }

      @Override
      public NullWritable getCurrentKey() throws IOException, InterruptedException {
        return NullWritable.get();
      }

      @Override
      public VertexWritable getCurrentValue() throws IOException, InterruptedException {
        return iterator.next()._2();
      }

      @Override
      public float getProgress() throws IOException, InterruptedException {
        return 1.0f; // TODO: make this dynamic (how? its an iterator.)
      }

      @Override
      public void close() throws IOException {
      }
    };
  } catch (final ClassNotFoundException | InstantiationException | IllegalAccessException e) {
    throw new IOException(e.getMessage(), e);
  }
}
From source file:org.locationtech.geomesa.bigtable.spark.BigtableInputFormatBase.java
License:Open Source License
/**
 * Builds a TableRecordReader. If no TableRecordReader was provided, uses the default.
 *
 * @param split The split to work with.
 * @param context The current context.
 * @return The newly created record reader.
 * @throws IOException When creating the reader fails.
 * @throws InterruptedException when record reader initialization fails
 * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
 *      org.apache.hadoop.mapreduce.InputSplit,
 *      org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public RecordReader<ImmutableBytesWritable, Result> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  BigtableExtendedScanSplit tSplit = (BigtableExtendedScanSplit) split;
  LOG.info(MessageFormat.format("Input split length: {0} bytes.", tSplit.getLength()));

  if (tSplit.name == null) {
    throw new IOException("Cannot create a record reader because of a"
        + " previous error. Please look at the previous logs lines from"
        + " the task's full log for more details.");
  }
  final Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
  Table table = connection.getTable(tSplit.name);

  if (this.tableRecordReader == null) {
    this.tableRecordReader = new BigtableTableRecordReader();
  }
  final BigtableTableRecordReader trr = this.tableRecordReader;

  BigtableExtendedScan sc = tSplit.scan;
  trr.setHTable(table);
  trr.setScan(sc);

  return new RecordReader<ImmutableBytesWritable, Result>() {
    @Override
    public void close() throws IOException {
      trr.close();
      connection.close();
    }

    @Override
    public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
      return trr.getCurrentKey();
    }

    @Override
    public Result getCurrentValue() throws IOException, InterruptedException {
      return trr.getCurrentValue();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return trr.getProgress();
    }

    @Override
    public void initialize(InputSplit inputsplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
      trr.initialize(inputsplit, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      return trr.nextKeyValue();
    }
  };
}
From source file:org.tensorflow.hadoop.io.TFRecordFileInputFormat.java
License:Open Source License
@Override
public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit inputSplit,
    final TaskAttemptContext context) throws IOException, InterruptedException {
  return new RecordReader<BytesWritable, NullWritable>() {
    private FSDataInputStream fsdis;
    private TFRecordReader reader;
    private long length;
    private long begin;
    private byte[] current;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
      FileSplit fileSplit = (FileSplit) split;
      length = fileSplit.getLength();
      begin = fileSplit.getStart();

      final Path file = fileSplit.getPath();
      FileSystem fs = file.getFileSystem(conf);
      fsdis = fs.open(file, TFRecordIOConf.getBufferSize(conf));
      reader = new TFRecordReader(fsdis, TFRecordIOConf.getDoCrc32Check(conf));
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      current = reader.read();
      return current != null;
    }

    @Override
    public BytesWritable getCurrentKey() throws IOException, InterruptedException {
      return new BytesWritable(current);
    }

    @Override
    public NullWritable getCurrentValue() throws IOException, InterruptedException {
      return NullWritable.get();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return (fsdis.getPos() - begin) / (length + 1e-6f);
    }

    @Override
    public void close() throws IOException {
      fsdis.close();
    }
  };
}