Example usage for org.apache.hadoop.mapreduce RecordReader RecordReader

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.RecordReader constructor, i.e. places where an InputFormat's createRecordReader method instantiates an anonymous RecordReader subclass inline.

Prototype

RecordReader()
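This prototype is the no-argument constructor of the abstract RecordReader class. Because the class declares no explicit constructor, it is invoked implicitly whenever a concrete or anonymous subclass is instantiated, which is exactly what the createRecordReader implementations below do. The following is a minimal, self-contained sketch of that pattern; SingleRecordInputFormat and its key/value types are illustrative placeholders, not taken from any of the projects listed in the Usage section.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

// Hypothetical input format used only to illustrate the pattern: it emits a single
// constant record per split instead of actually reading the file contents.
public class SingleRecordInputFormat extends FileInputFormat<LongWritable, Text> {

    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The RecordReader() constructor runs implicitly when this anonymous subclass is created.
        return new RecordReader<LongWritable, Text>() {
            private boolean consumed = false;
            private final LongWritable key = new LongWritable(0);
            private final Text value = new Text("example");

            @Override
            public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
                    throws IOException, InterruptedException {
                // A real reader would open the split (e.g. an HDFS file) here.
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                if (consumed) {
                    return false;
                }
                consumed = true; // emit exactly one key/value pair per split
                return true;
            }

            @Override
            public LongWritable getCurrentKey() {
                return key;
            }

            @Override
            public Text getCurrentValue() {
                return value;
            }

            @Override
            public float getProgress() {
                return consumed ? 1.0f : 0.0f;
            }

            @Override
            public void close() throws IOException {
                // A real reader would release streams and connections here.
            }
        };
    }
}

The framework creates one such reader per input split and drives it through initialize, nextKeyValue, getCurrentKey/getCurrentValue and, finally, close.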

Usage

From source file:org.apache.carbondata.core.datamap.DistributableDataMapFormat.java

License:Apache License

@Override
public RecordReader<Void, ExtendedBlocklet> createRecordReader(InputSplit inputSplit,
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    return new RecordReader<Void, ExtendedBlocklet>() {
        private Iterator<ExtendedBlocklet> blockletIterator;
        private ExtendedBlocklet currBlocklet;
        private List<DataMap> dataMaps;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
                throws IOException, InterruptedException {
            // 'distributable' (like 'table', 'invalidSegments' and the other outer names used
            // below) is a field of the enclosing DistributableDataMapFormat instance; its
            // declaration is not part of this snippet.
            distributable = (DataMapDistributableWrapper) inputSplit;
            // clear the segmentMap and from cache in executor when there are invalid segments
            if (invalidSegments.size() > 0) {
                DataMapStoreManager.getInstance().clearInvalidSegments(table, invalidSegments);
            }
            TableDataMap tableDataMap = DataMapStoreManager.getInstance().getDataMap(table,
                    distributable.getDistributable().getDataMapSchema());
            if (isJobToClearDataMaps) {
                // if job is to clear datamaps just clear datamaps from cache and return
                DataMapStoreManager.getInstance()
                        .clearDataMaps(table.getCarbonTableIdentifier().getTableUniqueName());
                // clear the segment properties cache from executor
                SegmentPropertiesAndSchemaHolder.getInstance().invalidate(table.getAbsoluteTableIdentifier());
                blockletIterator = Collections.emptyIterator();
                return;
            }
            dataMaps = tableDataMap.getTableDataMaps(distributable.getDistributable());
            List<ExtendedBlocklet> blocklets = tableDataMap.prune(dataMaps, distributable.getDistributable(),
                    dataMapExprWrapper.getFilterResolverIntf(distributable.getUniqueId()), partitions);
            for (ExtendedBlocklet blocklet : blocklets) {
                blocklet.setDataMapUniqueId(distributable.getUniqueId());
            }
            blockletIterator = blocklets.iterator();
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            boolean hasNext = blockletIterator.hasNext();
            if (hasNext) {
                currBlocklet = blockletIterator.next();
            } else {
                // close all resources when all the results are returned
                close();
            }
            return hasNext;
        }

        @Override
        public Void getCurrentKey() throws IOException, InterruptedException {
            return null;
        }

        @Override
        public ExtendedBlocklet getCurrentValue() throws IOException, InterruptedException {
            return currBlocklet;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
            if (null != dataMaps) {
                for (DataMap dataMap : dataMaps) {
                    dataMap.finish();
                }
            }
        }
    };
}

From source file:org.apache.carbondata.hadoop.api.DistributableDataMapFormat.java

License:Apache License

@Override
public RecordReader<Void, Blocklet> createRecordReader(InputSplit inputSplit,
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    return new RecordReader<Void, Blocklet>() {
        private Iterator<Blocklet> blockletIterator;
        private Blocklet currBlocklet;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
                throws IOException, InterruptedException {
            DataMapDistributable distributable = (DataMapDistributable) inputSplit;
            AbsoluteTableIdentifier identifier = AbsoluteTableIdentifier
                    .fromTablePath(distributable.getTablePath());
            TableDataMap dataMap = DataMapStoreManager.getInstance().getDataMap(identifier,
                    distributable.getDataMapName(), distributable.getDataMapFactoryClass());
            blockletIterator = dataMap.prune(distributable, getFilterExp(taskAttemptContext.getConfiguration()))
                    .iterator();
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            boolean hasNext = blockletIterator.hasNext();
            if (hasNext) {
                currBlocklet = blockletIterator.next();
            }
            return hasNext;
        }

        @Override
        public Void getCurrentKey() throws IOException, InterruptedException {
            return null;
        }

        @Override
        public Blocklet getCurrentValue() throws IOException, InterruptedException {
            return currBlocklet;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {

        }
    };
}

From source file:org.apache.fluo.mapreduce.FluoEntryInputFormat.java

License:Apache License

@Override
public RecordReader<RowColumn, Bytes> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    return new RecordReader<RowColumn, Bytes>() {

        private RowColumn rowCol;
        private Bytes val;
        private RowIterator rowIter;
        private Bytes row;
        private ColumnIterator colIter = null;
        private Environment env = null;
        private TransactionImpl ti = null;

        @Override
        public void close() throws IOException {
            if (ti != null) {
                ti.close();
            }

            if (env != null) {
                env.close();
            }
        }

        @Override
        public RowColumn getCurrentKey() throws IOException, InterruptedException {
            return rowCol;
        }

        @Override
        public Bytes getCurrentValue() throws IOException, InterruptedException {
            return val;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            // TODO Auto-generated method stub
            return 0;
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            try {
                ByteArrayInputStream bais = new ByteArrayInputStream(
                        context.getConfiguration().get(PROPS_CONF_KEY).getBytes(StandardCharsets.UTF_8));

                env = new Environment(new FluoConfiguration(bais));

                ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));

                // TODO this uses non public Accumulo API!
                RangeInputSplit ris = (RangeInputSplit) split;
                Span span = SpanUtil.toSpan(ris.getRange());
                ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);

                for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0])) {
                    sc.fetchColumnFamily(Bytes.of(fam));
                }

                rowIter = ti.get(sc);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            while (true) {
                if ((colIter != null) && (colIter.hasNext())) {
                    Entry<Column, Bytes> colEntry = colIter.next();
                    rowCol = new RowColumn(row, colEntry.getKey());
                    val = colEntry.getValue();
                    return true;
                } else if (rowIter.hasNext()) {
                    Entry<Bytes, ColumnIterator> rowEntry = rowIter.next();
                    row = rowEntry.getKey();
                    colIter = rowEntry.getValue();
                } else {
                    return false;
                }
            }
        }
    };
}

From source file:org.apache.fluo.mapreduce.FluoRowInputFormat.java

License:Apache License

@Override
public RecordReader<Bytes, ColumnIterator> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    return new RecordReader<Bytes, ColumnIterator>() {

        private Entry<Bytes, ColumnIterator> entry;
        private RowIterator rowIter;
        private Environment env = null;
        private TransactionImpl ti = null;

        @Override
        public void close() throws IOException {
            if (ti != null) {
                ti.close();
            }

            if (env != null) {
                env.close();
            }
        }

        @Override
        public Bytes getCurrentKey() throws IOException, InterruptedException {
            return entry.getKey();
        }

        @Override
        public ColumnIterator getCurrentValue() throws IOException, InterruptedException {
            return entry.getValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            // TODO Auto-generated method stub
            return 0;
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            try {
                ByteArrayInputStream bais = new ByteArrayInputStream(
                        context.getConfiguration().get(PROPS_CONF_KEY).getBytes(StandardCharsets.UTF_8));

                env = new Environment(new FluoConfiguration(bais));

                ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));

                // TODO this uses non public Accumulo API!
                RangeInputSplit ris = (RangeInputSplit) split;
                Span span = SpanUtil.toSpan(ris.getRange());
                ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);

                for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0])) {
                    sc.fetchColumnFamily(Bytes.of(fam));
                }

                rowIter = ti.get(sc);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (rowIter.hasNext()) {
                entry = rowIter.next();
                return true;
            }
            return false;
        }
    };

}

From source file:org.apache.pig.builtin.TrevniStorage.java

License:Apache License

@Override
public InputFormat<NullWritable, GenericData.Record> getInputFormat() throws IOException {

    class TrevniStorageInputFormat extends PigFileInputFormat<NullWritable, GenericData.Record> {

        @Override
        protected boolean isSplitable(JobContext jc, Path p) {
            return false;
        }

        @Override
        protected List<FileStatus> listStatus(final JobContext job) throws IOException {
            List<FileStatus> results = Lists.newArrayList();
            job.getConfiguration().setBoolean(MRConfiguration.INPUT_DIR_RECURSIVE, true);
            for (FileStatus file : super.listStatus(job)) {
                if (Utils.VISIBLE_FILES.accept(file.getPath())) {
                    results.add(file);
                }
            }
            return results;
        }

        @Override
        public RecordReader<NullWritable, GenericData.Record> createRecordReader(final InputSplit is,
                final TaskAttemptContext tc) throws IOException, InterruptedException {
            RecordReader<NullWritable, GenericData.Record> rr = new RecordReader<NullWritable, GenericData.Record>() {

                private FileSplit fsplit;
                private AvroColumnReader.Params params;
                private AvroColumnReader<GenericData.Record> reader;
                private float rows;
                private long row = 0;
                private GenericData.Record currentRecord = null;

                @Override
                public void close() throws IOException {
                    reader.close();
                }

                @Override
                public NullWritable getCurrentKey() throws IOException, InterruptedException {
                    return NullWritable.get();
                }

                @Override
                public Record getCurrentValue() throws IOException, InterruptedException {
                    return currentRecord;
                }

                @Override
                public float getProgress() throws IOException, InterruptedException {
                    return row / rows;
                }

                @Override
                public void initialize(final InputSplit isplit, final TaskAttemptContext tac)
                        throws IOException, InterruptedException {
                    fsplit = (FileSplit) isplit;
                    params = new AvroColumnReader.Params(
                            new HadoopInput(fsplit.getPath(), tac.getConfiguration()));
                    Schema inputSchema = getInputAvroSchema();
                    params.setSchema(inputSchema);
                    reader = new AvroColumnReader<GenericData.Record>(params);
                    rows = reader.getRowCount();
                }

                @Override
                public boolean nextKeyValue() throws IOException, InterruptedException {
                    if (reader.hasNext()) {
                        currentRecord = reader.next();
                        row++;
                        return true;
                    } else {
                        return false;
                    }
                }
            };

            // rr.initialize(is, tc);
            tc.setStatus(is.toString());
            return rr;
        }

    }

    return new TrevniStorageInputFormat();

}

From source file:org.apache.rya.accumulo.mr.AccumuloHDFSFileInputFormat.java

License:Apache License

@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    return new RecordReader<Key, Value>() {

        private FileSKVIterator fileSKVIterator;
        private boolean started = false;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
                throws IOException, InterruptedException {
            FileSplit split = (FileSplit) inputSplit;
            Configuration job = taskAttemptContext.getConfiguration();
            Path file = split.getPath();
            FileSystem fs = file.getFileSystem(job);
            Instance instance = MRUtils.AccumuloProps.getInstance(taskAttemptContext);

            fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE,
                    new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (started) {
                fileSKVIterator.next();
            } else {
                started = true; // don't move past the first record yet
            }
            return fileSKVIterator.hasTop();
        }

        @Override
        public Key getCurrentKey() throws IOException, InterruptedException {
            return fileSKVIterator.getTopKey();
        }

        @Override
        public Value getCurrentValue() throws IOException, InterruptedException {
            return fileSKVIterator.getTopValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };
}

From source file:org.apache.tinkerpop.gremlin.spark.structure.io.InputRDDFormat.java

License:Apache License

@Override
public RecordReader<NullWritable, VertexWritable> createRecordReader(final InputSplit inputSplit,
        final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    try {
        final org.apache.hadoop.conf.Configuration hadoopConfiguration = taskAttemptContext.getConfiguration();
        final SparkConf sparkConfiguration = new SparkConf();
        sparkConfiguration.setAppName(UUID.randomUUID().toString());
        hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
        final InputRDD inputRDD = (InputRDD) Class
                .forName(sparkConfiguration.get(Constants.GREMLIN_HADOOP_GRAPH_READER)).newInstance();
        final JavaSparkContext javaSparkContext = new JavaSparkContext(
                SparkContext.getOrCreate(sparkConfiguration));
        Spark.create(javaSparkContext.sc());
        final Iterator<Tuple2<Object, VertexWritable>> iterator = inputRDD
                .readGraphRDD(ConfUtil.makeApacheConfiguration(taskAttemptContext.getConfiguration()),
                        javaSparkContext)
                .toLocalIterator();
        return new RecordReader<NullWritable, VertexWritable>() {
            @Override
            public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext)
                    throws IOException, InterruptedException {

            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                return iterator.hasNext();
            }

            @Override
            public NullWritable getCurrentKey() throws IOException, InterruptedException {
                return NullWritable.get();
            }

            @Override
            public VertexWritable getCurrentValue() throws IOException, InterruptedException {
                return iterator.next()._2();
            }

            @Override
            public float getProgress() throws IOException, InterruptedException {
                return 1.0f; // TODO: make this dynamic (how? its an iterator.)
            }

            @Override
            public void close() throws IOException {

            }
        };
    } catch (final ClassNotFoundException | InstantiationException | IllegalAccessException e) {
        throw new IOException(e.getMessage(), e);
    }

}

From source file:org.locationtech.geomesa.bigtable.spark.BigtableInputFormatBase.java

License:Open Source License

/**
 * Builds a TableRecordReader. If no TableRecordReader was provided, uses the
 * default.
 *
 * @param split The split to work with.
 * @param context The current context.
 * @return The newly created record reader.
 * @throws IOException When creating the reader fails.
 * @throws InterruptedException when record reader initialization fails
 * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
 *      org.apache.hadoop.mapreduce.InputSplit,
 *      org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public RecordReader<ImmutableBytesWritable, Result> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    BigtableExtendedScanSplit tSplit = (BigtableExtendedScanSplit) split;
    LOG.info(MessageFormat.format("Input split length: {0} bytes.", tSplit.getLength()));

    if (tSplit.name == null) {
        throw new IOException("Cannot create a record reader because of a"
                + " previous error. Please look at the previous logs lines from"
                + " the task's full log for more details.");
    }
    final Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
    Table table = connection.getTable(tSplit.name);

    if (this.tableRecordReader == null) {
        this.tableRecordReader = new BigtableTableRecordReader();
    }
    final BigtableTableRecordReader trr = this.tableRecordReader;

    BigtableExtendedScan sc = tSplit.scan;
    trr.setHTable(table);
    trr.setScan(sc);
    return new RecordReader<ImmutableBytesWritable, Result>() {

        @Override
        public void close() throws IOException {
            trr.close();
            connection.close();
        }

        @Override
        public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
            return trr.getCurrentKey();
        }

        @Override
        public Result getCurrentValue() throws IOException, InterruptedException {
            return trr.getCurrentValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return trr.getProgress();
        }

        @Override
        public void initialize(InputSplit inputsplit, TaskAttemptContext context)
                throws IOException, InterruptedException {
            trr.initialize(inputsplit, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return trr.nextKeyValue();
        }
    };
}

From source file:org.tensorflow.hadoop.io.TFRecordFileInputFormat.java

License:Open Source License

@Override
public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit inputSplit,
        final TaskAttemptContext context) throws IOException, InterruptedException {

    return new RecordReader<BytesWritable, NullWritable>() {
        private FSDataInputStream fsdis;
        private TFRecordReader reader;
        private long length;
        private long begin;
        private byte[] current;

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            FileSplit fileSplit = (FileSplit) split;
            length = fileSplit.getLength();
            begin = fileSplit.getStart();

            final Path file = fileSplit.getPath();
            FileSystem fs = file.getFileSystem(conf);
            fsdis = fs.open(file, TFRecordIOConf.getBufferSize(conf));
            reader = new TFRecordReader(fsdis, TFRecordIOConf.getDoCrc32Check(conf));
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            current = reader.read();
            return current != null;
        }

        @Override
        public BytesWritable getCurrentKey() throws IOException, InterruptedException {
            return new BytesWritable(current);
        }

        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return (fsdis.getPos() - begin) / (length + 1e-6f);
        }

        @Override
        public void close() throws IOException {
            fsdis.close();
        }
    };
}
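
None of the createRecordReader implementations above are invoked directly by user code; the MapReduce framework calls them once per input split after the input format has been registered on a job. The sketch below shows that wiring for the TFRecord reader in the last example; the driver class name, the identity-mapper choice and the input/output paths are placeholders, not part of the original source.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.tensorflow.hadoop.io.TFRecordFileInputFormat;

public class TFRecordPassThroughDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "tfrecord-pass-through");
        job.setJarByClass(TFRecordPassThroughDriver.class);

        // The framework instantiates TFRecordFileInputFormat, calls createRecordReader()
        // for every split, and then drives nextKeyValue()/getCurrentKey()/getCurrentValue()
        // to feed the mapper.
        job.setInputFormatClass(TFRecordFileInputFormat.class);
        job.setMapperClass(Mapper.class); // identity mapper: forwards each record unchanged
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(0); // map-only job, records go straight to the output format

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}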