Example usage for the org.apache.hadoop.mapreduce.RecordReader constructor RecordReader()

List of usage examples for org.apache.hadoop.mapreduce RecordReader RecordReader

Introduction

On this page you can find example usages of the RecordReader() constructor of org.apache.hadoop.mapreduce.RecordReader.

Prototype

RecordReader

Source Link

Usage

From source file:co.cask.cdap.data2.dataset2.lib.partitioned.EmptyInputFormat.java

License:Apache License

@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
    return new RecordReader<K, V>() {
        @Override/*from   w  ww.j av a  2  s . c om*/
        public void initialize(InputSplit split, TaskAttemptContext context) {
            // do nothing
        }

        @Override
        public boolean nextKeyValue() {
            return false;
        }

        @Override
        public K getCurrentKey() {
            return null;
        }

        @Override
        public V getCurrentValue() {
            return null;
        }

        @Override
        public float getProgress() {
            return 1.0F;
        }

        @Override
        public void close() {
            // nothing to do
        }
    };
}

From source file:co.cask.cdap.template.etl.common.ETLDBInputFormat.java

License:Apache License

@Override
protected RecordReader createDBRecordReader(DBInputSplit split, Configuration conf) throws IOException {
    final RecordReader dbRecordReader = super.createDBRecordReader(split, conf);
    return new RecordReader() {
        @Override/*w  w w.j  a v  a2  s.c o m*/
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            dbRecordReader.initialize(split, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return dbRecordReader.nextKeyValue();
        }

        @Override
        public Object getCurrentKey() throws IOException, InterruptedException {
            return dbRecordReader.getCurrentKey();
        }

        @Override
        public Object getCurrentValue() throws IOException, InterruptedException {
            return dbRecordReader.getCurrentValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return dbRecordReader.getProgress();
        }

        @Override
        public void close() throws IOException {
            dbRecordReader.close();
            try {
                DriverManager.deregisterDriver(driverShim);
            } catch (SQLException e) {
                throw new IOException(e);
            }
        }
    };
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.HCatTupleInputFormat.java

License:Apache License

@Override
public RecordReader<ITuple, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext taskContext)
        throws IOException, InterruptedException {

    HCatInputFormat iF = new HCatInputFormat();

    @SuppressWarnings("rawtypes")
    final RecordReader<WritableComparable, HCatRecord> hCatRecordReader = iF.createRecordReader(split,
            taskContext);//  w ww  .j  av  a  2s  .c o  m

    return new RecordReader<ITuple, NullWritable>() {

        ITuple tuple = new Tuple(pangoolSchema);

        @Override
        public void close() throws IOException {
            hCatRecordReader.close();
        }

        @Override
        public ITuple getCurrentKey() throws IOException, InterruptedException {
            HCatRecord record = hCatRecordReader.getCurrentValue();
            // Perform conversion between HCatRecord and Tuple
            for (int pos = 0; pos < schema.size(); pos++) {
                tuple.set(pos, record.get(pos));
            }
            return tuple;
        }

        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return hCatRecordReader.getProgress();
        }

        @Override
        public void initialize(InputSplit iS, TaskAttemptContext context)
                throws IOException, InterruptedException {
            hCatRecordReader.initialize(iS, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return hCatRecordReader.nextKeyValue();
        }
    };
}

From source file:com.datasalt.pangool.utils.test.CollectionInputFormat.java

License:Apache License

@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordReader<K, V>() {
        Iterator<Entry<K, V>> it;
        Entry<K, V> currentEntry;/*  w  w  w .  j a  va  2s  .  co m*/
        int count = 0;
        int total;

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            Collection<Entry<K, V>> entries = dataToServe();
            total = entries.size();
            it = entries.iterator();
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (it.hasNext()) {
                currentEntry = it.next();
                return true;
            } else {
                return false;
            }
        }

        @Override
        public K getCurrentKey() throws IOException, InterruptedException {
            return currentEntry.getKey();
        }

        @Override
        public V getCurrentValue() throws IOException, InterruptedException {
            return currentEntry.getValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return count / (float) total;
        }

        @Override
        public void close() throws IOException {
        }
    };
}

From source file:cz.seznam.euphoria.hadoop.input.DataSourceInputFormat.java

License:Apache License

@Override
public RecordReader<NullWritable, V> createRecordReader(InputSplit is, TaskAttemptContext tac)
        throws IOException, InterruptedException {

    initialize(tac.getConfiguration());/*w  ww.  j av  a  2 s.  c  om*/
    @SuppressWarnings("unchecked")
    SourceSplit<V> split = (SourceSplit<V>) is;
    Reader<V> reader = split.partition.openReader();
    return new RecordReader<NullWritable, V>() {

        V v;

        @Override
        public void initialize(InputSplit is, TaskAttemptContext tac) throws IOException, InterruptedException {
            // nop
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (reader.hasNext()) {
                v = reader.next();
                return true;
            }
            return false;
        }

        @Override
        public NullWritable getCurrentKey() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public V getCurrentValue() throws IOException, InterruptedException {
            return v;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0.0f;
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }

    };
}

From source file:io.fluo.mapreduce.FluoInputFormat.java

License:Apache License

@Override
public RecordReader<Bytes, ColumnIterator> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    return new RecordReader<Bytes, ColumnIterator>() {

        private Entry<Bytes, ColumnIterator> entry;
        private RowIterator rowIter;
        private Environment env = null;
        private TransactionImpl ti = null;

        @Override/*from www. ja  v  a  2 s  .c o  m*/
        public void close() throws IOException {
            if (env != null) {
                env.close();
            }
            if (ti != null) {
                ti.close();
            }
        }

        @Override
        public Bytes getCurrentKey() throws IOException, InterruptedException {
            return entry.getKey();
        }

        @Override
        public ColumnIterator getCurrentValue() throws IOException, InterruptedException {
            return entry.getValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            // TODO Auto-generated method stub
            return 0;
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            try {
                // TODO this uses non public Accumulo API!
                RangeInputSplit ris = (RangeInputSplit) split;

                Span span = SpanUtil.toSpan(ris.getRange());

                ByteArrayInputStream bais = new ByteArrayInputStream(
                        context.getConfiguration().get(PROPS_CONF_KEY).getBytes("UTF-8"));
                PropertiesConfiguration props = new PropertiesConfiguration();
                props.load(bais);

                env = new Environment(new FluoConfiguration(props));

                ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));
                ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);

                for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0]))
                    sc.fetchColumnFamily(Bytes.wrap(fam));

                rowIter = ti.get(sc);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (rowIter.hasNext()) {
                entry = rowIter.next();
                return true;
            }
            return false;
        }
    };

}

From source file:mvm.rya.accumulo.mr.utils.AccumuloHDFSFileInputFormat.java

License:Apache License

@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    return new RecordReader<Key, Value>() {

        private FileSKVIterator fileSKVIterator;

        @Override/*from  ww  w.  j  a va2  s  .c o m*/
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
                throws IOException, InterruptedException {
            FileSplit split = (FileSplit) inputSplit;
            Configuration job = taskAttemptContext.getConfiguration();
            Path file = split.getPath();
            //                long start = split.getStart();
            //                long length = split.getLength();
            FileSystem fs = file.getFileSystem(job);
            //                FSDataInputStream fileIn = fs.open(file);
            //                System.out.println(start);
            //                if (start != 0L) {
            //                    fileIn.seek(start);
            //                }
            Instance instance = AccumuloProps.getInstance(taskAttemptContext);

            fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE,
                    new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
            //                fileSKVIterator = new RFileOperations2().openReader(fileIn, length - start, job);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            fileSKVIterator.next();
            return fileSKVIterator.hasTop();
        }

        @Override
        public Key getCurrentKey() throws IOException, InterruptedException {
            return fileSKVIterator.getTopKey();
        }

        @Override
        public Value getCurrentValue() throws IOException, InterruptedException {
            return fileSKVIterator.getTopValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
            //To change body of implemented methods use File | Settings | File Templates.
        }
    };
}

From source file:org.apache.bigtop.bigpetstore.generator.GeneratePetStoreTransactionsInputFormat.java

License:Apache License

@Override
public RecordReader<Text, Text> createRecordReader(final InputSplit inputSplit, TaskAttemptContext arg1)
        throws IOException, InterruptedException {
    return new RecordReader<Text, Text>() {

        @Override/* w w w  . j a v a  2  s  . c o m*/
        public void close() throws IOException {

        }

        /**
         * We need the "state" information to generate records. - Each state
         * has a probability associated with it, so that our data set can be
         * realistic (i.e. Colorado should have more transactions than rhode
         * island).
         *
         * - Each state also will its name as part of the key.
         *
         * - This task would be distributed, for example, into 50 nodes on a
         * real cluster, each creating the data for a given state.
         */

        // String storeCode = ((Split) inputSplit).storeCode;
        int records = ((PetStoreTransactionInputSplit) inputSplit).records;
        Iterator<KeyVal<String, String>> data = (new TransactionIteratorFactory(records,
                ((PetStoreTransactionInputSplit) inputSplit).state)).getData();
        KeyVal<String, String> currentRecord;

        @Override
        public Text getCurrentKey() throws IOException, InterruptedException {
            return new Text(currentRecord.key);
        }

        @Override
        public Text getCurrentValue() throws IOException, InterruptedException {
            return new Text(currentRecord.val);
        }

        @Override
        public void initialize(InputSplit arg0, TaskAttemptContext arg1)
                throws IOException, InterruptedException {
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (data.hasNext()) {
                currentRecord = data.next();
                return true;
            }
            return false;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0f;
        }

    };
}

From source file:org.apache.bigtop.bigpetstore.generator.PetStoreTransactionsInputFormat.java

License:Apache License

@Override
public RecordReader<Text, Text> createRecordReader(final InputSplit inputSplit, TaskAttemptContext arg1)
        throws IOException, InterruptedException {
    return new RecordReader<Text, Text>() {

        @Override//from  ww w . ja  va 2  s .  c  o m
        public void close() throws IOException {

        }

        /**
         * We need the "state" information to generate records. - Each state
         * has a probability associated with it, so that our data set can be
         * realistic (i.e. Colorado should have more transactions than rhode
         * island).
         *
         * - Each state also will its name as part of the key.
         *
         * - This task would be distributed, for example, into 50 nodes on a
         * real cluster, each creating the data for a given state.
         */

        PetStoreTransactionInputSplit bpsInputplit = (PetStoreTransactionInputSplit) inputSplit;
        int records = bpsInputplit.records;
        // TODO why not send the whole InputSplit there?
        Iterator<KeyVal<String, String>> data = (new TransactionIteratorFactory(records,
                bpsInputplit.customerIdRange, bpsInputplit.state)).data();
        KeyVal<String, String> currentRecord;

        @Override
        public Text getCurrentKey() throws IOException, InterruptedException {
            return new Text(currentRecord.key());
        }

        @Override
        public Text getCurrentValue() throws IOException, InterruptedException {
            return new Text(currentRecord.value());
        }

        @Override
        public void initialize(InputSplit arg0, TaskAttemptContext arg1)
                throws IOException, InterruptedException {
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (data.hasNext()) {
                currentRecord = data.next();
                return true;
            }
            return false;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0f;
        }

    };
}

From source file:org.apache.blur.mapreduce.lib.BlurInputFormat.java

License:Apache License

@Override
public RecordReader<Text, TableBlurRecord> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final GenericRecordReaderCollection genericRecordReader = new GenericRecordReaderCollection();
    genericRecordReader.initialize((BlurInputSplitColletion) split, context.getConfiguration());
    return new RecordReader<Text, TableBlurRecord>() {

        @Override//from   w w  w .j a va  2  s.com
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            genericRecordReader.initialize((BlurInputSplitColletion) split, context.getConfiguration());
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return genericRecordReader.nextKeyValue();
        }

        @Override
        public Text getCurrentKey() throws IOException, InterruptedException {
            return genericRecordReader.getCurrentKey();
        }

        @Override
        public TableBlurRecord getCurrentValue() throws IOException, InterruptedException {
            return genericRecordReader.getCurrentValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return genericRecordReader.getProgress();
        }

        @Override
        public void close() throws IOException {
            genericRecordReader.close();
        }

    };
}