Example usage for org.apache.hadoop.mapred.RecordReader

Introduction

This page collects example usages of org.apache.hadoop.mapred.RecordReader from open-source projects.

Prototype

public interface RecordReader<K, V>
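
For orientation, here is a minimal, self-contained sketch of the interface contract: an implementation that serves an in-memory array of lines as (record index, line) pairs. The class InMemoryRecordReader and everything in it are illustrative only and are not taken from any of the projects listed below.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;

/** Illustrative only: serves an in-memory array of lines as (index, line) pairs. */
public class InMemoryRecordReader implements RecordReader<LongWritable, Text> {
    private final String[] lines;
    private int pos = 0;

    public InMemoryRecordReader(String[] lines) {
        this.lines = lines;
    }

    @Override
    public boolean next(LongWritable key, Text value) throws IOException {
        if (pos >= lines.length) {
            return false;                    // no more records in this "split"
        }
        key.set(pos);                        // fill the caller-supplied key ...
        value.set(lines[pos]);               // ... and value objects in place
        pos++;
        return true;
    }

    @Override
    public LongWritable createKey() {
        return new LongWritable();           // the framework creates one key and reuses it
    }

    @Override
    public Text createValue() {
        return new Text();                   // likewise for the value
    }

    @Override
    public long getPos() throws IOException {
        return pos;                          // position within the split; here a record index
    }

    @Override
    public float getProgress() throws IOException {
        return lines.length == 0 ? 1.0f : (float) pos / lines.length;
    }

    @Override
    public void close() throws IOException {
        // nothing to release for an in-memory source
    }
}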

Usage
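
Every getRecordReader implementation on this page is consumed the same way: the caller (normally the MapReduce framework) obtains one key and one value object from createKey()/createValue() and passes that same pair to next() until it returns false. A minimal sketch of that driver loop, shown against the illustrative InMemoryRecordReader from the Prototype section rather than a real InputFormat:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;

public class RecordReaderDriver {
    public static void main(String[] args) throws Exception {
        // In a real job this reader would come from InputFormat.getRecordReader(split, job, reporter).
        RecordReader<LongWritable, Text> reader =
                new InMemoryRecordReader(new String[] { "alpha", "beta", "gamma" });
        try {
            LongWritable key = reader.createKey();   // one key object, created once ...
            Text value = reader.createValue();       // ... one value object, both reused
            while (reader.next(key, value)) {        // next() refills key and value in place
                System.out.println(key + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}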

From source file: com.aliyun.openservices.tablestore.hive.TableStoreInputFormat.java

License: Apache License

@Override
public RecordReader<PrimaryKeyWritable, RowWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    Preconditions.checkNotNull(split, "split must be nonnull");
    Preconditions.checkNotNull(job, "job must be nonnull");
    Preconditions.checkArgument(split instanceof TableStoreInputSplit,
            "split must be an instance of " + TableStoreInputSplit.class.getName());
    TableStoreInputSplit tsSplit = (TableStoreInputSplit) split;
    Configuration conf;
    if (isHiveConfiguration(job)) {
        // map task, such as 'select *'
        conf = translateConfig(job);
    } else {
        // reduce task, such as 'select count(*)'
        conf = job;
    }
    final com.aliyun.openservices.tablestore.hadoop.TableStoreRecordReader rdr = new com.aliyun.openservices.tablestore.hadoop.TableStoreRecordReader();
    rdr.initialize(tsSplit.getDelegated(), conf);
    return new RecordReader<PrimaryKeyWritable, RowWritable>() {
        @Override
        public boolean next(PrimaryKeyWritable key, RowWritable value) throws IOException {
            boolean next = rdr.nextKeyValue();
            if (next) {
                key.setPrimaryKey(rdr.getCurrentKey().getPrimaryKey());
                value.setRow(rdr.getCurrentValue().getRow());
            }
            return next;
        }

        @Override
        public PrimaryKeyWritable createKey() {
            return new PrimaryKeyWritable();
        }

        @Override
        public RowWritable createValue() {
            return new RowWritable();
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public void close() throws IOException {
            rdr.close();
        }

        @Override
        public float getProgress() throws IOException {
            return rdr.getProgress();
        }
    };
}

From source file: com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java

License: Apache License

public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<Text, Text>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public Text createKey() {
            return new Text();
        }

        // @Override
        public Text createValue() {
            return new Text();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(Text rowKey, Text value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                // build the value: first char of each qualifier + "_" + cell value,
                // comma-separated for repeated qualifiers, tab-separated otherwise
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = new String(current).toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString()); // rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));;
                    // value.set(Bytes.toString(recordReader.getCurrentValue().value()));
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file: com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java

License: Apache License

public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<ImmutableBytesWritable, Result>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        // @Override
        public Result createValue() {
            return new Result();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file: com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java

License: Apache License

@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getTableSplit();

    Job job = new Job(jobConf);
    TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(),
            reporter);

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader;
    if (hbaseSplit.isTxIndexScan()) {
        LOG.info("getRecordReader: TxHiveIndexScan -> " + tableSplit);
        recordReader = TxHiveTableInputFormatUtil.createRecordReader(tableSplit, tac, jobConf);
    } else {
        LOG.info("getRecordReader: no TxHiveIndexScan -> " + tableSplit);
        setHTable(HiveHBaseInputFormatUtil.getTable(jobConf));
        setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
        recordReader = createRecordReader(tableSplit, tac);
    }
    try {
        recordReader.initialize(tableSplit, tac);
    } catch (InterruptedException e) {
        throw new IOException("Failed to initialize RecordReader", e);
    }

    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

        @Override
        public void close() throws IOException {
            recordReader.close();
            closeTable();
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(new Result());
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        @Override
        public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    value.setResult(recordReader.getCurrentValue());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}

From source file: com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableSnapshotInputFormat.java

License: Apache License

@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    setColumns(job);
    final RecordReader<ImmutableBytesWritable, Result> rr = delegate
            .getRecordReader(((HBaseSplit) split).getSnapshotSplit(), job, reporter);

    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {
        @Override
        public boolean next(ImmutableBytesWritable key, ResultWritable value) throws IOException {
            return rr.next(key, value.getResult());
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return rr.createKey();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(rr.createValue());
        }

        @Override
        public long getPos() throws IOException {
            return rr.getPos();
        }

        @Override
        public void close() throws IOException {
            rr.close();
        }

        @Override
        public float getProgress() throws IOException {
            return rr.getProgress();
        }
    };
}

From source file: com.ibm.jaql.io.hadoop.CompositeInputAdapter.java

License: Apache License

@SuppressWarnings("unchecked")
public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    CompositeSplit cSplit = (CompositeSplit) split;

    // 1. get the InputAdapter's array index (i) from the split
    final int idx = cSplit.getAdapterIdx();
    InputSplit baseSplit = cSplit.getSplit();

    try {
        // 2. get the ith adapter's args record
        JsonValue value = this.args.get(idx);
        // JRecord baseArgs = (JRecord) item.getNonNull();
        // record the current index to the job conf
        // ASSUMES: in map/reduce, the format's record reader is called *before*
        // the map class is configured
        writeCurrentIndex(job, idx); // FIXME: no longer needed

        // 3. instantiate and initialize the adapter
        HadoopInputAdapter adapter = (HadoopInputAdapter) AdapterStore.getStore().input
                .getAdapter(/** baseArgs, */
                        value);

        // 4. create a new JobConf j'
        JobConf jTmp = new JobConf(job);

        // 5. call adapter's setupConf(j')
        // ConfiguratorUtil.writeToConf(adapter, jTmp, item/**baseArgs*/);
        adapter.setParallel(jTmp);

        // 6. configure the adapter from j'
        adapter.configure(jTmp);

        // 7. call adapter's getRecordReader with j'
        final RecordReader<JsonHolder, JsonHolder> reader = (RecordReader<JsonHolder, JsonHolder>) adapter
                .getRecordReader(baseSplit, jTmp, reporter);

        if (!addIndex) {
            return reader;
        }

        return new RecordReader<JsonHolder, JsonHolder>() {

            @Override
            public void close() throws IOException {
                reader.close();
            }

            @Override
            public JsonHolder createKey() {
                return reader.createKey();
            }

            @Override
            public JsonHolder createValue() {
                return reader.createValue();
            }

            @Override
            public long getPos() throws IOException {
                return reader.getPos();
            }

            @Override
            public float getProgress() throws IOException {
                return reader.getProgress();
            }

            @Override
            public boolean next(JsonHolder key, JsonHolder value) throws IOException {
                BufferedJsonArray pair = (BufferedJsonArray) value.value;
                if (pair != null) {
                    value.value = pair.get(1);
                } else {
                    pair = new BufferedJsonArray(2);
                    pair.set(0, JsonLong.make(idx));
                }

                if (reader.next(key, value)) {
                    pair.set(1, value.value);
                    value.value = pair;
                    return true;
                }

                return false;
            }
        };

    } catch (Exception e) {
        return null;
    }
}

From source file: com.ibm.jaql.io.hadoop.DefaultHadoopInputAdapter.java

License: Apache License

@SuppressWarnings("unchecked")
public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    if (split instanceof DHIASplit) {
        // not using order-preserving wrapper
        split = ((DHIASplit) split).split;
    }

    if (converter == null)
        return ((InputFormat<JsonHolder, JsonHolder>) iFormat).getRecordReader(split, job, reporter);
    final RecordReader<K, V> baseReader = ((InputFormat<K, V>) iFormat).getRecordReader(split, job, reporter);
    final K baseKey = baseReader.createKey();
    final V baseValue = baseReader.createValue();

    return new RecordReader<JsonHolder, JsonHolder>() {

        public void close() throws IOException {
            baseReader.close();
        }

        public JsonHolder createKey() {
            return keyHolder();
        }

        public JsonHolder createValue() {
            JsonHolder holder = valueHolder();
            holder.value = converter.createTarget();
            return holder;
        }

        public long getPos() throws IOException {
            return baseReader.getPos();
        }

        public float getProgress() throws IOException {
            return baseReader.getProgress();
        }

        public boolean next(JsonHolder key, JsonHolder value) throws IOException {
            boolean hasMore = baseReader.next(baseKey, baseValue);
            if (!hasMore)
                return false;
            value.value = converter.convert(baseKey, baseValue, value.value);
            return true;
        }
    };
}

From source file: infinidb.hadoop.db.IDBFileInputFormat.java

License: Apache License

@Override
public RecordReader<NullWritable, NullWritable> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    final String filename = ((FileSplit) arg0).getPath().toString();
    final JobConf job = arg1;

    return new RecordReader<NullWritable, NullWritable>() {
        private boolean unread = true;

        @Override
        public void close() throws IOException {
        }

        @Override
        public NullWritable createKey() {
            return NullWritable.get();
        }

        @Override
        public NullWritable createValue() {
            return NullWritable.get();
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            return unread ? 0 : 1;
        }

        @Override
        /* spawn a cpimport process for each input file */
        public boolean next(NullWritable arg0, NullWritable arg1) throws IOException {
            InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
            String schemaName = dbConf.getOutputSchemaName();
            String tableName = (filename.substring(filename.lastIndexOf('/') + 1, filename.length()));
            tableName = tableName.substring(0, tableName.lastIndexOf('.'));
            String output = job.get("mapred.output.dir");
            if (unread) {
                try {
                    StringBuilder loadCmdStr = new StringBuilder();
                    loadCmdStr.append(dbConf.getInfiniDBHome());
                    loadCmdStr.append("/bin/");
                    loadCmdStr.append("infinidoop_load.sh ");
                    loadCmdStr.append(filename);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(schemaName);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(tableName);

                    Process lChldProc = Runtime.getRuntime().exec(loadCmdStr.toString());

                    // Wait for the child to exit
                    lChldProc.waitFor();
                    BufferedReader lChldProcOutStream = new BufferedReader(
                            new InputStreamReader(lChldProc.getInputStream()));
                    BufferedReader stdError = new BufferedReader(
                            new InputStreamReader(lChldProc.getErrorStream()));

                    String lChldProcOutPutStr = null;
                    StringBuffer outpath = new StringBuffer();
                    outpath.append(job.getWorkingDirectory());
                    outpath.append("/");
                    outpath.append(output);
                    outpath.append("/");
                    outpath.append(tableName);
                    outpath.append(".log");

                    Path pt = new Path(outpath.toString());
                    FileSystem fs = FileSystem.get(new Configuration());
                    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt, false)));

                    // catch output
                    while ((lChldProcOutPutStr = lChldProcOutStream.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }

                    // catch error
                    while ((lChldProcOutPutStr = stdError.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }

                    //br.write(outpath.toString());
                    //br.newLine();
                    //br.write(loadCmdStr.toString());
                    //br.newLine();
                    //br.write(filename);
                    br.close();

                    lChldProcOutStream.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
                unread = false;
                return true;
            } else {
                return false;
            }
        }
    };
}

From source file: org.apache.blur.hive.NullHiveInputFormat.java

License: Apache License

@Override
public RecordReader<Writable, Writable> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    return new RecordReader<Writable, Writable>() {

        @Override
        public void close() throws IOException {

        }

        @Override
        public Writable createKey() {
            return null;
        }

        @Override
        public Writable createValue() {
            return null;
        }

        @Override
        public long getPos() throws IOException {
            return 0L;
        }

        @Override
        public float getProgress() throws IOException {
            return 0.0f;
        }

        @Override
        public boolean next(Writable key, Writable value) throws IOException {
            return false;
        }

    };
}

From source file: org.apache.oozie.action.hadoop.OozieLauncherInputFormat.java

License: Apache License

public RecordReader<Object, Object> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    return new RecordReader<Object, Object>() {

        @Override
        public void close() throws IOException {
        }

        @Override
        public float getProgress() throws IOException {
            if (isReadingDone) {
                return 1.0f;
            } else
                return 0.0f;
        }

        @Override
        public Object createKey() {
            return new ObjectWritable();
        }

        @Override
        public Object createValue() {
            return new ObjectWritable();
        }

        @Override
        public long getPos() throws IOException {
            if (isReadingDone) {
                return 1;
            } else {
                return 0;
            }
        }

        @Override
        public boolean next(Object arg0, Object arg1) throws IOException {
            if (isReadingDone) {
                return false;
            } else {
                isReadingDone = true;
                return true;
            }
        }

    };
}