Example usage for org.apache.hadoop.mapred.RecordReader

Introduction

This page collects example usages of org.apache.hadoop.mapred.RecordReader from open-source projects.

Prototype

public interface RecordReader<K, V>
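
For orientation, here is a minimal, self-contained sketch of the interface contract: an implementation that serves an in-memory array of lines as (record index, line) pairs. The class InMemoryRecordReader and everything in it are illustrative only and are not taken from any of the projects listed below.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;

/** Illustrative only: serves an in-memory array of lines as (index, line) pairs. */
public class InMemoryRecordReader implements RecordReader<LongWritable, Text> {
    private final String[] lines;
    private int pos = 0;

    public InMemoryRecordReader(String[] lines) {
        this.lines = lines;
    }

    @Override
    public boolean next(LongWritable key, Text value) throws IOException {
        if (pos >= lines.length) {
            return false;                    // no more records in this "split"
        }
        key.set(pos);                        // fill the caller-supplied key ...
        value.set(lines[pos]);               // ... and value objects in place
        pos++;
        return true;
    }

    @Override
    public LongWritable createKey() {
        return new LongWritable();           // the framework creates one key and reuses it
    }

    @Override
    public Text createValue() {
        return new Text();                   // likewise for the value
    }

    @Override
    public long getPos() throws IOException {
        return pos;                          // position within the split; here a record index
    }

    @Override
    public float getProgress() throws IOException {
        return lines.length == 0 ? 1.0f : (float) pos / lines.length;
    }

    @Override
    public void close() throws IOException {
        // nothing to release for an in-memory source
    }
}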

Usage
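
Every getRecordReader implementation on this page is consumed the same way: the caller (normally the MapReduce framework) obtains one key and one value object from createKey()/createValue() and passes that same pair to next() until it returns false. A minimal sketch of that driver loop, shown against the illustrative InMemoryRecordReader from the Prototype section rather than a real InputFormat:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;

public class RecordReaderDriver {
    public static void main(String[] args) throws Exception {
        // In a real job this reader would come from InputFormat.getRecordReader(split, job, reporter).
        RecordReader<LongWritable, Text> reader =
                new InMemoryRecordReader(new String[] { "alpha", "beta", "gamma" });
        try {
            LongWritable key = reader.createKey();   // one key object, created once ...
            Text value = reader.createValue();       // ... one value object, both reused
            while (reader.next(key, value)) {        // next() refills key and value in place
                System.out.println(key + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}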

From source file: com.aliyun.openservices.tablestore.hive.TableStoreInputFormat.java

License: Apache License

@Override
public RecordReader<PrimaryKeyWritable, RowWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    Preconditions.checkNotNull(split, "split must be nonnull");
    Preconditions.checkNotNull(job, "job must be nonnull");
    Preconditions.checkArgument(split instanceof TableStoreInputSplit,
            "split must be an instance of " + TableStoreInputSplit.class.getName());
    TableStoreInputSplit tsSplit = (TableStoreInputSplit) split;
    Configuration conf;
    if (isHiveConfiguration(job)) {
        // map task, such as 'select *'
        conf = translateConfig(job);
    } else {
        // reduce task, such as 'select count(*)'
        conf = job;
    }
    final com.aliyun.openservices.tablestore.hadoop.TableStoreRecordReader rdr = new com.aliyun.openservices.tablestore.hadoop.TableStoreRecordReader();
    rdr.initialize(tsSplit.getDelegated(), conf);
    return new RecordReader<PrimaryKeyWritable, RowWritable>() {
        @Override
        public boolean next(PrimaryKeyWritable key, RowWritable value) throws IOException {
            boolean next = rdr.nextKeyValue();
            if (next) {
                key.setPrimaryKey(rdr.getCurrentKey().getPrimaryKey());
                value.setRow(rdr.getCurrentValue().getRow());
            }
            return next;
        }

        @Override
        public PrimaryKeyWritable createKey() {
            return new PrimaryKeyWritable();
        }

        @Override
        public RowWritable createValue() {
            return new RowWritable();
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public void close() throws IOException {
            rdr.close();
        }

        @Override
        public float getProgress() throws IOException {
            return rdr.getProgress();
        }
    };
}

From source file: com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java

License: Apache License

public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<Text, Text>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public Text createKey() {
            return new Text();
        }

        // @Override
        public Text createValue() {
            return new Text();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(Text rowKey, Text value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                // build the value: first char of each qualifier + "_" + cell value,
                // comma-separated for repeated qualifiers, tab-separated otherwise
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = new String(current).toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString()); // rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));;
                    // value.set(Bytes.toString(recordReader.getCurrentValue().value()));
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file: com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java

License: Apache License

public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<ImmutableBytesWritable, Result>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        // @Override
        public Result createValue() {
            return new Result();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file: com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java

License: Apache License

@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getTableSplit();

    Job job = new Job(jobConf);
    TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(),
            reporter);

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader;
    if (hbaseSplit.isTxIndexScan()) {
        LOG.info("getRecordReader: TxHiveIndexScan -> " + tableSplit);
        recordReader = TxHiveTableInputFormatUtil.createRecordReader(tableSplit, tac, jobConf);
    } else {
        LOG.info("getRecordReader: no TxHiveIndexScan -> " + tableSplit);
        setHTable(HiveHBaseInputFormatUtil.getTable(jobConf));
        setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
        recordReader = createRecordReader(tableSplit, tac);
    }
    try {
        recordReader.initialize(tableSplit, tac);
    } catch (InterruptedException e) {
        throw new IOException("Failed to initialize RecordReader", e);
    }

    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

        @Override
        public void close() throws IOException {
            recordReader.close();
            closeTable();
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(new Result());
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        @Override
        public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    value.setResult(recordReader.getCurrentValue());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}

From source file: com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableSnapshotInputFormat.java

License: Apache License

@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    setColumns(job);
    final RecordReader<ImmutableBytesWritable, Result> rr = delegate
            .getRecordReader(((HBaseSplit) split).getSnapshotSplit(), job, reporter);

    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {
        @Override
        public boolean next(ImmutableBytesWritable key, ResultWritable value) throws IOException {
            return rr.next(key, value.getResult());
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return rr.createKey();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(rr.createValue());
        }

        @Override
        public long getPos() throws IOException {
            return rr.getPos();
        }

        @Override
        public void close() throws IOException {
            rr.close();
        }

        @Override
        public float getProgress() throws IOException {
            return rr.getProgress();
        }
    };
}

From source file: com.ibm.jaql.io.hadoop.CompositeInputAdapter.java

License: Apache License

@SuppressWarnings("unchecked")
public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    CompositeSplit cSplit = (CompositeSplit) split;

    // 1. get the InputAdapter's array index (i) from the split
    final int idx = cSplit.getAdapterIdx();
    InputSplit baseSplit = cSplit.getSplit();

    try {
        // 2. get the ith adapter's args record
        JsonValue value = this.args.get(idx);
        // JRecord baseArgs = (JRecord) item.getNonNull();
        // record the current index to the job conf
        // ASSUMES: in map/reduce, the format's record reader is called *before*
        // the map class is configured
        writeCurrentIndex(job, idx); // FIXME: no longer needed

        // 3. instantiate and initialize the adapter
        HadoopInputAdapter adapter = (HadoopInputAdapter) AdapterStore.getStore().input
                .getAdapter(/** baseArgs, */
                        value);

        // 4. create a new JobConf j'
        JobConf jTmp = new JobConf(job);

        // 5. call adapter's setupConf(j')
        // ConfiguratorUtil.writeToConf(adapter, jTmp, item/**baseArgs*/);
        adapter.setParallel(jTmp);

        // 6. configure the adapter from j'
        adapter.configure(jTmp);

        // 7. call adapter's getRecordReader with j'
        final RecordReader<JsonHolder, JsonHolder> reader = (RecordReader<JsonHolder, JsonHolder>) adapter
                .getRecordReader(baseSplit, jTmp, reporter);

        if (!addIndex) {
            return reader;
        }

        return new RecordReader<JsonHolder, JsonHolder>() {

            @Override
            public void close() throws IOException {
                reader.close();
            }

            @Override
            public JsonHolder createKey() {
                return reader.createKey();
            }

            @Override
            public JsonHolder createValue() {
                return reader.createValue();
            }

            @Override
            public long getPos() throws IOException {
                return reader.getPos();
            }

            @Override
            public float getProgress() throws IOException {
                return reader.getProgress();
            }

            @Override
            public boolean next(JsonHolder key, JsonHolder value) throws IOException {
                BufferedJsonArray pair = (BufferedJsonArray) value.value;
                if (pair != null) {
                    value.value = pair.get(1);
                } else {
                    pair = new BufferedJsonArray(2);
                    pair.set(0, JsonLong.make(idx));
                }

                if (reader.next(key, value)) {
                    pair.set(1, value.value);
                    value.value = pair;
                    return true;
                }

                return false;
            }
        };

    } catch (Exception e) {
        return null;
    }
}

From source file: com.ibm.jaql.io.hadoop.DefaultHadoopInputAdapter.java

License: Apache License

@SuppressWarnings("unchecked")
public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    if (split instanceof DHIASplit) {
        // not using order-preserving wrapper
        split = ((DHIASplit) split).split;
    }

    if (converter == null)
        return ((InputFormat<JsonHolder, JsonHolder>) iFormat).getRecordReader(split, job, reporter);
    final RecordReader<K, V> baseReader = ((InputFormat<K, V>) iFormat).getRecordReader(split, job, reporter);
    final K baseKey = baseReader.createKey();
    final V baseValue = baseReader.createValue();

    return new RecordReader<JsonHolder, JsonHolder>() {

        public void close() throws IOException {
            baseReader.close();
        }

        public JsonHolder createKey() {
            return keyHolder();
        }

        public JsonHolder createValue() {
            JsonHolder holder = valueHolder();
            holder.value = converter.createTarget();
            return holder;
        }

        public long getPos() throws IOException {
            return baseReader.getPos();
        }

        public float getProgress() throws IOException {
            return baseReader.getProgress();
        }

        public boolean next(JsonHolder key, JsonHolder value) throws IOException {
            boolean hasMore = baseReader.next(baseKey, baseValue);
            if (!hasMore)
                return false;
            value.value = converter.convert(baseKey, baseValue, value.value);
            return true;
        }
    };
}

From source file: infinidb.hadoop.db.IDBFileInputFormat.java

License: Apache License

@Override
public RecordReader<NullWritable, NullWritable> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    final String filename = ((FileSplit) arg0).getPath().toString();
    final JobConf job = arg1;

    return new RecordReader<NullWritable, NullWritable>() {
        private boolean unread = true;

        @Override
        public void close() throws IOException {
        }

        @Override
        public NullWritable createKey() {
            return NullWritable.get();
        }

        @Override
        public NullWritable createValue() {
            return NullWritable.get();
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            return unread ? 0 : 1;
        }

        @Override
        /* spawn a cpimport process for each input file */
        public boolean next(NullWritable arg0, NullWritable arg1) throws IOException {
            InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
            String schemaName = dbConf.getOutputSchemaName();
            String tableName = (filename.substring(filename.lastIndexOf('/') + 1, filename.length()));
            tableName = tableName.substring(0, tableName.lastIndexOf('.'));
            String output = job.get("mapred.output.dir");
            if (unread) {
                try {
                    StringBuilder loadCmdStr = new StringBuilder();
                    loadCmdStr.append(dbConf.getInfiniDBHome());
                    loadCmdStr.append("/bin/");
                    loadCmdStr.append("infinidoop_load.sh ");
                    loadCmdStr.append(filename);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(schemaName);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(tableName);

                    Process lChldProc = Runtime.getRuntime().exec(loadCmdStr.toString());

                    // Wait for the child to exit
                    lChldProc.waitFor();
                    BufferedReader lChldProcOutStream = new BufferedReader(
                            new InputStreamReader(lChldProc.getInputStream()));
                    BufferedReader stdError = new BufferedReader(
                            new InputStreamReader(lChldProc.getErrorStream()));

                    String lChldProcOutPutStr = null;
                    StringBuffer outpath = new StringBuffer();
                    outpath.append(job.getWorkingDirectory());
                    outpath.append("/");
                    outpath.append(output);
                    outpath.append("/");
                    outpath.append(tableName);
                    outpath.append(".log");

                    Path pt = new Path(outpath.toString());
                    FileSystem fs = FileSystem.get(new Configuration());
                    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt, false)));

                    // catch output
                    while ((lChldProcOutPutStr = lChldProcOutStream.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }

                    // catch error
                    while ((lChldProcOutPutStr = stdError.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }

                    //br.write(outpath.toString());
                    //br.newLine();
                    //br.write(loadCmdStr.toString());
                    //br.newLine();
                    //br.write(filename);
                    br.close();

                    lChldProcOutStream.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
                unread = false;
                return true;
            } else {
                return false;
            }
        }
    };
}

From source file: org.apache.blur.hive.NullHiveInputFormat.java

License: Apache License

@Override
public RecordReader<Writable, Writable> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    return new RecordReader<Writable, Writable>() {

        @Override
        public void close() throws IOException {

        }

        @Override
        public Writable createKey() {
            return null;
        }

        @Override
        public Writable createValue() {
            return null;
        }

        @Override
        public long getPos() throws IOException {
            return 0L;
        }

        @Override
        public float getProgress() throws IOException {
            return 0.0f;
        }

        @Override
        public boolean next(Writable key, Writable value) throws IOException {
            return false;
        }

    };
}

From source file: org.apache.oozie.action.hadoop.OozieLauncherInputFormat.java

License: Apache License

public RecordReader<Object, Object> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    return new RecordReader<Object, Object>() {

        @Override
        public void close() throws IOException {
        }

        @Override
        public float getProgress() throws IOException {
            if (isReadingDone) {
                return 1.0f;
            } else
                return 0.0f;
        }

        @Override
        public Object createKey() {
            return new ObjectWritable();
        }

        @Override
        public Object createValue() {
            return new ObjectWritable();
        }

        @Override
        public long getPos() throws IOException {
            if (isReadingDone) {
                return 1;
            } else {
                return 0;
            }
        }

        @Override
        public boolean next(Object arg0, Object arg1) throws IOException {
            if (isReadingDone) {
                return false;
            } else {
                isReadingDone = true;
                return true;
            }
        }

    };
}