List of usage examples for org.apache.hadoop.mapred.RecordReader
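Most of the examples below share one pattern: an old-API InputFormat.getRecordReader(...) returns an anonymous org.apache.hadoop.mapred.RecordReader that delegates to a new-API (org.apache.hadoop.mapreduce) reader, translating nextKeyValue()/getCurrentKey()/getCurrentValue() into the old next(key, value) contract. The sketch below shows only that bridge, assuming LongWritable/Text records; the class and helper names (OldApiReaderBridge, wrap) are illustrative and do not come from any of the sources listed here.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;

// Hedged sketch of the old-API/new-API bridge; OldApiReaderBridge and wrap are illustrative names.
public final class OldApiReaderBridge {
    public static RecordReader<LongWritable, Text> wrap(
            final org.apache.hadoop.mapreduce.RecordReader<LongWritable, Text> delegate) {
        return new RecordReader<LongWritable, Text>() {
            @Override
            public boolean next(LongWritable key, Text value) throws IOException {
                try {
                    if (!delegate.nextKeyValue()) {
                        return false;
                    }
                    // Copy the delegate's current pair into the caller-supplied writables.
                    key.set(delegate.getCurrentKey().get());
                    value.set(delegate.getCurrentValue());
                    return true;
                } catch (InterruptedException e) {
                    throw new IOException(e);
                }
            }
            @Override
            public LongWritable createKey() {
                return new LongWritable();
            }
            @Override
            public Text createValue() {
                return new Text();
            }
            @Override
            public long getPos() throws IOException {
                return 0; // byte position is not tracked by the new-API reader
            }
            @Override
            public void close() throws IOException {
                delegate.close();
            }
            @Override
            public float getProgress() throws IOException {
                try {
                    return delegate.getProgress();
                } catch (InterruptedException e) {
                    throw new IOException(e);
                }
            }
        };
    }
}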
From source file:com.aliyun.openservices.tablestore.hive.TableStoreInputFormat.java
License:Apache License
@Override
public RecordReader<PrimaryKeyWritable, RowWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    Preconditions.checkNotNull(split, "split must be nonnull");
    Preconditions.checkNotNull(job, "job must be nonnull");
    Preconditions.checkArgument(split instanceof TableStoreInputSplit,
            "split must be an instance of " + TableStoreInputSplit.class.getName());
    TableStoreInputSplit tsSplit = (TableStoreInputSplit) split;
    Configuration conf;
    if (isHiveConfiguration(job)) {
        // map task, such as 'select *'
        conf = translateConfig(job);
    } else {
        // reduce task, such as 'select count(*)'
        conf = job;
    }
    final com.aliyun.openservices.tablestore.hadoop.TableStoreRecordReader rdr =
            new com.aliyun.openservices.tablestore.hadoop.TableStoreRecordReader();
    rdr.initialize(tsSplit.getDelegated(), conf);
    return new RecordReader<PrimaryKeyWritable, RowWritable>() {
        @Override
        public boolean next(PrimaryKeyWritable key, RowWritable value) throws IOException {
            boolean next = rdr.nextKeyValue();
            if (next) {
                key.setPrimaryKey(rdr.getCurrentKey().getPrimaryKey());
                value.setRow(rdr.getCurrentValue().getRow());
            }
            return next;
        }
        @Override
        public PrimaryKeyWritable createKey() {
            return new PrimaryKeyWritable();
        }
        @Override
        public RowWritable createValue() {
            return new RowWritable();
        }
        @Override
        public long getPos() throws IOException {
            return 0;
        }
        @Override
        public void close() throws IOException {
            rdr.close();
        }
        @Override
        public float getProgress() throws IOException {
            return rdr.getProgress();
        }
    };
}
From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java
License:Apache License
public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();
    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }
    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;
    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }
    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty, i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            if (!addAll) {
                break;
            }
        }
    }
    // Set the start and end time for scanning.
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);
    setScan(scan);
    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader =
            createRecordReader(tableSplit, tac);
    return new RecordReader<Text, Text>() {
        //@Override
        public void close() throws IOException {
            recordReader.close();
        }
        // @Override
        public Text createKey() {
            return new Text();
        }
        // @Override
        public Text createValue() {
            return new Text();
        }
        // @Override
        public long getPos() throws IOException {
            return 0;
        }
        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }
        // @Override
        public boolean next(Text rowKey, Text value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    // Build the value by walking the row's KeyValues, prefixing each cell value
                    // with the first character of its column qualifier.
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = new String(current).toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString());
                    // rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    // value.set(Bytes.toString(recordReader.getCurrentValue().value()));
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file:com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java
License:Apache License
public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();
    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies,
                hbaseColumnFamiliesBytes, hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }
    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;
    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            empty = false;
        }
    }
    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty, i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            if (!addAll) {
                break;
            }
        }
    }
    // Set the start and end time for scanning.
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);
    setScan(scan);
    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader =
            createRecordReader(tableSplit, tac);
    return new RecordReader<ImmutableBytesWritable, Result>() {
        //@Override
        public void close() throws IOException {
            recordReader.close();
        }
        // @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }
        // @Override
        public Result createValue() {
            return new Result();
        }
        // @Override
        public long getPos() throws IOException {
            return 0;
        }
        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }
        // @Override
        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java
License:Apache License
@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getTableSplit();
    Job job = new Job(jobConf);
    TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), reporter);
    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader;
    if (hbaseSplit.isTxIndexScan()) {
        LOG.info("getRecordReader: TxHiveIndexScan -> " + tableSplit);
        recordReader = TxHiveTableInputFormatUtil.createRecordReader(tableSplit, tac, jobConf);
    } else {
        LOG.info("getRecordReader: no TxHiveIndexScan -> " + tableSplit);
        setHTable(HiveHBaseInputFormatUtil.getTable(jobConf));
        setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
        recordReader = createRecordReader(tableSplit, tac);
    }
    try {
        recordReader.initialize(tableSplit, tac);
    } catch (InterruptedException e) {
        throw new IOException("Failed to initialize RecordReader", e);
    }
    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {
        @Override
        public void close() throws IOException {
            recordReader.close();
            closeTable();
        }
        @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }
        @Override
        public ResultWritable createValue() {
            return new ResultWritable(new Result());
        }
        @Override
        public long getPos() throws IOException {
            return 0;
        }
        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }
        @Override
        public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    value.setResult(recordReader.getCurrentValue());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableSnapshotInputFormat.java
License:Apache License
@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    setColumns(job);
    final RecordReader<ImmutableBytesWritable, Result> rr = delegate
            .getRecordReader(((HBaseSplit) split).getSnapshotSplit(), job, reporter);
    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {
        @Override
        public boolean next(ImmutableBytesWritable key, ResultWritable value) throws IOException {
            return rr.next(key, value.getResult());
        }
        @Override
        public ImmutableBytesWritable createKey() {
            return rr.createKey();
        }
        @Override
        public ResultWritable createValue() {
            return new ResultWritable(rr.createValue());
        }
        @Override
        public long getPos() throws IOException {
            return rr.getPos();
        }
        @Override
        public void close() throws IOException {
            rr.close();
        }
        @Override
        public float getProgress() throws IOException {
            return rr.getProgress();
        }
    };
}
From source file:com.ibm.jaql.io.hadoop.CompositeInputAdapter.java
License:Apache License
@SuppressWarnings("unchecked") public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { CompositeSplit cSplit = (CompositeSplit) split; // 1. get the InputAdapter's array index (i) from the split final int idx = cSplit.getAdapterIdx(); InputSplit baseSplit = cSplit.getSplit(); try {/* w w w.ja v a 2 s. c o m*/ // 2. get the ith adapter's args record JsonValue value = this.args.get(idx); // JRecord baseArgs = (JRecord) item.getNonNull(); // record the current index to the job conf // ASSUMES: in map/reduce, the format's record reader is called *before* // the map class is configured writeCurrentIndex(job, idx); // FIXME: no longer needed // 3. insantiate and initialize the adapter HadoopInputAdapter adapter = (HadoopInputAdapter) AdapterStore.getStore().input .getAdapter(/** baseArgs, */ value); // 4. create a new JobConf j' JobConf jTmp = new JobConf(job); // 5. call adapter's setupConf(j') // ConfiguratorUtil.writeToConf(adapter, jTmp, item/**baseArgs*/); adapter.setParallel(jTmp); // 6. configure the adapter from j' adapter.configure(jTmp); // 7. call adapter's getRecordReader with j' final RecordReader<JsonHolder, JsonHolder> reader = (RecordReader<JsonHolder, JsonHolder>) adapter .getRecordReader(baseSplit, jTmp, reporter); if (!addIndex) { return reader; } return new RecordReader<JsonHolder, JsonHolder>() { @Override public void close() throws IOException { reader.close(); } @Override public JsonHolder createKey() { return reader.createKey(); } @Override public JsonHolder createValue() { return reader.createValue(); } @Override public long getPos() throws IOException { return reader.getPos(); } @Override public float getProgress() throws IOException { return reader.getProgress(); } @Override public boolean next(JsonHolder key, JsonHolder value) throws IOException { BufferedJsonArray pair = (BufferedJsonArray) value.value; if (pair != null) { value.value = pair.get(1); } else { pair = new BufferedJsonArray(2); pair.set(0, JsonLong.make(idx)); } if (reader.next(key, value)) { pair.set(1, value.value); value.value = pair; return true; } return false; } }; } catch (Exception e) { return null; } }
From source file:com.ibm.jaql.io.hadoop.DefaultHadoopInputAdapter.java
License:Apache License
@SuppressWarnings("unchecked") public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { if (split instanceof DHIASplit) { // not using order-preserving wrapper split = ((DHIASplit) split).split; }//from w w w . j av a 2 s.c o m if (converter == null) return ((InputFormat<JsonHolder, JsonHolder>) iFormat).getRecordReader(split, job, reporter); final RecordReader<K, V> baseReader = ((InputFormat<K, V>) iFormat).getRecordReader(split, job, reporter); final K baseKey = baseReader.createKey(); final V baseValue = baseReader.createValue(); return new RecordReader<JsonHolder, JsonHolder>() { public void close() throws IOException { baseReader.close(); } public JsonHolder createKey() { return keyHolder(); } public JsonHolder createValue() { JsonHolder holder = valueHolder(); holder.value = converter.createTarget(); return holder; } public long getPos() throws IOException { return baseReader.getPos(); } public float getProgress() throws IOException { return baseReader.getProgress(); } public boolean next(JsonHolder key, JsonHolder value) throws IOException { boolean hasMore = baseReader.next(baseKey, baseValue); if (!hasMore) return false; value.value = converter.convert(baseKey, baseValue, value.value); return true; } }; }
From source file:infinidb.hadoop.db.IDBFileInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, NullWritable> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    final String filename = ((FileSplit) arg0).getPath().toString();
    final JobConf job = arg1;
    return new RecordReader<NullWritable, NullWritable>() {
        private boolean unread = true;
        @Override
        public void close() throws IOException {
        }
        @Override
        public NullWritable createKey() {
            return NullWritable.get();
        }
        @Override
        public NullWritable createValue() {
            return NullWritable.get();
        }
        @Override
        public long getPos() throws IOException {
            return 0;
        }
        @Override
        public float getProgress() throws IOException {
            return unread ? 0 : 1;
        }
        /* spawn a cpimport process for each input file */
        @Override
        public boolean next(NullWritable arg0, NullWritable arg1) throws IOException {
            InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
            String schemaName = dbConf.getOutputSchemaName();
            String tableName = (filename.substring(filename.lastIndexOf('/') + 1, filename.length()));
            tableName = tableName.substring(0, tableName.lastIndexOf('.'));
            String output = job.get("mapred.output.dir");
            if (unread) {
                try {
                    StringBuilder loadCmdStr = new StringBuilder();
                    loadCmdStr.append(dbConf.getInfiniDBHome());
                    loadCmdStr.append("/bin/");
                    loadCmdStr.append("infinidoop_load.sh ");
                    loadCmdStr.append(filename);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(schemaName);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(tableName);
                    Process lChldProc = Runtime.getRuntime().exec(loadCmdStr.toString());
                    // Wait for the child to exit
                    lChldProc.waitFor();
                    BufferedReader lChldProcOutStream = new BufferedReader(
                            new InputStreamReader(lChldProc.getInputStream()));
                    BufferedReader stdError = new BufferedReader(
                            new InputStreamReader(lChldProc.getErrorStream()));
                    String lChldProcOutPutStr = null;
                    StringBuffer outpath = new StringBuffer();
                    outpath.append(job.getWorkingDirectory());
                    outpath.append("/");
                    outpath.append(output);
                    outpath.append("/");
                    outpath.append(tableName);
                    outpath.append(".log");
                    Path pt = new Path(outpath.toString());
                    FileSystem fs = FileSystem.get(new Configuration());
                    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt, false)));
                    // catch output
                    while ((lChldProcOutPutStr = lChldProcOutStream.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }
                    // catch error
                    while ((lChldProcOutPutStr = stdError.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }
                    // br.write(outpath.toString());
                    // br.newLine();
                    // br.write(loadCmdStr.toString());
                    // br.newLine();
                    // br.write(filename);
                    br.close();
                    lChldProcOutStream.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
                unread = false;
                return true;
            } else {
                return false;
            }
        }
    };
}
From source file:org.apache.blur.hive.NullHiveInputFormat.java
License:Apache License
@Override
public RecordReader<Writable, Writable> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    return new RecordReader<Writable, Writable>() {
        @Override
        public void close() throws IOException {
        }
        @Override
        public Writable createKey() {
            return null;
        }
        @Override
        public Writable createValue() {
            return null;
        }
        @Override
        public long getPos() throws IOException {
            return 0l;
        }
        @Override
        public float getProgress() throws IOException {
            return 0.0f;
        }
        @Override
        public boolean next(Writable key, Writable value) throws IOException {
            return false;
        }
    };
}
From source file:org.apache.oozie.action.hadoop.OozieLauncherInputFormat.java
License:Apache License
public RecordReader<Object, Object> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    return new RecordReader<Object, Object>() {
        @Override
        public void close() throws IOException {
        }
        @Override
        public float getProgress() throws IOException {
            if (isReadingDone) {
                return 1.0f;
            } else {
                return 0.0f;
            }
        }
        @Override
        public Object createKey() {
            return new ObjectWritable();
        }
        @Override
        public Object createValue() {
            return new ObjectWritable();
        }
        @Override
        public long getPos() throws IOException {
            if (isReadingDone) {
                return 1;
            } else {
                return 0;
            }
        }
        @Override
        public boolean next(Object arg0, Object arg1) throws IOException {
            if (isReadingDone) {
                return false;
            } else {
                isReadingDone = true;
                return true;
            }
        }
    };
}