List of usage examples for org.apache.hadoop.mapred JobConf get

public String get(String name)

Gets the value of the name property, or null if no such property exists.

Parameter: name - the property name.
Returns: the value of the name property, or null if no such property exists.
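A minimal sketch of the call pattern, using hypothetical property names: get(String) returns null for an absent key, while the get(String, String) overload returns a caller-supplied default.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Returns null because "example.missing.key" was never set.
        String missing = conf.get("example.missing.key");
        System.out.println("missing = " + missing);

        // The two-argument overload falls back to the supplied default.
        String withDefault = conf.get("example.missing.key", "fallback");
        System.out.println("withDefault = " + withDefault);

        // After an explicit set(), get() returns the stored value.
        conf.set("example.table.name", "orders");
        System.out.println("table = " + conf.get("example.table.name"));
    }
}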
From source file:com.facebook.presto.hive.RcFileFileWriterFactory.java
License:Apache License
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    }
    else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    }
    else {
        return Optional.empty();
    }

    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));

    // existing tables and partitions may have columns in a different order than the writer is providing,
    // so build an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path),
                            fileSystem.getFileStatus(path).getLen(), stats);
                }
                catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes,
                codecName, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                validationInputFactory));
    }
    catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HBaseSerDe.java
License:Apache License
public static int getTxTimeColumnIndex(ColumnMappings columnMappings, JobConf jobConf) throws SerDeException {
    String timeColumn = jobConf.get(TX_HIVE_TIME_COL);
    if (TxUtils.isEmpty(timeColumn)) {
        LOG.warn(TX_HIVE_TIME_COL + " is empty in job conf");
        return -1;
    }

    String[] items = timeColumn.split(":", 2);
    if (items.length != 2) {
        throw new SerDeException(TX_HIVE_TIME_COL + "=" + timeColumn + " invalid(family:qualifier)");
    }

    byte[] timeFamily = Bytes.toBytes(items[0]);
    byte[] timeQualifier = Bytes.toBytes(items[1]);

    ColumnMapping[] columns = columnMappings.getColumnsMapping();
    for (int i = 0; i < columns.length; i++) {
        ColumnMapping columnMapping = columns[i];
        if (Bytes.equals(timeFamily, columnMapping.familyNameBytes)
                && Bytes.equals(timeQualifier, columnMapping.qualifierNameBytes)) {
            return i;
        }
    }
    return -1;
}
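A hedged sketch of how a caller might populate the property this method reads. It assumes TX_HIVE_TIME_COL is accessible on HBaseSerDe and that a ColumnMappings instance has already been parsed; the "cf:ts" value is purely illustrative.

// Illustrative only: "cf:ts" stands in for a real family:qualifier pair.
JobConf jobConf = new JobConf();
jobConf.set(HBaseSerDe.TX_HIVE_TIME_COL, "cf:ts");

// columnMappings is assumed to have been produced earlier, e.g. by HBaseSerDe.parseColumnsMapping(...).
int timeColumnIndex = HBaseSerDe.getTxTimeColumnIndex(columnMappings, jobConf);
if (timeColumnIndex == -1) {
    LOG.info("no tx time column configured, or no matching column mapping found");
}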
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseInputFormatUtil.java
License:Apache License
/**
 * Parse {@code jobConf} to create the target {@link HTable} instance.
 */
public static HTable getTable(JobConf jobConf) throws IOException {
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    return new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName));
}
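Because get(String) returns null for a missing key, a caller could validate the lookup before constructing the table. A minimal sketch of such a guard; the getTableChecked name and the error message are hypothetical, not part of the original class.

public static HTable getTableChecked(JobConf jobConf) throws IOException {
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    if (hbaseTableName == null || hbaseTableName.isEmpty()) {
        // get() returned null, so the job configuration never defined the table name
        throw new IOException(HBaseSerDe.HBASE_TABLE_NAME + " is not set in the job configuration");
    }
    return new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName));
}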
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseInputFormatUtil.java
License:Apache License
/**
 * Parse {@code jobConf} to create a {@link Scan} instance.
 */
public static Scan getScan(JobConf jobConf) throws IOException {
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching = jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    ColumnMappings columnMappings;

    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
    } catch (SerDeException e) {
        throw new IOException(e);
    }

    if (columnMappings.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean readAllColumns = ColumnProjectionUtils.isReadAllColumns(jobConf);
    Scan scan = new Scan();
    boolean empty = true;

    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();

    if (!readAllColumns) {
        ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
        for (int i : readColIDs) {
            ColumnMapping colMap = columnsMapping[i];
            if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                continue;
            }

            if (colMap.qualifierName == null) {
                scan.addFamily(colMap.familyNameBytes);
                addedFamilies.add(colMap.familyName);
            } else {
                if (!addedFamilies.contains(colMap.familyName)) {
                    // add only if the corresponding family has not already been added
                    scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
                }
            }

            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // tables column projection.
    if (empty) {
        for (ColumnMapping colMap : columnMappings) {
            if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                continue;
            }

            if (colMap.qualifierName == null) {
                scan.addFamily(colMap.familyNameBytes);
            } else {
                scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
            }

            if (!readAllColumns) {
                break;
            }
        }
    }

    String scanCache = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHE);
    if (scanCache != null) {
        scan.setCaching(Integer.valueOf(scanCache));
    }
    String scanCacheBlocks = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHEBLOCKS);
    if (scanCacheBlocks != null) {
        scan.setCacheBlocks(Boolean.valueOf(scanCacheBlocks));
    }
    String scanBatch = jobConf.get(HBaseSerDe.HBASE_SCAN_BATCH);
    if (scanBatch != null) {
        scan.setBatch(Integer.valueOf(scanBatch));
    }
    return scan;
}
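The three scan-tuning lookups at the end could also be written with the typed getters JobConf inherits from Configuration. A sketch under the assumption that -1 is an acceptable "not set" sentinel; the original code instead checks get() for null, so behavior would differ only if a job explicitly set a value of -1.

// Equivalent tuning with typed getters; -1 is an assumed sentinel for "property not set".
int scanCache = jobConf.getInt(HBaseSerDe.HBASE_SCAN_CACHE, -1);
if (scanCache >= 0) {
    scan.setCaching(scanCache);
}
// getBoolean(name, defaultValue) returns the default when the property is absent.
scan.setCacheBlocks(jobConf.getBoolean(HBaseSerDe.HBASE_SCAN_CACHEBLOCKS, scan.getCacheBlocks()));
int scanBatch = jobConf.getInt(HBaseSerDe.HBASE_SCAN_BATCH, -1);
if (scanBatch >= 0) {
    scan.setBatch(scanBatch);
}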
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java
License:Apache License
/**
 * Converts a filter (which has been pushed down from Hive's optimizer)
 * into corresponding restrictions on the HBase scan. The
 * filter should already be in a form which can be fully converted.
 *
 * @param jobConf configuration for the scan
 *
 * @param iKey 0-based offset of key column within Hive table
 *
 * @return converted table split if any
 */
private Scan createFilterScan(JobConf jobConf, int iKey, int iTimestamp, boolean isKeyBinary)
        throws IOException {

    // TODO: assert iKey is HBaseSerDe#HBASE_KEY_COL

    Scan scan = new Scan();
    String filterObjectSerialized = jobConf.get(TableScanDesc.FILTER_OBJECT_CONF_STR);
    if (filterObjectSerialized != null) {
        HBaseScanRange range = Utilities.deserializeObject(filterObjectSerialized, HBaseScanRange.class);
        try {
            range.setup(scan, jobConf);
        } catch (Exception e) {
            throw new IOException(e);
        }
        return scan;
    }

    String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (filterExprSerialized == null) {
        return scan;
    }

    ExprNodeGenericFuncDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized);

    String keyColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iKey];
    String colType = jobConf.get(serdeConstants.LIST_COLUMN_TYPES).split(",")[iKey];
    boolean isKeyComparable = isKeyBinary || colType.equalsIgnoreCase("string");

    String tsColName = null;
    if (iTimestamp >= 0) {
        tsColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iTimestamp];
    }

    IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(keyColName, isKeyComparable, tsColName);

    List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, conditions);

    // There should be no residual since we already negotiated that earlier in
    // HBaseStorageHandler.decomposePredicate. However, with hive.optimize.index.filter
    // OpProcFactory#pushFilterToStorageHandler pushes the original filter back down again.
    // Since pushed-down filters are not omitted at the higher levels (and thus the
    // contract of negotiation is ignored anyway), just ignore the residuals.
    // Re-assess this when negotiation is honored and the duplicate evaluation is removed.
    // THIS IGNORES RESIDUAL PARSING FROM HBaseStorageHandler#decomposePredicate
    if (residualPredicate != null) {
        LOG.debug("Ignoring residual predicate " + residualPredicate.getExprString());
    }

    Map<String, List<IndexSearchCondition>> split = HiveHBaseInputFormatUtil.decompose(conditions);
    List<IndexSearchCondition> keyConditions = split.get(keyColName);
    if (keyConditions != null && !keyConditions.isEmpty()) {
        setupKeyRange(scan, keyConditions, isKeyBinary);
    }
    List<IndexSearchCondition> tsConditions = split.get(tsColName);
    if (tsConditions != null && !tsConditions.isEmpty()) {
        setupTimeRange(scan, tsConditions);
    }
    return scan;
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java
License:Apache License
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    synchronized (hbaseTableMonitor) {
        LOG.info("__abc__ = " + jobConf.get("__abc__"));
        return getSplitsInternal(jobConf, numSplits);
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java
License:Apache License
private InputSplit[] getSplitsInternal(JobConf jobConf, int numSplits) throws IOException {
    // obtain delegation tokens for the job
    if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
        TableMapReduceUtil.initCredentials(jobConf);
    }

    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching = jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);

    if (hbaseColumnsMapping == null) {
        throw new IOException(HBaseSerDe.HBASE_COLUMNS_MAPPING + " required for HBase Table.");
    }

    ColumnMappings columnMappings = null;
    int iTimeColumn = -1;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
        iTimeColumn = HBaseSerDe.getTxTimeColumnIndex(columnMappings, jobConf);
    } catch (SerDeException e) {
        throw new IOException(e);
    }

    int iKey = columnMappings.getKeyIndex();
    int iTimestamp = columnMappings.getTimestampIndex();
    ColumnMapping keyMapping = columnMappings.getKeyMapping();

    if (iTimeColumn != -1) {
        List<org.apache.hadoop.mapreduce.InputSplit> splits = TxHiveTableInputFormatUtil.getSplits(jobConf,
                numSplits, columnMappings, iTimeColumn, hbaseTableName);
        if (splits != null) {
            Job job = new Job(jobConf);
            JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
            Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

            InputSplit[] results = new InputSplit[splits.size()];
            for (int i = 0; i < splits.size(); i++) {
                results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0], true);
            }
            LOG.info("getSplits: TxHiveIndexScan");
            return results;
        }
    }
    LOG.info("getSplits: no TxHiveIndexScan");

    setHTable(new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName)));

    // Take filter pushdown into account while calculating splits; this
    // allows us to prune off regions immediately. Note that although
    // the Javadoc for the superclass getSplits says that it returns one
    // split per region, the implementation actually takes the scan
    // definition into account and excludes regions which don't satisfy
    // the start/stop row conditions (HBASE-1829).
    Scan scan = createFilterScan(jobConf, iKey, iTimestamp,
            HiveHBaseInputFormatUtil.getStorageFormatOfKey(keyMapping.mappingSpec,
                    jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));

    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();

    // REVIEW: are we supposed to be applying the getReadColumnIDs
    // same as in getRecordReader?
    for (ColumnMapping colMap : columnMappings) {
        if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
            continue;
        }

        if (colMap.qualifierName == null) {
            scan.addFamily(colMap.familyNameBytes);
            addedFamilies.add(colMap.familyName);
        } else {
            if (!addedFamilies.contains(colMap.familyName)) {
                // add the column only if the family has not already been added
                scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
            }
        }
    }
    setScan(scan);

    Job job = new Job(jobConf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext);
    InputSplit[] results = new InputSplit[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0]);
    }
    return results;
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableOutputFormat.java
License:Apache License
/**
 * Update the out table, and output an empty key as the key.
 *
 * @param jc the job configuration file
 * @param finalOutPath the final output table name
 * @param valueClass the value class
 * @param isCompressed whether the content is compressed or not
 * @param tableProperties the table info of the corresponding table
 * @param progress progress used for status report
 * @return the RecordWriter for the output file
 */
@Override
public void checkOutputSpecs(FileSystem fs, JobConf jc) throws IOException {
    // obtain delegation tokens for the job
    if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
        TableMapReduceUtil.initCredentials(jc);
    }

    String hbaseTableName = jc.get(HBaseSerDe.HBASE_TABLE_NAME);
    jc.set(TableOutputFormat.OUTPUT_TABLE, hbaseTableName);

    Job job = new Job(jc);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);

    try {
        checkOutputSpecs(jobContext);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableOutputFormat.java
License:Apache License
@Override
public org.apache.hadoop.mapred.RecordWriter<ImmutableBytesWritable, Object> getRecordWriter(
        FileSystem fileSystem, JobConf jobConf, String name, Progressable progressable) throws IOException {

    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, hbaseTableName);
    final boolean walEnabled = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_WAL_ENABLED);
    final HTable table = new HTable(HBaseConfiguration.create(jobConf), hbaseTableName);
    table.setAutoFlush(false);
    return new MyRecordWriter(table, walEnabled);
}
From source file:com.google.mr4c.hadoop.MR4CMRJob.java
License:Open Source License
private void importProperty(MR4CConfig bbConf, JobConf jobConf, Category category, String name,
        String hadoopName) {
    setProperty(bbConf, category, name, jobConf.get(hadoopName));
}