List of usage examples for org.apache.hadoop.mapred JobConf get

public String get(String name)

Gets the value of the name property, or null if no such property exists.

Parameter: name - the property name.
Returns: the value of the name property, or null if no such property exists.
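A minimal sketch of the call pattern, using hypothetical property names: get(String) returns null for an absent key, while the get(String, String) overload returns a caller-supplied default.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Returns null because "example.missing.key" was never set.
        String missing = conf.get("example.missing.key");
        System.out.println("missing = " + missing);

        // The two-argument overload falls back to the supplied default.
        String withDefault = conf.get("example.missing.key", "fallback");
        System.out.println("withDefault = " + withDefault);

        // After an explicit set(), get() returns the stored value.
        conf.set("example.table.name", "orders");
        System.out.println("table = " + conf.get("example.table.name"));
    }
}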
From source file:com.facebook.presto.hive.RcFileFileWriterFactory.java
License:Apache License
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    }
    else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    }
    else {
        return Optional.empty();
    }

    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));

    // existing tables and partitions may have columns in a different order than the writer is providing,
    // so build an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path),
                            fileSystem.getFileStatus(path).getLen(), stats);
                }
                catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes,
                codecName, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                validationInputFactory));
    }
    catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HBaseSerDe.java
License:Apache License
public static int getTxTimeColumnIndex(ColumnMappings columnMappings, JobConf jobConf) throws SerDeException {
    String timeColumn = jobConf.get(TX_HIVE_TIME_COL);
    if (TxUtils.isEmpty(timeColumn)) {
        LOG.warn(TX_HIVE_TIME_COL + " is empty in job conf");
        return -1;
    }

    String[] items = timeColumn.split(":", 2);
    if (items.length != 2) {
        throw new SerDeException(TX_HIVE_TIME_COL + "=" + timeColumn + " invalid(family:qualifier)");
    }

    byte[] timeFamily = Bytes.toBytes(items[0]);
    byte[] timeQualifier = Bytes.toBytes(items[1]);

    ColumnMapping[] columns = columnMappings.getColumnsMapping();
    for (int i = 0; i < columns.length; i++) {
        ColumnMapping columnMapping = columns[i];
        if (Bytes.equals(timeFamily, columnMapping.familyNameBytes)
                && Bytes.equals(timeQualifier, columnMapping.qualifierNameBytes)) {
            return i;
        }
    }
    return -1;
}
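A hedged sketch of how a caller might populate the property this method reads. It assumes TX_HIVE_TIME_COL is accessible on HBaseSerDe and that a ColumnMappings instance has already been parsed; the "cf:ts" value is purely illustrative.

// Illustrative only: "cf:ts" stands in for a real family:qualifier pair.
JobConf jobConf = new JobConf();
jobConf.set(HBaseSerDe.TX_HIVE_TIME_COL, "cf:ts");

// columnMappings is assumed to have been produced earlier, e.g. by HBaseSerDe.parseColumnsMapping(...).
int timeColumnIndex = HBaseSerDe.getTxTimeColumnIndex(columnMappings, jobConf);
if (timeColumnIndex == -1) {
    LOG.info("no tx time column configured, or no matching column mapping found");
}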
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseInputFormatUtil.java
License:Apache License
/**
 * Parse {@code jobConf} to create the target {@link HTable} instance.
 */
public static HTable getTable(JobConf jobConf) throws IOException {
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    return new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName));
}
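Because get(String) returns null for a missing key, a caller could validate the lookup before constructing the table. A minimal sketch of such a guard; the getTableChecked name and the error message are hypothetical, not part of the original class.

public static HTable getTableChecked(JobConf jobConf) throws IOException {
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    if (hbaseTableName == null || hbaseTableName.isEmpty()) {
        // get() returned null, so the job configuration never defined the table name
        throw new IOException(HBaseSerDe.HBASE_TABLE_NAME + " is not set in the job configuration");
    }
    return new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName));
}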
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseInputFormatUtil.java
License:Apache License
/**
 * Parse {@code jobConf} to create a {@link Scan} instance.
 */
public static Scan getScan(JobConf jobConf) throws IOException {
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching = jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    ColumnMappings columnMappings;

    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
    } catch (SerDeException e) {
        throw new IOException(e);
    }

    if (columnMappings.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean readAllColumns = ColumnProjectionUtils.isReadAllColumns(jobConf);
    Scan scan = new Scan();
    boolean empty = true;

    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();

    if (!readAllColumns) {
        ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
        for (int i : readColIDs) {
            ColumnMapping colMap = columnsMapping[i];
            if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                continue;
            }

            if (colMap.qualifierName == null) {
                scan.addFamily(colMap.familyNameBytes);
                addedFamilies.add(colMap.familyName);
            } else {
                if (!addedFamilies.contains(colMap.familyName)) {
                    // add only if the corresponding family has not already been added
                    scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
                }
            }

            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // tables column projection.
    if (empty) {
        for (ColumnMapping colMap : columnMappings) {
            if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                continue;
            }

            if (colMap.qualifierName == null) {
                scan.addFamily(colMap.familyNameBytes);
            } else {
                scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
            }

            if (!readAllColumns) {
                break;
            }
        }
    }

    String scanCache = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHE);
    if (scanCache != null) {
        scan.setCaching(Integer.valueOf(scanCache));
    }
    String scanCacheBlocks = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHEBLOCKS);
    if (scanCacheBlocks != null) {
        scan.setCacheBlocks(Boolean.valueOf(scanCacheBlocks));
    }
    String scanBatch = jobConf.get(HBaseSerDe.HBASE_SCAN_BATCH);
    if (scanBatch != null) {
        scan.setBatch(Integer.valueOf(scanBatch));
    }
    return scan;
}
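The three scan-tuning lookups at the end could also be written with the typed getters JobConf inherits from Configuration. A sketch under the assumption that -1 is an acceptable "not set" sentinel; the original code instead checks get() for null, so behavior would differ only if a job explicitly set a value of -1.

// Equivalent tuning with typed getters; -1 is an assumed sentinel for "property not set".
int scanCache = jobConf.getInt(HBaseSerDe.HBASE_SCAN_CACHE, -1);
if (scanCache >= 0) {
    scan.setCaching(scanCache);
}
// getBoolean(name, defaultValue) returns the default when the property is absent.
scan.setCacheBlocks(jobConf.getBoolean(HBaseSerDe.HBASE_SCAN_CACHEBLOCKS, scan.getCacheBlocks()));
int scanBatch = jobConf.getInt(HBaseSerDe.HBASE_SCAN_BATCH, -1);
if (scanBatch >= 0) {
    scan.setBatch(scanBatch);
}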
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java
License:Apache License
/**
 * Converts a filter (which has been pushed down from Hive's optimizer)
 * into corresponding restrictions on the HBase scan. The
 * filter should already be in a form which can be fully converted.
 *
 * @param jobConf configuration for the scan
 *
 * @param iKey 0-based offset of key column within Hive table
 *
 * @return converted table split if any
 */
private Scan createFilterScan(JobConf jobConf, int iKey, int iTimestamp, boolean isKeyBinary)
        throws IOException {

    // TODO: assert iKey is HBaseSerDe#HBASE_KEY_COL

    Scan scan = new Scan();
    String filterObjectSerialized = jobConf.get(TableScanDesc.FILTER_OBJECT_CONF_STR);
    if (filterObjectSerialized != null) {
        HBaseScanRange range = Utilities.deserializeObject(filterObjectSerialized, HBaseScanRange.class);
        try {
            range.setup(scan, jobConf);
        } catch (Exception e) {
            throw new IOException(e);
        }
        return scan;
    }

    String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (filterExprSerialized == null) {
        return scan;
    }

    ExprNodeGenericFuncDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized);

    String keyColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iKey];
    String colType = jobConf.get(serdeConstants.LIST_COLUMN_TYPES).split(",")[iKey];
    boolean isKeyComparable = isKeyBinary || colType.equalsIgnoreCase("string");

    String tsColName = null;
    if (iTimestamp >= 0) {
        tsColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iTimestamp];
    }

    IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(keyColName, isKeyComparable, tsColName);

    List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, conditions);

    // There should be no residual since we already negotiated that earlier in
    // HBaseStorageHandler.decomposePredicate. However, with hive.optimize.index.filter
    // OpProcFactory#pushFilterToStorageHandler pushes the original filter back down again.
    // Since pushed-down filters are not omitted at the higher levels (and thus the
    // contract of negotiation is ignored anyway), just ignore the residuals.
    // Re-assess this when negotiation is honored and the duplicate evaluation is removed.
    // THIS IGNORES RESIDUAL PARSING FROM HBaseStorageHandler#decomposePredicate
    if (residualPredicate != null) {
        LOG.debug("Ignoring residual predicate " + residualPredicate.getExprString());
    }

    Map<String, List<IndexSearchCondition>> split = HiveHBaseInputFormatUtil.decompose(conditions);
    List<IndexSearchCondition> keyConditions = split.get(keyColName);
    if (keyConditions != null && !keyConditions.isEmpty()) {
        setupKeyRange(scan, keyConditions, isKeyBinary);
    }
    List<IndexSearchCondition> tsConditions = split.get(tsColName);
    if (tsConditions != null && !tsConditions.isEmpty()) {
        setupTimeRange(scan, tsConditions);
    }
    return scan;
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java
License:Apache License
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    synchronized (hbaseTableMonitor) {
        LOG.info("__abc__ = " + jobConf.get("__abc__"));
        return getSplitsInternal(jobConf, numSplits);
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java
License:Apache License
private InputSplit[] getSplitsInternal(JobConf jobConf, int numSplits) throws IOException {
    // obtain delegation tokens for the job
    if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
        TableMapReduceUtil.initCredentials(jobConf);
    }

    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching = jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);

    if (hbaseColumnsMapping == null) {
        throw new IOException(HBaseSerDe.HBASE_COLUMNS_MAPPING + " required for HBase Table.");
    }

    ColumnMappings columnMappings = null;
    int iTimeColumn = -1;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
        iTimeColumn = HBaseSerDe.getTxTimeColumnIndex(columnMappings, jobConf);
    } catch (SerDeException e) {
        throw new IOException(e);
    }

    int iKey = columnMappings.getKeyIndex();
    int iTimestamp = columnMappings.getTimestampIndex();
    ColumnMapping keyMapping = columnMappings.getKeyMapping();

    if (iTimeColumn != -1) {
        List<org.apache.hadoop.mapreduce.InputSplit> splits = TxHiveTableInputFormatUtil.getSplits(jobConf,
                numSplits, columnMappings, iTimeColumn, hbaseTableName);
        if (splits != null) {
            Job job = new Job(jobConf);
            JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
            Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

            InputSplit[] results = new InputSplit[splits.size()];
            for (int i = 0; i < splits.size(); i++) {
                results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0], true);
            }
            LOG.info("getSplits: TxHiveIndexScan");
            return results;
        }
    }
    LOG.info("getSplits: no TxHiveIndexScan");

    setHTable(new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName)));

    // Take filter pushdown into account while calculating splits; this
    // allows us to prune off regions immediately. Note that although
    // the Javadoc for the superclass getSplits says that it returns one
    // split per region, the implementation actually takes the scan
    // definition into account and excludes regions which don't satisfy
    // the start/stop row conditions (HBASE-1829).
    Scan scan = createFilterScan(jobConf, iKey, iTimestamp,
            HiveHBaseInputFormatUtil.getStorageFormatOfKey(keyMapping.mappingSpec,
                    jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));

    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();

    // REVIEW: are we supposed to be applying the getReadColumnIDs
    // same as in getRecordReader?
    for (ColumnMapping colMap : columnMappings) {
        if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
            continue;
        }

        if (colMap.qualifierName == null) {
            scan.addFamily(colMap.familyNameBytes);
            addedFamilies.add(colMap.familyName);
        } else {
            if (!addedFamilies.contains(colMap.familyName)) {
                // add the column only if the family has not already been added
                scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
            }
        }
    }
    setScan(scan);

    Job job = new Job(jobConf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext);
    InputSplit[] results = new InputSplit[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0]);
    }
    return results;
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableOutputFormat.java
License:Apache License
/**
 * Update the out table, and output an empty key as the key.
 *
 * @param jc the job configuration file
 * @param finalOutPath the final output table name
 * @param valueClass the value class
 * @param isCompressed whether the content is compressed or not
 * @param tableProperties the table info of the corresponding table
 * @param progress progress used for status report
 * @return the RecordWriter for the output file
 */
@Override
public void checkOutputSpecs(FileSystem fs, JobConf jc) throws IOException {
    // obtain delegation tokens for the job
    if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
        TableMapReduceUtil.initCredentials(jc);
    }

    String hbaseTableName = jc.get(HBaseSerDe.HBASE_TABLE_NAME);
    jc.set(TableOutputFormat.OUTPUT_TABLE, hbaseTableName);

    Job job = new Job(jc);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);

    try {
        checkOutputSpecs(jobContext);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableOutputFormat.java
License:Apache License
@Override
public org.apache.hadoop.mapred.RecordWriter<ImmutableBytesWritable, Object> getRecordWriter(
        FileSystem fileSystem, JobConf jobConf, String name, Progressable progressable) throws IOException {

    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, hbaseTableName);
    final boolean walEnabled = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_WAL_ENABLED);
    final HTable table = new HTable(HBaseConfiguration.create(jobConf), hbaseTableName);
    table.setAutoFlush(false);
    return new MyRecordWriter(table, walEnabled);
}
From source file:com.google.mr4c.hadoop.MR4CMRJob.java
License:Open Source License
private void importProperty(MR4CConfig bbConf, JobConf jobConf, Category category, String name,
        String hadoopName) {
    setProperty(bbConf, category, name, jobConf.get(hadoopName));
}