List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
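Job.getConfiguration() returns the Configuration that backs the job; every example below uses it to read or set job properties while wiring up input and output formats. As a minimal driver-side sketch of typical usage (the class name GetConfigurationExample, the property key example.custom.key, and the input/output paths are illustrative and not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetConfigurationExample {
  public static void main(String[] args) throws Exception {
    // Job.getInstance() copies the Configuration passed to it; further tuning
    // must go through the job's own copy, obtained via getConfiguration().
    Job job = Job.getInstance(new Configuration(), "getConfiguration example");
    job.setJarByClass(GetConfigurationExample.class);

    // Values set on the returned Configuration are serialized with the job
    // and are visible to map and reduce tasks at runtime.
    Configuration conf = job.getConfiguration();
    conf.set("example.custom.key", "custom-value");       // illustrative property
    conf.setBoolean("mapreduce.map.speculative", false);  // standard Hadoop property

    // No mapper/reducer set: the identity Mapper and Reducer are used, so this
    // sketch simply copies text input to output.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Note that changes made through getConfiguration() after the job has been submitted do not affect the running job, which is why the examples below set configuration values while preparing the input or output.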
From source file: com.cloudera.recordservice.hcatalog.mapreduce.HCatRSBaseInputFormat.java
License: Apache License
/**
 * Set the schema for the HCatRecord data returned by HCatInputFormat.
 * @param job the job object
 * @param hcatSchema the schema to use as the consolidated schema
 */
public static void setOutputSchema(Job job, HCatSchema hcatSchema) throws IOException {
  job.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(hcatSchema));
}
From source file: com.cloudera.recordservice.hcatalog.mapreduce.HCatRSInputFormat.java
License: Apache License
/**
 * Initializes the input with a provided filter.
 * See {@link #setInput(Configuration, String, String, String)}
 */
public static HCatRSInputFormat setInput(Job job, String location, String filter) throws IOException {
  Configuration conf = job.getConfiguration();
  String kerberosPrincipal = conf.get(ConfVars.KERBEROS_PRINCIPAL_CONF.name);
  Pair<String, String> dbTablePair = HCatUtil.getDbAndTableName(location);
  dbTablePair = HCatRSUtil.cleanQueryPair(dbTablePair);
  String dbName = dbTablePair.first;
  String tableName = dbTablePair.second;
  if (location.toLowerCase().startsWith("select")) {
    RecordServiceConfig.setInputQuery(conf, location);
  } else {
    RecordServiceConfig.setInputTable(conf, dbName, tableName);
  }
  Credentials credentials = job.getCredentials();
  RecordServicePlannerClient.Builder builder = PlanUtil.getBuilder(conf);
  List<NetworkAddress> plannerHosts = PlanUtil.getPlannerHostPorts(conf);
  RecordServicePlannerClient planner =
      PlanUtil.getPlanner(conf, builder, plannerHosts, kerberosPrincipal, credentials);
  try {
    if (planner.isKerberosAuthenticated()) {
      Token<DelegationTokenIdentifier> delegationToken =
          TokenUtils.fromTDelegationToken(planner.getDelegationToken(""));
      credentials.addToken(DelegationTokenIdentifier.DELEGATION_KIND, delegationToken);
    }
  } catch (RecordServiceException e) {
    throw new IOException(e);
  } finally {
    if (planner != null) planner.close();
  }
  job.setInputFormatClass(HCatRSInputFormat.class);
  return setInput(conf, dbName, tableName, filter);
}
From source file: com.cloudera.recordservice.hcatalog.mapreduce.InitializeInput.java
License: Apache License
/**
 * @see org.apache.hive.hcatalog.mapreduce.InitializeInput#setInput(
 *      Configuration, org.apache.hive.hcatalog.mapreduce.InputJobInfo)
 */
public static void setInput(Job job, InputJobInfo theirInputJobInfo) throws Exception {
  setInput(job.getConfiguration(), theirInputJobInfo);
}
From source file: com.cloudera.recordservice.pig.HCatRSLoader.java
License: Apache License
@Override
public void setLocation(String location, Job job) throws IOException {
  HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
      .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);
  UDFContext udfContext = UDFContext.getUDFContext();
  Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature });
  job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature);
  RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO);
  // Get the partitionFilterString stored in the UDFContext - it would have been
  // stored there by an earlier call to setPartitionFilter.
  // Call setInput on HCatInputFormat only in the frontend, because internally it
  // makes calls to the hcat server - we don't want these to happen in the backend.
  // In the Hadoop front end, the mapred.task.id property will not be set in the
  // Configuration.
  if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) {
    for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
      PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(), emr.nextElement().toString());
    }
    if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) {
      // Combine credentials; credentials from the job take precedence for freshness.
      Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature);
      job.getCredentials().addAll(crd);
    }
  } else {
    Job clone = new Job(job.getConfiguration());
    HCatRSInputFormat.setInput(job, location, getPartitionFilterString());
    InputJobInfo inputJobInfo = (InputJobInfo) HCatRSUtil
        .deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
    // TODO: Add back special cases call when I find out where the code has moved.
    addSpecialCasesParametersForHCatLoader(job.getConfiguration(), inputJobInfo.getTableInfo());
    // Store all the new/changed properties from the job in the UDF context, so that
    // the HCatInputFormat.setInput method need not be called many times.
    for (Entry<String, String> keyValue : job.getConfiguration()) {
      String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
      if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
        udfProps.put(keyValue.getKey(), keyValue.getValue());
      }
    }
    udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true);
    // Store credentials in a private hash map and not the UDF context, to make
    // sure they are not public.
    Credentials crd = new Credentials();
    crd.addAll(job.getCredentials());
    jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd);
    clone.setInputFormatClass(HCatRSInputFormat.class);
  }
  // We also need to push projections by calling setOutputSchema on HCatInputFormat -
  // we have to get the RequiredFields information from the UDFContext, translate it
  // to a Schema, and then pass it on. We do this here because setLocation() is called
  // by the Pig runtime at InputFormat.getSplits() and InputFormat.createRecordReader()
  // time - we are not sure when HCatInputFormat needs to know about pruned projections,
  // so doing it here ensures we communicate the pruned projections to HCatInputFormat
  // at both getSplits() and createRecordReader() time.
  if (requiredFieldsInfo != null) {
    // Convert to an HCatSchema and pass it to HCatInputFormat.
    try {
      outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
      HCatRSInputFormat.setOutputSchema(job, outputSchema);
    } catch (Exception e) {
      throw new IOException(e);
    }
  } else {
    // Pig's optimizer never invoked the pushProjection method, so we need all
    // fields and hence should not call setOutputSchema on HCatInputFormat.
    if (HCatUtil.checkJobContextIfRunningFromBackend(job)) {
      try {
        HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
        outputSchema = hcatTableSchema;
        HCatRSInputFormat.setOutputSchema(job, outputSchema);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("outputSchema=" + outputSchema);
  }
  job.setInputFormatClass(HCatRSInputFormat.class);
}
From source file: com.cloudera.recordservice.pig.HCatRSLoader.java
License: Apache License
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
  HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
      .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);
  Table table = phutil.getTable(location,
      hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job),
      PigHCatUtil.getHCatServerPrincipal(job),
      // Pass job to initialize metastore conf overrides for the embedded metastore
      // case (hive.metastore.uris = "").
      job);
  HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table);
  try {
    PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema);
  } catch (IOException e) {
    throw new PigException(
        "Table schema incompatible for reading through HCatLoader :" + e.getMessage()
            + ";[Table schema was " + hcatTableSchema.toString() + "]",
        PigHCatUtil.PIG_EXCEPTION_CODE, e);
  }
  storeInUDFContext(signature, HCatConstants.HCAT_TABLE_SCHEMA, hcatTableSchema);
  outputSchema = hcatTableSchema;
  return PigHCatUtil.getResourceSchema(hcatTableSchema);
}
From source file: com.cloudera.recordservice.pig.HCatRSLoader.java
License: Apache License
/**
 * Get statistics about the data to be loaded. Only input data size is implemented
 * at this time.
 */
@Override
public ResourceStatistics getStatistics(String location, Job job) throws IOException {
  try {
    ResourceStatistics stats = new ResourceStatistics();
    InputJobInfo inputJobInfo = (InputJobInfo) HCatRSUtil
        .deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
    stats.setmBytes(getSizeInBytes(inputJobInfo) / 1024 / 1024);
    return stats;
  } catch (Exception e) {
    throw new IOException(e);
  }
}
From source file: com.cloudera.recordservice.pig.PigHCatUtil.java
License: Apache License
static public String getHCatServerUri(Job job) {
  return job.getConfiguration().get(HiveConf.ConfVars.METASTOREURIS.varname);
}
From source file: com.cloudera.recordservice.pig.PigHCatUtil.java
License: Apache License
static public String getHCatServerPrincipal(Job job) {
  return job.getConfiguration().get(HCatConstants.HCAT_METASTORE_PRINCIPAL);
}
From source file: com.cloudera.recordservice.pig.PigHCatUtil.java
License: Apache License
private static HiveMetaStoreClient getHiveMetaClient(String serverUri, String serverKerberosPrincipal,
    Class<?> clazz, Job job) throws Exception {
  // The job configuration is passed in so the configuration will be cloned from the
  // pig job configuration. This is necessary for overriding metastore configuration
  // arguments like the metastore jdbc connection string and password, in the case of
  // an embedded metastore, which you get when hive.metastore.uris = "".
  HiveConf hiveConf = new HiveConf(job.getConfiguration(), clazz);
  if (serverUri != null) {
    hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim());
  }
  if (serverKerberosPrincipal != null) {
    hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true);
    hiveConf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, serverKerberosPrincipal);
  }
  try {
    return HCatUtil.getHiveClient(hiveConf);
  } catch (Exception e) {
    throw new Exception(
        "Could not instantiate a HiveMetaStoreClient connecting to server uri:[" + serverUri + "]", e);
  }
}
From source file: com.cloudera.sqoop.mapreduce.db.DBOutputFormat.java
License: Apache License
private static DBConfiguration setOutput(Job job, String tableName) throws IOException {
  job.setOutputFormatClass(DBOutputFormat.class);
  ConfigurationHelper.setJobReduceSpeculativeExecution(job, false);
  DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
  dbConf.setOutputTableName(tableName);
  return dbConf;
}