List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
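Job.getConfiguration() returns the Configuration that backs the job; every example below uses it to read or set job properties while wiring up input and output formats. As a minimal driver-side sketch of typical usage (the class name GetConfigurationExample, the property key example.custom.key, and the input/output paths are illustrative and not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetConfigurationExample {
  public static void main(String[] args) throws Exception {
    // Job.getInstance() copies the Configuration passed to it; further tuning
    // must go through the job's own copy, obtained via getConfiguration().
    Job job = Job.getInstance(new Configuration(), "getConfiguration example");
    job.setJarByClass(GetConfigurationExample.class);

    // Values set on the returned Configuration are serialized with the job
    // and are visible to map and reduce tasks at runtime.
    Configuration conf = job.getConfiguration();
    conf.set("example.custom.key", "custom-value");       // illustrative property
    conf.setBoolean("mapreduce.map.speculative", false);  // standard Hadoop property

    // No mapper/reducer set: the identity Mapper and Reducer are used, so this
    // sketch simply copies text input to output.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Note that changes made through getConfiguration() after the job has been submitted do not affect the running job, which is why the examples below set configuration values while preparing the input or output.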
From source file: com.cloudera.recordservice.hcatalog.mapreduce.HCatRSBaseInputFormat.java
License: Apache License
/**
 * Set the schema for the HCatRecord data returned by HCatInputFormat.
 * @param job the job object
 * @param hcatSchema the schema to use as the consolidated schema
 */
public static void setOutputSchema(Job job, HCatSchema hcatSchema) throws IOException {
  job.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(hcatSchema));
}
From source file: com.cloudera.recordservice.hcatalog.mapreduce.HCatRSInputFormat.java
License: Apache License
/**
 * Initializes the input with a provided filter.
 * See {@link #setInput(Configuration, String, String, String)}
 */
public static HCatRSInputFormat setInput(Job job, String location, String filter) throws IOException {
  Configuration conf = job.getConfiguration();
  String kerberosPrincipal = conf.get(ConfVars.KERBEROS_PRINCIPAL_CONF.name);
  Pair<String, String> dbTablePair = HCatUtil.getDbAndTableName(location);
  dbTablePair = HCatRSUtil.cleanQueryPair(dbTablePair);
  String dbName = dbTablePair.first;
  String tableName = dbTablePair.second;
  if (location.toLowerCase().startsWith("select")) {
    RecordServiceConfig.setInputQuery(conf, location);
  } else {
    RecordServiceConfig.setInputTable(conf, dbName, tableName);
  }
  Credentials credentials = job.getCredentials();
  RecordServicePlannerClient.Builder builder = PlanUtil.getBuilder(conf);
  List<NetworkAddress> plannerHosts = PlanUtil.getPlannerHostPorts(conf);
  RecordServicePlannerClient planner =
      PlanUtil.getPlanner(conf, builder, plannerHosts, kerberosPrincipal, credentials);
  try {
    if (planner.isKerberosAuthenticated()) {
      Token<DelegationTokenIdentifier> delegationToken =
          TokenUtils.fromTDelegationToken(planner.getDelegationToken(""));
      credentials.addToken(DelegationTokenIdentifier.DELEGATION_KIND, delegationToken);
    }
  } catch (RecordServiceException e) {
    throw new IOException(e);
  } finally {
    if (planner != null) planner.close();
  }
  job.setInputFormatClass(HCatRSInputFormat.class);
  return setInput(conf, dbName, tableName, filter);
}
From source file: com.cloudera.recordservice.hcatalog.mapreduce.InitializeInput.java
License: Apache License
/**
 * @see org.apache.hive.hcatalog.mapreduce.InitializeInput#setInput(
 *      Configuration, org.apache.hive.hcatalog.mapreduce.InputJobInfo)
 */
public static void setInput(Job job, InputJobInfo theirInputJobInfo) throws Exception {
  setInput(job.getConfiguration(), theirInputJobInfo);
}
From source file: com.cloudera.recordservice.pig.HCatRSLoader.java
License: Apache License
@Override
public void setLocation(String location, Job job) throws IOException {
  HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
      .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);
  UDFContext udfContext = UDFContext.getUDFContext();
  Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature });
  job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature);
  RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO);
  // Get the partitionFilterString stored in the UDFContext - it would have been
  // stored there by an earlier call to setPartitionFilter.
  // Call setInput on HCatInputFormat only in the frontend, because internally it
  // makes calls to the hcat server - we don't want these to happen in the backend.
  // In the Hadoop front end, the mapred.task.id property will not be set in the
  // Configuration.
  if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) {
    for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
      PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(), emr.nextElement().toString());
    }
    if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) {
      // Combine credentials; credentials from the job take precedence for freshness.
      Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature);
      job.getCredentials().addAll(crd);
    }
  } else {
    Job clone = new Job(job.getConfiguration());
    HCatRSInputFormat.setInput(job, location, getPartitionFilterString());
    InputJobInfo inputJobInfo = (InputJobInfo) HCatRSUtil
        .deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
    // TODO: Add back special cases call when I find out where the code has moved.
    addSpecialCasesParametersForHCatLoader(job.getConfiguration(), inputJobInfo.getTableInfo());
    // Store all the new/changed properties from the job in the UDF context, so that
    // the HCatInputFormat.setInput method need not be called many times.
    for (Entry<String, String> keyValue : job.getConfiguration()) {
      String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
      if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
        udfProps.put(keyValue.getKey(), keyValue.getValue());
      }
    }
    udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true);
    // Store credentials in a private hash map and not the UDF context, to make
    // sure they are not public.
    Credentials crd = new Credentials();
    crd.addAll(job.getCredentials());
    jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd);
    clone.setInputFormatClass(HCatRSInputFormat.class);
  }
  // We also need to push projections by calling setOutputSchema on HCatInputFormat -
  // we have to get the RequiredFields information from the UDFContext, translate it
  // to a Schema, and then pass it on. We do this here because setLocation() is called
  // by the Pig runtime at InputFormat.getSplits() and InputFormat.createRecordReader()
  // time - we are not sure when HCatInputFormat needs to know about pruned projections,
  // so doing it here ensures we communicate the pruned projections to HCatInputFormat
  // at both getSplits() and createRecordReader() time.
  if (requiredFieldsInfo != null) {
    // Convert to an HCatSchema and pass it to HCatInputFormat.
    try {
      outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
      HCatRSInputFormat.setOutputSchema(job, outputSchema);
    } catch (Exception e) {
      throw new IOException(e);
    }
  } else {
    // Pig's optimizer never invoked the pushProjection method, so we need all
    // fields and hence should not call setOutputSchema on HCatInputFormat.
    if (HCatUtil.checkJobContextIfRunningFromBackend(job)) {
      try {
        HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
        outputSchema = hcatTableSchema;
        HCatRSInputFormat.setOutputSchema(job, outputSchema);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("outputSchema=" + outputSchema);
  }
  job.setInputFormatClass(HCatRSInputFormat.class);
}
From source file: com.cloudera.recordservice.pig.HCatRSLoader.java
License: Apache License
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
  HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
      .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);
  Table table = phutil.getTable(location,
      hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job),
      PigHCatUtil.getHCatServerPrincipal(job),
      // Pass job to initialize metastore conf overrides for the embedded metastore
      // case (hive.metastore.uris = "").
      job);
  HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table);
  try {
    PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema);
  } catch (IOException e) {
    throw new PigException(
        "Table schema incompatible for reading through HCatLoader :" + e.getMessage()
            + ";[Table schema was " + hcatTableSchema.toString() + "]",
        PigHCatUtil.PIG_EXCEPTION_CODE, e);
  }
  storeInUDFContext(signature, HCatConstants.HCAT_TABLE_SCHEMA, hcatTableSchema);
  outputSchema = hcatTableSchema;
  return PigHCatUtil.getResourceSchema(hcatTableSchema);
}
From source file: com.cloudera.recordservice.pig.HCatRSLoader.java
License: Apache License
/**
 * Get statistics about the data to be loaded. Only input data size is implemented
 * at this time.
 */
@Override
public ResourceStatistics getStatistics(String location, Job job) throws IOException {
  try {
    ResourceStatistics stats = new ResourceStatistics();
    InputJobInfo inputJobInfo = (InputJobInfo) HCatRSUtil
        .deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
    stats.setmBytes(getSizeInBytes(inputJobInfo) / 1024 / 1024);
    return stats;
  } catch (Exception e) {
    throw new IOException(e);
  }
}
From source file: com.cloudera.recordservice.pig.PigHCatUtil.java
License: Apache License
static public String getHCatServerUri(Job job) {
  return job.getConfiguration().get(HiveConf.ConfVars.METASTOREURIS.varname);
}
From source file: com.cloudera.recordservice.pig.PigHCatUtil.java
License: Apache License
static public String getHCatServerPrincipal(Job job) {
  return job.getConfiguration().get(HCatConstants.HCAT_METASTORE_PRINCIPAL);
}
From source file: com.cloudera.recordservice.pig.PigHCatUtil.java
License: Apache License
private static HiveMetaStoreClient getHiveMetaClient(String serverUri, String serverKerberosPrincipal,
    Class<?> clazz, Job job) throws Exception {
  // The job configuration is passed in so the configuration will be cloned from the
  // pig job configuration. This is necessary for overriding metastore configuration
  // arguments like the metastore jdbc connection string and password, in the case of
  // an embedded metastore, which you get when hive.metastore.uris = "".
  HiveConf hiveConf = new HiveConf(job.getConfiguration(), clazz);
  if (serverUri != null) {
    hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim());
  }
  if (serverKerberosPrincipal != null) {
    hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true);
    hiveConf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, serverKerberosPrincipal);
  }
  try {
    return HCatUtil.getHiveClient(hiveConf);
  } catch (Exception e) {
    throw new Exception(
        "Could not instantiate a HiveMetaStoreClient connecting to server uri:[" + serverUri + "]", e);
  }
}
From source file: com.cloudera.sqoop.mapreduce.db.DBOutputFormat.java
License: Apache License
private static DBConfiguration setOutput(Job job, String tableName) throws IOException {
  job.setOutputFormatClass(DBOutputFormat.class);
  ConfigurationHelper.setJobReduceSpeculativeExecution(job, false);
  DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
  dbConf.setOutputTableName(tableName);
  return dbConf;
}