List of usage examples for org.apache.hadoop.security Credentials Credentials
public Credentials()
From source file:org.apache.flink.tez.dag.FlinkDataSourceVertex.java
License:Apache License
@Override public Vertex createVertex(TezConfiguration conf) { try {// ww w . j a v a 2s.c o m this.writeInputPositionsToConfig(); this.writeSubTasksInOutputToConfig(); taskConfig.setDatasourceProcessorName(this.getUniqueName()); conf.set(TezTaskConfig.TEZ_TASK_CONFIG, EncodingUtils.encodeObjectToString(taskConfig)); ProcessorDescriptor descriptor = ProcessorDescriptor.create(DataSourceProcessor.class.getName()); descriptor.setUserPayload(TezUtils.createUserPayloadFromConf(conf)); InputDescriptor inputDescriptor = InputDescriptor.create(FlinkInput.class.getName()); InputInitializerDescriptor inputInitializerDescriptor = InputInitializerDescriptor .create(FlinkInputSplitGenerator.class.getName()) .setUserPayload(TezUtils.createUserPayloadFromConf(conf)); DataSourceDescriptor dataSourceDescriptor = DataSourceDescriptor.create(inputDescriptor, inputInitializerDescriptor, new Credentials()); cached = Vertex.create(this.getUniqueName(), descriptor, getParallelism()); cached.addDataSource("Input " + this.getUniqueName(), dataSourceDescriptor); return cached; } catch (IOException e) { throw new CompilerException("An error occurred while creating a Tez Vertex: " + e.getMessage(), e); } }
From source file:org.apache.giraph.yarn.GiraphYarnClient.java
License:Apache License
/** * Set delegation tokens for AM container * @param amContainer AM container//from w w w .j a v a2 s . com * @return */ private void setToken(ContainerLaunchContext amContainer) throws IOException { // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = giraphConf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } FileSystem fs = FileSystem.get(giraphConf); // For now, only getting tokens for the default file-system. final Token<?>[] tokens = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } }
From source file:org.apache.gobblin.azkaban.AzkabanJobLauncher.java
License:Apache License
public AzkabanJobLauncher(String jobId, Properties props) throws Exception { super(jobId, LOG); HadoopUtils.addGobblinSite();//from w w w .ja v a 2s. co m // Configure root metric context List<Tag<?>> tags = Lists.newArrayList(); tags.addAll(Tag.fromMap(AzkabanTags.getAzkabanTags())); RootMetricContext.get(tags); if (props.containsKey(GOBBLIN_LOG_LEVEL_KEY)) { Level logLevel = Level.toLevel(props.getProperty(GOBBLIN_LOG_LEVEL_KEY), Level.INFO); Logger.getLogger("org.apache.gobblin").setLevel(logLevel); } this.props = new Properties(); this.props.putAll(props); // initialize job listeners after properties has been initialized this.jobListener = initJobListener(); // load dynamic configuration and add them to the job properties Config propsAsConfig = ConfigUtils.propertiesToConfig(props); DynamicConfigGenerator dynamicConfigGenerator = DynamicConfigGeneratorFactory .createDynamicConfigGenerator(propsAsConfig); Config dynamicConfig = dynamicConfigGenerator.generateDynamicConfig(propsAsConfig); // add the dynamic config to the job config for (Map.Entry<String, ConfigValue> entry : dynamicConfig.entrySet()) { this.props.put(entry.getKey(), entry.getValue().unwrapped().toString()); } Configuration conf = new Configuration(); String fsUri = conf.get(HADOOP_FS_DEFAULT_NAME); if (!Strings.isNullOrEmpty(fsUri)) { if (!this.props.containsKey(ConfigurationKeys.FS_URI_KEY)) { this.props.setProperty(ConfigurationKeys.FS_URI_KEY, fsUri); } if (!this.props.containsKey(ConfigurationKeys.STATE_STORE_FS_URI_KEY)) { this.props.setProperty(ConfigurationKeys.STATE_STORE_FS_URI_KEY, fsUri); } } // Set the job tracking URL to point to the Azkaban job execution link URL this.props.setProperty(ConfigurationKeys.JOB_TRACKING_URL_KEY, Strings.nullToEmpty(conf.get(AZKABAN_LINK_JOBEXEC_URL))); if (props.containsKey(JOB_TYPE) && JOB_TYPES_WITH_AUTOMATIC_TOKEN.contains(props.getProperty(JOB_TYPE))) { // Necessary for compatibility with Azkaban's hadoopJava job type // http://azkaban.github.io/azkaban/docs/2.5/#hadoopjava-type LOG.info("Job type " + props.getProperty(JOB_TYPE) + " provides Hadoop tokens automatically. Using provided tokens."); if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) { this.props.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION)); } } else { // see javadoc for more information LOG.info(String.format("Job type %s does not provide Hadoop tokens. Negotiating Hadoop tokens.", props.getProperty(JOB_TYPE))); File tokenFile = File.createTempFile("mr-azkaban", ".token"); TokenUtils.getHadoopTokens(new State(props), Optional.of(tokenFile), new Credentials()); System.setProperty(HADOOP_TOKEN_FILE_LOCATION, tokenFile.getAbsolutePath()); System.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY, tokenFile.getAbsolutePath()); this.props.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY, tokenFile.getAbsolutePath()); this.props.setProperty("env." + HADOOP_TOKEN_FILE_LOCATION, tokenFile.getAbsolutePath()); } Properties jobProps = this.props; if (jobProps.containsKey(TEMPLATE_KEY)) { URI templateUri = new URI(jobProps.getProperty(TEMPLATE_KEY)); Config resolvedJob = new PackagedTemplatesJobCatalogDecorator().getTemplate(templateUri) .getResolvedConfig(ConfigUtils.propertiesToConfig(jobProps)); jobProps = ConfigUtils.configToProperties(resolvedJob); } GobblinMetrics.addCustomTagsToProperties(jobProps, tags); // If the job launcher type is not specified in the job configuration, // override the default to use the MAPREDUCE launcher. if (!jobProps.containsKey(ConfigurationKeys.JOB_LAUNCHER_TYPE_KEY)) { jobProps.setProperty(ConfigurationKeys.JOB_LAUNCHER_TYPE_KEY, JobLauncherFactory.JobLauncherType.MAPREDUCE.toString()); } this.ownAzkabanSla = Long.parseLong(jobProps.getProperty(AZKABAN_GOBBLIN_JOB_SLA_IN_SECONDS, DEFAULT_AZKABAN_GOBBLIN_JOB_SLA_IN_SECONDS)); List<? extends Tag<?>> metadataTags = Lists.newArrayList(); //Is the job triggered using Gobblin-as-a-Service? If so, add additional tags needed for tracking //the job execution. if (jobProps.containsKey(ConfigurationKeys.FLOW_NAME_KEY)) { metadataTags = addAdditionalMetadataTags(jobProps); } // Create a JobLauncher instance depending on the configuration. The same properties object is // used for both system and job configuration properties because Azkaban puts configuration // properties in the .job file and in the .properties file into the same Properties object. this.jobLauncher = this.closer .register(JobLauncherFactory.newJobLauncher(jobProps, jobProps, null, metadataTags)); // Since Java classes cannot extend multiple classes and Azkaban jobs must extend AbstractJob, we must use composition // verses extending ServiceBasedAppLauncher this.applicationLauncher = this.closer .register(new ServiceBasedAppLauncher(jobProps, "Azkaban-" + UUID.randomUUID())); }
From source file:org.apache.gobblin.util.hadoop.TokenUtils.java
License:Apache License
/** * Get Hadoop tokens (tokens for job history server, job tracker, hive and HDFS) using Kerberos keytab, * on behalf on a proxy user, embed tokens into a {@link UserGroupInformation} as returned result, persist in-memory * credentials if tokenFile specified// w ww.ja va2 s . c om * * Note that when a super-user is fetching tokens for other users, * {@link #fetchHcatToken(String, HiveConf, String, IMetaStoreClient)} getDelegationToken} explicitly * contains a string parameter indicating proxy user, while other hadoop services require impersonation first. * * @param state A {@link State} object that should contain properties. * @param tokenFile If present, the file will store materialized credentials. * @param ugi The {@link UserGroupInformation} that used to impersonate into the proxy user by a "doAs block". * @param targetUser The user to be impersonated as, for fetching hadoop tokens. * @return A {@link UserGroupInformation} containing negotiated credentials. */ public static UserGroupInformation getHadoopAndHiveTokensForProxyUser(final State state, Optional<File> tokenFile, UserGroupInformation ugi, IMetaStoreClient client, String targetUser) throws IOException, InterruptedException { final Credentials cred = new Credentials(); ugi.doAs(new PrivilegedExceptionAction<Void>() { @Override public Void run() throws Exception { getHadoopTokens(state, Optional.absent(), cred); return null; } }); ugi.getCredentials().addAll(cred); // Will add hive tokens into ugi in this method. getHiveToken(state, client, cred, targetUser, ugi); if (tokenFile.isPresent()) { persistTokens(cred, tokenFile.get()); } // at this point, tokens in ugi can be more than that in Credential object, // since hive token is not put in Credential object. return ugi; }
From source file:org.apache.hama.bsp.YARNBSPJobClient.java
License:Apache License
@Override protected RunningJob launchJob(BSPJobID jobId, BSPJob normalJob, Path submitJobFile, FileSystem pFs) throws IOException { YARNBSPJob job = (YARNBSPJob) normalJob; LOG.info("Submitting job..."); if (getConf().get("bsp.child.mem.in.mb") == null) { LOG.warn("BSP Child memory has not been set, YARN will guess your needs or use default values."); }/* w w w.j av a2 s. c om*/ FileSystem fs = pFs; if (fs == null) { fs = FileSystem.get(getConf()); } if (getConf().get("bsp.user.name") == null) { String s = getUnixUserName(); getConf().set("bsp.user.name", s); LOG.debug("Retrieved username: " + s); } yarnClient.start(); try { YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo("default"); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); // Create a new ApplicationSubmissionContext //ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); id = appContext.getApplicationId(); // set the application name appContext.setApplicationName(job.getJobName()); // Create a new container launch context for the AM's container ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // Define the local resources required Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); // Lets assume the jar we need for our ApplicationMaster is available in // HDFS at a certain known path to us and we want to make it available to // the ApplicationMaster in the launched container if (job.getJar() == null) { throw new IllegalArgumentException("Jar must be set in order to run the application!"); } Path jarPath = new Path(job.getJar()); jarPath = fs.makeQualified(jarPath); getConf().set("bsp.jar", jarPath.makeQualified(fs.getUri(), jarPath).toString()); FileStatus jarStatus = fs.getFileStatus(jarPath); LocalResource amJarRsrc = Records.newRecord(LocalResource.class); amJarRsrc.setType(LocalResourceType.FILE); amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(jarPath)); amJarRsrc.setTimestamp(jarStatus.getModificationTime()); amJarRsrc.setSize(jarStatus.getLen()); // this creates a symlink in the working directory localResources.put(YARNBSPConstants.APP_MASTER_JAR_PATH, amJarRsrc); // add hama related jar files to localresources for container List<File> hamaJars; if (System.getProperty("hama.home.dir") != null) hamaJars = localJarfromPath(System.getProperty("hama.home.dir")); else hamaJars = localJarfromPath(getConf().get("hama.home.dir")); String hamaPath = getSystemDir() + "/hama"; for (File fileEntry : hamaJars) { addToLocalResources(fs, fileEntry.getCanonicalPath(), hamaPath, fileEntry.getName(), localResources); } // Set the local resources into the launch context amContainer.setLocalResources(localResources); // Set up the environment needed for the launch context Map<String, String> env = new HashMap<String, String>(); // Assuming our classes or jars are available as local resources in the // working directory from which the command will be run, we need to append // "." to the path. // By default, all the hadoop specific classpaths will already be available // in $CLASSPATH, so we should be careful not to overwrite it. StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$()) .append(File.pathSeparatorChar).append("./*"); for (String c : yarnConf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { classPathEnv.append(File.pathSeparatorChar); classPathEnv.append(c.trim()); } env.put(YARNBSPConstants.HAMA_YARN_LOCATION, jarPath.toUri().toString()); env.put(YARNBSPConstants.HAMA_YARN_SIZE, Long.toString(jarStatus.getLen())); env.put(YARNBSPConstants.HAMA_YARN_TIMESTAMP, Long.toString(jarStatus.getModificationTime())); env.put(YARNBSPConstants.HAMA_LOCATION, hamaPath); env.put("CLASSPATH", classPathEnv.toString()); amContainer.setEnvironment(env); // Set the necessary command to execute on the allocated container Vector<CharSequence> vargs = new Vector<CharSequence>(5); vargs.add("${JAVA_HOME}/bin/java"); vargs.add("-cp " + classPathEnv + ""); vargs.add(ApplicationMaster.class.getCanonicalName()); vargs.add(submitJobFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString()); vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } List<String> commands = new ArrayList<String>(); commands.add(command.toString()); amContainer.setCommands(commands); LOG.debug("Start command: " + command); Resource capability = Records.newRecord(Resource.class); // we have at least 3 threads, which comsumes 1mb each, for each bsptask and // a base usage of 100mb capability.setMemory(3 * job.getNumBspTask() + getConf().getInt("hama.appmaster.memory.mb", 100)); LOG.info("Set memory for the application master to " + capability.getMemory() + "mb!"); // Set the container launch content into the ApplicationSubmissionContext appContext.setResource(capability); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce Credentials credentials = new Credentials(); String tokenRenewer = yarnConf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Create the request to send to the ApplicationsManager ApplicationId appId = appContext.getApplicationId(); yarnClient.submitApplication(appContext); return monitorApplication(appId) ? new NetworkedJob() : null; } catch (YarnException e) { e.printStackTrace(); return null; } }
From source file:org.apache.hcatalog.pig.HCatLoader.java
License:Apache License
@Override public void setLocation(String location, Job job) throws IOException { HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); UDFContext udfContext = UDFContext.getUDFContext(); Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature }); job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature); Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location); dbName = dbTablePair.first;// ww w . j av a 2s. com tableName = dbTablePair.second; RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO); // get partitionFilterString stored in the UDFContext - it would have // been stored there by an earlier call to setPartitionFilter // call setInput on HCatInputFormat only in the frontend because internally // it makes calls to the hcat server - we don't want these to happen in // the backend // in the hadoop front end mapred.task.id property will not be set in // the Configuration if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) { for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) { PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(), emr.nextElement().toString()); } if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { //Combine credentials and credentials from job takes precedence for freshness Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature); crd.addAll(job.getCredentials()); job.getCredentials().addAll(crd); } } else { Job clone = new Job(job.getConfiguration()); HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString()); // We will store all the new /changed properties in the job in the // udf context, so the the HCatInputFormat.setInput method need not //be called many times. for (Entry<String, String> keyValue : job.getConfiguration()) { String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { udfProps.put(keyValue.getKey(), keyValue.getValue()); } } udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true); //Store credentials in a private hash map and not the udf context to // make sure they are not public. Credentials crd = new Credentials(); crd.addAll(job.getCredentials()); jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd); } // Need to also push projections by calling setOutputSchema on // HCatInputFormat - we have to get the RequiredFields information // from the UdfContext, translate it to an Schema and then pass it // The reason we do this here is because setLocation() is called by // Pig runtime at InputFormat.getSplits() and // InputFormat.createRecordReader() time - we are not sure when // HCatInputFormat needs to know about pruned projections - so doing it // here will ensure we communicate to HCatInputFormat about pruned // projections at getSplits() and createRecordReader() time if (requiredFieldsInfo != null) { // convert to hcatschema and pass to HCatInputFormat try { outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass()); HCatInputFormat.setOutputSchema(job, outputSchema); } catch (Exception e) { throw new IOException(e); } } else { // else - this means pig's optimizer never invoked the pushProjection // method - so we need all fields and hence we should not call the // setOutputSchema on HCatInputFormat if (HCatUtil.checkJobContextIfRunningFromBackend(job)) { try { HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA); outputSchema = hcatTableSchema; HCatInputFormat.setOutputSchema(job, outputSchema); } catch (Exception e) { throw new IOException(e); } } } }
From source file:org.apache.hcatalog.templeton.SecureProxySupport.java
License:Apache License
private void writeProxyDelegationTokens(final Token<?> fsToken, final Token<?> msToken, final Configuration conf, String user, final Path tokenPath) throws IOException, InterruptedException { LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); final UserGroupInformation ugi = UgiFactory.getUgi(user); ugi.doAs(new PrivilegedExceptionAction<Object>() { public Object run() throws IOException { Credentials cred = new Credentials(); cred.addToken(fsToken.getService(), fsToken); cred.addToken(msToken.getService(), msToken); cred.writeTokenStorageFile(tokenPath, conf); return null; }/*from w w w .jav a 2 s . c om*/ }); }
From source file:org.apache.helix.provisioning.yarn.AppLauncher.java
License:Apache License
public boolean launch() throws Exception { LOG.info("Running Client"); yarnClient.start();//from w ww . java 2 s . c om // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. // Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); _appId = appContext.getApplicationId(); _appMasterConfig.setAppId(_appId.getId()); String appName = _applicationSpec.getAppName(); _appMasterConfig.setAppName(appName); _appMasterConfig.setApplicationSpecFactory(_applicationSpecFactory.getClass().getCanonicalName()); appContext.setApplicationName(appName); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); LOG.info("Copy Application archive file from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(_conf); // get packages for each component packages Map<String, URI> packages = new HashMap<String, URI>(); packages.put(AppMasterConfig.AppEnvironment.APP_MASTER_PKG.toString(), appMasterArchive.toURI()); packages.put(AppMasterConfig.AppEnvironment.APP_SPEC_FILE.toString(), _yamlConfigFile.toURI()); for (String serviceName : _applicationSpec.getServices()) { packages.put(serviceName, _applicationSpec.getServicePackage(serviceName)); } Map<String, Path> hdfsDest = new HashMap<String, Path>(); Map<String, String> classpathMap = new HashMap<String, String>(); for (String name : packages.keySet()) { URI uri = packages.get(name); Path dst = copyToHDFS(fs, name, uri); hdfsDest.put(name, dst); String classpath = generateClasspathAfterExtraction(name, new File(uri)); classpathMap.put(name, classpath); _appMasterConfig.setClasspath(name, classpath); String serviceMainClass = _applicationSpec.getServiceMainClass(name); if (serviceMainClass != null) { _appMasterConfig.setMainClass(name, serviceMainClass); } } // Get YAML files describing all workflows to immediately start Map<String, URI> workflowFiles = new HashMap<String, URI>(); List<TaskConfig> taskConfigs = _applicationSpec.getTaskConfigs(); if (taskConfigs != null) { for (TaskConfig taskConfig : taskConfigs) { URI configUri = taskConfig.getYamlURI(); if (taskConfig.name != null && configUri != null) { workflowFiles.put(taskConfig.name, taskConfig.getYamlURI()); } } } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LocalResource appMasterPkg = setupLocalResource(fs, hdfsDest.get(AppMasterConfig.AppEnvironment.APP_MASTER_PKG.toString())); LocalResource appSpecFile = setupLocalResource(fs, hdfsDest.get(AppMasterConfig.AppEnvironment.APP_SPEC_FILE.toString())); localResources.put(AppMasterConfig.AppEnvironment.APP_MASTER_PKG.toString(), appMasterPkg); localResources.put(AppMasterConfig.AppEnvironment.APP_SPEC_FILE.toString(), appSpecFile); for (String name : workflowFiles.keySet()) { URI uri = workflowFiles.get(name); Path dst = copyToHDFS(fs, name, uri); LocalResource taskLocalResource = setupLocalResource(fs, dst); localResources.put(AppMasterConfig.AppEnvironment.TASK_CONFIG_FILE.toString() + "_" + name, taskLocalResource); } // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the necessary security tokens as needed // amContainer.setContainerTokens(containerToken); // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar) .append("./*").append(File.pathSeparatorChar); classPathEnv.append(classpathMap.get(AppMasterConfig.AppEnvironment.APP_MASTER_PKG.toString())); for (String c : _conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { classPathEnv.append(File.pathSeparatorChar); classPathEnv.append(c.trim()); } classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (_conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } LOG.info("\n\n Setting the classpath to launch AppMaster:\n\n"); // Set the env variables to be setup in the env where the application master will be run Map<String, String> env = new HashMap<String, String>(_appMasterConfig.getEnv()); env.put("CLASSPATH", classPathEnv.toString()); amContainer.setEnvironment(env); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master launch command"); vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); int amMemory = 4096; // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(AppMasterLauncher.class.getCanonicalName()); // Set params for Application Master // vargs.add("--num_containers " + String.valueOf(numContainers)); vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); amContainer.setCommands(commands); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = _conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); int amPriority = 0; // TODO - what is the range for priority? how to decide? pri.setPriority(amPriority); appContext.setPriority(pri); String amQueue = "default"; // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); LOG.info("Submitting application to YARN Resource Manager"); ApplicationId applicationId = yarnClient.submitApplication(appContext); LOG.info("Submitted application with applicationId:" + applicationId); return true; }
From source file:org.apache.hive.hcatalog.pig.HCatLoader.java
License:Apache License
@Override public void setLocation(String location, Job job) throws IOException { HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); UDFContext udfContext = UDFContext.getUDFContext(); Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature }); job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature); Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location); dbName = dbTablePair.first;// www. j av a 2 s . co m tableName = dbTablePair.second; RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO); // get partitionFilterString stored in the UDFContext - it would have // been stored there by an earlier call to setPartitionFilter // call setInput on HCatInputFormat only in the frontend because internally // it makes calls to the hcat server - we don't want these to happen in // the backend // in the hadoop front end mapred.task.id property will not be set in // the Configuration if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) { for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) { PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(), emr.nextElement().toString()); } if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { //Combine credentials and credentials from job takes precedence for freshness Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature); job.getCredentials().addAll(crd); } } else { Job clone = new Job(job.getConfiguration()); HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString()); InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil .deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); SpecialCases.addSpecialCasesParametersForHCatLoader(job.getConfiguration(), inputJobInfo.getTableInfo()); // We will store all the new /changed properties in the job in the // udf context, so the the HCatInputFormat.setInput method need not //be called many times. for (Entry<String, String> keyValue : job.getConfiguration()) { String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { udfProps.put(keyValue.getKey(), keyValue.getValue()); } } udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true); //Store credentials in a private hash map and not the udf context to // make sure they are not public. Credentials crd = new Credentials(); crd.addAll(job.getCredentials()); jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd); } // Need to also push projections by calling setOutputSchema on // HCatInputFormat - we have to get the RequiredFields information // from the UdfContext, translate it to an Schema and then pass it // The reason we do this here is because setLocation() is called by // Pig runtime at InputFormat.getSplits() and // InputFormat.createRecordReader() time - we are not sure when // HCatInputFormat needs to know about pruned projections - so doing it // here will ensure we communicate to HCatInputFormat about pruned // projections at getSplits() and createRecordReader() time if (requiredFieldsInfo != null) { // convert to hcatschema and pass to HCatInputFormat try { //push down projections to columnar store works for RCFile and ORCFile ArrayList<Integer> list = new ArrayList<Integer>(requiredFieldsInfo.getFields().size()); for (RequiredField rf : requiredFieldsInfo.getFields()) { list.add(rf.getIndex()); } ColumnProjectionUtils.setReadColumns(job.getConfiguration(), list); outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass()); HCatInputFormat.setOutputSchema(job, outputSchema); } catch (Exception e) { throw new IOException(e); } } else { // else - this means pig's optimizer never invoked the pushProjection // method - so we need all fields and hence we should not call the // setOutputSchema on HCatInputFormat ColumnProjectionUtils.setReadAllColumns(job.getConfiguration()); if (HCatUtil.checkJobContextIfRunningFromBackend(job)) { try { HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA); outputSchema = hcatTableSchema; HCatInputFormat.setOutputSchema(job, outputSchema); } catch (Exception e) { throw new IOException(e); } } } if (LOG.isDebugEnabled()) { LOG.debug("outputSchema=" + outputSchema); } }
From source file:org.apache.hive.hcatalog.templeton.SecureProxySupport.java
License:Apache License
private Token<?>[] getFSDelegationToken(String user, final Configuration conf) throws IOException, InterruptedException { LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); final UserGroupInformation ugi = UgiFactory.getUgi(user); final TokenWrapper twrapper = new TokenWrapper(); ugi.doAs(new PrivilegedExceptionAction<Object>() { public Object run() throws IOException, URISyntaxException { Credentials creds = new Credentials(); //get Tokens for default FS. Not all FSs support delegation tokens, e.g. WASB collectTokens(FileSystem.get(conf), twrapper, creds, ugi.getShortUserName()); //get tokens for all other known FSs since Hive tables may result in different ones //passing "creds" prevents duplicate tokens from being added Collection<String> URIs = conf.getStringCollection("mapreduce.job.hdfs-servers"); for (String uri : URIs) { LOG.debug("Getting tokens for " + uri); collectTokens(FileSystem.get(new URI(uri), conf), twrapper, creds, ugi.getShortUserName()); }/* ww w . j ava 2s . c om*/ return null; } }); return twrapper.tokens; }