List of usage examples for org.apache.hadoop.fs.FileSystem#makeQualified
public Path makeQualified(Path path)
From source file:org.apache.tez.client.TestTezClientUtils.java
License:Apache License
private Path createFile(FileSystem fs, Path dir, String fileName) throws IOException { Path f1 = new Path(dir, fileName); FSDataOutputStream outputStream = fs.create(f1, true); outputStream.write(1);//from www . ja va 2 s .c o m outputStream.close(); return fs.makeQualified(f1); }
From source file:org.apache.tez.client.TestTezClientUtils.java
License:Apache License
@Test(timeout = 5000) public void validateSetTezAuxLocalResourcesWithFilesAndFolders() throws Exception { FileSystem localFs = FileSystem.getLocal(new Configuration()); List<String> resources = new ArrayList<String>(); StringBuilder auxUriStr = new StringBuilder(); // Create 2 files Path topDir = new Path(TEST_ROOT_DIR, "validateauxwithfiles"); if (localFs.exists(topDir)) { localFs.delete(topDir, true);/*from ww w . ja va 2s . com*/ } localFs.mkdirs(topDir); resources.add(createFile(localFs, topDir, "f1.txt").toString()); auxUriStr.append(localFs.makeQualified(topDir).toString()).append(","); Path dir2 = new Path(topDir, "dir2"); localFs.mkdirs(dir2); Path nestedDir = new Path(dir2, "nestedDir"); localFs.mkdirs(nestedDir); createFile(localFs, nestedDir, "nested-f.txt"); resources.add(createFile(localFs, dir2, "dir2-f.txt").toString()); auxUriStr.append(localFs.makeQualified(dir2).toString()).append(","); Path dir3 = new Path(topDir, "dir3"); localFs.mkdirs(dir3); auxUriStr.append(localFs.makeQualified(dir3).toString()).append(","); TezConfiguration conf = new TezConfiguration(); conf.setBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, true); conf.set(TezConfiguration.TEZ_AUX_URIS, auxUriStr.toString()); Credentials credentials = new Credentials(); Map<String, LocalResource> localizedMap = new HashMap<String, LocalResource>(); TezClientUtils.setupTezJarsLocalResources(conf, credentials, localizedMap); Set<String> resourceNames = localizedMap.keySet(); Assert.assertEquals(2, resourceNames.size()); Assert.assertTrue(resourceNames.contains("f1.txt")); Assert.assertTrue(resourceNames.contains("dir2-f.txt")); }
From source file:org.apache.tez.client.TezClientUtils.java
License:Apache License
/** * Create an ApplicationSubmissionContext to launch a Tez AM * @param appId Application Id/*from www . j a va 2s. c o m*/ * @param dag DAG to be submitted * @param amName Name for the application * @param amConfig AM Configuration * @param tezJarResources Resources to be used by the AM * @param sessionCreds the credential object which will be populated with session specific * @param historyACLPolicyManager * @return an ApplicationSubmissionContext to launch a Tez AM * @throws IOException * @throws YarnException */ @Private @VisibleForTesting public static ApplicationSubmissionContext createApplicationSubmissionContext(ApplicationId appId, DAG dag, String amName, AMConfiguration amConfig, Map<String, LocalResource> tezJarResources, Credentials sessionCreds, boolean tezLrsAsArchive, TezApiVersionInfo apiVersionInfo, HistoryACLPolicyManager historyACLPolicyManager) throws IOException, YarnException { Preconditions.checkNotNull(sessionCreds); TezConfiguration conf = amConfig.getTezConfiguration(); FileSystem fs = TezClientUtils.ensureStagingDirExists(conf, TezCommonUtils.getTezBaseStagingPath(conf)); String strAppId = appId.toString(); Path tezSysStagingPath = TezCommonUtils.createTezSystemStagingPath(conf, strAppId); Path binaryConfPath = TezCommonUtils.getTezConfStagingPath(tezSysStagingPath); binaryConfPath = fs.makeQualified(binaryConfPath); // Setup resource requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB_DEFAULT)); capability.setVirtualCores(amConfig.getTezConfiguration().getInt( TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES, TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES_DEFAULT)); if (LOG.isDebugEnabled()) { LOG.debug("AppMaster capability = " + capability); } // Setup required Credentials for the AM launch. DAG specific credentials // are handled separately. 
ByteBuffer securityTokens = null; // Setup security tokens Credentials amLaunchCredentials = new Credentials(); if (amConfig.getCredentials() != null) { amLaunchCredentials.addAll(amConfig.getCredentials()); } // Add Staging dir creds to the list of session credentials. TokenCache.obtainTokensForFileSystems(sessionCreds, new Path[] { binaryConfPath }, conf); // Add session specific credentials to the AM credentials. amLaunchCredentials.mergeAll(sessionCreds); DataOutputBuffer dob = new DataOutputBuffer(); amLaunchCredentials.writeTokenStorageToStream(dob); securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); // Setup the command to run the AM List<String> vargs = new ArrayList<String>(8); vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); String amOpts = constructAMLaunchOpts(amConfig.getTezConfiguration(), capability); vargs.add(amOpts); String amLogLevelString = amConfig.getTezConfiguration().get(TezConfiguration.TEZ_AM_LOG_LEVEL, TezConfiguration.TEZ_AM_LOG_LEVEL_DEFAULT); String[] amLogParams = parseLogParams(amLogLevelString); String amLogLevel = amLogParams[0]; maybeAddDefaultLoggingJavaOpts(amLogLevel, vargs); // FIX sun bug mentioned in TEZ-327 vargs.add("-Dsun.nio.ch.bugLevel=''"); vargs.add(TezConstants.TEZ_APPLICATION_MASTER_CLASS); if (dag == null) { vargs.add("--" + TezConstants.TEZ_SESSION_MODE_CLI_OPTION); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + File.separator + ApplicationConstants.STDOUT); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + File.separator + ApplicationConstants.STDERR); Vector<String> vargsFinal = new Vector<String>(8); // Final command StringBuilder mergedCommand = new StringBuilder(); for (CharSequence str : vargs) { mergedCommand.append(str).append(" "); } vargsFinal.add(mergedCommand.toString()); if (LOG.isDebugEnabled()) { LOG.debug("Command to launch container for ApplicationMaster is : " + mergedCommand); } Map<String, String> environment = new TreeMap<String, String>(); 
TezYARNUtils.setupDefaultEnv(environment, conf, TezConfiguration.TEZ_AM_LAUNCH_ENV, TezConfiguration.TEZ_AM_LAUNCH_ENV_DEFAULT, tezLrsAsArchive); addVersionInfoToEnv(environment, apiVersionInfo); addLogParamsToEnv(environment, amLogParams); Map<String, LocalResource> amLocalResources = new TreeMap<String, LocalResource>(); // Not fetching credentials for AMLocalResources. Expect this to be provided via AMCredentials. if (amConfig.getAMLocalResources() != null) { amLocalResources.putAll(amConfig.getAMLocalResources()); } amLocalResources.putAll(tezJarResources); // Setup Session ACLs and update conf as needed Map<String, String> aclConfigs = null; if (historyACLPolicyManager != null) { if (dag == null) { aclConfigs = historyACLPolicyManager.setupSessionACLs(amConfig.getTezConfiguration(), appId); } else { // Non-session mode // As only a single DAG is support, we should combine AM and DAG ACLs under the same // acl management layer aclConfigs = historyACLPolicyManager.setupNonSessionACLs(amConfig.getTezConfiguration(), appId, dag.getDagAccessControls()); } } // emit conf as PB file ConfigurationProto finalConfProto = createFinalConfProtoForApp(amConfig.getTezConfiguration(), aclConfigs); FSDataOutputStream amConfPBOutBinaryStream = null; try { amConfPBOutBinaryStream = TezCommonUtils.createFileForAM(fs, binaryConfPath); finalConfProto.writeTo(amConfPBOutBinaryStream); } finally { if (amConfPBOutBinaryStream != null) { amConfPBOutBinaryStream.close(); } } LocalResource binaryConfLRsrc = TezClientUtils.createLocalResource(fs, binaryConfPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION); amConfig.setBinaryConfLR(binaryConfLRsrc); amLocalResources.put(TezConstants.TEZ_PB_BINARY_CONF_NAME, binaryConfLRsrc); // Create Session Jars definition to be sent to AM as a local resource Path sessionJarsPath = TezCommonUtils.getTezAMJarStagingPath(tezSysStagingPath); FSDataOutputStream sessionJarsPBOutStream = null; try { sessionJarsPBOutStream = 
TezCommonUtils.createFileForAM(fs, sessionJarsPath); // Write out the initial list of resources which will be available in the AM DAGProtos.PlanLocalResourcesProto amResourceProto; if (amLocalResources != null && !amLocalResources.isEmpty()) { amResourceProto = DagTypeConverters.convertFromLocalResources(amLocalResources); } else { amResourceProto = DAGProtos.PlanLocalResourcesProto.getDefaultInstance(); } amResourceProto.writeDelimitedTo(sessionJarsPBOutStream); } finally { if (sessionJarsPBOutStream != null) { sessionJarsPBOutStream.close(); } } LocalResource sessionJarsPBLRsrc = TezClientUtils.createLocalResource(fs, sessionJarsPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION); amLocalResources.put(TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME, sessionJarsPBLRsrc); String user = UserGroupInformation.getCurrentUser().getShortUserName(); ACLManager aclManager = new ACLManager(user, amConfig.getTezConfiguration()); Map<ApplicationAccessType, String> acls = aclManager.toYARNACls(); if (dag != null) { DAGPlan dagPB = prepareAndCreateDAGPlan(dag, amConfig, tezJarResources, tezLrsAsArchive, sessionCreds); // emit protobuf DAG file style Path binaryPath = TezCommonUtils.getTezBinPlanStagingPath(tezSysStagingPath); if (LOG.isDebugEnabled()) { LOG.debug("Stage directory information for AppId :" + appId + " tezSysStagingPath :" + tezSysStagingPath + " binaryConfPath :" + binaryConfPath + " sessionJarsPath :" + sessionJarsPath + " binaryPlanPath :" + binaryPath); } FSDataOutputStream dagPBOutBinaryStream = null; try { //binary output dagPBOutBinaryStream = TezCommonUtils.createFileForAM(fs, binaryPath); dagPB.writeTo(dagPBOutBinaryStream); } finally { if (dagPBOutBinaryStream != null) { dagPBOutBinaryStream.close(); } } amLocalResources.put(TezConstants.TEZ_PB_PLAN_BINARY_NAME, TezClientUtils.createLocalResource(fs, binaryPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION)); if (Level.DEBUG.isGreaterOrEqual(Level.toLevel(amLogLevel))) { 
Path textPath = localizeDagPlanAsText(dagPB, fs, amConfig, strAppId, tezSysStagingPath); amLocalResources.put(TezConstants.TEZ_PB_PLAN_TEXT_NAME, TezClientUtils.createLocalResource(fs, textPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION)); } } // Setup ContainerLaunchContext for AM container ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(amLocalResources, environment, vargsFinal, null, securityTokens, acls); // Set up the ApplicationSubmissionContext ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); appContext.setApplicationType(TezConstants.TEZ_APPLICATION_TYPE); appContext.setApplicationId(appId); appContext.setResource(capability); if (amConfig.getQueueName() != null) { appContext.setQueue(amConfig.getQueueName()); } appContext.setApplicationName(amName); appContext.setCancelTokensWhenComplete(amConfig.getTezConfiguration().getBoolean( TezConfiguration.TEZ_CANCEL_DELEGATION_TOKENS_ON_COMPLETION, TezConfiguration.TEZ_CANCEL_DELEGATION_TOKENS_ON_COMPLETION_DEFAULT)); appContext.setAMContainerSpec(amContainer); appContext.setMaxAppAttempts(amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS_DEFAULT)); return appContext; }
From source file:org.apache.tez.common.TestReflectionUtils.java
License:Apache License
@Test(timeout = 5000) public void testAddResourceToClasspath() throws IOException, TezException { String rsrcName = "dummyfile.xml"; FileSystem localFs = FileSystem.getLocal(new Configuration()); Path p = new Path(rsrcName); p = localFs.makeQualified(p); localFs.delete(p, false);//w w w .j a va2 s. c o m try { URL loadedUrl = null; loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName); assertNull(loadedUrl); // Add parent to classpath since we're not adding a jar assertTrue(localFs.createNewFile(p)); String urlForm = p.toUri().toURL().toString(); urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1); URL url = new URL(urlForm); ReflectionUtils.addResourcesToClasspath(Collections.singletonList(url)); loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName); assertNotNull(loadedUrl); } finally { localFs.delete(p, false); } }
From source file:org.apache.tez.common.TezCommonUtils.java
License:Apache License
/**
 * <p>
 * Returns the qualified path under which recovery information is stored.
 * </p>
 *
 * @param tezSysStagingPath
 *          TEZ system level staging directory used for Tez internals
 * @param conf
 *          Tez configuration, used to resolve the file system of the recovery path
 * @return App recovery path
 * @throws IOException
 */
@Private
public static Path getRecoveryPath(Path tezSysStagingPath, Configuration conf) throws IOException {
  Path recoveryDir = new Path(tezSysStagingPath, TezConstants.DAG_RECOVERY_DATA_DIR_NAME);
  return recoveryDir.getFileSystem(conf).makeQualified(recoveryDir);
}
From source file:org.apache.tez.mapreduce.examples.BroadcastAndOneToOneExample.java
License:Apache License
public boolean run(Configuration conf, boolean doLocalityCheck) throws Exception { System.out.println("Running BroadcastAndOneToOneExample"); // conf and UGI TezConfiguration tezConf;//from w w w . j ava 2 s. c o m if (conf != null) { tezConf = new TezConfiguration(conf); } else { tezConf = new TezConfiguration(); } tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); UserGroupInformation.setConfiguration(tezConf); // staging dir FileSystem fs = FileSystem.get(tezConf); String stagingDirStr = tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + "BroadcastAndOneToOneExample" + Path.SEPARATOR + Long.toString(System.currentTimeMillis()); Path stagingDir = new Path(stagingDirStr); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr); stagingDir = fs.makeQualified(stagingDir); // No need to add jar containing this class as assumed to be part of // the tez jars. // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir // is the same filesystem as the one used for Input/Output. TezClient tezSession = null; // needs session or else TaskScheduler does not hold onto containers tezSession = TezClient.create("broadcastAndOneToOneExample", tezConf); tezSession.start(); DAGClient dagClient = null; try { DAG dag = createDAG(fs, tezConf, stagingDir, doLocalityCheck); tezSession.waitTillReady(); dagClient = tezSession.submitDAG(dag); // monitoring DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null); if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) { System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics()); return false; } return true; } finally { fs.delete(stagingDir, true); tezSession.stop(); } }
From source file:org.apache.tez.mapreduce.examples.FilterLinesByWord.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Credentials credentials = new Credentials(); boolean generateSplitsInClient = false; SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser(); try {/*from www. j av a 2 s.co m*/ generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false); otherArgs = splitCmdLineParser.getRemainingArgs(); } catch (ParseException e1) { System.err.println("Invalid options"); printUsage(); return 2; } if (otherArgs.length != 3) { printUsage(); return 2; } String inputPath = otherArgs[0]; String outputPath = otherArgs[1]; String filterWord = otherArgs[2]; FileSystem fs = FileSystem.get(conf); if (fs.exists(new Path(outputPath))) { System.err.println("Output directory : " + outputPath + " already exists"); return 2; } TezConfiguration tezConf = new TezConfiguration(conf); fs.getWorkingDirectory(); Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString()); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString()); TezClientUtils.ensureStagingDirExists(tezConf, stagingDir); String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class); if (jarPath == null) { throw new TezUncheckedException( "Could not find any jar containing" + FilterLinesByWord.class.getName() + " in the classpath"); } Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar")); fs.copyFromLocalFile(new Path(jarPath), remoteJarPath); FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath); TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf); Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>(); LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(), 
remoteJarStatus.getModificationTime()); commonLocalResources.put("dag_job.jar", dagJarLocalRsrc); TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, credentials); tezSession.start(); // Why do I need to start the TezSession. Configuration stage1Conf = new JobConf(conf); stage1Conf.set(FILTER_PARAM_NAME, filterWord); Configuration stage2Conf = new JobConf(conf); stage2Conf.set(FileOutputFormat.OUTDIR, outputPath); stage2Conf.setBoolean("mapred.mapper.new-api", false); UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf); // Setup stage1 Vertex Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor .create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload)) .addTaskLocalFiles(commonLocalResources); DataSourceDescriptor dsd; if (generateSplitsInClient) { // TODO TEZ-1406. Dont' use MRInputLegacy stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath); stage1Conf.setBoolean("mapred.mapper.new-api", false); dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true); } else { dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false) .build(); } stage1Vertex.addDataSource("MRInput", dsd); // Setup stage2 Vertex Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName()) .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), 1); stage2Vertex.addTaskLocalFiles(commonLocalResources); // Configure the Output for stage2 OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName()) .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)); OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName()); stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null)); UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig .newBuilder(Text.class.getName(), 
TextLongPair.class.getName()).setFromConfiguration(tezConf) .build(); DAG dag = DAG.create("FilterLinesByWord"); Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty()); dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge); LOG.info("Submitting DAG to Tez Session"); DAGClient dagClient = tezSession.submitDAG(dag); LOG.info("Submitted DAG to Tez Session"); DAGStatus dagStatus = null; String[] vNames = { "stage1", "stage2" }; try { while (true) { dagStatus = dagClient.getDAGStatus(null); if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) { break; } try { Thread.sleep(500); } catch (InterruptedException e) { // continue; } } while (dagStatus.getState() == DAGStatus.State.RUNNING) { try { ExampleDriver.printDAGStatus(dagClient, vNames); try { Thread.sleep(1000); } catch (InterruptedException e) { // continue; } dagStatus = dagClient.getDAGStatus(null); } catch (TezException e) { LOG.fatal("Failed to get application progress. Exiting"); return -1; } } dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS)); } finally { fs.delete(stagingDir, true); tezSession.stop(); } ExampleDriver.printDAGStatus(dagClient, vNames, true, true); LOG.info("Application completed. " + "FinalState=" + dagStatus.getState()); return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1; }
From source file:org.apache.tez.mapreduce.examples.FilterLinesByWordOneToOne.java
License:Apache License
@Override public int run(String[] otherArgs) throws Exception { boolean generateSplitsInClient = false; SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser(); try {/*ww w .java2 s . c o m*/ generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false); otherArgs = splitCmdLineParser.getRemainingArgs(); } catch (ParseException e1) { System.err.println("Invalid options"); printUsage(); return 2; } if (otherArgs.length != 3) { printUsage(); return 2; } String inputPath = otherArgs[0]; String outputPath = otherArgs[1]; String filterWord = otherArgs[2]; Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); if (fs.exists(new Path(outputPath))) { System.err.println("Output directory : " + outputPath + " already exists"); return 2; } TezConfiguration tezConf = new TezConfiguration(conf); fs.getWorkingDirectory(); Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString()); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString()); TezClientUtils.ensureStagingDirExists(tezConf, stagingDir); String jarPath = ClassUtil.findContainingJar(FilterLinesByWordOneToOne.class); if (jarPath == null) { throw new TezUncheckedException("Could not find any jar containing" + FilterLinesByWordOneToOne.class.getName() + " in the classpath"); } Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar")); fs.copyFromLocalFile(new Path(jarPath), remoteJarPath); FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath); Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>(); LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(), remoteJarStatus.getModificationTime()); commonLocalResources.put("dag_job.jar", dagJarLocalRsrc); TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, 
null); tezSession.start(); // Why do I need to start the TezSession. Configuration stage1Conf = new JobConf(conf); stage1Conf.set(FILTER_PARAM_NAME, filterWord); Configuration stage2Conf = new JobConf(conf); stage2Conf.set(FileOutputFormat.OUTDIR, outputPath); stage2Conf.setBoolean("mapred.mapper.new-api", false); UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf); // Setup stage1 Vertex Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor .create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload)) .addTaskLocalFiles(commonLocalResources); DataSourceDescriptor dsd; if (generateSplitsInClient) { // TODO TEZ-1406. Dont' use MRInputLegacy stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath); stage1Conf.setBoolean("mapred.mapper.new-api", false); dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true); } else { dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false) .build(); } stage1Vertex.addDataSource("MRInput", dsd); // Setup stage2 Vertex Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName()) .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), dsd.getNumberOfShards()); stage2Vertex.addTaskLocalFiles(commonLocalResources); // Configure the Output for stage2 stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create( OutputDescriptor.create(MROutput.class.getName()) .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null)); UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig .newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf) .build(); DAG dag = DAG.create("FilterLinesByWord"); Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultOneToOneEdgeProperty()); 
dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge); LOG.info("Submitting DAG to Tez Session"); DAGClient dagClient = tezSession.submitDAG(dag); LOG.info("Submitted DAG to Tez Session"); DAGStatus dagStatus = null; String[] vNames = { "stage1", "stage2" }; try { while (true) { dagStatus = dagClient.getDAGStatus(null); if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) { break; } try { Thread.sleep(500); } catch (InterruptedException e) { // continue; } } while (dagStatus.getState() == DAGStatus.State.RUNNING) { try { ExampleDriver.printDAGStatus(dagClient, vNames); try { Thread.sleep(1000); } catch (InterruptedException e) { // continue; } dagStatus = dagClient.getDAGStatus(null); } catch (TezException e) { LOG.fatal("Failed to get application progress. Exiting"); return -1; } } } finally { fs.delete(stagingDir, true); tezSession.stop(); } ExampleDriver.printDAGStatus(dagClient, vNames); LOG.info("Application completed. " + "FinalState=" + dagStatus.getState()); return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1; }
From source file:org.apache.tez.mapreduce.examples.GroupByOrderByMRRTest.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: groupbyorderbymrrtest <in> <out>"); ToolRunner.printGenericCommandUsage(System.err); return 2; }// w w w . j ava2 s . c om String inputPath = otherArgs[0]; String outputPath = otherArgs[1]; UserGroupInformation.setConfiguration(conf); TezConfiguration tezConf = new TezConfiguration(conf); FileSystem fs = FileSystem.get(conf); if (fs.exists(new Path(outputPath))) { throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists"); } Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>(); String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + Long.toString(System.currentTimeMillis()); Path stagingDir = new Path(stagingDirStr); FileSystem pathFs = stagingDir.getFileSystem(tezConf); pathFs.mkdirs(new Path(stagingDirStr)); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr); stagingDir = pathFs.makeQualified(new Path(stagingDirStr)); TezClient tezClient = TezClient.create("groupbyorderbymrrtest", tezConf); tezClient.start(); LOG.info("Submitting groupbyorderbymrrtest DAG as a new Tez Application"); try { DAG dag = createDAG(conf, localResources, stagingDir, inputPath, outputPath, true); tezClient.waitTillReady(); DAGClient dagClient = tezClient.submitDAG(dag); DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null); if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) { LOG.error("groupbyorderbymrrtest failed, state=" + dagStatus.getState() + ", diagnostics=" + dagStatus.getDiagnostics()); return -1; } LOG.info("Application completed. " + "FinalState=" + dagStatus.getState()); return 0; } finally { tezClient.stop(); } }
From source file:org.apache.tez.mapreduce.examples.IntersectDataGen.java
License:Apache License
/**
 * Qualifies each of {@code paths} against its own file system and registers the resulting
 * URIs on {@code dag} via {@code addURIsForCredentials}.
 *
 * @param dag DAG that receives the URIs
 * @param paths paths to qualify; each one is resolved with the configuration from
 *        {@code getConf()}
 * @throws IOException if a file system cannot be resolved for one of the paths
 */
private void setupURIsForCredentials(DAG dag, Path... paths) throws IOException {
  List<URI> credentialUris = new LinkedList<URI>();
  for (Path candidate : paths) {
    Path qualified = candidate.getFileSystem(getConf()).makeQualified(candidate);
    credentialUris.add(qualified.toUri());
  }
  dag.addURIsForCredentials(credentialUris);
}