List of usage examples for org.apache.hadoop.mapreduce.security.TokenCache.obtainTokensForNamenodes
public static void obtainTokensForNamenodes(Credentials credentials, Path[] ps, Configuration conf) throws IOException
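The method fetches HDFS delegation tokens for the file systems backing the given paths and adds them to the supplied Credentials; on a cluster without Kerberos security enabled it returns without doing anything. Before the real-world examples below, here is a minimal sketch of a typical call made before job submission; the class name and paths are illustrative, not taken from any of the sources that follow.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.security.TokenCache;

public class TokenCacheExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "token-cache-example");
        // Obtain delegation tokens for every path the job will touch, so tasks
        // can authenticate to the corresponding NameNode(s) on a secure cluster.
        // The paths are illustrative. On an unsecured cluster this is a no-op.
        Path[] paths = { new Path("/user/alice/input"), new Path("/user/alice/output") };
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), paths, job.getConfiguration());
    }
}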
From source file:org.apache.hadoop.examples.terasort.TeraOutputFormat.java
License:Apache License
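Here checkOutputSpecs obtains a delegation token for the output directory's file system before verifying that the directory is effectively empty (its only permitted content is the TeraSort partition file).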
@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);
    try {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (TeraSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    } catch (FileNotFoundException ignored) {
    }
}
From source file:org.apache.oozie.action.hadoop.HDFSCredentials.java
License:Apache License
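Oozie wraps the call in ugi.doAs so the tokens are obtained as the proxied workflow user rather than the server's own principal.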
private void obtainTokensForNamenodes(final Credentials credentials, final Configuration config,
        final UserGroupInformation ugi, final Path[] paths) throws IOException, InterruptedException {
    LOG.info(String.format("\"%s\" is present in workflow configuration. Obtaining tokens for NameNode(s) [%s]",
            MRJobConfig.JOB_NAMENODES, config.get(MRJobConfig.JOB_NAMENODES)));
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {
            TokenCache.obtainTokensForNamenodes(credentials, paths, config);
            return null;
        }
    });
}
From source file:org.apache.pig.backend.hadoop.executionengine.tez.util.SecurityHelper.java
License:Apache License
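Pig reads the NameNode list from MRJobConfig.JOB_NAMENODES in the job configuration and obtains a token for each listed path.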
public static void populateTokenCache(Configuration conf, Credentials credentials) throws IOException {
    readTokensFromFiles(conf, credentials);
    // add the delegation tokens from configuration
    String[] nameNodes = conf.getStrings(MRJobConfig.JOB_NAMENODES);
    LOG.debug("adding the following namenodes' delegation tokens:" + Arrays.toString(nameNodes));
    if (nameNodes != null) {
        Path[] ps = new Path[nameNodes.length];
        for (int i = 0; i < nameNodes.length; i++) {
            ps[i] = new Path(nameNodes[i]);
        }
        TokenCache.obtainTokensForNamenodes(credentials, ps, conf);
    }
}
From source file:org.apache.sqoop.connector.hdfs.security.SecurityUtils.java
License:Apache License
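Sqoop fills a fresh Credentials object, then serializes the resulting tokens into a mutable context; the call is skipped entirely on unsecured clusters.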
/**
 * Generate delegation tokens for the current user (this code is supposed to run in doAs) and store them
 * serialized in the given mutable context.
 */
static public void generateDelegationTokens(MutableContext context, Path path, Configuration configuration)
        throws IOException {
    if (!UserGroupInformation.isSecurityEnabled()) {
        LOG.info("Running on unsecured cluster, skipping delegation token generation.");
        return;
    }

    // String representation of all tokens that we will create (most likely a single one)
    List<String> tokens = new LinkedList<>();

    Credentials credentials = new Credentials();
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] { path }, configuration);
    for (Token token : credentials.getAllTokens()) {
        LOG.info("Generated token: " + token.toString());
        tokens.add(serializeToken(token));
    }

    // The context classes are transferred via "Credentials" rather than with jobconf,
    // so we're not leaking the DT out here
    if (tokens.size() > 0) {
        context.setString(HdfsConstants.DELEGATION_TOKENS, StringUtils.join(tokens, " "));
    }
}
From source file:org.apache.tez.mapreduce.examples.FilterLinesByWord.java
License:Apache License
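This Tez example obtains a token for the staging-directory file system that holds the uploaded job jar before creating the TezClient with those credentials.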
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Credentials credentials = new Credentials();

    boolean generateSplitsInClient = false;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }

    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }

    TezConfiguration tezConf = new TezConfiguration(conf);

    fs.getWorkingDirectory();
    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
    if (jarPath == null) {
        throw new TezUncheckedException("Could not find any jar containing "
                + FilterLinesByWord.class.getName() + " in the classpath");
    }

    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);

    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
            remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources,
            credentials);
    tezSession.start(); // Why do I need to start the TezSession.

    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);

    Configuration stage2Conf = new JobConf(conf);
    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);

    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1",
            ProcessorDescriptor.create(FilterByWordInputProcessor.class.getName())
                    .setUserPayload(stage1Payload))
            .addTaskLocalFiles(commonLocalResources);

    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
        // TODO TEZ-1406. Don't use MRInputLegacy
        stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
        stage1Conf.setBoolean("mapred.mapper.new-api", false);
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
        dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath)
                .groupSplits(false).build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);

    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2",
            ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
            1);
    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
    OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
            .newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf)
            .build();

    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");

    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    try {
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING
                    || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                    || dagStatus.getState() == DAGStatus.State.FAILED
                    || dagStatus.getState() == DAGStatus.State.KILLED
                    || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                // continue;
            }
        }

        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // continue;
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.fatal("Failed to get application progress. Exiting");
                return -1;
            }
        }

        dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }

    ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java
License:Apache License
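Same pattern as the previous Tez example: the uploaded job jar's path is covered by a delegation token before the DAG's local resources are assembled.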
public DAG createDAG(FileSystem remoteFs, Configuration conf, Path remoteStagingDir, int numMapper,
        int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount,
        boolean writeSplitsToDFS, boolean generateSplitsInAM) throws IOException, YarnException {

    Configuration mapStageConf = new JobConf(conf);
    mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
    mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
    mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
    if (numIReducer == 0 && numReducer == 0) {
        mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
    }

    MRHelpers.translateMRConfToTez(mapStageConf);

    Configuration[] intermediateReduceStageConfs = null;
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
        for (int i = 1; i <= iReduceStagesCount; ++i) {
            JobConf iReduceStageConf = new JobConf(conf);
            iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
            iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
            iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
            iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());

            MRHelpers.translateMRConfToTez(iReduceStageConf);
            intermediateReduceStageConfs[i - 1] = iReduceStageConf;
        }
    }

    Configuration finalReduceConf = null;
    if (numReducer > 0) {
        finalReduceConf = new JobConf(conf);
        finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
        finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
        finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
        finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());

        MRHelpers.translateMRConfToTez(finalReduceConf);
    }

    MRHelpers.configureMRApiUsage(mapStageConf);
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            MRHelpers.configureMRApiUsage(intermediateReduceStageConfs[i]);
        }
    }
    if (numReducer > 0) {
        MRHelpers.configureMRApiUsage(finalReduceConf);
    }

    DataSourceDescriptor dataSource = null;
    if (!generateSplitsInAM && writeSplitsToDFS) {
        LOG.info("Writing splits to DFS");
        dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(mapStageConf, remoteStagingDir,
                true);
    } else {
        dataSource = MRInputLegacy.createConfigBuilder(mapStageConf, SleepInputFormat.class)
                .generateSplitsInAM(generateSplitsInAM).build();
    }

    DAG dag = DAG.create("MRRSleepJob");
    String jarPath = ClassUtil.findContainingJar(getClass());
    if (jarPath == null) {
        throw new TezUncheckedException("Could not find any jar containing MRRSleepJob.class in the classpath");
    }
    Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
    remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

    TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] { remoteJarPath }, mapStageConf);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
            jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    List<Vertex> vertices = new ArrayList<Vertex>();

    UserPayload mapUserPayload = TezUtils.createUserPayloadFromConf(mapStageConf);
    int numTasks = generateSplitsInAM ? -1 : numMapper;

    Map<String, String> mapEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, mapEnv, true);
    Map<String, String> reduceEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, reduceEnv, false);

    Vertex mapVertex = Vertex.create("map",
            ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(mapUserPayload), numTasks,
            MRHelpers.getResourceForMRMapper(mapStageConf));
    mapVertex.addTaskLocalFiles(commonLocalResources).addDataSource("MRInput", dataSource)
            .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRMapper(mapStageConf)).setTaskEnvironment(mapEnv);
    vertices.add(mapVertex);

    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            Configuration iconf = intermediateReduceStageConfs[i];
            UserPayload iReduceUserPayload = TezUtils.createUserPayloadFromConf(iconf);
            Vertex ivertex = Vertex.create("ireduce" + (i + 1),
                    ProcessorDescriptor.create(ReduceProcessor.class.getName())
                            .setUserPayload(iReduceUserPayload),
                    numIReducer, MRHelpers.getResourceForMRReducer(intermediateReduceStageConfs[i]));
            ivertex.addTaskLocalFiles(commonLocalResources)
                    .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(intermediateReduceStageConfs[i]))
                    .setTaskEnvironment(reduceEnv);
            vertices.add(ivertex);
        }
    }

    Vertex finalReduceVertex = null;
    if (numReducer > 0) {
        UserPayload reducePayload = TezUtils.createUserPayloadFromConf(finalReduceConf);
        finalReduceVertex = Vertex.create("reduce",
                ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(reducePayload),
                numReducer, MRHelpers.getResourceForMRReducer(finalReduceConf));
        finalReduceVertex.addTaskLocalFiles(commonLocalResources)
                .addDataSink("MROutput",
                        MROutputLegacy.createConfigBuilder(finalReduceConf, NullOutputFormat.class).build())
                .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(finalReduceConf))
                .setTaskEnvironment(reduceEnv);
        vertices.add(finalReduceVertex);
    } else {
        // Map-only job
        mapVertex.addDataSink("MROutput",
                MROutputLegacy.createConfigBuilder(mapStageConf, NullOutputFormat.class).build());
    }

    Map<String, String> partitionerConf = Maps.newHashMap();
    partitionerConf.put(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
            .newBuilder(IntWritable.class.getName(), IntWritable.class.getName(),
                    HashPartitioner.class.getName(), partitionerConf)
            .configureInput().useLegacyInput().done().build();

    for (int i = 0; i < vertices.size(); ++i) {
        dag.addVertex(vertices.get(i));
        if (i != 0) {
            dag.addEdge(Edge.create(vertices.get(i - 1), vertices.get(i),
                    edgeConf.createDefaultEdgeProperty()));
        }
    }

    return dag;
}
From source file:org.apache.tez.mapreduce.examples.TestOrderedWordCount.java
License:Apache License
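Tokens are obtained for the staging directory only; the TEZ-674 comment in the code notes that input/output paths are assumed to live on the same file system.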
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    boolean generateSplitsInClient;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    boolean useTezSession = conf.getBoolean("USE_TEZ_SESSION", true);
    long interJobSleepTimeout = conf.getInt("INTER_JOB_SLEEP_INTERVAL", 0) * 1000;

    boolean retainStagingDir = conf.getBoolean("RETAIN_STAGING_DIR", false);
    boolean useMRSettings = conf.getBoolean("USE_MR_CONFIGS", true);
    // TODO needs to use auto reduce parallelism
    int intermediateNumReduceTasks = conf.getInt("IREDUCE_NUM_TASKS", 2);

    if (((otherArgs.length % 2) != 0) || (!useTezSession && otherArgs.length != 2)) {
        printUsage();
        return 2;
    }

    List<String> inputPaths = new ArrayList<String>();
    List<String> outputPaths = new ArrayList<String>();

    for (int i = 0; i < otherArgs.length; i += 2) {
        inputPaths.add(otherArgs[i]);
        outputPaths.add(otherArgs[i + 1]);
    }

    UserGroupInformation.setConfiguration(conf);

    TezConfiguration tezConf = new TezConfiguration(conf);
    TestOrderedWordCount instance = new TestOrderedWordCount();

    FileSystem fs = FileSystem.get(conf);

    String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
            TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR
            + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    FileSystem pathFs = stagingDir.getFileSystem(tezConf);
    pathFs.mkdirs(new Path(stagingDirStr));

    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = pathFs.makeQualified(new Path(stagingDirStr));

    TokenCache.obtainTokensForNamenodes(instance.credentials, new Path[] { stagingDir }, conf);
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    // No need to add jar containing this class as assumed to be part of the tez jars.

    // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir
    // is the same filesystem as the one used for Input/Output.

    if (useTezSession) {
        LOG.info("Creating Tez Session");
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);
    } else {
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
    }
    TezClient tezSession = TezClient.create("OrderedWordCountSession", tezConf, null, instance.credentials);
    tezSession.start();

    DAGStatus dagStatus = null;
    DAGClient dagClient = null;
    String[] vNames = { "initialmap", "intermediate_reducer", "finalreduce" };

    Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
    try {
        for (int dagIndex = 1; dagIndex <= inputPaths.size(); ++dagIndex) {
            if (dagIndex != 1 && interJobSleepTimeout > 0) {
                try {
                    LOG.info("Sleeping between jobs, sleepInterval=" + (interJobSleepTimeout / 1000));
                    Thread.sleep(interJobSleepTimeout);
                } catch (InterruptedException e) {
                    LOG.info("Main thread interrupted. Breaking out of job loop");
                    break;
                }
            }

            String inputPath = inputPaths.get(dagIndex - 1);
            String outputPath = outputPaths.get(dagIndex - 1);

            if (fs.exists(new Path(outputPath))) {
                throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
            }

            LOG.info("Running OrderedWordCount DAG" + ", dagIndex=" + dagIndex + ", inputPath=" + inputPath
                    + ", outputPath=" + outputPath);

            Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();

            DAG dag = instance.createDAG(fs, conf, localResources, stagingDir, dagIndex, inputPath,
                    outputPath, generateSplitsInClient, useMRSettings, intermediateNumReduceTasks);

            boolean doPreWarm = dagIndex == 1 && useTezSession && conf.getBoolean("PRE_WARM_SESSION", true);
            int preWarmNumContainers = 0;
            if (doPreWarm) {
                preWarmNumContainers = conf.getInt("PRE_WARM_NUM_CONTAINERS", 0);
                if (preWarmNumContainers <= 0) {
                    doPreWarm = false;
                }
            }
            if (doPreWarm) {
                LOG.info("Pre-warming Session");
                PreWarmVertex preWarmVertex = PreWarmVertex.create("PreWarm", preWarmNumContainers,
                        dag.getVertex("initialmap").getTaskResource());
                preWarmVertex.addTaskLocalFiles(dag.getVertex("initialmap").getTaskLocalFiles());
                preWarmVertex.setTaskEnvironment(dag.getVertex("initialmap").getTaskEnvironment());
                preWarmVertex.setTaskLaunchCmdOpts(dag.getVertex("initialmap").getTaskLaunchCmdOpts());
                tezSession.preWarm(preWarmVertex);
            }

            if (useTezSession) {
                LOG.info("Waiting for TezSession to get into ready state");
                waitForTezSessionReady(tezSession);
                LOG.info("Submitting DAG to Tez Session, dagIndex=" + dagIndex);
                dagClient = tezSession.submitDAG(dag);
                LOG.info("Submitted DAG to Tez Session, dagIndex=" + dagIndex);
            } else {
                LOG.info("Submitting DAG as a new Tez Application");
                dagClient = tezSession.submitDAG(dag);
            }

            while (true) {
                dagStatus = dagClient.getDAGStatus(statusGetOpts);
                if (dagStatus.getState() == DAGStatus.State.RUNNING
                        || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                        || dagStatus.getState() == DAGStatus.State.FAILED
                        || dagStatus.getState() == DAGStatus.State.KILLED
                        || dagStatus.getState() == DAGStatus.State.ERROR) {
                    break;
                }
                try {
                    Thread.sleep(500);
                } catch (InterruptedException e) {
                    // continue;
                }
            }

            while (dagStatus.getState() != DAGStatus.State.SUCCEEDED
                    && dagStatus.getState() != DAGStatus.State.FAILED
                    && dagStatus.getState() != DAGStatus.State.KILLED
                    && dagStatus.getState() != DAGStatus.State.ERROR) {
                if (dagStatus.getState() == DAGStatus.State.RUNNING) {
                    ExampleDriver.printDAGStatus(dagClient, vNames);
                }
                try {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        // continue;
                    }
                    dagStatus = dagClient.getDAGStatus(statusGetOpts);
                } catch (TezException e) {
                    LOG.fatal("Failed to get application progress. Exiting");
                    return -1;
                }
            }
            ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
            LOG.info("DAG " + dagIndex + " completed. " + "FinalState=" + dagStatus.getState());
            if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
                LOG.info("DAG " + dagIndex + " diagnostics: " + dagStatus.getDiagnostics());
            }
        }
    } catch (Exception e) {
        LOG.error("Error occurred when submitting/running DAGs", e);
        throw e;
    } finally {
        if (!retainStagingDir) {
            pathFs.delete(stagingDir, true);
        }
        LOG.info("Shutting down session");
        tezSession.stop();
    }

    if (!useTezSession) {
        ExampleDriver.printDAGStatus(dagClient, vNames);
        LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    }
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
From source file:org.jd.copier.mapred.DistCp.java
License:Apache License
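DistCp obtains tokens for the destination path up front, since both this setup code and the copy mappers it launches need to reach the destination file system.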
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf    The dfs/mapred configuration.
 * @param jobConf The handle to the jobConf object to be initialized.
 * @param args    Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean skipCRCCheck = args.flags.contains(Options.SKIPCRC);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.SKIPCRC.propertyname, skipCRCCheck);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(jClient.getFs(), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to
                // prevent NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
            LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update
                                && sameFile(srcfs, child, dstfs, new Path(args.dst, dst), skipCRCCheck);
                        // skip file if it exceeds the file limit or size limit
                        skipfile |= fileCount == args.filelimit
                                || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create " + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToCopyCount=" + fileCount);
    LOG.info("bytesToCopyCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
    return fileCount > 0;
}
From source file:org.mrgeo.hdfs.input.image.HdfsMrsImagePyramidInputFormat.java
License:Apache License
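Here listStatus obtains tokens for the resolved input directories before globbing them for input files.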
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();

    HdfsMrsImageDataProvider dp = new HdfsMrsImageDataProvider(job.getConfiguration(), input, null);
    String inputWithZoom = getZoomName(dp, inputZoom);

    // We are going to read all of the input dirs
    Path[] dirs = new Path[] { new Path(inputWithZoom) };

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);

    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                findInputs(globStat, fs, inputFilter, result);
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}
From source file:terasort.io.TeraOutputFormat.java
License:Apache License
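A simplified variant of the Hadoop TeraOutputFormat example above; note that in this version any single non-directory child makes the existing output directory count as empty, regardless of its name.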
@Override
public void checkOutputSpecs(JobContext job) throws IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);
    if (fs.exists(outDir)) {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory();
        }
        if (!empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    }
}