Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualify a path so that it uses this FileSystem's scheme and authority and, if relative, is made absolute.
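
As a quick orientation before the full sources below, here is a minimal, self-contained sketch of the call in isolation (the class name and path are illustrative, not taken from the examples that follow): given a relative path, makeQualified resolves it against the filesystem's working directory and prepends the filesystem's scheme and authority.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // fs.defaultFS determines the scheme and authority used for
        // qualification; with no cluster config this is the local FS.
        FileSystem fs = FileSystem.get(conf);

        // A relative path is resolved against the FS working directory
        // and prefixed with the scheme, e.g. file:/home/me/staging/job_1
        // or hdfs://namenode:8020/user/me/staging/job_1.
        Path qualified = fs.makeQualified(new Path("staging/job_1"));
        System.out.println(qualified);
    }
}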

Usage

From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java

License:Apache License

public DAG createDAG(FileSystem remoteFs, Configuration conf, Path remoteStagingDir, int numMapper,
        int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount,
        boolean writeSplitsToDFS, boolean generateSplitsInAM) throws IOException, YarnException {

    Configuration mapStageConf = new JobConf(conf);
    mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
    mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
    mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
    if (numIReducer == 0 && numReducer == 0) {
        mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
    }

    MRHelpers.translateMRConfToTez(mapStageConf);

    Configuration[] intermediateReduceStageConfs = null;
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
        for (int i = 1; i <= iReduceStagesCount; ++i) {
            JobConf iReduceStageConf = new JobConf(conf);
            iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
            iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
            iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
            iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());

            MRHelpers.translateMRConfToTez(iReduceStageConf);
            intermediateReduceStageConfs[i - 1] = iReduceStageConf;
        }
    }

    Configuration finalReduceConf = null;
    if (numReducer > 0) {
        finalReduceConf = new JobConf(conf);
        finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
        finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
        finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
        finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());

        MRHelpers.translateMRConfToTez(finalReduceConf);
    }

    MRHelpers.configureMRApiUsage(mapStageConf);
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            MRHelpers.configureMRApiUsage(intermediateReduceStageConfs[i]);
        }
    }
    if (numReducer > 0) {
        MRHelpers.configureMRApiUsage(finalReduceConf);
    }

    DataSourceDescriptor dataSource = null;
    if (!generateSplitsInAM && writeSplitsToDFS) {

        LOG.info("Writing splits to DFS");
        dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(mapStageConf, remoteStagingDir,
                true);
    } else {
        dataSource = MRInputLegacy.createConfigBuilder(mapStageConf, SleepInputFormat.class)
                .generateSplitsInAM(generateSplitsInAM).build();
    }

    DAG dag = DAG.create("MRRSleepJob");
    String jarPath = ClassUtil.findContainingJar(getClass());
    if (jarPath == null) {
        throw new TezUncheckedException(
                "Could not find any jar containing" + " MRRSleepJob.class in the classpath");
    }
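    // Qualify the jar destination against remoteFs so the copy below targets a full scheme://authority/path URI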
    Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
    remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

    TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] { remoteJarPath }, mapStageConf);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
            jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    List<Vertex> vertices = new ArrayList<Vertex>();

    UserPayload mapUserPayload = TezUtils.createUserPayloadFromConf(mapStageConf);
    int numTasks = generateSplitsInAM ? -1 : numMapper;

    Map<String, String> mapEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, mapEnv, true);
    Map<String, String> reduceEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, reduceEnv, false);

    Vertex mapVertex = Vertex.create("map",
            ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(mapUserPayload), numTasks,
            MRHelpers.getResourceForMRMapper(mapStageConf));
    mapVertex.addTaskLocalFiles(commonLocalResources).addDataSource("MRInput", dataSource)
            .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRMapper(mapStageConf)).setTaskEnvironment(mapEnv);
    vertices.add(mapVertex);

    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            Configuration iconf = intermediateReduceStageConfs[i];
            UserPayload iReduceUserPayload = TezUtils.createUserPayloadFromConf(iconf);
            Vertex ivertex = Vertex.create("ireduce" + (i + 1),
                    ProcessorDescriptor.create(ReduceProcessor.class.getName())
                            .setUserPayload(iReduceUserPayload),
                    numIReducer, MRHelpers.getResourceForMRReducer(intermediateReduceStageConfs[i]));
            ivertex.addTaskLocalFiles(commonLocalResources)
                    .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(intermediateReduceStageConfs[i]))
                    .setTaskEnvironment(reduceEnv);
            vertices.add(ivertex);
        }
    }

    Vertex finalReduceVertex = null;
    if (numReducer > 0) {
        UserPayload reducePayload = TezUtils.createUserPayloadFromConf(finalReduceConf);
        finalReduceVertex = Vertex.create("reduce",
                ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(reducePayload),
                numReducer, MRHelpers.getResourceForMRReducer(finalReduceConf));
        finalReduceVertex.addTaskLocalFiles(commonLocalResources)
                .addDataSink("MROutput",
                        MROutputLegacy.createConfigBuilder(finalReduceConf, NullOutputFormat.class).build())
                .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(finalReduceConf))
                .setTaskEnvironment(reduceEnv);
        vertices.add(finalReduceVertex);
    } else {
        // Map only job
        mapVertex.addDataSink("MROutput",
                MROutputLegacy.createConfigBuilder(mapStageConf, NullOutputFormat.class).build());
    }

    Map<String, String> partitionerConf = Maps.newHashMap();
    partitionerConf.put(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
            .newBuilder(IntWritable.class.getName(), IntWritable.class.getName(),
                    HashPartitioner.class.getName(), partitionerConf)
            .configureInput().useLegacyInput().done().build();

    for (int i = 0; i < vertices.size(); ++i) {
        dag.addVertex(vertices.get(i));
        if (i != 0) {
            dag.addEdge(
                    Edge.create(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
        }
    }

    return dag;
}

From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 1) {
        System.err.println("MRRSleepJob [-m numMapper] [-r numReducer]" + " [-ir numIntermediateReducer]"
                + " [-irs numIntermediateReducerStages]"
                + " [-mt mapSleepTime (msec)] [-rt reduceSleepTime (msec)]"
                + " [-irt intermediateReduceSleepTime]" + " [-recordt recordSleepTime (msec)]"
                + " [-generateSplitsInAM (false)/true]" + " [-writeSplitsToDfs (false)/true]");
        ToolRunner.printGenericCommandUsage(System.err);
        return 2;
    }

    int numMapper = 1, numReducer = 1, numIReducer = 1;
    long mapSleepTime = 100, reduceSleepTime = 100, recSleepTime = 100, iReduceSleepTime = 1;
    int mapSleepCount = 1, reduceSleepCount = 1, iReduceSleepCount = 1;
    int iReduceStagesCount = 1;
    boolean writeSplitsToDfs = false;
    boolean generateSplitsInAM = false;
    boolean splitsOptionFound = false;

    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-m")) {
            numMapper = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-r")) {
            numReducer = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-ir")) {
            numIReducer = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-mt")) {
            mapSleepTime = Long.parseLong(args[++i]);
        } else if (args[i].equals("-rt")) {
            reduceSleepTime = Long.parseLong(args[++i]);
        } else if (args[i].equals("-irt")) {
            iReduceSleepTime = Long.parseLong(args[++i]);
        } else if (args[i].equals("-irs")) {
            iReduceStagesCount = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-recordt")) {
            recSleepTime = Long.parseLong(args[++i]);
        } else if (args[i].equals("-generateSplitsInAM")) {
            if (splitsOptionFound) {
                throw new RuntimeException(
                        "Cannot use both -generateSplitsInAm and -writeSplitsToDfs together");
            }
            splitsOptionFound = true;
            generateSplitsInAM = Boolean.parseBoolean(args[++i]);

        } else if (args[i].equals("-writeSplitsToDfs")) {
            if (splitsOptionFound) {
                throw new RuntimeException(
                        "Cannot use both -generateSplitsInAm and -writeSplitsToDfs together");
            }
            splitsOptionFound = true;
            writeSplitsToDfs = Boolean.parseBoolean(args[++i]);
        }
    }

    if (numIReducer > 0 && numReducer <= 0) {
        throw new RuntimeException("Cannot have intermediate reduces without" + " a final reduce");
    }

    // sleep for *SleepTime duration in Task by recSleepTime per record
    mapSleepCount = (int) Math.ceil(mapSleepTime / ((double) recSleepTime));
    reduceSleepCount = (int) Math.ceil(reduceSleepTime / ((double) recSleepTime));
    iReduceSleepCount = (int) Math.ceil(iReduceSleepTime / ((double) recSleepTime));

    TezConfiguration conf = new TezConfiguration(getConf());
    FileSystem remoteFs = FileSystem.get(conf);

    conf.set(TezConfiguration.TEZ_AM_STAGING_DIR,
            conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT));

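    // Qualify the staging dir with the default FS scheme and authority before handing it to Tez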
    Path remoteStagingDir = remoteFs.makeQualified(
            new Path(conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT),
                    Long.toString(System.currentTimeMillis())));
    TezClientUtils.ensureStagingDirExists(conf, remoteStagingDir);

    DAG dag = createDAG(remoteFs, conf, remoteStagingDir, numMapper, numReducer, iReduceStagesCount,
            numIReducer, mapSleepTime, mapSleepCount, reduceSleepTime, reduceSleepCount, iReduceSleepTime,
            iReduceSleepCount, writeSplitsToDfs, generateSplitsInAM);

    TezClient tezSession = TezClient.create("MRRSleep", conf, false, null, credentials);
    tezSession.start();
    DAGClient dagClient = tezSession.submitDAG(dag);
    dagClient.waitForCompletion();
    tezSession.stop();

    return dagClient.getDAGStatus(null).getState().equals(DAGStatus.State.SUCCEEDED) ? 0 : 1;
}

From source file:org.apache.tez.mapreduce.examples.TestOrderedWordCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    boolean generateSplitsInClient;

    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    boolean useTezSession = conf.getBoolean("USE_TEZ_SESSION", true);
    long interJobSleepTimeout = conf.getInt("INTER_JOB_SLEEP_INTERVAL", 0) * 1000;

    boolean retainStagingDir = conf.getBoolean("RETAIN_STAGING_DIR", false);
    boolean useMRSettings = conf.getBoolean("USE_MR_CONFIGS", true);
    // TODO needs to use auto reduce parallelism
    int intermediateNumReduceTasks = conf.getInt("IREDUCE_NUM_TASKS", 2);

    if (((otherArgs.length % 2) != 0) || (!useTezSession && otherArgs.length != 2)) {
        printUsage();
        return 2;
    }

    List<String> inputPaths = new ArrayList<String>();
    List<String> outputPaths = new ArrayList<String>();

    for (int i = 0; i < otherArgs.length; i += 2) {
        inputPaths.add(otherArgs[i]);
        outputPaths.add(otherArgs[i + 1]);
    }

    UserGroupInformation.setConfiguration(conf);

    TezConfiguration tezConf = new TezConfiguration(conf);
    TestOrderedWordCount instance = new TestOrderedWordCount();

    FileSystem fs = FileSystem.get(conf);

    String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
            TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR
            + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    FileSystem pathFs = stagingDir.getFileSystem(tezConf);
    pathFs.mkdirs(new Path(stagingDirStr));

    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
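    // makeQualified stamps the bare staging path with pathFs's scheme and authority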
    stagingDir = pathFs.makeQualified(new Path(stagingDirStr));

    TokenCache.obtainTokensForNamenodes(instance.credentials, new Path[] { stagingDir }, conf);
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    // No need to add jar containing this class as assumed to be part of
    // the tez jars.

    // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir
    // is the same filesystem as the one used for Input/Output.

    if (useTezSession) {
        LOG.info("Creating Tez Session");
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);
    } else {
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
    }
    TezClient tezSession = TezClient.create("OrderedWordCountSession", tezConf, null, instance.credentials);
    tezSession.start();

    DAGStatus dagStatus = null;
    DAGClient dagClient = null;
    String[] vNames = { "initialmap", "intermediate_reducer", "finalreduce" };

    Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
    try {
        for (int dagIndex = 1; dagIndex <= inputPaths.size(); ++dagIndex) {
            if (dagIndex != 1 && interJobSleepTimeout > 0) {
                try {
                    LOG.info("Sleeping between jobs, sleepInterval=" + (interJobSleepTimeout / 1000));
                    Thread.sleep(interJobSleepTimeout);
                } catch (InterruptedException e) {
                    LOG.info("Main thread interrupted. Breaking out of job loop");
                    break;
                }
            }

            String inputPath = inputPaths.get(dagIndex - 1);
            String outputPath = outputPaths.get(dagIndex - 1);

            if (fs.exists(new Path(outputPath))) {
                throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
            }
            LOG.info("Running OrderedWordCount DAG" + ", dagIndex=" + dagIndex + ", inputPath=" + inputPath
                    + ", outputPath=" + outputPath);

            Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();

            DAG dag = instance.createDAG(fs, conf, localResources, stagingDir, dagIndex, inputPath, outputPath,
                    generateSplitsInClient, useMRSettings, intermediateNumReduceTasks);

            boolean doPreWarm = dagIndex == 1 && useTezSession && conf.getBoolean("PRE_WARM_SESSION", true);
            int preWarmNumContainers = 0;
            if (doPreWarm) {
                preWarmNumContainers = conf.getInt("PRE_WARM_NUM_CONTAINERS", 0);
                if (preWarmNumContainers <= 0) {
                    doPreWarm = false;
                }
            }
            if (doPreWarm) {
                LOG.info("Pre-warming Session");
                PreWarmVertex preWarmVertex = PreWarmVertex.create("PreWarm", preWarmNumContainers,
                        dag.getVertex("initialmap").getTaskResource());
                preWarmVertex.addTaskLocalFiles(dag.getVertex("initialmap").getTaskLocalFiles());
                preWarmVertex.setTaskEnvironment(dag.getVertex("initialmap").getTaskEnvironment());
                preWarmVertex.setTaskLaunchCmdOpts(dag.getVertex("initialmap").getTaskLaunchCmdOpts());

                tezSession.preWarm(preWarmVertex);
            }

            if (useTezSession) {
                LOG.info("Waiting for TezSession to get into ready state");
                waitForTezSessionReady(tezSession);
                LOG.info("Submitting DAG to Tez Session, dagIndex=" + dagIndex);
                dagClient = tezSession.submitDAG(dag);
                LOG.info("Submitted DAG to Tez Session, dagIndex=" + dagIndex);
            } else {
                LOG.info("Submitting DAG as a new Tez Application");
                dagClient = tezSession.submitDAG(dag);
            }

            while (true) {
                dagStatus = dagClient.getDAGStatus(statusGetOpts);
                if (dagStatus.getState() == DAGStatus.State.RUNNING
                        || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                        || dagStatus.getState() == DAGStatus.State.FAILED
                        || dagStatus.getState() == DAGStatus.State.KILLED
                        || dagStatus.getState() == DAGStatus.State.ERROR) {
                    break;
                }
                try {
                    Thread.sleep(500);
                } catch (InterruptedException e) {
                    // continue;
                }
            }

            while (dagStatus.getState() != DAGStatus.State.SUCCEEDED
                    && dagStatus.getState() != DAGStatus.State.FAILED
                    && dagStatus.getState() != DAGStatus.State.KILLED
                    && dagStatus.getState() != DAGStatus.State.ERROR) {
                if (dagStatus.getState() == DAGStatus.State.RUNNING) {
                    ExampleDriver.printDAGStatus(dagClient, vNames);
                }
                try {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        // continue;
                    }
                    dagStatus = dagClient.getDAGStatus(statusGetOpts);
                } catch (TezException e) {
                    LOG.fatal("Failed to get application progress. Exiting");
                    return -1;
                }
            }
            ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
            LOG.info("DAG " + dagIndex + " completed. " + "FinalState=" + dagStatus.getState());
            if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
                LOG.info("DAG " + dagIndex + " diagnostics: " + dagStatus.getDiagnostics());
            }
        }
    } catch (Exception e) {
        LOG.error("Error occurred when submitting/running DAGs", e);
        throw e;
    } finally {
        if (!retainStagingDir) {
            pathFs.delete(stagingDir, true);
        }
        LOG.info("Shutting down session");
        tezSession.stop();
    }

    if (!useTezSession) {
        ExampleDriver.printDAGStatus(dagClient, vNames);
        LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    }
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}

From source file:org.apache.tez.mapreduce.examples.UnionExample.java

License:Apache License

public boolean run(String inputPath, String outputPath, Configuration conf) throws Exception {
    System.out.println("Running UnionExample");
    // conf and UGI
    TezConfiguration tezConf;
    if (conf != null) {
        tezConf = new TezConfiguration(conf);
    } else {
        tezConf = new TezConfiguration();
    }
    UserGroupInformation.setConfiguration(tezConf);
    String user = UserGroupInformation.getCurrentUser().getShortUserName();

    // staging dir
    FileSystem fs = FileSystem.get(tezConf);
    String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging"
            + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
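    // makeQualified adds fs's scheme and authority to the absolute staging path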
    stagingDir = fs.makeQualified(stagingDir);

    // No need to add jar containing this class as assumed to be part of
    // the tez jars.

    // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir
    // is the same filesystem as the one used for Input/Output.

    TezClient tezSession = TezClient.create("UnionExampleSession", tezConf);
    tezSession.start();

    DAGClient dagClient = null;

    try {
        if (fs.exists(new Path(outputPath))) {
            throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
        }

        Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();

        DAG dag = createDAG(fs, tezConf, localResources, stagingDir, inputPath, outputPath);

        tezSession.waitTillReady();
        dagClient = tezSession.submitDAG(dag);

        // monitoring
        DAGStatus dagStatus = dagClient
                .waitForCompletionWithStatusUpdates(EnumSet.of(StatusGetOpts.GET_COUNTERS));
        if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
            System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
            return false;
        }
        return true;
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }
}

From source file:org.apache.tez.mapreduce.input.SimpleInput.java

License:Apache License

public org.apache.hadoop.mapred.InputSplit getOldSplitDetails(TaskSplitIndex splitMetaInfo) throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    FileSystem fs = FileSystem.getLocal(jobConf);
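    // The split meta info stores a bare location; qualify it against the local FS before opening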
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    long offset = splitMetaInfo.getStartOffset();

    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapred.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory
            .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    reporter.getCounter(TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
    inFile.close();
    return split;
}

From source file:org.apache.tez.mapreduce.input.SimpleInput.java

License:Apache License

public org.apache.hadoop.mapreduce.InputSplit getNewSplitDetails(TaskSplitIndex splitMetaInfo)
        throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    long offset = splitMetaInfo.getStartOffset();

    // Split information read from local filesystem.
    FileSystem fs = FileSystem.getLocal(jobConf);
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapreduce.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory
            .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    reporter.getCounter(TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
    inFile.close();
    return split;
}

From source file:org.apache.tez.mapreduce.lib.MRInputUtils.java

License:Apache License

@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo,
        JobConf jobConf, TezCounter splitBytesCounter) throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    long offset = splitMetaInfo.getStartOffset();

    // Split information read from local filesystem.
    FileSystem fs = FileSystem.getLocal(jobConf);
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapreduce.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory
            .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    if (splitBytesCounter != null) {
        splitBytesCounter.increment(pos - offset);
    }
    inFile.close();
    return split;
}

From source file:org.apache.tez.mapreduce.lib.MRInputUtils.java

License:Apache License

@SuppressWarnings("unchecked")
public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo, JobConf jobConf,
        TezCounter splitBytesCounter) throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    FileSystem fs = FileSystem.getLocal(jobConf);
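    // The split meta info stores a bare location; qualify it against the local FS before opening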
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    long offset = splitMetaInfo.getStartOffset();

    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapred.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory
            .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    if (splitBytesCounter != null) {
        splitBytesCounter.increment(pos - offset);
    }
    inFile.close();
    return split;
}

From source file:org.apache.tez.mapreduce.TestMRRJobsDAGApi.java

License:Apache License

@Test(timeout = 60000)
public void testNonDefaultFSStagingDir() throws Exception {
    SleepProcessorConfig spConf = new SleepProcessorConfig(1);

    DAG dag = DAG.create("TezSleepProcessor");
    Vertex vertex = Vertex.create("SleepVertex",
            ProcessorDescriptor.create(SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()),
            1, Resource.newInstance(1024, 1));
    dag.addVertex(vertex);

    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    Path stagingDir = new Path(TEST_ROOT_DIR,
            "testNonDefaultFSStagingDir" + String.valueOf(random.nextInt(100000)));
    FileSystem localFs = FileSystem.getLocal(tezConf);
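    // Qualify against the local FS so the staging dir is an explicit file:// URI rather than the default FS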
    stagingDir = localFs.makeQualified(stagingDir);
    localFs.mkdirs(stagingDir);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

    TezClient tezSession = TezClient.create("TezSleepProcessor", tezConf, false);
    tezSession.start();

    DAGClient dagClient = tezSession.submitDAG(dag);

    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
        LOG.info(
                "Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState());
        Thread.sleep(500L);
        dagStatus = dagClient.getDAGStatus(null);
    }
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
    assertNotNull(dagStatus.getDAGCounters());
    assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName()));
    assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS));
    ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true);
    tezSession.stop();
}

From source file:org.apache.tez.mapreduce.TestMRRJobsDAGApi.java

License:Apache License

@Test(timeout = 60000)
public void testHistoryLogging()
        throws IOException, InterruptedException, TezException, ClassNotFoundException, YarnException {
    SleepProcessorConfig spConf = new SleepProcessorConfig(1);

    DAG dag = DAG.create("TezSleepProcessorHistoryLogging");
    Vertex vertex = Vertex.create("SleepVertex",
            ProcessorDescriptor.create(SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()),
            2, Resource.newInstance(1024, 1));
    dag.addVertex(vertex);

    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
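    // Qualify the staging path against remoteFs so it carries the remote scheme and authority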
    Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random.nextInt(100000))));
    remoteFs.mkdirs(remoteStagingDir);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

    FileSystem localFs = FileSystem.getLocal(tezConf);
    Path historyLogDir = new Path(TEST_ROOT_DIR, "testHistoryLogging");
    localFs.mkdirs(historyLogDir);

    tezConf.set(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR,
            localFs.makeQualified(historyLogDir).toString());

    tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
    TezClient tezSession = TezClient.create("TezSleepProcessorHistoryLogging", tezConf);
    tezSession.start();

    DAGClient dagClient = tezSession.submitDAG(dag);

    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
        LOG.info(
                "Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState());
        Thread.sleep(500L);
        dagStatus = dagClient.getDAGStatus(null);
    }
    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());

    FileStatus historyLogFileStatus = null;
    for (FileStatus fileStatus : localFs.listStatus(historyLogDir)) {
        if (fileStatus.isDirectory()) {
            continue;
        }
        Path p = fileStatus.getPath();
        if (p.getName().startsWith(SimpleHistoryLoggingService.LOG_FILE_NAME_PREFIX)) {
            historyLogFileStatus = fileStatus;
            break;
        }
    }
    Assert.assertNotNull(historyLogFileStatus);
    Assert.assertTrue(historyLogFileStatus.getLen() > 0);
    tezSession.stop();
}