Example usage for org.apache.hadoop.fs FileStatus getModificationTime

List of usage examples for org.apache.hadoop.fs FileStatus getModificationTime

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.FileStatus method getModificationTime().

Prototype

public long getModificationTime() 

Document

Get the modification time of the file, in milliseconds since January 1, 1970 UTC.
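
A minimal, self-contained sketch of the call (the path below is a hypothetical placeholder): the returned value is the file's modification time in epoch milliseconds, which java.time can render as a readable instant.

import java.time.Instant;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModTimeExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Hypothetical path; substitute a file that exists on your filesystem.
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        long modTime = status.getModificationTime(); // epoch millis
        System.out.println("Last modified: " + Instant.ofEpochMilli(modTime));
    }
}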

Usage

From source file: org.apache.tez.client.TezClientUtils.java

License: Apache License

/**
 * Helper function to create a YARN LocalResource
 * @param fs FileSystem object
 * @param p Path of resource to localize
 * @param type LocalResource Type
 * @return a YARN LocalResource for the given Path
 * @throws IOException
 */
static LocalResource createLocalResource(FileSystem fs, Path p, LocalResourceType type,
        LocalResourceVisibility visibility) throws IOException {
    LocalResource rsrc = Records.newRecord(LocalResource.class);
    FileStatus rsrcStat = fs.getFileStatus(p);
    rsrc.setResource(ConverterUtils.getYarnUrlFromPath(fs.resolvePath(rsrcStat.getPath())));
    rsrc.setSize(rsrcStat.getLen());
    rsrc.setTimestamp(rsrcStat.getModificationTime());
    rsrc.setType(type);
    rsrc.setVisibility(visibility);
    return rsrc;
}
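
A note on the pattern above: the size and timestamp recorded in the LocalResource are not merely informational; the YARN NodeManager re-checks them against the file at localization time and fails the container launch on a mismatch, so the value from getModificationTime() must be captured after the file has reached its final location. A sketch of how such a helper might be called, assuming an existing FileSystem handle fs and the imports of the example above; the path and resource name are hypothetical:

Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
// Hypothetical jar already uploaded to the cluster filesystem.
Path appJarPath = new Path("/apps/myapp/job.jar");
localResources.put("job.jar",
        createLocalResource(fs, appJarPath, LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION));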

From source file: org.apache.tez.dag.app.RecoveryParser.java

License: Apache License

public RecoveredDAGData parseRecoveryData() throws IOException {
    Path previousAttemptRecoveryDataDir = getPreviousAttemptRecoveryDataDir();
    LOG.info("Using " + previousAttemptRecoveryDataDir.toString()
            + " for recovering data from previous attempt");
    if (!recoveryFS.exists(previousAttemptRecoveryDataDir)) {
        LOG.info("Nothing to recover as previous attempt data does not exist" + ", previousAttemptDir="
                + previousAttemptRecoveryDataDir.toString());
        createDataRecoveredFlagFile();
        return null;
    }

    Path summaryPath = getSummaryPath(previousAttemptRecoveryDataDir);
    FSDataInputStream summaryStream = getSummaryStream(summaryPath);
    if (summaryStream == null) {
        LOG.info("Nothing to recover as summary file does not exist" + ", previousAttemptDir="
                + previousAttemptRecoveryDataDir.toString() + ", summaryPath=" + summaryPath.toString());
        createDataRecoveredFlagFile();
        return null;
    }

    Path newSummaryPath = getSummaryPath(currentAttemptRecoveryDataDir);
    FSDataOutputStream newSummaryStream = getSummaryOutputStream(newSummaryPath);

    FileStatus summaryFileStatus = recoveryFS.getFileStatus(summaryPath);
    LOG.info("Parsing summary file" + ", path=" + summaryPath.toString() + ", len=" + summaryFileStatus.getLen()
            + ", lastModTime=" + summaryFileStatus.getModificationTime());

    int dagCounter = 0;

    Map<TezDAGID, DAGSummaryData> dagSummaryDataMap = new HashMap<TezDAGID, DAGSummaryData>();
    while (true) {
        RecoveryProtos.SummaryEventProto proto;
        try {
            proto = RecoveryProtos.SummaryEventProto.parseDelimitedFrom(summaryStream);
            if (proto == null) {
                LOG.info("Reached end of summary stream");
                break;
            }
        } catch (EOFException eof) {
            LOG.info("Reached end of summary stream");
            break;
        }
        HistoryEventType eventType = HistoryEventType.values()[proto.getEventType()];
        if (LOG.isDebugEnabled()) {
            LOG.debug("[RECOVERY SUMMARY]" + " dagId=" + proto.getDagId() + ", timestamp="
                    + proto.getTimestamp() + ", event=" + eventType);
        }
        TezDAGID dagId = TezDAGID.fromString(proto.getDagId());
        if (dagCounter < dagId.getId()) {
            dagCounter = dagId.getId();
        }
        if (!dagSummaryDataMap.containsKey(dagId)) {
            dagSummaryDataMap.put(dagId, new DAGSummaryData(dagId));
        }
        dagSummaryDataMap.get(dagId).handleSummaryEvent(proto);
        proto.writeDelimitedTo(newSummaryStream);
    }
    summaryStream.close();
    newSummaryStream.hsync();
    newSummaryStream.close();

    // Set counter for next set of DAGs & update dagNames Set in DAGAppMaster
    dagAppMaster.setDAGCounter(dagCounter);
    for (DAGSummaryData dagSummaryData : dagSummaryDataMap.values()) {
        dagAppMaster.dagNames.add(dagSummaryData.dagName);
        dagAppMaster.dagIDs.add(dagSummaryData.dagId.toString());
    }

    DAGSummaryData lastInProgressDAGData = getLastCompletedOrInProgressDAG(dagSummaryDataMap);
    if (lastInProgressDAGData == null) {
        LOG.info("Nothing to recover as no uncompleted/completed DAGs found");
        return null;
    }
    TezDAGID lastInProgressDAG = lastInProgressDAGData.dagId;
    if (lastInProgressDAG == null) {
        LOG.info("Nothing to recover as no uncompleted/completed DAGs found");
        return null;
    }

    LOG.info("Checking if DAG is in recoverable state" + ", dagId=" + lastInProgressDAGData.dagId);

    final RecoveredDAGData recoveredDAGData = new RecoveredDAGData();
    if (lastInProgressDAGData.completed) {
        recoveredDAGData.isCompleted = true;
        recoveredDAGData.dagState = lastInProgressDAGData.dagState;
    }

    String nonRecoverableReason = isDAGRecoverable(lastInProgressDAGData);
    if (nonRecoverableReason != null) {
        LOG.warn("Found last inProgress DAG but not recoverable: " + lastInProgressDAGData);
        recoveredDAGData.nonRecoverable = true;
        recoveredDAGData.reason = nonRecoverableReason;
    }

    LOG.info("Trying to recover dag from recovery file" + ", dagId=" + lastInProgressDAG.toString()
            + ", dataDir=" + previousAttemptRecoveryDataDir + ", intoCurrentDir="
            + currentAttemptRecoveryDataDir);

    FSDataInputStream dagRecoveryStream = getDAGRecoveryStream(previousAttemptRecoveryDataDir,
            lastInProgressDAG);
    if (dagRecoveryStream == null) {
        // Could not find data to recover
        // Error out
        throw new IOException(
                "Could not find recovery data for last in progress DAG" + ", dagId=" + lastInProgressDAG);
    }

    LOG.info("Copying DAG data into Current Attempt directory" + ", filePath="
            + getDAGRecoveryFilePath(currentAttemptRecoveryDataDir, lastInProgressDAG));
    FSDataOutputStream newDAGRecoveryStream = getDAGRecoveryOutputStream(currentAttemptRecoveryDataDir,
            lastInProgressDAG);

    boolean skipAllOtherEvents = false;
    while (true) {
        HistoryEvent event;
        try {
            event = getNextEvent(dagRecoveryStream);
            if (event == null) {
                LOG.info("Reached end of dag recovery stream");
                break;
            }
        } catch (EOFException eof) {
            LOG.info("Reached end of dag recovery stream");
            break;
        } catch (IOException ioe) {
            LOG.warn("Corrupt data found when trying to read next event", ioe);
            break;
        }
        if (skipAllOtherEvents) {
            // hit an error - skip reading other events
            break;
        }
        HistoryEventType eventType = event.getEventType();
        switch (eventType) {
        case DAG_SUBMITTED: {
            DAGSubmittedEvent submittedEvent = (DAGSubmittedEvent) event;
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            recoveredDAGData.recoveredDAG = dagAppMaster.createDAG(submittedEvent.getDAGPlan(),
                    lastInProgressDAG);
            recoveredDAGData.cumulativeAdditionalResources = submittedEvent
                    .getCumulativeAdditionalLocalResources();
            recoveredDAGData.recoveredDagID = recoveredDAGData.recoveredDAG.getID();
            dagAppMaster.setCurrentDAG(recoveredDAGData.recoveredDAG);
            if (recoveredDAGData.nonRecoverable) {
                skipAllOtherEvents = true;
            }
            break;
        }
        case DAG_INITIALIZED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            recoveredDAGData.recoveredDAG.restoreFromEvent(event);
            break;
        }
        case DAG_STARTED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            recoveredDAGData.recoveredDAG.restoreFromEvent(event);
            break;
        }
        case DAG_COMMIT_STARTED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            recoveredDAGData.recoveredDAG.restoreFromEvent(event);
            break;
        }
        case VERTEX_GROUP_COMMIT_STARTED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            recoveredDAGData.recoveredDAG.restoreFromEvent(event);
            break;
        }
        case VERTEX_GROUP_COMMIT_FINISHED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            recoveredDAGData.recoveredDAG.restoreFromEvent(event);
            break;
        }
        case DAG_FINISHED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            // If this is seen, nothing to recover
            assert recoveredDAGData.recoveredDAG != null;
            recoveredDAGData.recoveredDAG.restoreFromEvent(event);
            recoveredDAGData.isCompleted = true;
            recoveredDAGData.dagState = ((DAGFinishedEvent) event).getState();
            skipAllOtherEvents = true;
            break;
        }
        case CONTAINER_LAUNCHED: {
            // Nothing to do for now
            break;
        }
        case VERTEX_INITIALIZED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            VertexInitializedEvent vEvent = (VertexInitializedEvent) event;
            Vertex v = recoveredDAGData.recoveredDAG.getVertex(vEvent.getVertexID());
            v.restoreFromEvent(vEvent);
            break;
        }
        case VERTEX_STARTED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            VertexStartedEvent vEvent = (VertexStartedEvent) event;
            Vertex v = recoveredDAGData.recoveredDAG.getVertex(vEvent.getVertexID());
            v.restoreFromEvent(vEvent);
            break;
        }
        case VERTEX_PARALLELISM_UPDATED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            VertexParallelismUpdatedEvent vEvent = (VertexParallelismUpdatedEvent) event;
            Vertex v = recoveredDAGData.recoveredDAG.getVertex(vEvent.getVertexID());
            v.restoreFromEvent(vEvent);
            break;
        }
        case VERTEX_COMMIT_STARTED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            VertexCommitStartedEvent vEvent = (VertexCommitStartedEvent) event;
            Vertex v = recoveredDAGData.recoveredDAG.getVertex(vEvent.getVertexID());
            v.restoreFromEvent(vEvent);
            break;
        }
        case VERTEX_FINISHED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            VertexFinishedEvent vEvent = (VertexFinishedEvent) event;
            Vertex v = recoveredDAGData.recoveredDAG.getVertex(vEvent.getVertexID());
            v.restoreFromEvent(vEvent);
            break;
        }
        case TASK_STARTED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            TaskStartedEvent tEvent = (TaskStartedEvent) event;
            Task task = recoveredDAGData.recoveredDAG.getVertex(tEvent.getTaskID().getVertexID())
                    .getTask(tEvent.getTaskID());
            task.restoreFromEvent(tEvent);
            break;
        }
        case TASK_FINISHED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            TaskFinishedEvent tEvent = (TaskFinishedEvent) event;
            Task task = recoveredDAGData.recoveredDAG.getVertex(tEvent.getTaskID().getVertexID())
                    .getTask(tEvent.getTaskID());
            task.restoreFromEvent(tEvent);
            break;
        }
        case TASK_ATTEMPT_STARTED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            TaskAttemptStartedEvent tEvent = (TaskAttemptStartedEvent) event;
            Task task = recoveredDAGData.recoveredDAG
                    .getVertex(tEvent.getTaskAttemptID().getTaskID().getVertexID())
                    .getTask(tEvent.getTaskAttemptID().getTaskID());
            task.restoreFromEvent(tEvent);
            break;
        }
        case TASK_ATTEMPT_FINISHED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            TaskAttemptFinishedEvent tEvent = (TaskAttemptFinishedEvent) event;
            Task task = recoveredDAGData.recoveredDAG
                    .getVertex(tEvent.getTaskAttemptID().getTaskID().getVertexID())
                    .getTask(tEvent.getTaskAttemptID().getTaskID());
            task.restoreFromEvent(tEvent);
            break;
        }
        case VERTEX_DATA_MOVEMENT_EVENTS_GENERATED: {
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            assert recoveredDAGData.recoveredDAG != null;
            VertexRecoverableEventsGeneratedEvent vEvent = (VertexRecoverableEventsGeneratedEvent) event;
            Vertex v = recoveredDAGData.recoveredDAG.getVertex(vEvent.getVertexID());
            v.restoreFromEvent(vEvent);
            break;
        }
        default:
            throw new RuntimeException("Invalid data found, unknown event type " + eventType);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("[DAG RECOVERY]" + " dagId=" + lastInProgressDAG + ", eventType=" + eventType + ", event="
                    + event.toString());
        }
        newDAGRecoveryStream.writeInt(eventType.ordinal());
        event.toProtoStream(newDAGRecoveryStream);
    }
    dagRecoveryStream.close();
    newDAGRecoveryStream.hsync();
    newDAGRecoveryStream.close();

    if (!recoveredDAGData.isCompleted && !recoveredDAGData.nonRecoverable) {
        if (lastInProgressDAGData.bufferedSummaryEvents != null
                && !lastInProgressDAGData.bufferedSummaryEvents.isEmpty()) {
            for (HistoryEvent bufferedEvent : lastInProgressDAGData.bufferedSummaryEvents) {
                assert recoveredDAGData.recoveredDAG != null;
                switch (bufferedEvent.getEventType()) {
                case VERTEX_GROUP_COMMIT_STARTED:
                    recoveredDAGData.recoveredDAG.restoreFromEvent(bufferedEvent);
                    break;
                case VERTEX_GROUP_COMMIT_FINISHED:
                    recoveredDAGData.recoveredDAG.restoreFromEvent(bufferedEvent);
                    break;
                case VERTEX_FINISHED:
                    VertexFinishedEvent vertexFinishedEvent = (VertexFinishedEvent) bufferedEvent;
                    Vertex vertex = recoveredDAGData.recoveredDAG.getVertex(vertexFinishedEvent.getVertexID());
                    if (vertex == null) {
                        recoveredDAGData.nonRecoverable = true;
                        recoveredDAGData.reason = "All state could not be recovered"
                                + ", vertex completed but events not flushed" + ", vertexId="
                                + vertexFinishedEvent.getVertexID();
                    } else {
                        vertex.restoreFromEvent(vertexFinishedEvent);
                    }
                    break;
                default:
                    throw new RuntimeException("Invalid data found in buffered summary events"
                            + ", unknown event type " + bufferedEvent.getEventType());
                }
            }
        }
    }

    LOG.info("Finished copying data from previous attempt into current attempt");
    createDataRecoveredFlagFile();

    return recoveredDAGData;
}

From source file: org.apache.tez.dag.app.rm.container.AMContainerHelpers.java

License: Apache License

/**
 * Create a {@link LocalResource} record with all the given parameters.
 */
public static LocalResource createLocalResource(FileSystem fc, Path file, LocalResourceType type,
        LocalResourceVisibility visibility) throws IOException {
    FileStatus fstat = fc.getFileStatus(file);
    URL resourceURL = ConverterUtils.getYarnUrlFromPath(fc.resolvePath(fstat.getPath()));
    long resourceSize = fstat.getLen();
    long resourceModificationTime = fstat.getModificationTime();

    return LocalResource.newInstance(resourceURL, type, visibility, resourceSize, resourceModificationTime);
}
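
A design note on this variant: fc.resolvePath(fstat.getPath()) returns a fully qualified path (resolving, for example, symlinks or viewfs mount points) so that the URL stored in the LocalResource is unambiguous for the NodeManager, while getLen() and getModificationTime() are taken from the same FileStatus so the recorded size and timestamp stay consistent with each other.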

From source file: org.apache.tez.mapreduce.client.YARNRunner.java

License: Apache License

private LocalResource createApplicationResource(FileContext fs, Path p, LocalResourceType type)
        throws IOException {
    LocalResource rsrc = Records.newRecord(LocalResource.class);
    FileStatus rsrcStat = fs.getFileStatus(p);
    rsrc.setResource(
            ConverterUtils.getYarnUrlFromPath(fs.getDefaultFileSystem().resolvePath(rsrcStat.getPath())));
    rsrc.setSize(rsrcStat.getLen());
    rsrc.setTimestamp(rsrcStat.getModificationTime());
    rsrc.setType(type);
    rsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    return rsrc;
}

From source file: org.apache.tez.mapreduce.examples.FilterLinesByWord.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Credentials credentials = new Credentials();

    boolean generateSplitsInClient = false;

    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }

    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }

    TezConfiguration tezConf = new TezConfiguration(conf);

    fs.getWorkingDirectory();
    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
    if (jarPath == null) {
        throw new TezUncheckedException(
                "Could not find any jar containing" + FilterLinesByWord.class.getName() + " in the classpath");
    }

    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);

    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
            remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources,
            credentials);
    tezSession.start(); // Why do I need to start the TezSession.

    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);

    Configuration stage2Conf = new JobConf(conf);
    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);

    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor
            .create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload))
            .addTaskLocalFiles(commonLocalResources);

    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
        // TODO TEZ-1406. Don't use MRInputLegacy
        stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
        stage1Conf.setBoolean("mapred.mapper.new-api", false);
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
        dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false)
                .build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);

    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2",
            ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
            1);
    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
    OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
            .newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf)
            .build();

    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");

    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    try {
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING
                    || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                    || dagStatus.getState() == DAGStatus.State.FAILED
                    || dagStatus.getState() == DAGStatus.State.KILLED
                    || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                // continue;
            }
        }

        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // continue;
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.fatal("Failed to get application progress. Exiting");
                return -1;
            }
        }

        dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }

    ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}

From source file: org.apache.tez.mapreduce.examples.FilterLinesByWordOneToOne.java

License: Apache License

@Override
public int run(String[] otherArgs) throws Exception {
    boolean generateSplitsInClient = false;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }

    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }

    TezConfiguration tezConf = new TezConfiguration(conf);

    fs.getWorkingDirectory();
    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    String jarPath = ClassUtil.findContainingJar(FilterLinesByWordOneToOne.class);
    if (jarPath == null) {
        throw new TezUncheckedException("Could not find any jar containing"
                + FilterLinesByWordOneToOne.class.getName() + " in the classpath");
    }

    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);

    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
            remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, null);
    tezSession.start(); // Why do I need to start the TezSession.

    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);

    Configuration stage2Conf = new JobConf(conf);

    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);

    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor
            .create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload))
            .addTaskLocalFiles(commonLocalResources);

    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
        // TODO TEZ-1406. Don't use MRInputLegacy
        stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
        stage1Conf.setBoolean("mapred.mapper.new-api", false);
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
        dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false)
                .build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);

    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2",
            ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
            dsd.getNumberOfShards());
    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    stage2Vertex.addDataSink("MROutput",
            DataSinkDescriptor.create(
                    OutputDescriptor.create(MROutput.class.getName())
                            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
                    OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
            .newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf)
            .build();

    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultOneToOneEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");

    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    try {
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING
                    || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                    || dagStatus.getState() == DAGStatus.State.FAILED
                    || dagStatus.getState() == DAGStatus.State.KILLED
                    || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                // continue;
            }
        }

        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // continue;
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.fatal("Failed to get application progress. Exiting");
                return -1;
            }
        }
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }

    ExampleDriver.printDAGStatus(dagClient, vNames);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}

From source file: org.apache.tez.mapreduce.examples.MRRSleepJob.java

License: Apache License

public DAG createDAG(FileSystem remoteFs, Configuration conf, Path remoteStagingDir, int numMapper,
        int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount,
        boolean writeSplitsToDFS, boolean generateSplitsInAM) throws IOException, YarnException {

    Configuration mapStageConf = new JobConf(conf);
    mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
    mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
    mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
    if (numIReducer == 0 && numReducer == 0) {
        mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
    }

    MRHelpers.translateMRConfToTez(mapStageConf);

    Configuration[] intermediateReduceStageConfs = null;
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
        for (int i = 1; i <= iReduceStagesCount; ++i) {
            JobConf iReduceStageConf = new JobConf(conf);
            iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
            iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
            iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
            iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());

            MRHelpers.translateMRConfToTez(iReduceStageConf);
            intermediateReduceStageConfs[i - 1] = iReduceStageConf;
        }
    }

    Configuration finalReduceConf = null;
    if (numReducer > 0) {
        finalReduceConf = new JobConf(conf);
        finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
        finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
        finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
        finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());

        MRHelpers.translateMRConfToTez(finalReduceConf);
    }

    MRHelpers.configureMRApiUsage(mapStageConf);
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            MRHelpers.configureMRApiUsage(intermediateReduceStageConfs[i]);
        }
    }
    if (numReducer > 0) {
        MRHelpers.configureMRApiUsage(finalReduceConf);
    }

    DataSourceDescriptor dataSource = null;
    if (!generateSplitsInAM && writeSplitsToDFS) {

        LOG.info("Writing splits to DFS");
        dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(mapStageConf, remoteStagingDir,
                true);
    } else {
        dataSource = MRInputLegacy.createConfigBuilder(mapStageConf, SleepInputFormat.class)
                .generateSplitsInAM(generateSplitsInAM).build();
    }

    DAG dag = DAG.create("MRRSleepJob");
    String jarPath = ClassUtil.findContainingJar(getClass());
    if (jarPath == null) {
        throw new TezUncheckedException(
                "Could not find any jar containing" + " MRRSleepJob.class in the classpath");
    }
    Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
    remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

    TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] { remoteJarPath }, mapStageConf);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
            jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    List<Vertex> vertices = new ArrayList<Vertex>();

    UserPayload mapUserPayload = TezUtils.createUserPayloadFromConf(mapStageConf);
    int numTasks = generateSplitsInAM ? -1 : numMapper;

    Map<String, String> mapEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, mapEnv, true);
    Map<String, String> reduceEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, reduceEnv, false);

    Vertex mapVertex = Vertex.create("map",
            ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(mapUserPayload), numTasks,
            MRHelpers.getResourceForMRMapper(mapStageConf));
    mapVertex.addTaskLocalFiles(commonLocalResources).addDataSource("MRInput", dataSource)
            .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRMapper(mapStageConf)).setTaskEnvironment(mapEnv);
    vertices.add(mapVertex);

    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            Configuration iconf = intermediateReduceStageConfs[i];
            UserPayload iReduceUserPayload = TezUtils.createUserPayloadFromConf(iconf);
            Vertex ivertex = Vertex.create("ireduce" + (i + 1),
                    ProcessorDescriptor.create(ReduceProcessor.class.getName())
                            .setUserPayload(iReduceUserPayload),
                    numIReducer, MRHelpers.getResourceForMRReducer(intermediateReduceStageConfs[i]));
            ivertex.addTaskLocalFiles(commonLocalResources)
                    .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(intermediateReduceStageConfs[i]))
                    .setTaskEnvironment(reduceEnv);
            vertices.add(ivertex);
        }
    }

    Vertex finalReduceVertex = null;
    if (numReducer > 0) {
        UserPayload reducePayload = TezUtils.createUserPayloadFromConf(finalReduceConf);
        finalReduceVertex = Vertex.create("reduce",
                ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(reducePayload),
                numReducer, MRHelpers.getResourceForMRReducer(finalReduceConf));
        finalReduceVertex.addTaskLocalFiles(commonLocalResources)
                .addDataSink("MROutput",
                        MROutputLegacy.createConfigBuilder(finalReduceConf, NullOutputFormat.class).build())
                .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(finalReduceConf))
                .setTaskEnvironment(reduceEnv);
        vertices.add(finalReduceVertex);
    } else {
        // Map only job
        mapVertex.addDataSink("MROutput",
                MROutputLegacy.createConfigBuilder(mapStageConf, NullOutputFormat.class).build());
    }

    Map<String, String> partitionerConf = Maps.newHashMap();
    partitionerConf.put(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
            .newBuilder(IntWritable.class.getName(), IntWritable.class.getName(),
                    HashPartitioner.class.getName(), partitionerConf)
            .configureInput().useLegacyInput().done().build();

    for (int i = 0; i < vertices.size(); ++i) {
        dag.addVertex(vertices.get(i));
        if (i != 0) {
            dag.addEdge(
                    Edge.create(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
        }
    }

    return dag;
}

From source file: org.apache.tez.mapreduce.examples.RPCLoadGen.java

License: Apache License

private UserPayload createUserPayload(TezConfiguration conf, int maxSleepTimeMillis, int payloadSize,
        String mode, Map<String, LocalResource> localResources) throws IOException {
    ByteBuffer payload;
    if (mode.equals(VIA_RPC)) {
        if (payloadSize < 5) {
            payloadSize = 5; // To Configure the processor
        }
        byte[] payloadBytes = new byte[payloadSize];
        random.nextBytes(payloadBytes);
        payload = ByteBuffer.wrap(payloadBytes);
        payload.put(4, VIA_RPC_BYTE); // ViaRPC
    } else {
        // Actual payload
        byte[] payloadBytes = new byte[5];
        payload = ByteBuffer.wrap(payloadBytes);

        // Disk payload
        byte[] diskPayload = new byte[payloadSize];
        random.nextBytes(diskPayload);
        fs = FileSystem.get(conf);
        resourcePath = new Path(Path.SEPARATOR + "tmp", DISK_PAYLOAD_NAME);
        System.err.println("ZZZ: HDFSPath: " + resourcePath);
        resourcePath = fs.makeQualified(resourcePath);
        System.err.println("ZZZ: HDFSPathResolved: " + resourcePath);
        FSDataOutputStream dataOut = fs.create(resourcePath, true);
        dataOut.write(diskPayload);
        dataOut.close();
        fs.setReplication(resourcePath, (short) 10);
        FileStatus fileStatus = fs.getFileStatus(resourcePath);

        if (mode.equals(VIA_HDFS_DIST_CACHE)) {
            LocalResource lr = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(resourcePath),
                    LocalResourceType.FILE, LocalResourceVisibility.PRIVATE, fileStatus.getLen(),
                    fileStatus.getModificationTime());
            localResources.put(DISK_PAYLOAD_NAME, lr);
            payload.put(4, VIA_HDFS_DIST_CACHE_BYTE); // Via HDFS dist cache
        } else if (mode.equals(VIA_HDFS_DIRECT_READ)) {
            payload.put(4, VIA_HDFS_DIRECT_READ_BYTE); // Via HDFS direct read
        }
    }

    payload.putInt(0, maxSleepTimeMillis);
    return UserPayload.create(payload);
}
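
Worth noting in the HDFS-distributed-cache branch above: the replication factor of the payload file is raised via fs.setReplication(resourcePath, (short) 10) before its FileStatus is read, so the many containers localizing the resource spread their reads across datanodes; the size and modification time are then captured once from that FileStatus and reused when building the LocalResource.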

From source file: org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License: Apache License

/**
 * Update provided localResources collection with the required local
 * resources needed by MapReduce tasks with respect to Input splits.
 *
 * @param fs Filesystem instance to access status of splits related files
 * @param inputSplitInfo Information on location of split files
 * @param localResources LocalResources collection to be updated
 * @throws IOException
 */
private static void updateLocalResourcesForInputSplits(FileSystem fs, InputSplitInfo inputSplitInfo,
        Map<String, LocalResource> localResources) throws IOException {
    if (localResources.containsKey(JOB_SPLIT_RESOURCE_NAME)) {
        throw new RuntimeException(
                "LocalResources already contains a" + " resource named " + JOB_SPLIT_RESOURCE_NAME);
    }
    if (localResources.containsKey(JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
        throw new RuntimeException(
                "LocalResources already contains a" + " resource named " + JOB_SPLIT_METAINFO_RESOURCE_NAME);
    }

    FileStatus splitFileStatus = fs.getFileStatus(inputSplitInfo.getSplitsFile());
    FileStatus metaInfoFileStatus = fs.getFileStatus(inputSplitInfo.getSplitsMetaInfoFile());
    localResources.put(JOB_SPLIT_RESOURCE_NAME,
            LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(inputSplitInfo.getSplitsFile()),
                    LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, splitFileStatus.getLen(),
                    splitFileStatus.getModificationTime()));
    localResources.put(JOB_SPLIT_METAINFO_RESOURCE_NAME,
            LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(inputSplitInfo.getSplitsMetaInfoFile()),
                    LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, metaInfoFileStatus.getLen(),
                    metaInfoFileStatus.getModificationTime()));
}

From source file: org.apache.tez.mapreduce.TestMRRJobsDAGApi.java

License: Apache License

private static LocalResource createLocalResource(FileSystem fc, Path file, LocalResourceType type,
        LocalResourceVisibility visibility) throws IOException {
    FileStatus fstat = fc.getFileStatus(file);
    URL resourceURL = ConverterUtils.getYarnUrlFromPath(fc.resolvePath(fstat.getPath()));
    long resourceSize = fstat.getLen();
    long resourceModificationTime = fstat.getModificationTime();

    return LocalResource.newInstance(resourceURL, type, visibility, resourceSize, resourceModificationTime);
}