Example usage for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED

List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED

Introduction

On this page you can find example usages of org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED.

Prototype

FinalApplicationStatus FAILED

To view the source code for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED, click the Source Link.

Document

Application which failed.

Usage

From source file:com.zqh.hadoop.moya.core.yarn.ApplicationMaster.java

License:Apache License

/**
 * Winds the application master down and reports the final status to the ResourceManager.
 *
 * <p>Steps, in order: wait up to 10 seconds for each launch thread, stop
 * {@code nmClientAsync}, invoke {@code DeleteGroup.main} with {@code ZKHosts} and "moya",
 * derive the final status from the container counters, unregister from the
 * ResourceManager, and stop its client.
 *
 * <p>The {@code success} field reflects the outcome and {@code done} is set to
 * {@code true} after the unregister attempt. Failures of {@code DeleteGroup} or of the
 * unregister call are logged, not rethrown. If the calling thread is interrupted while
 * joining, the remaining steps still run and the interrupt status is restored just
 * before this method returns.
 */
private void finish() {
    // Remember an interrupt instead of dropping it; it is re-asserted at the end so the
    // shutdown calls below are not disturbed by a pending interrupt.
    boolean interrupted = false;

    // Join all launched threads
    // needed for when we time out
    // and we need to release containers
    for (Thread launchThread : launchThreads) {
        try {
            launchThread.join(10000);
        } catch (InterruptedException e) {
            interrupted = true;
            LOG.info("Exception thrown in thread join: " + e.getMessage(), e);
        }
    }

    // When the application completes, it should stop all running containers
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // When the application completes, it should send a finish application
    // signal to the RM
    LOG.info("Application completed. Signalling finish to RM");

    //TODO Remove MOYA NODE
    try {
        DeleteGroup.main(new String[] { ZKHosts, "moya" });
    } catch (Exception e1) {
        // Best effort: a failed cleanup must not prevent unregistering from the RM.
        LOG.error("Failed to remove MOYA node", e1);
    }

    FinalApplicationStatus appStatus;
    String appMessage = null;
    success = true;
    if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    } else {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed="
                + numFailedContainers.get();
        success = false;
    }
    try {
        resourceManager.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException ex) {
        LOG.error("Failed to unregister application", ex);
    } catch (IOException e) {
        LOG.error("Failed to unregister application", e);
    }

    done = true;
    resourceManager.stop();

    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}

From source file:de.huberlin.wbi.hiway.am.HiWay.java

License:Apache License

/**
 * Wraps up the workflow run and unregisters this application master.
 *
 * <p>In order: logs the elapsed time and (if any) the output files, joins the launch
 * threads, stops {@code nmClientAsync}, derives the final status from the container
 * counters, closes the stat log and stages out the report files (plus a JSON summary
 * when {@code summaryPath} is set), unregisters from the ResourceManager, and stops
 * {@code amRMClient}.
 *
 * <p>Any interruption, JSON, I/O, or unregister failure prints diagnostics and terminates
 * the JVM with exit code -1.
 */
private void finish() {
    // Elapsed time since the RM client's start time.
    writeEntryToLog(new JsonReportEntry(getRunId(), null, null, null, null, null, HiwayDBI.KEY_WF_TIME,
            Long.toString(System.currentTimeMillis() - amRMClient.getStartTime())));

    Collection<Data> producedFiles = getOutputFiles();
    if (!producedFiles.isEmpty()) {
        // substring drops the first and last character of the collection's string form
        String listing = getOutputFiles().toString();
        writeEntryToLog(new JsonReportEntry(getRunId(), null, null, null, null, null, HiwayDBI.KEY_WF_OUTPUT,
                listing.substring(1, listing.length() - 1)));
    }

    // Give every launch thread up to ten seconds to finish; this matters when we timed
    // out and still have to release containers.
    for (Thread launcher : launchThreads) {
        try {
            launcher.join(10000);
        } catch (InterruptedException interruption) {
            System.err.println("Exception thrown in thread join: " + interruption.getMessage());
            interruption.printStackTrace();
            System.exit(-1);
        }
    }

    // All running containers are stopped once the application has completed.
    System.out.println("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // Next, the RM is told that the application has finished.
    System.out.println("Application completed. Signalling finish to RM");

    FinalApplicationStatus appStatus;
    String appMessage = null;
    success = true;

    System.out.println("Failed Containers: " + numFailedContainers.get());
    System.out.println("Completed Containers: " + numCompletedContainers.get());

    int numTotalContainers = scheduler.getNumberOfTotalTasks();

    System.out.println("Total Scheduled Containers: " + numTotalContainers);

    // The run failed if any container failed or not every scheduled task completed.
    if (numFailedContainers.get() != 0 || numCompletedContainers.get() != numTotalContainers) {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed="
                + numFailedContainers.get() + ", killed=" + numKilledContainers.get();
        success = false;
    } else {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    }

    try {
        statLog.close();
        federatedReport.stageOut();
        if (summaryPath != null) {
            String stdoutLocation = hdfsApplicationDirectory + "/AppMaster.stdout";
            String stderrLocation = hdfsApplicationDirectory + "/AppMaster.stderr";
            String statLogLocation = hdfsApplicationDirectory + "/" + appId + ".log";

            // Write a JSON summary listing the output paths and the log locations.
            try (BufferedWriter summaryWriter = new BufferedWriter(new FileWriter(summaryPath.toString()))) {
                Collection<String> output = new ArrayList<>();
                for (Data produced : getOutputFiles()) {
                    output.add(produced.getHdfsPath().toString());
                }
                JSONObject summary = new JSONObject();
                try {
                    summary.put("output", output);
                    summary.put("stdout", stdoutLocation);
                    summary.put("stderr", stderrLocation);
                    summary.put("statlog", statLogLocation);
                } catch (JSONException jsonFailure) {
                    jsonFailure.printStackTrace();
                    System.exit(-1);
                }
                summaryWriter.write(summary.toString());
            }
            new Data("AppMaster.stdout").stageOut();
            new Data("AppMaster.stderr").stageOut();
            new Data(summaryPath).stageOut();
        }
    } catch (IOException ioFailure) {
        System.err.println("Error when attempting to stage out federated output log.");
        ioFailure.printStackTrace();
        System.exit(-1);
    }

    try {
        amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException | IOException unregisterFailure) {
        System.err.println("Failed to unregister application");
        unregisterFailure.printStackTrace();
        System.exit(-1);
    }

    amRMClient.stop();
}

From source file:de.huberlin.wbi.hiway.am.WorkflowDriver.java

License:Apache License

/**
 * Wraps up the workflow run and unregisters this application master.
 *
 * <p>In order: logs the elapsed time, joins the launch threads, stops
 * {@code nmClientAsync}, derives the final status from the logger's container counters,
 * logs the output files (if any), closes the stat log and stages out the report files
 * (plus a JSON summary when {@code summaryPath} is set), unregisters from the
 * ResourceManager, and stops {@code amRMClient} and, when present, {@code timelineClient}.
 *
 * <p>Any interruption, JSON, I/O, or unregister failure prints diagnostics to stdout and
 * terminates the JVM with exit code -1.
 */
protected void finish() {
    // Elapsed time since the RM client's start time.
    logger.writeEntryToLog(new JsonReportEntry(getRunId(), null, null, null, null, null,
            HiwayDBI.KEY_WF_TIME, Long.toString(System.currentTimeMillis() - amRMClient.getStartTime())));

    // Give every launch thread up to ten seconds to finish; this matters when we timed
    // out and still have to release containers.
    for (Thread launcher : launchThreads) {
        try {
            launcher.join(10000);
        } catch (InterruptedException interruption) {
            Logger.writeToStdout("Exception thrown in thread join: " + interruption.getMessage());
            interruption.printStackTrace(System.out);
            System.exit(-1);
        }
    }

    // All running containers are stopped once the application has completed.
    Logger.writeToStdout("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // Next, the RM is told that the application has finished.
    Logger.writeToStdout("Application completed. Signalling finish to RM");

    FinalApplicationStatus appStatus;
    String appMessage = null;
    success = true;

    WorkflowDriver.Logger.writeToStdout("Failed Containers: " + logger.numFailedContainers.get());
    WorkflowDriver.Logger.writeToStdout("Completed Containers: " + logger.numCompletedContainers.get());

    int numTotalContainers = scheduler.getNumberOfTotalTasks();

    // The run failed if any container failed or not every scheduled task completed.
    if (logger.getNumFailedContainers().get() != 0
            || logger.getNumCompletedContainers().get() != numTotalContainers) {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + logger.getNumCompletedContainers().get() + ", allocated="
                + logger.getNumAllocatedContainers().get() + ", failed=" + logger.getNumFailedContainers().get()
                + ", killed=" + logger.getNumKilledContainers().get();
        success = false;
    } else {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    }

    Collection<String> output = getOutput();
    Collection<Data> producedFiles = getOutputFiles();
    if (!producedFiles.isEmpty()) {
        // substring drops the first and last character of the collection's string form
        String listing = producedFiles.toString();
        logger.writeEntryToLog(new JsonReportEntry(getRunId(), null, null, null, null, null,
                HiwayDBI.KEY_WF_OUTPUT, listing.substring(1, listing.length() - 1)));
    }

    try {
        logger.statLog.close();
        logger.federatedReport.stageOut();
        if (summaryPath != null) {
            String stdoutLocation = hdfsApplicationDirectory + "/AppMaster.stdout";
            String stderrLocation = hdfsApplicationDirectory + "/AppMaster.stderr";
            String statLogLocation = hdfsApplicationDirectory + "/" + appId + ".log";

            // Write a JSON summary listing the outputs and the log locations.
            try (BufferedWriter summaryWriter = new BufferedWriter(new FileWriter(summaryPath.toString()))) {
                JSONObject summary = new JSONObject();
                try {
                    summary.put("output", output);
                    summary.put("stdout", stdoutLocation);
                    summary.put("stderr", stderrLocation);
                    summary.put("statlog", statLogLocation);
                } catch (JSONException jsonFailure) {
                    jsonFailure.printStackTrace(System.out);
                    System.exit(-1);
                }
                summaryWriter.write(summary.toString());
            }
            new Data("AppMaster.stdout").stageOut();
            new Data("AppMaster.stderr").stageOut();
            new Data(summaryPath).stageOut();
        }
    } catch (IOException ioFailure) {
        Logger.writeToStdout("Error when attempting to stage out federated output log.");
        ioFailure.printStackTrace(System.out);
        System.exit(-1);
    }

    try {
        amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException | IOException unregisterFailure) {
        Logger.writeToStdout("Failed to unregister application");
        unregisterFailure.printStackTrace(System.out);
        System.exit(-1);
    }

    amRMClient.stop();

    if (timelineClient != null) {
        timelineClient.stop();
    }
}

From source file:edu.cmu.graphchi.toolkits.collaborative_filtering.yarn.ApplicationMaster.java

License:Apache License

/**
 * Winds the application master down and reports the final status to the ResourceManager.
 *
 * <p>Steps, in order: wait up to 10 seconds for each launch thread, stop
 * {@code nmClientAsync}, derive the final status from the container counters, unregister
 * from the ResourceManager, and stop {@code amRMClient}.
 *
 * <p>The {@code success} field reflects the outcome. Failures of the unregister call are
 * logged, not rethrown. If the calling thread is interrupted while joining, the remaining
 * steps still run and the interrupt status is restored just before this method returns.
 */
private void finish() {
    // Remember an interrupt instead of dropping it; it is re-asserted at the end so the
    // shutdown calls below are not disturbed by a pending interrupt.
    boolean interrupted = false;

    // Join all launched threads
    // needed for when we time out
    // and we need to release containers
    for (Thread launchThread : launchThreads) {
        try {
            launchThread.join(10000);
        } catch (InterruptedException e) {
            interrupted = true;
            LOG.info("Exception thrown in thread join: " + e.getMessage(), e);
        }
    }

    // When the application completes, it should stop all running containers
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // When the application completes, it should send a finish application
    // signal to the RM
    LOG.info("Application completed. Signalling finish to RM");

    FinalApplicationStatus appStatus;
    String appMessage = null;
    success = true;
    if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    } else {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + numCompletedContainers.get() + ", failed=" + numFailedContainers.get();
        success = false;
    }
    try {
        amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException ex) {
        LOG.error("Failed to unregister application", ex);
    } catch (IOException e) {
        LOG.error("Failed to unregister application", e);
    }

    amRMClient.stop();

    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}

From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java

License:Apache License

/**
 * Clean up, whether or not we were successful.
 *
 * <p>Steps, in order: wait up to 10 seconds for each launch thread, stop
 * {@code nmClientAsync}, derive the final status from the container counters, unregister
 * from the ResourceManager, and stop its client.
 *
 * <p>The {@code success} field reflects the outcome and {@code done} is set to
 * {@code true} after the unregister attempt. Failures of the unregister call are logged,
 * not rethrown. If the calling thread is interrupted while joining, the remaining steps
 * still run and the interrupt status is restored just before this method returns.
 */
private void finish() {
    // Remember an interrupt instead of dropping it; it is re-asserted at the end so the
    // shutdown calls below are not disturbed by a pending interrupt.
    boolean interrupted = false;

    // Join all launched threads
    // needed for when we time out
    // and we need to release containers
    for (Thread launchThread : launchThreads) {
        try {
            launchThread.join(10000);
        } catch (InterruptedException e) {
            interrupted = true;
            // Hand the throwable to the logger so it renders the stack trace itself.
            LOG.error("Exception thrown in thread join: " + e.getMessage(), e);
        }
    }

    // When the application completes, it should stop all running containers
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // When the application completes, it should send a finish application
    // signal to the RM
    LOG.info("Application completed. Signalling finish to RM");

    FinalApplicationStatus appStatus;
    String appMessage = null;
    success = true;
    if (numFailedContainers.get() == 0 && numCompletedContainers.get() == numTotalContainers) {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    } else {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed="
                + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed="
                + numFailedContainers.get();
        success = false;
    }
    try {
        resourceManager.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException ex) {
        LOG.error("Failed to unregister application", ex);
    } catch (IOException e) {
        LOG.error("Failed to unregister application", e);
    }
    done = true;
    resourceManager.stop();

    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}

From source file:gobblin.yarn.GobblinYarnAppLauncher.java

License:Apache License

/**
 * Reacts to a newly arrived {@link ApplicationReport}.
 *
 * <p>Logs the application state and resets the report-failure counter. When the state is
 * FINISHED, FAILED, or KILLED, the application is marked completed, its final status (and
 * the diagnostics, if it failed) is logged, and this launcher is stopped. An e-mail is
 * sent afterwards when {@code emailNotificationOnShutdown} is set, whether or not the
 * stop succeeded.
 *
 * @param applicationReportArrivalEvent event carrying the application report
 */
@Subscribe
public void handleApplicationReportArrivalEvent(ApplicationReportArrivalEvent applicationReportArrivalEvent) {
    ApplicationReport report = applicationReportArrivalEvent.getApplicationReport();

    YarnApplicationState state = report.getYarnApplicationState();
    LOGGER.info("Gobblin Yarn application state: " + state.toString());

    // A report arrived successfully, so the consecutive-failure count starts over.
    this.getApplicationReportFailureCount.set(0);

    boolean terminated = state == YarnApplicationState.FINISHED || state == YarnApplicationState.FAILED
            || state == YarnApplicationState.KILLED;
    if (!terminated) {
        return;
    }

    applicationCompleted = true;

    FinalApplicationStatus finalStatus = report.getFinalApplicationStatus();
    LOGGER.info("Gobblin Yarn application finished with final status: " + finalStatus.toString());
    if (finalStatus == FinalApplicationStatus.FAILED) {
        LOGGER.error("Gobblin Yarn application failed for the following reason: " + report.getDiagnostics());
    }

    try {
        GobblinYarnAppLauncher.this.stop();
    } catch (IOException closeFailure) {
        LOGGER.error("Failed to close the " + GobblinYarnAppLauncher.class.getSimpleName(), closeFailure);
    } catch (TimeoutException stopTimeout) {
        LOGGER.error("Timeout in stopping the service manager", stopTimeout);
    } finally {
        if (this.emailNotificationOnShutdown) {
            sendEmailOnShutdown(Optional.of(report));
        }
    }
}

From source file:husky.server.HuskyApplicationMaster.java

License:Apache License

/**
 * Runs the application master from start-up to unregistration.
 *
 * <p>Creates and starts the asynchronous ResourceManager and NodeManager clients,
 * registers with the ResourceManager, requests one container per entry in
 * {@code mWorkerInfos}, and finally unregisters with SUCCEEDED when the listener's final
 * success count equals the number of workers, or FAILED otherwise.
 *
 * <p>NOTE(review): {@code getFinalNumSuccess()} is called right after the container
 * requests are queued; presumably it blocks until the containers finish — confirm.
 */
private void run() throws YarnException, IOException, InterruptedException, ExecutionException {
    LOG.info("Run App Master");

    // ResourceManager side: callback handler plus async client.
    mRMClientListener = new HuskyRMCallbackHandler(this);
    mRMClient = AMRMClientAsync.createAMRMClientAsync(1000, mRMClientListener);
    mRMClient.init(mYarnConf);
    mRMClient.start();

    // NodeManager side: callback handler plus async client.
    mContainerListener = new HuskyNMCallbackHandler();
    mNMClient = NMClientAsync.createNMClientAsync(mContainerListener);
    mNMClient.init(mYarnConf);
    mNMClient.start();

    // Register with ResourceManager
    LOG.info("registerApplicationMaster started");
    mRMClient.registerApplicationMaster("", 0, "");
    LOG.info("registerApplicationMaster done");

    // One container request per worker; the first element of each pair selects the target.
    LOG.info("Ask RM for " + mWorkerInfos.size() + " containers");
    for (Pair<String, Integer> workerInfo : mWorkerInfos) {
        mRMClient.addContainerRequest(setupContainerAskForRMSpecific(workerInfo.getFirst()));
    }

    FinalApplicationStatus finalStatus;
    if (mRMClientListener.getFinalNumSuccess() == mWorkerInfos.size()) {
        finalStatus = FinalApplicationStatus.SUCCEEDED;
    } else {
        finalStatus = FinalApplicationStatus.FAILED;
    }

    mRMClient.unregisterApplicationMaster(finalStatus, mRMClientListener.getStatusReport(), null);
}

From source file:io.hops.tensorflow.ApplicationMaster.java

License:Apache License

/**
 * Waits for the application to complete, then shuts down and unregisters.
 *
 * <p>Polls every 200 ms until {@code done} is set, all workers have completed, any
 * container has failed, or container allocation has timed out. Afterwards it publishes
 * the attempt-end timeline event (if a timeline client exists), joins the launch threads,
 * stops the NodeManager client, unregisters from the ResourceManager with a status
 * derived from the counters, and stops the remaining clients.
 *
 * <p>Failures of the unregister call are logged, not rethrown. An interrupt received
 * while sleeping or joining does not cut the shutdown short; the interrupt status is
 * restored just before this method returns.
 *
 * @return {@code true} if no container failed and every worker completed
 */
private boolean finish() {
    // Remember an interrupt instead of dropping it; it is re-asserted before returning so
    // the shutdown calls below are not disturbed by a pending interrupt.
    boolean interrupted = false;

    // wait for completion. finish if any container fails
    while (!done && numCompletedWorkers.get() != numWorkers && numFailedContainers.get() <= 0) {
        if (numAllocatedContainers.get() != numTotalContainers) {
            long timeLeft = appMasterStartTime + allocationTimeout - System.currentTimeMillis();
            LOG.info("Awaits container allocation, timeLeft=" + timeLeft);
            if (timeLeft < 0) {
                LOG.warn("Container allocation timeout. Finish application attempt");
                break;
            }
        }
        try {
            Thread.sleep(200);
        } catch (InterruptedException ex) {
            // Keep polling as before, but do not lose the interrupt.
            interrupted = true;
        }
    }

    if (timelineHandler.isClientNotNull()) {
        timelineHandler.publishApplicationAttemptEvent(YarntfEvent.YARNTF_APP_ATTEMPT_END);
    }

    // Join all launched threads
    // needed for when we time out
    // and we need to release containers
    for (Thread launchThread : launchThreads) {
        try {
            launchThread.join(10000);
        } catch (InterruptedException e) {
            interrupted = true;
            LOG.info("Exception thrown in thread join: " + e.getMessage(), e);
        }
    }

    // When the application completes, it should stop all running containers
    LOG.info("Application completed. Stopping running containers");
    nmWrapper.getClient().stop();

    // When the application completes, it should send a finish application
    // signal to the RM
    LOG.info("Application completed. Signalling finish to RM");

    FinalApplicationStatus appStatus;
    String appMessage = null;
    boolean success = true;
    if (numFailedContainers.get() == 0 && numCompletedWorkers.get() == numWorkers) {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    } else {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed(workers)="
                + numCompletedContainers.get() + "(" + numCompletedWorkers.get() + "), allocated="
                + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get();
        LOG.info(appMessage);
        success = false;
    }
    try {
        rmWrapper.getClient().unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException ex) {
        LOG.error("Failed to unregister application", ex);
    } catch (IOException e) {
        LOG.error("Failed to unregister application", e);
    }

    rmWrapper.getClient().stop();

    // Stop Timeline Client
    if (timelineHandler.isClientNotNull()) {
        timelineHandler.stopClient();
    }

    if (interrupted) {
        Thread.currentThread().interrupt();
    }

    return success;
}

From source file:ml.shifu.guagua.yarn.GuaguaAppMaster.java

License:Apache License

/**
 * Shuts the application down once it is done: stops the NodeManager client, unregisters
 * from the ResourceManager with the final status, and stops the ResourceManager client.
 *
 * <p>The run counts as successful when the successful-container count equals the number
 * of containers to launch; otherwise FAILED is reported together with a diagnostics
 * message. Failures of the unregister call are logged, not rethrown.
 *
 * @return if all containers succeed
 */
private boolean finish() {
    // Running containers are stopped first.
    LOG.info("Application completed. Stopping running containers");
    getNmClientAsync().stop();

    // Then the RM is told that the application has finished.
    LOG.info("Application completed. Signalling finish to RM");

    boolean allSucceeded = getSuccessfulCount().get() == getContainersToLaunch();

    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    String diagnostics = null;
    if (!allSucceeded) {
        finalStatus = FinalApplicationStatus.FAILED;
        diagnostics = String.format("Diagnostics total=%s, completed=%s, failed=%s.", getContainersToLaunch(),
                getCompletedCount().get(), getFailedCount().get());
    }

    try {
        getAmRMClient().unregisterApplicationMaster(finalStatus, diagnostics, this.appMasterTrackingUrl);
    } catch (YarnException yarnFailure) {
        LOG.error("Failed to unregister application", yarnFailure);
    } catch (IOException ioFailure) {
        LOG.error("Failed to unregister application", ioFailure);
    }

    getAmRMClient().stop();
    return allSucceeded;
}

From source file:org.apache.drill.yarn.appMaster.AMYarnFacadeImpl.java

License:Apache License

/**
 * Stops the node manager client, deregisters the application from YARN, and stops the
 * resource manager client.
 *
 * @param succeeded whether to report SUCCEEDED ({@code true}) or FAILED ({@code false})
 * @param msg message to report; when {@code null}, a default message matching the outcome
 *        is used
 * @throws YarnFacadeException if the deregistration call fails
 */
@Override
public void finish(boolean succeeded, String msg) throws YarnFacadeException {
    // Stop the Node Manager client.
    nodeMgr.stop();

    // Pick the final status and the default message for that outcome.
    FinalApplicationStatus status;
    String appMsg;
    if (succeeded) {
        status = FinalApplicationStatus.SUCCEEDED;
        appMsg = "Drill Cluster Shut-Down";
    } else {
        status = FinalApplicationStatus.FAILED;
        appMsg = "Drill Cluster Fatal Error - check logs";
    }

    // A caller-supplied message replaces the default.
    if (msg != null) {
        appMsg = msg;
    }

    // Deregister the app from YARN.
    try {
        resourceMgr.unregisterApplicationMaster(status, appMsg, "");
    } catch (YarnException | IOException e) {
        throw new YarnFacadeException("Deregister AM failed", e);
    }

    // Stop the Resource Manager client
    resourceMgr.stop();
}