Example usage for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED

List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED

Introduction

On this page you can find example usages of org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED.

Prototype

FinalApplicationStatus FAILED

To view the source code for org.apache.hadoop.yarn.api.records FinalApplicationStatus FAILED, click Source Link.

Document

Application which failed.

Usage

From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java

License:Apache License

/**
 * Runs the ApplicationMaster.
 *
 * <p>Starts the NodeManager client, registers with the ResourceManager, requests
 * {@code taskManagerCount} worker containers, prepares the local resources (Flink jar,
 * modified configuration, shipped files), hands off to
 * {@code allocateOutstandingContainer} and then polls the ResourceManager until
 * {@code taskManagerCount} containers have completed.
 *
 * <p>Unless the AM was closed in the meantime, it finally unregisters with
 * {@link FinalApplicationStatus#FAILED}, closes itself and stops the RPC server.
 *
 * @throws Exception if one of the client calls or the polling sleep fails
 */
private void run() throws Exception {
    heapLimit = Utils.calculateHeapSize(memoryPerTaskManager);

    nmClient = NMClient.createNMClient();
    nmClient.init(conf);
    nmClient.start();
    nmClient.cleanupRunningContainersOnStop(true);

    // Register with ResourceManager
    String url = "http://" + applicationMasterHost + ":" + jobManagerWebPort;
    LOG.info("Registering ApplicationMaster with tracking url " + url);
    rmClient.registerApplicationMaster(applicationMasterHost, 0, url);

    // Priority for worker containers - priorities are intra-application
    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(0);

    // Resource requirements for worker containers
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(memoryPerTaskManager);
    capability.setVirtualCores(coresPerTaskManager);

    // Make container requests to ResourceManager
    for (int i = 0; i < taskManagerCount; ++i) {
        ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority);
        LOG.info("Requesting TaskManager container " + i);
        rmClient.addContainerRequest(containerAsk);
    }

    LocalResource flinkJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);

    // register Flink Jar with remote HDFS
    final Path remoteJarPath = new Path(remoteFlinkJarPath);
    Utils.registerLocalResource(fs, remoteJarPath, flinkJar);

    // register conf with local fs.
    Utils.setupLocalResource(conf, fs, appId, new Path("file://" + currDir + "/flink-conf-modified.yaml"),
            flinkConf, new Path(clientHomeDir));
    LOG.info("Prepared local resource for modified yaml: " + flinkConf);

    hasLogback = new File(currDir + "/logback.xml").exists();
    // prepare the files to ship
    LocalResource[] remoteShipRsc = null;
    String[] remoteShipPaths = shipListString.split(",");
    if (!shipListString.isEmpty()) {
        remoteShipRsc = new LocalResource[remoteShipPaths.length];
        // Empty entries are skipped, so the resources are stored densely from index 0.
        int nextRsc = 0;
        for (String remoteShipPathStr : remoteShipPaths) {
            if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) {
                continue;
            }
            remoteShipRsc[nextRsc] = Records.newRecord(LocalResource.class);
            Path remoteShipPath = new Path(remoteShipPathStr);
            Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[nextRsc]);
            nextRsc++;
        }
    }
    // copy resources to the TaskManagers.
    taskManagerLocalResources = new HashMap<String, LocalResource>(2);
    taskManagerLocalResources.put("flink.jar", flinkJar);
    taskManagerLocalResources.put("flink-conf.yaml", flinkConf);

    // add ship resources
    if (!shipListString.isEmpty()) {
        Preconditions.checkNotNull(remoteShipRsc);
        // Walk the paths with the same skip rule as above so that every path is paired
        // with the resource that was registered for it. Indexing both arrays in
        // lock-step would mis-pair them (and call new Path("")) once an entry is skipped.
        int rscIndex = 0;
        for (String remoteShipPathStr : remoteShipPaths) {
            if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) {
                continue;
            }
            taskManagerLocalResources.put(new Path(remoteShipPathStr).getName(), remoteShipRsc[rscIndex]);
            rscIndex++;
        }
    }
    completedContainers = 0;

    // Obtain allocated containers and launch
    StringBuffer containerDiag = new StringBuffer(); // diagnostics log for the containers.
    allocateOutstandingContainer(containerDiag);
    LOG.info("Allocated all initial containers");

    // Now wait for containers to complete
    while (completedContainers < taskManagerCount) {
        // Cast before dividing: an integer division would report 0 progress until the very end.
        AllocateResponse response = rmClient.allocate((float) completedContainers / taskManagerCount);
        for (ContainerStatus status : response.getCompletedContainersStatuses()) {
            ++completedContainers;
            LOG.info("Completed container " + status.getContainerId() + ". Total Completed:"
                    + completedContainers);
            LOG.info("Diagnostics " + status.getDiagnostics());
            logDeadContainer(status, containerDiag);
        }
        Thread.sleep(5000);
    }
    if (isClosed) {
        return;
    }
    // Un-register with ResourceManager
    final String diagnosticsMessage = "Application Master shut down after all " + "containers finished\n"
            + containerDiag.toString();
    LOG.info("Diagnostics message: " + diagnosticsMessage);
    rmClient.unregisterApplicationMaster(FinalApplicationStatus.FAILED, diagnosticsMessage, "");
    this.close();
    amRpcServer.stop(); // we need to manually stop the RPC service. Usually, the Client stops the RPC,
    // but at this point, the AM has been shut down (for some reason).
    LOG.info("Application Master shutdown completed.");
}

From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java

License:Apache License

/**
 * Handles a client-requested shutdown of the ApplicationMaster.
 *
 * <p>Unregisters from the ResourceManager with {@link FinalApplicationStatus#SUCCEEDED},
 * or with {@link FinalApplicationStatus#FAILED} if the failed flag was set, then closes
 * this ApplicationMaster.
 *
 * @return a {@code BooleanValue} wrapping {@code true}
 * @throws Exception if unregistering or closing fails
 */
@Override
public BooleanValue shutdownAM() throws Exception {
    LOG.info("Client requested shutdown of AM");
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    String finalMessage = "";
    if (isFailed) {
        finalStatus = FinalApplicationStatus.FAILED;
        finalMessage = "Application Master failed";
        // notifyAll() requires ownership of the object's monitor; calling it unsynchronized
        // throws IllegalMonitorStateException and would skip the unregister call below.
        // The monitor is captured BEFORE the flag is reassigned so the notification goes to
        // the object that was current while the flag was set.
        // NOTE(review): presumably waiters block on that object - confirm against the
        // waiting side, and consider a dedicated lock object instead of the flag itself.
        final Object failedMonitor = isFailed;
        synchronized (failedMonitor) {
            isFailed = false; // allow a proper shutdown
            failedMonitor.notifyAll();
        }
    }
    rmClient.unregisterApplicationMaster(finalStatus, finalMessage, "");
    this.close();
    return new BooleanValue(true);
}

From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java

License:Apache License

/**
 * Entry point of the ApplicationMaster process.
 *
 * <p>Creates a remote user for the name found in the {@code Client.ENV_CLIENT_USERNAME}
 * environment variable, copies the current user's tokens to it and runs the
 * ApplicationMaster as that user. Failures are reported either through the
 * ResourceManager (when no ApplicationMaster instance exists yet) or through the
 * ApplicationMaster's own message system.
 *
 * @param args command line arguments (not read by this method)
 * @throws Exception if setting up the user context fails
 */
public static void main(String[] args) throws Exception {
    // execute Application Master using the client's user
    final String yarnClientUsername = System.getenv(Client.ENV_CLIENT_USERNAME);
    LOG.info("YARN daemon runs as '" + UserGroupInformation.getCurrentUser().getShortUserName() + "' setting"
            + " user to execute Flink ApplicationMaster/JobManager to '" + yarnClientUsername + "'");

    final UserGroupInformation clientUser = UserGroupInformation.createRemoteUser(yarnClientUsername);
    for (Token<? extends TokenIdentifier> token : UserGroupInformation.getCurrentUser().getTokens()) {
        clientUser.addToken(token);
    }

    clientUser.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            AMRMClient<ContainerRequest> rmClient = null;
            ApplicationMaster am = null;
            try {
                Configuration conf = Utils.initializeYarnConfiguration();
                rmClient = AMRMClient.createAMRMClient();
                rmClient.init(conf);
                rmClient.start();

                // run the actual Application Master
                am = new ApplicationMaster(conf);
                am.generateConfigurationFile();
                am.startJobManager();
                am.setRMClient(rmClient);
                am.run();
            } catch (Throwable failure) {
                LOG.error("Error while running the application master", failure);

                final boolean amCreated = (am != null);
                if (rmClient == null) {
                    LOG.error("Unable to unregister AM since the RM client is not available");
                } else if (!amCreated) {
                    // the AM is not available. Report error through the unregister function.
                    try {
                        final String diagnostics = "Flink YARN Application master"
                                + " stopped unexpectedly with an exception.\n"
                                + StringUtils.stringifyException(failure);
                        rmClient.unregisterApplicationMaster(FinalApplicationStatus.FAILED, diagnostics, "");
                    } catch (Exception unregisterFailure) {
                        LOG.error("Unable to fail the application master", unregisterFailure);
                    }
                    LOG.info("AM unregistered from RM");
                    return null;
                }

                if (amCreated) {
                    // the AM exists: publish the error through it and keep its RPC service up
                    LOG.info("Writing error into internal message system");
                    am.setFailed(true);
                    am.addMessage(new Message("The application master failed with an exception:\n"
                            + StringUtils.stringifyException(failure)));
                    am.keepRPCAlive();
                }
            }
            return null;
        }
    });
}

From source file:org.apache.flink.yarn.FlinkYarnCluster.java

License:Apache License

/**
 * Shutdown the YARN cluster./*  w w  w  .ja  v a2  s  .  c o m*/
 * @param failApplication whether we should fail the YARN application (in case of errors in Flink)
 */
@Override
public void shutdown(boolean failApplication) {
    if (!isConnected) {
        throw new IllegalStateException("The cluster has been connected to the ApplicationMaster.");
    }

    if (hasBeenShutDown.getAndSet(true)) {
        return;
    }

    try {
        Runtime.getRuntime().removeShutdownHook(clientShutdownHook);
    } catch (IllegalStateException e) {
        // we are already in the shutdown hook
    }

    if (actorSystem != null) {
        LOG.info("Sending shutdown request to the Application Master");
        if (applicationClient != ActorRef.noSender()) {
            try {
                FinalApplicationStatus finalStatus;
                if (failApplication) {
                    finalStatus = FinalApplicationStatus.FAILED;
                } else {
                    finalStatus = FinalApplicationStatus.SUCCEEDED;
                }
                Future<Object> response = Patterns.ask(applicationClient, new YarnMessages.LocalStopYarnSession(
                        finalStatus, "Flink YARN Client requested shutdown"), new Timeout(akkaDuration));
                Await.ready(response, akkaDuration);
            } catch (Exception e) {
                LOG.warn("Error while stopping YARN Application Client", e);
            }
        }

        actorSystem.shutdown();
        actorSystem.awaitTermination();

        actorSystem = null;
    }

    LOG.info("Deleting files in " + sessionFilesDir);
    try {
        FileSystem shutFS = FileSystem.get(hadoopConfig);
        shutFS.delete(sessionFilesDir, true); // delete conf and jar file.
        shutFS.close();
    } catch (IOException e) {
        LOG.error("Could not delete the Flink jar and configuration files in HDFS..", e);
    }

    try {
        actorRunner.join(1000); // wait for 1 second
    } catch (InterruptedException e) {
        LOG.warn("Shutdown of the actor runner was interrupted", e);
        Thread.currentThread().interrupt();
    }
    try {
        pollingRunner.stopRunner();
        pollingRunner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Shutdown of the polling runner was interrupted", e);
        Thread.currentThread().interrupt();
    }

    LOG.info("YARN Client is shutting down");
    yarnClient.stop(); // actorRunner is using the yarnClient.
    yarnClient = null; // set null to clearly see if somebody wants to access it afterwards.
}

From source file:org.apache.flink.yarn.YarnFlinkResourceManager.java

License:Apache License

/**
 * Maps a Flink application status onto the matching YARN final application status.
 *
 * @param status The Flink application status, may be {@code null}.
 * @return The corresponding YARN application status; {@code UNDEFINED} for {@code null}
 *         or for any status without an explicit mapping.
 */
private FinalApplicationStatus getYarnStatus(ApplicationStatus status) {
    if (status != null) {
        switch (status) {
        case SUCCEEDED:
            return FinalApplicationStatus.SUCCEEDED;
        case FAILED:
            return FinalApplicationStatus.FAILED;
        case CANCELED:
            return FinalApplicationStatus.KILLED;
        default:
            // no explicit mapping: fall through to UNDEFINED below
            break;
        }
    }
    return FinalApplicationStatus.UNDEFINED;
}

From source file:org.apache.giraph.yarn.GiraphApplicationMaster.java

License:Apache License

/**
 * Wraps up the application: stops the NodeManager client, reports the final status to
 * the ResourceManager and stops the ResourceManager client.
 *
 * @return {@code true} if no container failed and the number of completed containers
 *         equals the number of containers to launch, {@code false} otherwise
 */
private boolean finish() {
    // When the application completes, it should stop all running containers
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // When the application completes, it should send a finish application
    // signal to the RM
    LOG.info("Application completed. Signalling finish to RM");

    final boolean success = failedCount.get() == 0 && completedCount.get() == containersToLaunch;

    final FinalApplicationStatus finalStatus;
    final String diagnostics;
    if (success) {
        finalStatus = FinalApplicationStatus.SUCCEEDED;
        diagnostics = null;
    } else {
        finalStatus = FinalApplicationStatus.FAILED;
        diagnostics = "Diagnostics." + ", total=" + containersToLaunch + ", completed=" + completedCount.get()
                + ", failed=" + failedCount.get();
    }

    // A failed unregister is only logged; the RM client is stopped either way.
    try {
        amRMClient.unregisterApplicationMaster(finalStatus, diagnostics, null);
    } catch (YarnException yarnFailure) {
        LOG.error("Failed to unregister application", yarnFailure);
    } catch (IOException ioFailure) {
        LOG.error("Failed to unregister application", ioFailure);
    }

    amRMClient.stop();
    return success;
}

From source file:org.apache.hama.bsp.BSPApplicationMaster.java

License:Apache License

/**
 * Stops the servers and the thread pool, then sends a finish request carrying the final
 * application status derived from the job state.
 *
 * @throws YarnException declared for the finish request call
 * @throws IOException declared for the finish request call
 */
private void cleanup() throws YarnException, IOException {
    syncServer.stop();

    final boolean poolStillActive = threadPool != null && !threadPool.isShutdown();
    if (poolStillActive) {
        threadPool.shutdownNow();
    }

    clientServer.stop();
    taskServer.stop();

    FinishApplicationMasterRequest finishRequest = Records.newRecord(FinishApplicationMasterRequest.class);

    // Every state other than SUCCESS and KILLED is reported as FAILED.
    final FinalApplicationStatus finalStatus;
    switch (job.getState()) {
    case SUCCESS:
        finalStatus = FinalApplicationStatus.SUCCEEDED;
        break;
    case KILLED:
        finalStatus = FinalApplicationStatus.KILLED;
        break;
    case FAILED:
    default:
        finalStatus = FinalApplicationStatus.FAILED;
        break;
    }
    finishRequest.setFinalApplicationStatus(finalStatus);

    this.amrmRPC.finishApplicationMaster(finishRequest);
}

From source file:org.apache.hoya.yarn.appmaster.HoyaAppMaster.java

License:Apache License

/**
 * shut down the cluster /*  w w w  .jav a  2s  .  co  m*/
 */
private synchronized void finish() {
    FinalApplicationStatus appStatus;
    log.info("Triggering shutdown of the AM: {}", amCompletionReason);

    String appMessage = amCompletionReason;
    //stop the daemon & grab its exit code
    int exitCode = amExitCode;
    success = exitCode == 0 || exitCode == 3;

    appStatus = success ? FinalApplicationStatus.SUCCEEDED : FinalApplicationStatus.FAILED;
    if (!spawnedProcessExitedBeforeShutdownTriggered) {
        //stopped the forked process but don't worry about its exit code
        exitCode = stopForkedProcess();
        log.debug("Stopped forked process: exit code={}", exitCode);
    }

    //stop any launches in progress
    launchService.stop();

    //now release all containers
    releaseAllContainers();

    // When the application completes, it should send a finish application
    // signal to the RM
    log.info("Application completed. Signalling finish to RM");

    //if there were failed containers and the app isn't already down as failing, it is now
    int failedContainerCount = appState.getFailedCountainerCount();
    if (failedContainerCount != 0 && appStatus == FinalApplicationStatus.SUCCEEDED) {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Completed with exit code =  " + exitCode + " - " + getContainerDiagnosticInfo();
        success = false;
    }
    try {
        log.info("Unregistering AM status={} message={}", appStatus, appMessage);
        asyncRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException e) {
        log.info("Failed to unregister application: " + e, e);
    } catch (IOException e) {
        log.info("Failed to unregister application: " + e, e);
    }
}

From source file:org.apache.ignite.yarn.ApplicationMaster.java

License:Apache License

/**
 * Runs application master.
 *
 * <p>Registers with the ResourceManager and then, until the NodeManager client reaches
 * the STOPPED state, periodically requests additional containers whenever fewer
 * containers than configured instances are tracked and resources are available.
 * An interruption unregisters the application as KILLED, any other exception as FAILED.
 *
 * @throws Exception If failed.
 */
public void run() throws Exception {
    // Register with ResourceManager
    rmClient.registerApplicationMaster("", 0, "");

    log.log(Level.INFO, "Application master registered.");

    // Priority for worker containers - priorities are intra-application
    Priority workerPriority = Records.newRecord(Priority.class);
    workerPriority.setPriority(0);

    try {
        // Check ignite cluster.
        while (!nmClient.isInState(Service.STATE.STOPPED)) {
            final int running = containers.size();
            final boolean shouldRequest = running < props.instances() && checkAvailableResource();

            if (shouldRequest) {
                // Resource requirements for worker containers.
                Resource nodeResources = Records.newRecord(Resource.class);
                nodeResources.setMemory((int) props.totalMemoryPerNode());
                nodeResources.setVirtualCores((int) props.cpusPerNode());

                // One request per missing instance.
                int requested = 0;
                while (requested < props.instances() - running) {
                    // Make container requests to ResourceManager
                    rmClient.addContainerRequest(
                            new AMRMClient.ContainerRequest(nodeResources, null, null, workerPriority));

                    log.log(Level.INFO, "Making request. Memory: {0}, cpu {1}.",
                            new Object[] { props.totalMemoryPerNode(), props.cpusPerNode() });

                    ++requested;
                }
            }

            TimeUnit.MILLISECONDS.sleep(schedulerTimeout);
        }
    } catch (InterruptedException ignored) {
        // Un-register with ResourceManager
        rmClient.unregisterApplicationMaster(FinalApplicationStatus.KILLED, "", "");

        log.log(Level.WARNING, "Application master killed.");
    } catch (Exception failure) {
        // Un-register with ResourceManager
        rmClient.unregisterApplicationMaster(FinalApplicationStatus.FAILED, "", "");

        log.log(Level.SEVERE, "Application master failed.", failure);
    }
}

From source file:org.apache.oozie.action.hadoop.TestLauncherAMCallbackNotifier.java

License:Apache License

/**
 * Checks that notifying with a FAILED action result triggers a callback that matches
 * the string form of {@code FinalApplicationStatus.FAILED}.
 */
public void testNotifyBackgroundActionWhenSubmitFailsWithFailed() throws Exception {
    final Configuration callbackConf = setupEmbeddedContainer(
            QueryServlet.class, "/count/*", "/count/?status=$jobStatus", null);
    final LauncherAMCallbackNotifier notifier = new LauncherAMCallbackNotifier(callbackConf);

    // no callback may have been recorded before the notification is sent
    assertNull(QueryServlet.lastQueryString);

    notifier.notifyURL(OozieActionResult.FAILED);

    final String expectedStatus = FinalApplicationStatus.FAILED.toString();
    waitForCallbackAndCheckResult(expectedStatus);
}