Example usage for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED

List of usage examples for org.apache.hadoop.yarn.api.records FinalApplicationStatus SUCCEEDED

Introduction

On this page you can find example usages of org.apache.hadoop.yarn.api.records.FinalApplicationStatus.SUCCEEDED.

Prototype

FinalApplicationStatus SUCCEEDED

To view the source code for org.apache.hadoop.yarn.api.records.FinalApplicationStatus.SUCCEEDED, click the Source Link.

Document

Application which finished successfully.

Usage

From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java

License:Apache License

/**
 * Tears the application master down regardless of the outcome: waits for the
 * launcher threads, stops the NodeManager client, reports the final status to
 * the ResourceManager and then stops the ResourceManager client.
 */
private void finish() {
    // Give each launcher thread up to ten seconds to wind down. This matters
    // when we timed out and still have containers to release.
    for (Thread launcher : launchThreads) {
        try {
            launcher.join(10000);
        } catch (InterruptedException interrupted) {
            LOG.info("Exception thrown in thread join: " + interrupted.getMessage());
            // Render the stack trace into a string so that it goes through the logger.
            StringWriter traceBuffer = new StringWriter();
            interrupted.printStackTrace(new PrintWriter(traceBuffer));
            LOG.error(traceBuffer.toString());
        }
    }

    // On completion the application should stop all running containers.
    LOG.info("Application completed. Stopping running containers");
    nmClientAsync.stop();

    // On completion the application should signal its final state to the RM.
    LOG.info("Application completed. Signalling finish to RM");

    // Assume success; downgrade if any container failed or not all completed.
    FinalApplicationStatus appStatus = FinalApplicationStatus.SUCCEEDED;
    String appMessage = null;
    success = true;
    if (numFailedContainers.get() != 0 || numCompletedContainers.get() != numTotalContainers) {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers
                + ", completed=" + numCompletedContainers.get()
                + ", allocated=" + numAllocatedContainers.get()
                + ", failed=" + numFailedContainers.get();
        success = false;
    }

    // Unregistering is best effort: failures are logged and teardown continues.
    try {
        resourceManager.unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException yarnFailure) {
        LOG.error("Failed to unregister application", yarnFailure);
    } catch (IOException ioFailure) {
        LOG.error("Failed to unregister application", ioFailure);
    }
    done = true;
    resourceManager.stop();
}

From source file:eu.stratosphere.yarn.ApplicationMaster.java

License:Apache License

/**
 * Runs the ApplicationMaster from start to finish.
 *
 * <p>The steps, in order: read the launch parameters from the process
 * environment, write a modified copy of {@code stratosphere-conf.yaml} that
 * points the JobManager address at this host, start the JobManager and its
 * info server, register with the ResourceManager, request one container per
 * TaskManager, launch {@code YarnTaskManagerRunner} in every allocated
 * container, wait until as many containers have completed as were requested,
 * shut the JobManager down and unregister.
 *
 * <p>The application is always unregistered with
 * {@link FinalApplicationStatus#SUCCEEDED}; container diagnostics are only logged.
 *
 * @throws Exception if any step fails; the only exception handled locally is
 *         the {@link IOException} from serializing the user's credentials
 */
private void run() throws Exception {
    // Initialize clients to ResourceManager and NodeManagers
    Configuration conf = Utils.initializeYarnConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Map<String, String> envs = System.getenv();
    final String currDir = envs.get(Environment.PWD.key());
    final String logDirs = envs.get(Environment.LOG_DIRS.key());
    final String ownHostname = envs.get(Environment.NM_HOST.key());
    final String appId = envs.get(Client.ENV_APP_ID);
    final String clientHomeDir = envs.get(Client.ENV_CLIENT_HOME_DIR);
    final String applicationMasterHost = envs.get(Environment.NM_HOST.key());
    final String remoteStratosphereJarPath = envs.get(Client.STRATOSPHERE_JAR_PATH);
    final String shipListString = envs.get(Client.ENV_CLIENT_SHIP_FILES);
    final String yarnClientUsername = envs.get(Client.ENV_CLIENT_USERNAME);
    final int taskManagerCount = Integer.valueOf(envs.get(Client.ENV_TM_COUNT));
    final int memoryPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_MEMORY));
    final int coresPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_CORES));

    int heapLimit = Utils.calculateHeapSize(memoryPerTaskManager);

    if (currDir == null) {
        throw new RuntimeException("Current directory unknown");
    }
    if (ownHostname == null) {
        throw new RuntimeException("Own hostname (" + Environment.NM_HOST + ") not set.");
    }
    LOG.info("Working directory " + currDir);

    // load Stratosphere configuration.
    Utils.getStratosphereConfiguration(currDir);

    final String localWebInterfaceDir = currDir + "/resources/"
            + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME;
    final String modifiedConfPath = currDir + "/stratosphere-conf-modified.yaml";

    // Update yaml conf -> set jobManager address to this machine's address.
    // Both streams are closed in finally blocks so a failure while copying
    // does not leak file handles.
    BufferedReader br = new BufferedReader(
            new InputStreamReader(new FileInputStream(currDir + "/stratosphere-conf.yaml")));
    try {
        Writer output = new BufferedWriter(new FileWriter(modifiedConfPath));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.contains(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)) {
                    output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n");
                } else if (line.contains(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY)) {
                    output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + "\n");
                } else {
                    output.append(line + "\n");
                }
            }
            // just to make sure.
            output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n");
            output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + localWebInterfaceDir + "\n");
            output.append(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY + ": " + logDirs + "\n");
        } finally {
            output.close();
        }
    } finally {
        br.close();
    }
    File newConf = new File(modifiedConfPath);
    if (!newConf.exists()) {
        LOG.warn("modified yaml does not exist!");
    }

    Utils.copyJarContents("resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME,
            ApplicationMaster.class.getProtectionDomain().getCodeSource().getLocation().getPath());

    JobManager jm;
    {
        String[] args = { "-executionMode", "cluster", "-configDir", modifiedConfPath };

        // start the job manager
        jm = JobManager.initialize(args);

        // Start info server for jobmanager
        jm.startInfoServer();
    }

    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(conf);
    rmClient.start();

    NMClient nmClient = NMClient.createNMClient();
    nmClient.init(conf);
    nmClient.start();

    // Register with ResourceManager
    LOG.info("registering ApplicationMaster");
    rmClient.registerApplicationMaster(applicationMasterHost, 0, "http://" + applicationMasterHost + ":"
            + GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, "undefined"));

    // Priority for worker containers - priorities are intra-application
    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(0);

    // Resource requirements for worker containers
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(memoryPerTaskManager);
    capability.setVirtualCores(coresPerTaskManager);

    // Make container requests to ResourceManager
    for (int i = 0; i < taskManagerCount; ++i) {
        ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority);
        LOG.info("Requesting TaskManager container " + i);
        rmClient.addContainerRequest(containerAsk);
    }

    LocalResource stratosphereJar = Records.newRecord(LocalResource.class);
    LocalResource stratosphereConf = Records.newRecord(LocalResource.class);

    // register Stratosphere Jar with remote HDFS
    final Path remoteJarPath = new Path(remoteStratosphereJarPath);
    Utils.registerLocalResource(fs, remoteJarPath, stratosphereJar);

    // register conf with local fs.
    Utils.setupLocalResource(conf, fs, appId, new Path("file://" + modifiedConfPath), stratosphereConf,
            new Path(clientHomeDir));
    LOG.info("Prepared localresource for modified yaml: " + stratosphereConf);

    boolean hasLog4j = new File(currDir + "/log4j.properties").exists();

    // Prepare the files to ship, keyed by the file name they get inside the
    // container. Keeping name and resource together means blank entries in
    // the comma-separated list are simply skipped instead of shifting the
    // resources against their paths. A missing list is treated as empty.
    final Map<String, LocalResource> shipResources = new HashMap<String, LocalResource>();
    if (shipListString != null) {
        for (String remoteShipPathStr : shipListString.split(",")) {
            if (remoteShipPathStr.isEmpty()) {
                continue;
            }
            LocalResource shipResource = Records.newRecord(LocalResource.class);
            Path remoteShipPath = new Path(remoteShipPathStr);
            Utils.registerLocalResource(fs, remoteShipPath, shipResource);
            shipResources.put(remoteShipPath.getName(), shipResource);
        }
    }

    // respect custom JVM options in the YAML file
    final String javaOpts = GlobalConfiguration.getString(ConfigConstants.STRATOSPHERE_JVM_OPTIONS, "");

    // Obtain allocated containers and launch
    int allocatedContainers = 0;
    int completedContainers = 0;
    while (allocatedContainers < taskManagerCount) {
        AllocateResponse response = rmClient.allocate(0);
        for (Container container : response.getAllocatedContainers()) {
            LOG.info("Got new Container for TM " + container.getId() + " on host "
                    + container.getNodeId().getHost());
            ++allocatedContainers;

            // Launch container by create ContainerLaunchContext
            ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);

            String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts;
            if (hasLog4j) {
                tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                        + "/taskmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties";
            }
            tmCommand += " eu.stratosphere.yarn.YarnTaskManagerRunner -configDir . " + " 1>"
                    + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>"
                    + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log";
            ctx.setCommands(Collections.singletonList(tmCommand));

            LOG.info("Starting TM with command=" + tmCommand);

            // copy resources to the TaskManagers.
            Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2);
            localResources.put("stratosphere.jar", stratosphereJar);
            localResources.put("stratosphere-conf.yaml", stratosphereConf);

            // add ship resources
            localResources.putAll(shipResources);

            ctx.setLocalResources(localResources);

            // Setup CLASSPATH for Container (=TaskTracker)
            Map<String, String> containerEnv = new HashMap<String, String>();
            Utils.setupEnv(conf, containerEnv); //add stratosphere.jar to class path.
            containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername);

            ctx.setEnvironment(containerEnv);

            UserGroupInformation user = UserGroupInformation.getCurrentUser();
            try {
                Credentials credentials = user.getCredentials();
                DataOutputBuffer dob = new DataOutputBuffer();
                credentials.writeTokenStorageToStream(dob);
                ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
                ctx.setTokens(securityTokens);
            } catch (IOException e) {
                // Best effort: the container is still launched, just without tokens.
                LOG.warn("Getting current user info failed when trying to launch the container", e);
            }

            LOG.info("Launching container " + allocatedContainers);
            nmClient.startContainer(container, ctx);
        }
        for (ContainerStatus status : response.getCompletedContainersStatuses()) {
            ++completedContainers;
            LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:"
                    + completedContainers);
            LOG.info("Diagnostics " + status.getDiagnostics());
        }
        Thread.sleep(100);
    }

    // Now wait for containers to complete

    while (completedContainers < taskManagerCount) {
        // Floating-point division: with integer division the reported
        // progress would stay at 0 for the whole loop.
        AllocateResponse response = rmClient.allocate((float) completedContainers / taskManagerCount);
        for (ContainerStatus status : response.getCompletedContainersStatuses()) {
            ++completedContainers;
            LOG.info("Completed container " + status.getContainerId() + ". Total Completed:"
                    + completedContainers);
            LOG.info("Diagnostics " + status.getDiagnostics());
        }
        Thread.sleep(5000);
    }
    LOG.info("Shutting down JobManager");
    jm.shutdown();

    // Un-register with ResourceManager
    rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", "");

}

From source file:gobblin.yarn.YarnService.java

License:Apache License

/**
 * Stops the service: shuts down the container launch executor, asks every
 * known container to stop, waits up to five minutes on the
 * {@code allContainersStopped} monitor and unregisters the ApplicationMaster
 * with status SUCCEEDED. The closer and the metrics reporting are released in
 * any case.
 */
@Override
protected void shutDown() throws IOException {
    LOGGER.info("Stopping the YarnService");

    this.shutdownInProgress = true;

    try {
        ExecutorsUtils.shutdownExecutorService(this.containerLaunchExecutor, Optional.of(LOGGER));

        // Ask the NodeManagers to stop every container we still track.
        for (Map.Entry<Container, String> tracked : this.containerMap.values()) {
            Container container = tracked.getKey();
            LOGGER.info(String.format("Stopping container %s running participant %s", container.getId(),
                    tracked.getValue()));
            this.nmClientAsync.stopContainerAsync(container.getId(), container.getNodeId());
        }

        if (!this.containerMap.isEmpty()) {
            // Wait 5 minutes for the containers to stop
            final long stopTimeoutMillis = 5 * 60 * 1000;
            synchronized (this.allContainersStopped) {
                try {
                    this.allContainersStopped.wait(stopTimeoutMillis);
                    LOGGER.info("All of the containers have been stopped");
                } catch (InterruptedException interrupted) {
                    // Keep the interrupt visible to whoever called us.
                    Thread.currentThread().interrupt();
                }
            }
        }

        this.amrmClientAsync.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    } catch (IOException | YarnException unregisterFailure) {
        LOGGER.error("Failed to unregister the ApplicationMaster", unregisterFailure);
    } finally {
        try {
            this.closer.close();
        } finally {
            // Metrics reporting is stopped even if closing the closer throws.
            if (this.gobblinMetrics.isPresent()) {
                this.gobblinMetrics.get().stopMetricsReporting();
            }
        }
    }
}

From source file:husky.server.HuskyApplicationMaster.java

License:Apache License

/**
 * Starts the asynchronous ResourceManager and NodeManager clients, registers
 * this ApplicationMaster, requests one container per entry of
 * {@code mWorkerInfos} and finally unregisters with a status derived from the
 * callback handler's success count.
 */
private void run() throws YarnException, IOException, InterruptedException, ExecutionException {
    LOG.info("Run App Master");

    // ResourceManager side: callback handler plus async client.
    mRMClientListener = new HuskyRMCallbackHandler(this);
    mRMClient = AMRMClientAsync.createAMRMClientAsync(1000, mRMClientListener);
    mRMClient.init(mYarnConf);
    mRMClient.start();

    // NodeManager side: callback handler plus async client.
    mContainerListener = new HuskyNMCallbackHandler();
    mNMClient = NMClientAsync.createNMClientAsync(mContainerListener);
    mNMClient.init(mYarnConf);
    mNMClient.start();

    // Register with ResourceManager
    LOG.info("registerApplicationMaster started");
    mRMClient.registerApplicationMaster("", 0, "");
    LOG.info("registerApplicationMaster done");

    // One container request per worker; each container is a worker node.
    LOG.info("Ask RM for " + mWorkerInfos.size() + " containers");
    for (Pair<String, Integer> workerInfo : mWorkerInfos) {
        mRMClient.addContainerRequest(setupContainerAskForRMSpecific(workerInfo.getFirst()));
    }

    // Success only if the handler reports as many successes as there are workers.
    FinalApplicationStatus status;
    if (mRMClientListener.getFinalNumSuccess() == mWorkerInfos.size()) {
        status = FinalApplicationStatus.SUCCEEDED;
    } else {
        status = FinalApplicationStatus.FAILED;
    }

    mRMClient.unregisterApplicationMaster(status, mRMClientListener.getStatusReport(), null);
}

From source file:hws.core.JobMaster.java

License:Apache License

/**
 * Drives the job master: registers with the ResourceManager, creates the
 * per-module ZooKeeper nodes, requests one container per filter instance,
 * blocks until the finish latch has been counted down, marks the application
 * as done in ZooKeeper and unregisters with status SUCCEEDED.
 */
public void runMainLoop() throws Exception {

    AMRMClientAsync<ContainerRequest> rmClient = AMRMClientAsync.createAMRMClientAsync(100, this);
    rmClient.init(getConfiguration());
    rmClient.start();

    // Register with ResourceManager
    Logger.info("[AM] registerApplicationMaster 0");
    rmClient.registerApplicationMaster("", 0, "");
    Logger.info("[AM] registerApplicationMaster 1");

    // Priority for worker containers - priorities are intra-application
    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(0);

    // Resource requirements for worker containers
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(128);
    capability.setVirtualCores(1);

    // One latch count per module in the pipeline.
    final CountDownLatch doneLatch = new CountDownLatch(this.modulePipeline.size());

    // Create the ZooKeeper nodes and the container requests for each
    // instance of each module.
    for (ModuleInfo moduleInfo : this.modulePipeline) {
        final String moduleNode = "/hadoop-watershed/" + this.appIdStr + "/"
                + moduleInfo.filterInfo().name();
        final String finishNode = moduleNode + "/finish";

        zk.createPersistent(moduleNode, "");
        zk.createPersistent(finishNode, "");
        zk.createPersistent(moduleNode + "/halted", "");
        zk.subscribeChildChanges(finishNode, createFinishListener(moduleInfo.filterInfo().name(),
                moduleInfo.numFilterInstances(), doneLatch));

        for (int instance = 0; instance < moduleInfo.numFilterInstances(); instance++) {
            this.numContainersToWaitFor++;
            ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority);
            Logger.info("[AM] Making res-req for " + moduleInfo.filterInfo().name() + " " + instance);
            rmClient.addContainerRequest(containerAsk);
        }
    }
    // TODO: process for starting the whole application
    //   create containers
    //   -> create instances
    //   -> start output channels and filters
    //   -> start input channels in reversed topological order (assuming there is no cycle)
    //      * if there is a cycle, then initially start in any order
    // TODO: "send" the start signal via ZooKeeper

    Logger.info("[AM] waiting for containers to finish");
    try {
        // Block until the latch has been counted down to zero.
        doneLatch.await();
    } catch (InterruptedException interrupted) {
        Logger.fatal(interrupted.toString());
    }

    zk.createPersistent("/hadoop-watershed/" + appIdStr + "/done", "");

    // Un-register with ResourceManager
    Logger.info("[AM] unregisterApplicationMaster 0");
    rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", "");
    Logger.info("[AM] unregisterApplicationMaster 1");
}

From source file:io.amient.yarn1.YarnClient.java

License:Open Source License

/**
 * Creates a YARN application for the given master class and submits it to
 * the YARN resource manager. Intended to be called from the implementing
 * application's static main method.
 *
 * <p>The name of {@code masterClass} is stored in {@code appConfig} under
 * {@code yarn1.master.class}; the master container itself is described with
 * {@code YarnMaster.class}. If {@code yarn1.local.mode} is {@code true} no
 * YARN interaction takes place: {@code YarnMaster.run(appConfig, args)} is
 * invoked directly and the method returns.
 *
 * <p>The JVM is terminated with exit code 111 when no application id is
 * returned, and with exit code 112 when {@code awaitCompletion} is true and
 * the final application status is not SUCCEEDED.
 *
 * @param appConfig application properties; modified by this method
 *        ({@code yarn1.master.class}, {@code am.timestamp}, {@code am.id})
 *        and copied into the YARN configuration
 * @param masterClass class whose name is recorded as the master class and
 *        used as the default application name
 * @param args arguments passed on to the master
 * @param awaitCompletion whether to poll the application report until a
 *        final status is available; unboxed without a null check
 * @throws Exception if reading the configuration files or talking to YARN fails
 */
public static void submitApplicationMaster(Properties appConfig, Class<? extends YarnMaster> masterClass,
        String[] args, Boolean awaitCompletion) throws Exception {
    // Dump the effective application configuration.
    log.info("Yarn1 App Configuration:");
    for (Object param : appConfig.keySet()) {
        log.info(param.toString() + " = " + appConfig.get(param).toString());
    }
    String yarnConfigPath = appConfig.getProperty("yarn1.site", "/etc/hadoop");
    String masterClassName = masterClass.getName();
    appConfig.setProperty("yarn1.master.class", masterClassName);
    String applicationName = appConfig.getProperty("yarn1.application.name", masterClassName);
    log.info("--------------------------------------------------------------");

    // Local mode: run the master in this JVM and skip YARN entirely.
    if (Boolean.valueOf(appConfig.getProperty("yarn1.local.mode", "false"))) {
        YarnMaster.run(appConfig, args);
        return;
    }

    // Master container sizing, falling back to the YarnMaster defaults.
    int masterPriority = Integer.valueOf(
            appConfig.getProperty("yarn1.master.priority", String.valueOf(YarnMaster.DEFAULT_MASTER_PRIORITY)));
    int masterMemoryMb = Integer.valueOf(appConfig.getProperty("yarn1.master.memory.mb",
            String.valueOf(YarnMaster.DEFAULT_MASTER_MEMORY_MB)));
    int masterNumCores = Integer.valueOf(
            appConfig.getProperty("yarn1.master.num.cores", String.valueOf(YarnMaster.DEFAULT_MASTER_CORES)));
    String queue = appConfig.getProperty("yarn1.queue");

    // Build the YARN configuration from the site files, then overlay every
    // application property on top of it.
    // NOTE(review): the FileInputStreams are never closed here — confirm
    // whether Configuration takes ownership before adding a close.
    Configuration yarnConfig = new YarnConfiguration();
    yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/core-site.xml"));
    yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/hdfs-site.xml"));
    yarnConfig.addResource(new FileInputStream(yarnConfigPath + "/yarn-site.xml"));
    for (Map.Entry<Object, Object> entry : appConfig.entrySet()) {
        yarnConfig.set(entry.getKey().toString(), entry.getValue().toString());
    }

    // Fully qualified because this class is itself named YarnClient.
    final org.apache.hadoop.yarn.client.api.YarnClient yarnClient = org.apache.hadoop.yarn.client.api.YarnClient
            .createYarnClient();
    yarnClient.init(yarnConfig);
    yarnClient.start();

    for (NodeReport report : yarnClient.getNodeReports(NodeState.RUNNING)) {
        log.debug("Node report:" + report.getNodeId() + " @ " + report.getHttpAddress() + " | "
                + report.getCapability());
    }

    log.info("Submitting application master class " + masterClassName);

    // Obtain a new application id and expose it to the master via appConfig.
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    final ApplicationId appId = appResponse.getApplicationId();
    if (appId == null) {
        System.exit(111);
    } else {
        appConfig.setProperty("am.timestamp", String.valueOf(appId.getClusterTimestamp()));
        appConfig.setProperty("am.id", String.valueOf(appId.getId()));
    }

    YarnClient.distributeResources(yarnConfig, appConfig, applicationName);

    String masterJvmArgs = appConfig.getProperty("yarn1.master.jvm.args", "");
    YarnContainerContext masterContainer = new YarnContainerContext(yarnConfig, appConfig, masterJvmArgs,
            masterPriority, masterMemoryMb, masterNumCores, applicationName, YarnMaster.class, args);

    // NOTE(review): the submission context is named after masterClassName,
    // not applicationName — confirm this is intended.
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName(masterClassName);
    appContext.setResource(masterContainer.capability);
    appContext.setPriority(masterContainer.priority);
    appContext.setQueue(queue);
    appContext.setApplicationType(appConfig.getProperty("yarn1.application.type", "YARN"));
    appContext.setAMContainerSpec(masterContainer.createContainerLaunchContext());

    log.info("Master container spec: " + masterContainer.capability);

    yarnClient.submitApplication(appContext);

    ApplicationReport report = yarnClient.getApplicationReport(appId);
    log.info("Tracking URL: " + report.getTrackingUrl());

    // Unboxes awaitCompletion; a null argument throws NullPointerException here.
    if (awaitCompletion) {
        // Kill the YARN application if this JVM exits while the client is still running.
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                if (!yarnClient.isInState(Service.STATE.STOPPED)) {
                    log.info("Killing yarn application in shutdown hook");
                    try {
                        yarnClient.killApplication(appId);
                    } catch (Throwable e) {
                        log.error("Failed to kill yarn application - please check YARN Resource Manager", e);
                    }
                }
            }
        });

        // Poll every 10 seconds; log only when the progress value changes and
        // leave the loop once the final status is no longer UNDEFINED.
        float lastProgress = -0.0f;
        while (true) {
            try {
                Thread.sleep(10000);
                report = yarnClient.getApplicationReport(appId);
                if (lastProgress != report.getProgress()) {
                    lastProgress = report.getProgress();
                    log.info(report.getApplicationId() + " " + (report.getProgress() * 100.00) + "% "
                            + (System.currentTimeMillis() - report.getStartTime()) + "(ms) "
                            + report.getDiagnostics());
                }
                if (!report.getFinalApplicationStatus().equals(FinalApplicationStatus.UNDEFINED)) {
                    log.info(report.getApplicationId() + " " + report.getFinalApplicationStatus());
                    log.info("Tracking url: " + report.getTrackingUrl());
                    log.info("Finish time: " + ((System.currentTimeMillis() - report.getStartTime()) / 1000)
                            + "(s)");
                    break;
                }
            } catch (Throwable e) {
                // Any failure requests a kill; the loop keeps polling and only
                // exits through the final-status check above.
                log.error("Master Heart Beat Error - terminating", e);
                yarnClient.killApplication(appId);
                Thread.sleep(2000);
            }
        }
        yarnClient.stop();

        if (!report.getFinalApplicationStatus().equals(FinalApplicationStatus.SUCCEEDED)) {
            System.exit(112);
        }
    }
    // Reached on both paths, so stop() runs a second time when awaitCompletion is true.
    yarnClient.stop();
}

From source file:io.amient.yarn1.YarnMaster.java

License:Open Source License

/**
 * Ends the master's run: when not in local mode the ApplicationMaster is
 * unregistered with status SUCCEEDED, otherwise the executor is shut down
 * immediately.
 */
private void conclude() throws IOException, YarnException {
    if (!localMode) {
        rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", "");
        return;
    }
    executor.shutdownNow();
}

From source file:io.hops.tensorflow.ApplicationMaster.java

License:Apache License

/**
 * Waits for the application to end, then tears everything down and reports
 * the final status to the ResourceManager.
 *
 * <p>The wait ends when {@code done} is set, all workers have completed, any
 * container has failed, or the allocation timeout expires while containers
 * are still outstanding. The application counts as successful only if no
 * container failed and every worker completed.
 *
 * @return true if the application was reported as SUCCEEDED, false otherwise
 */
private boolean finish() {
    // wait for completion. finish if any container fails
    while (!done && numCompletedWorkers.get() != numWorkers && numFailedContainers.get() <= 0) {
        if (numAllocatedContainers.get() != numTotalContainers) {
            long timeLeft = appMasterStartTime + allocationTimeout - System.currentTimeMillis();
            LOG.info("Awaits container allocation, timeLeft=" + timeLeft);
            if (timeLeft < 0) {
                LOG.warn("Container allocation timeout. Finish application attempt");
                break;
            }
        }
        try {
            Thread.sleep(200);
        } catch (InterruptedException ignored) {
            // Deliberately ignored: the loop condition is re-evaluated and
            // the wait simply continues.
        }
    }

    if (timelineHandler.isClientNotNull()) {
        timelineHandler.publishApplicationAttemptEvent(YarntfEvent.YARNTF_APP_ATTEMPT_END);
    }

    // Join all launched threads
    // needed for when we time out
    // and we need to release containers
    for (Thread launchThread : launchThreads) {
        try {
            launchThread.join(10000);
        } catch (InterruptedException e) {
            // Route the stack trace through the logger instead of stderr.
            LOG.info("Exception thrown in thread join: " + e.getMessage(), e);
        }
    }

    // When the application completes, it should stop all running containers
    LOG.info("Application completed. Stopping running containers");
    nmWrapper.getClient().stop();

    // When the application completes, it should send a finish application
    // signal to the RM
    LOG.info("Application completed. Signalling finish to RM");

    FinalApplicationStatus appStatus;
    String appMessage = null;
    boolean success = true;
    if (numFailedContainers.get() == 0 && numCompletedWorkers.get() == numWorkers) {
        appStatus = FinalApplicationStatus.SUCCEEDED;
    } else {
        appStatus = FinalApplicationStatus.FAILED;
        appMessage = "Diagnostics." + ", total=" + numTotalContainers + ", completed(workers)="
                + numCompletedContainers.get() + "(" + numCompletedWorkers.get() + "), allocated="
                + numAllocatedContainers.get() + ", failed=" + numFailedContainers.get();
        LOG.info(appMessage);
        success = false;
    }
    // Unregistering is best effort: failures are logged and teardown continues.
    try {
        rmWrapper.getClient().unregisterApplicationMaster(appStatus, appMessage, null);
    } catch (YarnException ex) {
        LOG.error("Failed to unregister application", ex);
    } catch (IOException e) {
        LOG.error("Failed to unregister application", e);
    }

    rmWrapper.getClient().stop();

    // Stop Timeline Client
    if (timelineHandler.isClientNotNull()) {
        timelineHandler.stopClient();
    }

    return success;
}

From source file:io.hops.tensorflow.Client.java

License:Apache License

/**
 * Monitor the submitted application for completion.
 * Kill application if time expires./* ww  w  .  j av a  2  s .  c o m*/
 *
 * @param appId
 *     Application Id of application to be monitored
 * @return true if application completed successfully
 * @throws YarnException
 * @throws IOException
 */
public boolean monitorApplication(ApplicationId appId) throws YarnException, IOException {

    YarnApplicationState oldState = null;

    while (true) {

        // Check app status every 1 second.
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            LOG.debug("Thread sleep in monitoring loop interrupted");
        }

        ApplicationReport report = yarnClient.getApplicationReport(appId);
        YarnApplicationState state = report.getYarnApplicationState();
        FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();

        if (oldState != state) {
            LOG.info("Got application report from ASM for" + "\n\t appId=" + appId.getId()
                    + "\n\t clientToAMToken=" + report.getClientToAMToken() + "\n\t appDiagnostics="
                    + report.getDiagnostics() + "\n\t appMasterHost=" + report.getHost() + "\n\t appQueue="
                    + report.getQueue() + "\n\t appMasterRpcPort=" + report.getRpcPort() + "\n\t appStartTime="
                    + report.getStartTime() + "\n\t yarnAppState=" + report.getYarnApplicationState().toString()
                    + "\n\t distributedFinalState=" + report.getFinalApplicationStatus().toString()
                    + "\n\t appTrackingUrl=" + report.getTrackingUrl() + "\n\t appUser=" + report.getUser());
            oldState = state;
        } else {
            LOG.info("Got application report from ASM for " + appId + " (state: " + state + ")");
        }

        if (YarnApplicationState.FINISHED == state) {
            if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
                LOG.info("Application has completed successfully. Breaking monitoring loop");
                return true;
            } else {
                LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString()
                        + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop");
                return false;
            }
        } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) {
            LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus="
                    + dsStatus.toString() + ". Breaking monitoring loop");
            return false;
        }

        if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
            LOG.info("Reached client specified timeout for application. Killing application");
            forceKillApplication(appId);
            return false;
        }
    }
}

From source file:ml.shifu.guagua.yarn.GuaguaAppMaster.java

License:Apache License

/**
 * Called when the application is done: stops the NodeManager client, reports
 * the final status to the ResourceManager and stops the ResourceManager
 * client.
 *
 * @return true if the number of successful containers equals the number of
 *         containers to launch
 */
private boolean finish() {
    // On completion the application should stop all running containers.
    LOG.info("Application completed. Stopping running containers");
    getNmClientAsync().stop();

    // On completion the application should signal its final state to the RM.
    LOG.info("Application completed. Signalling finish to RM");
    final boolean allSucceeded = getSuccessfulCount().get() == getContainersToLaunch();
    final FinalApplicationStatus finalStatus = allSucceeded
            ? FinalApplicationStatus.SUCCEEDED
            : FinalApplicationStatus.FAILED;
    // Diagnostics are only produced for the failure case.
    final String diagnostics = allSucceeded
            ? null
            : String.format("Diagnostics total=%s, completed=%s, failed=%s.", getContainersToLaunch(),
                    getCompletedCount().get(), getFailedCount().get());

    // Unregistering is best effort: failures are logged and teardown continues.
    try {
        getAmRMClient().unregisterApplicationMaster(finalStatus, diagnostics, this.appMasterTrackingUrl);
    } catch (YarnException yarnFailure) {
        LOG.error("Failed to unregister application", yarnFailure);
    } catch (IOException ioFailure) {
        LOG.error("Failed to unregister application", ioFailure);
    }

    getAmRMClient().stop();
    return allSucceeded;
}