Example usage for org.apache.hadoop.yarn.api.protocolrecords AllocateResponse getAllocatedContainers

List of usage examples for org.apache.hadoop.yarn.api.protocolrecords AllocateResponse getAllocatedContainers

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.api.protocolrecords AllocateResponse getAllocatedContainers.

Prototype

@Public
@Stable
public abstract List<Container> getAllocatedContainers();

Source Link

Document

Get the list of newly allocated Container by the ResourceManager.

Usage

From source file:com.continuuity.weave.internal.yarn.Hadoop21YarnAMClient.java

License:Apache License

@Override
public synchronized void allocate(float progress, AllocateHandler handler) throws Exception {
    AllocateResponse allocateResponse = amrmClient.allocate(progress);
    List<ProcessLauncher<YarnContainerInfo>> launchers = Lists
            .newArrayListWithCapacity(allocateResponse.getAllocatedContainers().size());

    for (Container container : allocateResponse.getAllocatedContainers()) {
        launchers.add(new RunnableProcessLauncher(new Hadoop21YarnContainerInfo(container), nmClient));
    }//from  w ww.  jav a 2s  .co m

    if (!launchers.isEmpty()) {
        handler.acquired(launchers);

        // If no process has been launched through the given launcher, return the container.
        for (ProcessLauncher<YarnContainerInfo> l : launchers) {
            // This cast always works.
            RunnableProcessLauncher launcher = (RunnableProcessLauncher) l;
            if (!launcher.isLaunched()) {
                Container container = launcher.getContainerInfo().getContainer();
                LOG.info("Nothing to run in container, releasing it: {}", container);
                amrmClient.releaseAssignedContainer(container.getId());
            }
        }
    }

    List<YarnContainerStatus> completed = ImmutableList
            .copyOf(Iterables.transform(allocateResponse.getCompletedContainersStatuses(), STATUS_TRANSFORM));
    if (!completed.isEmpty()) {
        handler.completed(completed);
    }
}

From source file:com.datatorrent.stram.StramMiniClusterTest.java

License:Apache License

@Ignore
@Test/*w  w  w. j a  v  a2 s  . c  o m*/
public void testUnmanagedAM2() throws Exception {

    new InlineAM(conf) {

        @Override
        @SuppressWarnings("SleepWhileInLoop")
        public void runAM(ApplicationAttemptId attemptId) throws Exception {
            LOG.debug("AM running {}", attemptId);
            AMRMClient<ContainerRequest> amRmClient = AMRMClient.createAMRMClient();
            amRmClient.init(conf);
            amRmClient.start();

            // register with the RM (LAUNCHED -> RUNNING)
            amRmClient.registerApplicationMaster("", 0, null);

            // AM specific logic

            String[] hosts = { "vm1" };
            String[] racks = { "somerack" };

            Resource capability = Records.newRecord(Resource.class);
            capability.setMemory(1000);

            Priority priority = Records.newRecord(Priority.class);
            priority.setPriority(10);
            AMRMClient.ContainerRequest req = new AMRMClient.ContainerRequest(capability, hosts, racks,
                    priority);
            amRmClient.addContainerRequest(req);
            amRmClient.addContainerRequest(req);

            /*
                    capability = Records.newRecord(Resource.class);
                    capability.setMemory(5512);
                    priority = Records.newRecord(Priority.class);
                    priority.setPriority(11);
                    req = new AMRMClient.ContainerRequest(capability, hosts, racks, priority, 3);
                    amRmClient.addContainerRequest(req);
            */
            for (int i = 0; i < 100; i++) {
                AllocateResponse ar = amRmClient.allocate(0);
                sleep(1000);
                LOG.debug("allocateResponse: {}", ar);
                for (Container c : ar.getAllocatedContainers()) {
                    LOG.debug("*** allocated {}", c.getResource());
                    amRmClient.removeContainerRequest(req);
                }
                /*
                GetClusterNodesRequest request =
                    Records.newRecord(GetClusterNodesRequest.class);
                ClientRMService clientRMService = yarnCluster.getResourceManager().getClientRMService();
                GetClusterNodesResponse response = clientRMService.getClusterNodes(request);
                List<NodeReport> nodeReports = response.getNodeReports();
                LOG.info(nodeReports);
                        
                for (NodeReport nr: nodeReports) {
                  LOG.info("Node: " + nr.getNodeId());
                  LOG.info("Total memory: " + nr.getCapability());
                  LOG.info("Used memory: " + nr.getUsed());
                  LOG.info("Number containers: " + nr.getNumContainers());
                }
                */
            }

            // unregister from RM
            amRmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "testUnmanagedAM finished",
                    null);

        }

    }.run();

}

From source file:com.datatorrent.stram.StreamingAppMasterService.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException//from   w  ww.java 2  s . co  m
 */
@SuppressWarnings("SleepWhileInLoop")
private void execute() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("number of tokens: {}", credentials.getAllTokens().size());
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.debug("token: {}", token);
    }
    final Configuration conf = getConfig();
    long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math
            .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME)));
    long expiryTime = System.currentTimeMillis() + tokenLifeTime;
    LOG.debug(" expiry token time {}", tokenLifeTime);
    String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE);

    // Register self with ResourceManager
    RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0,
            appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    int maxVcores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores);

    // for locality relaxation fall back
    Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps
            .newHashMap();

    // Setup heartbeat emitter
    // TODO poll RM every now and then with an empty request to let RM know that we are alive
    // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting:
    // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
    // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter
    // is not required.

    int loopCounter = -1;
    List<ContainerId> releasedContainers = new ArrayList<ContainerId>();
    int numTotalContainers = 0;
    // keep track of already requested containers to not request them again while waiting for allocation
    int numRequestedContainers = 0;
    int numReleasedContainers = 0;
    int nextRequestPriority = 0;
    ResourceRequestHandler resourceRequestor = new ResourceRequestHandler();

    YarnClient clientRMService = YarnClient.createYarnClient();

    try {
        // YARN-435
        // we need getClusterNodes to populate the initial node list,
        // subsequent updates come through the heartbeat response
        clientRMService.init(conf);
        clientRMService.start();

        ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService,
                dag.getAttributes().get(DAG.APPLICATION_NAME),
                UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID));
        if (ar != null) {
            appDone = true;
            dnmgr.shutdownDiagnosticsMessage = String.format(
                    "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.",
                    ar.getApplicationId().toString(), ar.getName(), ar.getUser());
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finishApplication(FinalApplicationStatus.FAILED, numTotalContainers);
            return;
        }
        resourceRequestor.updateNodeReports(clientRMService.getNodeReports());
    } catch (Exception e) {
        throw new RuntimeException("Failed to retrieve cluster nodes report.", e);
    } finally {
        clientRMService.stop();
    }

    // check for previously allocated containers
    // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490
    checkContainerStatus();
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
            YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);

    while (!appDone) {
        loopCounter++;

        if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime
                && hdfsKeyTabFile != null) {
            String applicationId = appAttemptID.getApplicationId().toString();
            expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp",
                    applicationId, conf, hdfsKeyTabFile, credentials, rmAddress, true);
        }

        Runnable r;
        while ((r = this.pendingTasks.poll()) != null) {
            r.run();
        }

        // log current state
        /*
         * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" +
         * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers +
         * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size());
         */
        // Sleep before each loop when asking RM for containers
        // to avoid flooding RM with spurious requests when it
        // need not have any available containers
        try {
            sleep(1000);
        } catch (InterruptedException e) {
            LOG.info("Sleep interrupted " + e.getMessage());
        }

        // Setup request to be sent to RM to allocate containers
        List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>();
        List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>();

        // request containers for pending deploy requests
        if (!dnmgr.containerStartRequests.isEmpty()) {
            StreamingContainerAgent.ContainerStartRequest csr;
            while ((csr = dnmgr.containerStartRequests.poll()) != null) {
                if (csr.container.getRequiredMemoryMB() > maxMem) {
                    LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.",
                            csr.container.getRequiredMemoryMB(), maxMem);
                    csr.container.setRequiredMemoryMB(maxMem);
                }
                if (csr.container.getRequiredVCores() > maxVcores) {
                    LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.",
                            csr.container.getRequiredVCores(), maxVcores);
                    csr.container.setRequiredVCores(maxVcores);
                }
                csr.container.setResourceRequestPriority(nextRequestPriority++);
                ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true);
                MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>(
                        loopCounter, cr);
                requestedResources.put(csr, pair);
                containerRequests.add(cr);
            }
        }

        if (!requestedResources.isEmpty()) {
            //resourceRequestor.clearNodeMapping();
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) {
                    StreamingContainerAgent.ContainerStartRequest csr = entry.getKey();
                    removedContainerRequests.add(entry.getValue().getRight());
                    ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false);
                    entry.getValue().setLeft(loopCounter);
                    entry.getValue().setRight(cr);
                    containerRequests.add(cr);
                }
            }
        }

        numTotalContainers += containerRequests.size();
        numRequestedContainers += containerRequests.size();
        AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests,
                releasedContainers);
        if (amResp.getAMCommand() != null) {
            LOG.info(" statement executed:{}", amResp.getAMCommand());
            switch (amResp.getAMCommand()) {
            case AM_RESYNC:
            case AM_SHUTDOWN:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
            default:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");

            }
        }
        releasedContainers.clear();

        // Retrieve list of allocated containers from the response
        List<Container> newAllocatedContainers = amResp.getAllocatedContainers();
        // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size());
        numRequestedContainers -= newAllocatedContainers.size();
        long timestamp = System.currentTimeMillis();
        for (Container allocatedContainer : newAllocatedContainers) {

            LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode="
                    + allocatedContainer.getNodeId() + ", containerNodeURI="
                    + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory"
                    + allocatedContainer.getResource().getMemory() + ", priority"
                    + allocatedContainer.getPriority());
            // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString());

            boolean alreadyAllocated = true;
            StreamingContainerAgent.ContainerStartRequest csr = null;
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority()
                        .getPriority()) {
                    alreadyAllocated = false;
                    csr = entry.getKey();
                    break;
                }
            }

            if (alreadyAllocated) {
                LOG.info("Releasing {} as resource with priority {} was already assigned",
                        allocatedContainer.getId(), allocatedContainer.getPriority());
                releasedContainers.add(allocatedContainer.getId());
                numReleasedContainers++;
                numRequestedContainers++;
                continue;
            }
            if (csr != null) {
                requestedResources.remove(csr);
            }

            // allocate resource to container
            ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(),
                    allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(),
                    allocatedContainer.getResource().getMemory(),
                    allocatedContainer.getResource().getVirtualCores(),
                    allocatedContainer.getNodeHttpAddress());
            StreamingContainerAgent sca = dnmgr.assignContainer(resource, null);

            if (sca == null) {
                // allocated container no longer needed, add release request
                LOG.warn("Container {} allocated but nothing to deploy, going to release this container.",
                        allocatedContainer.getId());
                releasedContainers.add(allocatedContainer.getId());
            } else {
                AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer);
                this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder);
                ByteBuffer tokens = null;
                if (UserGroupInformation.isSecurityEnabled()) {
                    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                    Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken(
                            ugi.getUserName(), heartbeatListener.getAddress());
                    allocatedContainerHolder.delegationToken = delegationToken;
                    //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress());
                    tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken);
                }
                LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer,
                        nmClient, sca, tokens);
                // Thread launchThread = new Thread(runnableLaunchContainer);
                // launchThreads.add(launchThread);
                // launchThread.start();
                launchContainer.run(); // communication with NMs is now async

                // record container start event
                StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(),
                        allocatedContainer.getNodeId().toString());
                ev.setTimestamp(timestamp);
                dnmgr.recordEventAsync(ev);
            }
        }

        // track node updates for future locality constraint allocations
        // TODO: it seems 2.0.4-alpha doesn't give us any updates
        resourceRequestor.updateNodeReports(amResp.getUpdatedNodes());

        // Check the completed containers
        List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses();
        // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size());
        for (ContainerStatus containerStatus : completedContainers) {
            LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state="
                    + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                    + ", diagnostics=" + containerStatus.getDiagnostics());

            // non complete containers should not be here
            assert (containerStatus.getState() == ContainerState.COMPLETE);

            AllocatedContainer allocatedContainer = allocatedContainers
                    .remove(containerStatus.getContainerId().toString());
            if (allocatedContainer != null && allocatedContainer.delegationToken != null) {
                UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName());
            }
            int exitStatus = containerStatus.getExitStatus();
            if (0 != exitStatus) {
                if (allocatedContainer != null) {
                    numFailedContainers.incrementAndGet();
                }
                //          if (exitStatus == 1) {
                //            // non-recoverable StreamingContainer failure
                //            appDone = true;
                //            finalStatus = FinalApplicationStatus.FAILED;
                //            dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId();
                //            LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage);
                //          }
                //          else {
                // Recoverable failure or process killed (externally or via stop request by AM)
                // also occurs when a container was released by the application but never assigned/launched
                LOG.debug("Container {} failed or killed.", containerStatus.getContainerId());
                dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString());
                //          }
            } else {
                // container completed successfully
                numCompletedContainers.incrementAndGet();
                LOG.info("Container completed successfully." + ", containerId="
                        + containerStatus.getContainerId());
            }

            String containerIdStr = containerStatus.getContainerId().toString();
            dnmgr.removeContainerAgent(containerIdStr);

            // record container stop event
            StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus());
            ev.setReason(containerStatus.getDiagnostics());
            dnmgr.recordEventAsync(ev);
        }

        if (dnmgr.forcedShutdown) {
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finalStatus = FinalApplicationStatus.FAILED;
            appDone = true;
        } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0
                && dnmgr.containerStartRequests.isEmpty()) {
            LOG.debug("Exiting as no more containers are allocated or requested");
            finalStatus = FinalApplicationStatus.SUCCEEDED;
            appDone = true;
        }

        LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total="
                + numTotalContainers + ", requested=" + numRequestedContainers + ", released="
                + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed="
                + numFailedContainers + ", currentAllocated=" + allocatedContainers.size());

        // monitor child containers
        dnmgr.monitorHeartbeat();
    }

    finishApplication(finalStatus, numTotalContainers);
}

From source file:com.yahoo.storm.yarn.MasterServer.java

License:Open Source License

private Thread initAndStartHeartbeat(final StormAMRMClient client, final BlockingQueue<Container> launcherQueue,
        final int heartBeatIntervalMs) {
    Thread thread = new Thread() {
        @Override/*from  ww w . j a  v a  2s.c  om*/
        public void run() {
            try {
                while (client.getServiceState() == Service.STATE.STARTED
                        && !Thread.currentThread().isInterrupted()) {

                    Thread.sleep(heartBeatIntervalMs);

                    // We always send 50% progress.
                    AllocateResponse allocResponse = client.allocate(0.5f);

                    AMCommand am_command = allocResponse.getAMCommand();
                    if (am_command != null
                            && (am_command == AMCommand.AM_SHUTDOWN || am_command == AMCommand.AM_RESYNC)) {
                        LOG.info("Got AM_SHUTDOWN or AM_RESYNC from the RM");
                        _handler.stop();
                        System.exit(0);
                    }

                    List<Container> allocatedContainers = allocResponse.getAllocatedContainers();
                    if (allocatedContainers.size() > 0) {
                        // Add newly allocated containers to the client.
                        LOG.info("HB: Received allocated containers (" + allocatedContainers.size() + ")");
                        client.addAllocatedContainers(allocatedContainers);
                        if (client.supervisorsAreToRun()) {
                            LOG.info("HB: Supervisors are to run, so queueing (" + allocatedContainers.size()
                                    + ") containers...");
                            launcherQueue.addAll(allocatedContainers);
                        } else {
                            LOG.info("HB: Supervisors are to stop, so releasing all containers...");
                            client.stopAllSupervisors();
                        }
                    }

                    List<ContainerStatus> completedContainers = allocResponse.getCompletedContainersStatuses();

                    if (completedContainers.size() > 0 && client.supervisorsAreToRun()) {
                        LOG.debug("HB: Containers completed (" + completedContainers.size()
                                + "), so releasing them.");
                        client.startAllSupervisors();
                    }

                }
            } catch (Throwable t) {
                // Something happened we could not handle.  Make sure the AM goes
                // down so that we are not surprised later on that our heart
                // stopped..
                LOG.error("Unhandled error in AM: ", t);
                _handler.stop();
                System.exit(1);
            }
        }
    };
    thread.start();
    return thread;
}

From source file:edu.cmu.graphchi.toolkits.collaborative_filtering.yarn.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException//from w  w  w  .  ja  va2  s  .  c  o  m
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public boolean run() throws YarnException, IOException {
    yarnClient.start();
    LOG.info("Starting ApplicationMaster");

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    amRMClient = AMRMClient.createAMRMClient();
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);

    //TODO: Figure out how to do this.
    List<NodeReport> reports = this.yarnClient.getNodeReports();
    LOG.info("Cluster Status");
    List<Resource> availableResources = new ArrayList<Resource>();
    for (NodeReport nr : reports) {
        LOG.info("    NodeId: " + nr.getNodeId() + " Capabilities " + nr.getCapability() + " Used Resources "
                + nr.getUsed());
        int availableMem = nr.getCapability().getMemory() - nr.getUsed().getMemory();
        int availableVCores = nr.getCapability().getVirtualCores() - nr.getUsed().getVirtualCores();
        Resource resource = Resource.newInstance(availableMem, availableVCores);

        availableResources.add(resource);
    }

    /*Based on available resources scheduler should decide the best allocation of recommenders to resources
    and return a list of resources that should be requested from the ResourceManager*/
    DataSetDescription datasetDesc = new DataSetDescription(this.setup.dataMetadataFile);
    List<RecommenderPool> recommenderPools = RecommenderScheduler.splitRecommenderPool(availableResources,
            recommenders, datasetDesc, this.setup.nShards);

    for (RecommenderPool res : recommenderPools) {
        ContainerRequest containerAsk = setupContainerAskForRM(res.getTotalMemory(), requestPriority);
        LOG.info("CONTAINER ASK: " + containerAsk);
        amRMClient.addContainerRequest(containerAsk);
    }

    float progress = 0;

    List<RecommenderPool> pendingPools = new ArrayList<RecommenderPool>();
    for (RecommenderPool p : recommenderPools)
        pendingPools.add(p);

    this.numTotalContainers = recommenderPools.size();
    this.numCompletedContainers.set(0);
    this.numAllocatedContainers.set(0);

    while (numCompletedContainers.get() != numTotalContainers) {
        try {
            Thread.sleep(200);

            AllocateResponse allocResp = amRMClient.allocate(progress);
            List<Container> newContainers = allocResp.getAllocatedContainers();
            List<ContainerStatus> completedContainers = allocResp.getCompletedContainersStatuses();

            if (this.numAllocatedContainers.get() >= this.numTotalContainers && pendingPools.size() != 0) {
                //Ask for new containers for pending pools
                LOG.warn("The number of allocated containers has exceeded number of total containers, but "
                        + "the pending pool size is still not 0. Asking for new containers from RM");
                for (RecommenderPool res : pendingPools) {
                    ContainerRequest containerAsk = setupContainerAskForRM(res.getTotalMemory(),
                            requestPriority);
                    LOG.info("NEW CONTAINER ASK: " + containerAsk);
                    amRMClient.addContainerRequest(containerAsk);
                }

            }

            if (newContainers.size() > 0) {
                LOG.info("Allocated " + newContainers.size() + " new containers");
                numAllocatedContainers.addAndGet(newContainers.size());
                for (Container container : newContainers) {
                    //Find matching recommender pool from pendingRecommender pools.
                    RecommenderPool pool = null;
                    for (RecommenderPool p : pendingPools) {
                        if (p.getTotalMemory() == container.getResource().getMemory()) {
                            pool = p;
                            break;
                        }
                    }
                    if (pool == null) {
                        LOG.warn("No Takers for Container " + container + " Releasing container");
                        amRMClient.releaseAssignedContainer(container.getId());
                    } else {
                        startContainer(container, pool);
                        //This pool has now got a container. Remove it from pending pools
                        pendingPools.remove(pool);
                    }
                }

            }

            onContainersCompleted(completedContainers);

        } catch (InterruptedException ex) {
        }
    }
    finish();

    return success;
}

From source file:eu.stratosphere.yarn.ApplicationMaster.java

License:Apache License

private void run() throws Exception {
    //Utils.logFilesInCurrentDirectory(LOG);
    // Initialize clients to ResourceManager and NodeManagers
    Configuration conf = Utils.initializeYarnConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Map<String, String> envs = System.getenv();
    final String currDir = envs.get(Environment.PWD.key());
    final String logDirs = envs.get(Environment.LOG_DIRS.key());
    final String ownHostname = envs.get(Environment.NM_HOST.key());
    final String appId = envs.get(Client.ENV_APP_ID);
    final String clientHomeDir = envs.get(Client.ENV_CLIENT_HOME_DIR);
    final String applicationMasterHost = envs.get(Environment.NM_HOST.key());
    final String remoteStratosphereJarPath = envs.get(Client.STRATOSPHERE_JAR_PATH);
    final String shipListString = envs.get(Client.ENV_CLIENT_SHIP_FILES);
    final String yarnClientUsername = envs.get(Client.ENV_CLIENT_USERNAME);
    final int taskManagerCount = Integer.valueOf(envs.get(Client.ENV_TM_COUNT));
    final int memoryPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_MEMORY));
    final int coresPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_CORES));

    int heapLimit = Utils.calculateHeapSize(memoryPerTaskManager);

    if (currDir == null) {
        throw new RuntimeException("Current directory unknown");
    }//w w w.  ja v a  2s .  c o m
    if (ownHostname == null) {
        throw new RuntimeException("Own hostname (" + Environment.NM_HOST + ") not set.");
    }
    LOG.info("Working directory " + currDir);

    // load Stratosphere configuration.
    Utils.getStratosphereConfiguration(currDir);

    final String localWebInterfaceDir = currDir + "/resources/"
            + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME;

    // Update yaml conf -> set jobManager address to this machine's address.
    FileInputStream fis = new FileInputStream(currDir + "/stratosphere-conf.yaml");
    BufferedReader br = new BufferedReader(new InputStreamReader(fis));
    Writer output = new BufferedWriter(new FileWriter(currDir + "/stratosphere-conf-modified.yaml"));
    String line;
    while ((line = br.readLine()) != null) {
        if (line.contains(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)) {
            output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n");
        } else if (line.contains(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY)) {
            output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + "\n");
        } else {
            output.append(line + "\n");
        }
    }
    // just to make sure.
    output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n");
    output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + localWebInterfaceDir + "\n");
    output.append(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY + ": " + logDirs + "\n");
    output.close();
    br.close();
    File newConf = new File(currDir + "/stratosphere-conf-modified.yaml");
    if (!newConf.exists()) {
        LOG.warn("modified yaml does not exist!");
    }

    Utils.copyJarContents("resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME,
            ApplicationMaster.class.getProtectionDomain().getCodeSource().getLocation().getPath());

    JobManager jm;
    {
        String pathToNepheleConfig = currDir + "/stratosphere-conf-modified.yaml";
        String[] args = { "-executionMode", "cluster", "-configDir", pathToNepheleConfig };

        // start the job manager
        jm = JobManager.initialize(args);

        // Start info server for jobmanager
        jm.startInfoServer();
    }

    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(conf);
    rmClient.start();

    NMClient nmClient = NMClient.createNMClient();
    nmClient.init(conf);
    nmClient.start();

    // Register with ResourceManager
    LOG.info("registering ApplicationMaster");
    rmClient.registerApplicationMaster(applicationMasterHost, 0, "http://" + applicationMasterHost + ":"
            + GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, "undefined"));

    // Priority for worker containers - priorities are intra-application
    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(0);

    // Resource requirements for worker containers
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(memoryPerTaskManager);
    capability.setVirtualCores(coresPerTaskManager);

    // Make container requests to ResourceManager
    for (int i = 0; i < taskManagerCount; ++i) {
        ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority);
        LOG.info("Requesting TaskManager container " + i);
        rmClient.addContainerRequest(containerAsk);
    }

    LocalResource stratosphereJar = Records.newRecord(LocalResource.class);
    LocalResource stratosphereConf = Records.newRecord(LocalResource.class);

    // register Stratosphere Jar with remote HDFS
    final Path remoteJarPath = new Path(remoteStratosphereJarPath);
    Utils.registerLocalResource(fs, remoteJarPath, stratosphereJar);

    // register conf with local fs.
    Path remoteConfPath = Utils.setupLocalResource(conf, fs, appId,
            new Path("file://" + currDir + "/stratosphere-conf-modified.yaml"), stratosphereConf,
            new Path(clientHomeDir));
    LOG.info("Prepared localresource for modified yaml: " + stratosphereConf);

    boolean hasLog4j = new File(currDir + "/log4j.properties").exists();
    // prepare the files to ship
    LocalResource[] remoteShipRsc = null;
    String[] remoteShipPaths = shipListString.split(",");
    if (!shipListString.isEmpty()) {
        remoteShipRsc = new LocalResource[remoteShipPaths.length];
        { // scope for i
            int i = 0;
            for (String remoteShipPathStr : remoteShipPaths) {
                if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) {
                    continue;
                }
                remoteShipRsc[i] = Records.newRecord(LocalResource.class);
                Path remoteShipPath = new Path(remoteShipPathStr);
                Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[i]);
                i++;
            }
        }
    }

    // respect custom JVM options in the YAML file
    final String javaOpts = GlobalConfiguration.getString(ConfigConstants.STRATOSPHERE_JVM_OPTIONS, "");

    // Obtain allocated containers and launch
    int allocatedContainers = 0;
    int completedContainers = 0;
    while (allocatedContainers < taskManagerCount) {
        AllocateResponse response = rmClient.allocate(0);
        for (Container container : response.getAllocatedContainers()) {
            LOG.info("Got new Container for TM " + container.getId() + " on host "
                    + container.getNodeId().getHost());
            ++allocatedContainers;

            // Launch container by create ContainerLaunchContext
            ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);

            String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts;
            if (hasLog4j) {
                tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                        + "/taskmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties";
            }
            tmCommand += " eu.stratosphere.yarn.YarnTaskManagerRunner -configDir . " + " 1>"
                    + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>"
                    + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log";
            ctx.setCommands(Collections.singletonList(tmCommand));

            LOG.info("Starting TM with command=" + tmCommand);

            // copy resources to the TaskManagers.
            Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2);
            localResources.put("stratosphere.jar", stratosphereJar);
            localResources.put("stratosphere-conf.yaml", stratosphereConf);

            // add ship resources
            if (!shipListString.isEmpty()) {
                Preconditions.checkNotNull(remoteShipRsc);
                for (int i = 0; i < remoteShipPaths.length; i++) {
                    localResources.put(new Path(remoteShipPaths[i]).getName(), remoteShipRsc[i]);
                }
            }

            ctx.setLocalResources(localResources);

            // Setup CLASSPATH for Container (=TaskTracker)
            Map<String, String> containerEnv = new HashMap<String, String>();
            Utils.setupEnv(conf, containerEnv); //add stratosphere.jar to class path.
            containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername);

            ctx.setEnvironment(containerEnv);

            UserGroupInformation user = UserGroupInformation.getCurrentUser();
            try {
                Credentials credentials = user.getCredentials();
                DataOutputBuffer dob = new DataOutputBuffer();
                credentials.writeTokenStorageToStream(dob);
                ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
                ctx.setTokens(securityTokens);
            } catch (IOException e) {
                LOG.warn("Getting current user info failed when trying to launch the container"
                        + e.getMessage());
            }

            LOG.info("Launching container " + allocatedContainers);
            nmClient.startContainer(container, ctx);
        }
        for (ContainerStatus status : response.getCompletedContainersStatuses()) {
            ++completedContainers;
            LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:"
                    + completedContainers);
            LOG.info("Diagnostics " + status.getDiagnostics());
        }
        Thread.sleep(100);
    }

    // Now wait for containers to complete

    while (completedContainers < taskManagerCount) {
        AllocateResponse response = rmClient.allocate(completedContainers / taskManagerCount);
        for (ContainerStatus status : response.getCompletedContainersStatuses()) {
            ++completedContainers;
            LOG.info("Completed container " + status.getContainerId() + ". Total Completed:"
                    + completedContainers);
            LOG.info("Diagnostics " + status.getDiagnostics());
        }
        Thread.sleep(5000);
    }
    LOG.info("Shutting down JobManager");
    jm.shutdown();

    // Un-register with ResourceManager
    rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", "");

}

From source file:io.hops.metadata.util.TestHopYarnAPIUtilities.java

License:Apache License

@Test(timeout = 60000)
public void test() throws Exception {
    MockRM rm = new MockRM(conf);
    rm.start();/*from  ww  w.  j a va 2  s.  co  m*/
    //register two NodeManagers
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB);
    MockNM nm2 = rm.registerNode("127.0.0.2:5678", 4 * GB);
    Thread.sleep(1000);
    //verify that they are persisted in the db
    List<FiCaSchedulerNode> dbNodes = RMUtilities.getAllFiCaSchedulerNodes();
    assertEquals(2, dbNodes.size());

    //submit an application of 2GB memory
    RMApp app1 = rm.submitApp(2048);
    Thread.sleep(2000);
    //at this point tables :
    //ha_fifoscheduler_apps,
    //ha_schedulerapplication
    //should container one row for the specific app. You can verify that looking at the db.

    //also the tables :
    //ha_appschedulinginfo,
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();

    //get the scheduler
    FifoScheduler fifoScheduler = (FifoScheduler) rm.getResourceScheduler();

    //get applications map
    Map<ApplicationId, org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication> apps = fifoScheduler
            .getSchedulerApplications();
    //get nodes map
    Map<NodeId, org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode> nodes = fifoScheduler
            .getNodes();

    //get current application
    org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication sA = apps
            .get(app1.getApplicationId());
    //get currentApplicationAttemptId
    SchedulerApplicationAttempt saAttempt = sA.getCurrentAppAttempt();

    List<org.apache.hadoop.yarn.api.records.ResourceRequest> reqs = saAttempt.getAppSchedulingInfo()
            .getAllResourceRequests();
    //retrieve requests from the database
    Map<String, List<ResourceRequest>> requests = RMUtilities.getAllResourceRequests();
    List<ResourceRequest> dbReqs = requests.get(attempt1.getAppAttemptId().toString());

    //compare
    assertEquals(reqs.size(), dbReqs.size());

    //its time to kick the scheduling, 2GB given to AM1, remaining 4GB on nm1
    nm1.nodeHeartbeat(true);
    //Thread.sleep(1000);

    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();

    SchedulerNodeReport report_nm1 = fifoScheduler.getNodeReport(nm1.getNodeId());
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemory());

    //retrieve used Resource from database
    Resource resource = RMUtilities.getResource(nm1.getNodeId().toString(), Resource.USED,
            Resource.FICASCHEDULERNODE);
    assertEquals(2 * GB, resource.getMemory());

    //get newlyAllocatedContainers
    List<RMContainer> newlyAllocatedContainers = saAttempt.getNewlyAllocatedContainers();
    //get launchedContainers
    org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode ficaNode = nodes
            .get(nm1.getNodeId());
    List<RMContainer> launchedContainers = ficaNode.getRunningContainers();

    //get liveContainers
    Map<ContainerId, RMContainer> liveContainers = saAttempt.getLiveContainersMap();

    //retrieve newlyAllocatedContainers from the database
    List<FiCaSchedulerAppNewlyAllocatedContainers> dbNewlyAlCont = RMUtilities
            .getNewlyAllocatedContainers(attempt1.getAppAttemptId().toString());
    //retrieve launchedContainers from the database
    Map<String, List<LaunchedContainers>> map = RMUtilities.getAllLaunchedContainers();
    List<LaunchedContainers> dbLaunchCont = map.get(nm1.getNodeId().toString());
    //retrieve liveContainers from the database
    Map<String, List<FiCaSchedulerAppLiveContainers>> mapLiveCont = RMUtilities.getAllLiveContainers();
    List<FiCaSchedulerAppLiveContainers> dbLiveCont = mapLiveCont.get(attempt1.getAppAttemptId().toString());

    assertEquals(newlyAllocatedContainers.size(), dbNewlyAlCont.size());
    assertEquals(launchedContainers.size(), dbLaunchCont.size());
    assertEquals(liveContainers.size(), dbLiveCont.size());

    //submit a second application of 2GB memory
    RMApp app2 = rm.submitApp(2048);
    // kick the scheduling, 2GB given to AM, remaining 2 GB on nm2
    nm2.nodeHeartbeat(true);
    RMAppAttempt attempt2 = app2.getCurrentAppAttempt();
    MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId());
    am2.registerAppAttempt();
    SchedulerNodeReport report_nm2 = fifoScheduler.getNodeReport(nm2.getNodeId());
    Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemory());

    //retrieve used Resource from database
    Resource hopResource2 = RMUtilities.getResource(nm2.getNodeId().toString(), Resource.USED,
            Resource.FICASCHEDULERNODE);
    assertEquals(2 * GB, hopResource2.getMemory());

    // add request for containers
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, GB, 1, 1);
    AllocateResponse alloc1Response = am1.schedule(); // send the request

    // add request for containers
    am2.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 3 * GB, 0, 1);
    AllocateResponse alloc2Response = am2.schedule(); // send the request

    // kick the scheduler, 1 GB and 3 GB given to AM1 and AM2, remaining 0 GB to nm1
    nm1.nodeHeartbeat(true);
    //Thread.sleep(1000);
    while (alloc1Response.getAllocatedContainers().size() < 1) {
        LOG.info("Waiting for containers to be created for app 1...");
        Thread.sleep(1000);
        alloc1Response = am1.schedule();
    }
    while (alloc2Response.getAllocatedContainers().size() < 1) {
        LOG.info("Waiting for containers to be created for app 2...");
        Thread.sleep(1000);
        alloc2Response = am2.schedule();
    }
    // kick the scheduler, nothing given remaining 2 GB to nm2
    nm2.nodeHeartbeat(true);
    Thread.sleep(1000);

    List<Container> allocated1 = alloc1Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated1.size());
    Assert.assertEquals(1 * GB, allocated1.get(0).getResource().getMemory());
    Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());

    List<Container> allocated2 = alloc2Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated2.size());
    Assert.assertEquals(3 * GB, allocated2.get(0).getResource().getMemory());
    Assert.assertEquals(nm1.getNodeId(), allocated2.get(0).getNodeId());

    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    report_nm2 = rm.getResourceScheduler().getNodeReport(nm2.getNodeId());
    Assert.assertEquals(0, report_nm1.getAvailableResource().getMemory());
    Assert.assertEquals(2 * GB, report_nm2.getAvailableResource().getMemory());

    Resource nm1AvailableResource = RMUtilities.getResource(nm1.getNodeId().toString(), Resource.AVAILABLE,
            Resource.FICASCHEDULERNODE);
    Resource nm2AvailableResource = RMUtilities.getResource(nm2.getNodeId().toString(), Resource.AVAILABLE,
            Resource.FICASCHEDULERNODE);

    assertEquals(0, nm1AvailableResource.getMemory());
    assertEquals(2 * GB, nm2AvailableResource.getMemory());

    Assert.assertEquals(6 * GB, report_nm1.getUsedResource().getMemory());
    Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemory());

    Resource nm1UsedResource = RMUtilities.getResource(nm1.getNodeId().toString(), Resource.USED,
            Resource.FICASCHEDULERNODE);
    Resource nm2UsedResource = RMUtilities.getResource(nm2.getNodeId().toString(), Resource.USED,
            Resource.FICASCHEDULERNODE);

    assertEquals(6 * GB, nm1UsedResource.getMemory());
    assertEquals(2 * GB, nm2UsedResource.getMemory());

    Thread.sleep(2000);
    rm.stop();
    Thread.sleep(2000);
}

From source file:org.apache.flink.yarn.ApplicationMaster.java

License:Apache License

private void run() throws Exception {
    //Utils.logFilesInCurrentDirectory(LOG);
    // Initialize clients to ResourceManager and NodeManagers
    Configuration conf = Utils.initializeYarnConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Map<String, String> envs = System.getenv();
    final String currDir = envs.get(Environment.PWD.key());
    final String logDirs = envs.get(Environment.LOG_DIRS.key());
    final String ownHostname = envs.get(Environment.NM_HOST.key());
    final String appId = envs.get(Client.ENV_APP_ID);
    final String clientHomeDir = envs.get(Client.ENV_CLIENT_HOME_DIR);
    final String applicationMasterHost = envs.get(Environment.NM_HOST.key());
    final String remoteFlinkJarPath = envs.get(Client.FLINK_JAR_PATH);
    final String shipListString = envs.get(Client.ENV_CLIENT_SHIP_FILES);
    final String yarnClientUsername = envs.get(Client.ENV_CLIENT_USERNAME);
    final int taskManagerCount = Integer.valueOf(envs.get(Client.ENV_TM_COUNT));
    final int memoryPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_MEMORY));
    final int coresPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_CORES));

    int heapLimit = Utils.calculateHeapSize(memoryPerTaskManager);

    if (currDir == null) {
        throw new RuntimeException("Current directory unknown");
    }/*from   w w w .  ja  v a2 s .c  o  m*/
    if (ownHostname == null) {
        throw new RuntimeException("Own hostname (" + Environment.NM_HOST + ") not set.");
    }
    LOG.info("Working directory " + currDir);

    // load Flink configuration.
    Utils.getFlinkConfiguration(currDir);

    final String localWebInterfaceDir = currDir + "/resources/"
            + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME;

    // Update yaml conf -> set jobManager address to this machine's address.
    FileInputStream fis = new FileInputStream(currDir + "/flink-conf.yaml");
    BufferedReader br = new BufferedReader(new InputStreamReader(fis));
    Writer output = new BufferedWriter(new FileWriter(currDir + "/flink-conf-modified.yaml"));
    String line;
    while ((line = br.readLine()) != null) {
        if (line.contains(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)) {
            output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n");
        } else if (line.contains(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY)) {
            output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + "\n");
        } else {
            output.append(line + "\n");
        }
    }
    // just to make sure.
    output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY + ": " + ownHostname + "\n");
    output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY + ": " + localWebInterfaceDir + "\n");
    output.append(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY + ": " + logDirs + "\n");
    output.close();
    br.close();
    File newConf = new File(currDir + "/flink-conf-modified.yaml");
    if (!newConf.exists()) {
        LOG.warn("modified yaml does not exist!");
    }

    Utils.copyJarContents("resources/" + ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME,
            ApplicationMaster.class.getProtectionDomain().getCodeSource().getLocation().getPath());

    JobManager jm;
    {
        String pathToNepheleConfig = currDir + "/flink-conf-modified.yaml";
        String[] args = { "-executionMode", "cluster", "-configDir", pathToNepheleConfig };

        // start the job manager
        jm = JobManager.initialize(args);

        // Start info server for jobmanager
        jm.startInfoServer();
    }

    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(conf);
    rmClient.start();

    NMClient nmClient = NMClient.createNMClient();
    nmClient.init(conf);
    nmClient.start();

    // Register with ResourceManager
    LOG.info("registering ApplicationMaster");
    rmClient.registerApplicationMaster(applicationMasterHost, 0, "http://" + applicationMasterHost + ":"
            + GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, "undefined"));

    // Priority for worker containers - priorities are intra-application
    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(0);

    // Resource requirements for worker containers
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(memoryPerTaskManager);
    capability.setVirtualCores(coresPerTaskManager);

    // Make container requests to ResourceManager
    for (int i = 0; i < taskManagerCount; ++i) {
        ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority);
        LOG.info("Requesting TaskManager container " + i);
        rmClient.addContainerRequest(containerAsk);
    }

    LocalResource flinkJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);

    // register Flink Jar with remote HDFS
    final Path remoteJarPath = new Path(remoteFlinkJarPath);
    Utils.registerLocalResource(fs, remoteJarPath, flinkJar);

    // register conf with local fs.
    Path remoteConfPath = Utils.setupLocalResource(conf, fs, appId,
            new Path("file://" + currDir + "/flink-conf-modified.yaml"), flinkConf, new Path(clientHomeDir));
    LOG.info("Prepared localresource for modified yaml: " + flinkConf);

    boolean hasLog4j = new File(currDir + "/log4j.properties").exists();
    // prepare the files to ship
    LocalResource[] remoteShipRsc = null;
    String[] remoteShipPaths = shipListString.split(",");
    if (!shipListString.isEmpty()) {
        remoteShipRsc = new LocalResource[remoteShipPaths.length];
        { // scope for i
            int i = 0;
            for (String remoteShipPathStr : remoteShipPaths) {
                if (remoteShipPathStr == null || remoteShipPathStr.isEmpty()) {
                    continue;
                }
                remoteShipRsc[i] = Records.newRecord(LocalResource.class);
                Path remoteShipPath = new Path(remoteShipPathStr);
                Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[i]);
                i++;
            }
        }
    }

    // respect custom JVM options in the YAML file
    final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    // Obtain allocated containers and launch
    int allocatedContainers = 0;
    int completedContainers = 0;
    while (allocatedContainers < taskManagerCount) {
        AllocateResponse response = rmClient.allocate(0);
        for (Container container : response.getAllocatedContainers()) {
            LOG.info("Got new Container for TM " + container.getId() + " on host "
                    + container.getNodeId().getHost());
            ++allocatedContainers;

            // Launch container by create ContainerLaunchContext
            ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);

            String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts;
            if (hasLog4j) {
                tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                        + "/taskmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties";
            }
            tmCommand += " org.apache.flink.yarn.YarnTaskManagerRunner -configDir . " + " 1>"
                    + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>"
                    + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log";
            ctx.setCommands(Collections.singletonList(tmCommand));

            LOG.info("Starting TM with command=" + tmCommand);

            // copy resources to the TaskManagers.
            Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2);
            localResources.put("flink.jar", flinkJar);
            localResources.put("flink-conf.yaml", flinkConf);

            // add ship resources
            if (!shipListString.isEmpty()) {
                Preconditions.checkNotNull(remoteShipRsc);
                for (int i = 0; i < remoteShipPaths.length; i++) {
                    localResources.put(new Path(remoteShipPaths[i]).getName(), remoteShipRsc[i]);
                }
            }

            ctx.setLocalResources(localResources);

            // Setup CLASSPATH for Container (=TaskTracker)
            Map<String, String> containerEnv = new HashMap<String, String>();
            Utils.setupEnv(conf, containerEnv); //add flink.jar to class path.
            containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername);

            ctx.setEnvironment(containerEnv);

            UserGroupInformation user = UserGroupInformation.getCurrentUser();
            try {
                Credentials credentials = user.getCredentials();
                DataOutputBuffer dob = new DataOutputBuffer();
                credentials.writeTokenStorageToStream(dob);
                ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
                ctx.setTokens(securityTokens);
            } catch (IOException e) {
                LOG.warn("Getting current user info failed when trying to launch the container"
                        + e.getMessage());
            }

            LOG.info("Launching container " + allocatedContainers);
            nmClient.startContainer(container, ctx);
        }
        for (ContainerStatus status : response.getCompletedContainersStatuses()) {
            ++completedContainers;
            LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:"
                    + completedContainers);
            LOG.info("Diagnostics " + status.getDiagnostics());
        }
        Thread.sleep(100);
    }

    // Now wait for containers to complete

    while (completedContainers < taskManagerCount) {
        AllocateResponse response = rmClient.allocate(completedContainers / taskManagerCount);
        for (ContainerStatus status : response.getCompletedContainersStatuses()) {
            ++completedContainers;
            LOG.info("Completed container " + status.getContainerId() + ". Total Completed:"
                    + completedContainers);
            LOG.info("Diagnostics " + status.getDiagnostics());
        }
        Thread.sleep(5000);
    }
    LOG.info("Shutting down JobManager");
    jm.shutdown();

    // Un-register with ResourceManager
    rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", "");

}

From source file:org.apache.flink.yarn.appMaster.ApplicationMaster.java

License:Apache License

/**
 * Run a Thread to allocate new containers until taskManagerCount
 * is correct again./*from   w ww  .  j a  v a  2s . c  om*/
 */
private void allocateOutstandingContainer(StringBuffer containerDiag) throws Exception {

    // respect custom JVM options in the YAML file
    final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    int allocatedContainers = 0;
    while (allocatedContainers < taskManagerCount) {
        AllocateResponse response = rmClient.allocate(0);
        for (Container container : response.getAllocatedContainers()) {
            LOG.info("Got new Container for TM " + container.getId() + " on host "
                    + container.getNodeId().getHost());
            ++allocatedContainers;

            // Launch container by create ContainerLaunchContext
            ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);

            String tmCommand = "$JAVA_HOME/bin/java -Xmx" + heapLimit + "m " + javaOpts;
            if (hasLogback) {
                tmCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                        + "/taskmanager-logback" + ".log\" -Dlogback.configurationFile=file:logback.xml";
            }
            tmCommand += " " + YarnTaskManagerRunner.class.getName() + " -configDir . " + " 1>"
                    + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stdout.log" + " 2>"
                    + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/taskmanager-stderr.log";
            ctx.setCommands(Collections.singletonList(tmCommand));

            LOG.info("Starting TM with command=" + tmCommand);

            ctx.setLocalResources(taskManagerLocalResources);

            // Setup CLASSPATH for Container (=TaskTracker)
            Map<String, String> containerEnv = new HashMap<String, String>();
            Utils.setupEnv(conf, containerEnv); //add flink.jar to class path.
            containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername);

            ctx.setEnvironment(containerEnv);

            UserGroupInformation user = UserGroupInformation.getCurrentUser();
            try {
                Credentials credentials = user.getCredentials();
                DataOutputBuffer dob = new DataOutputBuffer();
                credentials.writeTokenStorageToStream(dob);
                ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
                ctx.setTokens(securityTokens);
            } catch (IOException e) {
                LOG.warn("Getting current user info failed when trying to launch the container", e);
            }

            LOG.info("Launching container " + allocatedContainers);
            nmClient.startContainer(container, ctx);
        }
        for (ContainerStatus status : response.getCompletedContainersStatuses()) {
            ++completedContainers;
            LOG.info("Completed container (while allocating) " + status.getContainerId() + ". Total Completed:"
                    + completedContainers);
            LOG.info("Diagnostics " + status.getDiagnostics());
            // status.
            logDeadContainer(status, containerDiag);
        }
        Thread.sleep(100);
    }
}

From source file:org.apache.hama.bsp.JobImpl.java

License:Apache License

@Override
public JobState startJob() throws Exception {

    this.allocatedContainers = new ArrayList<Container>(numBSPTasks);
    NMTokenCache nmTokenCache = new NMTokenCache();
    while (allocatedContainers.size() < numBSPTasks) {
        AllocateRequest req = AllocateRequest.newInstance(lastResponseID, 0.0f,
                createBSPTaskRequest(numBSPTasks - allocatedContainers.size(), taskMemoryInMb, priority),
                releasedContainers, null);

        AllocateResponse allocateResponse = resourceManager.allocate(req);
        for (NMToken token : allocateResponse.getNMTokens()) {
            nmTokenCache.setToken(token.getNodeId().toString(), token.getToken());
        }/*  w  w w  .j a  v a2s  .com*/

        LOG.info("Got response ID: " + allocateResponse.getResponseId() + " with num of containers: "
                + allocateResponse.getAllocatedContainers().size() + " and following resources: "
                + allocateResponse.getAvailableResources().getMemory() + "mb");
        this.lastResponseID = allocateResponse.getResponseId();

        this.allocatedContainers.addAll(allocateResponse.getAllocatedContainers());

        LOG.info("Waiting to allocate " + (numBSPTasks - allocatedContainers.size()) + " more containers...");

        Thread.sleep(1000l);
    }

    LOG.info("Got " + allocatedContainers.size() + " containers!");

    int id = 0;
    for (Container allocatedContainer : allocatedContainers) {
        LOG.info("Launching task on a new container." + ", containerId=" + allocatedContainer.getId()
                + ", containerNode=" + allocatedContainer.getNodeId().getHost() + ":"
                + allocatedContainer.getNodeId().getPort() + ", containerNodeURI="
                + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory"
                + allocatedContainer.getResource().getMemory());

        // Connect to ContainerManager on the allocated container
        String user = conf.get("bsp.user.name");
        if (user == null) {
            user = System.getenv(ApplicationConstants.Environment.USER.name());
        }

        ContainerManagementProtocol cm = null;
        try {
            cm = getContainerManagementProtocolProxy(yarnRPC,
                    nmTokenCache.getToken(allocatedContainer.getNodeId().toString()),
                    allocatedContainer.getNodeId(), user);
        } catch (Exception e) {
            LOG.error("Failed to create ContainerManager...");
            if (cm != null)
                yarnRPC.stopProxy(cm, conf);
            e.printStackTrace();
        }

        BSPTaskLauncher runnableLaunchContainer = new BSPTaskLauncher(id, allocatedContainer, cm, conf, jobFile,
                jobId);

        launchers.put(id, runnableLaunchContainer);
        runnableLaunchContainer.start();
        completionQueue.add(runnableLaunchContainer);
        id++;
    }

    LOG.info("Waiting for tasks to finish...");
    state = JobState.RUNNING;
    int completed = 0;

    List<Integer> cleanupTasks = new ArrayList<Integer>();
    while (completed != numBSPTasks) {
        for (BSPTaskLauncher task : completionQueue) {
            BSPTaskStatus returnedTask = task.poll();
            // if our task returned with a finished state
            if (returnedTask != null) {
                if (returnedTask.getExitStatus() != 0) {
                    LOG.error("Task with id \"" + returnedTask.getId() + "\" failed!");
                    cleanupTask(returnedTask.getId());
                    state = JobState.FAILED;
                    return state;
                } else {
                    LOG.info("Task \"" + returnedTask.getId() + "\" sucessfully finished!");
                    completed++;
                    LOG.info("Waiting for " + (numBSPTasks - completed) + " tasks to finish!");
                }
                cleanupTasks.add(returnedTask.getId());
            }
        }
        Thread.sleep(1000L);
    }

    for (Integer stopId : cleanupTasks) {
        cleanupTask(stopId);
    }

    state = JobState.SUCCESS;
    return state;
}