Example usage for org.apache.hadoop.yarn.exceptions YarnRuntimeException YarnRuntimeException

List of usage examples for org.apache.hadoop.yarn.exceptions YarnRuntimeException YarnRuntimeException

Introduction

On this page you can find example usages of the org.apache.hadoop.yarn.exceptions.YarnRuntimeException constructor.

Prototype

public YarnRuntimeException(String message) 

Source Link

Usage

From source file:com.datatorrent.stram.StreamingAppMasterService.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException
 */
@SuppressWarnings("SleepWhileInLoop")
private void execute() throws YarnException, IOException {
    // Overview of what this method does, in order:
    //   1. logs the current user's tokens and derives a token refresh interval from DAG attributes
    //   2. registers with the ResourceManager and reads the cluster's max memory / vcores
    //   3. uses a short-lived YarnClient to detect a duplicate running application and to
    //      seed the node report list
    //   4. loops until appDone: refresh tokens, run pending tasks, sleep, request/release
    //      containers, launch newly allocated containers, process completed containers
    //   5. reports the final status through finishApplication
    LOG.info("Starting ApplicationMaster");
    final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("number of tokens: {}", credentials.getAllTokens().size());
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.debug("token: {}", token);
    }
    final Configuration conf = getConfig();
    // refresh interval = anticipatory factor * the shorter of the HDFS and RM token life times
    long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math
            .min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME)));
    long expiryTime = System.currentTimeMillis() + tokenLifeTime;
    // NOTE(review): the message says "expiry token time" but the value logged is tokenLifeTime,
    // not expiryTime - confirm which one was intended
    LOG.debug(" expiry token time {}", tokenLifeTime);
    String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE);

    // Register self with ResourceManager
    RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0,
            appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    int maxVcores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max mem {}m and vcores {} capabililty of resources in this cluster ", maxMem, maxVcores);

    // for locality relaxation fall back
    // maps each outstanding start request to (loop iteration in which it was last issued, the issued request)
    Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps
            .newHashMap();

    // Setup heartbeat emitter
    // TODO poll RM every now and then with an empty request to let RM know that we are alive
    // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting:
    // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
    // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter
    // is not required.

    // starts at -1 so that the first pass through the main loop is iteration 0
    int loopCounter = -1;
    // container ids to hand back to the RM with the next allocate call
    List<ContainerId> releasedContainers = new ArrayList<ContainerId>();
    int numTotalContainers = 0;
    // keep track of already requested containers to not request them again while waiting for allocation
    int numRequestedContainers = 0;
    int numReleasedContainers = 0;
    // incremented for every new request; later used to match an allocation back to its request
    int nextRequestPriority = 0;
    ResourceRequestHandler resourceRequestor = new ResourceRequestHandler();

    YarnClient clientRMService = YarnClient.createYarnClient();

    try {
        // YARN-435
        // we need getClusterNodes to populate the initial node list,
        // subsequent updates come through the heartbeat response
        clientRMService.init(conf);
        clientRMService.start();

        // a non-null report means an application with the same name and user is already started;
        // in that case this master finishes as FAILED and returns immediately
        ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService,
                dag.getAttributes().get(DAG.APPLICATION_NAME),
                UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID));
        if (ar != null) {
            appDone = true;
            dnmgr.shutdownDiagnosticsMessage = String.format(
                    "Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.",
                    ar.getApplicationId().toString(), ar.getName(), ar.getUser());
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finishApplication(FinalApplicationStatus.FAILED, numTotalContainers);
            return;
        }
        resourceRequestor.updateNodeReports(clientRMService.getNodeReports());
    } catch (Exception e) {
        // NOTE(review): this also wraps failures of the duplicate-application check and of
        // finishApplication above, although the message only mentions the node report
        throw new RuntimeException("Failed to retrieve cluster nodes report.", e);
    } finally {
        // the client is only needed for the start-up checks above
        clientRMService.stop();
    }

    // check for previously allocated containers
    // as of 2.2, containers won't survive AM restart, but this will change in the future - YARN-1490
    checkContainerStatus();
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
            YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);

    while (!appDone) {
        loopCounter++;

        // refresh tokens only when security is on, the computed expiry has passed and a keytab is configured
        if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime
                && hdfsKeyTabFile != null) {
            String applicationId = appAttemptID.getApplicationId().toString();
            expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, "." + File.separator + "tmp",
                    applicationId, conf, hdfsKeyTabFile, credentials, rmAddress, true);
        }

        // drain and run every task queued in pendingTasks on this thread
        Runnable r;
        while ((r = this.pendingTasks.poll()) != null) {
            r.run();
        }

        // log current state
        /*
         * LOG.info("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total=" +
         * numTotalContainers + ", requested=" + numRequestedContainers + ", completed=" + numCompletedContainers +
         * ", failed=" + numFailedContainers + ", currentAllocated=" + this.allAllocatedContainers.size());
         */
        // Sleep before each loop when asking RM for containers
        // to avoid flooding RM with spurious requests when it
        // need not have any available containers
        try {
            sleep(1000);
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt is only logged; the thread's interrupt status is not
            // restored and the loop carries on - confirm this is intentional
            LOG.info("Sleep interrupted " + e.getMessage());
        }

        // Setup request to be sent to RM to allocate containers
        List<ContainerRequest> containerRequests = new ArrayList<ContainerRequest>();
        List<ContainerRequest> removedContainerRequests = new ArrayList<ContainerRequest>();

        // request containers for pending deploy requests
        if (!dnmgr.containerStartRequests.isEmpty()) {
            StreamingContainerAgent.ContainerStartRequest csr;
            while ((csr = dnmgr.containerStartRequests.poll()) != null) {
                // clamp the requested memory and vcores to the maximum reported at registration
                if (csr.container.getRequiredMemoryMB() > maxMem) {
                    LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.",
                            csr.container.getRequiredMemoryMB(), maxMem);
                    csr.container.setRequiredMemoryMB(maxMem);
                }
                if (csr.container.getRequiredVCores() > maxVcores) {
                    LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.",
                            csr.container.getRequiredVCores(), maxVcores);
                    csr.container.setRequiredVCores(maxVcores);
                }
                csr.container.setResourceRequestPriority(nextRequestPriority++);
                // presumably the second argument toggles the locality constraint (see the
                // "locality relaxation fall back" note above) - TODO confirm in ResourceRequestHandler
                ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true);
                MutablePair<Integer, ContainerRequest> pair = new MutablePair<Integer, ContainerRequest>(
                        loopCounter, cr);
                requestedResources.put(csr, pair);
                containerRequests.add(cr);
            }
        }

        if (!requestedResources.isEmpty()) {
            //resourceRequestor.clearNodeMapping();
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                // a request that has been outstanding for more than NUMBER_MISSED_HEARTBEATS loop
                // iterations is withdrawn and re-issued with the second argument set to false
                if ((loopCounter - entry.getValue().getKey()) > NUMBER_MISSED_HEARTBEATS) {
                    StreamingContainerAgent.ContainerStartRequest csr = entry.getKey();
                    removedContainerRequests.add(entry.getValue().getRight());
                    ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false);
                    entry.getValue().setLeft(loopCounter);
                    entry.getValue().setRight(cr);
                    containerRequests.add(cr);
                }
            }
        }

        // NOTE(review): re-issued requests from the loop above are counted here again as well
        numTotalContainers += containerRequests.size();
        numRequestedContainers += containerRequests.size();
        AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests,
                releasedContainers);
        if (amResp.getAMCommand() != null) {
            LOG.info(" statement executed:{}", amResp.getAMCommand());
            // NOTE(review): both switch arms throw the same exception, so any non-null
            // command from the RM aborts this method
            switch (amResp.getAMCommand()) {
            case AM_RESYNC:
            case AM_SHUTDOWN:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
            default:
                throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");

            }
        }
        // the release list was passed to sendContainerAskToRM above; start a fresh list
        releasedContainers.clear();

        // Retrieve list of allocated containers from the response
        List<Container> newAllocatedContainers = amResp.getAllocatedContainers();
        // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size());
        numRequestedContainers -= newAllocatedContainers.size();
        // one timestamp is shared by all start events recorded in this iteration
        long timestamp = System.currentTimeMillis();
        for (Container allocatedContainer : newAllocatedContainers) {

            LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode="
                    + allocatedContainer.getNodeId() + ", containerNodeURI="
                    + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory"
                    + allocatedContainer.getResource().getMemory() + ", priority"
                    + allocatedContainer.getPriority());
            // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString());

            // match the allocation to an outstanding request by priority; if none matches,
            // the container is treated as surplus and released
            boolean alreadyAllocated = true;
            StreamingContainerAgent.ContainerStartRequest csr = null;
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources
                    .entrySet()) {
                if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority()
                        .getPriority()) {
                    alreadyAllocated = false;
                    csr = entry.getKey();
                    break;
                }
            }

            if (alreadyAllocated) {
                LOG.info("Releasing {} as resource with priority {} was already assigned",
                        allocatedContainer.getId(), allocatedContainer.getPriority());
                releasedContainers.add(allocatedContainer.getId());
                numReleasedContainers++;
                // undo the decrement applied for this container when the response was read
                numRequestedContainers++;
                continue;
            }
            if (csr != null) {
                requestedResources.remove(csr);
            }

            // allocate resource to container
            ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(),
                    allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(),
                    allocatedContainer.getResource().getMemory(),
                    allocatedContainer.getResource().getVirtualCores(),
                    allocatedContainer.getNodeHttpAddress());
            StreamingContainerAgent sca = dnmgr.assignContainer(resource, null);

            if (sca == null) {
                // allocated container no longer needed, add release request
                // NOTE(review): unlike the surplus case above, numReleasedContainers is not incremented here
                LOG.warn("Container {} allocated but nothing to deploy, going to release this container.",
                        allocatedContainer.getId());
                releasedContainers.add(allocatedContainer.getId());
            } else {
                AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer);
                this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder);
                // tokens stay null when security is disabled
                ByteBuffer tokens = null;
                if (UserGroupInformation.isSecurityEnabled()) {
                    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                    Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken(
                            ugi.getUserName(), heartbeatListener.getAddress());
                    // kept on the holder so the token can be cancelled when the container completes
                    allocatedContainerHolder.delegationToken = delegationToken;
                    //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress());
                    tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken);
                }
                LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer,
                        nmClient, sca, tokens);
                // Thread launchThread = new Thread(runnableLaunchContainer);
                // launchThreads.add(launchThread);
                // launchThread.start();
                launchContainer.run(); // communication with NMs is now async

                // record container start event
                StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(),
                        allocatedContainer.getNodeId().toString());
                ev.setTimestamp(timestamp);
                dnmgr.recordEventAsync(ev);
            }
        }

        // track node updates for future locality constraint allocations
        // TODO: it seems 2.0.4-alpha doesn't give us any updates
        resourceRequestor.updateNodeReports(amResp.getUpdatedNodes());

        // Check the completed containers
        List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses();
        // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size());
        for (ContainerStatus containerStatus : completedContainers) {
            LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state="
                    + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus()
                    + ", diagnostics=" + containerStatus.getDiagnostics());

            // non complete containers should not be here
            assert (containerStatus.getState() == ContainerState.COMPLETE);

            // stop tracking the container and cancel its delegation token, if one was issued
            AllocatedContainer allocatedContainer = allocatedContainers
                    .remove(containerStatus.getContainerId().toString());
            if (allocatedContainer != null && allocatedContainer.delegationToken != null) {
                UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName());
            }
            int exitStatus = containerStatus.getExitStatus();
            if (0 != exitStatus) {
                // only containers that were tracked in allocatedContainers count as failed
                if (allocatedContainer != null) {
                    numFailedContainers.incrementAndGet();
                }
                //          if (exitStatus == 1) {
                //            // non-recoverable StreamingContainer failure
                //            appDone = true;
                //            finalStatus = FinalApplicationStatus.FAILED;
                //            dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId();
                //            LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage);
                //          }
                //          else {
                // Recoverable failure or process killed (externally or via stop request by AM)
                // also occurs when a container was released by the application but never assigned/launched
                LOG.debug("Container {} failed or killed.", containerStatus.getContainerId());
                dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString());
                //          }
            } else {
                // container completed successfully
                numCompletedContainers.incrementAndGet();
                LOG.info("Container completed successfully." + ", containerId="
                        + containerStatus.getContainerId());
            }

            String containerIdStr = containerStatus.getContainerId().toString();
            dnmgr.removeContainerAgent(containerIdStr);

            // record container stop event
            StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus());
            ev.setReason(containerStatus.getDiagnostics());
            dnmgr.recordEventAsync(ev);
        }

        // exit conditions: a forced shutdown ends as FAILED; otherwise the loop ends as
        // SUCCEEDED once nothing is allocated, nothing is outstanding and nothing is queued
        if (dnmgr.forcedShutdown) {
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finalStatus = FinalApplicationStatus.FAILED;
            appDone = true;
        } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0
                && dnmgr.containerStartRequests.isEmpty()) {
            LOG.debug("Exiting as no more containers are allocated or requested");
            finalStatus = FinalApplicationStatus.SUCCEEDED;
            appDone = true;
        }

        LOG.debug("Current application state: loop=" + loopCounter + ", appDone=" + appDone + ", total="
                + numTotalContainers + ", requested=" + numRequestedContainers + ", released="
                + numReleasedContainers + ", completed=" + numCompletedContainers + ", failed="
                + numFailedContainers + ", currentAllocated=" + allocatedContainers.size());

        // monitor child containers
        dnmgr.monitorHeartbeat();
    }

    finishApplication(finalStatus, numTotalContainers);
}

From source file:com.datatorrent.stram.StreamingContainerParent.java

License:Apache License

/**
 * Builds and starts the RPC server that serves {@link StreamingContainerUmbilicalProtocol}
 * and stores the address it can be reached at in {@code this.address}.
 *
 * <p>When {@code hadoop.security.authorization} is enabled in the configuration, the
 * server's service ACLs are refreshed with a policy that lists only the umbilical protocol.
 *
 * @throws YarnRuntimeException wrapping any {@link IOException} raised while building or
 *         configuring the server
 */
protected void startRpcServer() {
    final Configuration configuration = getConfig();
    LOG.info("Config: " + configuration);
    LOG.info("Listener thread count " + listenerThreadCount);
    try {
        server = new RPC.Builder(configuration)
                .setProtocol(StreamingContainerUmbilicalProtocol.class)
                .setInstance(this)
                .setBindAddress("0.0.0.0")
                .setPort(0)
                .setNumHandlers(listenerThreadCount)
                .setSecretManager(tokenSecretManager)
                .setVerbose(false)
                .build();

        // Service-level authorization is opt-in and defaults to off.
        final boolean authorizationEnabled = configuration
                .getBoolean(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false);
        if (authorizationEnabled) {
            final PolicyProvider umbilicalPolicy = new PolicyProvider() {
                @Override
                public Service[] getServices() {
                    final Service umbilicalService = new Service(
                            StreamingContainerUmbilicalProtocol.class.getName(),
                            StreamingContainerUmbilicalProtocol.class);
                    return new Service[] { umbilicalService };
                }
            };
            server.refreshServiceAcl(configuration, umbilicalPolicy);
        }

        server.start();
        this.address = NetUtils.getConnectAddress(server);
        LOG.info("Container callback server listening at " + this.address);
    } catch (IOException ioe) {
        throw new YarnRuntimeException(ioe);
    }
}

From source file:disAMS.AMRMClient.Impl.AMRMClientImpl.java

License:Apache License

/**
 * Creates the {@link ApplicationMasterProtocol} proxy to the ResourceManager, stores it in
 * {@code rmClient}, and then delegates to the superclass start routine.
 *
 * @throws YarnRuntimeException wrapping an {@link IOException} raised while creating the proxy
 * @throws Exception whatever {@code super.serviceStart()} throws
 */
@Override
protected void serviceStart() throws Exception {
    final YarnConfiguration yarnConf = new YarnConfiguration(getConfig());
    try {
        rmClient = ClientRMProxy.createRMProxy(yarnConf, ApplicationMasterProtocol.class);
    } catch (IOException ioe) {
        // surfaced as an unchecked exception, with the I/O failure kept as the cause
        throw new YarnRuntimeException(ioe);
    }
    super.serviceStart();
}

From source file:org.apache.hoya.yarn.appmaster.state.AppState.java

License:Apache License

/**
 * Resolves a role name to its status by looking the name up in {@code roles} and
 * delegating to the id-based lookup.
 *
 * @param name role name used as the key into {@code roles}
 * @return the status returned by the id-based {@code lookupRoleStatus} overload
 * @throws YarnRuntimeException if {@code roles} has no entry for {@code name}
 */
@Override
public RoleStatus lookupRoleStatus(String name) throws YarnRuntimeException {
    final ProviderRole role = roles.get(name);
    if (role != null) {
        return lookupRoleStatus(role.id);
    }
    throw new YarnRuntimeException("Unknown role " + name);
}

From source file:org.apache.slider.server.appmaster.state.AppState.java

License:Apache License

/**
 * Looks up the status of the role registered under the given name.
 *
 * @param name key into the {@code roles} map
 * @return the result of the id-based {@code lookupRoleStatus} overload for that role
 * @throws YarnRuntimeException when no role is registered under {@code name}
 */
public RoleStatus lookupRoleStatus(String name) throws YarnRuntimeException {
    final ProviderRole matchedRole = roles.get(name);
    if (null == matchedRole) {
        // unknown names are reported to the caller rather than mapped to a default
        throw new YarnRuntimeException("Unknown role " + name);
    }
    return lookupRoleStatus(matchedRole.id);
}

From source file:org.apache.slider.server.appmaster.state.AppState.java

License:Apache License

/**
 * Container start event handler; throws an exception on problems.
 *
 * <p>Increments the started-container counter, stamps the owning role instance with the
 * current time, removes the container from the starting set, marks the instance live,
 * bumps the role's started count and records the container as launched.
 *
 * @param containerId container that is to be started
 * @return the role instance that owns the container
 * @throws YarnRuntimeException if the container is not owned, the instance has no role,
 *         or the container is not in the starting set
 */
@VisibleForTesting
public RoleInstance innerOnNodeManagerContainerStarted(ContainerId containerId) {
    incStartedCountainerCount();
    RoleInstance instance = getOwnedContainer(containerId);
    if (instance == null) {
        //serious problem
        throw new YarnRuntimeException("Container not in active containers start " + containerId);
    }
    if (instance.role == null) {
        throw new YarnRuntimeException("Component instance has no instance name " + instance);
    }
    instance.startTime = now();
    RoleInstance starting = getStartingNodes().remove(containerId);
    if (null == starting) {
        // The message is built by concatenation, so it must not carry a format
        // placeholder; the previous text leaked a literal "%s" into the message.
        throw new YarnRuntimeException("Container " + containerId + " is already started");
    }
    instance.state = ClusterDescription.STATE_LIVE;
    RoleStatus roleStatus = lookupRoleStatus(instance.roleId);
    roleStatus.incStarted();
    Container container = instance.container;
    addLaunchedContainer(container, instance);
    return instance;
}

From source file:org.apache.tajo.master.container.TajoRecordFactoryProvider.java

License:Apache License

/**
 * Loads the named class and returns the result of invoking its no-argument {@code get}
 * method with a {@code null} receiver (i.e. as a static method).
 *
 * @param factoryClassName fully qualified name of the factory class to load
 * @return whatever the class's {@code get()} method returns
 * @throws YarnRuntimeException wrapping the reflective failure if the class cannot be
 *         found, has no public no-argument {@code get} method, the method is not
 *         accessible, or the method itself throws
 */
private static Object getFactoryClassInstance(String factoryClassName) {
    try {
        Class<?> clazz = Class.forName(factoryClassName);
        // Use the explicit zero-argument forms. Passing a bare null to a varargs
        // parameter is ambiguous (javac warns about an inexact argument type);
        // both APIs treat a null array the same as an empty one.
        Method method = clazz.getMethod("get");
        method.setAccessible(true);
        return method.invoke(null);
    } catch (ClassNotFoundException e) {
        throw new YarnRuntimeException(e);
    } catch (NoSuchMethodException e) {
        throw new YarnRuntimeException(e);
    } catch (InvocationTargetException e) {
        throw new YarnRuntimeException(e);
    } catch (IllegalAccessException e) {
        throw new YarnRuntimeException(e);
    }
}

From source file:org.apache.tajo.master.rm.YarnTajoResourceManager.java

License:Apache License

/**
 * Stores the configuration, connects the YARN client and creates the
 * {@link ApplicationMasterProtocol} proxy to the ResourceManager scheduler address,
 * running the proxy creation as the current user.
 *
 * @param conf configuration to keep and to read the scheduler address from
 * @throws YarnRuntimeException wrapping an {@link IOException} if the current user
 *         cannot be determined
 */
@Override
public void init(Configuration conf) {
    this.conf = conf;
    connectYarnClient();

    final YarnConfiguration yarnConfiguration = new YarnConfiguration(conf);
    final YarnRPC yarnRpc = YarnRPC.create(conf);
    final InetSocketAddress schedulerAddress = conf.getSocketAddr(
            YarnConfiguration.RM_SCHEDULER_ADDRESS,
            YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS,
            YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT);

    final UserGroupInformation caller;
    try {
        caller = UserGroupInformation.getCurrentUser();
    } catch (IOException ioe) {
        throw new YarnRuntimeException(ioe);
    }

    // The proxy is obtained inside doAs so that it is created on behalf of the current user.
    final PrivilegedAction<ApplicationMasterProtocol> openProxy = new PrivilegedAction<ApplicationMasterProtocol>() {
        @Override
        public ApplicationMasterProtocol run() {
            final Object proxy = yarnRpc.getProxy(ApplicationMasterProtocol.class, schedulerAddress,
                    yarnConfiguration);
            return (ApplicationMasterProtocol) proxy;
        }
    };
    rmClient = caller.doAs(openProxy);
}

From source file:org.apache.tez.dag.app.rm.UtilizationTable.java

License:Apache License

/**
 * Creates the {@link UtilizationProtocol} proxy used to communicate with Unicorn.
 *
 * @param conf configuration handed to {@code ClientRMProxy.createRMProxy}
 * @return the created protocol proxy
 * @throws YarnRuntimeException wrapping an {@link IOException} raised while creating the proxy
 */
protected UtilizationProtocol createUtilizationProtocolProxy(Configuration conf) throws YarnRuntimeException {
    final UtilizationProtocol proxy;
    try {
        proxy = ClientRMProxy.createRMProxy(conf, UtilizationProtocol.class);
    } catch (IOException ioe) {
        throw new YarnRuntimeException(ioe);
    }
    return proxy;
}