Example usage for org.apache.hadoop.yarn.api.records ContainerExitStatus DISKS_FAILED

List of usage examples for org.apache.hadoop.yarn.api.records ContainerExitStatus DISKS_FAILED

Introduction

On this page you can find example usages of org.apache.hadoop.yarn.api.records ContainerExitStatus DISKS_FAILED.

Prototype

int DISKS_FAILED

To view the source code for org.apache.hadoop.yarn.api.records ContainerExitStatus DISKS_FAILED, click the Source Link.

Document

Set when the threshold number of NodeManager local directories or the threshold number of NodeManager log directories has gone bad.

Usage

From source file:org.apache.tez.dag.app.rm.TestTaskSchedulerManager.java

License:Apache License

/**
 * Checks how the scheduler handler reports a container whose exit status is
 * {@link ContainerExitStatus#DISKS_FAILED}: exactly one {@code C_COMPLETED} event must be
 * emitted for the container, flagged as a disk failure (not a preemption), carrying the
 * {@code NODE_DISK_ERROR} termination cause and the node manager diagnostics appended to the
 * "Container disk failed." prefix.
 */
@Test(timeout = 5000)
public void testContainerDiskFailed() throws IOException {
    final String nmDiagnostics = "NM disk failed.";

    schedulerHandler.init(new Configuration(false));
    schedulerHandler.start();

    // The same container id is shared by the AM-side container and the reported status.
    ContainerId containerId = mock(ContainerId.class);
    AMContainer amContainer = mock(AMContainer.class);
    when(mockAMContainerMap.get(containerId)).thenReturn(amContainer);
    when(amContainer.getContainerId()).thenReturn(containerId);

    // Completion status as reported for a container that failed because of bad disks.
    ContainerStatus diskFailedStatus = mock(ContainerStatus.class);
    when(diskFailedStatus.getContainerId()).thenReturn(containerId);
    when(diskFailedStatus.getDiagnostics()).thenReturn(nmDiagnostics);
    when(diskFailedStatus.getExitStatus()).thenReturn(ContainerExitStatus.DISKS_FAILED);

    TaskAttemptImpl taskAttempt = mock(TaskAttemptImpl.class);
    schedulerHandler.containerCompleted(0, taskAttempt, diskFailedStatus);

    // Exactly one event is expected, and it must be the container-completed event.
    assertEquals(1, mockEventHandler.events.size());
    Event firstEvent = mockEventHandler.events.get(0);
    assertEquals(AMContainerEventType.C_COMPLETED, firstEvent.getType());

    AMContainerEventCompleted completion = (AMContainerEventCompleted) firstEvent;
    assertEquals(containerId, completion.getContainerId());
    assertEquals("Container disk failed. NM disk failed.", completion.getDiagnostics());
    Assert.assertFalse(completion.isPreempted());
    assertTrue(completion.isDiskFailed());
    assertEquals(TaskAttemptTerminationCause.NODE_DISK_ERROR, completion.getTerminationCause());

    schedulerHandler.stop();
    schedulerHandler.close();
}

From source file:org.elasticsearch.hadoop.yarn.am.EsCluster.java

License:Apache License

/**
 * Requests the configured number of containers and runs the allocation loop until the
 * cluster finishes.
 *
 * <p>Every five seconds the loop calls {@code amRpc.allocate}, launches each newly allocated
 * container right away and inspects the completed container statuses. The loop ends as soon as
 * any container completes with an exit status other than {@link ContainerExitStatus#SUCCESS}
 * (in which case {@code clusterHasFailed} is set) or once the number of completed containers
 * equals the number of containers to allocate.
 *
 * <p>On the way out, successfully or not, the method waits up to 15 seconds and then always
 * invokes {@link #close()}. An interrupt received while waiting no longer skips
 * {@code close()}; the thread's interrupt status is restored once {@code close()} has returned.
 *
 * @throws EsYarnNmException if the thread is interrupted while waiting between heartbeats
 */
public void start() {
    running = true;
    nmRpc.start();

    UserGroupInformation.setConfiguration(cfg);

    log.info(String.format("Allocating Elasticsearch cluster with %d nodes", appConfig.containersToAllocate()));

    // register requests
    Resource capability = YarnCompat.resource(cfg, appConfig.containerMem(), appConfig.containerVCores());
    Priority prio = Priority.newInstance(appConfig.amPriority());

    for (int i = 0; i < appConfig.containersToAllocate(); i++) {
        // TODO: Add allocation (host/rack rules) - and disable location constraints
        ContainerRequest req = new ContainerRequest(capability, null, null, prio);
        amRpc.addContainerRequest(req);
    }

    // update status every 5 sec
    final long heartBeatRate = TimeUnit.SECONDS.toMillis(5);

    // start the allocation loop
    // when a new container is allocated, launch it right away

    int responseId = 0;

    // Remembers an interrupt so the flag can be restored only after close() has run;
    // restoring it earlier would leave the cleanup running on an interrupted thread.
    boolean interrupted = false;

    try {
        do {
            AllocateResponse alloc = amRpc.allocate(responseId++);
            List<Container> currentlyAllocated = alloc.getAllocatedContainers();
            for (Container container : currentlyAllocated) {
                launchContainer(container);
                allocatedContainers.add(container.getId());
            }

            if (currentlyAllocated.size() > 0) {
                int needed = appConfig.containersToAllocate() - allocatedContainers.size();
                if (needed > 0) {
                    log.info(String.format("%s containers allocated, %s remaining", allocatedContainers.size(),
                            needed));
                } else {
                    log.info(String.format("Fully allocated %s containers", allocatedContainers.size()));
                }
            }

            List<ContainerStatus> completed = alloc.getCompletedContainersStatuses();
            for (ContainerStatus status : completed) {
                // a status may be reported more than once; only handle each container the first time
                if (!completedContainers.contains(status.getContainerId())) {
                    ContainerId containerId = status.getContainerId();
                    completedContainers.add(containerId);

                    boolean containerSuccessful = false;

                    switch (status.getExitStatus()) {
                    case ContainerExitStatus.SUCCESS:
                        log.info(String.format("Container %s finished succesfully...", containerId));
                        containerSuccessful = true;
                        break;
                    case ContainerExitStatus.ABORTED:
                        log.warn(String.format("Container %s aborted...", containerId));
                        break;
                    case ContainerExitStatus.DISKS_FAILED:
                        // DISKS_FAILED reports bad NodeManager local/log directories,
                        // which is not the same thing as the container running out of disk.
                        log.warn(String.format("Container %s failed because NodeManager disks were marked bad...",
                                containerId));
                        break;
                    case ContainerExitStatus.PREEMPTED:
                        log.warn(String.format("Container %s preempted...", containerId));
                        break;
                    default:
                        log.warn(String.format("Container %s exited with an invalid/unknown exit code...",
                                containerId));
                    }

                    // a single unsuccessful container fails the whole cluster and ends the loop
                    if (!containerSuccessful) {
                        log.warn("Cluster has not completed succesfully...");
                        clusterHasFailed = true;
                        running = false;
                    }
                }
            }

            if (completedContainers.size() == appConfig.containersToAllocate()) {
                running = false;
            }

            if (running) {
                try {
                    Thread.sleep(heartBeatRate);
                } catch (InterruptedException ex) {
                    interrupted = true;
                    throw new EsYarnNmException("Cluster interrupted");
                }
            }
        } while (running);
    } finally {
        log.info("Cluster has completed running...");
        try {
            Thread.sleep(TimeUnit.SECONDS.toMillis(15));
        } catch (InterruptedException e) {
            // Do not throw from finally: that would skip close() and mask any exception
            // already propagating out of the allocation loop.
            interrupted = true;
            log.warn("Interrupted while waiting before shutdown; closing immediately");
        }
        try {
            close();
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }
}

From source file:yarnkit.utils.YarnUtils.java

License:Apache License

/**
 * Builds a human-readable description of how a container exited.
 *
 * <p>The first line names the container and describes its exit status. Unless the container
 * succeeded or the status carries no diagnostics, a second line
 * ({@code "Diagnostics: ..."}) with the reported diagnostics is appended.
 *
 * @param status the completed container's status; must not be {@code null}
 * @return the exit description, optionally followed by the diagnostics on a new line
 */
public static String getContainerExitStatusMessage(@Nonnull ContainerStatus status) {
    String containerId = status.getContainerId().toString();

    final String msg;
    final int exitStatus = status.getExitStatus();
    switch (exitStatus) {
    case ContainerExitStatus.SUCCESS: {
        msg = String.format("Container %s finished succesfully", containerId);
        break;
    }
    case ContainerExitStatus.ABORTED: {
        msg = String.format("Container %s aborted", containerId);
        break;
    }
    case ContainerExitStatus.DISKS_FAILED: {
        // DISKS_FAILED reports bad NodeManager local/log directories,
        // which is not the same thing as the container running out of disk.
        msg = String.format("Container %s failed because NodeManager disks were marked bad", containerId);
        break;
    }
    case ContainerExitStatus.PREEMPTED: {
        msg = String.format("Container %s preempted", containerId);
        break;
    }
    case ContainerExitStatus.INVALID:
    default: {
        msg = String.format("Container %s exited with an invalid/unknown exit code: %d", containerId,
                exitStatus);
        break;
    }
    }

    // Diagnostics are only appended for unsuccessful exits that actually reported some.
    String diagnostics = status.getDiagnostics();
    if (exitStatus == ContainerExitStatus.SUCCESS || Strings.isNullOrEmpty(diagnostics)) {
        return msg;
    } else {
        return msg + "\nDiagnostics: " + diagnostics;
    }
}