org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService.java — source code

Java tutorial

Introduction

Below is the source code for org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService.java.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.dag.history.logging.ats;

import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse.TimelinePutError;
import org.apache.hadoop.yarn.client.api.TimelineClient;
import org.apache.tez.common.ReflectionUtils;
import org.apache.tez.common.security.HistoryACLPolicyManager;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.history.DAGHistoryEvent;
import org.apache.tez.dag.history.HistoryEventType;
import org.apache.tez.dag.history.events.DAGSubmittedEvent;
import org.apache.tez.dag.history.logging.HistoryLoggingService;
import org.apache.tez.dag.records.TezDAGID;

import com.google.common.annotations.VisibleForTesting;

/**
 * {@link HistoryLoggingService} implementation that publishes Tez DAG history
 * events to the YARN Application Timeline Server (ATS).
 *
 * <p>Events arrive via {@link #handle(DAGHistoryEvent)}, are buffered in an
 * unbounded queue, and are drained in batches by a dedicated background
 * thread. Pre-warm DAG events are filtered out, and timeline ACL domains are
 * applied per session / per DAG when a {@link HistoryACLPolicyManager} is
 * available.
 *
 * <p>Thread-safety: {@link #handle(DAGHistoryEvent)} may be called from any
 * thread; batch draining and publishing are serialized on {@code lock}
 * between the handling thread and {@link #serviceStop()}.
 */
public class ATSHistoryLoggingService extends HistoryLoggingService {

    private static final Log LOG = LogFactory.getLog(ATSHistoryLoggingService.class);

    // Unbounded buffer between event producers and the publishing thread.
    private LinkedBlockingQueue<DAGHistoryEvent> eventQueue = new LinkedBlockingQueue<DAGHistoryEvent>();

    private Thread eventHandlingThread;
    private AtomicBoolean stopped = new AtomicBoolean(false);
    // Counters touched only by the handling thread, for periodic queue-size logging.
    private int eventCounter = 0;
    private int eventsProcessed = 0;
    // Serializes batch drain+publish between the handling thread and serviceStop().
    private final Object lock = new Object();

    @VisibleForTesting
    TimelineClient timelineClient;

    // DAGs whose events must be dropped (currently: pre-warm DAGs).
    private HashSet<TezDAGID> skippedDAGs = new HashSet<TezDAGID>();
    // Per-DAG timeline domain overrides captured from DAG_SUBMITTED events;
    // entries are released once the DAG's DAG_FINISHED event is published.
    private Map<TezDAGID, String> dagDomainIdMap = new HashMap<TezDAGID, String>();
    private long maxTimeToWaitOnShutdown;
    private boolean waitForeverOnShutdown = false;

    private int maxEventsPerBatch;
    private long maxPollingTimeMillis;

    private String sessionDomainId;
    private static final String atsHistoryACLManagerClassName =
            "org.apache.tez.dag.history.ats.acls.ATSHistoryACLPolicyManager";
    private HistoryACLPolicyManager historyACLPolicyManager;

    public ATSHistoryLoggingService() {
        super(ATSHistoryLoggingService.class.getName());
    }

    /**
     * Creates the timeline client and reads batching/flush/ACL configuration.
     * A negative flush timeout means "wait forever" on shutdown. Failure to
     * load the ACL manager is fatal unless disabled timeline domains are
     * explicitly allowed.
     */
    @Override
    public void serviceInit(Configuration conf) throws Exception {
        LOG.info("Initializing ATSService");
        timelineClient = TimelineClient.createTimelineClient();
        timelineClient.init(conf);
        maxTimeToWaitOnShutdown = conf.getLong(TezConfiguration.YARN_ATS_EVENT_FLUSH_TIMEOUT_MILLIS,
                TezConfiguration.YARN_ATS_EVENT_FLUSH_TIMEOUT_MILLIS_DEFAULT);
        maxEventsPerBatch = conf.getInt(TezConfiguration.YARN_ATS_MAX_EVENTS_PER_BATCH,
                TezConfiguration.YARN_ATS_MAX_EVENTS_PER_BATCH_DEFAULT);
        maxPollingTimeMillis = conf.getInt(TezConfiguration.YARN_ATS_MAX_POLLING_TIME_PER_EVENT,
                TezConfiguration.YARN_ATS_MAX_POLLING_TIME_PER_EVENT_DEFAULT);
        if (maxTimeToWaitOnShutdown < 0) {
            waitForeverOnShutdown = true;
        }
        sessionDomainId = conf.get(TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID);

        LOG.info("Using " + atsHistoryACLManagerClassName + " to manage Timeline ACLs");
        try {
            historyACLPolicyManager = ReflectionUtils.createClazzInstance(atsHistoryACLManagerClassName);
            historyACLPolicyManager.setConf(conf);
        } catch (TezUncheckedException e) {
            LOG.warn("Could not instantiate object for " + atsHistoryACLManagerClassName
                    + ". ACLs cannot be enforced correctly for history data in Timeline", e);
            if (!conf.getBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS,
                    TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS_DEFAULT)) {
                throw e;
            }
            // Best-effort mode: continue without ACL enforcement.
            historyACLPolicyManager = null;
        }

    }

    /**
     * Starts the timeline client and the background thread that drains the
     * event queue in batches and pushes them to ATS. The thread exits when
     * the service is stopped or the thread is interrupted; on interrupt it
     * finishes publishing the batch it already drained.
     */
    @Override
    public void serviceStart() {
        LOG.info("Starting ATSService");
        timelineClient.start();

        eventHandlingThread = new Thread(new Runnable() {
            @Override
            public void run() {
                List<DAGHistoryEvent> events = new LinkedList<DAGHistoryEvent>();
                boolean interrupted = false;
                while (!stopped.get() && !Thread.currentThread().isInterrupted() && !interrupted) {

                    // Log the size of the event-queue every so often.
                    if (eventCounter != 0 && eventCounter % 1000 == 0) {
                        if (eventsProcessed != 0 && !events.isEmpty()) {
                            LOG.info("Event queue stats" + ", eventsProcessedSinceLastUpdate=" + eventsProcessed
                                    + ", eventQueueSize=" + eventQueue.size());
                        }
                        eventCounter = 0;
                        eventsProcessed = 0;
                    } else {
                        ++eventCounter;
                    }

                    synchronized (lock) {
                        try {
                            getEventBatch(events);
                        } catch (InterruptedException e) {
                            // Finish processing events and then return
                            interrupted = true;
                        }

                        if (events.isEmpty()) {
                            continue;
                        }

                        eventsProcessed += events.size();
                        try {
                            handleEvents(events);
                        } catch (Exception e) {
                            LOG.warn("Error handling events", e);
                        }
                    }
                }
            }
        }, "HistoryEventHandlingThread");
        eventHandlingThread.start();
    }

    /**
     * Stops the handling thread and then attempts to flush the remaining
     * queue backlog, bounded by the configured shutdown timeout (or forever
     * when the timeout is negative).
     */
    @Override
    public void serviceStop() {
        LOG.info("Stopping ATSService" + ", eventQueueBacklog=" + eventQueue.size());
        stopped.set(true);
        if (eventHandlingThread != null) {
            eventHandlingThread.interrupt();
        }
        synchronized (lock) {
            if (!eventQueue.isEmpty()) {
                LOG.warn("ATSService being stopped" + ", eventQueueBacklog=" + eventQueue.size()
                        + ", maxTimeLeftToFlush=" + maxTimeToWaitOnShutdown + ", waitForever="
                        + waitForeverOnShutdown);
                long startTime = appContext.getClock().getTime();
                long endTime = startTime + maxTimeToWaitOnShutdown;
                boolean interrupted = false;
                List<DAGHistoryEvent> events = new LinkedList<DAGHistoryEvent>();
                while (!interrupted && (waitForeverOnShutdown || (endTime >= appContext.getClock().getTime()))) {
                    try {
                        getEventBatch(events);
                    } catch (InterruptedException e) {
                        LOG.info("ATSService interrupted while shutting down. Exiting." + " EventQueueBacklog="
                                + eventQueue.size());
                        // Publish whatever was drained before the interrupt,
                        // stop flushing, and restore the interrupt status so
                        // callers above us can observe it.
                        interrupted = true;
                        Thread.currentThread().interrupt();
                    }
                    if (events.isEmpty()) {
                        LOG.info("Event queue empty, stopping ATS Service");
                        break;
                    }
                    try {
                        handleEvents(events);
                    } catch (Exception e) {
                        LOG.warn("Error handling event", e);
                        break;
                    }
                }
            }
        }
        if (!eventQueue.isEmpty()) {
            LOG.warn("Did not finish flushing eventQueue before stopping ATSService" + ", eventQueueBacklog="
                    + eventQueue.size());
        }
        timelineClient.stop();
    }

    /**
     * Drains up to {@code maxEventsPerBatch} valid events from the queue into
     * {@code events} (cleared first). Returns early on an empty queue (after
     * one poll timeout) or right after a DAG_SUBMITTED event, whose payload
     * can be large and is therefore sent in its own batch.
     *
     * @throws InterruptedException if interrupted while polling; events
     *         drained before the interrupt remain in {@code events}
     */
    private void getEventBatch(List<DAGHistoryEvent> events) throws InterruptedException {
        events.clear();
        int counter = 0;
        while (counter < maxEventsPerBatch) {
            DAGHistoryEvent event = eventQueue.poll(maxPollingTimeMillis, TimeUnit.MILLISECONDS);
            if (event == null) {
                break;
            }
            if (!isValidEvent(event)) {
                continue;
            }
            ++counter;
            events.add(event);
            if (event.getHistoryEvent().getEventType().equals(HistoryEventType.DAG_SUBMITTED)) {
                // Special case this as it might be a large payload
                break;
            }
        }
    }

    /** Enqueues an event for asynchronous publishing to ATS. */
    public void handle(DAGHistoryEvent event) {
        eventQueue.add(event);
    }

    /**
     * Decides whether an event should be published, and maintains the
     * skipped-DAG and per-DAG-domain bookkeeping as DAG_SUBMITTED /
     * DAG_FINISHED events pass through. Events of pre-warm DAGs are dropped.
     */
    private boolean isValidEvent(DAGHistoryEvent event) {
        HistoryEventType eventType = event.getHistoryEvent().getEventType();
        TezDAGID dagId = event.getDagID();

        if (eventType.equals(HistoryEventType.DAG_SUBMITTED)) {
            DAGSubmittedEvent dagSubmittedEvent = (DAGSubmittedEvent) event.getHistoryEvent();
            String dagName = dagSubmittedEvent.getDAGName();
            if (dagName != null && dagName.startsWith(TezConstants.TEZ_PREWARM_DAG_NAME_PREFIX)) {
                // Skip recording pre-warm DAG events
                skippedDAGs.add(dagId);
                return false;
            }
            if (historyACLPolicyManager != null) {
                String dagDomainId = dagSubmittedEvent.getConf().get(TezConfiguration.YARN_ATS_ACL_DAG_DOMAIN_ID);
                if (dagDomainId != null) {
                    dagDomainIdMap.put(dagId, dagDomainId);
                }
            }
        }
        if (eventType.equals(HistoryEventType.DAG_FINISHED)) {
            // Remove from set to keep size small
            // No more events should be seen after this point.
            if (skippedDAGs.remove(dagId)) {
                return false;
            }
        }

        if (dagId != null && skippedDAGs.contains(dagId)) {
            // Skip pre-warm DAGs
            return false;
        }

        return true;
    }

    /**
     * Converts a batch of history events into timeline entities, stamps each
     * with the appropriate ACL domain (DAG-specific override when present,
     * otherwise the session domain), and posts them to ATS in one call.
     * Publish failures are logged and swallowed; publishing is best-effort.
     */
    private void handleEvents(List<DAGHistoryEvent> events) {
        TimelineEntity[] entities = new TimelineEntity[events.size()];
        for (int i = 0; i < events.size(); ++i) {
            DAGHistoryEvent event = events.get(i);
            String domainId = sessionDomainId;
            TezDAGID dagId = event.getDagID();

            if (historyACLPolicyManager != null && dagId != null) {
                if (dagDomainIdMap.containsKey(dagId)) {
                    domainId = dagDomainIdMap.get(dagId);
                }
            }

            entities[i] = HistoryEventTimelineConversion.convertToTimelineEntity(event.getHistoryEvent());
            if (historyACLPolicyManager != null) {
                if (domainId != null && !domainId.isEmpty()) {
                    historyACLPolicyManager.updateTimelineEntityDomain(entities[i], domainId);
                }
            }

            // DAG_FINISHED is the last event published for a DAG; drop its
            // domain mapping so the map does not grow without bound in a
            // long-running session AM.
            if (dagId != null
                    && event.getHistoryEvent().getEventType().equals(HistoryEventType.DAG_FINISHED)) {
                dagDomainIdMap.remove(dagId);
            }
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Sending event batch to Timeline, batchSize=" + events.size());
        }
        try {
            TimelinePutResponse response = timelineClient.putEntities(entities);
            if (response != null && !response.getErrors().isEmpty()) {
                int count = response.getErrors().size();
                for (int i = 0; i < count; ++i) {
                    TimelinePutError err = response.getErrors().get(i);
                    if (err.getErrorCode() != 0) {
                        // The errors list is not index-aligned with the
                        // submitted entities array, so report the entity id
                        // carried by the error itself rather than entities[i].
                        LOG.warn("Could not post history event to ATS" + ", atsPutError=" + err.getErrorCode()
                                + ", entityId=" + err.getEntityId());
                    }
                }
            }
            // Do nothing additional, ATS client library should handle throttling
            // or auto-disable as needed
        } catch (Exception e) {
            LOG.warn("Could not handle history events", e);
        }
    }

}