com.thinkbiganalytics.nifi.provenance.ProvenanceEventCollector.java Source code

Java tutorial

Introduction

Here is the source code for com.thinkbiganalytics.nifi.provenance.ProvenanceEventCollector.java

Source

package com.thinkbiganalytics.nifi.provenance;

/*-
 * #%L
 * thinkbig-nifi-provenance-repo
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.google.common.collect.Lists;
import com.thinkbiganalytics.nifi.provenance.cache.FeedFlowFileCacheUtil;
import com.thinkbiganalytics.nifi.provenance.jms.ProvenanceEventActiveMqWriter;
import com.thinkbiganalytics.nifi.provenance.model.BatchFeedProcessorEvents;
import com.thinkbiganalytics.nifi.provenance.model.FeedFlowFile;
import com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO;
import com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTOHolder;
import com.thinkbiganalytics.nifi.provenance.model.util.ProvenanceEventUtil;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

/**
 * Process a Kylo managed ProvenanceEvent. If indicated as a Stream ({@link ProvenanceEventRecordDTO#isStream()}) the system will just generate Statistics {@link
 * com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatistics} grouping the events by Feed and Processor Id. Otherwise, if not indicated as a Stream, the event will be
 * processed as a Batch job and the full Event will be sent to JMS.
 */
public class ProvenanceEventCollector {

    private static final Logger log = LoggerFactory.getLogger(ProvenanceEventCollector.class);

    /**
     * Value used for the max batch events per second when none (null) has been configured
     */
    private static final int DEFAULT_MAX_BATCH_FEED_JOB_EVENTS_PER_SECOND = 10;

    /**
     * Value used for the JMS group size when none (null) or a non-positive size has been configured
     */
    private static final int DEFAULT_JMS_EVENT_GROUP_SIZE = 50;

    @Autowired
    ProvenanceFeedLookup provenanceFeedLookup;
    @Autowired
    ProvenanceStatsCalculator statsCalculator;
    @Autowired
    FeedFlowFileCacheUtil cacheUtil;
    /**
     * The Map of Objects that will be grouped and sent over to Kylo as Batch Jobs/Steps for Operations Manager.
     * Keyed by {@link #mapKey(ProvenanceEventRecordDTO)} (feed name + ":" + component id)
     */
    Map<String, BatchFeedProcessorEvents> groupedBatchEventsByFeed = new ConcurrentHashMap<>();
    @Autowired
    private ProvenanceEventActiveMqWriter provenanceEventActiveMqWriter;
    /**
     * Safeguard against the system sending too many batch feed events through to Kylo
     * This is the  max events per second allowed for a feed/processor combo
     * if a given batch exceeds this threshold the remaining jobs will be suppressed
     * All jobs will calculate statistics about the feeds
     */
    private Integer maxBatchFeedJobEventsPerSecond = DEFAULT_MAX_BATCH_FEED_JOB_EVENTS_PER_SECOND;
    /**
     * Size of the group of events that will be batched and sent to Kylo
     */
    private Integer jmsEventGroupSize = DEFAULT_JMS_EVENT_GROUP_SIZE;

    /**
     * Create the collector with the writer used to publish the batched events.
     *
     * @param provenanceEventActiveMqWriter the writer that {@link #sendToJms()} hands the grouped batch events to
     */
    @Autowired
    public ProvenanceEventCollector(
            @Qualifier("provenanceEventActiveMqWriter") ProvenanceEventActiveMqWriter provenanceEventActiveMqWriter) {
        super();

        this.provenanceEventActiveMqWriter = provenanceEventActiveMqWriter;
    }

    /**
     * The key to use to batch up the events by Feed and Processor.
     *
     * @param event the event to process
     * @return the key based upon the feed name and the component id
     */
    private String mapKey(ProvenanceEventRecordDTO event) {
        return event.getFeedName() + ":" + event.getComponentId();
    }

    /**
     * determine if the event has Feed
     *
     * @param event the event to check
     * @return true if feed name is set, false if not
     */
    private boolean hasFeedName(ProvenanceEventRecordDTO event) {
        return StringUtils.isNotBlank(event.getFeedName());
    }

    /**
     * Process the event, adding it to the running {@link com.thinkbiganalytics.nifi.provenance.model.FeedFlowFile} , calculating statistics on the event, and if a Batch feed, grouping it by Feed
     * and Processor so the entire event can later be sent by {@link #sendToJms()}.
     *
     * A null event is ignored. Any exception raised while processing is logged and not rethrown.
     *
     * @param event the event to process
     */
    public void process(ProvenanceEventRecordDTO event) {
        try {
            if (event == null) {
                return;
            }
            try {
                cacheUtil.cacheAndBuildFlowFileGraph(event);

                //if the Flow gets an "Empty Queue" message it means a user emptied the queue that was stuck in a connection.
                // this means the flow cannot complete and will be treated as a failed flow and failed job
                if (ProvenanceEventUtil.isFlowFileQueueEmptied(event)) {
                    markQueueEmptiedAsFailure(event);
                }

                //only process if we can get the feed name, otherwise its no use
                if (hasFeedName(event)) {
                    //send the event off for stats processing
                    statsCalculator.calculateStats(event);

                    //batch up the data to send to kylo if the event is not indicated as a stream
                    if (!event.isStream()) {
                        batchEvent(event);
                    }
                } else {
                    log.error("Provenance: Cant find Feed for {} ", event);
                }
            } catch (FeedFlowFileNotFoundException e) {
                // placeholders are required, otherwise the event and flow file id are silently dropped from the message
                log.debug("Unable to find Root flowfile for event {} with FlowFile uuid {}", event, event.getFlowFileUuid());
            }
        } catch (Exception e) {
            log.error("ERROR PROCESSING EVENT! {}.  ERROR: {} ", event, e.getMessage(), e);
        }
    }

    /**
     * Flag an "Empty Queue" event as a failure and update its {@link FeedFlowFile}, when one is attached.
     *
     * @param event the event that was identified as a FlowFile queue being emptied
     */
    private void markQueueEmptiedAsFailure(ProvenanceEventRecordDTO event) {
        // a Drop event component id will be the connection, not the processor id. we will set the name of the component
        event.setComponentName("FlowFile Queue emptied");
        event.setIsFailure(true);
        event.setHasFailedEvents(true);

        FeedFlowFile feedFlowFile = event.getFeedFlowFile();
        if (feedFlowFile != null) {
            feedFlowFile.checkAndMarkComplete(event);
            // must stay inside the null check: calling this on a missing flow file would throw
            // and abort the stats/batch processing of the event
            feedFlowFile.incrementFailedEvents();
        }
    }

    /**
     * Group the Event by Feed and then by Processor
     *
     * @param event the event to process
     * @return true if added, false if suppressed
     */
    private boolean batchEvent(ProvenanceEventRecordDTO event) {
        if (event == null) {
            return false;
        }
        Integer maxEventsPerSecond = getMaxBatchFeedJobEventsPerSecond();
        BatchFeedProcessorEvents feedProcessorEvents = groupedBatchEventsByFeed
                .computeIfAbsent(mapKey(event),
                        key -> new BatchFeedProcessorEvents(event.getFeedName(), event.getComponentId(), maxEventsPerSecond));
        // re-apply the limit on every event so a changed configuration also reaches existing groups
        return feedProcessorEvents.setMaxEventsPerSecond(maxEventsPerSecond).add(event);
    }

    /**
     * Send both the Statistics {@link com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatisticsHolder} and the Batched Provenance Events {@link
     * ProvenanceEventRecordDTOHolder } to JMS for Kylo Operations Manager to process
     */
    public void sendToJms() {
        //gather the events each feed/processor group has ready to be sent
        List<ProvenanceEventRecordDTO> eventsSentToJms = groupedBatchEventsByFeed.values().stream()
                .flatMap(feedProcessorEventAggregate -> feedProcessorEventAggregate
                        .collectEventsToBeSentToJmsQueue().stream())
                .collect(Collectors.toList());
        sendBatchFeedEvents(eventsSentToJms);
        statsCalculator.sendStats();
    }

    /**
     * Send the Batched events over to JMS, split into groups of at most {@link #getJmsEventGroupSize()} events.
     * Nothing is sent when the list is null or empty.
     *
     * @param elements The events to send to JMS
     */
    private void sendBatchFeedEvents(List<ProvenanceEventRecordDTO> elements) {
        if (elements == null || elements.isEmpty()) {
            return;
        }
        for (List<ProvenanceEventRecordDTO> eventsSubList : Lists.partition(elements, getJmsEventGroupSize())) {
            ProvenanceEventRecordDTOHolder eventRecordDTOHolder = new ProvenanceEventRecordDTOHolder();
            // copy the partition into its own list before handing it to the holder
            eventRecordDTOHolder.setEvents(Lists.newArrayList(eventsSubList));
            provenanceEventActiveMqWriter.writeBatchEvents(eventRecordDTOHolder);
        }
    }

    /**
     * The Max number of events allowed per feed per second. This is passed in from the {@link com.thinkbiganalytics.nifi.provenance.reporting.KyloProvenanceEventReportingTask} configuration and used
     * to safeguard against processing too many Jobs/Event records as Batch Jobs.
     *
     * @return the maximum number of jobs/sec allowed for this feed to be considered a batch job, or the default (10) if it was set to null
     */
    public Integer getMaxBatchFeedJobEventsPerSecond() {
        return maxBatchFeedJobEventsPerSecond == null ? DEFAULT_MAX_BATCH_FEED_JOB_EVENTS_PER_SECOND : maxBatchFeedJobEventsPerSecond;
    }

    /**
     * Set the Max number of events allowed per feed per second.
     *
     * @param maxBatchFeedJobEventsPerSecond the new limit; null results in the default (10) being used
     */
    public void setMaxBatchFeedJobEventsPerSecond(Integer maxBatchFeedJobEventsPerSecond) {
        this.maxBatchFeedJobEventsPerSecond = maxBatchFeedJobEventsPerSecond;
    }

    /**
     * Returns the sub group size of events to group together before sending to JMS.
     *
     * @return the configured group size, or the default (50) if it was set to null or to a value less than 1
     */
    public Integer getJmsEventGroupSize() {
        // a size below 1 cannot be used to partition the events, so treat it like an unset value
        if (jmsEventGroupSize == null || jmsEventGroupSize < 1) {
            return DEFAULT_JMS_EVENT_GROUP_SIZE;
        }
        return jmsEventGroupSize;
    }

    /**
     * Set the sub group size of events to group together before sending to JMS.
     *
     * @param jmsEventGroupSize the new group size; null or a value less than 1 results in the default (50) being used
     */
    public void setJmsEventGroupSize(Integer jmsEventGroupSize) {
        this.jmsEventGroupSize = jmsEventGroupSize;
    }

}