org.apache.streams.sysomos.provider.SysomosProvider.java Source code

Introduction

Here is the source code for org.apache.streams.sysomos.provider.SysomosProvider.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.streams.sysomos.provider;

import org.apache.streams.config.ComponentConfigurator;
import org.apache.streams.config.StreamsConfiguration;
import org.apache.streams.config.StreamsConfigurator;
import org.apache.streams.core.StreamsDatum;
import org.apache.streams.core.StreamsProvider;
import org.apache.streams.core.StreamsResultSet;
import org.apache.streams.data.util.RFC3339Utils;
import org.apache.streams.jackson.StreamsJacksonMapper;
import org.apache.streams.sysomos.SysomosConfiguration;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.util.concurrent.Uninterruptibles;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigParseOptions;
import org.apache.commons.lang.NotImplementedException;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.math.BigInteger;
import java.util.HashSet;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * Streams Provider for the Sysomos Heartbeat API
 * <p/>
 * Configuration:
 * The provider takes either a Map[String,Object] containing the mode (backfill-and-terminate OR continuous) and a
 * Map[String,String] of heartbeat IDs to target document IDs, or a String of the format
 * ${heartbeatId}=${documentId};...;${heartbeatId}=${documentId} (the separator characters expected by prepare).
 * Either configuration directs the provider to backfill to the specified document and then either terminate or
 * continue, depending on the mode flag.  Continuous mode is assumed, and is the only mode supported by the String
 * configuration.
 */
public class SysomosProvider implements StreamsProvider {

    public static final String STREAMS_ID = "SysomosProvider";
    public static final String ENDING_TIME_KEY = "addedBefore";
    public static final String STARTING_TIME_KEY = "addedAfter";
    public static final String MODE_KEY = "mode";
    public static final String STARTING_DOCS_KEY = "startingDocs";
    public static final int LATENCY = 10000; //Default minLatency for querying the Sysomos API in milliseconds
    public static final long PROVIDER_BATCH_SIZE = 10000L; //Default maximum size of the queue
    public static final long API_BATCH_SIZE = 1000L; //Default maximum size of an API request
    private static final Logger LOGGER = LoggerFactory.getLogger(SysomosProvider.class);
    private final ReadWriteLock lock = new ReentrantReadWriteLock();
    private final Set<String> completedHeartbeats = new HashSet<>();
    private final long maxQueued;
    private final long minLatency;
    private final long scheduledLatency;
    private final long maxApiBatch;
    protected volatile Queue<StreamsDatum> providerQueue;
    private SysomosClient client;
    private SysomosConfiguration config;
    private ScheduledExecutorService stream;
    private Map<String, String> documentIds;
    private Map<String, String> addedBefore;
    private Map<String, String> addedAfter;
    private Mode mode = Mode.CONTINUOUS;
    private boolean started = false;
    private AtomicInteger count;

    /**
     * SysomosProvider constructor.
     * @param sysomosConfiguration SysomosConfiguration
     */
    public SysomosProvider(SysomosConfiguration sysomosConfiguration) {
        this.config = sysomosConfiguration;
        this.client = new SysomosClient(sysomosConfiguration.getApiKey());
        this.maxQueued = sysomosConfiguration.getMaxBatchSize() == null ? PROVIDER_BATCH_SIZE
                : sysomosConfiguration.getMaxBatchSize();
        this.minLatency = sysomosConfiguration.getMinDelayMs() == null ? LATENCY
                : sysomosConfiguration.getMinDelayMs();
        this.scheduledLatency = sysomosConfiguration.getScheduledDelayMs() == null ? (LATENCY * 15)
                : sysomosConfiguration.getScheduledDelayMs();
        this.maxApiBatch = sysomosConfiguration.getApiBatchSize() == null ? API_BATCH_SIZE
                : sysomosConfiguration.getApiBatchSize();
        this.count = new AtomicInteger();
    }

    /**
     * To use from command line:
     * <p/>
     * Supply a HOCON configuration for the Sysomos provider (e.g. sysomos.conf)
     * <p/>
     * Launch using:
     * <p/>
     * mvn exec:java -Dexec.mainClass=org.apache.streams.sysomos.provider.SysomosProvider -Dexec.args="sysomos.conf articles.json"
     * @param args args
     * @throws Exception Exception
     */
    public static void main(String[] args) throws Exception {

        Preconditions.checkArgument(args.length >= 2);

        String configfile = args[0];
        String outfile = args[1];

        Config reference = ConfigFactory.load();
        File file = new File(configfile);
        Preconditions.checkArgument(file.exists(), "Configuration file %s does not exist", configfile);
        Config testResourceConfig = ConfigFactory.parseFileAnySyntax(file,
                ConfigParseOptions.defaults().setAllowMissing(false));

        Config typesafe = testResourceConfig.withFallback(reference).resolve();

        StreamsConfiguration streamsConfiguration = StreamsConfigurator.detectConfiguration(typesafe);
        SysomosConfiguration config = new ComponentConfigurator<>(SysomosConfiguration.class)
                .detectConfiguration(typesafe, "sysomos");
        SysomosProvider provider = new SysomosProvider(config);

        ObjectMapper mapper = StreamsJacksonMapper.getInstance();

        PrintStream outStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(outfile)));
        provider.prepare(config);
        provider.startStream();
        do {
            Uninterruptibles.sleepUninterruptibly(streamsConfiguration.getBatchFrequencyMs(),
                    TimeUnit.MILLISECONDS);
            for (StreamsDatum datum : provider.readCurrent()) {
                String json;
                try {
                    json = mapper.writeValueAsString(datum.getDocument());
                    outStream.println(json);
                } catch (JsonProcessingException ex) {
                    System.err.println(ex.getMessage());
                }
            }
        } while (provider.isRunning());
        provider.cleanUp();
        outStream.flush();
    }

    public SysomosConfiguration getConfig() {
        return config;
    }

    public void setConfig(SysomosConfiguration config) {
        this.config = config;
    }

    public Mode getMode() {
        return mode;
    }

    public long getMinLatency() {
        return minLatency;
    }

    public long getMaxApiBatch() {
        return maxApiBatch;
    }

    public SysomosClient getClient() {
        return client;
    }

    @Override
    public String getId() {
        return STREAMS_ID;
    }

    @Override
    public void startStream() {
        LOGGER.trace("Starting Producer");
        if (!started) {
            LOGGER.trace("Producer not started.  Initializing");
            stream = Executors.newScheduledThreadPool(getConfig().getHeartbeatIds().size() + 1);
            for (String heartbeatId : getConfig().getHeartbeatIds()) {
                Runnable task = createStream(heartbeatId);
                stream.scheduleWithFixedDelay(task, 0, this.scheduledLatency, TimeUnit.MILLISECONDS);
                LOGGER.info("Started producer task for heartbeat {}", heartbeatId);
            }
            started = true;
        }
    }

    @Override
    public StreamsResultSet readCurrent() {
        StreamsResultSet current;
        try {
            lock.writeLock().lock();
            LOGGER.debug("Creating new result set for {} items", providerQueue.size());
            count.addAndGet(providerQueue.size());
            current = new StreamsResultSet(providerQueue);
            providerQueue = constructQueue();
        } finally {
            lock.writeLock().unlock();
        }

        return current;
    }

    @Override
    public StreamsResultSet readNew(BigInteger bigInteger) {
        throw new NotImplementedException("readNew not currently implemented");
    }

    @Override
    public StreamsResultSet readRange(DateTime dateTime, DateTime dateTime2) {
        throw new NotImplementedException("readRange not currently implemented");
    }

    //If the provider queue still has data, we are still running.  Otherwise, we are running only while not every
    //heartbeat has signaled completion and the thread pool has neither been shut down nor terminated.
    @Override
    public boolean isRunning() {
        return providerQueue.size() > 0 || (completedHeartbeats.size() < this.getConfig().getHeartbeatIds().size()
                && !(stream.isTerminated() || stream.isShutdown()));
    }

    @Override
    public void prepare(Object configurationObject) {
        this.providerQueue = constructQueue();
        if (configurationObject instanceof Map) {
            extractConfigFromMap((Map) configurationObject);
        } else if (configurationObject instanceof String) {
            documentIds = Splitter.on(";").trimResults().withKeyValueSeparator("=")
                    .split((String) configurationObject);
        }
    }
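
    /*
     * A minimal sketch of the String form accepted by prepare(...) above; the
     * heartbeat and document identifiers are illustrative only.  The Splitter
     * expects ';' between pairs and '=' between heartbeat ID and document ID:
     *
     *   provider.prepare("HB-1=doc-100;HB-2=doc-200");
     *
     * which is equivalent to a startingDocs map of {HB-1=doc-100, HB-2=doc-200}
     * under the default CONTINUOUS mode.
     */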

    @Override
    public void cleanUp() {
        stream.shutdown(); // Disable new tasks from being submitted
        try {
            // Wait a while for existing tasks to terminate
            if (!stream.awaitTermination(60, TimeUnit.SECONDS)) {
                stream.shutdownNow(); // Cancel currently executing tasks
                // Wait a while for tasks to respond to being cancelled
                if (!stream.awaitTermination(60, TimeUnit.SECONDS)) {
                    LOGGER.error("Stream did not terminate");
                }
            }
        } catch (InterruptedException ie) {
            // (Re-)Cancel if current thread also interrupted
            stream.shutdownNow();
            // Preserve interrupt status
            Thread.currentThread().interrupt();
        }
    }

    /**
     * signalComplete.
     * @param heartbeatId heartbeatId
     */
    public void signalComplete(String heartbeatId) {
        try {
            this.lock.writeLock().lock();
            this.completedHeartbeats.add(heartbeatId);
            if (!this.isRunning()) {
                this.cleanUp();
            }
        } finally {
            this.lock.writeLock().unlock();
        }

    }

    protected void enqueueItem(StreamsDatum datum) {
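        // Note the lock inversion: producer threads take the READ lock so that multiple
        // heartbeat tasks can offer() to the thread-safe queue concurrently, while
        // readCurrent() takes the WRITE lock to exclude all producers while it swaps in
        // a fresh queue.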
        boolean success;
        do {
            try {
                pauseForSpace(); //Don't lock before this pause; we don't want to block the readCurrent method
                lock.readLock().lock();
                success = providerQueue.offer(datum);
                Thread.yield();
            } finally {
                lock.readLock().unlock();
            }
        } while (!success);
    }

    protected SysomosHeartbeatStream createStream(String heartbeatId) {
        String afterTime = addedAfter != null && addedAfter.containsKey(heartbeatId) ? addedAfter.get(heartbeatId)
                : null;
        String beforeTime = addedBefore != null && addedBefore.containsKey(heartbeatId)
                ? addedBefore.get(heartbeatId)
                : null;

        if (documentIds != null && documentIds.containsKey(heartbeatId)) {
            return new SysomosHeartbeatStream(this, heartbeatId, documentIds.get(heartbeatId));
        }
        if (afterTime != null) {
            if (beforeTime != null) {
                return new SysomosHeartbeatStream(this, heartbeatId, RFC3339Utils.parseToUTC(beforeTime),
                        RFC3339Utils.parseToUTC(afterTime));
            } else {
                return new SysomosHeartbeatStream(this, heartbeatId, null, RFC3339Utils.parseToUTC(afterTime));
            }
        }
        return new SysomosHeartbeatStream(this, heartbeatId);
    }
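
    /*
     * A hypothetical time-bounded configuration for createStream(...) above; the
     * values are illustrative only.  Given a prepare(...) Map containing
     *
     *   conf.put(STARTING_TIME_KEY, Collections.singletonMap("HB-1", "2014-01-01T00:00:00Z"));
     *   conf.put(ENDING_TIME_KEY, Collections.singletonMap("HB-1", "2014-02-01T00:00:00Z"));
     *
     * createStream("HB-1") returns a SysomosHeartbeatStream bounded by the two
     * RFC3339 timestamps parsed via RFC3339Utils.parseToUTC.
     */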

    /**
     * Wait for the queue size to be below threshold before allowing execution to continue on this thread.
     */
    protected void pauseForSpace() {
        while (this.providerQueue.size() >= maxQueued) {
            LOGGER.trace("Sleeping the current thread due to a full queue");
            try {
                Thread.sleep(100);
                LOGGER.trace("Resuming thread after wait period");
            } catch (InterruptedException ex) {
                LOGGER.warn("Thread was interrupted", ex);
            }
        }
    }

    @SuppressWarnings("unchecked")
    protected void extractConfigFromMap(Map configMap) {
        if (configMap.containsKey(MODE_KEY)) {
            Object configMode = configMap.get(MODE_KEY);
            if (!(configMode instanceof Mode)) {
                throw new IllegalStateException(
                        "Invalid configuration.  Mode must be an instance of the Mode enum but was " + configMode);
            }
            this.mode = (Mode) configMode;
        }
        if (configMap.containsKey(STARTING_DOCS_KEY)) {
            Object configIds = configMap.get(STARTING_DOCS_KEY);
            if (!(configIds instanceof Map)) {
                throw new IllegalStateException(
                        "Invalid configuration.  StartingDocs must be an instance of Map<String,String> but was "
                                + configIds);
            }
            this.documentIds = (Map) configIds;
        }
        if (configMap.containsKey(STARTING_TIME_KEY)) {
            Object configIds = configMap.get(STARTING_TIME_KEY);
            if (!(configIds instanceof Map)) {
                throw new IllegalStateException(
                        "Invalid configuration.  Added after key must be an instance of Map<String,String> but was "
                                + configIds);
            }
            this.addedAfter = (Map) configIds;
        }
        if (configMap.containsKey(ENDING_TIME_KEY)) {
            Object configIds = configMap.get(ENDING_TIME_KEY);
            if (!(configIds instanceof Map)) {
                throw new IllegalStateException(
                        "Invalid configuration.  Added before key must be an instance of Map<String,String> but was "
                                + configIds);
            }
            this.addedBefore = (Map) configIds;
        }
    }
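
    /*
     * A hypothetical Map configuration exercising the keys handled above; the
     * identifiers are illustrative only:
     *
     *   Map<String, Object> conf = new HashMap<>();
     *   conf.put(MODE_KEY, Mode.BACKFILL_AND_TERMINATE);
     *   conf.put(STARTING_DOCS_KEY, Collections.singletonMap("HB-1", "doc-100"));
     *   provider.prepare(conf);
     *
     * This directs the provider to backfill heartbeat HB-1 to document doc-100 and
     * then terminate, per the class Javadoc.
     */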

    private Queue<StreamsDatum> constructQueue() {
        return new ConcurrentLinkedQueue<>();
    }

    public int getCount() {
        return this.count.get();
    }

    public enum Mode {
        CONTINUOUS, BACKFILL_AND_TERMINATE
    }
}