org.eclipse.ditto.services.utils.persistence.mongo.streaming.MongoReadJournal.java Source code

Introduction

Here is the source code for org.eclipse.ditto.services.utils.persistence.mongo.streaming.MongoReadJournal.java

Source

/*
 * Copyright (c) 2017 Contributors to the Eclipse Foundation
 *
 * See the NOTICE file(s) distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0
 *
 * SPDX-License-Identifier: EPL-2.0
 */
package org.eclipse.ditto.services.utils.persistence.mongo.streaming;

import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.Nullable;

import org.bson.Document;
import org.bson.conversions.Bson;
import org.bson.types.ObjectId;
import org.eclipse.ditto.services.utils.config.DefaultScopedConfig;
import org.eclipse.ditto.services.utils.persistence.mongo.DittoMongoClient;
import org.eclipse.ditto.services.utils.persistence.mongo.MongoClientWrapper;
import org.eclipse.ditto.services.utils.persistence.mongo.config.DefaultMongoDbConfig;
import org.eclipse.ditto.services.utils.persistence.mongo.config.MongoDbConfig;
import org.eclipse.ditto.utils.jsr305.annotations.AllValuesAreNonnullByDefault;
import org.reactivestreams.Publisher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mongodb.QueryOperators;
import com.mongodb.client.model.Aggregates;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Sorts;
import com.mongodb.reactivestreams.client.ListCollectionsPublisher;
import com.mongodb.reactivestreams.client.MongoCollection;
import com.mongodb.reactivestreams.client.MongoDatabase;
import com.typesafe.config.Config;

import akka.NotUsed;
import akka.actor.ActorSystem;
import akka.contrib.persistence.mongodb.JournallingFieldNames$;
import akka.contrib.persistence.mongodb.SnapshottingFieldNames$;
import akka.japi.Pair;
import akka.stream.ActorMaterializer;
import akka.stream.Attributes;
import akka.stream.javadsl.RestartSource;
import akka.stream.javadsl.Sink;
import akka.stream.javadsl.Source;

/**
 * Reads the event journal of the com.github.scullxbones.akka-persistence-mongo plugin.
 * In the Akka system configuration,
 * <ul>
 * <li>
 * {@code akka.persistence.journal.auto-start-journals} must contain exactly 1 configuration key {@code
 * <JOURNAL_KEY>},
 * </li>
 * <li>
 * {@code <JOURNAL_KEY>.overrides.journal-collection} must be defined and equal to the name of the event journal
 * collection.
 * </li>
 * </ul>
 */
@AllValuesAreNonnullByDefault
public class MongoReadJournal {
    // not a final class to test with Mockito
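
    // A hedged configuration sketch (the journal key and collection name below are
    // illustrative assumptions, not taken from this file):
    //
    //   akka.persistence.journal.auto-start-journals = [
    //     "akka-contrib-mongodb-persistence-things-journal"
    //   ]
    //   akka-contrib-mongodb-persistence-things-journal {
    //     overrides.journal-collection = "things_journal"
    //   }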

    // pattern that matches nothing: the start of input (\A) can never follow a character
    private static final Pattern MATCH_NOTHING = Pattern.compile(".\\A");

    // group name of collection name suffix
    private static final String SUFFIX = "suffix";

    private static final String AKKA_PERSISTENCE_JOURNAL_AUTO_START = "akka.persistence.journal.auto-start-journals";
    private static final String AKKA_PERSISTENCE_SNAPS_AUTO_START = "akka.persistence.snapshot-store.auto-start-snapshot-stores";

    private static final String JOURNAL_COLLECTION_NAME_KEY = "overrides.journal-collection";
    private static final String SNAPS_COLLECTION_NAME_KEY = "overrides.snaps-collection";

    private static final String ID = JournallingFieldNames$.MODULE$.ID();
    private static final String PROCESSOR_ID = JournallingFieldNames$.MODULE$.PROCESSOR_ID();
    private static final String TO = JournallingFieldNames$.MODULE$.TO();
    private static final String SN = SnapshottingFieldNames$.MODULE$.SEQUENCE_NUMBER();
    private static final String GTE = QueryOperators.GTE;
    private static final String LT = QueryOperators.LT;

    private static final Integer PROJECT_INCLUDE = 1;
    private static final Integer SORT_DESCENDING = -1;

    private static final Document JOURNAL_PROJECT_DOCUMENT = toDocument(
            new Object[][] { { PROCESSOR_ID, PROJECT_INCLUDE }, { TO, PROJECT_INCLUDE } });
    private static final Document SNAPS_PROJECT_DOCUMENT = toDocument(
            new Object[][] { { PROCESSOR_ID, PROJECT_INCLUDE }, { SN, PROJECT_INCLUDE } });

    private static final Document ID_DESC = toDocument(new Object[][] { { ID, SORT_DESCENDING } });

    private static final String COLLECTION_NAME_FIELD = "name";
    private static final Duration MAX_BACK_OFF_DURATION = Duration.ofSeconds(128L);

    private final Pattern journalCollectionPrefix;
    private final Pattern snapsCollectionPrefix;
    private final DittoMongoClient mongoClient;
    private final Logger log;

    private MongoReadJournal(final Pattern journalCollectionPrefix, final Pattern snapsCollectionPrefix,
            final DittoMongoClient mongoClient) {
        this.journalCollectionPrefix = journalCollectionPrefix;
        this.snapsCollectionPrefix = snapsCollectionPrefix;
        this.mongoClient = mongoClient;
        log = LoggerFactory.getLogger(MongoReadJournal.class);
    }

    /**
     * Create a read journal for an actor system with a persistence plugin having a unique auto-start journal.
     *
     * @param system the actor system.
     * @return the read journal.
     */
    public static MongoReadJournal newInstance(final ActorSystem system) {
        final Config config = system.settings().config();
        final MongoDbConfig mongoDbConfig = DefaultMongoDbConfig.of(DefaultScopedConfig.dittoScoped(config));
        return newInstance(config, MongoClientWrapper.newInstance(mongoDbConfig));
    }

    /**
     * Creates a new {@code MongoReadJournal}.
     *
     * @param config The Akka system configuration.
     * @param mongoClient The Mongo client wrapper.
     * @return A {@code MongoReadJournal} object.
     */
    public static MongoReadJournal newInstance(final Config config, final DittoMongoClient mongoClient) {
        final String autoStartJournalKey = extractAutoStartConfigKey(config, AKKA_PERSISTENCE_JOURNAL_AUTO_START);
        final String autoStartSnapsKey = extractAutoStartConfigKey(config, AKKA_PERSISTENCE_SNAPS_AUTO_START);
        final Pattern journalCollectionPrefix = getOverrideCollectionNamePattern(
                config.getConfig(autoStartJournalKey), JOURNAL_COLLECTION_NAME_KEY);
        final Pattern snapsCollectionPrefix = getOverrideCollectionNamePattern(config.getConfig(autoStartSnapsKey),
                SNAPS_COLLECTION_NAME_KEY);
        return new MongoReadJournal(journalCollectionPrefix, snapsCollectionPrefix, mongoClient);
    }

    /**
     * Retrieve sequence numbers for persistence IDs modified within the time interval as a source of {@code
     * PidWithSeqNr}. A persistence ID may appear multiple times with various sequence numbers.
     *
     * @param start start of the time window.
     * @param end end of the time window.
     * @return source of persistence IDs and sequence numbers written within the given time window.
     */
    public Source<PidWithSeqNr, NotUsed> getPidWithSeqNrsByInterval(final Instant start, final Instant end) {
        final MongoDatabase db = mongoClient.getDefaultDatabase();
        final Document idFilter = createIdFilter(start, end);

        log.debug("Looking for journal collection with pattern <{}>.", journalCollectionPrefix);

        return listJournalsAndSnapshotStores()
                .flatMapConcat(journalAndSnaps -> listPidWithSeqNr(journalAndSnaps, db, idFilter));
    }
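
    // Usage sketch (hedged; "readJournal", "log" and "mat" are assumed to exist in the
    // caller's scope): stream every PID/sequence-number pair written in the last hour.
    //
    //   final Instant now = Instant.now();
    //   readJournal.getPidWithSeqNrsByInterval(now.minus(Duration.ofHours(1)), now)
    //           .runForeach(pidWithSeqNr -> log.info("{}", pidWithSeqNr), mat);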

    /**
     * Retrieve all unique PIDs in journals. Does its best not to create long-lived cursors on the database by reading
     * {@code batchSize} events per query.
     *
     * @param batchSize how many events to read in one query.
     * @param maxIdleTime how long the stream is allowed to idle without sending any element. Bounds the number of
     * retries with exponential back-off.
     * @param mat the actor materializer to run the query streams.
     * @return a source of all unique persistence IDs in ascending order, fetched with at most {@code batchSize}
     * events per query.
     */
    public Source<String, NotUsed> getJournalPids(final int batchSize, final Duration maxIdleTime,
            final ActorMaterializer mat) {

        final int maxRestarts = computeMaxRestarts(maxIdleTime);
        return listJournals().withAttributes(Attributes.inputBuffer(1, 1))
                .flatMapConcat(journal ->
                        listPidsInJournal(journal, "", batchSize, mat, MAX_BACK_OFF_DURATION, maxRestarts))
                .mapConcat(pids -> pids);
    }

    /**
     * Retrieve all unique PIDs in journals above a lower bound. Does not limit database access in any way.
     *
     * @param lowerBoundPid the lower-bound PID.
     * @param batchSize how many events to read in one query.
     * @param maxIdleTime max idle time of the stream (not evaluated here; this stream is not restarted on failure).
     * @param mat the materializer.
     * @return all unique PIDs in journals above a lower bound.
     */
    public Source<String, NotUsed> getJournalPidsAbove(final String lowerBoundPid, final int batchSize,
            final Duration maxIdleTime, final ActorMaterializer mat) {

        return listJournals().withAttributes(Attributes.inputBuffer(1, 1))
                .flatMapConcat(
                        journal -> listPidsInJournal(journal, lowerBoundPid, batchSize, mat, Duration.ZERO, 0))
                .mapConcat(pids -> pids);
    }

    private Source<List<String>, NotUsed> listPidsInJournal(final MongoCollection<Document> journal,
            final String lowerBound, final int batchSize, final ActorMaterializer mat, final Duration maxBackOff,
            final int maxRestarts) {

        return Source.unfoldAsync("", start -> {
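            // resume from the lower bound or the last PID of the previous batch, whichever is greater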
            final String actualStart = lowerBound.compareTo(start) >= 0 ? lowerBound : start;
            return listJournalPidsAbove(journal, actualStart, batchSize, maxBackOff, maxRestarts)
                    .runWith(Sink.seq(), mat).thenApply(list -> {
                        if (list.isEmpty()) {
                            return Optional.empty();
                        } else {
                            return Optional.of(Pair.create(list.get(list.size() - 1), list));
                        }
                    });
        }).withAttributes(Attributes.inputBuffer(1, 1));
    }

    private Source<String, NotUsed> listJournalPidsAbove(final MongoCollection<Document> journal,
            final String start, final int batchSize, final Duration maxBackOff, final int maxRestarts) {

        final List<Bson> pipeline = new ArrayList<>(5);
        // optional match stage
        if (!start.isEmpty()) {
            pipeline.add(Aggregates.match(Filters.gt(PROCESSOR_ID, start)));
        }

        // sort stage
        pipeline.add(Aggregates.sort(Sorts.ascending(PROCESSOR_ID)));

        // limit stage. It should come before the group stage, or MongoDB would scan the entire journal collection.
        pipeline.add(Aggregates.limit(batchSize));

        // group stage
        pipeline.add(Aggregates.group("$" + PROCESSOR_ID));

        // sort stage 2 -- order after group stage is not defined
        pipeline.add(Aggregates.sort(Sorts.ascending(ID)));

        final Duration minBackOff = Duration.ofSeconds(1L);
        final double randomFactor = 0.1;

        return RestartSource.onFailuresWithBackoff(minBackOff, maxBackOff, randomFactor, maxRestarts,
                () -> Source.fromPublisher(journal.aggregate(pipeline)).map(document -> document.getString(ID)));
    }
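
    // For illustration (assuming start = "pid:lower" and batchSize = 500), the pipeline built
    // above corresponds to the following aggregation, where "pid" is the processor ID field
    // of the persistence plugin:
    //
    //   [ { $match: { pid: { $gt: "pid:lower" } } },
    //     { $sort:  { pid: 1 } },
    //     { $limit: 500 },
    //     { $group: { _id: "$pid" } },
    //     { $sort:  { _id: 1 } } ]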

    private int computeMaxRestarts(final Duration maxDuration) {
        if (MAX_BACK_OFF_DURATION.minus(maxDuration).isNegative()) {
            // maxBackOff < maxDuration: backOff at least 7 times (1+2+4+8+16+32+64=127s)
            return Math.max(7, 6 + (int) (maxDuration.toMillis() / MAX_BACK_OFF_DURATION.toMillis()));
        } else {
            // maxBackOff >= maxDuration: maxRestarts = log2 of maxDuration in seconds
            final int log2MaxDuration = 63 - Long.numberOfLeadingZeros(maxDuration.getSeconds());
            return Math.max(0, log2MaxDuration);
        }
    }
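
    // Worked examples: maxDuration = 10s takes the second branch and yields
    // floor(log2(10)) = 3 restarts (1 + 2 + 4 = 7s of back-off <= 10s); maxDuration = 10min
    // takes the first branch and yields max(7, 6 + 600000 / 128000) = 10 restarts.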

    private Source<PidWithSeqNr, NotUsed> listPidWithSeqNr(final JournalAndSnaps journalAndSnaps,
            final MongoDatabase database, final Document idFilter) {
        final Source<PidWithSeqNr, NotUsed> journalPids;
        final Source<PidWithSeqNr, NotUsed> snapsPids;

        if (journalAndSnaps.journal == null) {
            journalPids = Source.empty();
        } else {
            journalPids = find(database, journalAndSnaps.journal, idFilter, JOURNAL_PROJECT_DOCUMENT)
                    .map(doc -> new PidWithSeqNr(doc.getString(PROCESSOR_ID), doc.getLong(TO)));
        }

        if (journalAndSnaps.snaps == null) {
            snapsPids = Source.empty();
        } else {
            snapsPids = find(database, journalAndSnaps.snaps, idFilter, SNAPS_PROJECT_DOCUMENT)
                    .map(doc -> new PidWithSeqNr(doc.getString(PROCESSOR_ID), doc.getLong(SN)));
        }

        return journalPids.concat(snapsPids);
    }

    private Source<Document, NotUsed> find(final MongoDatabase db, final String collection, final Document filter,
            final Document project) {

        return Source.fromPublisher(db.getCollection(collection).find(filter).projection(project).sort(ID_DESC));
    }

    private Source<JournalAndSnaps, NotUsed> listJournalsAndSnapshotStores() {
        final MongoDatabase database = mongoClient.getDefaultDatabase();
        return resolveCollectionNames(journalCollectionPrefix, snapsCollectionPrefix, database, log)
                .map(this::toJournalAndSnaps);
    }

    private Source<MongoCollection<Document>, NotUsed> listJournals() {
        final MongoDatabase database = mongoClient.getDefaultDatabase();
        return resolveCollectionNames(journalCollectionPrefix, MATCH_NOTHING, database, log)
                .map(database::getCollection);
    }

    private JournalAndSnaps toJournalAndSnaps(final String collectionName) {
        final Matcher matcher1 = journalCollectionPrefix.matcher(collectionName);
        if (matcher1.matches()) {
            return new JournalAndSnaps(matcher1.group(SUFFIX), collectionName, null);
        } else {
            final Matcher matcher2 = snapsCollectionPrefix.matcher(collectionName);
            if (matcher2.matches()) {
                return new JournalAndSnaps(matcher2.group(SUFFIX), null, collectionName);
            } else {
                throw new IllegalArgumentException(
                        String.format("Collection is neither journal nor snapshot-store: <%s>", collectionName));
            }
        }
    }

    private Document createIdFilter(final Instant start, final Instant end) {
        final ObjectId startObjectId = instantToObjectIdBoundary(start);
        final ObjectId endObjectId = instantToObjectIdBoundary(end.plus(1L, ChronoUnit.SECONDS));
        log.debug("Limiting query to ObjectIds $gte {} and $lt {}", startObjectId, endObjectId);
        return toDocument(new Object[][] {
                { ID, toDocument(new Object[][] { { GTE, startObjectId }, { LT, endObjectId } }) } });
    }
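
    // The resulting filter has the shape { "_id": { "$gte": <startObjectId>, "$lt": <endObjectId> } },
    // selecting documents whose ObjectId timestamps fall within the window at second precision.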

    /* Create an ObjectId boundary from a timestamp to be used for comparison in MongoDB queries. */
    private static ObjectId instantToObjectIdBoundary(final Instant instant) {
        // MongoDB ObjectIds only contain timestamps with a precision of seconds, thus adjust the range of the query
        // appropriately to make sure a client does not miss data when providing Instants with higher precision.
        //
        // Do not use
        //
        //   new ObjectId(Date.from(startTruncatedToSecs))
        //
        // to compute object ID boundaries. The 1-argument constructor above appends incidental non-zero bits after
        // the timestamp and may filter out events persisted after 'instant' if they happen to have
        // a lower machine ID, process ID or counter value. (A MongoDB ObjectID is a byte array with fields for
        // timestamp, machine ID, process ID and counter such that timestamp occupies the most significant bits.)
        return new ObjectId(Date.from(instant.truncatedTo(ChronoUnit.SECONDS)), 0, (short) 0, 0);
    }
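
    // Example: Instant.parse("2020-01-01T00:00:00.123Z") truncates to 2020-01-01T00:00:00Z
    // (epoch second 1577836800 = 0x5E0BE100), so the boundary is
    // ObjectId("5e0be1000000000000000000").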

    private static Document toDocument(final Object[][] keyValuePairs) {
        final Map<String, Object> map = new HashMap<>(keyValuePairs.length);
        for (final Object[] keyValuePair : keyValuePairs) {
            map.put(keyValuePair[0].toString(), keyValuePair[1]);
        }
        return new Document(map);
    }

    /**
     * Extract the auto-start journal/snaps config from the configuration of the actor system.
     * <p>
     * It assumes that in the Akka system configuration,
     * {@code akka.persistence.journal.auto-start-journals} or
     * {@code akka.persistence.snapshot-store.auto-start-snapshot-stores}
     * contains exactly 1 configuration key, which points to the configuration of the auto-start journal/snapshot-store.
     *
     * @param config the system configuration.
     * @param key either {@code akka.persistence.journal.auto-start-journals} or
     * {@code akka.persistence.snapshot-store.auto-start-snapshot-stores}.
     * @return the single configuration key in the list at {@code key}.
     * @throws IllegalArgumentException if the list at {@code key} does not contain exactly one element.
     */
    private static String extractAutoStartConfigKey(final Config config, final String key) {
        final List<String> autoStartJournals = config.getStringList(key);
        if (autoStartJournals.size() != 1) {
            final String message = String.format("Expected %s to be a singleton list, but it is List(%s)",
                    key, String.join(", ", autoStartJournals));
            throw new IllegalArgumentException(message);
        } else {
            return autoStartJournals.get(0);
        }
    }
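
    // Example: with akka.persistence.journal.auto-start-journals =
    // [ "akka-contrib-mongodb-persistence-things-journal" ] (an illustrative key), this
    // method returns "akka-contrib-mongodb-persistence-things-journal".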

    /**
     * Resolve event journal collection prefix (e.g. "things_journal") from the auto-start journal configuration.
     * <p>
     * It assumes that in the auto-start journal configuration,
     * {@code overrides.journal-collection} is defined and equal to the name of the event journal
     * collection.
     *
     * @param journalOrSnapsConfig The journal or snapshot-store configuration.
     * @param key Config key of the collection name.
     * @return A pattern matching the configured collection name followed by an arbitrary suffix.
     * @throws com.typesafe.config.ConfigException.Missing if a relevant config value is missing.
     * @throws com.typesafe.config.ConfigException.WrongType if a relevant config value has not the expected type.
     */
    private static Pattern getOverrideCollectionNamePattern(final Config journalOrSnapsConfig, final String key) {
        final String collectionPrefix = journalOrSnapsConfig.getString(key);
        return Pattern.compile("^" + collectionPrefix + String.format("(?<%s>.*)", SUFFIX));
    }
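
    // Example: a configured prefix of "things_journal" yields the pattern
    // ^things_journal(?<suffix>.*), matching "things_journal" itself (suffix "") as well as
    // suffixed collections such as "things_journal@ns". The prefix is assumed to contain no
    // regex metacharacters, since it is not quoted before compilation.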

    /**
     * Resolves all journal and snapshot-store collection names matching the passed prefixes.
     *
     * @param journalCollectionPrefix the prefix of the journal collections to resolve.
     * @param snapsCollectionPrefix the prefix of the snapshot-store collections to resolve.
     * @param database the MongoDB database to use for resolving collection names.
     * @param log the logger used to report matched collection names.
     * @return a source of journal and snapshot-store collection names matching the prefixes, emitted in sorted order.
     */
    private static Source<String, NotUsed> resolveCollectionNames(final Pattern journalCollectionPrefix,
            final Pattern snapsCollectionPrefix, final MongoDatabase database, final Logger log) {

        // filter for collection names matching either prefix pattern:
        final ListCollectionsPublisher<Document> documentListCollectionsPublisher = database.listCollections();
        final Bson filter = Filters.or(Filters.regex(COLLECTION_NAME_FIELD, journalCollectionPrefix),
                Filters.regex(COLLECTION_NAME_FIELD, snapsCollectionPrefix));
        final Publisher<Document> publisher = documentListCollectionsPublisher.filter(filter);
        return Source.fromPublisher(publisher).map(document -> document.getString(COLLECTION_NAME_FIELD))
                // Double check in case the Mongo API persistence layer in use does not support listCollections with filtering
                .filter(collectionName -> journalCollectionPrefix.matcher(collectionName).matches()
                        || snapsCollectionPrefix.matcher(collectionName).matches())
                .map(collectionName -> {
                    log.debug("Collection <{}> with patterns <{}> or <{}> found.", collectionName,
                            journalCollectionPrefix, snapsCollectionPrefix);
                    return collectionName;
                })
                // Each "get current PIDs" query collects all collection names in memory in order to list them in
                // a fixed order.
                .<SortedSet<String>>fold(new TreeSet<>(), (collectionNames, collectionName) -> {
                    collectionNames.add(collectionName);
                    return collectionNames;
                }).mapConcat(collectionNames -> collectionNames);
    }

    private static final class JournalAndSnaps {

        @Nullable
        private final String suffix;

        @Nullable
        private final String journal;

        @Nullable
        private final String snaps;

        private JournalAndSnaps() {
            this.suffix = null;
            journal = null;
            snaps = null;
        }

        private JournalAndSnaps(@Nullable final String suffix, @Nullable final String journal,
                @Nullable final String snaps) {
            this.suffix = suffix;
            this.journal = journal;
            this.snaps = snaps;
        }

        @Override
        public String toString() {
            return "JournalAndSnapshot[journal=" + journal + ",snaps=" + snaps + "]";
        }

        @Nullable
        private String getSuffix() {
            return suffix;
        }

        private static JournalAndSnaps merge(final JournalAndSnaps js1, final JournalAndSnaps js2) {
            final String suffix = js1.suffix != null ? js1.suffix : js2.suffix;
            final String journal = js1.journal != null ? js1.journal : js2.journal;
            final String snaps = js1.snaps != null ? js1.snaps : js2.snaps;
            return new JournalAndSnaps(suffix, journal, snaps);
        }
    }

}
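
Example

A minimal usage sketch follows. It is a hedged illustration, not part of the original file: the actor system name, batch size, and idle timeout are assumptions, and the surrounding application must provide the auto-start journal/snapshot-store configuration described in the class javadoc.

import java.time.Duration;

import org.eclipse.ditto.services.utils.persistence.mongo.streaming.MongoReadJournal;

import akka.actor.ActorSystem;
import akka.stream.ActorMaterializer;

public final class MongoReadJournalExample {

    public static void main(final String[] args) {
        // The system configuration must contain the auto-start journal/snapshot-store keys.
        final ActorSystem system = ActorSystem.create("example");
        final ActorMaterializer mat = ActorMaterializer.create(system);

        final MongoReadJournal readJournal = MongoReadJournal.newInstance(system);

        // Stream all unique persistence IDs, reading 500 events per query and allowing
        // the stream to idle for at most 2 minutes before giving up on retries.
        readJournal.getJournalPids(500, Duration.ofMinutes(2), mat)
                .runForeach(pid -> System.out.println("PID: " + pid), mat);
    }
}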