com.cloudant.sync.replication.PushStrategy.java Source code

Introduction

Here is the source code for com.cloudant.sync.replication.PushStrategy.java. PushStrategy implements the push half of a Cloudant Sync replication: it reads changes from a local Datastore in batches, asks the remote CouchDB/Cloudant database which of those revisions it is missing (via _revs_diff), uploads the missing revisions through _bulk_docs (or multipart requests when large attachments are involved), and records a checkpoint on the remote after each batch so a later run can resume where this one left off.

Source

/**
 * Copyright (c) 2013 Cloudant, Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */

package com.cloudant.sync.replication;

import com.cloudant.http.HttpConnectionRequestInterceptor;
import com.cloudant.http.HttpConnectionResponseInterceptor;
import com.cloudant.mazha.CouchClient;
import com.cloudant.mazha.json.JSONHelper;
import com.cloudant.sync.datastore.Attachment;
import com.cloudant.sync.datastore.AttachmentException;
import com.cloudant.sync.datastore.Changes;
import com.cloudant.sync.datastore.Datastore;
import com.cloudant.sync.datastore.DatastoreException;
import com.cloudant.sync.datastore.DatastoreImpl;
import com.cloudant.sync.datastore.DocumentRevision;
import com.cloudant.sync.datastore.DocumentRevisionTree;
import com.cloudant.sync.datastore.MultipartAttachmentWriter;
import com.cloudant.sync.datastore.RevisionHistoryHelper;
import com.cloudant.sync.event.EventBus;
import com.cloudant.sync.util.JSONUtils;
import com.cloudant.sync.util.Misc;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;

import org.apache.commons.codec.binary.Hex;

import java.io.ByteArrayInputStream;
import java.net.URI;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * @api_private
 */
class PushStrategy implements ReplicationStrategy {

    // internal state which gets reset each time run() is called
    private static class State {
        // Flag to stop the replication thread.
        // Volatile as might be set from another thread.
        private volatile boolean cancel = false;

        /**
         * Flag is set when the replication process is complete. The thread
         * may live on because the listener's callback is executed on the thread.
         */
        private volatile boolean replicationTerminated = false;

        private int documentCounter = 0;

        private int batchCounter = 0;
    }

    private State state;

    private static final String LOG_TAG = "PushStrategy";

    private static final Logger logger = Logger.getLogger(PushStrategy.class.getCanonicalName());

    CouchDB targetDb;

    DatastoreWrapper sourceDb;

    private final String name;

    public final EventBus eventBus = new EventBus();

    private static JSONHelper sJsonHelper = new JSONHelper();

    // Maximum number of changes fetched from the local changes feed per batch.
    public int changeLimitPerBatch = 500;

    // Upper bound on the number of batches processed in a single run().
    public int batchLimitPerRun = 100;

    // Number of documents sent to the remote _bulk_docs endpoint per request.
    public int bulkInsertSize = 10;

    // Optional filter; when set, only revisions it accepts are pushed.
    public PushFilter filter = null;

    // Strategy for deciding which attachments are inlined in the JSON body
    // rather than sent as multipart.
    public PushAttachmentsInline pushAttachmentsInline = PushAttachmentsInline.Small;

    public PushStrategy(Datastore source, URI target, List<HttpConnectionRequestInterceptor> requestInterceptors,
            List<HttpConnectionResponseInterceptor> responseInterceptors) {
        this.sourceDb = new DatastoreWrapper((DatastoreImpl) source);
        this.targetDb = new CouchClientWrapper(new CouchClient(target, requestInterceptors, responseInterceptors));
        String replicatorName = String.format("%s <-- %s ", target, source.getDatastoreName());
        this.name = String.format("%s [%s]", LOG_TAG, replicatorName);
    }

    @Override
    public boolean isReplicationTerminated() {
        if (state != null) {
            return state.replicationTerminated;
        } else {
            return false;
        }
    }

    @Override
    public void setCancel() {
        // if we have been cancelled before run(), we have to create the internal state
        if (this.state == null) {
            this.state = new State();
        }
        this.state.cancel = true;
    }

    @Override
    public int getDocumentCounter() {
        if (state != null) {
            return this.state.documentCounter;
        } else {
            return 0;
        }
    }

    @Override
    public int getBatchCounter() {
        if (state != null) {
            return this.state.batchCounter;
        } else {
            return 0;
        }
    }

    /**
     * Exceptions are handled in this separate run() method so that replicate()
     * can simply return when cancel is set to true, rather than having to keep
     * track of whether it terminated via an error.
     */
    @Override
    public void run() {

        if (this.state != null && this.state.cancel) {
            // we were already cancelled, don't run, but still post completion
            this.state.documentCounter = 0;
            this.state.batchCounter = 0;
            runComplete(null);
            return;
        }
        // reset internal state
        this.state = new State();

        ErrorInfo errorInfo = null;

        try {

            replicate();

        } catch (Throwable e) {
            logger.log(Level.SEVERE, String.format("Batch %s ended with error:", this.state.batchCounter), e);
            errorInfo = new ErrorInfo(e);
        }

        runComplete(errorInfo);
    }

    private void runComplete(ErrorInfo errorInfo) {
        state.replicationTerminated = true;

        String msg = "Push replication terminated via ";
        msg += this.state.cancel ? "cancel." : "completion.";

        // notify complete/errored on eventbus
        logger.info(msg + " Posting on EventBus.");
        if (errorInfo == null) { // successful replication
            eventBus.post(new ReplicationStrategyCompleted(this));
        } else {
            eventBus.post(new ReplicationStrategyErrored(this, errorInfo));
        }
    }

    private void replicate() throws DatabaseNotFoundException, InterruptedException, ExecutionException,
            AttachmentException, DatastoreException {
        logger.info("Push replication started");
        long startTime = System.currentTimeMillis();

        // We were cancelled before we started
        if (this.state.cancel) {
            return;
        }

        if (!this.targetDb.exists()) {
            throw new DatabaseNotFoundException("Database not found: " + this.targetDb.getIdentifier());
        }

        this.state.documentCounter = 0;
        for (this.state.batchCounter = 1; this.state.batchCounter < this.batchLimitPerRun; this.state.batchCounter++) {

            if (this.state.cancel) {
                return;
            }

            String msg = String.format("Batch %s started (completed %s changes so far)", this.state.batchCounter,
                    this.state.documentCounter);
            logger.info(msg);
            long batchStartTime = System.currentTimeMillis();

            // Get the next batch of changes and record the size and last sequence
            Changes changes = getNextBatch();
            final int unfilteredChangesSize = changes.size();
            final long lastSeq = changes.getLastSequence();

            // Count the number of changes processed
            int changesProcessed = 0;

            // If there is a filter replace the changes with the filtered list of changes
            if (this.filter != null) {
                List<DocumentRevision> allowedChanges = new ArrayList<DocumentRevision>(
                        changes.getResults().size());

                for (DocumentRevision revision : changes.getResults()) {
                    if (this.filter.shouldReplicateDocument(revision)) {
                        allowedChanges.add(revision);
                    }
                }

                changes = new FilteredChanges(changes.getLastSequence(), allowedChanges);
            }
            final int filteredChangesSize = changes.size();

            // Log the batch size so a later log analysis can check whether all
            // changes were processed.
            msg = String.format("Batch %s contains %s changes", this.state.batchCounter, filteredChangesSize);
            logger.info(msg);

            if (filteredChangesSize > 0) {
                changesProcessed = processOneChangesBatch(changes);
                this.state.documentCounter += changesProcessed;
            }

            // If not cancelled and there were any changes set a checkpoint
            if (!this.state.cancel && unfilteredChangesSize > 0) {
                try {
                    this.putCheckpoint(String.valueOf(lastSeq));
                } catch (DatastoreException e) {
                    logger.log(Level.WARNING,
                            "Failed to put checkpoint doc, next replication will start from previous checkpoint", e);
                }
            }

            long batchEndTime = System.currentTimeMillis();
            msg = String.format("Batch %s completed in %sms (processed %s changes)", this.state.batchCounter,
                    batchEndTime - batchStartTime, changesProcessed);
            logger.info(msg);

            // This logic depends on the changes in the feed rather than the
            // changes we actually processed.
            if (unfilteredChangesSize == 0) {
                break;
            }
        }

        long endTime = System.currentTimeMillis();
        long deltaTime = endTime - startTime;
        String msg = String.format("Push completed in %sms (%s total changes processed)", deltaTime,
                this.state.documentCounter);
        logger.info(msg);
    }

    private Changes getNextBatch() throws ExecutionException, InterruptedException, DatastoreException {
        long lastPushSequence = getLastCheckpointSequence();
        logger.fine("Last push sequence from remote database: " + lastPushSequence);
        return this.sourceDb.getDbCore().changes(lastPushSequence, this.changeLimitPerBatch);
    }

    private static class FilteredChanges extends Changes {
        public FilteredChanges(long lastSequence, List<DocumentRevision> results) {
            super(lastSequence, results);
        }
    }

    /**
     * A small value class containing a set of documents to push, some
     * via multipart and some via _bulk_docs
     */
    private static class ItemsToPush {
        public ItemsToPush() {
            serializedDocs = new ArrayList<String>();
            multiparts = new ArrayList<MultipartAttachmentWriter>();
        }

        List<String> serializedDocs;
        List<MultipartAttachmentWriter> multiparts;
    }

    private int processOneChangesBatch(Changes changes) throws AttachmentException, DatastoreException {

        int changesProcessed = 0;

        // Process the changes themselves in batches, where we post a batch
        // at a time to the remote database's _bulk_docs endpoint.
        List<? extends List<DocumentRevision>> batches = Lists.partition(changes.getResults(), this.bulkInsertSize);
        for (List<DocumentRevision> batch : batches) {

            if (this.state.cancel) {
                break;
            }

            Map<String, DocumentRevisionTree> allTrees = this.sourceDb.getDocumentTrees(batch);
            Map<String, Set<String>> docOpenRevs = this.openRevisions(allTrees);
            Map<String, CouchClient.MissingRevisions> docMissingRevs = this.targetDb.revsDiff(docOpenRevs);

            ItemsToPush itemsToPush = missingRevisionsToJsonDocs(allTrees, docMissingRevs);
            List<String> serialisedMissingRevs = itemsToPush.serializedDocs;
            List<MultipartAttachmentWriter> multiparts = itemsToPush.multiparts;

            if (!this.state.cancel) {
                this.targetDb.putMultiparts(multiparts);
                this.targetDb.bulkCreateSerializedDocs(serialisedMissingRevs);
                changesProcessed += docMissingRevs.size();
            }
        }

        return changesProcessed;
    }
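
    // For reference, the _revs_diff exchange used above has roughly this shape
    // (illustrative ids and revs, not taken from this codebase): the request maps
    // each document id to the rev ids of its local leaf (open) revisions, and the
    // response maps each document id to the subset the remote is missing, plus any
    // possible ancestors it does already have:
    //
    //   request:  {"doc1": ["2-b", "3-c"]}
    //   response: {"doc1": {"missing": ["3-c"], "possible_ancestors": ["1-a"]}}
    //
    // Only the "missing" revisions are serialised and pushed; "possible_ancestors"
    // is used below to work out which attachments can be sent as stubs.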

    /**
     * Generate serialised JSON strings and/or MIME multipart/related writer objects for revisions
     * which are missing on the server
     *
     * @param allTrees batch of document trees, keyed by document id, from the local database
     * @param revisions {@code MissingRevisions} objects, keyed by document id, as returned from
     *                  the remote database by querying its _revs_diff endpoint.
     *
     * @return {@code ItemsToPush} object holding the serialised JSON strings and/or MIME
     *         multipart/related writers
     *
     * @throws AttachmentException
     *
     * @see com.cloudant.mazha.CouchClient.MissingRevisions
     * @see PushStrategy.ItemsToPush
     */
    private ItemsToPush missingRevisionsToJsonDocs(Map<String, DocumentRevisionTree> allTrees,
            Map<String, CouchClient.MissingRevisions> revisions) throws AttachmentException {

        ItemsToPush itemsToPush = new ItemsToPush();

        for (Map.Entry<String, CouchClient.MissingRevisions> e : revisions.entrySet()) {
            String docId = e.getKey();
            Set<String> missingRevisions = e.getValue().missing;
            DocumentRevisionTree tree = allTrees.get(docId);
            for (String rev : missingRevisions) {
                long sequence = tree.lookup(docId, rev).getSequence();
                List<DocumentRevision> path = tree.getPathForNode(sequence);

                // get the attachments for the leaf of this path
                DocumentRevision dr = path.get(0);
                List<? extends Attachment> atts = this.sourceDb.getDbCore().attachmentsForRevision(dr);

                // find the generation of a common ancestor the remote already knows about:
                // the oldest revision in this path whose rev id appears in possible_ancestors.
                // This is needed so attachments the remote already has can be stubbed out
                // rather than re-sent.
                int minRevPos = 0;
                for (DocumentRevision ancestor : path) {
                    if (e.getValue().possible_ancestors != null
                            && e.getValue().possible_ancestors.contains(ancestor.getRevision())) {
                        minRevPos = ancestor.getGeneration();
                    }
                }

                // get the json, and inline any small attachments
                Map<String, Object> json = RevisionHistoryHelper.revisionHistoryToJson(path, atts,
                        this.pushAttachmentsInline, minRevPos);
                // if there are any large attachments we will get a multipart writer, otherwise null
                MultipartAttachmentWriter mpw = RevisionHistoryHelper.createMultipartWriter(json, atts,
                        this.pushAttachmentsInline, minRevPos);

                // now we will have either a multipart or a plain doc
                if (mpw == null) {
                    itemsToPush.serializedDocs.add(sJsonHelper.toJson(json));
                } else {
                    itemsToPush.multiparts.add(mpw);
                }
            }
        }

        return itemsToPush;
    }

    Map<String, Set<String>> openRevisions(Map<String, DocumentRevisionTree> trees) {
        Map<String, Set<String>> allOpenRevisions = new HashMap<String, Set<String>>();
        for (Map.Entry<String, DocumentRevisionTree> e : trees.entrySet()) {
            allOpenRevisions.put(e.getKey(), e.getValue().leafRevisionIds());
        }
        return allOpenRevisions;
    }

    public String getReplicationId() throws DatastoreException {
        HashMap<String, String> dict = new HashMap<String, String>();
        dict.put("source", this.sourceDb.getIdentifier());
        dict.put("target", this.targetDb.getIdentifier());
        // get raw SHA-1 of dictionary
        byte[] sha1Bytes = Misc.getSha1(new ByteArrayInputStream(JSONUtils.serializeAsBytes(dict)));
        // return SHA-1 as a hex string
        byte[] sha1Hex = new Hex().encode(sha1Bytes);
        return new String(sha1Hex, Charset.forName("UTF-8"));
    }
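
    // The hex SHA-1 above identifies this particular source/target pair; the
    // getCheckpoint/putCheckpoint calls below use it as the key under which the
    // last pushed local sequence number is recorded on the remote (conventionally
    // a _local checkpoint document), so the next run can resume from that point.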

    private long getLastCheckpointSequence() throws DatastoreException {
        String lastSequence = targetDb.getCheckpoint(this.getReplicationId());
        // The push checkpoint stores the local sequence number, so when present it should parse as a long
        return Strings.isNullOrEmpty(lastSequence) ? 0 : Long.parseLong(lastSequence);
    }

    private void putCheckpoint(String checkpoint) throws DatastoreException {
        targetDb.putCheckpoint(this.getReplicationId(), checkpoint);
    }

    @Override
    public EventBus getEventBus() {
        return eventBus;
    }

}
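
For context, here is a minimal, illustrative sketch of driving this strategy directly. It is not part of the library: the class PushExample, the method pushOnce and the thread name are made up, and it assumes you already hold an open Datastore (which must in practice be a DatastoreImpl, since the constructor casts it) plus the URI of an existing remote database. Because PushStrategy is package-private, the sketch lives in the same package; in normal use the replicator classes in this package drive the strategy instead.

package com.cloudant.sync.replication;

import java.net.URI;
import java.util.Collections;

import com.cloudant.http.HttpConnectionRequestInterceptor;
import com.cloudant.http.HttpConnectionResponseInterceptor;
import com.cloudant.sync.datastore.Datastore;

class PushExample {

    // Push everything outstanding from "source" to "target" and report the counters.
    static void pushOnce(Datastore source, URI target) throws InterruptedException {
        final PushStrategy push = new PushStrategy(
                source,
                target,
                Collections.<HttpConnectionRequestInterceptor>emptyList(),
                Collections.<HttpConnectionResponseInterceptor>emptyList());

        // The batching knobs are public fields; these are the defaults shown above.
        push.changeLimitPerBatch = 500;
        push.bulkInsertSize = 10;

        // run() blocks until the changes feed is drained, the batch limit is hit,
        // cancel is requested or an error occurs, so drive it from a worker thread.
        // Completion/error events are also posted on push.getEventBus().
        Thread worker = new Thread(new Runnable() {
            @Override
            public void run() {
                push.run();
            }
        }, "push-replication");
        worker.start();

        // push.setCancel() could be called from another thread to stop it early.

        worker.join();

        System.out.println("terminated=" + push.isReplicationTerminated()
                + " documents=" + push.getDocumentCounter()
                + " batches=" + push.getBatchCounter());
    }
}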