com.amazonaws.services.kinesis.stormspout.state.zookeeper.ZookeeperShardState.java Source code

Java tutorial

Introduction

Here is the source code for com.amazonaws.services.kinesis.stormspout.state.zookeeper.ZookeeperShardState.java

Source

/*
 * Copyright 2013-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Amazon Software License (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 * http://aws.amazon.com/asl/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package com.amazonaws.services.kinesis.stormspout.state.zookeeper;

import java.io.IOException;
import java.util.Random;
import java.util.concurrent.Callable;

import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.amazonaws.services.kinesis.stormspout.KinesisSpoutConfig;
import com.amazonaws.services.kinesis.stormspout.exceptions.KinesisSpoutException;
import com.amazonaws.services.kinesis.stormspout.state.zookeeper.NodeFunction.Mod;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.netflix.curator.RetryLoop;
import com.netflix.curator.framework.CuratorFramework;
import com.netflix.curator.framework.CuratorFrameworkFactory;
import com.netflix.curator.retry.ExponentialBackoffRetry;

/**
 * Handles communication with Zookeeper and methods specific to the spout for saving/restoring
 * state.
 *
 * <p>Two kinds of nodes are managed, both located under the path produced by
 * {@code buildZookeeperPath}: a single shard list node ({@code shardList}) and one state node per
 * shard ({@code <shardId>/state}). Node payloads are JSON, serialized with Jackson.
 *
 * <p>Writes go through an optimistic concurrency loop ({@code atomicUpdate}): an attempt that
 * loses a race against a concurrent create, update or delete is retried after a short randomized
 * pause.
 */
class ZookeeperShardState {
    private static final Logger LOG = LoggerFactory.getLogger(ZookeeperShardState.class);
    // Base sleep between Curator-level retries (passed to ExponentialBackoffRetry).
    private static final int BASE_SLEEP_TIME_MS = 200;
    // Base pause between optimistic-concurrency attempts; a random amount below this is added.
    private static final int BASE_OPTIMISTIC_RETRY_TIME_MS = 100;
    // Maximum number of Curator-level retries (passed to ExponentialBackoffRetry).
    private static final int MAX_NUM_RETRIES = 5;
    private static final String SHARD_LIST_SUFFIX = "shardList";
    private static final String STATE_SUFFIX = "state";
    // ObjectMapper is thread-safe once configured and costly to build, so a single instance is
    // shared instead of creating a new one on every read/write.
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private final KinesisSpoutConfig config;
    private final Random rand;
    private final CuratorFramework zk;

    /**
     * Create and configure the ZK sync object with the KinesisSpoutConfig, then start the
     * Curator client.
     *
     * @param config  the configuration for the spout.
     * @throws KinesisSpoutException if the Curator client cannot be created.
     */
    ZookeeperShardState(final KinesisSpoutConfig config) {
        this.config = config;
        this.rand = new Random();

        try {
            zk = CuratorFrameworkFactory.newClient(config.getZookeeperConnectionString(),
                    new ExponentialBackoffRetry(BASE_SLEEP_TIME_MS, MAX_NUM_RETRIES));
        } catch (IOException e) {
            LOG.error("Could not connect to ZooKeeper", e);
            throw new KinesisSpoutException(e);
        }
        zk.start();
    }

    /**
     * Initialize the shardList in ZK. This is called by every spout task on activate(), and ensures
     * that the shardList is up to date and correct.
     *
     * <p>If the shard list node does not exist it is created from {@code shards}; if it already
     * exists it is left untouched.
     *
     * @param shards  list of shards (output of DescribeStream).
     * @throws Exception if the ZooKeeper operation fails or the thread is interrupted while
     *         waiting to retry.
     */
    void initialize(final ImmutableList<String> shards) throws Exception {
        NodeFunction verifyOrCreateShardList = new NodeFunction() {
            @Override
            public byte[] initialize() {
                LOG.info(this + " First initialization of shardList: " + shards);
                ShardListV0 shardList = new ShardListV0(shards);
                try {
                    return OBJECT_MAPPER.writeValueAsBytes(shardList);
                } catch (JsonProcessingException e) {
                    throw new KinesisSpoutException("Unable to serialize shardList " + shardList, e);
                }
            }

            @Override
            public Mod<byte[]> apply(byte[] x) {
                // At this point, we don't support resharding. We assume the shard list is valid if one exists.
                LOG.info("ShardList already initialized in Zookeeper. Assuming it is valid.");
                return Mod.noModification();
            }
        };

        atomicUpdate(SHARD_LIST_SUFFIX, verifyOrCreateShardList);
    }

    /**
     * Delete the shard list in ZK. This is called by every spout task on deactivate(), so that
     * when the task is reactivated, the latest shard list is retrieved.
     *
     * <p>A shard list that is already absent is not an error.
     *
     * @throws Exception if the ZooKeeper operation fails.
     */
    void clearShardList() throws Exception {
        delete(SHARD_LIST_SUFFIX);
    }

    /**
     * Commit the checkpoint sequence number for a shard to Zookeeper.
     *
     * <p>The sequence number is wrapped in a {@code ShardStateV0}, serialized to JSON and written
     * to the shard's state node through the optimistic update loop.
     *
     * @param  shardId  shard to commit to.
     * @param  seqNum  sequence number to commit.
     * @throws Exception if serialization or the ZooKeeper operation fails, or the thread is
     *         interrupted while waiting to retry.
     */
    void commitSeqNum(final String shardId, final String seqNum) throws Exception {
        byte[] data = OBJECT_MAPPER.writeValueAsBytes(new ShardStateV0(seqNum));
        // NOTE(review): NodeFunction.constant presumably yields `data` both for a new node and as
        // the replacement for existing content - confirm against NodeFunction.
        NodeFunction commit = NodeFunction.constant(data);
        atomicUpdate(shardId + "/" + STATE_SUFFIX, commit);
    }

    /**
     * Get the last committed sequence number for the shard from Zookeeper.
     *
     * @param  shardId  shard to read from.
     * @return a sequence number if the state exists, empty string otherwise.
     * @throws Exception if the ZooKeeper read or JSON deserialization fails.
     */
    String getLastCommittedSeqNum(final String shardId) throws Exception {
        try {
            byte[] data = get(shardId + "/" + STATE_SUFFIX);
            return OBJECT_MAPPER.readValue(data, ShardStateV0.class).getCheckpoint();
        } catch (KeeperException.NoNodeException e) {
            // No state node means nothing has been checkpointed for this shard yet.
            LOG.info("No shard state for " + shardId);
            return "";
        }
    }

    /**
     * Get the shardList from ZK. The spout should do assignments based on this list, not
     * the one returned by DescribeStream. This will ensure consistency across all spout tasks.
     *
     * @return the list of shards in the stream.
     * @throws Exception if the ZooKeeper read or JSON deserialization fails; a missing shard
     *         list node is not handled here and propagates to the caller.
     */
    ImmutableList<String> getShardList() throws Exception {
        byte[] data = get(SHARD_LIST_SUFFIX);
        return ImmutableList.copyOf(OBJECT_MAPPER.readValue(data, ShardListV0.class).getShardList());
    }

    /**
     * Set a watcher for the shardList. The watcher is registered through an existence check on
     * the shard list path.
     *
     * @param callback Zookeeper watcher to be set on the shard list.
     * @throws Exception if the ZooKeeper operation fails.
     */
    void watchShardList(Watcher callback) throws Exception {
        watch(SHARD_LIST_SUFFIX, callback);
    }

    /**
     * @param path path to check.
     * @return true if path is the path to the shard list.
     */
    boolean isShardList(final String path) {
        return buildZookeeperPath(SHARD_LIST_SUFFIX).equals(path);
    }

    /**
     * Closes the connection to ZK.
     *
     * @throws InterruptedException
     */
    void close() throws InterruptedException {
        zk.close();
    }

    /**
     * Optimistic concurrency scheme for tryAtomicUpdate. Try to update, and keep trying
     * until successful.
     *
     * <p>The randomized pause is taken only after an attempt that lost a race, i.e. right before
     * the next attempt. A successful update returns immediately.
     *
     * @param pathSuffix suffix to use to build path in ZooKeeper.
     * @param f function used to initialize the node, or transform the data already there.
     * @throws Exception if a ZooKeeper operation fails, or the thread is interrupted while
     *         pausing between attempts.
     */
    private void atomicUpdate(final String pathSuffix, final NodeFunction f) throws Exception {
        final Callable<Boolean> attempt = new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
                return tryAtomicUpdate(pathSuffix, f);
            }
        };

        while (!RetryLoop.callWithRetry(zk.getZookeeperClient(), attempt)) {
            // Lost a race with a concurrent writer: back off for a jittered interval so that
            // competing tasks do not retry in lock step.
            Thread.sleep(BASE_OPTIMISTIC_RETRY_TIME_MS + rand.nextInt(BASE_OPTIMISTIC_RETRY_TIME_MS));
        }
    }

    /**
     * Read the data of a node, inside a Curator retry loop.
     *
     * @param pathSuffix suffix to use to build path in ZooKeeper.
     * @return the raw bytes stored in the node.
     * @throws Exception if the read fails, including when the node does not exist.
     */
    private byte[] get(final String pathSuffix) throws Exception {
        return RetryLoop.callWithRetry(zk.getZookeeperClient(), new Callable<byte[]>() {
            @Override
            public byte[] call() throws Exception {
                return zk.getData().forPath(buildZookeeperPath(pathSuffix));
            }
        });
    }

    /**
     * Delete a node, inside a Curator retry loop. A node that does not exist is silently
     * accepted.
     *
     * @param pathSuffix suffix to use to build path in ZooKeeper.
     * @throws Exception if the delete fails for a reason other than the node being absent.
     */
    private void delete(final String pathSuffix) throws Exception {
        RetryLoop.callWithRetry(zk.getZookeeperClient(), new Callable<Void>() {
            @Override
            public Void call() throws Exception {
                try {
                    zk.delete().forPath(buildZookeeperPath(pathSuffix));
                } catch (KeeperException.NoNodeException ignored) {
                    // likely deleted by another task
                }
                return null;
            }
        });
    }

    /**
     * Register a watcher on a node through an existence check, inside a Curator retry loop.
     *
     * @param pathSuffix suffix to use to build path in ZooKeeper.
     * @param callback watcher to register.
     * @throws Exception if the ZooKeeper operation fails.
     */
    private void watch(final String pathSuffix, final Watcher callback) throws Exception {
        RetryLoop.callWithRetry(zk.getZookeeperClient(), new Callable<Void>() {
            @Override
            public Void call() throws Exception {
                zk.checkExists().usingWatcher(callback).forPath(buildZookeeperPath(pathSuffix));
                return null;
            }
        });
    }

    /**
     * Try to atomically update a node in ZooKeeper, creating it if it doesn't exist. This is
     * meant to be used within an optimistic concurrency model.
     *
     * <p>If the node is absent it is created (with parents) from {@code f.initialize()}. If it
     * exists, {@code f.apply} is given the current data and, when it requests a modification, the
     * new data is written conditionally on the version observed by the existence check.
     *
     * @param pathSuffix suffix to use to build path in ZooKeeper.
     * @param f function used to initialize the node, or transform the data already there.
     * @return true if node was created/updated (or no modification was needed), false if a
     *         concurrent creation, modification or deletion occurred while trying to
     *         update/create the node.
     * @throws Exception if a ZooKeeper operation fails for any other reason.
     */
    private boolean tryAtomicUpdate(final String pathSuffix, final NodeFunction f) throws Exception {
        final String path = buildZookeeperPath(pathSuffix);
        final Stat stat = zk.checkExists().forPath(path);

        if (stat == null) {
            try {
                zk.create().creatingParentsIfNeeded().withMode(CreateMode.PERSISTENT).forPath(path, f.initialize());
            } catch (KeeperException.NodeExistsException e) {
                LOG.debug("Concurrent creation of " + path + ", retrying", e);
                return false;
            }
            return true;
        }

        byte[] current;
        try {
            current = zk.getData().forPath(path);
        } catch (KeeperException.NoNodeException e) {
            // The node was removed (see delete()) between the existence check and the read.
            // Report a lost race so the caller retries and re-creates it.
            LOG.debug("Concurrent deletion of " + path + ", retrying.", e);
            return false;
        }

        final Mod<byte[]> newVal = f.apply(current);
        if (newVal.hasModification()) {
            try {
                // Conditional write: rejected if the node changed since checkExists().
                zk.setData().withVersion(stat.getVersion()).forPath(path, newVal.get());
            } catch (KeeperException.BadVersionException e) {
                LOG.debug("Concurrent update to " + path + ", retrying.", e);
                return false;
            } catch (KeeperException.NoNodeException e) {
                LOG.debug("Concurrent deletion of " + path + ", retrying.", e);
                return false;
            }
        }

        return true;
    }

    /**
     * Build the absolute ZooKeeper path for a suffix:
     * {@code /<zookeeperPrefix>/<topologyName>/<streamName>/<suffix>}.
     *
     * @param suffix final path component(s), e.g. the shard list suffix or a shard state path.
     * @return the absolute node path.
     */
    private String buildZookeeperPath(final String suffix) {
        return "/" + config.getZookeeperPrefix() + "/" + config.getTopologyName() + "/" + config.getStreamName()
                + "/" + suffix;
    }
}