com.facebook.zookeeper.RecoveringZooKeeper.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.zookeeper.RecoveringZooKeeper.java

Source

/*
 * Copyright (C) 2012 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.zookeeper;

import org.apache.log4j.Logger;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.ZooKeeper.States;

import java.util.List;
import java.util.concurrent.TimeUnit;

public class RecoveringZooKeeper implements ZooKeeperIface {
    private static final Logger LOG = Logger.getLogger(RecoveringZooKeeper.class);

    private final ZooKeeperIface zk;
    private final RetryCounterFactory retryCounterFactory;

    /**
     * Wraps an existing client so that selected operations are retried on
     * possibly transient failures.
     *
     * @param zk                  client that every call is delegated to
     * @param maxRetries          number of retries allowed per operation, on top
     *                            of the initial attempt
     * @param retryIntervalMillis pause between attempts, in milliseconds
     */
    public RecoveringZooKeeper(ZooKeeperIface zk, int maxRetries, int retryIntervalMillis) {
        this.retryCounterFactory = new RetryCounterFactory(maxRetries, retryIntervalMillis);
        this.zk = zk;
    }

    /**
     * Returns the session id reported by the wrapped client. The call is
     * forwarded once; no retry logic is applied.
     */
    @Override
    public long getSessionId() {
        final long sessionId = zk.getSessionId();
        return sessionId;
    }

    /**
     * Closes the wrapped client. The call is forwarded once; no retry logic is
     * applied.
     *
     * @throws InterruptedException propagated from the wrapped client
     */
    @Override
    public void close() throws InterruptedException {
        this.zk.close();
    }

    /**
     * Creates a node, choosing a recovery strategy based on {@code createMode}.
     *
     * <ul>
     * <li>{@code EPHEMERAL} / {@code PERSISTENT}: retried via
     * {@code createNonSequential}.</li>
     * <li>{@code EPHEMERAL_SEQUENTIAL}: retried via
     * {@code createEphemeralSequential}.</li>
     * <li>{@code PERSISTENT_SEQUENTIAL}: forwarded once, without recovery.</li>
     * </ul>
     *
     * @return the path returned by the selected strategy
     * @throws IllegalArgumentException if {@code createMode} is none of the
     *                                  four modes above
     */
    @Override
    public String create(String path, byte[] data, List<ACL> acl, CreateMode createMode)
            throws KeeperException, InterruptedException {
        final String createdPath;
        switch (createMode) {
        case PERSISTENT_SEQUENTIAL:
            // Not recoverable here: after a disconnect there is no way to tell
            // whether the insertion happened without help from the application.
            createdPath = zk.create(path, data, acl, createMode);
            break;

        case EPHEMERAL_SEQUENTIAL:
            // Caveat: creating several ephemeral sequential nodes that share a
            // prefix under the same parent is not reliably supported.
            createdPath = createEphemeralSequential(path, data, acl, createMode);
            break;

        case PERSISTENT:
        case EPHEMERAL:
            createdPath = createNonSequential(path, data, acl, createMode);
            break;

        default:
            throw new IllegalArgumentException("Unrecognized CreateMode: " + createMode);
        }
        return createdPath;
    }

    /**
     * Deletes the node at {@code path}, retrying on possibly transient failures.
     *
     * <p>{@code NONODE} is treated as success on any attempt, including the
     * first. {@code CONNECTIONLOSS} and {@code OPERATIONTIMEOUT} trigger a
     * sleep-and-retry until the retry budget is used up, after which the last
     * exception is rethrown. Every other {@link KeeperException} is rethrown
     * immediately.
     *
     * @param path    node to delete
     * @param version version passed through to the wrapped client
     */
    @Override
    public void delete(String path, int version) throws InterruptedException, KeeperException {
        final RetryCounter attempts = retryCounterFactory.create();
        for (;;) {
            try {
                zk.delete(path, version);
                return;
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper delete failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                case NONODE:
                    // The node is gone, which is the outcome the caller asked for.
                    return;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Forwards {@code exists(path, watcher)} to the wrapped client, retrying on
     * possibly transient failures.
     *
     * <p>{@code CONNECTIONLOSS} and {@code OPERATIONTIMEOUT} trigger a
     * sleep-and-retry until the retry budget is used up, after which the last
     * exception is rethrown. Every other {@link KeeperException} is rethrown
     * immediately.
     *
     * @return whatever the wrapped client returns for the first successful call
     */
    @Override
    public Stat exists(String path, Watcher watcher) throws KeeperException, InterruptedException {
        final RetryCounter attempts = retryCounterFactory.create();
        for (;;) {
            try {
                return zk.exists(path, watcher);
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper exists failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Forwards {@code exists(path, watch)} to the wrapped client, retrying on
     * possibly transient failures.
     *
     * <p>{@code CONNECTIONLOSS} and {@code OPERATIONTIMEOUT} trigger a
     * sleep-and-retry until the retry budget is used up, after which the last
     * exception is rethrown. Every other {@link KeeperException} is rethrown
     * immediately.
     *
     * @return whatever the wrapped client returns for the first successful call
     */
    @Override
    public Stat exists(String path, boolean watch) throws KeeperException, InterruptedException {
        final RetryCounter attempts = retryCounterFactory.create();
        for (;;) {
            try {
                return zk.exists(path, watch);
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper exists failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Forwards {@code getData(path, watcher, stat)} to the wrapped client,
     * retrying on possibly transient failures.
     *
     * <p>{@code CONNECTIONLOSS} and {@code OPERATIONTIMEOUT} trigger a
     * sleep-and-retry until the retry budget is used up, after which the last
     * exception is rethrown. Every other {@link KeeperException} is rethrown
     * immediately.
     *
     * @return whatever the wrapped client returns for the first successful call
     */
    @Override
    public byte[] getData(String path, Watcher watcher, Stat stat) throws KeeperException, InterruptedException {
        final RetryCounter attempts = retryCounterFactory.create();
        for (;;) {
            try {
                return zk.getData(path, watcher, stat);
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper getData failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Forwards {@code getData(path, watch, stat)} to the wrapped client,
     * retrying on possibly transient failures.
     *
     * <p>{@code CONNECTIONLOSS} and {@code OPERATIONTIMEOUT} trigger a
     * sleep-and-retry until the retry budget is used up, after which the last
     * exception is rethrown. Every other {@link KeeperException} is rethrown
     * immediately.
     *
     * @return whatever the wrapped client returns for the first successful call
     */
    @Override
    public byte[] getData(String path, boolean watch, Stat stat) throws KeeperException, InterruptedException {
        final RetryCounter attempts = retryCounterFactory.create();
        for (;;) {
            try {
                return zk.getData(path, watch, stat);
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper getData failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Forwards {@code setData(path, data, version)} to the wrapped client,
     * retrying on possibly transient failures.
     *
     * <p>{@code CONNECTIONLOSS} and {@code OPERATIONTIMEOUT} trigger a
     * sleep-and-retry until the retry budget is used up, after which the last
     * exception is rethrown. Every other {@link KeeperException} is rethrown
     * immediately. The same {@code version} is sent on every attempt.
     *
     * @return whatever the wrapped client returns for the first successful call
     */
    @Override
    public Stat setData(String path, byte[] data, int version) throws KeeperException, InterruptedException {
        final RetryCounter attempts = retryCounterFactory.create();
        for (;;) {
            try {
                return zk.setData(path, data, version);
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper setData failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Forwards {@code getChildren(path, watcher)} to the wrapped client,
     * retrying on possibly transient failures.
     *
     * <p>{@code CONNECTIONLOSS} and {@code OPERATIONTIMEOUT} trigger a
     * sleep-and-retry until the retry budget is used up, after which the last
     * exception is rethrown. Every other {@link KeeperException} is rethrown
     * immediately.
     *
     * @return whatever the wrapped client returns for the first successful call
     */
    @Override
    public List<String> getChildren(String path, Watcher watcher) throws KeeperException, InterruptedException {
        final RetryCounter attempts = retryCounterFactory.create();
        for (;;) {
            try {
                return zk.getChildren(path, watcher);
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper getChildren failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Forwards {@code getChildren(path, watch)} to the wrapped client, retrying
     * on possibly transient failures.
     *
     * <p>{@code CONNECTIONLOSS} and {@code OPERATIONTIMEOUT} trigger a
     * sleep-and-retry until the retry budget is used up, after which the last
     * exception is rethrown. Every other {@link KeeperException} is rethrown
     * immediately.
     *
     * @return whatever the wrapped client returns for the first successful call
     */
    @Override
    public List<String> getChildren(String path, boolean watch) throws KeeperException, InterruptedException {
        final RetryCounter attempts = retryCounterFactory.create();
        for (;;) {
            try {
                return zk.getChildren(path, watch);
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper getChildren failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Returns the state reported by the wrapped client. The call is forwarded
     * once; no retry logic is applied.
     */
    @Override
    public States getState() {
        final States current = zk.getState();
        return current;
    }

    // ------------------------- Internal Helpers ------------------------ //

    /**
     * Creates a non-sequential node, retrying on possibly transient failures.
     *
     * <p>{@code NODEEXISTS} is treated as success on any attempt, including the
     * first, and {@code path} is returned. {@code CONNECTIONLOSS} and
     * {@code OPERATIONTIMEOUT} trigger a sleep-and-retry until the retry budget
     * is used up, after which the last exception is rethrown. Every other
     * {@link KeeperException} is rethrown immediately.
     *
     * @return the path returned by the wrapped client, or {@code path} itself
     *         when the node already exists
     */
    private String createNonSequential(String path, byte[] data, List<ACL> acl, CreateMode createMode)
            throws KeeperException, InterruptedException {
        final RetryCounter attempts = retryCounterFactory.create();
        for (;;) {
            try {
                return zk.create(path, data, acl, createMode);
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper create failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                case NODEEXISTS:
                    // The name is fixed, so an existing node counts as created.
                    return path;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Creates an ephemeral sequential node, retrying on possibly transient
     * failures.
     *
     * <p>Before every attempt after the first, the parent is searched for a
     * node left behind by an earlier attempt (see
     * {@code findMyEphemeralSequentialNode}); if one is found its path is
     * returned and no new node is created. {@code CONNECTIONLOSS} and
     * {@code OPERATIONTIMEOUT}, whether raised by the lookup or by the create,
     * trigger a sleep-and-retry until the retry budget is used up, after which
     * the last exception is rethrown. Every other {@link KeeperException} is
     * rethrown immediately.
     *
     * @return the path of the newly created node, or of a matching node found
     *         from a previous attempt
     */
    private String createEphemeralSequential(String path, byte[] data, List<ACL> acl, CreateMode createMode)
            throws KeeperException, InterruptedException {
        final RetryCounter attempts = retryCounterFactory.create();
        boolean retrying = false;
        for (;;) {
            try {
                if (retrying) {
                    // An earlier create may have succeeded even though it failed here.
                    final String existing = findMyEphemeralSequentialNode(path);
                    if (existing != null) {
                        return existing;
                    }
                }
                // Set before the create so a failure below marks the next pass as a retry.
                retrying = true;
                return zk.create(path, data, acl, createMode);
            } catch (KeeperException ke) {
                switch (ke.code()) {
                case CONNECTIONLOSS:
                case OPERATIONTIMEOUT:
                    LOG.warn("Possibly transient ZooKeeper exception: " + ke);
                    if (attempts.shouldRetry()) {
                        // Budget remains: leave the switch and back off below.
                        break;
                    }
                    LOG.error("ZooKeeper create failed after " + attempts.getMaxRetries() + " retries");
                    throw ke;

                default:
                    throw ke;
                }
            }
            LOG.info("Retrying ZooKeeper after sleeping...");
            attempts.sleepUntilNextRetry();
            attempts.useRetry();
        }
    }

    /**
     * Looks under the parent of {@code path} for a child that starts with the
     * last path component and whose ephemeral owner is the current session.
     *
     * <p>The first match is returned, so this cannot tell apart several
     * ephemeral sequential nodes that share a prefix and were created by this
     * session under the same parent.
     *
     * <p>A node directly under the root (for example {@code /lock-}) is
     * searched for in {@code /}. Taking the substring before the slash would
     * give an empty parent path, which ZooKeeper path validation is expected to
     * reject, and joining with {@code "/"} would give {@code //child}.
     *
     * @param path the path prefix that was passed to the create call
     * @return the full path of a matching node owned by this session, or
     *         {@code null} if none is found
     * @throws IllegalArgumentException if {@code path} contains no {@code '/'}
     * @throws KeeperException          propagated from the wrapped client; these
     *                                  calls are not retried here
     * @throws InterruptedException     propagated from the wrapped client
     */
    private String findMyEphemeralSequentialNode(String path) throws KeeperException, InterruptedException {
        int lastSlashIdx = path.lastIndexOf('/');
        if (lastSlashIdx < 0) {
            // Checked explicitly because an assert is skipped unless the JVM runs with -ea.
            throw new IllegalArgumentException("Path must contain a '/': " + path);
        }
        boolean parentIsRoot = (lastSlashIdx == 0);
        String parent = parentIsRoot ? "/" : path.substring(0, lastSlashIdx);
        String nodePrefix = path.substring(lastSlashIdx + 1);

        List<String> nodes = zk.getChildren(parent, false);
        List<String> matching = ZkUtil.filterByPrefix(nodes, nodePrefix);
        for (String node : matching) {
            // Avoid a doubled slash when the parent is the root.
            String nodePath = parentIsRoot ? "/" + node : parent + "/" + node;
            Stat stat = zk.exists(nodePath, false);
            if (stat != null && stat.getEphemeralOwner() == zk.getSessionId()) {
                return nodePath;
            }
        }
        return null;
    }

    /**
     * Holds one retry configuration and hands out a fresh {@link RetryCounter}
     * for each operation, so that retry budgets are not shared between calls.
     */
    private static class RetryCounterFactory {
        private final int maxRetries;
        private final int retryIntervalMillis;

        /**
         * @param retryLimit     retries each new counter starts with
         * @param intervalMillis pause between attempts, in milliseconds
         */
        private RetryCounterFactory(int retryLimit, int intervalMillis) {
            maxRetries = retryLimit;
            retryIntervalMillis = intervalMillis;
        }

        /** Returns a new counter with the full retry budget, timed in milliseconds. */
        public RetryCounter create() {
            final RetryCounter counter = new RetryCounter(maxRetries, retryIntervalMillis, TimeUnit.MILLISECONDS);
            return counter;
        }
    }

    /**
     * Tracks the retry budget for a single operation and sleeps between
     * attempts. Instances are mutable and meant to be used by one call only.
     */
    private static class RetryCounter {
        private final int maxRetries;
        private final int retryInterval;
        private final TimeUnit timeUnit;
        private int retriesRemaining;

        /**
         * @param maxRetries          retries available before giving up
         * @param retryIntervalMillis length of each pause, expressed in
         *                            {@code timeUnit}
         * @param timeUnit            unit used to interpret the pause length
         */
        private RetryCounter(int maxRetries, int retryIntervalMillis, TimeUnit timeUnit) {
            this.maxRetries = maxRetries;
            this.retryInterval = retryIntervalMillis;
            this.timeUnit = timeUnit;
            this.retriesRemaining = maxRetries;
        }

        /** Returns the budget this counter started with, not what is left. */
        public int getMaxRetries() {
            return maxRetries;
        }

        /** Blocks the calling thread for one retry interval. */
        public void sleepUntilNextRetry() throws InterruptedException {
            timeUnit.sleep(retryInterval);
        }

        /** Returns {@code true} while at least one retry is left. */
        public boolean shouldRetry() {
            return 0 < retriesRemaining;
        }

        /** Consumes one retry from the budget. */
        public void useRetry() {
            retriesRemaining -= 1;
        }

    }

}