org.apache.hadoop.hive.llap.registry.impl.SlotZnode.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.llap.registry.impl.SlotZnode.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hive.llap.registry.impl;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.api.BackgroundCallback;
import org.apache.curator.framework.api.CuratorEvent;
import org.apache.curator.framework.state.ConnectionState;
import org.apache.curator.framework.state.ConnectionStateListener;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.TreeSet;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

/**
 * We would have used the curator ephemeral node with some extra logic but it doesn't handle
 * the EXISTS condition, which is crucial here; so we c/p parts of Curator and add our logic.
 */
public class SlotZnode implements Closeable {
    static final Charset CHARSET = StandardCharsets.UTF_8;
    private final static Logger LOG = LoggerFactory.getLogger(SlotZnode.class);

    private final AtomicReference<CountDownLatch> initialCreateLatch = new AtomicReference<CountDownLatch>(
            new CountDownLatch(1));
    private final AtomicReference<String> nodePath = new AtomicReference<String>(null);
    private final Random rdm = new Random();
    private final CuratorFramework client;
    private final String basePath, prefix, workerPrefix, dataStr;
    private final byte[] data;
    private int currentSlot;
    private final AtomicReference<State> state = new AtomicReference<State>(State.LATENT);
    private int fallbackCount = 0; // Test-only counter.
    private final BackgroundCallback backgroundCallback = new BackgroundCallback() {
        @Override
        public void processResult(CuratorFramework client, CuratorEvent event) throws Exception {
            processCreateResult(client, event);
        }
    };
    private final Watcher watcher = new Watcher() {
        @Override
        public void process(WatchedEvent event) {
            processWatchedEvent(event);
        }
    };
    private final BackgroundCallback checkExistsCallback = new BackgroundCallback() {
        @Override
        public void processResult(CuratorFramework client, CuratorEvent event) throws Exception {
            processWatchResult(event);
        }
    };
    private final ConnectionStateListener connectionStateListener = new ConnectionStateListener() {
        @Override
        public void stateChanged(CuratorFramework client, ConnectionState newState) {
            processConnectionState(newState);
        }
    };

    private enum State {
        LATENT, INITIAL_SELECTION, AFTER_SELECTION, CLOSED
    }

    public SlotZnode(CuratorFramework client, String basePath, String prefix, String workerPrefix, String data) {
        this.client = Preconditions.checkNotNull(client, "client cannot be null");
        this.basePath = Preconditions.checkNotNull(basePath, "basePath cannot be null");
        this.prefix = Preconditions.checkNotNull(prefix, "prefix cannot be null");
        this.workerPrefix = workerPrefix;
        Preconditions.checkNotNull(data, "data cannot be null");
        this.dataStr = data;
        this.data = data.getBytes(CHARSET);
    }

    @VisibleForTesting
    public int getFallbackCount() {
        return fallbackCount;
    }

    private void chooseSlotToTake() throws Exception {
        int slotToTake = -1;
        while (true) {
            List<String> allChildNodes;
            try {
                allChildNodes = client.getChildren().forPath(basePath);
            } catch (Exception e) {
                LOG.error("Cannot list nodes to get slots; failing", e);
                throw e;
            }
            TreeSet<Integer> slots = new TreeSet<>();
            int approxWorkerCount = 0;
            for (String child : allChildNodes) {
                if (!child.startsWith(prefix)) {
                    if (child.startsWith(workerPrefix)) {
                        ++approxWorkerCount;
                    }
                } else {
                    slots.add(Integer.parseInt(child.substring(prefix.length())));
                }
            }
            Iterator<Integer> slotIter = slots.iterator();
            slotToTake = 0;
            while (slotIter.hasNext()) {
                int nextTaken = slotIter.next();
                if (slotToTake < nextTaken)
                    break;
                slotToTake = nextTaken + 1;
            }
            // There may be a race for this slot so re-query after a delay with some probability.
            if (slotToTake != currentSlot || !shouldFallBackOnCollision(approxWorkerCount))
                break;
            ++fallbackCount;
            Thread.sleep(rdm.nextInt(200)); // arbitrary
        }

        currentSlot = slotToTake;
        LOG.info("Will attempt to take slot " + currentSlot);
    }

    private boolean shouldFallBackOnCollision(int approxWorkerCount) {
        // Ideally, we'd want 1 worker to try for every slot; e.g. if there are 4 workers we want 3
        // to re-read, i.e. probability of falling back = 0.75, or 1/4 < random([0,1)). However, we
        // make it slightly more probable (2.0x) to avoid too much re-reading. This is hand-wavy.
        if (approxWorkerCount == 0)
            return false;
        return (2.0f / approxWorkerCount) <= rdm.nextDouble();
    }

    private String getSlotPath(int slot) {
        return String.format("%s/%s%010d", basePath, prefix, slot);
    }

    public boolean start(long timeout, TimeUnit unit) throws Exception {
        Preconditions.checkState(state.compareAndSet(State.LATENT, State.INITIAL_SELECTION), "Already started");
        CountDownLatch localLatch = initialCreateLatch.get();
        client.getConnectionStateListenable().addListener(connectionStateListener);
        chooseSlotToTake();
        startCreateCurrentNode();
        return localLatch.await(timeout, unit);
    }

    @Override
    public void close() throws IOException {
        State currentState = state.getAndSet(State.CLOSED);
        if (currentState == State.CLOSED || currentState == State.LATENT)
            return;
        client.getConnectionStateListenable().removeListener(connectionStateListener);
        String localNodePath = nodePath.getAndSet(null);
        if (localNodePath == null)
            return;
        try {
            client.delete().guaranteed().forPath(localNodePath);
        } catch (KeeperException.NoNodeException ignore) {
        } catch (Exception e) {
            LOG.error("Deleting node: " + localNodePath, e);
            throw new IOException(e);
        }
    }

    public int getCurrentSlot() {
        assert isActive();
        return currentSlot;
    }

    private void startCreateCurrentNode() {
        if (!isActive())
            return;
        String createPath = null;
        try {
            createPath = getSlotPath(currentSlot);
            LOG.info("Attempting to create " + createPath);
            client.create().withMode(CreateMode.EPHEMERAL).inBackground(backgroundCallback).forPath(createPath,
                    data);
        } catch (Exception e) {
            LOG.error("Creating node. Path: " + createPath, e);
            throw new RuntimeException(e);
        }
    }

    private void watchNode() throws Exception {
        if (!isActive())
            return;
        String localNodePath = nodePath.get();
        if (localNodePath == null)
            return;
        try {
            client.checkExists().usingWatcher(watcher).inBackground(checkExistsCallback).forPath(localNodePath);
        } catch (Exception e) {
            LOG.error("Watching node: " + localNodePath, e);
            throw e;
        }
    }

    private boolean isActive() {
        State localState = state.get();
        return (localState != State.LATENT && localState != State.CLOSED);
    }

    private void processWatchResult(CuratorEvent event) throws Exception {
        if (event.getResultCode() != KeeperException.Code.NONODE.intValue())
            return;
        LOG.info("Trying to reacquire because of the NONODE event");
        startCreateCurrentNode();
    }

    private void processConnectionState(ConnectionState newState) {
        if (newState != ConnectionState.RECONNECTED)
            return;
        LOG.info("Trying to reacquire because of the RECONNECTED event");
        startCreateCurrentNode();
    }

    private void processWatchedEvent(WatchedEvent event) {
        if (event.getType() != EventType.NodeDeleted)
            return;
        String localPath = nodePath.get();
        if (localPath == null)
            return;
        if (!localPath.equals(event.getPath())) {
            LOG.info("Ignoring the NodeDeleted event for " + event.getPath());
            return;
        }
        LOG.info("Trying to reacquire because of the NodeDeleted event");
        startCreateCurrentNode();
    }

    private void processCreateResult(CuratorFramework client, CuratorEvent event) throws Exception {
        boolean doesExist = event.getResultCode() == KeeperException.Code.NODEEXISTS.intValue();
        if (!doesExist && event.getResultCode() != KeeperException.Code.OK.intValue()) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Trying to reacquire due to create error: " + event);
            }
            startCreateCurrentNode(); // TODO: a pattern from Curator. Better error handling?
            return;
        }
        State localState = state.get();
        switch (localState) {
        case CLOSED:
        case LATENT:
            return;
        case INITIAL_SELECTION:
            if (doesExist) {
                LOG.info("Slot " + currentSlot + " was occupied");
                chooseSlotToTake(); // Try another slot.
                startCreateCurrentNode();
            } else {
                handleCreatedNode(event.getName());
            }
            break;
        case AFTER_SELECTION:
            if (doesExist) {
                processExistsFromCreate(client, event.getPath());
            } else {
                handleCreatedNode(event.getName());
            }
            break;
        default:
            throw new AssertionError("Unknown state " + localState);
        }
    }

    private void processExistsFromCreate(CuratorFramework client, String path) throws Exception {
        byte[] actual;
        try {
            actual = client.getData().forPath(path);
        } catch (Exception ex) {
            LOG.error("Error getting data for the node; will retry creating", ex);
            startCreateCurrentNode();
            return;
        }
        if (Arrays.equals(actual, data)) {
            handleCreatedNode(path);
        } else {
            if (LOG.isInfoEnabled()) {
                LOG.info("Data at {} is from a different node: {} (we are {})", path, new String(actual, CHARSET),
                        dataStr);
            }
            nodePath.getAndSet(null);
            chooseSlotToTake(); // Try another slot.
            startCreateCurrentNode();
        }
    }

    private void handleCreatedNode(String path) throws Exception {
        while (true) {
            State localState = state.get();
            if (localState == State.CLOSED || localState == State.LATENT)
                return;
            if (state.compareAndSet(localState, State.AFTER_SELECTION))
                break;
        }
        nodePath.set(path);
        watchNode();
        CountDownLatch localLatch = initialCreateLatch.getAndSet(null);
        if (localLatch != null) {
            localLatch.countDown();
        }
        LOG.info("Acquired the slot znode {}{}", path,
                localLatch != null ? "; this is the initial assignment" : "");
    }

    @VisibleForTesting
    public String getActualPath() {
        return nodePath.get();
    }
}