co.cask.hydrator.plugin.realtime.Kafka08SimpleApiConsumer.java Source code

Introduction

Here is the source code for co.cask.hydrator.plugin.realtime.Kafka08SimpleApiConsumer.java. The class implements a Kafka 0.8.x real-time source consumer for CDAP Hydrator pipelines: it reads messages through Kafka's SimpleConsumer API, discovers partition leaders via ZooKeeper or a broker list, and tracks a read offset for each topic partition.

Source

/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.hydrator.plugin.realtime;

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.etl.api.Emitter;
import co.cask.cdap.etl.api.realtime.RealtimeContext;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterators;
import com.google.common.util.concurrent.Futures;
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.cluster.Broker;
import kafka.common.ErrorMapping;
import kafka.common.TopicAndPartition;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.OffsetRequest;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.TopicMetadataRequest;
import kafka.javaapi.TopicMetadataResponse;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.message.Message;
import kafka.message.MessageAndOffset;
import org.apache.twill.internal.kafka.client.ZKBrokerService;
import org.apache.twill.kafka.client.BrokerInfo;
import org.apache.twill.kafka.client.BrokerService;
import org.apache.twill.kafka.client.TopicPartition;
import org.apache.twill.zookeeper.RetryStrategies;
import org.apache.twill.zookeeper.ZKClientService;
import org.apache.twill.zookeeper.ZKClientServices;
import org.apache.twill.zookeeper.ZKClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import javax.annotation.Nullable;

/**
 * The {@link KafkaSimpleApiConsumer} implementation compatible with {@code Kafka} 0.8.x releases.
 *
 * <p>
 * The class expects these runtime arguments:
 * </p>
 * <ul>
 * <li>kafka.zookeeper: the connection string of the Kafka ZooKeeper quorum</li>
 * <li>kafka.brokers: comma-separated list of Kafka brokers</li>
 * <li>kafka.partitions: the number of partitions of the topic</li>
 * <li>kafka.topic: the Kafka topic to retrieve messages for</li>
 * <li>kafka.initial.offset: the initial offset for the partition</li>
 * </ul>
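 * <p>
 * Example runtime arguments (the values below are illustrative only):
 * </p>
 * <pre>
 * kafka.zookeeper=localhost:2181/kafka
 * kafka.partitions=4
 * kafka.topic=events
 * kafka.initial.offset=-2
 * </pre>
 * <p>
 * A negative {@code kafka.initial.offset} is interpreted as one of Kafka's special
 * offsets: {@code -2} for the earliest and {@code -1} for the latest available offset.
 * </p>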
 */
public class Kafka08SimpleApiConsumer extends KafkaSimpleApiConsumer<String, ByteBuffer, Long> {
    private static final Logger LOG = LoggerFactory.getLogger(Kafka08SimpleApiConsumer.class);

    private ZKClientService zkClient;
    private BrokerService brokerService;
    private Cache<TopicPartition, SimpleConsumer> kafkaConsumers;

    public Kafka08SimpleApiConsumer(KafkaSource kafkaSource) {
        super(kafkaSource);
    }

    @Override
    protected void configureKafka(KafkaConfigurer configurer) {
        KafkaSource.KafkaPluginConfig pluginConfig = kafkaSource.getConfig();
        Preconditions.checkNotNull(pluginConfig, "Kafka source plugin config should not be null.");

        String zk = pluginConfig.getZkConnect();
        String brokers = pluginConfig.getKafkaBrokers();
        Preconditions.checkState(zk != null || brokers != null,
                "Either a ZooKeeper quorum or a Kafka broker list must be provided.");

        if (zk != null) {
            configurer.setZooKeeper(zk);
        } else {
            configurer.setBrokers(brokers);
        }
        setupTopicPartitions(configurer, pluginConfig);
    }

    private void setupTopicPartitions(KafkaConsumerConfigurer configurer,
            KafkaSource.KafkaPluginConfig pluginConfig) {
        int partitions = pluginConfig.getPartitions();
        int instanceId = getContext().getInstanceId();
        int instances = getContext().getInstanceCount();
        String kafkaTopic = pluginConfig.getTopic();
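        // Round-robin assignment: this worker instance claims every partition whose
        // index modulo the total instance count equals its own instance id, so each
        // partition is consumed by exactly one source instance.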
        for (int i = 0; i < partitions; i++) {
            if ((i % instances) == instanceId) {
                configurer.addTopicPartition(kafkaTopic, i);
            }
        }
    }

    @Override
    protected Iterator<KafkaMessage<Long>> readMessages(KafkaConsumerInfo<Long> consumerInfo) throws Exception {
        final TopicPartition topicPartition = consumerInfo.getTopicPartition();
        String topic = topicPartition.getTopic();
        int partition = topicPartition.getPartition();

        // Fetch messages from Kafka.
        final SimpleConsumer consumer = getConsumer(consumerInfo);
        if (consumer == null) {
            return Iterators.emptyIterator();
        }

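        // A negative read offset is one of Kafka's special "time" values
        // (EarliestTime == -2L, LatestTime == -1L); resolve it to a concrete
        // offset via an OffsetRequest before fetching.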
        long readOffset = consumerInfo.getReadOffset();
        if (readOffset < 0) {
            readOffset = getReadOffset(consumer, topic, partition, readOffset);
            consumerInfo.setReadOffset(readOffset);
        }

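        // Build a fetch request for this single topic partition, starting at the
        // resolved read offset and returning at most fetchSize bytes per fetch.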
        FetchRequest fetchRequest = new FetchRequestBuilder().clientId(consumer.clientId())
                .addFetch(topic, partition, readOffset, consumerInfo.getFetchSize()).build();
        FetchResponse response = consumer.fetch(fetchRequest);

        // Fetch failed
        if (response.hasError()) {
            handleFetchError(consumerInfo, consumer, readOffset, response.errorCode(topic, partition));
            return Iterators.emptyIterator();
        }

        // Returns a lazy Iterator over the fetched messages, skipping any whose
        // offset precedes the requested read offset.
        final long fetchReadOffset = readOffset;
        final Iterator<MessageAndOffset> messages = response.messageSet(topic, partition).iterator();
        return new AbstractIterator<KafkaMessage<Long>>() {
            @Override
            protected KafkaMessage<Long> computeNext() {
                while (messages.hasNext()) {
                    MessageAndOffset messageAndOffset = messages.next();
                    if (messageAndOffset.offset() < fetchReadOffset) {
                        // Older message read (which is valid in Kafka), skip it.
                        continue;
                    }

                    // Wrap the Kafka message together with the offset to resume from
                    // (the offset just after this message).
                    Message message = messageAndOffset.message();
                    return new KafkaMessage<>(topicPartition, messageAndOffset.nextOffset(), message.key(),
                            message.payload());
                }
                return endOfData();
            }
        };
    }

    @Override
    public void initialize(RealtimeContext context) throws Exception {
        super.initialize(context);

        // Set up the ZooKeeper client for the 0.8 version of Apache Kafka. Failed
        // operations are retried with a fixed two-second delay, and watches are
        // re-created when the ZooKeeper session expires.
        String kafkaZKConnect = getKafkaConfig().getZookeeper();
        if (kafkaZKConnect != null) {
            zkClient = ZKClientServices.delegate(ZKClients
                    .reWatchOnExpire(ZKClients.retryOnFailure(ZKClientService.Builder.of(kafkaZKConnect).build(),
                            RetryStrategies.fixDelay(2, TimeUnit.SECONDS))));
            brokerService = new ZKBrokerService(zkClient);

            try {
                Futures.get(zkClient.start(), 3, TimeUnit.SECONDS, TimeoutException.class);
                Futures.get(brokerService.start(), 3, TimeUnit.SECONDS, TimeoutException.class);
            } catch (TimeoutException e) {
                Futures.get(brokerService.stop(), 3, TimeUnit.SECONDS, TimeoutException.class);
                Futures.get(zkClient.stop(), 3, TimeUnit.SECONDS, TimeoutException.class);
                throw new IllegalArgumentException(
                        String.format(
                                "Timeout while trying to start ZookeeperClient/Broker Service. "
                                        + "Check if the zookeeper connection string %s is correct.",
                                kafkaZKConnect),
                        e);
            }
        }

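        // Cache one SimpleConsumer per topic partition; consumers idle for more than
        // 60 seconds are evicted and closed by the removal listener below.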
        kafkaConsumers = CacheBuilder.newBuilder().concurrencyLevel(1).expireAfterAccess(60, TimeUnit.SECONDS)
                .removalListener(consumerCacheRemovalListener()).build();
    }

    /**
     * Creates a {@link RemovalListener} to close {@link SimpleConsumer} when it is evicted from the consumer cache.
     */
    private RemovalListener<TopicPartition, SimpleConsumer> consumerCacheRemovalListener() {
        return new RemovalListener<TopicPartition, SimpleConsumer>() {
            @Override
            public void onRemoval(RemovalNotification<TopicPartition, SimpleConsumer> notification) {
                SimpleConsumer consumer = notification.getValue();
                if (consumer == null) {
                    return;
                }
                try {
                    consumer.close();
                } catch (Throwable t) {
                    LOG.error("Exception when closing Kafka consumer.", t);
                }
            }
        };
    }

    @Override
    public void destroy() {
        super.destroy();
        if (kafkaConsumers != null) {
            kafkaConsumers.invalidateAll();
            kafkaConsumers.cleanUp();
        }

        if (brokerService != null) {
            stopService(brokerService);
        }
        if (zkClient != null) {
            stopService(zkClient);
        }
    }

    @Override
    protected void processMessage(String key, ByteBuffer payload, Emitter<StructuredRecord> emitter) {
        StructuredRecord structuredRecord = byteBufferToStructuredRecord(key, payload);
        emitter.emit(structuredRecord);
    }

    @Override
    protected void processMessage(ByteBuffer payload, Emitter<StructuredRecord> emitter) {
        StructuredRecord structuredRecord = byteBufferToStructuredRecord(null, payload);
        emitter.emit(structuredRecord);
    }

    @Override
    protected Long getBeginOffset(TopicPartition topicPartition) {
        Map<String, byte[]> offsetStore = getOffsetStore();
        if (offsetStore == null) {
            return getDefaultOffset(topicPartition);
        }

        // The value is simply an 8-byte long representing the offset
        byte[] value = offsetStore.get(getStoreKey(topicPartition));
        if (value == null || value.length != Bytes.SIZEOF_LONG) {
            return getDefaultOffset(topicPartition);
        }
        return Bytes.toLong(value);
    }

    @Override
    protected void saveReadOffsets(Map<TopicPartition, Long> offsets) {
        Map<String, byte[]> offsetStore = getOffsetStore();
        if (offsetStore == null) {
            return;
        }

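        // Persist each partition's next read offset as an 8-byte long keyed by its
        // topic partition, so that consumption can resume from the saved position.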
        for (Map.Entry<TopicPartition, Long> entry : offsets.entrySet()) {
            offsetStore.put(getStoreKey(entry.getKey()), Bytes.toBytes(entry.getValue()));
        }
    }

    @Override
    protected long getDefaultOffset(TopicPartition topicPartition) {
        Long argValue = kafkaSource.getConfig().getDefaultOffset();
        if (argValue != null) {
            return argValue.longValue();
        } else {
            return kafka.api.OffsetRequest.EarliestTime();
        }
    }

    private long getReadOffset(SimpleConsumer consumer, String topic, int partition, long time) {
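        // Ask the broker for at most one offset before the given time, where the time
        // may be an actual timestamp or one of the special EarliestTime/LatestTime
        // values; the broker replies with the closest offset available.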
        OffsetRequest offsetRequest = new OffsetRequest(
                ImmutableMap.of(new TopicAndPartition(topic, partition), new PartitionOffsetRequestInfo(time, 1)),
                kafka.api.OffsetRequest.CurrentVersion(), consumer.clientId());

        OffsetResponse response = consumer.getOffsetsBefore(offsetRequest);
        if (response.hasError()) {
            LOG.warn("Failed to fetch offset from broker {}:{} for topic-partition {}-{} with error code {}",
                    consumer.host(), consumer.port(), topic, partition, response.errorCode(topic, partition));
            return 0L;
        }
        return response.offsets(topic, partition)[0];
    }

    /**
     * Returns a {@link SimpleConsumer} for the given consumer info or {@code null} if no leader broker is currently
     * available.
     */
    @Nullable
    private SimpleConsumer getConsumer(KafkaConsumerInfo<Long> consumerInfo) throws Exception {
        TopicPartition topicPartition = consumerInfo.getTopicPartition();
        SimpleConsumer consumer = kafkaConsumers.getIfPresent(topicPartition);
        if (consumer != null) {
            return consumer;
        }

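        // No cached consumer: look up the current leader broker for this partition
        // and open a new SimpleConsumer against it.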
        InetSocketAddress leader = getLeader(topicPartition.getTopic(), topicPartition.getPartition());
        if (leader == null) {
            return null;
        }
        String consumerName = String.format("%s-%d-kafka-consumer", getName(), getContext().getInstanceId());
        consumer = new SimpleConsumer(leader.getHostName(), leader.getPort(), SO_TIMEOUT,
                consumerInfo.getFetchSize(), consumerName);
        kafkaConsumers.put(topicPartition, consumer);

        return consumer;
    }

    /**
     * Handles fetch failure.
     *
     * @param consumerInfo information on what and how to consume
     * @param consumer consumer to talk to Kafka
     * @param readOffset the beginning read offset
     * @param errorCode error code for the fetch.
     */
    private void handleFetchError(KafkaConsumerInfo<Long> consumerInfo, SimpleConsumer consumer, long readOffset,
            short errorCode) {
        TopicPartition topicPartition = consumerInfo.getTopicPartition();
        String topic = topicPartition.getTopic();
        int partition = topicPartition.getPartition();

        LOG.warn("Failed to fetch from broker {}:{} for topic-partition {}-{} with error code {}", consumer.host(),
                consumer.port(), topic, partition, errorCode);
        if (errorCode == ErrorMapping.OffsetOutOfRangeCode()) {
            // Get the earliest offset
            long earliest = getReadOffset(consumer, topic, partition, kafka.api.OffsetRequest.EarliestTime());
            // If the current read offset is smaller than the earliest one, use the earliest offset in next fetch
            if (readOffset < earliest) {
                consumerInfo.setReadOffset(earliest);
            } else {
                // Otherwise the read offset must be larger than the latest offset (or there
                // would be no out-of-range error), so reset the read offset to the latest one.
                consumerInfo.setReadOffset(
                        getReadOffset(consumer, topic, partition, kafka.api.OffsetRequest.LatestTime()));
            }
        } else {
            // For other types of errors, invalidate the consumer in the cache so that
            // a new one will be created on the next iteration.
            kafkaConsumers.invalidate(topicPartition);
        }
    }

    /**
 * Gets the address of the leader broker for the given topic and partition.
     *
     * @return the address for the leader broker or {@code null} if no leader is currently available.
     */
    @Nullable
    private InetSocketAddress getLeader(String topic, int partition) throws Exception {
        // If BrokerService is available, it will use information from ZooKeeper
        if (brokerService != null) {
            BrokerInfo brokerInfo = brokerService.getLeader(topic, partition);
            return (brokerInfo == null) ? null : new InetSocketAddress(brokerInfo.getHost(), brokerInfo.getPort());
        }

        // Otherwise use broker list to discover leader
        return findLeader(getKafkaConfig().getBrokers(), topic, partition);
    }

    /**
     * Finds the leader broker address for the given topic partition.
     *
     * @return the address for the leader broker or {@code null} if no leader is currently available.
     */
    @Nullable
    private InetSocketAddress findLeader(String brokers, String topic, int partition) throws Exception {
        // Splits the broker list of format "host:port,host:port" into a map<host, port>
        Map<String, String> brokerMap = Splitter.on(',').withKeyValueSeparator(":").split(brokers);

        // Go through the broker list and try to find a leader for the given topic partition.
        for (Map.Entry<String, String> broker : brokerMap.entrySet()) {
            try {
                SimpleConsumer consumer = new SimpleConsumer(broker.getKey(), Integer.parseInt(broker.getValue()),
                        SO_TIMEOUT, KafkaConsumerConfigurer.DEFAULT_FETCH_SIZE, "leaderLookup");
                try {
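                    // Any broker can serve a TopicMetadataRequest; the response contains
                    // the partition-to-leader mapping for the requested topics.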
                    TopicMetadataRequest request = new TopicMetadataRequest(ImmutableList.of(topic));
                    TopicMetadataResponse response = consumer.send(request);
                    for (TopicMetadata topicData : response.topicsMetadata()) {
                        for (PartitionMetadata partitionData : topicData.partitionsMetadata()) {
                            if (partitionData.partitionId() == partition) {
                                // The leader may be null transiently (e.g. during a leader
                                // election); fall through and try the remaining brokers.
                                Broker leader = partitionData.leader();
                                if (leader != null) {
                                    return new InetSocketAddress(leader.host(), leader.port());
                                }
                            }
                        }
                    }

                } finally {
                    consumer.close();
                }
            } catch (Exception e) {
                throw new Exception(String.format(
                        "Failed to communicate with broker %s:%s for leader lookup for topic-partition %s-%s",
                        broker.getKey(), broker.getValue(), topic, partition), e);
            }
        }
        return null;
    }
}