io.druid.firehose.kafka.KafkaEightSimpleConsumerFirehoseFactory.java Source code

Introduction

Here is the source code for io.druid.firehose.kafka.KafkaEightSimpleConsumerFirehoseFactory.java, Druid's FirehoseFactoryV2 implementation for consuming a Kafka 0.8 topic through the low-level SimpleConsumer API.

Source

/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.firehose.kafka;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import com.metamx.emitter.EmittingLogger;
import io.druid.data.input.ByteBufferInputRowParser;
import io.druid.data.input.Committer;
import io.druid.data.input.FirehoseFactoryV2;
import io.druid.data.input.FirehoseV2;
import io.druid.data.input.InputRow;
import io.druid.firehose.kafka.KafkaSimpleConsumer.BytesMessageWithOffset;

import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;

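/**
 * A FirehoseFactoryV2 that consumes a Kafka 0.8 topic through the low-level
 * SimpleConsumer API: one daemon worker thread per configured partition, all
 * feeding a shared bounded queue that the returned FirehoseV2 drains.
 */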
public class KafkaEightSimpleConsumerFirehoseFactory implements FirehoseFactoryV2<ByteBufferInputRowParser> {
    private static final EmittingLogger log = new EmittingLogger(KafkaEightSimpleConsumerFirehoseFactory.class);

    @JsonProperty
    private final List<String> brokerList;

    @JsonProperty
    private final List<Integer> partitionIdList;

    @JsonProperty
    private final String clientId;

    @JsonProperty
    private final String feed;

    @JsonProperty
    private final int queueBufferLength;

    @JsonProperty
    private final boolean earliest;

    private final List<PartitionConsumerWorker> consumerWorkers = new CopyOnWriteArrayList<>();
    private static final int DEFAULT_QUEUE_BUFFER_LENGTH = 20000;
    private static final int CONSUMER_FETCH_TIMEOUT = 10000;

    @JsonCreator
    public KafkaEightSimpleConsumerFirehoseFactory(@JsonProperty("brokerList") List<String> brokerList,
            @JsonProperty("partitionIdList") List<Integer> partitionIdList,
            @JsonProperty("clientId") String clientId, @JsonProperty("feed") String feed,
            @JsonProperty("queueBufferLength") Integer queueBufferLength,
            @JsonProperty("resetOffsetToEarliest") Boolean resetOffsetToEarliest) {
        this.brokerList = brokerList;
        Preconditions.checkArgument(brokerList != null && brokerList.size() > 0, "brokerList is null/empty");

        this.partitionIdList = partitionIdList;
        Preconditions.checkArgument(partitionIdList != null && partitionIdList.size() > 0,
                "partitionIdList is null/empty");

        this.clientId = clientId;
        Preconditions.checkArgument(clientId != null && !clientId.isEmpty(), "clientId is null/empty");

        this.feed = feed;
        Preconditions.checkArgument(feed != null && !feed.isEmpty(), "feed is null/empty");

        this.queueBufferLength = queueBufferLength == null ? DEFAULT_QUEUE_BUFFER_LENGTH : queueBufferLength;
        Preconditions.checkArgument(this.queueBufferLength > 0, "queueBufferLength must be a positive number");
        log.info("queueBufferLength loaded as [%s]", this.queueBufferLength);

        this.earliest = resetOffsetToEarliest == null ? true : resetOffsetToEarliest.booleanValue();
        log.info("if old offsets are not known, data from partition will be read from [%s] available offset.",
                this.earliest ? "earliest" : "latest");
    }

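    /**
     * Rebuilds the partition-to-offset map from the metadata a previous run's
     * Committer returned, so ingestion can resume where it left off.
     */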
    private Map<Integer, Long> loadOffsetFromPreviousMetaData(Object lastCommit) {
        Map<Integer, Long> offsetMap = Maps.newHashMap();
        if (lastCommit == null) {
            return offsetMap;
        }
        if (lastCommit instanceof Map) {
            Map<Object, Object> lastCommitMap = (Map) lastCommit;
            for (Map.Entry<Object, Object> entry : lastCommitMap.entrySet()) {
                try {
                    int partitionId = Integer.parseInt(entry.getKey().toString());
                    long offset = Long.parseLong(entry.getValue().toString());
                    log.debug("Recover last commit information partitionId [%s], offset [%s]", partitionId, offset);
                    offsetMap.put(partitionId, offset);
                } catch (NumberFormatException e) {
                    log.error(e, "Fail to load offset from previous meta data [%s]", entry);
                }
            }
            log.info("Loaded offset map[%s]", offsetMap);
        } else {
            log.makeAlert("Unable to cast lastCommit to Map for feed [%s]", feed).emit();
        }
        return offsetMap;
    }

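    /**
     * Starts one consumer worker per partition, resuming from any offsets found
     * in lastCommit, and returns a FirehoseV2 view over the shared message queue.
     */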
    @Override
    public FirehoseV2 connect(final ByteBufferInputRowParser firehoseParser, Object lastCommit) throws IOException {
        final Map<Integer, Long> lastOffsets = loadOffsetFromPreviousMetaData(lastCommit);

        for (Integer partition : partitionIdList) {
            final KafkaSimpleConsumer kafkaSimpleConsumer = new KafkaSimpleConsumer(feed, partition, clientId,
                    brokerList, earliest);
            Long startOffset = lastOffsets.get(partition);
            PartitionConsumerWorker worker = new PartitionConsumerWorker(feed, kafkaSimpleConsumer, partition,
                    startOffset == null ? 0 : startOffset);
            consumerWorkers.add(worker);
        }

        final LinkedBlockingQueue<BytesMessageWithOffset> messageQueue = new LinkedBlockingQueue<BytesMessageWithOffset>(
                queueBufferLength);
        log.info("Kicking off all consumers");
        for (PartitionConsumerWorker worker : consumerWorkers) {
            worker.go(messageQueue);
        }
        log.info("All consumer started");

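        // The anonymous FirehoseV2 below drains the shared queue, parses each
        // message into an InputRow, and tracks the last fully consumed offset
        // per partition for the committer.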
        return new FirehoseV2() {
            private Map<Integer, Long> lastOffsetPartitions;
            private volatile boolean stopped;
            private volatile BytesMessageWithOffset msg = null;
            private volatile InputRow row = null;

            {
                lastOffsetPartitions = Maps.newHashMap();
                lastOffsetPartitions.putAll(lastOffsets);
            }

            @Override
            public void start() throws Exception {
                nextMessage();
            }

            @Override
            public boolean advance() {
                if (stopped) {
                    return false;
                }

                nextMessage();
                return true;
            }

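            // Blocks on the queue until a parseable row arrives. The previous
            // message's offset is recorded first, so a committer snapshot only
            // covers messages that have already been handed to the caller.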
            private void nextMessage() {
                try {
                    row = null;
                    while (row == null) {
                        if (msg != null) {
                            lastOffsetPartitions.put(msg.getPartition(), msg.offset());
                        }

                        msg = messageQueue.take();

                        final byte[] message = msg.message();
                        row = message == null ? null : firehoseParser.parse(ByteBuffer.wrap(message));
                    }
                } catch (InterruptedException e) {
                    // Let the caller decide whether to stop or continue when the thread is interrupted.
                    log.warn(e, "Thread interrupted while taking from queue, propagating the interrupt");
                    Thread.currentThread().interrupt();
                }
            }

            @Override
            public InputRow currRow() {
                if (stopped) {
                    return null;
                }
                return row;
            }

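            // Snapshots the current per-partition offsets; getMetadata() hands the
            // copy to Druid, and it comes back as lastCommit on the next connect().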
            @Override
            public Committer makeCommitter() {
                final Map<Integer, Long> offsets = Maps.newHashMap(lastOffsetPartitions);

                return new Committer() {
                    @Override
                    public Object getMetadata() {
                        return offsets;
                    }

                    @Override
                    public void run() {

                    }
                };
            }

            @Override
            public void close() throws IOException {
                log.info("Stopping kafka 0.8 simple firehose");
                stopped = true;
                for (PartitionConsumerWorker t : consumerWorkers) {
                    Closeables.close(t, true);
                }
            }
        };
    }

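    /**
     * Runs one partition's fetch loop on a daemon thread, pushing every
     * fetched message (with its offset) onto the shared bounded queue.
     */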
    private static class PartitionConsumerWorker implements Closeable {
        private final String topic;
        private final KafkaSimpleConsumer consumer;
        private final int partitionId;
        private final long startOffset;

        private final AtomicBoolean stopped = new AtomicBoolean(false);
        private volatile Thread thread = null;

        PartitionConsumerWorker(String topic, KafkaSimpleConsumer consumer, int partitionId, long startOffset) {
            this.topic = topic;
            this.consumer = consumer;
            this.partitionId = partitionId;
            this.startOffset = startOffset;
        }

        public void go(final LinkedBlockingQueue<BytesMessageWithOffset> messageQueue) {
            thread = new Thread() {
                @Override
                public void run() {
                    long offset = startOffset;
                    log.info("Start running parition[%s], offset[%s]", partitionId, offset);
                    try {
                        while (!stopped.get()) {
                            try {
                                Iterable<BytesMessageWithOffset> msgs = consumer.fetch(offset,
                                        CONSUMER_FETCH_TIMEOUT);
                                int count = 0;
                                for (BytesMessageWithOffset msgWithOffset : msgs) {
                                    offset = msgWithOffset.offset();
                                    messageQueue.put(msgWithOffset);
                                    count++;
                                }
                                log.debug("fetch [%s] msgs for partition [%s] in one time ", count, partitionId);
                            } catch (InterruptedException e) {
                                log.info("Interrupted when fetching data, shutting down.");
                                return;
                            } catch (Exception e) {
                                log.error(e, "Exception happened in fetching data, but will continue consuming");
                            }
                        }
                    } finally {
                        consumer.stop();
                    }
                }
            };
            thread.setDaemon(true);
            thread.setName(String.format("kafka-%s-%s", topic, partitionId));
            thread.start();
        }

        @Override
        public synchronized void close() throws IOException {
            if (stopped.compareAndSet(false, true)) {
                // Guard against close() being called before go() has started the thread.
                if (thread != null) {
                    thread.interrupt();
                    thread = null;
                }
            }
        }
    }
}
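
Usage note

For context, here is a minimal construction sketch. It is not part of the original file: the broker address, partition ids, client id, and topic name are illustrative placeholders, and wiring up a real ByteBufferInputRowParser for connect() is omitted.

import java.util.Arrays;

public class FirehoseFactoryExample {
    public static void main(String[] args) {
        KafkaEightSimpleConsumerFirehoseFactory factory =
                new KafkaEightSimpleConsumerFirehoseFactory(
                        Arrays.asList("kafkaBroker:9092"), // brokerList (placeholder address)
                        Arrays.asList(0, 1, 2),            // partitionIdList: one worker per id
                        "exampleClient",                   // clientId passed to the SimpleConsumer
                        "exampleTopic",                    // feed, i.e. the Kafka topic
                        null,  // queueBufferLength: null falls back to the default of 20000
                        null); // resetOffsetToEarliest: null defaults to true (earliest)
        // factory.connect(parser, lastCommit) would then return the FirehoseV2.
    }
}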