org.apache.storm.kafka.spout.internal.CommonKafkaSpoutConfig.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.storm.kafka.spout.internal.CommonKafkaSpoutConfig.java

Source

/*
 * Copyright 2018 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.storm.kafka.spout.internal;

import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.commons.lang.builder.ToStringStyle;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.storm.kafka.spout.DefaultRecordTranslator;
import org.apache.storm.kafka.spout.FirstPollOffsetStrategy;
import org.apache.storm.kafka.spout.Func;
import org.apache.storm.kafka.spout.RecordTranslator;
import org.apache.storm.kafka.spout.SimpleRecordTranslator;
import org.apache.storm.kafka.spout.subscription.ManualPartitioner;
import org.apache.storm.kafka.spout.subscription.NamedTopicFilter;
import org.apache.storm.kafka.spout.subscription.PatternTopicFilter;
import org.apache.storm.kafka.spout.subscription.RoundRobinManualPartitioner;
import org.apache.storm.kafka.spout.subscription.TopicFilter;
import org.apache.storm.tuple.Fields;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class CommonKafkaSpoutConfig<K, V> implements Serializable {
    public static final long DEFAULT_POLL_TIMEOUT_MS = 200;
    public static final long DEFAULT_PARTITION_REFRESH_PERIOD_MS = 2_000;
    // Earliest start
    public static final long DEFAULT_START_TS = 0L;
    public static final FirstPollOffsetStrategy DEFAULT_FIRST_POLL_OFFSET_STRATEGY = FirstPollOffsetStrategy.UNCOMMITTED_EARLIEST;

    public static final Logger LOG = LoggerFactory.getLogger(CommonKafkaSpoutConfig.class);

    // Kafka consumer configuration
    private final Map<String, Object> kafkaProps;
    private final TopicFilter topicFilter;
    private final ManualPartitioner topicPartitioner;
    private final long pollTimeoutMs;

    // Kafka spout configuration
    private final RecordTranslator<K, V> translator;
    private final FirstPollOffsetStrategy firstPollOffsetStrategy;
    private final long partitionRefreshPeriodMs;
    private final long startTimeStamp;

    /**
     * Creates a new CommonKafkaSpoutConfig using a Builder.
     *
     * @param builder The Builder to construct the CommonKafkaSpoutConfig from
     */
    public CommonKafkaSpoutConfig(Builder<K, V, ?> builder) {
        this.kafkaProps = builder.kafkaProps;
        this.topicFilter = builder.topicFilter;
        this.topicPartitioner = builder.topicPartitioner;
        this.translator = builder.translator;
        this.firstPollOffsetStrategy = builder.firstPollOffsetStrategy;
        this.pollTimeoutMs = builder.pollTimeoutMs;
        this.partitionRefreshPeriodMs = builder.partitionRefreshPeriodMs;
        this.startTimeStamp = builder.startTimeStamp;
    }

    public abstract static class Builder<K, V, T extends Builder<K, V, T>> {

        private final Map<String, Object> kafkaProps;
        private final TopicFilter topicFilter;
        private final ManualPartitioner topicPartitioner;
        private RecordTranslator<K, V> translator;
        private long pollTimeoutMs = DEFAULT_POLL_TIMEOUT_MS;
        private FirstPollOffsetStrategy firstPollOffsetStrategy = DEFAULT_FIRST_POLL_OFFSET_STRATEGY;
        private long partitionRefreshPeriodMs = DEFAULT_PARTITION_REFRESH_PERIOD_MS;
        private long startTimeStamp = DEFAULT_START_TS;

        public Builder(String bootstrapServers, String... topics) {
            this(bootstrapServers, new NamedTopicFilter(topics), new RoundRobinManualPartitioner());
        }

        public Builder(String bootstrapServers, Set<String> topics) {
            this(bootstrapServers, new NamedTopicFilter(topics), new RoundRobinManualPartitioner());
        }

        public Builder(String bootstrapServers, Pattern topics) {
            this(bootstrapServers, new PatternTopicFilter(topics), new RoundRobinManualPartitioner());
        }

        /**
         * Create a KafkaSpoutConfig builder with default property values and no key/value deserializers.
         *
         * @param bootstrapServers The bootstrap servers the consumer will use
         * @param topicFilter The topic filter defining which topics and partitions the spout will read
         * @param topicPartitioner The topic partitioner defining which topics and partitions are assinged to each spout task
         */
        public Builder(String bootstrapServers, TopicFilter topicFilter, ManualPartitioner topicPartitioner) {
            kafkaProps = new HashMap<>();
            if (bootstrapServers == null || bootstrapServers.isEmpty()) {
                throw new IllegalArgumentException("bootstrap servers cannot be null");
            }
            kafkaProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
            this.topicFilter = topicFilter;
            this.topicPartitioner = topicPartitioner;
            this.translator = new DefaultRecordTranslator<>();
        }

        /**
         * Set a {@link KafkaConsumer} property. 
         */
        public T setProp(String key, Object value) {
            kafkaProps.put(key, value);
            return (T) this;
        }

        /**
         * Set multiple {@link KafkaConsumer} properties. 
         */
        public T setProp(Map<String, Object> props) {
            kafkaProps.putAll(props);
            return (T) this;
        }

        /**
         * Set multiple {@link KafkaConsumer} properties. 
         */
        public T setProp(Properties props) {
            props.forEach((key, value) -> {
                if (key instanceof String) {
                    kafkaProps.put((String) key, value);
                } else {
                    throw new IllegalArgumentException("Kafka Consumer property keys must be Strings");
                }
            });
            return (T) this;
        }

        //Spout Settings
        /**
         * Specifies the time, in milliseconds, spent waiting in poll if data is not available. Default is 200ms.
         *
         * @param pollTimeoutMs time in ms
         */
        public T setPollTimeoutMs(long pollTimeoutMs) {
            this.pollTimeoutMs = pollTimeoutMs;
            return (T) this;
        }

        /**
         * Sets the offset used by the Kafka spout in the first poll to Kafka broker upon process start. Please refer to to the
         * documentation in {@link FirstPollOffsetStrategy}
         *
         * @param firstPollOffsetStrategy Offset used by Kafka spout first poll
         */
        public T setFirstPollOffsetStrategy(FirstPollOffsetStrategy firstPollOffsetStrategy) {
            this.firstPollOffsetStrategy = firstPollOffsetStrategy;
            return (T) this;
        }

        public T setRecordTranslator(RecordTranslator<K, V> translator) {
            this.translator = translator;
            return (T) this;
        }

        /**
         * Configure a translator with tuples to be emitted on the default stream.
         *
         * @param func extracts and turns a Kafka ConsumerRecord into a list of objects to be emitted
         * @param fields the names of the fields extracted
         * @return this to be able to chain configuration
         */
        public T setRecordTranslator(Func<ConsumerRecord<K, V>, List<Object>> func, Fields fields) {
            return setRecordTranslator(new SimpleRecordTranslator<>(func, fields));
        }

        /**
         * Configure a translator with tuples to be emitted to a given stream.
         *
         * @param func extracts and turns a Kafka ConsumerRecord into a list of objects to be emitted
         * @param fields the names of the fields extracted
         * @param stream the stream to emit the tuples on
         * @return this to be able to chain configuration
         */
        public T setRecordTranslator(Func<ConsumerRecord<K, V>, List<Object>> func, Fields fields, String stream) {
            return setRecordTranslator(new SimpleRecordTranslator<>(func, fields, stream));
        }

        /**
         * Sets partition refresh period in milliseconds. This is how often Kafka will be polled to check for new topics and/or new
         * partitions.
         *
         * @param partitionRefreshPeriodMs time in milliseconds
         * @return the builder (this)
         */
        public T setPartitionRefreshPeriodMs(long partitionRefreshPeriodMs) {
            this.partitionRefreshPeriodMs = partitionRefreshPeriodMs;
            return (T) this;
        }

        /**
         * Specifies the startTimeStamp if the first poll strategy is TIMESTAMP or UNCOMMITTED_TIMESTAMP.
         * @param startTimeStamp time in ms
         */
        public T setStartTimeStamp(long startTimeStamp) {
            this.startTimeStamp = startTimeStamp;
            return (T) this;
        }

        protected Map<String, Object> getKafkaProps() {
            return kafkaProps;
        }

        public abstract CommonKafkaSpoutConfig<K, V> build();
    }

    /**
     * Gets the properties that will be passed to the KafkaConsumer.
     *
     * @return The Kafka properties map
     */
    public Map<String, Object> getKafkaProps() {
        return kafkaProps;
    }

    public TopicFilter getTopicFilter() {
        return topicFilter;
    }

    public ManualPartitioner getTopicPartitioner() {
        return topicPartitioner;
    }

    public RecordTranslator<K, V> getTranslator() {
        return translator;
    }

    public FirstPollOffsetStrategy getFirstPollOffsetStrategy() {
        return firstPollOffsetStrategy;
    }

    public long getPollTimeoutMs() {
        return pollTimeoutMs;
    }

    public long getPartitionRefreshPeriodMs() {
        return partitionRefreshPeriodMs;
    }

    public long getStartTimeStamp() {
        return startTimeStamp;
    }

    @Override
    public String toString() {
        return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).append("kafkaProps", kafkaProps)
                .append("partitionRefreshPeriodMs", partitionRefreshPeriodMs).append("pollTimeoutMs", pollTimeoutMs)
                .append("topicFilter", topicFilter).append("topicPartitioner", topicPartitioner)
                .append("translator", translator).append("startTimeStamp", startTimeStamp).toString();
    }
}