com.linkedin.pinot.core.realtime.impl.kafka.KafkaHighLevelStreamProviderConfig.java Source code

Introduction

Here is the source code for com.linkedin.pinot.core.realtime.impl.kafka.KafkaHighLevelStreamProviderConfig.java. The class implements StreamProviderConfig for Pinot's Kafka high-level consumer: it carries the topic name, ZooKeeper connect string, consumer group id, message decoder class, and the row-count and time thresholds used to flush consuming realtime segments.

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.realtime.impl.kafka;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.joda.time.Duration;
import org.joda.time.Period;
import org.joda.time.format.PeriodFormatter;
import org.joda.time.format.PeriodFormatterBuilder;
import com.linkedin.pinot.common.config.AbstractTableConfig;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.metadata.instance.InstanceZKMetadata;
import com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata;
import com.linkedin.pinot.common.utils.CommonConstants.Helix;
import com.linkedin.pinot.core.realtime.StreamProviderConfig;
import static com.linkedin.pinot.common.utils.EqualityUtils.hashCodeOf;
import static com.linkedin.pinot.common.utils.EqualityUtils.isEqual;
import static com.linkedin.pinot.common.utils.EqualityUtils.isNullOrNotSameClass;
import static com.linkedin.pinot.common.utils.EqualityUtils.isSameReference;
import kafka.consumer.ConsumerConfig;

/**
 * StreamProviderConfig implementation for Pinot's Kafka high-level consumer.
 * Holds the topic name, ZooKeeper connect string, consumer group id, message
 * decoder class and properties, and the row-count and time thresholds used to
 * flush consuming realtime segments.
 */
public class KafkaHighLevelStreamProviderConfig implements StreamProviderConfig {
    private static final Map<String, String> defaultProps;

    private static final int DEFAULT_MAX_REALTIME_ROWS_COUNT = 5000000;
    private final static long ONE_MINUTE_IN_MILLIS = 1000 * 60;
    public static final long ONE_HOUR = ONE_MINUTE_IN_MILLIS * 60;

    private final static PeriodFormatter PERIOD_FORMATTER;

    static {
        defaultProps = new HashMap<String, String>();
        /*defaultProps.put("zookeeper.connect", zookeeper);
        defaultProps.put("group.id", groupId);*/
        defaultProps.put("zookeeper.session.timeout.ms", "30000");
        defaultProps.put("zookeeper.connection.timeout.ms", "10000");
        defaultProps.put("zookeeper.sync.time.ms", "2000");

        // Rebalance retries will take up to one minute (30 retries x 2000 ms backoff) to fail.
        defaultProps.put("rebalance.max.retries", "30");
        defaultProps.put("rebalance.backoff.ms", "2000");

        defaultProps.put("auto.commit.enable", "false");
        defaultProps.put(Helix.DataSource.Realtime.Kafka.AUTO_OFFSET_RESET, "largest");

        // A formatter for time specification that allows time to be specified in days, hours and minutes
        // e.g. 1d2h3m, or 6h5m or simply 5h
        PERIOD_FORMATTER = new PeriodFormatterBuilder().appendDays().appendSuffix("d").appendHours()
                .appendSuffix("h").appendMinutes().appendSuffix("m").toFormatter();
    }

    public static int getDefaultMaxRealtimeRowsCount() {
        return DEFAULT_MAX_REALTIME_ROWS_COUNT;
    }

    private String kafkaTopicName;
    private String zkString;
    private String groupId;
    private KafkaMessageDecoder decoder;
    private String decodeKlass;
    private Schema indexingSchema;
    private Map<String, String> decoderProps;
    private Map<String, String> kafkaConsumerProps;
    private long segmentTimeInMillis = ONE_HOUR;
    private int realtimeRecordsThreshold = DEFAULT_MAX_REALTIME_ROWS_COUNT;

    public KafkaHighLevelStreamProviderConfig() {

    }

    /*
     * Expected keys in the properties map passed to init():
     *   kafka.hlc.zk.connect.string : comma-separated list of ZooKeeper hosts
     *   kafka.hlc.broker.port : broker port
     *   kafka.hlc.group.id : consumer group id
     *   kafka.decoder.class.name : fully qualified decoder class name
     *   kafka.decoder.* : every property prefixed with kafka.decoder. is passed through to the decoder
     */
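    /*
     * For example (hypothetical values; key names as listed above, plus the
     * topic-name key, which init() below also requires):
     *
     *   kafka.hlc.zk.connect.string = zk1:2181,zk2:2181
     *   kafka.hlc.group.id          = pinot-demo-group
     *   kafka.decoder.class.name    = com.linkedin.pinot.core.realtime.impl.kafka.KafkaJSONMessageDecoder
     */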

    @Override
    public void init(Map<String, String> properties, Schema schema) {
        decoderProps = new HashMap<String, String>();
        kafkaConsumerProps = new HashMap<>();

        this.indexingSchema = schema;
        if (properties.containsKey(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.GROUP_ID)) {
            this.groupId = properties.get(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.GROUP_ID);
        }

        if (properties.containsKey(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.ZK_CONNECTION_STRING)) {
            this.zkString = properties.get(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.ZK_CONNECTION_STRING);
        }

        if (properties.containsKey(Helix.DataSource.Realtime.Kafka.TOPIC_NAME)) {
            this.kafkaTopicName = properties.get(Helix.DataSource.Realtime.Kafka.TOPIC_NAME);
        }

        if (properties.containsKey(Helix.DataSource.Realtime.Kafka.DECODER_CLASS)) {
            this.decodeKlass = properties.get(Helix.DataSource.Realtime.Kafka.DECODER_CLASS);
        }

        if (groupId == null || zkString == null || kafkaTopicName == null || this.decodeKlass == null) {
            throw new RuntimeException("Cannot initialize KafkaHighLevelStreamProviderConfig as: " + "groupId = "
                    + groupId + ", zkString = " + zkString + ", kafkaTopicName = " + kafkaTopicName
                    + ", decodeKlass = " + decodeKlass);
        }

        if (properties.containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE)) {
            realtimeRecordsThreshold = Integer
                    .parseInt(properties.get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE));
        }

        if (properties.containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME)) {
            segmentTimeInMillis = Long
                    .parseLong(properties.get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME));
        }

        for (String key : properties.keySet()) {
            if (key.startsWith(Helix.DataSource.Realtime.Kafka.DECODER_PROPS_PREFIX)) {
                decoderProps.put(Helix.DataSource.Realtime.Kafka.getDecoderPropertyKey(key), properties.get(key));
            }

            if (key.startsWith(Helix.DataSource.Realtime.Kafka.KAFKA_CONSUMER_PROPS_PREFIX)) {
                kafkaConsumerProps.put(Helix.DataSource.Realtime.Kafka.getConsumerPropertyKey(key),
                        properties.get(key));
            }
        }
    }

    @Override
    public Schema getSchema() {
        return indexingSchema;
    }

    public String getTopicName() {
        return this.kafkaTopicName;
    }

    public Map<String, Integer> getTopicMap(int numThreads) {
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(kafkaTopicName, numThreads);
        return topicCountMap;
    }

    /**
     * Builds the Kafka ConsumerConfig: static defaults first, then any
     * per-table consumer overrides, then group.id and zookeeper.connect.
     */
    public ConsumerConfig getKafkaConsumerConfig() {
        Properties props = new Properties();
        for (String key : defaultProps.keySet()) {
            props.put(key, defaultProps.get(key));
        }

        for (String key : kafkaConsumerProps.keySet()) {
            props.put(key, kafkaConsumerProps.get(key));
        }

        props.put("group.id", groupId);
        props.put("zookeeper.connect", zkString);
        return new ConsumerConfig(props);
    }

    public KafkaMessageDecoder getDecoder() throws Exception {
        KafkaMessageDecoder ret = (KafkaMessageDecoder) Class.forName(decodeKlass).newInstance();
        ret.init(decoderProps, indexingSchema, kafkaTopicName);
        return ret;
    }

    @Override
    public String getStreamProviderClass() {
        return null;
    }

    @Override
    public void init(AbstractTableConfig tableConfig, InstanceZKMetadata instanceMetadata, Schema schema) {
        this.indexingSchema = schema;
        if (instanceMetadata != null) {
            // For LL (low-level consumer) segments, instanceMetadata will be null
            this.groupId = instanceMetadata.getGroupId(tableConfig.getTableName());
        }
        KafkaStreamMetadata kafkaMetadata = new KafkaStreamMetadata(
                tableConfig.getIndexingConfig().getStreamConfigs());
        this.kafkaTopicName = kafkaMetadata.getKafkaTopicName();
        this.decodeKlass = kafkaMetadata.getDecoderClass();
        this.decoderProps = kafkaMetadata.getDecoderProperties();
        this.kafkaConsumerProps = kafkaMetadata.getKafkaConsumerProperties();
        this.zkString = kafkaMetadata.getZkBrokerUrl();
        if (tableConfig.getIndexingConfig().getStreamConfigs()
                .containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE)) {
            realtimeRecordsThreshold = Integer.parseInt(tableConfig.getIndexingConfig().getStreamConfigs()
                    .get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE));
        }

        if (tableConfig.getIndexingConfig().getStreamConfigs()
                .containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME)) {
            segmentTimeInMillis = convertToMs(tableConfig.getIndexingConfig().getStreamConfigs()
                    .get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME));
        }
    }

    @Override
    public String getStreamName() {
        return getTopicName();
    }

    @Override
    public int getSizeThresholdToFlushSegment() {
        return realtimeRecordsThreshold;
    }

    @Override
    public long getTimeThresholdToFlushSegment() {
        return segmentTimeInMillis;
    }

    String getGroupId() {
        return groupId;
    }

    String getZkString() {
        return zkString;
    }

    /**
     * Converts a time specification to milliseconds: either a plain
     * millisecond count (e.g. "3600000") or a period string understood by
     * PERIOD_FORMATTER (e.g. "1d2h3m", "4h30m").
     */
    protected long convertToMs(String timeStr) {
        long ms = -1;
        try {
            ms = Long.valueOf(timeStr);
        } catch (NumberFormatException e1) {
            try {
                Period p = PERIOD_FORMATTER.parsePeriod(timeStr);
                Duration d = p.toStandardDuration();
                ms = d.getStandardSeconds() * 1000L;
            } catch (Exception e2) {
                throw new RuntimeException("Invalid time spec '" + timeStr + "' (Valid examples: '3h', '4h30m')",
                        e2);
            }
        }
        return ms;
    }

    @Override
    public boolean equals(Object o) {
        if (isSameReference(this, o)) {
            return true;
        }

        if (isNullOrNotSameClass(this, o)) {
            return false;
        }

        KafkaHighLevelStreamProviderConfig that = (KafkaHighLevelStreamProviderConfig) o;

        return isEqual(segmentTimeInMillis, that.segmentTimeInMillis)
                && isEqual(realtimeRecordsThreshold, that.realtimeRecordsThreshold)
                && isEqual(kafkaTopicName, that.kafkaTopicName) && isEqual(zkString, that.zkString)
                && isEqual(groupId, that.groupId) && isEqual(decoder, that.decoder)
                && isEqual(decodeKlass, that.decodeKlass) && isEqual(indexingSchema, that.indexingSchema)
                && isEqual(decoderProps, that.decoderProps) && isEqual(kafkaConsumerProps, that.kafkaConsumerProps);
    }

    @Override
    public int hashCode() {
        int result = hashCodeOf(kafkaTopicName);
        result = hashCodeOf(result, zkString);
        result = hashCodeOf(result, groupId);
        result = hashCodeOf(result, decoder);
        result = hashCodeOf(result, decodeKlass);
        result = hashCodeOf(result, indexingSchema);
        result = hashCodeOf(result, decoderProps);
        result = hashCodeOf(result, kafkaConsumerProps);
        result = hashCodeOf(result, segmentTimeInMillis);
        result = hashCodeOf(result, realtimeRecordsThreshold);
        return result;
    }
}
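
Usage

For reference, here is a minimal sketch of how the map-based init(...) and getKafkaConsumerConfig() fit together; it is not part of the original file. It assumes the sketch is compiled into the same package (so the protected convertToMs(...) is visible), that Schema has a public no-argument constructor, and that KafkaJSONMessageDecoder names a real decoder class; all literal values are placeholders.

package com.linkedin.pinot.core.realtime.impl.kafka;

import java.util.HashMap;
import java.util.Map;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.utils.CommonConstants.Helix;
import kafka.consumer.ConsumerConfig;

public class KafkaHighLevelStreamProviderConfigExample {
    public static void main(String[] args) {
        // The four required properties, keyed by the same Helix constants
        // that init(Map, Schema) reads. Values are placeholders.
        Map<String, String> props = new HashMap<String, String>();
        props.put(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.GROUP_ID, "pinot-demo-group");
        props.put(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.ZK_CONNECTION_STRING, "localhost:2181");
        props.put(Helix.DataSource.Realtime.Kafka.TOPIC_NAME, "demo-topic");
        props.put(Helix.DataSource.Realtime.Kafka.DECODER_CLASS,
                "com.linkedin.pinot.core.realtime.impl.kafka.KafkaJSONMessageDecoder");

        KafkaHighLevelStreamProviderConfig config = new KafkaHighLevelStreamProviderConfig();
        config.init(props, new Schema()); // assumed no-arg Schema; a real schema would come from the table config

        // With no explicit flush settings, the defaults apply:
        // 5,000,000 rows or one hour per consuming segment.
        System.out.println("flush size = " + config.getSizeThresholdToFlushSegment());
        System.out.println("flush time = " + config.getTimeThresholdToFlushSegment() + " ms");

        // convertToMs accepts a plain millisecond count or a period string:
        // "4h30m" -> 4.5 hours -> 16200000 ms.
        System.out.println("4h30m = " + config.convertToMs("4h30m") + " ms");

        // Merges the static defaults with any per-table consumer overrides,
        // then fills in group.id and zookeeper.connect.
        ConsumerConfig consumerConfig = config.getKafkaConsumerConfig();
        System.out.println("group.id = " + consumerConfig.groupId());
    }
}

Note one asymmetry visible in the listing: init(Map, Schema) parses the flush time with Long.parseLong, while the table-config variant of init(...) routes it through convertToMs, so period strings such as "6h" are only accepted on the table-config path.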