com.linkedin.pinot.server.integration.realtime.RealtimeTableDataManagerTest.java Source code

Introduction

Here is the source code for com.linkedin.pinot.server.integration.realtime.RealtimeTableDataManagerTest.java
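
The class is a manual integration test for Pinot's high-level-consumer (HLC) realtime ingestion path: it builds a REALTIME table config for a Kafka stream, creates an HLRealtimeSegmentDataManager, and schedules timer tasks that repeatedly scan single-value and multi-value columns of the consuming segment, logging how long each scan takes.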

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.server.integration.realtime;

import com.linkedin.pinot.common.metrics.ServerMetrics;
import com.yammer.metrics.core.MetricsRegistry;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.TimerTask;
import java.util.concurrent.TimeUnit;

import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.helix.ZNRecord;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.BeforeClass;

import com.linkedin.pinot.common.config.AbstractTableConfig;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.FieldType;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.metadata.instance.InstanceZKMetadata;
import com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.CommonConstants.Segment.Realtime.Status;
import com.linkedin.pinot.common.utils.CommonConstants.Segment.SegmentType;
import com.linkedin.pinot.core.common.Block;
import com.linkedin.pinot.core.common.BlockMetadata;
import com.linkedin.pinot.core.common.BlockMultiValIterator;
import com.linkedin.pinot.core.common.BlockSingleValIterator;
import com.linkedin.pinot.core.common.BlockValSet;
import com.linkedin.pinot.core.common.Constants;
import com.linkedin.pinot.core.data.manager.config.TableDataManagerConfig;
import com.linkedin.pinot.core.data.manager.realtime.HLRealtimeSegmentDataManager;
import com.linkedin.pinot.core.data.manager.realtime.TimerService;
import com.linkedin.pinot.core.realtime.RealtimeFileBasedReaderTest;
import com.linkedin.pinot.core.realtime.RealtimeSegment;
import com.linkedin.pinot.core.realtime.impl.datasource.RealtimeColumnDataSource;
import com.linkedin.pinot.segments.v1.creator.SegmentTestUtils;

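/**
 * Manual integration test for the high-level-consumer (HLC) realtime
 * ingestion path: builds a REALTIME table config for a Kafka stream,
 * creates an {@link HLRealtimeSegmentDataManager}, and schedules timer
 * tasks that repeatedly scan single-value and multi-value columns of the
 * consuming segment, logging how long each scan takes.
 */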
public class RealtimeTableDataManagerTest {

    private static final Logger LOGGER = LoggerFactory.getLogger(RealtimeTableDataManagerTest.class);

    private static AbstractTableConfig tableConfig;

    private static InstanceZKMetadata instanceZKMetadata;
    private static RealtimeSegmentZKMetadata realtimeSegmentZKMetadata;
    private static TableDataManagerConfig tableDataManagerConfig;
    private static final String AVRO_DATA = "data/test_data-mv.avro";
    private static String filePath;
    private static Map<String, FieldType> fieldTypeMap;

    private static final String TABLE_DATA_MANAGER_NUM_QUERY_EXECUTOR_THREADS = "numQueryExecutorThreads";
    private static final String TABLE_DATA_MANAGER_TYPE = "dataManagerType";
    private static final String READ_MODE = "readMode";
    private static final String TABLE_DATA_MANAGER_DATA_DIRECTORY = "directory";
    private static final String TABLE_DATA_MANAGER_NAME = "name";

    private static final long SEGMENT_CONSUMING_TIME = 1000 * 60 * 3; // 3 minutes

    private static volatile boolean keepOnRunning = true;

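    // Builds the supporting metadata plus the REALTIME table config JSON:
    // table "mirror", HEAP load mode, and Kafka high-level-consumer stream
    // settings (topic, decoder, ZK connect string, schema registry URL).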
    @BeforeClass
    public static void setup() throws Exception {
        instanceZKMetadata = getInstanceZKMetadata();
        realtimeSegmentZKMetadata = getRealtimeSegmentZKMetadata();
        tableDataManagerConfig = getTableDataManagerConfig();

        JSONObject request = new JSONObject();
        request.put("tableName", "mirror");
        request.put("tableType", "REALTIME");

        JSONObject indexing = new JSONObject();
        indexing.put("loadMode", "HEAP");

        JSONObject stream = new JSONObject();
        stream.put("streamType", "kafka");
        stream.put("stream.kafka.consumer.type", "highLevel");
        stream.put("stream.kafka.topic.name", "MirrorDecoratedProfileViewEvent");
        stream.put("stream.kafka.decoder.class.name",
                "com.linkedin.pinot.core.realtime.impl.kafka.KafkaAvroMessageDecoder");
        stream.put("stream.kafka.hlc.zk.connect.string",
                "zk-eat1-kafka.corp.linkedin.com:12913/kafka-aggregate-tracking");
        stream.put("stream.kafka.decoder.prop.schema.registry.rest.url",
                "http://eat1-ei2-schema-vip-z.stg.linkedin.com:10252/schemaRegistry/schemas");
        indexing.put("streamConfigs", stream);

        request.put("tableIndexConfig", indexing);
        request.put("segmentsConfig", new JSONObject());
        request.put("tenants", new JSONObject());
        request.put("metadata", new JSONObject());
        tableConfig = AbstractTableConfig.init(request.toString());
    }

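    // Assembles a minimal TableDataManagerConfig from an in-memory
    // PropertiesConfiguration: table name, a data directory under /tmp,
    // heap read mode, and the query-executor thread count.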
    private static TableDataManagerConfig getTableDataManagerConfig() throws ConfigurationException {
        String tableName = "testTable_R";
        Configuration defaultConfig = new PropertiesConfiguration();
        defaultConfig.addProperty(TABLE_DATA_MANAGER_NAME, tableName);
        String dataDir = "/tmp/" + tableName;
        defaultConfig.addProperty(TABLE_DATA_MANAGER_DATA_DIRECTORY, dataDir);
        defaultConfig.addProperty(READ_MODE, ReadMode.heap.toString());
        defaultConfig.addProperty(TABLE_DATA_MANAGER_NUM_QUERY_EXECUTOR_THREADS, 20);
        // Add the data-manager type before wrapping the Configuration, so the
        // value is present even if TableDataManagerConfig copies the
        // properties rather than holding a live reference.
        defaultConfig.addProperty(TABLE_DATA_MANAGER_TYPE, "realtime");
        TableDataManagerConfig tableDataManagerConfig = new TableDataManagerConfig(defaultConfig);

        return tableDataManagerConfig;
    }

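    // Long-running scan driver. It is not annotated with @Test, so it is
    // presumably meant to be invoked manually: it needs live Kafka and
    // ZooKeeper endpoints and runs for SEGMENT_CONSUMING_TIME before the
    // wait loop at the bottom exits.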
    public void testSetup() throws Exception {
        final HLRealtimeSegmentDataManager manager = new HLRealtimeSegmentDataManager(realtimeSegmentZKMetadata,
                tableConfig, instanceZKMetadata, null, tableDataManagerConfig.getDataDir(),
                ReadMode.valueOf(tableDataManagerConfig.getReadMode()), getTestSchema(),
                new ServerMetrics(new MetricsRegistry()));

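        // Watchdog task: once SEGMENT_CONSUMING_TIME has elapsed, flip the
        // flag that ends the wait loop at the bottom of this method.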
        final long start = System.currentTimeMillis();
        TimerService.timer.scheduleAtFixedRate(new TimerTask() {

            @Override
            public void run() {
                if (System.currentTimeMillis() - start >= (SEGMENT_CONSUMING_TIME)) {
                    keepOnRunning = false;
                }
            }
        }, 1000, 1000 * 60); // first check after 1s, then once a minute

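        // Every 5 seconds (after a 20s warm-up), scan the single-value
        // metric column "count" and log the scan time and running sum.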
        TimerService.timer.scheduleAtFixedRate(new TimerTask() {

            @Override
            public void run() {
                long start = System.currentTimeMillis();
                long sum = 0;
                try {
                    RealtimeSegment segment = (RealtimeSegment) manager.getSegment();
                    RealtimeColumnDataSource mDs = (RealtimeColumnDataSource) segment.getDataSource("count");
                    BlockValSet valSet = mDs.nextBlock().getBlockValueSet();
                    BlockSingleValIterator valIt = (BlockSingleValIterator) valSet.iterator();
                    // Read ahead, test against the EOF sentinel, then
                    // consume: the original order skipped the first value
                    // and folded EOF into the sum.
                    int val = valIt.nextIntVal();
                    while (val != Constants.EOF) {
                        sum += val;
                        val = valIt.nextIntVal();
                    }
                } catch (Exception e) {
                    LOGGER.error("count column exception", e);
                }

                long stop = System.currentTimeMillis();
                LOGGER.info("time to scan metric col count : " + (stop - start) + " sum : " + sum);
            }
        }, 20000, 1000 * 5);

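        // Same single-value scan, against the dimension column "viewerId".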
        TimerService.timer.scheduleAtFixedRate(new TimerTask() {

            @Override
            public void run() {
                long start = System.currentTimeMillis();
                long sum = 0;
                try {
                    RealtimeSegment segment = (RealtimeSegment) manager.getSegment();
                    RealtimeColumnDataSource mDs = (RealtimeColumnDataSource) segment.getDataSource("viewerId");
                    BlockValSet valSet = mDs.nextBlock().getBlockValueSet();
                    BlockSingleValIterator valIt = (BlockSingleValIterator) valSet.iterator();
                    int val = valIt.nextIntVal();
                    while (val != Constants.EOF) {
                        sum += val;
                        val = valIt.nextIntVal();
                    }
                } catch (Exception e) {
                    LOGGER.error("viewerId column exception", e);
                }

                long stop = System.currentTimeMillis();
                LOGGER.info("time to scan SV dimension col viewerId : " + (stop - start) + " sum : " + sum);
            }
        }, 20000, 1000 * 5);

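        // Same single-value scan, against the "daysSinceEpoch" column.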
        TimerService.timer.scheduleAtFixedRate(new TimerTask() {

            @Override
            public void run() {
                long start = System.currentTimeMillis();
                long sum = 0;
                try {
                    RealtimeSegment segment = (RealtimeSegment) manager.getSegment();
                    RealtimeColumnDataSource mDs = (RealtimeColumnDataSource) segment
                            .getDataSource("daysSinceEpoch");
                    BlockValSet valSet = mDs.nextBlock().getBlockValueSet();
                    BlockSingleValIterator valIt = (BlockSingleValIterator) valSet.iterator();
                    int val = valIt.nextIntVal();
                    while (val != Constants.EOF) {
                        sum += val;
                        val = valIt.nextIntVal();
                    }
                } catch (Exception e) {
                    LOGGER.error("daysSinceEpoch column exception", e);
                }
                long stop = System.currentTimeMillis();
                LOGGER.info("time to scan SV time col daysSinceEpoch : " + (stop - start) + " sum : " + sum);
            }
        }, 20000, 1000 * 5);

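        // Multi-value scan: iterate the MV dimension column
        // "viewerCompanies", summing every value and tracking the average
        // number of values per row.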
        TimerService.timer.scheduleAtFixedRate(new TimerTask() {

            @Override
            public void run() {
                long start = System.currentTimeMillis();
                long sum = 0;
                float sumOfLengths = 0F;
                float counter = 0F;
                try {
                    RealtimeSegment segment = (RealtimeSegment) manager.getSegment();
                    RealtimeColumnDataSource mDs = (RealtimeColumnDataSource) segment
                            .getDataSource("viewerCompanies");
                    Block b = mDs.nextBlock();
                    BlockValSet valSet = b.getBlockValueSet();
                    BlockMultiValIterator valIt = (BlockMultiValIterator) valSet.iterator();
                    BlockMetadata m = b.getMetadata();
                    int maxVams = m.getMaxNumberOfMultiValues();
                    while (valIt.hasNext()) {
                        int[] vals = new int[maxVams];
                        int len = valIt.nextIntVal(vals);
                        for (int i = 0; i < len; i++) {
                            sum += vals[i];
                        }
                        sumOfLengths += len;
                        counter++;
                    }
                } catch (Exception e) {
                    LOGGER.error("viewerCompanies column exception", e);
                }
                long stop = System.currentTimeMillis();
                LOGGER.info("time to scan MV col viewerCompanies : " + (stop - start) + " sum : " + sum
                        + " average len : " + (sumOfLengths / counter));
            }
        }, 20000, 1000 * 5);

        // Block until the watchdog task flips keepOnRunning; sleep so the
        // wait does not spin a CPU core.
        while (keepOnRunning) {
            Thread.sleep(1000);
        }
    }

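    // Fakes the instance-level ZK metadata of a server participant:
    // Kafka HLC group-id and partition assignments keyed by table.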
    private static InstanceZKMetadata getInstanceZKMetadata() {
        ZNRecord record = new ZNRecord("Server_lva1-app0120.corp.linkedin.com_8001");
        Map<String, String> groupIdMap = new HashMap<String, String>();
        Map<String, String> partitionMap = new HashMap<String, String>();

        groupIdMap.put("mirror", "groupId_testTable_" + String.valueOf(System.currentTimeMillis()));
        partitionMap.put("testTable_R", "0");
        record.setMapField("KAFKA_HLC_GROUP_MAP", groupIdMap);
        record.setMapField("KAFKA_HLC_PARTITION_MAP", partitionMap);
        return new InstanceZKMetadata(record);
    }

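    // Builds IN_PROGRESS realtime segment metadata; end time, total raw
    // docs, and CRC are placeholder -1 values.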
    private static RealtimeSegmentZKMetadata getRealtimeSegmentZKMetadata() {
        RealtimeSegmentZKMetadata realtimeSegmentMetadata = new RealtimeSegmentZKMetadata();
        realtimeSegmentMetadata.setSegmentName("testTable_R_1000_groupId0_part0");
        realtimeSegmentMetadata.setTableName("testTable");
        realtimeSegmentMetadata.setSegmentType(SegmentType.REALTIME);
        realtimeSegmentMetadata.setIndexVersion("v1");
        realtimeSegmentMetadata.setStartTime(1000);
        realtimeSegmentMetadata.setEndTime(-1);
        realtimeSegmentMetadata.setTimeUnit(TimeUnit.HOURS);
        realtimeSegmentMetadata.setStatus(Status.IN_PROGRESS);
        realtimeSegmentMetadata.setTotalRawDocs(-1);
        realtimeSegmentMetadata.setCrc(-1);
        realtimeSegmentMetadata.setCreationTime(1000);
        return realtimeSegmentMetadata;
    }

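    // Derives the Pinot schema from the bundled Avro test file, mapping
    // each field to DIMENSION, METRIC, or TIME.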
    private static Schema getTestSchema() throws FileNotFoundException, IOException {
        filePath = RealtimeFileBasedReaderTest.class.getClassLoader().getResource(AVRO_DATA).getFile();
        fieldTypeMap = new HashMap<String, FieldSpec.FieldType>();
        fieldTypeMap.put("viewerId", FieldType.DIMENSION);
        fieldTypeMap.put("vieweeId", FieldType.DIMENSION);
        fieldTypeMap.put("viewerPrivacySetting", FieldType.DIMENSION);
        fieldTypeMap.put("vieweePrivacySetting", FieldType.DIMENSION);
        fieldTypeMap.put("viewerObfuscationType", FieldType.DIMENSION);
        fieldTypeMap.put("viewerCompanies", FieldType.DIMENSION);
        fieldTypeMap.put("viewerOccupations", FieldType.DIMENSION);
        fieldTypeMap.put("viewerRegionCode", FieldType.DIMENSION);
        fieldTypeMap.put("viewerIndustry", FieldType.DIMENSION);
        fieldTypeMap.put("viewerSchool", FieldType.DIMENSION);
        fieldTypeMap.put("weeksSinceEpochSunday", FieldType.DIMENSION);
        fieldTypeMap.put("daysSinceEpoch", FieldType.DIMENSION);
        fieldTypeMap.put("minutesSinceEpoch", FieldType.TIME);
        fieldTypeMap.put("count", FieldType.METRIC);
        return SegmentTestUtils.extractSchemaFromAvro(new File(filePath), fieldTypeMap, TimeUnit.MINUTES);
    }
}
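
The single-value scans above all follow the same EOF-sentinel loop: read ahead, test against the sentinel, then consume. A subtle failure mode is reversing the last two steps, which drops the first value and folds the sentinel into the sum. Below is a minimal, self-contained sketch of the pattern; SimpleIntIterator and its EOF constant are hypothetical stand-ins for Pinot's BlockSingleValIterator and Constants.EOF, used only so the snippet compiles without Pinot on the classpath.

public class EofScanSketch {
    // Hypothetical stand-in for com.linkedin.pinot.core.common.Constants.EOF.
    static final int EOF = Integer.MIN_VALUE;

    // Hypothetical stand-in for BlockSingleValIterator: returns EOF once the
    // values are exhausted.
    static class SimpleIntIterator {
        private final int[] values;
        private int pos = 0;

        SimpleIntIterator(int... values) {
            this.values = values;
        }

        int nextIntVal() {
            return pos < values.length ? values[pos++] : EOF;
        }
    }

    public static void main(String[] args) {
        SimpleIntIterator valIt = new SimpleIntIterator(3, 1, 4, 1, 5);

        // Read ahead, test against the sentinel, then consume: this order
        // neither skips the first value nor adds EOF into the sum.
        long sum = 0;
        int val = valIt.nextIntVal();
        while (val != EOF) {
            sum += val;
            val = valIt.nextIntVal();
        }
        System.out.println("sum = " + sum); // prints: sum = 14
    }
}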