com.linkedin.pinot.core.startree.StarTreeIndexTestSegmentHelper.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.core.startree.StarTreeIndexTestSegmentHelper.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.startree;

import com.linkedin.pinot.common.data.*;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.readers.FileFormat;
import com.linkedin.pinot.core.data.readers.RecordReader;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.core.segment.index.loader.Loaders;
import com.linkedin.pinot.core.startree.hll.HllConfig;
import org.apache.commons.lang.mutable.MutableLong;
import org.apache.commons.math.util.MathUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.*;
import java.util.concurrent.TimeUnit;

/**
 * Test helper that generates a synthetic segment with a star tree index enabled and loads it back.
 *
 * <p>The generated schema has {@code NUM_DIMENSIONS} string dimensions named {@code d1..dN},
 * {@code NUM_METRICS} integer metrics named {@code m1..mN} and an integer time column named
 * {@code daysSinceEpoch}. Part of the data is random; the seed is taken from
 * {@link System#nanoTime()} once per JVM and is logged when a segment is built so that a failing run
 * can be reproduced.
 */
public class StarTreeIndexTestSegmentHelper {

    private static final Logger LOGGER = LoggerFactory.getLogger(StarTreeIndexTestSegmentHelper.class);

    private static final String TIME_COLUMN_NAME = "daysSinceEpoch";
    private static final int NUM_DIMENSIONS = 4;
    private static final int NUM_METRICS = 2;
    private static final int METRIC_MAX_VALUE = 10000;

    // Time-based seed: every JVM run produces different data. The value is logged in buildSegment.
    private static final long RANDOM_SEED = System.nanoTime();

    /**
     * Builds a star tree segment without an HLL config.
     *
     * @param segmentDirName output directory handed to the segment generator config
     * @param segmentName name of the segment to create
     * @param enableOffHeapFormat value passed to the star tree index spec's off-heap format switch
     * @return the schema the segment was built with
     * @throws Exception if segment generation fails
     */
    public static Schema buildSegment(String segmentDirName, String segmentName, boolean enableOffHeapFormat)
            throws Exception {
        return buildSegment(segmentDirName, segmentName, null, enableOffHeapFormat);
    }

    /**
     * Builds a star tree segment with the given HLL config; the off-heap format is disabled.
     *
     * @param segmentDirName output directory handed to the segment generator config
     * @param segmentName name of the segment to create
     * @param hllConfig HLL config set on the segment generator config
     * @return the schema the segment was built with
     * @throws Exception if segment generation fails
     */
    public static Schema buildSegmentWithHll(String segmentDirName, String segmentName, HllConfig hllConfig)
            throws Exception {
        return buildSegment(segmentDirName, segmentName, hllConfig, false);
    }

    /**
     * Shared implementation: builds the schema and generator config, generates the rows and runs the
     * segment creation driver over them.
     *
     * @param segmentDirName output directory handed to the segment generator config
     * @param segmentName name of the segment to create
     * @param hllConfig HLL config set on the segment generator config; the public no-HLL entry point
     *                  passes {@code null}
     * @param enableOffHeapFormat value passed to the star tree index spec's off-heap format switch
     * @return the schema the segment was built with
     * @throws Exception if segment generation fails
     */
    private static Schema buildSegment(String segmentDirName, String segmentName, HllConfig hllConfig,
            boolean enableOffHeapFormat) throws Exception {
        // The cast applies to the factorial only; the row count is NUM_DIMENSIONS! * 100.
        final int rows = (int) MathUtils.factorial(NUM_DIMENSIONS) * 100;
        Schema schema = buildSchema();

        SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
        config.setEnableStarTreeIndex(true);
        config.setOutDir(segmentDirName);
        config.setFormat(FileFormat.AVRO);
        config.setSegmentName(segmentName);
        config.setHllConfig(hllConfig);
        config.setStarTreeIndexSpec(buildStarTreeIndexSpec(enableOffHeapFormat));

        // Log the seed: it is time-based, so without it a data-dependent failure cannot be replayed.
        LOGGER.info("Generating {} rows for segment {} using random seed {}", rows, segmentName, RANDOM_SEED);
        List<GenericRow> data = generateRows(schema, rows, new Random(RANDOM_SEED));

        SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
        RecordReader reader = createReader(schema, data);
        driver.init(config, reader);
        driver.build();

        LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
        return schema;
    }

    /**
     * Creates the test schema: string dimensions d1..dN, the time column, then int metrics m1..mN.
     *
     * @return the newly created schema
     */
    private static Schema buildSchema() {
        Schema schema = new Schema();

        for (int i = 0; i < NUM_DIMENSIONS; i++) {
            String dimName = "d" + (i + 1);
            schema.addField(dimName, new DimensionFieldSpec(dimName, FieldSpec.DataType.STRING, true));
        }

        schema.setTimeFieldSpec(new TimeFieldSpec(TIME_COLUMN_NAME, FieldSpec.DataType.INT, TimeUnit.DAYS));

        for (int i = 0; i < NUM_METRICS; i++) {
            String metricName = "m" + (i + 1);
            schema.addField(metricName, new MetricFieldSpec(metricName, FieldSpec.DataType.INT));
        }
        return schema;
    }

    /**
     * Generates the input rows for the segment.
     *
     * <p>Per row, the first half of the dimensions get a value that cycles with the row number, the
     * second half get a random value, every metric gets a random value below
     * {@code METRIC_MAX_VALUE}, and the time column gets {@code row % 7}.
     *
     * @param schema schema whose dimension and metric field specs supply the column names
     * @param rows number of rows to generate
     * @param random source of the random dimension and metric values
     * @return the generated rows, in row-number order
     */
    private static List<GenericRow> generateRows(Schema schema, int rows, Random random) {
        // Column names are the same for every row, so resolve them once instead of per row.
        String[] dimNames = new String[NUM_DIMENSIONS];
        for (int i = 0; i < NUM_DIMENSIONS; i++) {
            dimNames[i] = schema.getDimensionFieldSpecs().get(i).getName();
        }
        String[] metricNames = new String[NUM_METRICS];
        for (int i = 0; i < NUM_METRICS; i++) {
            metricNames[i] = schema.getMetricFieldSpecs().get(i).getName();
        }

        List<GenericRow> data = new ArrayList<>(rows);
        for (int row = 0; row < rows; row++) {
            Map<String, Object> map = new HashMap<>();

            // Dim columns: value index cycles with the row number (modulo NUM_DIMENSIONS - i).
            for (int i = 0; i < NUM_DIMENSIONS / 2; i++) {
                map.put(dimNames[i], dimNames[i] + "-v" + row % (NUM_DIMENSIONS - i));
            }
            // Random values make cardinality of the remaining dimension columns larger to better test hll.
            for (int i = NUM_DIMENSIONS / 2; i < NUM_DIMENSIONS; i++) {
                map.put(dimNames[i], dimNames[i] + "-v" + random.nextInt(i * 100));
            }

            // Metric columns.
            for (int i = 0; i < NUM_METRICS; i++) {
                map.put(metricNames[i], random.nextInt(METRIC_MAX_VALUE));
            }

            // Time column.
            map.put(TIME_COLUMN_NAME, row % 7);

            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            data.add(genericRow);
        }
        return data;
    }

    /**
     * Builds a star tree index spec for the test.
     * - Use MaxLeafRecords as 1 to stress test.
     *
     * @param enableOffHeapFormat value set on the spec's off-heap format switch
     * @return the star tree index spec
     */
    private static StarTreeIndexSpec buildStarTreeIndexSpec(boolean enableOffHeapFormat) {
        StarTreeIndexSpec spec = new StarTreeIndexSpec();
        spec.setMaxLeafRecords(1);
        spec.setEnableOffHeapFormat(enableOffHeapFormat);
        return spec;
    }

    /**
     * Wraps an in-memory list of rows in a {@link RecordReader}.
     *
     * @param schema schema reported by the reader
     * @param data rows served by the reader, in list order
     * @return a reader over {@code data} that can be rewound to the first row
     */
    private static RecordReader createReader(final Schema schema, final List<GenericRow> data) {
        return new RecordReader() {

            // Index of the next row to hand out.
            int counter = 0;

            @Override
            public void rewind() throws Exception {
                counter = 0;
            }

            // Returns the current row and advances; no bounds check is done here.
            @Override
            public GenericRow next() {
                return data.get(counter++);
            }

            // Nothing to initialize: the rows are already in memory.
            @Override
            public void init() throws Exception {

            }

            @Override
            public boolean hasNext() {
                return counter < data.size();
            }

            @Override
            public Schema getSchema() {
                return schema;
            }

            // This reader does not track null counts; it returns null.
            @Override
            public Map<String, MutableLong> getNullCountMap() {
                return null;
            }

            // No resources to release.
            @Override
            public void close() throws Exception {

            }
        };
    }

    /**
     * Helper method to load the segment.
     *
     * @param segmentDirName parent directory that contains the segment
     * @param segmentName name of the segment directory under {@code segmentDirName}
     * @return the segment loaded with {@code ReadMode.heap}
     * @throws Exception if the segment cannot be loaded
     */
    public static IndexSegment loadSegment(String segmentDirName, String segmentName) throws Exception {
        LOGGER.info("Loading segment {}", segmentName);
        return Loaders.IndexSegment.load(new File(segmentDirName, segmentName), ReadMode.heap);
    }
}