org.carbondata.scan.executor.impl.AbstractQueryExecutor.java Source code

Introduction

Here is the source code for org.carbondata.scan.executor.impl.AbstractQueryExecutor.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.carbondata.scan.executor.impl;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.carbondata.common.logging.LogService;
import org.carbondata.common.logging.LogServiceFactory;
import org.carbondata.common.logging.impl.StandardLogService;
import org.carbondata.core.carbon.datastore.BlockIndexStore;
import org.carbondata.core.carbon.datastore.IndexKey;
import org.carbondata.core.carbon.datastore.block.AbstractIndex;
import org.carbondata.core.carbon.datastore.block.SegmentProperties;
import org.carbondata.core.carbon.datastore.exception.IndexBuilderException;
import org.carbondata.core.carbon.metadata.datatype.DataType;
import org.carbondata.core.carbon.metadata.encoder.Encoding;
import org.carbondata.core.carbon.metadata.schema.table.column.CarbonDimension;
import org.carbondata.core.carbon.metadata.schema.table.column.CarbonMeasure;
import org.carbondata.core.constants.CarbonCommonConstants;
import org.carbondata.core.datastorage.store.impl.FileFactory;
import org.carbondata.core.keygenerator.KeyGenException;
import org.carbondata.core.keygenerator.KeyGenerator;
import org.carbondata.core.util.CarbonUtil;
import org.carbondata.scan.executor.QueryExecutor;
import org.carbondata.scan.executor.exception.QueryExecutionException;
import org.carbondata.scan.executor.infos.AggregatorInfo;
import org.carbondata.scan.executor.infos.BlockExecutionInfo;
import org.carbondata.scan.executor.infos.KeyStructureInfo;
import org.carbondata.scan.executor.infos.SortInfo;
import org.carbondata.scan.executor.util.QueryUtil;
import org.carbondata.scan.executor.util.RestructureUtil;
import org.carbondata.scan.filter.FilterUtil;
import org.carbondata.scan.model.QueryDimension;
import org.carbondata.scan.model.QueryMeasure;
import org.carbondata.scan.model.QueryModel;

import org.apache.commons.lang3.ArrayUtils;

/**
 * This class provides a skeletal implementation of the {@link QueryExecutor}
 * interface to minimize the effort required to implement this interface. This
 * will be used to prepare all the properties required for query execution
 */
public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> {

    /**
     * Logger used by this class
     */
    private static final LogService LOGGER = LogServiceFactory.getLogService(AbstractQueryExecutor.class.getName());
    /**
     * holder for query properties which will be used to execute the query
     */
    protected QueryExecutorProperties queryProperties;

    /**
     * Creates the executor with a fresh {@link QueryExecutorProperties}
     * holder; the holder is filled later by {@code initQuery}.
     */
    public AbstractQueryExecutor() {
        queryProperties = new QueryExecutorProperties();
    }

    /**
     * Fills the executor properties based on the query model: resolves the
     * model, loads the table blocks selected for the query and stores the key
     * structure info, measure data types, aggregator start indexes, dictionary
     * mapping and default sort dimension indexes in the query properties.
     *
     * @param queryModel query model to prepare the execution for
     * @throws QueryExecutionException if the block indexes cannot be loaded or
     *                                 no block is available for the query
     */
    protected void initQuery(QueryModel queryModel) throws QueryExecutionException {
        String tableName = queryModel.getAbsoluteTableIdentifier().getCarbonTableIdentifier().getTableName();
        StandardLogService.setThreadName(StandardLogService.getPartitionID(tableName), queryModel.getQueryId());
        LOGGER.info("Query will be executed on table: " + tableName);

        QueryUtil.resolveQueryModel(queryModel);

        // get the table blocks
        try {
            queryProperties.dataBlocks = BlockIndexStore.getInstance()
                    .loadAndGetBlocks(queryModel.getTableBlockInfos(), queryModel.getAbsoluteTableIdentifier());
        } catch (IndexBuilderException e) {
            throw new QueryExecutionException(e);
        }
        // the key structure below is derived from the last loaded block, so at
        // least one block must be present; fail with a meaningful error instead
        // of an index out of bounds on an empty list
        if (null == queryProperties.dataBlocks || queryProperties.dataBlocks.isEmpty()) {
            throw new QueryExecutionException(
                    new IllegalStateException("No table blocks available to execute query on table: " + tableName));
        }

        // updating the key structure info for the query using the key
        // generator of the last loaded block
        int lastBlockIndex = queryProperties.dataBlocks.size() - 1;
        queryProperties.keyStructureInfo = getKeyStructureInfo(queryModel,
                queryProperties.dataBlocks.get(lastBlockIndex).getSegmentProperties().getDimensionKeyGenerator());

        // calculating the total number of aggregated columns
        int aggTypeCount = queryModel.getQueryMeasures().size();

        // collecting the data type of every query measure; this can be used
        // to select the aggregator
        DataType[] dataTypes = new DataType[aggTypeCount];
        int currentIndex = 0;
        for (QueryMeasure carbonMeasure : queryModel.getQueryMeasures()) {
            dataTypes[currentIndex] = carbonMeasure.getMeasure().getDataType();
            currentIndex++;
        }
        queryProperties.measureDataTypes = dataTypes;
        // as aggregation will be executed in following order
        // 1. aggregate dimension expression
        // 2. expression
        // 3. query measure
        // so calculating the index of the expression start index
        // and measure column start index
        queryProperties.aggExpressionStartIndex = queryModel.getQueryMeasures().size();
        queryProperties.measureStartIndex = aggTypeCount - queryModel.getQueryMeasures().size();

        // dictionary column unique column id to dictionary mapping
        // which will be used to get column actual data
        queryProperties.columnToDictionayMapping = QueryUtil.getDimensionDictionaryDetail(
                queryModel.getQueryDimension(), queryModel.getAbsoluteTableIdentifier());
        queryModel.setColumnToDictionaryMapping(queryProperties.columnToDictionayMapping);
        // setting the sort dimension index. as it will be updated while getting the sort info
        // so currently setting it to default 0 means sort is not present in any dimension
        queryProperties.sortDimIndexes = new byte[queryModel.getQueryDimension().size()];
    }

    /**
     * Below method will be used to get the key structure info for the query:
     * the key generator together with the masked byte ranges, masked bytes
     * and max key computed for the query dimensions.
     *
     * @param queryModel   query model
     * @param keyGenerator key generator used to compute the masks and max key
     * @return key structure info
     * @throws QueryExecutionException if the max key cannot be generated
     */
    private KeyStructureInfo getKeyStructureInfo(QueryModel queryModel, KeyGenerator keyGenerator)
            throws QueryExecutionException {
        // getting the masked byte range for dictionary column
        int[] maskByteRanges = QueryUtil.getMaskedByteRange(queryModel.getQueryDimension(), keyGenerator);

        // getting the masked bytes for query dimension dictionary column
        int[] maskedBytes = QueryUtil.getMaskedByte(keyGenerator.getKeySizeInBytes(), maskByteRanges);

        // max key for the dictionary dimension present in the query
        byte[] maxKey;
        try {
            // getting the max key which will be used to masked and get the
            // masked key
            maxKey = QueryUtil.getMaxKeyBasedOnDimensions(queryModel.getQueryDimension(), keyGenerator);
        } catch (KeyGenException e) {
            // propagate like the other key generation failures in this class
            // instead of continuing with a null max key
            throw new QueryExecutionException(e);
        }

        KeyStructureInfo restructureInfos = new KeyStructureInfo();
        restructureInfos.setKeyGenerator(keyGenerator);
        restructureInfos.setMaskByteRanges(maskByteRanges);
        restructureInfos.setMaskedBytes(maskedBytes);
        restructureInfos.setMaxKey(maxKey);
        return restructureInfos;
    }

    /**
     * Initialises the query properties from the query model and then creates
     * one block execution info for every data block loaded for the query.
     *
     * @param queryModel query model
     * @return block execution infos, in the order of the loaded data blocks
     * @throws QueryExecutionException if query initialisation or the creation
     *                                 of any block execution info fails
     */
    protected List<BlockExecutionInfo> getBlockExecutionInfos(QueryModel queryModel)
            throws QueryExecutionException {
        initQuery(queryModel);
        List<BlockExecutionInfo> executionInfos = new ArrayList<BlockExecutionInfo>();
        // the query is executed block by block based on these infos
        for (AbstractIndex dataBlock : queryProperties.dataBlocks) {
            BlockExecutionInfo infoForBlock = getBlockExecutionInfoForBlock(queryModel, dataBlock);
            executionInfos.add(infoForBlock);
        }
        return executionInfos;
    }

    /**
     * Below method will be used to get the block execution info which is
     * required to execute any block based on query model. It collects the
     * block's key generator, masks, aggregator info, filter executer tree,
     * start/end index keys and the dimension/measure block indexes to read.
     *
     * @param queryModel query model from user query
     * @param blockIndex block index
     * @return block execution info
     * @throws QueryExecutionException any failure during block info creation
     */
    protected BlockExecutionInfo getBlockExecutionInfoForBlock(QueryModel queryModel, AbstractIndex blockIndex)
            throws QueryExecutionException {
        BlockExecutionInfo blockExecutionInfo = new BlockExecutionInfo();
        SegmentProperties segmentProperties = blockIndex.getSegmentProperties();
        List<CarbonDimension> tableBlockDimensions = segmentProperties.getDimensions();
        KeyGenerator blockKeyGenerator = segmentProperties.getDimensionKeyGenerator();

        // below is to get only those dimension in query which is present in the
        // table block
        List<QueryDimension> updatedQueryDimension = RestructureUtil
                .getUpdatedQueryDimension(queryModel.getQueryDimension(), tableBlockDimensions);
        // TODO add complex dimension children
        int[] maskByteRangesForBlock = QueryUtil.getMaskedByteRange(updatedQueryDimension, blockKeyGenerator);
        int[] maskedByte = QueryUtil.getMaskedByte(blockKeyGenerator.getKeySizeInBytes(), maskByteRangesForBlock);
        blockExecutionInfo.setDimensionsExistInQuery(updatedQueryDimension.size() > 0);
        blockExecutionInfo.setDataBlock(blockIndex);
        blockExecutionInfo.setBlockKeyGenerator(blockKeyGenerator);
        // adding aggregation info for query
        blockExecutionInfo.setAggregatorInfo(getAggregatorInfoForBlock(queryModel, blockIndex));

        // setting the limit
        blockExecutionInfo.setLimit(queryModel.getLimit());
        // setting whether detail query or not
        blockExecutionInfo.setDetailQuery(queryModel.isDetailQuery());
        // setting whether raw record query or not
        blockExecutionInfo.setRawRecordDetailQuery(queryModel.isForcedDetailRawQuery());
        // setting the masked byte of the block which will be
        // used to unpack the older block keys
        blockExecutionInfo.setMaskedByteForBlock(maskedByte);
        // total number of dimension and measure blocks
        blockExecutionInfo
                .setTotalNumberDimensionBlock(segmentProperties.getDimensionOrdinalToBlockMapping().size());
        blockExecutionInfo
                .setTotalNumberOfMeasureBlock(segmentProperties.getMeasuresOrdinalToBlockMapping().size());
        // to check whether older block key update is required or not: the
        // update is needed only when this block's key generator differs from
        // the key generator the query key structure was built with
        blockExecutionInfo.setFixedKeyUpdateRequired(
                !blockKeyGenerator.equals(queryProperties.keyStructureInfo.getKeyGenerator()));
        IndexKey startIndexKey = null;
        IndexKey endIndexKey = null;
        if (null != queryModel.getFilterExpressionResolverTree()) {
            // loading the filter executer tree for filter evaluation
            blockExecutionInfo.setFilterExecuterTree(FilterUtil
                    .getFilterExecuterTree(queryModel.getFilterExpressionResolverTree(), segmentProperties));
            // the traversal fills the list; the start key is read from
            // position 0 and the end key from position 1
            List<IndexKey> listOfStartEndKeys = new ArrayList<IndexKey>(2);
            FilterUtil.traverseResolverTreeAndGetStartAndEndKey(segmentProperties,
                    queryModel.getAbsoluteTableIdentifier(), queryModel.getFilterExpressionResolverTree(),
                    listOfStartEndKeys);
            startIndexKey = listOfStartEndKeys.get(0);
            endIndexKey = listOfStartEndKeys.get(1);
        } else {
            // no filter present, so use the default start and end keys
            try {
                startIndexKey = FilterUtil.prepareDefaultStartIndexKey(segmentProperties);
                endIndexKey = FilterUtil.prepareDefaultEndIndexKey(segmentProperties);
            } catch (KeyGenException e) {
                throw new QueryExecutionException(e);
            }
        }
        blockExecutionInfo
                .setFileType(FileFactory.getFileType(queryModel.getAbsoluteTableIdentifier().getStorePath()));
        // setting the start index key of the block node
        blockExecutionInfo.setStartKey(startIndexKey);
        // setting the end index key of the block node
        blockExecutionInfo.setEndKey(endIndexKey);
        // expression dimensions
        List<CarbonDimension> expressionDimensions = new ArrayList<CarbonDimension>(
                CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        // expression measure
        List<CarbonMeasure> expressionMeasures = new ArrayList<CarbonMeasure>(
                CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        // setting all the dimension chunk indexes to be read from file
        blockExecutionInfo
                .setAllSelectedDimensionBlocksIndexes(QueryUtil.getDimensionsBlockIndexes(updatedQueryDimension,
                        segmentProperties.getDimensionOrdinalToBlockMapping(), expressionDimensions));
        // setting all the measure chunk indexes to be read from file
        blockExecutionInfo
                .setAllSelectedMeasureBlocksIndexes(QueryUtil.getMeasureBlockIndexes(queryModel.getQueryMeasures(),
                        expressionMeasures, segmentProperties.getMeasuresOrdinalToBlockMapping()));
        // setting the key structure info which will be required
        // to update the older block key with new key generator
        blockExecutionInfo.setKeyStructureInfo(queryProperties.keyStructureInfo);
        // setting the size of fixed key column (dictionary column)
        blockExecutionInfo.setFixedLengthKeySize(getKeySize(updatedQueryDimension, segmentProperties));
        Set<Integer> dictionaryColumnBlockIndex = new HashSet<Integer>();
        List<Integer> noDictionaryColumnBlockIndex = new ArrayList<Integer>();
        // get the block index to be read from file for query dimension
        // for both dictionary columns and no dictionary columns
        QueryUtil.fillQueryDimensionsBlockIndexes(updatedQueryDimension,
                segmentProperties.getDimensionOrdinalToBlockMapping(), dictionaryColumnBlockIndex,
                noDictionaryColumnBlockIndex);
        int[] queryDictionaryColumnBlockIndexes = ArrayUtils
                .toPrimitive(dictionaryColumnBlockIndex.toArray(new Integer[dictionaryColumnBlockIndex.size()]));
        // need to sort the dictionary column as for all dimension
        // column key will be filled based on key order
        Arrays.sort(queryDictionaryColumnBlockIndexes);
        blockExecutionInfo.setDictionaryColumnBlockIndex(queryDictionaryColumnBlockIndexes);
        // setting the no dictionary column block indexes
        blockExecutionInfo.setNoDictionaryBlockIndexes(ArrayUtils.toPrimitive(
                noDictionaryColumnBlockIndex.toArray(new Integer[noDictionaryColumnBlockIndex.size()])));
        // setting column id to dictionary mapping
        blockExecutionInfo.setColumnIdToDcitionaryMapping(queryProperties.columnToDictionayMapping);
        // setting each column value size
        blockExecutionInfo.setEachColumnValueSize(segmentProperties.getEachDimColumnValueSize());
        try {
            // to set column group and its key structure info which will be
            // used for getting the column group column data in case of final
            // row and in case of dimension aggregation
            blockExecutionInfo.setColumnGroupToKeyStructureInfo(
                    QueryUtil.getColumnGroupKeyStructureInfo(updatedQueryDimension, segmentProperties));
        } catch (KeyGenException e) {
            throw new QueryExecutionException(e);
        }
        return blockExecutionInfo;
    }

    /**
     * Computes the fixed key length for the dictionary encoded dimensions of
     * the query; this size is used to create a row from column chunk.
     *
     * @param queryDimension    query dimension
     * @param blockMetadataInfo block metadata info
     * @return key size, or 0 when no dictionary encoded dimension was collected
     */
    private int getKeySize(List<QueryDimension> queryDimension, SegmentProperties blockMetadataInfo) {
        List<Integer> dictionaryOrdinals = new ArrayList<Integer>(
                CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        int position = 0;
        while (position < queryDimension.size()) {
            CarbonDimension dimension = queryDimension.get(position).getDimension();
            int childCount = dimension.numberOfChild();
            if (childCount > 0) {
                // dimension with children: advance by the number of children
                position += childCount;
                continue;
            }
            // only dictionary encoded dimensions contribute to the fixed length key
            if (CarbonUtil.hasEncoding(dimension.getEncoder(), Encoding.DICTIONARY)) {
                dictionaryOrdinals.add(dimension.getKeyOrdinal());
            }
            position++;
        }
        if (dictionaryOrdinals.isEmpty()) {
            return 0;
        }
        int[] ordinals = ArrayUtils.toPrimitive(dictionaryOrdinals.toArray(new Integer[dictionaryOrdinals.size()]));
        return blockMetadataInfo.getFixedLengthKeySplitter().getKeySizeByBlock(ordinals);
    }

    /**
     * Builds the sort information required while sorting the data on
     * dimension columns. As a side effect the sort dimension indexes held in
     * the query properties are replaced with the freshly computed ones.
     *
     * @param queryModel query model
     * @return Sort infos
     * @throws QueryExecutionException declared for callers; see the utilities
     *                                 used for the actual failure conditions
     */
    protected SortInfo getSortInfos(QueryModel queryModel) throws QueryExecutionException {
        KeyStructureInfo keyStructure = queryProperties.keyStructureInfo;

        // masked byte range for the order by dimensions
        int[][] sortByteRanges = QueryUtil.getMaskedByteRangeForSorting(queryModel.getSortDimension(),
                keyStructure.getKeyGenerator(), keyStructure.getMaskByteRanges());
        // masked key for sorting
        byte[][] sortMaskedKeys = QueryUtil.getMaksedKeyForSorting(queryModel.getSortDimension(),
                keyStructure.getKeyGenerator(), sortByteRanges, keyStructure.getMaskByteRanges());
        // sort dimension indexes relative to the query dimensions
        queryProperties.sortDimIndexes = QueryUtil.getSortDimensionIndexes(queryModel.getSortDimension(),
                queryModel.getQueryDimension());

        SortInfo result = new SortInfo();
        result.setSortDimension(queryModel.getSortDimension());
        result.setSortDimensionIndex(queryProperties.sortDimIndexes);
        result.setDimensionSortOrder(queryModel.getSortOrder());
        result.setMaskedByteRangeForSorting(sortByteRanges);
        result.setDimensionMaskKeyForSorting(sortMaskedKeys);
        return result;
    }

    /**
     * Creates the aggregator info for the given table block and completes it
     * with the start indexes and measure data types held in the query
     * properties.
     *
     * @param queryModel query model
     * @param tableBlock table block
     * @return aggregator info
     */
    private AggregatorInfo getAggregatorInfoForBlock(QueryModel queryModel, AbstractIndex tableBlock) {
        SegmentProperties blockProperties = tableBlock.getSegmentProperties();
        // aggregate infos derived from the query measures and the measures of the block
        AggregatorInfo info = RestructureUtil.getAggregatorInfos(queryModel.getQueryMeasures(),
                blockProperties.getMeasures());
        // index of expression in measure aggregators
        info.setExpressionAggregatorStartIndex(queryProperties.aggExpressionStartIndex);
        // index of measure columns in measure aggregators
        info.setMeasureAggregatorStartIndex(queryProperties.measureStartIndex);
        // data types of the measures selected in the query
        info.setMeasureDataTypes(queryProperties.measureDataTypes);
        return info;
    }

}