Source code

Java tutorial


Here is the source code for org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap.java


/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.carbondata.core.indexstore.blockletindex;

import java.io.ByteArrayOutputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Comparator;
import java.util.List;

import org.apache.carbondata.common.logging.LogService;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.IndexKey;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.indexstore.AbstractMemoryDMStore;
import org.apache.carbondata.core.indexstore.BlockMetaInfo;
import org.apache.carbondata.core.indexstore.Blocklet;
import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
import org.apache.carbondata.core.indexstore.PartitionSpec;
import org.apache.carbondata.core.indexstore.SafeMemoryDMStore;
import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore;
import org.apache.carbondata.core.indexstore.row.DataMapRow;
import org.apache.carbondata.core.indexstore.row.DataMapRowImpl;
import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema;
import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.profiler.ExplainCollector;
import org.apache.carbondata.core.scan.filter.FilterExpressionProcessor;
import org.apache.carbondata.core.scan.filter.FilterUtil;
import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
import org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor;
import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataFileFooterConverter;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.core.util.path.CarbonTablePath;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.Path;
import org.xerial.snappy.Snappy;

/**
 * Datamap implementation for blocklet.
 */
public class BlockletDataMap extends CoarseGrainDataMap implements Serializable {

    private static final LogService LOGGER = LogServiceFactory.getLogService(BlockletDataMap.class.getName());

    private static final long serialVersionUID = -2170289352240810993L;

    // Column positions inside an index row held by memoryDMStore. The order matches the
    // order in which createSchema() appends the column schemas.
    private static final int KEY_INDEX = 0;

    private static final int MIN_VALUES_INDEX = 1;

    private static final int MAX_VALUES_INDEX = 2;

    private static final int ROW_COUNT_INDEX = 3;

    private static final int FILE_PATH_INDEX = 4;

    private static final int PAGE_COUNT_INDEX = 5;

    private static final int VERSION_INDEX = 6;

    // The misspelled name is kept because code outside this section may reference it.
    private static final int SCHEMA_UPADATED_TIME_INDEX = 7;

    private static final int BLOCK_INFO_INDEX = 8;

    private static final int BLOCK_FOOTER_OFFSET = 9;

    private static final int LOCATIONS = 10;

    private static final int BLOCKLET_ID_INDEX = 11;

    private static final int BLOCK_LENGTH = 12;

    // Column positions inside the task summary row held by summaryDMStore. The order matches
    // the order in which createSummarySchema() appends the column schemas.
    private static final int TASK_MIN_VALUES_INDEX = 0;

    private static final int TASK_MAX_VALUES_INDEX = 1;

    private static final int SCHEMA = 2;

    private static final int INDEX_PATH = 3;

    private static final int INDEX_FILE_NAME = 4;

    private static final int SEGMENTID = 5;

    // One row per blocklet (or per block for old stores).
    private AbstractMemoryDMStore memoryDMStore;

    // Task level summary row(s): min/max, schema binary, index path, index file name, segment id.
    private AbstractMemoryDMStore summaryDMStore;

    // As it is a heavy object it is not recommended to serialize this object
    private transient SegmentProperties segmentProperties;

    private int[] columnCardinality;

    private long blockletSchemaTime;

    /**
     * Builds this datamap's in-memory stores from the index file referenced by {@code dataMapModel}.
     *
     * <p>For every {@link DataFileFooter} returned by the footer converter the method looks up the
     * matching {@link BlockMetaInfo} by block file path and, when one is found, adds either a single
     * block-level row (footer without a blocklet list) or one row per blocklet. Task-level min/max
     * values are accumulated in a summary row that is handed to
     * {@code addTaskSummaryRowToUnsafeMemoryStore} after the loop.
     *
     * <p>NOTE(review): this listing appears to have lost lines (closing braces, the tail of the
     * {@code getIndexInfo(...)} call and the body of the {@code memoryDMStore != null} check), so
     * the block nesting implied by the indentation should be confirmed against the upstream file.
     *
     * @param dataMapModel model to load from; it is cast to {@code BlockletDataMapModel}
     * @throws IOException declared by the signature; presumably raised while reading the index file
     * @throws MemoryException declared by the signature; presumably raised by the memory stores
     */
    public void init(DataMapModel dataMapModel) throws IOException, MemoryException {
        long startTime = System.currentTimeMillis();
        assert (dataMapModel instanceof BlockletDataMapModel);
        BlockletDataMapModel blockletDataMapInfo = (BlockletDataMapModel) dataMapModel;
        DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
        List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(blockletDataMapInfo.getFilePath(),
        // NOTE(review): the argument list of the call above is cut off in this listing.
        Path path = new Path(blockletDataMapInfo.getFilePath());
        // Index folder, index file name and segment id are stored once in the summary row.
        byte[] filePath = path.getParent().toString().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
        byte[] fileName = path.getName().toString().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
        byte[] segmentId = blockletDataMapInfo.getSegmentId().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
        DataMapRowImpl summaryRow = null;
        byte[] schemaBinary = null;
        // below 2 variables will be used for fetching the relative blocklet id. Relative blocklet ID
        // is id assigned to a blocklet within a part file
        String tempFilePath = null;
        int relativeBlockletId = 0;
        for (DataFileFooter fileFooter : indexInfo) {
            // Segment properties and both store schemas are created once, from the first footer.
            if (segmentProperties == null) {
                List<ColumnSchema> columnInTable = fileFooter.getColumnInTable();
                schemaBinary = convertSchemaToBinary(columnInTable);
                blockletSchemaTime = fileFooter.getSchemaUpdatedTimeStamp();
                columnCardinality = fileFooter.getSegmentInfo().getColumnCardinality();
                segmentProperties = new SegmentProperties(columnInTable, columnCardinality);
                createSchema(segmentProperties, ((BlockletDataMapModel) dataMapModel).isAddToUnsafe());
                createSummarySchema(segmentProperties, schemaBinary, filePath, fileName, segmentId,
                        ((BlockletDataMapModel) dataMapModel).isAddToUnsafe());
            TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo();
            BlockMetaInfo blockMetaInfo = blockletDataMapInfo.getBlockMetaInfoMap().get(blockInfo.getFilePath());
            // Here it loads info about all blocklets of index
            // Only add if the file exists physically. There are scenarios which index file exists inside
            // merge index but related carbondata files are deleted. In that case we first check whether
            // the file exists physically or not
            if (blockMetaInfo != null) {
                if (fileFooter.getBlockletList() == null) {
                    // This is old store scenario, here blocklet information is not available in index file so
                    // load only block info
                    summaryRow = loadToUnsafeBlock(fileFooter, segmentProperties, blockInfo.getFilePath(),
                            summaryRow, blockMetaInfo);
                } else {
                    // blocklet ID will start from 0 again only when part file path is changed
                    if (null == tempFilePath || !tempFilePath.equals(blockInfo.getFilePath())) {
                        tempFilePath = blockInfo.getFilePath();
                        relativeBlockletId = 0;
                    summaryRow = loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath(), summaryRow,
                            blockMetaInfo, relativeBlockletId);
                    // this is done because relative blocklet id need to be incremented based on the
                    // total number of blocklets
                    relativeBlockletId += fileFooter.getBlockletList().size();
        if (memoryDMStore != null) {
        // NOTE(review): the body of the check above is missing from this listing.
        if (null != summaryDMStore) {
            addTaskSummaryRowToUnsafeMemoryStore(summaryRow, schemaBinary, filePath, fileName, segmentId);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Time taken to load blocklet datamap from file : " + dataMapModel.getFilePath() + " is "
                    + (System.currentTimeMillis() - startTime));

    private DataMapRowImpl loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentProperties,
            String filePath, DataMapRowImpl summaryRow, BlockMetaInfo blockMetaInfo, int relativeBlockletId) {
        int[] minMaxLen = segmentProperties.getColumnsValueSize();
        List<BlockletInfo> blockletList = fileFooter.getBlockletList();
        CarbonRowSchema[] schema = memoryDMStore.getSchema();
        // Add one row to maintain task level min max for segment pruning
        if (!blockletList.isEmpty() && summaryRow == null) {
            summaryRow = new DataMapRowImpl(summaryDMStore.getSchema());
        for (int index = 0; index < blockletList.size(); index++) {
            DataMapRow row = new DataMapRowImpl(schema);
            int ordinal = 0;
            int taskMinMaxOrdinal = 0;
            BlockletInfo blockletInfo = blockletList.get(index);

            // add start key as index key
            row.setByteArray(blockletInfo.getBlockletIndex().getBtreeIndex().getStartKey(), ordinal++);

            BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex();
            byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
            row.setRow(addMinMax(minMaxLen, schema[ordinal], minValues), ordinal);
            // compute and set task level min values
            addTaskMinMaxValues(summaryRow, minMaxLen, summaryDMStore.getSchema()[taskMinMaxOrdinal], minValues,
                    TASK_MIN_VALUES_INDEX, true);
            byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
            row.setRow(addMinMax(minMaxLen, schema[ordinal], maxValues), ordinal);
            // compute and set task level max values
            addTaskMinMaxValues(summaryRow, minMaxLen, summaryDMStore.getSchema()[taskMinMaxOrdinal], maxValues,
                    TASK_MAX_VALUES_INDEX, false);

            row.setInt(blockletInfo.getNumberOfRows(), ordinal++);

            // add file path
            byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
            row.setByteArray(filePathBytes, ordinal++);

            // add pages
            row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++);

            // add version number
            row.setShort(fileFooter.getVersionId().number(), ordinal++);

            // add schema updated time
            row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);

            // add blocklet info
            byte[] serializedData;
            try {
                ByteArrayOutputStream stream = new ByteArrayOutputStream();
                DataOutput dataOutput = new DataOutputStream(stream);
                serializedData = stream.toByteArray();
                row.setByteArray(serializedData, ordinal++);
                // Add block footer offset, it is used if we need to read footer of block
                row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
                setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
                // for relative blockelt id i.e blocklet id that belongs to a particular part file
                row.setShort((short) relativeBlockletId++, ordinal++);
                // Store block size
                row.setLong(blockMetaInfo.getSize(), ordinal);
            } catch (Exception e) {
                throw new RuntimeException(e);

        return summaryRow;

    private void setLocations(String[] locations, DataMapRow row, int ordinal) throws UnsupportedEncodingException {
        // Add location info
        String locationStr = StringUtils.join(locations, ',');
        row.setByteArray(locationStr.getBytes(CarbonCommonConstants.DEFAULT_CHARSET), ordinal);

    /**
     * Load information for the block. This case can happen only for old stores
     * where blocklet information is not available in the index file. So load only block information
     * and read blocklet information in the executor.
     */
    private DataMapRowImpl loadToUnsafeBlock(DataFileFooter fileFooter, SegmentProperties segmentProperties,
            String filePath, DataMapRowImpl summaryRow, BlockMetaInfo blockMetaInfo) {
        int[] minMaxLen = segmentProperties.getColumnsValueSize();
        BlockletIndex blockletIndex = fileFooter.getBlockletIndex();
        CarbonRowSchema[] schema = memoryDMStore.getSchema();
        // Add one row to maintain task level min max for segment pruning
        if (summaryRow == null) {
            summaryRow = new DataMapRowImpl(summaryDMStore.getSchema());
        DataMapRow row = new DataMapRowImpl(schema);
        int ordinal = 0;
        int taskMinMaxOrdinal = 0;
        // add start key as index key
        row.setByteArray(blockletIndex.getBtreeIndex().getStartKey(), ordinal++);

        BlockletMinMaxIndex minMaxIndex = blockletIndex.getMinMaxIndex();
        byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
        byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
        // update min max values in case of old store
        byte[][] updatedMinValues = CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, true);
        byte[][] updatedMaxValues = CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
        row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMinValues), ordinal);
        // compute and set task level min values
        addTaskMinMaxValues(summaryRow, minMaxLen, summaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMinValues,
                TASK_MIN_VALUES_INDEX, true);
        row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMaxValues), ordinal);
        // compute and set task level max values
        addTaskMinMaxValues(summaryRow, minMaxLen, summaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMaxValues,
                TASK_MAX_VALUES_INDEX, false);

        row.setInt((int) fileFooter.getNumberOfRows(), ordinal++);

        // add file path
        byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
        row.setByteArray(filePathBytes, ordinal++);

        // add pages
        row.setShort((short) 0, ordinal++);

        // add version number
        row.setShort(fileFooter.getVersionId().number(), ordinal++);

        // add schema updated time
        row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);

        // add blocklet info
        row.setByteArray(new byte[0], ordinal++);

        row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
        try {
            setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
            // for relative blocklet id. Value is -1 because in case of old store blocklet info will
            // not be present in the index file and in that case we will not knwo the total number of
            // blocklets
            row.setShort((short) -1, ordinal++);

            // store block size
            row.setLong(blockMetaInfo.getSize(), ordinal);
        } catch (Exception e) {
            throw new RuntimeException(e);

        return summaryRow;

    private void addTaskSummaryRowToUnsafeMemoryStore(DataMapRow summaryRow, byte[] schemaBinary, byte[] filePath,
            byte[] fileName, byte[] segmentId) {
        // write the task summary info to unsafe memory store
        if (null != summaryRow) {
            // Add column schema , it is useful to generate segment properties in executor.
            // So we no need to read footer again there.
            if (schemaBinary != null) {
                summaryRow.setByteArray(schemaBinary, SCHEMA);
            summaryRow.setByteArray(filePath, INDEX_PATH);
            summaryRow.setByteArray(fileName, INDEX_FILE_NAME);
            summaryRow.setByteArray(segmentId, SEGMENTID);
            try {
            } catch (Exception e) {
                throw new RuntimeException(e);

    /**
     * Fill the measures' min values with the minimum; this is needed for backward version
     * compatibility, as older versions don't store min values for measures.
     */
    /**
     * Pads the min-value array so that it has one entry per column described by
     * {@code minMaxLen}.
     *
     * <p>When {@code minValues} already has at least {@code minMaxLen.length} entries it is
     * returned as is. Otherwise the existing entries are copied into a new array of that length
     * and one entry per measure is filled in after them.
     *
     * <p>NOTE(review): in this listing every complete branch stores a clone of the freshly
     * allocated (all-zero) 8-byte buffer and the decimal branch is cut off. Presumably each branch
     * originally wrote a type-specific minimum into the buffer first; confirm against upstream.
     *
     * @param minValues min values read from the index
     * @param minMaxLen per-column value sizes; only its length is used here
     * @return {@code minValues} itself or the padded copy
     */
    private byte[][] updateMinValues(byte[][] minValues, int[] minMaxLen) {
        byte[][] updatedValues = minValues;
        if (minValues.length < minMaxLen.length) {
            updatedValues = new byte[minMaxLen.length][];
            System.arraycopy(minValues, 0, updatedValues, 0, minValues.length);
            List<CarbonMeasure> measures = segmentProperties.getMeasures();
            ByteBuffer buffer = ByteBuffer.allocate(8);
            // measure i is stored right after the entries copied from minValues
            for (int i = 0; i < measures.size(); i++) {
                DataType dataType = measures.get(i).getDataType();
                if (dataType == DataTypes.BYTE) {
                    updatedValues[minValues.length + i] = buffer.array().clone();
                } else if (dataType == DataTypes.SHORT) {
                    updatedValues[minValues.length + i] = buffer.array().clone();
                } else if (dataType == DataTypes.INT) {
                    updatedValues[minValues.length + i] = buffer.array().clone();
                } else if (dataType == DataTypes.LONG) {
                    updatedValues[minValues.length + i] = buffer.array().clone();
                } else if (DataTypes.isDecimal(dataType)) {
                    updatedValues[minValues.length + i] = DataTypeUtil
                    // NOTE(review): the statement above is truncated in this listing.
                } else {
                    updatedValues[minValues.length + i] = buffer.array().clone();
        return updatedValues;

    /**
     * Fill the measures' max values with the maximum; this is needed for backward version
     * compatibility, as older versions don't store max values for measures.
     */
    /**
     * Pads the max-value array so that it has one entry per column described by
     * {@code minMaxLen}.
     *
     * <p>When {@code maxValues} already has at least {@code minMaxLen.length} entries it is
     * returned as is. Otherwise the existing entries are copied into a new array of that length
     * and one entry per measure is filled in after them.
     *
     * <p>NOTE(review): in this listing every complete branch stores a clone of the freshly
     * allocated (all-zero) 8-byte buffer and the decimal branch is cut off. Presumably each branch
     * originally wrote a type-specific maximum into the buffer first; confirm against upstream.
     *
     * @param maxValues max values read from the index
     * @param minMaxLen per-column value sizes; only its length is used here
     * @return {@code maxValues} itself or the padded copy
     */
    private byte[][] updateMaxValues(byte[][] maxValues, int[] minMaxLen) {
        byte[][] updatedValues = maxValues;
        if (maxValues.length < minMaxLen.length) {
            updatedValues = new byte[minMaxLen.length][];
            System.arraycopy(maxValues, 0, updatedValues, 0, maxValues.length);
            List<CarbonMeasure> measures = segmentProperties.getMeasures();
            ByteBuffer buffer = ByteBuffer.allocate(8);
            // measure i is stored right after the entries copied from maxValues
            for (int i = 0; i < measures.size(); i++) {
                DataType dataType = measures.get(i).getDataType();
                if (dataType == DataTypes.BYTE) {
                    updatedValues[maxValues.length + i] = buffer.array().clone();
                } else if (dataType == DataTypes.SHORT) {
                    updatedValues[maxValues.length + i] = buffer.array().clone();
                } else if (dataType == DataTypes.INT) {
                    updatedValues[maxValues.length + i] = buffer.array().clone();
                } else if (dataType == DataTypes.LONG) {
                    updatedValues[maxValues.length + i] = buffer.array().clone();
                } else if (DataTypes.isDecimal(dataType)) {
                    updatedValues[maxValues.length + i] = DataTypeUtil
                    // NOTE(review): the statement above is truncated in this listing.
                } else {
                    updatedValues[maxValues.length + i] = buffer.array().clone();
        return updatedValues;

    private DataMapRow addMinMax(int[] minMaxLen, CarbonRowSchema carbonRowSchema, byte[][] minValues) {
        CarbonRowSchema[] minSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
        DataMapRow minRow = new DataMapRowImpl(minSchemas);
        int minOrdinal = 0;
        // min value adding
        for (int i = 0; i < minMaxLen.length; i++) {
            minRow.setByteArray(minValues[i], minOrdinal++);
        return minRow;

    /**
     * This method will compute min/max values at task level.
     *
     * @param taskMinMaxRow
     * @param minMaxLen
     * @param carbonRowSchema
     * @param minMaxValue
     * @param ordinal
     * @param isMinValueComparison
     */
    private void addTaskMinMaxValues(DataMapRow taskMinMaxRow, int[] minMaxLen, CarbonRowSchema carbonRowSchema,
            byte[][] minMaxValue, int ordinal, boolean isMinValueComparison) {
        DataMapRow row = taskMinMaxRow.getRow(ordinal);
        byte[][] updatedMinMaxValues = minMaxValue;
        if (null == row) {
            CarbonRowSchema[] minSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema)
            row = new DataMapRowImpl(minSchemas);
        } else {
            byte[][] existingMinMaxValues = getMinMaxValue(taskMinMaxRow, ordinal);
            // Compare and update min max values
            for (int i = 0; i < minMaxLen.length; i++) {
                int compare = ByteUtil.UnsafeComparer.INSTANCE.compareTo(existingMinMaxValues[i], minMaxValue[i]);
                if (isMinValueComparison) {
                    if (compare < 0) {
                        updatedMinMaxValues[i] = existingMinMaxValues[i];
                } else if (compare > 0) {
                    updatedMinMaxValues[i] = existingMinMaxValues[i];
        int minMaxOrdinal = 0;
        // min/max value adding
        for (int i = 0; i < minMaxLen.length; i++) {
            row.setByteArray(updatedMinMaxValues[i], minMaxOrdinal++);
        taskMinMaxRow.setRow(row, ordinal);

    private void createSchema(SegmentProperties segmentProperties, boolean addToUnsafe) throws MemoryException {
        List<CarbonRowSchema> indexSchemas = new ArrayList<>();

        // Index key
        indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
        getMinMaxSchema(segmentProperties, indexSchemas);

        // for number of rows.
        indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.INT));

        // for table block path
        indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));

        // for number of pages.
        indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));

        // for version number.
        indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));

        // for schema updated time.
        indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));

        //for blocklet info
        indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));

        // for block footer offset.
        indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));

        // for locations
        indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));

        // for relative blocklet id i.e. blocklet id that belongs to a particular part file.
        indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));

        // for storing block length.
        indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));

        CarbonRowSchema[] schema = indexSchemas.toArray(new CarbonRowSchema[indexSchemas.size()]);
        memoryDMStore = getMemoryDMStore(schema, addToUnsafe);

    /**
     * Creates the schema to store summary information, i.e. the information which can be stored
     * only once per datamap. It stores the datamap-level max/min of each column and the partition
     * information of the datamap.
     *
     * @param segmentProperties
     * @throws MemoryException
     */
    private void createSummarySchema(SegmentProperties segmentProperties, byte[] schemaBinary, byte[] filePath,
            byte[] fileName, byte[] segmentId, boolean addToUnsafe) throws MemoryException {
        List<CarbonRowSchema> taskMinMaxSchemas = new ArrayList<>();
        getMinMaxSchema(segmentProperties, taskMinMaxSchemas);
        // for storing column schema
        taskMinMaxSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, schemaBinary.length));
        // for storing file path
        taskMinMaxSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, filePath.length));
        // for storing file name
        taskMinMaxSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, fileName.length));
        // for storing segmentid
        taskMinMaxSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, segmentId.length));
        CarbonRowSchema[] schema = taskMinMaxSchemas.toArray(new CarbonRowSchema[taskMinMaxSchemas.size()]);
        summaryDMStore = getMemoryDMStore(schema, addToUnsafe);

    private void getMinMaxSchema(SegmentProperties segmentProperties, List<CarbonRowSchema> minMaxSchemas) {
        // Index key
        int[] minMaxLen = segmentProperties.getColumnsValueSize();
        // do it 2 times, one for min and one for max.
        for (int k = 0; k < 2; k++) {
            CarbonRowSchema[] mapSchemas = new CarbonRowSchema[minMaxLen.length];
            for (int i = 0; i < minMaxLen.length; i++) {
                if (minMaxLen[i] <= 0) {
                    boolean isVarchar = false;
                    if (i < segmentProperties.getDimensions().size()
                            && segmentProperties.getDimensions().get(i).getDataType() == DataTypes.VARCHAR) {
                        isVarchar = true;
                    mapSchemas[i] = new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY, isVarchar);
                } else {
                    mapSchemas[i] = new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, minMaxLen[i]);
            CarbonRowSchema mapSchema = new CarbonRowSchema.StructCarbonRowSchema(
                    DataTypes.createDefaultStructType(), mapSchemas);

    public boolean isScanRequired(FilterResolverIntf filterExp) {
        FilterExecuter filterExecuter = FilterUtil.getFilterExecuterTree(filterExp, segmentProperties, null);
        for (int i = 0; i < summaryDMStore.getRowCount(); i++) {
            DataMapRow unsafeRow = summaryDMStore.getDataMapRow(i);
            boolean isScanRequired = FilterExpressionProcessor.isScanRequired(filterExecuter,
                    getMinMaxValue(unsafeRow, TASK_MAX_VALUES_INDEX),
                    getMinMaxValue(unsafeRow, TASK_MIN_VALUES_INDEX));
            if (isScanRequired) {
                return true;
        return false;

    private List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties) {
        if (memoryDMStore.getRowCount() == 0) {
            return new ArrayList<>();
        List<Blocklet> blocklets = new ArrayList<>();
        int numBlocklets = 0;
        if (filterExp == null) {
            numBlocklets = memoryDMStore.getRowCount();
            for (int i = 0; i < numBlocklets; i++) {
                DataMapRow safeRow = memoryDMStore.getDataMapRow(i).convertToSafeRow();
                blocklets.add(createBlocklet(safeRow, safeRow.getShort(BLOCKLET_ID_INDEX)));
        } else {
            // Remove B-tree jump logic as start and end key prepared is not
            // correct for old store scenarios
            int startIndex = 0;
            numBlocklets = memoryDMStore.getRowCount();
            FilterExecuter filterExecuter = FilterUtil.getFilterExecuterTree(filterExp, segmentProperties, null);
            while (startIndex < numBlocklets) {
                DataMapRow safeRow = memoryDMStore.getDataMapRow(startIndex).convertToSafeRow();
                int blockletId = safeRow.getShort(BLOCKLET_ID_INDEX);
                String filePath = new String(safeRow.getByteArray(FILE_PATH_INDEX),
                boolean isValid = addBlockBasedOnMinMaxValue(filterExecuter,
                        getMinMaxValue(safeRow, MAX_VALUES_INDEX), getMinMaxValue(safeRow, MIN_VALUES_INDEX),
                        filePath, blockletId);
                if (isValid) {
                    blocklets.add(createBlocklet(safeRow, blockletId));
        return blocklets;

    public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
            List<PartitionSpec> partitions) {
        if (memoryDMStore.getRowCount() == 0) {
            return new ArrayList<>();
        // if it has partitioned datamap but there is no partitioned information stored, it means
        // partitions are dropped so return empty list.
        if (partitions != null) {
            // First get the partitions which are stored inside datamap.
            String[] fileDetails = getFileDetails();
            // Check the exact match of partition information inside the stored partitions.
            boolean found = false;
            Path folderPath = new Path(fileDetails[0]);
            for (PartitionSpec spec : partitions) {
                if (folderPath.equals(spec.getLocation()) && isCorrectUUID(fileDetails, spec)) {
                    found = true;
            if (!found) {
                return new ArrayList<>();
        // Prune with filters if the partitions are existed in this datamap
        // changed segmentProperties to this.segmentProperties to make sure the pruning with its own
        // segmentProperties.
        // Its a temporary fix. The Interface DataMap.prune(FilterResolverIntf filterExp,
        // SegmentProperties segmentProperties, List<PartitionSpec> partitions) should be corrected
        return prune(filterExp, this.segmentProperties);

    public void finish() {


    private boolean isCorrectUUID(String[] fileDetails, PartitionSpec spec) {
        boolean needToScan = false;
        if (spec.getUuid() != null) {
            String[] split = spec.getUuid().split("_");
            if (split[0].equals(fileDetails[2])
                    && CarbonTablePath.DataFileUtil.getTimeStampFromFileName(fileDetails[1]).equals(split[1])) {
                needToScan = true;
        } else {
            needToScan = true;
        return needToScan;

    /**
     * Select the blocks based on column min and max values.
     *
     * @param filterExecuter
     * @param maxValue
     * @param minValue
     * @param filePath
     * @param blockletId
     * @return
     */
    private boolean addBlockBasedOnMinMaxValue(FilterExecuter filterExecuter, byte[][] maxValue, byte[][] minValue,
            String filePath, int blockletId) {
        BitSet bitSet = null;
        if (filterExecuter instanceof ImplicitColumnFilterExecutor) {
            String uniqueBlockPath = filePath.substring(filePath.lastIndexOf("/Part") + 1);
            // this case will come in case of old store where index file does not contain the
            // blocklet information
            if (blockletId != -1) {
                uniqueBlockPath = uniqueBlockPath + CarbonCommonConstants.FILE_SEPARATOR + blockletId;
            bitSet = ((ImplicitColumnFilterExecutor) filterExecuter)
                    .isFilterValuesPresentInBlockOrBlocklet(maxValue, minValue, uniqueBlockPath);
        } else {
            bitSet = filterExecuter.isScanRequired(maxValue, minValue);
        if (!bitSet.isEmpty()) {
            return true;
        } else {
            return false;

    public ExtendedBlocklet getDetailedBlocklet(String blockletId) {
        int index = Integer.parseInt(blockletId);
        DataMapRow safeRow = memoryDMStore.getDataMapRow(index).convertToSafeRow();
        return createBlocklet(safeRow, safeRow.getShort(BLOCKLET_ID_INDEX));

    /**
     * Get the index file name of the blocklet data map.
     *
     * @return
     */
    public String getIndexFileName() {
        DataMapRow unsafeRow = summaryDMStore.getDataMapRow(0);
        try {
            return new String(unsafeRow.getByteArray(INDEX_FILE_NAME), CarbonCommonConstants.DEFAULT_CHARSET);
        } catch (UnsupportedEncodingException e) {
            // should never happen!
            throw new IllegalArgumentException("UTF8 encoding is not supported", e);

    private byte[][] getMinMaxValue(DataMapRow row, int index) {
        DataMapRow minMaxRow = row.getRow(index);
        byte[][] minMax = new byte[minMaxRow.getColumnCount()][];
        for (int i = 0; i < minMax.length; i++) {
            minMax[i] = minMaxRow.getByteArray(i);
        return minMax;

    /**
     * Creates an {@link ExtendedBlocklet} from the string decoded from the row's
     * {@code FILE_PATH_INDEX} bytes and the decimal string form of {@code blockletId}.
     *
     * NOTE(review): this copy of the method is incomplete - closing braces are missing, the
     * statement inside the try block has lost its beginning, and {@code detailInfo} is never
     * attached to the blocklet in the visible code. Restore it from the upstream source.
     *
     * @param row        row the blocklet is built from
     * @param blockletId blocklet id; also narrowed to a short for the detail info
     * @return the created blocklet
     */
    private ExtendedBlocklet createBlocklet(DataMapRow row, int blockletId) {
        ExtendedBlocklet blocklet = new ExtendedBlocklet(
                new String(row.getByteArray(FILE_PATH_INDEX), CarbonCommonConstants.DEFAULT_CHARSET_CLASS),
                blockletId + "");
        BlockletDetailInfo detailInfo = new BlockletDetailInfo();
        // narrowing cast: ids outside the short range would wrap
        detailInfo.setBlockletId((short) blockletId);
        try {
            // NOTE(review): the line that opens this statement is missing; what remains is an
            // orphaned argument - the LOCATIONS bytes decoded and split on ",".
                    new String(row.getByteArray(LOCATIONS), CarbonCommonConstants.DEFAULT_CHARSET).split(","));
        } catch (IOException e) {
            // checked failure is rethrown unchecked with the cause preserved
            throw new RuntimeException(e);
        return blocklet;

    private String[] getFileDetails() {
        try {
            String[] fileDetails = new String[3];
            DataMapRow unsafeRow = summaryDMStore.getDataMapRow(0);
            fileDetails[0] = new String(unsafeRow.getByteArray(INDEX_PATH), CarbonCommonConstants.DEFAULT_CHARSET);
            fileDetails[1] = new String(unsafeRow.getByteArray(INDEX_FILE_NAME),
            fileDetails[2] = new String(unsafeRow.getByteArray(SEGMENTID), CarbonCommonConstants.DEFAULT_CHARSET);
            return fileDetails;
        } catch (Exception e) {
            throw new RuntimeException(e);

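    /**
     * Binary search used to get the first tentative index row based on
     * the search key.
     *
     * @param key        search key
     * @param comparator comparator used to compare the search key with the stored rows
     * @return index of the first tentative block
     */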
    private int findStartIndex(DataMapRow key, Comparator<DataMapRow> comparator) {
        int childNodeIndex;
        int low = 0;
        int high = memoryDMStore.getRowCount() - 1;
        int mid = 0;
        int compareRes = -1;
        while (low <= high) {
            mid = (low + high) >>> 1;
            // compare the entries
            compareRes =, memoryDMStore.getDataMapRow(mid));
            if (compareRes < 0) {
                high = mid - 1;
            } else if (compareRes > 0) {
                low = mid + 1;
            } else {
                // if key is matched then get the first entry
                int currentPos = mid;
                while (currentPos - 1 >= 0
                        &&, memoryDMStore.getDataMapRow(currentPos - 1)) == 0) {
                mid = currentPos;
        // if compare result is less than zero then we
        // and mid is more than 0 then we need to previous block as duplicates
        // record can be present
        if (compareRes < 0) {
            if (mid > 0) {
            childNodeIndex = mid;
        } else {
            childNodeIndex = mid;
        // get the leaf child
        return childNodeIndex;

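    /**
     * Binary search used to get the last tentative block based on
     * the search key.
     *
     * @param key        search key
     * @param comparator comparator used to compare the search key with the stored rows
     * @return index of the last tentative block
     */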
    private int findEndIndex(DataMapRow key, Comparator<DataMapRow> comparator) {
        int childNodeIndex;
        int low = 0;
        int high = memoryDMStore.getRowCount() - 1;
        int mid = 0;
        int compareRes = -1;
        while (low <= high) {
            mid = (low + high) >>> 1;
            // compare the entries
            compareRes =, memoryDMStore.getDataMapRow(mid));
            if (compareRes < 0) {
                high = mid - 1;
            } else if (compareRes > 0) {
                low = mid + 1;
            } else {
                int currentPos = mid;
                // if key is matched then get the first entry
                while (currentPos + 1 < memoryDMStore.getRowCount()
                        &&, memoryDMStore.getDataMapRow(currentPos + 1)) == 0) {
                mid = currentPos;
        // if compare result is less than zero then we
        // and mid is more than 0 then we need to previous block as duplicates
        // record can be present
        if (compareRes < 0) {
            if (mid > 0) {
            childNodeIndex = mid;
        } else {
            childNodeIndex = mid;
        return childNodeIndex;

    private DataMapRow convertToRow(IndexKey key) {
        ByteBuffer buffer = ByteBuffer
                .allocate(key.getDictionaryKeys().length + key.getNoDictionaryKeys().length + 8);
        DataMapRowImpl dataMapRow = new DataMapRowImpl(memoryDMStore.getSchema());
        dataMapRow.setByteArray(buffer.array(), 0);
        return dataMapRow;

    public byte[] getColumnSchemaBinary() {
        DataMapRow unsafeRow = summaryDMStore.getDataMapRow(0);
        return unsafeRow.getByteArray(SCHEMA);

    /**
     * Convert schema to binary
     */
    // Returns the Snappy raw-compressed bytes of an in-memory stream built for the given schemas.
    // NOTE(review): this copy of the method is incomplete - closing braces are missing and no
    // visible statement writes to dataOutput, so the stream stays empty. Restore from upstream.
    private byte[] convertSchemaToBinary(List<ColumnSchema> columnSchemas) throws IOException {
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        DataOutput dataOutput = new DataOutputStream(stream);
        for (ColumnSchema columnSchema : columnSchemas) {
            // NOTE(review): the body of this null check and the rest of the loop body are missing.
            if (columnSchema.getColumnReferenceId() == null) {
        byte[] byteArray = stream.toByteArray();
        // Compress with snappy to reduce the size of schema
        return Snappy.rawCompress(byteArray, byteArray.length);

    public void clear() {
        if (memoryDMStore != null) {
            memoryDMStore = null;
            segmentProperties = null;
        // clear task min/max unsafe memory
        if (null != summaryDMStore) {
            summaryDMStore = null;

    public long getMemorySize() {
        long memoryUsed = 0L;
        if (memoryDMStore != null) {
            memoryUsed += memoryDMStore.getMemoryUsed();
        if (null != summaryDMStore) {
            memoryUsed += summaryDMStore.getMemoryUsed();
        return memoryUsed;

    public SegmentProperties getSegmentProperties() {
        return segmentProperties;

    public void setSegmentProperties(SegmentProperties segmentProperties) {
        this.segmentProperties = segmentProperties;

    public int[] getColumnCardinality() {
        return columnCardinality;

    private AbstractMemoryDMStore getMemoryDMStore(CarbonRowSchema[] schema, boolean addToUnsafe)
            throws MemoryException {
        AbstractMemoryDMStore memoryDMStore;
        if (addToUnsafe) {
            memoryDMStore = new UnsafeMemoryDMStore(schema);
        } else {
            memoryDMStore = new SafeMemoryDMStore(schema);
        return memoryDMStore;

    /**
     * This method will convert safe to unsafe memory DM store.
     *
     * @throws MemoryException if converting a store to unsafe memory fails
     */
    public void convertToUnsafeDMStore() throws MemoryException {
        if (memoryDMStore instanceof SafeMemoryDMStore) {
            UnsafeMemoryDMStore unsafeMemoryDMStore = memoryDMStore.convertToUnsafeDMStore();
            memoryDMStore = unsafeMemoryDMStore;
        if (summaryDMStore instanceof SafeMemoryDMStore) {
            UnsafeMemoryDMStore unsafeSummaryMemoryDMStore = summaryDMStore.convertToUnsafeDMStore();
            summaryDMStore = unsafeSummaryMemoryDMStore;

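    /**
     * Read column schema from binary.
     *
     * @param schemaArray Snappy-compressed binary form of the column schemas
     * @return the list of column schemas
     * @throws IOException if the data cannot be uncompressed or read
     */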
    // Uncompresses the given bytes with Snappy and reads a short from them as the schema count.
    // NOTE(review): this copy of the method is incomplete - closing braces are missing and the
    // loop neither populates the new ColumnSchema nor adds it to the list, so the visible code
    // would return an empty list. Restore from upstream.
    public List<ColumnSchema> readColumnSchema(byte[] schemaArray) throws IOException {
        // uncompress it.
        schemaArray = Snappy.uncompress(schemaArray);
        ByteArrayInputStream schemaStream = new ByteArrayInputStream(schemaArray);
        DataInput schemaInput = new DataInputStream(schemaStream);
        List<ColumnSchema> columnSchemas = new ArrayList<>();
        // the first short of the uncompressed data is used as the number of loop iterations
        int size = schemaInput.readShort();
        for (int i = 0; i < size; i++) {
            // NOTE(review): the statements that fill this instance and add it to columnSchemas
            // are missing.
            ColumnSchema columnSchema = new ColumnSchema();
        return columnSchemas;

    public long getBlockletSchemaTime() {
        return blockletSchemaTime;
