com.linkedin.pinot.core.segment.index.SegmentMetadataImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.core.segment.index.SegmentMetadataImpl.java

Source

/**
 * Copyright (C) 2014-2015 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.segment.index;

import com.linkedin.pinot.common.data.StarTreeIndexSpec;
import com.linkedin.pinot.common.utils.time.TimeUtils;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.*;
import java.util.concurrent.TimeUnit;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.joda.time.Duration;
import org.joda.time.Interval;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.FieldSpec.FieldType;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata;
import com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
import com.linkedin.pinot.common.segment.SegmentMetadata;
import com.linkedin.pinot.core.indexsegment.IndexType;
import com.linkedin.pinot.core.indexsegment.generator.SegmentVersion;
import com.linkedin.pinot.core.segment.creator.impl.V1Constants;

import static com.linkedin.pinot.core.segment.creator.impl.V1Constants.MetadataKeys;
import static com.linkedin.pinot.core.segment.creator.impl.V1Constants.MetadataKeys.Segment;
import static com.linkedin.pinot.core.segment.creator.impl.V1Constants.MetadataKeys.Segment.TIME_UNIT;

/**
 * {@link SegmentMetadata} implementation backed by a {@link PropertiesConfiguration}.
 *
 * <p>An instance is created in one of two ways:
 * <ul>
 *   <li>from a segment on disk ({@link #SegmentMetadataImpl(File)}): the segment metadata properties file is
 *       parsed into per-column metadata and a {@link Schema}, and the optional creation-meta file supplies the
 *       CRC and creation time;</li>
 *   <li>from segment ZK metadata: only table name, time range, time unit, CRC and timestamps are populated and
 *       no per-column metadata exists ({@link #getColumnMetadataMap()} returns {@code null}).</li>
 * </ul>
 *
 * Nov 12, 2014
 */
public class SegmentMetadataImpl implements SegmentMetadata {

    private static final Logger LOGGER = LoggerFactory.getLogger(SegmentMetadataImpl.class);

    // NOTE: toString() reflects over the declared fields, so adding/reordering fields changes its output.
    private final PropertiesConfiguration _segmentMetadataPropertiesConfiguration;
    private final File _metadataFile;
    // null when the instance was built from ZK metadata.
    private final Map<String, ColumnMetadata> _columnMetadataMap;
    private String _segmentName;
    private final Set<String> _allColumns;
    private final Schema _schema;
    // null when the instance was built from ZK metadata.
    private final String _indexDir;
    // Long.MIN_VALUE marks "not set" for the four values below.
    private long _crc = Long.MIN_VALUE;
    private long _creationTime = Long.MIN_VALUE;
    private Interval _timeInterval;
    private Duration _timeGranularity;
    private long _pushTime = Long.MIN_VALUE;
    private long _refreshTime = Long.MIN_VALUE;

    /**
     * Loads segment metadata from disk.
     *
     * @param indexDir either the segment directory (the metadata file is then looked up inside it) or the
     *                 metadata file itself
     * @throws ConfigurationException if the metadata properties file cannot be loaded
     * @throws IOException if the creation-meta file exists but cannot be read
     */
    public SegmentMetadataImpl(File indexDir) throws ConfigurationException, IOException {
        LOGGER.debug("SegmentMetadata location: {}", indexDir);
        if (indexDir.isDirectory()) {
            _metadataFile = new File(indexDir, MetadataKeys.METADATA_FILE_NAME);
        } else {
            _metadataFile = indexDir;
        }
        _segmentMetadataPropertiesConfiguration = new PropertiesConfiguration(_metadataFile);
        _columnMetadataMap = new HashMap<String, ColumnMetadata>();
        _allColumns = new HashSet<String>();
        _schema = new Schema();

        // Resolve the segment directory from the metadata file that was actually chosen above, so that passing
        // the metadata file itself yields its containing directory rather than the file path.
        final File absoluteMetadataFile = _metadataFile.getAbsoluteFile();
        _indexDir = absoluteMetadataFile.getParent();

        init();
        loadCreationMeta(new File(absoluteMetadataFile.getParentFile(), V1Constants.SEGMENT_CREATION_META));
        setTimeIntervalAndGranularity();
        LOGGER.info("loaded metadata for {}", indexDir.getName());
    }

    /**
     * Builds metadata from offline segment ZK metadata. No column metadata, index directory or metadata file is
     * available on the resulting instance.
     *
     * @param offlineSegmentZKMetadata the ZK metadata to copy values from
     */
    public SegmentMetadataImpl(OfflineSegmentZKMetadata offlineSegmentZKMetadata) {
        _segmentMetadataPropertiesConfiguration = buildZkConfiguration(offlineSegmentZKMetadata.getStartTime(),
                offlineSegmentZKMetadata.getEndTime(), offlineSegmentZKMetadata.getTableName(),
                offlineSegmentZKMetadata.getTimeUnit());

        _crc = offlineSegmentZKMetadata.getCrc();
        _creationTime = offlineSegmentZKMetadata.getCreationTime();
        _pushTime = offlineSegmentZKMetadata.getPushTime();
        _refreshTime = offlineSegmentZKMetadata.getRefreshTime();
        setTimeIntervalAndGranularity();
        _columnMetadataMap = null;
        _segmentName = offlineSegmentZKMetadata.getSegmentName();
        _schema = new Schema();
        _allColumns = new HashSet<String>();
        _indexDir = null;
        _metadataFile = null;
    }

    /**
     * Builds metadata from realtime segment ZK metadata. No column metadata, index directory or metadata file is
     * available on the resulting instance; push and refresh times keep their "not set" value.
     *
     * @param segmentMetadata the ZK metadata to copy values from
     */
    public SegmentMetadataImpl(RealtimeSegmentZKMetadata segmentMetadata) {
        _segmentMetadataPropertiesConfiguration = buildZkConfiguration(segmentMetadata.getStartTime(),
                segmentMetadata.getEndTime(), segmentMetadata.getTableName(), segmentMetadata.getTimeUnit());

        _crc = segmentMetadata.getCrc();
        _creationTime = segmentMetadata.getCreationTime();
        setTimeIntervalAndGranularity();
        _columnMetadataMap = null;
        _segmentName = segmentMetadata.getSegmentName();
        _schema = new Schema();
        _allColumns = new HashSet<String>();
        _indexDir = null;
        _metadataFile = null;
    }

    /**
     * Same as {@link #SegmentMetadataImpl(RealtimeSegmentZKMetadata)}, additionally copying every column of the
     * given schema into this instance's schema.
     *
     * @param segmentMetadata the ZK metadata to copy values from
     * @param schema the schema whose field specs are copied
     */
    public SegmentMetadataImpl(RealtimeSegmentZKMetadata segmentMetadata, Schema schema) {
        this(segmentMetadata);
        setSchema(schema);
    }

    /**
     * Creates the in-memory configuration used by the ZK-metadata constructors, holding start time, end time,
     * table name and time unit.
     */
    private static PropertiesConfiguration buildZkConfiguration(long startTime, long endTime, String tableName,
            TimeUnit timeUnit) {
        final PropertiesConfiguration config = new PropertiesConfiguration();
        config.addProperty(Segment.SEGMENT_START_TIME, Long.toString(startTime));
        config.addProperty(Segment.SEGMENT_END_TIME, Long.toString(endTime));
        config.addProperty(Segment.TABLE_NAME, tableName);
        if (timeUnit != null) {
            config.addProperty(TIME_UNIT, timeUnit.toString());
        } else {
            // Kept for parity with the previous behavior.
            // NOTE(review): presumably adding a null value stores nothing in commons-configuration - confirm.
            config.addProperty(TIME_UNIT, null);
        }
        return config;
    }

    /** Copies the field spec of every column in {@code schema} into this instance's schema. */
    private void setSchema(Schema schema) {
        for (String columnName : schema.getColumnNames()) {
            _schema.addSchema(columnName, schema.getFieldSpecFor(columnName));
        }
    }

    /**
     * Derives the time interval and granularity from the start time, end time and time unit properties.
     * Nothing is set when any of the three keys is missing; both values are reset to {@code null} when the
     * stored values cannot be converted.
     */
    private void setTimeIntervalAndGranularity() {
        final PropertiesConfiguration config = _segmentMetadataPropertiesConfiguration;
        if (!config.containsKey(Segment.SEGMENT_START_TIME) || !config.containsKey(Segment.SEGMENT_END_TIME)
                || !config.containsKey(TIME_UNIT)) {
            return;
        }

        try {
            final TimeUnit segmentTimeUnit = TimeUtils.timeUnitFromString(config.getString(TIME_UNIT));
            _timeGranularity = new Duration(segmentTimeUnit.toMillis(1));
            final String startTimeString = config.getString(Segment.SEGMENT_START_TIME);
            final String endTimeString = config.getString(Segment.SEGMENT_END_TIME);
            _timeInterval = new Interval(segmentTimeUnit.toMillis(Long.parseLong(startTimeString)),
                    segmentTimeUnit.toMillis(Long.parseLong(endTimeString)));
        } catch (Exception e) {
            // Best effort: a malformed time range must not prevent the metadata from loading.
            LOGGER.warn("Caught exception while setting time interval and granularity", e);
            _timeInterval = null;
            _timeGranularity = null;
        }
    }

    /**
     * Reads the CRC and the creation time (two consecutive longs) from the creation-meta file, if it exists.
     *
     * @param crcFile the creation-meta file
     * @throws IOException if the file exists but cannot be read completely
     */
    private void loadCreationMeta(File crcFile) throws IOException {
        if (!crcFile.exists()) {
            return;
        }
        // try-with-resources closes the stream even when a read fails.
        try (DataInputStream ds = new DataInputStream(new FileInputStream(crcFile))) {
            _crc = ds.readLong();
            _creationTime = ds.readLong();
        }
    }

    /**
     * @return the names of all columns listed in the segment metadata; empty for ZK-built instances
     */
    public Set<String> getAllColumns() {
        return _allColumns;
    }

    /**
     * Populates column names, star-tree configuration, segment name, per-column metadata and the schema from
     * the loaded properties, then validates the star-tree dimensions against the schema.
     */
    private void init() {
        _allColumns.addAll(readNonBlankValues(Segment.METRICS));
        _allColumns.addAll(readNonBlankValues(Segment.DIMENSIONS));
        _allColumns.addAll(readNonBlankValues(Segment.UNKNOWN_COLUMNS));
        _allColumns.addAll(readNonBlankValues(Segment.TIME_COLUMN_NAME));

        // StarTree config here
        final boolean starTreeEnabled = _segmentMetadataPropertiesConfiguration
                .getBoolean(MetadataKeys.StarTree.STAR_TREE_ENABLED, false);
        if (starTreeEnabled) {
            _schema.setStarTreeIndexSpec(readStarTreeIndexSpec());
        }

        _segmentName = _segmentMetadataPropertiesConfiguration.getString(Segment.SEGMENT_NAME);

        for (final String column : _allColumns) {
            _columnMetadataMap.put(column, extractColumnMetadataFor(column));
        }

        for (final Map.Entry<String, ColumnMetadata> entry : _columnMetadataMap.entrySet()) {
            _schema.addSchema(entry.getKey(), entry.getValue().toFieldSpec());
        }

        validateStarTreeDimensions();
    }

    /**
     * Returns the entries of the list property {@code key}, skipping entries that are empty after trimming.
     * Entries are returned as stored (not trimmed).
     */
    private List<String> readNonBlankValues(String key) {
        final List<String> values = new ArrayList<>();
        for (final Object entry : _segmentMetadataPropertiesConfiguration.getList(key)) {
            final String value = (String) entry;
            if (value.trim().length() > 0) {
                values.add(value);
            }
        }
        return values;
    }

    /** Builds the star-tree index spec from the star-tree properties of the segment metadata. */
    private StarTreeIndexSpec readStarTreeIndexSpec() {
        final StarTreeIndexSpec starTreeIndexSpec = new StarTreeIndexSpec();
        starTreeIndexSpec.setSplitOrder(readNonBlankValues(MetadataKeys.StarTree.SPLIT_ORDER));
        starTreeIndexSpec.setSplitExcludes(readNonBlankValues(MetadataKeys.StarTree.SPLIT_EXCLUDES));
        starTreeIndexSpec.setExcludedDimensions(readNonBlankValues(MetadataKeys.StarTree.EXCLUDED_DIMENSIONS));

        // Max leaf records
        final int maxLeafRecords = Integer.parseInt((String) _segmentMetadataPropertiesConfiguration
                .getProperty(MetadataKeys.StarTree.MAX_LEAF_RECORDS));
        starTreeIndexSpec.setMaxLeafRecords(maxLeafRecords);
        return starTreeIndexSpec;
    }

    /**
     * Checks that all the star-tree split/exclude dimensions are in the schema, if a star-tree spec is set.
     *
     * @throws IllegalStateException if a configured dimension is not a dimension of the schema
     */
    private void validateStarTreeDimensions() {
        final StarTreeIndexSpec starTreeIndexSpec = _schema.getStarTreeIndexSpec();
        if (starTreeIndexSpec == null) {
            return;
        }
        requireSchemaDimensions("Split order dimension ", starTreeIndexSpec.getSplitOrder());
        requireSchemaDimensions("Split exclude dimension ", starTreeIndexSpec.getSplitExcludes());
        requireSchemaDimensions("Excluded dimension ", starTreeIndexSpec.getExcludedDimensions());
    }

    /**
     * Throws if any of {@code dimensions} is not among the schema's dimension names; a {@code null} collection
     * is accepted and ignored.
     */
    private void requireSchemaDimensions(String messagePrefix, Iterable<String> dimensions) {
        if (dimensions == null) {
            return;
        }
        for (final String dimension : dimensions) {
            if (!_schema.getDimensionNames().contains(dimension)) {
                throw new IllegalStateException(messagePrefix + dimension + " not in schema " + _schema);
            }
        }
    }

    /** Returns the configuration key holding {@code property} of {@code column}. */
    private static String columnKey(String column, String property) {
        return MetadataKeys.Column.getKeyFor(column, property);
    }

    /**
     * Reads all per-column properties of {@code column} from the segment metadata.
     * Only the has-dictionary flag has a default ({@code true}); the time unit falls back to
     * {@link TimeUnit#DAYS} when the segment does not declare one.
     */
    private ColumnMetadata extractColumnMetadataFor(String column) {
        final PropertiesConfiguration config = _segmentMetadataPropertiesConfiguration;

        final int cardinality = config.getInt(columnKey(column, MetadataKeys.Column.CARDINALITY));
        final int totalDocs = config.getInt(columnKey(column, MetadataKeys.Column.TOTAL_DOCS));
        final DataType dataType = DataType
                .valueOf(config.getString(columnKey(column, MetadataKeys.Column.DATA_TYPE)));
        final int bitsPerElement = config.getInt(columnKey(column, MetadataKeys.Column.BITS_PER_ELEMENT));
        final int stringColumnMaxLength = config
                .getInt(columnKey(column, MetadataKeys.Column.DICTIONARY_ELEMENT_SIZE));
        final FieldType fieldType = FieldType
                .valueOf(config.getString(columnKey(column, MetadataKeys.Column.COLUMN_TYPE)).toUpperCase());
        final boolean isSorted = config.getBoolean(columnKey(column, MetadataKeys.Column.IS_SORTED));
        final boolean hasInvertedIndex = config
                .getBoolean(columnKey(column, MetadataKeys.Column.HAS_INVERTED_INDEX));
        final boolean isSingleValue = config.getBoolean(columnKey(column, MetadataKeys.Column.IS_SINGLE_VALUED));
        final int maxNumberOfMultiValues = config
                .getInt(columnKey(column, MetadataKeys.Column.MAX_MULTI_VALUE_ELEMTS));
        final boolean hasNulls = config.getBoolean(columnKey(column, MetadataKeys.Column.HAS_NULL_VALUE));

        TimeUnit segmentTimeUnit = TimeUnit.DAYS;
        if (config.containsKey(TIME_UNIT)) {
            segmentTimeUnit = TimeUtils.timeUnitFromString(config.getString(TIME_UNIT));
        }

        final boolean hasDictionary = config.getBoolean(columnKey(column, MetadataKeys.Column.HAS_DICTIONARY),
                true);
        final int totalNumberOfEntries = config
                .getInt(columnKey(column, MetadataKeys.Column.TOTAL_NUMBER_OF_ENTRIES));

        return new ColumnMetadata(column, cardinality, totalDocs, dataType, bitsPerElement, stringColumnMaxLength,
                fieldType, isSorted, hasInvertedIndex, isSingleValue, maxNumberOfMultiValues, hasNulls,
                hasDictionary, segmentTimeUnit, totalNumberOfEntries);
    }

    /**
     * @param column the column name
     * @return the metadata of {@code column}, or {@code null} if the column is unknown or this instance was
     *         built from ZK metadata (no column metadata available)
     */
    public ColumnMetadata getColumnMetadataFor(String column) {
        return _columnMetadataMap == null ? null : _columnMetadataMap.get(column);
    }

    /**
     * @return the column-name to column-metadata map; {@code null} for instances built from ZK metadata
     */
    public Map<String, ColumnMetadata> getColumnMetadataMap() {
        return _columnMetadataMap;
    }

    /** @return the value stored under the table-name key, or {@code null} if absent */
    @Override
    public String getTableName() {
        return (String) _segmentMetadataPropertiesConfiguration.getProperty(Segment.TABLE_NAME);
    }

    /** @return always the columnar index type */
    @Override
    public String getIndexType() {
        return IndexType.COLUMNAR.toString();
    }

    /** @return the duration of one segment time unit, or {@code null} if it could not be determined */
    @Override
    public Duration getTimeGranularity() {
        return _timeGranularity;
    }

    /** @return the segment time range in milliseconds, or {@code null} if it could not be determined */
    @Override
    public Interval getTimeInterval() {
        return _timeInterval;
    }

    /** @return the CRC as a decimal string ({@code Long.MIN_VALUE} if it was never set) */
    @Override
    public String getCrc() {
        return String.valueOf(_crc);
    }

    /** @return always the v1 segment version */
    @Override
    public String getVersion() {
        return SegmentVersion.v1.toString();
    }

    /** @return the schema assembled for this segment */
    @Override
    public Schema getSchema() {
        return _schema;
    }

    /** @return always {@code null}; no sharding key is tracked */
    @Override
    public String getShardingKey() {
        return null;
    }

    /**
     * @return the total document count stored in the segment metadata
     */
    @Override
    public int getTotalDocs() {
        // NOTE(review): no default is supplied, so this presumably throws when the key is absent
        // (e.g. on ZK-built instances) - confirm against callers.
        return _segmentMetadataPropertiesConfiguration.getInt(Segment.SEGMENT_TOTAL_DOCS);
    }

    /** @return the total aggregate document count, or 0 if the key is absent */
    @Override
    public int getTotalAggregateDocs() {
        return _segmentMetadataPropertiesConfiguration.getInt(Segment.SEGMENT_TOTAL_AGGREGATE_DOCS, 0);
    }

    /** @return the absolute path of the segment directory, or {@code null} for ZK-built instances */
    @Override
    public String getIndexDir() {
        return _indexDir;
    }

    /** @return the segment name */
    @Override
    public String getName() {
        return _segmentName;
    }

    /**
     * @return a new map with table name, total docs, version, segment name, CRC, creation time, start time,
     *         end time and time unit, all as strings
     */
    @Override
    public Map<String, String> toMap() {
        final PropertiesConfiguration config = _segmentMetadataPropertiesConfiguration;
        final Map<String, String> ret = new HashMap<String, String>();
        ret.put(Segment.TABLE_NAME, getTableName());
        ret.put(Segment.SEGMENT_TOTAL_DOCS, String.valueOf(getTotalDocs()));
        ret.put(V1Constants.VERSION, getVersion());
        ret.put(Segment.SEGMENT_NAME, getName());
        ret.put(Segment.SEGMENT_CRC, getCrc());
        ret.put(Segment.SEGMENT_CREATION_TIME, String.valueOf(getIndexCreationTime()));
        ret.put(Segment.SEGMENT_START_TIME, config.getString(Segment.SEGMENT_START_TIME));
        ret.put(Segment.SEGMENT_END_TIME, config.getString(Segment.SEGMENT_END_TIME));
        ret.put(TIME_UNIT, config.getString(TIME_UNIT));
        return ret;
    }

    /**
     * Renders every field declared by the runtime class as a {@code name: value} line, read reflectively.
     * A field that cannot be accessed is rendered as {@code ERROR}.
     */
    @Override
    public String toString() {
        final StringBuilder result = new StringBuilder();
        final String newLine = System.getProperty("line.separator");

        result.append(this.getClass().getName());
        result.append(" Object {");
        result.append(newLine);

        // Fields declared in this class only (no fields of superclass).
        final Field[] fields = this.getClass().getDeclaredFields();

        for (final Field field : fields) {
            result.append("  ");
            try {
                result.append(field.getName());
                result.append(": ");
                // Requires access to private fields.
                result.append(field.get(this));
            } catch (final IllegalAccessException ex) {
                LOGGER.warn("Caught exception while trying to access field {}", field, ex);
                result.append("ERROR");
            }
            result.append(newLine);
        }
        result.append("}");

        return result.toString();
    }

    /** @return the creation time, or {@code Long.MIN_VALUE} if it was never set */
    @Override
    public long getIndexCreationTime() {
        return _creationTime;
    }

    /** @return the push time, or {@code Long.MIN_VALUE} if it was never set */
    @Override
    public long getPushTime() {
        return _pushTime;
    }

    /** @return the refresh time, or {@code Long.MIN_VALUE} if it was never set */
    @Override
    public long getRefreshTime() {
        return _refreshTime;
    }

    /**
     * @param columnName the column name
     * @return whether the column has a dictionary
     * @throws NullPointerException if no metadata is available for {@code columnName} (unknown column, or an
     *         instance built from ZK metadata)
     */
    @Override
    public boolean hasDictionary(String columnName) {
        final ColumnMetadata columnMetadata = _columnMetadataMap == null ? null
                : _columnMetadataMap.get(columnName);
        // Same exception type as before, but with a message that names the offending column.
        return Objects.requireNonNull(columnMetadata, "No column metadata available for column: " + columnName)
                .hasDictionary();
    }

    /** @return always {@code false}; there is nothing to release */
    @Override
    public boolean close() {
        return false;
    }

    /** @return whether a star-tree index spec is set on the schema */
    @Override
    public boolean hasStarTree() {
        return _schema.getStarTreeIndexSpec() != null;
    }
}