org.apache.kylin.storage.hbase.ii.coprocessor.endpoint.EndpointTupleIterator.java Source code


Introduction

Here is the source code for org.apache.kylin.storage.hbase.ii.coprocessor.endpoint.EndpointTupleIterator.java. The class implements ITupleIterator on top of the HBase endpoint coprocessor: it pushes the row type, filter, projection and aggregators of an inverted-index query down to every region of the segment's HTable, then iterates over the decompressed rows each region returns.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.storage.hbase.ii.coprocessor.endpoint;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.annotation.Nullable;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.SerializationUtils;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.ipc.BlockingRpcCallback;
import org.apache.hadoop.hbase.ipc.ServerRpcController;
import org.apache.kylin.common.util.CompressionUtils;
import org.apache.kylin.common.util.DateFormat;
import org.apache.kylin.common.util.RangeUtil;
import org.apache.kylin.invertedindex.IISegment;
import org.apache.kylin.invertedindex.index.TableRecord;
import org.apache.kylin.invertedindex.index.TableRecordInfo;
import org.apache.kylin.measure.hllc.HLLCMeasureType;
import org.apache.kylin.metadata.filter.ConstantTupleFilter;
import org.apache.kylin.metadata.filter.TupleFilter;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.metadata.tuple.ITuple;
import org.apache.kylin.metadata.tuple.ITupleIterator;
import org.apache.kylin.metadata.tuple.Tuple;
import org.apache.kylin.metadata.tuple.TupleInfo;
import org.apache.kylin.storage.StorageContext;
import org.apache.kylin.storage.cache.TsConditionExtractor;
import org.apache.kylin.storage.hbase.common.coprocessor.CoprocessorFilter;
import org.apache.kylin.storage.hbase.common.coprocessor.CoprocessorProjector;
import org.apache.kylin.storage.hbase.common.coprocessor.CoprocessorRowType;
import org.apache.kylin.storage.hbase.common.coprocessor.FilterDecorator;
import org.apache.kylin.storage.hbase.ii.coprocessor.endpoint.generated.IIProtos;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Range;
import com.google.common.collect.Ranges;
import com.google.common.collect.Sets;
import com.google.protobuf.HBaseZeroCopyByteString;

/**
 * ITupleIterator that executes an inverted-index (II) query through the HBase endpoint
 * coprocessor: the row type, filter, projector and aggregators are pushed down to every
 * region, and the compressed shard results are streamed back as tuples.
 */
public class EndpointTupleIterator implements ITupleIterator {

    private final static Logger logger = LoggerFactory.getLogger(EndpointTupleIterator.class);

    private final IISegment seg;

    private final String factTableName;
    private final List<TblColRef> columns;
    private final TupleInfo tupleInfo;
    private final TableRecordInfo tableRecordInfo;
    private final EndpointTupleConverter tupleConverter;

    private final CoprocessorRowType pushedDownRowType;
    private final CoprocessorFilter pushedDownFilter;
    private final CoprocessorProjector pushedDownProjector;
    private final EndpointAggregators pushedDownAggregators;
    private final Range<Long> tsRange; // the timestamp column condition's interval

    private Iterator<List<IIProtos.IIResponseInternal.IIRow>> regionResponsesIterator = null;
    private ITupleIterator tupleIterator = null;
    private HTableInterface table = null;

    private TblColRef partitionCol;
    private long lastDataTime = -1;
    private int rowsInAllMetric = 0;

    public EndpointTupleIterator(IISegment segment, TupleFilter rootFilter, Collection<TblColRef> groupBy,
            List<FunctionDesc> measures, StorageContext context, HConnection conn, TupleInfo returnTupleInfo)
            throws Throwable {

        String tableName = segment.getStorageLocationIdentifier();
        table = conn.getTable(tableName);
        factTableName = segment.getIIDesc().getFactTableName();

        if (rootFilter == null) {
            rootFilter = ConstantTupleFilter.TRUE;
        }

        if (groupBy == null) {
            groupBy = Sets.newHashSet();
        }

        if (measures == null) {
            measures = Lists.newArrayList();
        }

        // note: this call modifies the measures list in place
        rewriteMeasureParameters(measures, segment.getColumns());

        this.seg = segment;
        this.columns = segment.getColumns();

        this.tupleInfo = returnTupleInfo;
        this.tupleConverter = new EndpointTupleConverter(columns, measures, returnTupleInfo);
        this.tableRecordInfo = new TableRecordInfo(this.seg);

        this.pushedDownRowType = CoprocessorRowType.fromTableRecordInfo(tableRecordInfo, this.columns);
        this.pushedDownFilter = CoprocessorFilter.fromFilter(new ClearTextDictionary(this.tableRecordInfo),
                rootFilter, FilterDecorator.FilterConstantsTreatment.AS_IT_IS);

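        // Columns that the pushed-down filter cannot evaluate are added to group-by, so the
        // endpoint returns them and the remaining filter can be applied on the query side.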
        for (TblColRef column : this.pushedDownFilter.getInevaluableColumns()) {
            groupBy.add(column);
        }

        this.pushedDownProjector = CoprocessorProjector.makeForEndpoint(tableRecordInfo, groupBy);
        this.pushedDownAggregators = EndpointAggregators.fromFunctions(tableRecordInfo, measures);

        int tsCol = this.tableRecordInfo.getTimestampColumn();
        this.partitionCol = this.columns.get(tsCol);
        this.tsRange = TsConditionExtractor.extractTsCondition(this.partitionCol, rootFilter);

        if (this.tsRange == null) {
            logger.info("TsRange conflict for endpoint, return empty directly");
            this.tupleIterator = ITupleIterator.EMPTY_TUPLE_ITERATOR;
        } else {
            logger.info("The tsRange being pushed is " + RangeUtil.formatTsRange(tsRange));
        }

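        // Build the protobuf request and invoke the endpoint coprocessor on every region;
        // each region replies with one compressed shard result.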
        IIProtos.IIRequest endpointRequest = prepareRequest();
        Collection<IIProtos.IIResponse> compressedShardResults = getResults(endpointRequest, table);

        //decompress
        Collection<IIProtos.IIResponseInternal> shardResults = new ArrayList<>();
        for (IIProtos.IIResponse input : compressedShardResults) {
            byte[] compressed = HBaseZeroCopyByteString.zeroCopyGetBytes(input.getBlob());
            try {
                byte[] decompressed = CompressionUtils.decompress(compressed);
                shardResults.add(IIProtos.IIResponseInternal.parseFrom(decompressed));
            } catch (Exception e) {
                throw new RuntimeException("failed to decompress endpoint response", e);
            }
        }

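        // Remember the earliest "latest data time" reported by any shard; data newer than this
        // is excluded from caching (see getCacheExcludedPeriod()).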
        this.lastDataTime = Collections
                .min(Collections2.transform(shardResults, new Function<IIProtos.IIResponseInternal, Long>() {
                    @Nullable
                    @Override
                    public Long apply(IIProtos.IIResponseInternal input) {

                        IIProtos.IIResponseInternal.Stats status = input.getStats();
                        logger.info(
                                "Endpoints all returned, stats from shard {}: start moment:{}, finish moment: {}, elapsed ms: {}, scanned slices: {}, latest slice time is {}", //
                                new Object[] { String.valueOf(status.getMyShard()), //
                                        DateFormat.formatToTimeStr(status.getServiceStartTime()), //
                                        DateFormat.formatToTimeStr(status.getServiceEndTime()), //
                                        String.valueOf(status.getServiceEndTime() - status.getServiceStartTime()), //
                                        String.valueOf(status.getScannedSlices()),
                                        DateFormat.formatToTimeStr(status.getLatestDataTime()) });

                        return status.getLatestDataTime();
                    }
                }));

        this.regionResponsesIterator = Collections2.transform(shardResults,
                new Function<IIProtos.IIResponseInternal, List<IIProtos.IIResponseInternal.IIRow>>() {
                    @Nullable
                    @Override
                    public List<IIProtos.IIResponseInternal.IIRow> apply(
                            @Nullable IIProtos.IIResponseInternal input) {
                        return input.getRowsList();
                    }
                }).iterator();

        if (this.regionResponsesIterator.hasNext()) {
            this.tupleIterator = new SingleRegionTupleIterator(this.regionResponsesIterator.next());
        } else {
            this.tupleIterator = ITupleIterator.EMPTY_TUPLE_ITERATOR;
        }
    }

    /**
     * Measures coming from the query engine do not carry enough information, so fill in the
     * return type and column references from the fact table columns of this segment.
     */
    private void rewriteMeasureParameters(List<FunctionDesc> measures, List<TblColRef> columns) {
        for (FunctionDesc functionDesc : measures) {
            if (functionDesc.isCount()) {
                functionDesc.setReturnType("bigint");
            } else {
                boolean updated = false;
                for (TblColRef column : columns) {
                    if (column.isSameAs(factTableName, functionDesc.getParameter().getValue())) {
                        if (HLLCMeasureType.isCountDistinct(functionDesc)) {
                            //TODO: default precision might need be configurable
                            String iiDefaultHLLC = "hllc10";
                            functionDesc.setReturnType(iiDefaultHLLC);
                        } else {
                            functionDesc.setReturnType(column.getColumnDesc().getType().toString());
                        }
                        functionDesc.getParameter().setColRefs(ImmutableList.of(column));
                        updated = true;
                        break;
                    }
                }
                if (!updated) {
                    throw new RuntimeException("Func " + functionDesc
                            + " is not related to any column in fact table " + factTableName);
                }
            }
        }
    }

    @Override
    public boolean hasNext() {
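        // Skip exhausted per-region iterators until one with remaining rows is found,
        // or report false once every region's response has been consumed.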
        while (!this.tupleIterator.hasNext()) {
            if (this.regionResponsesIterator.hasNext()) {
                this.tupleIterator = new SingleRegionTupleIterator(this.regionResponsesIterator.next());
            } else {
                return false;
            }
        }
        return true;
    }

    @Override
    public ITuple next() {
        if (!hasNext()) {
            throw new IllegalStateException("No more ITuple in EndpointTupleIterator");
        }

        rowsInAllMetric++;
        return this.tupleIterator.next();
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();

    }

    @Override
    public void close() {
        IOUtils.closeQuietly(table);
        logger.info("Closed after " + rowsInAllMetric + " rows are fetched");
    }

    /**
     * Tells the storage layer cache what time period of data should not be cached.
     * For static storage like cubes this would be null; for dynamic storage like the
     * inverted index it will, for example, exclude the last two minutes to allow for
     * possible data latency.
     *
     * @return the range of timestamps to exclude from caching
     */
    public Range<Long> getCacheExcludedPeriod() {
        Preconditions.checkArgument(lastDataTime != -1, "lastDataTime is not set yet");
        return Ranges.greaterThan(lastDataTime);
    }

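    /**
     * Serializes the timestamp range (if any), the row type, filter, projector and aggregators
     * into the protobuf request that is shipped to the II endpoint coprocessor.
     */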
    private IIProtos.IIRequest prepareRequest() throws IOException {
        IIProtos.IIRequest.Builder builder = IIProtos.IIRequest.newBuilder();

        if (this.tsRange != null) {
            byte[] tsRangeBytes = SerializationUtils.serialize(this.tsRange);
            builder.setTsRange(HBaseZeroCopyByteString.wrap(tsRangeBytes));
        }

        builder.setType(HBaseZeroCopyByteString.wrap(CoprocessorRowType.serialize(pushedDownRowType))) //
                .setFilter(HBaseZeroCopyByteString.wrap(CoprocessorFilter.serialize(pushedDownFilter))) //
                .setProjector(HBaseZeroCopyByteString.wrap(CoprocessorProjector.serialize(pushedDownProjector))) //
                .setAggregator(HBaseZeroCopyByteString.wrap(EndpointAggregators.serialize(pushedDownAggregators)));

        IIProtos.IIRequest request = builder.build();

        return request;
    }

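    /**
     * Invokes the RowsService endpoint on every region of the table (null start/end keys cover
     * the whole table) and collects one IIResponse per region.
     */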
    private Collection<IIProtos.IIResponse> getResults(final IIProtos.IIRequest request, HTableInterface table)
            throws Throwable {
        Map<byte[], IIProtos.IIResponse> results = table.coprocessorService(IIProtos.RowsService.class, null, null,
                new Batch.Call<IIProtos.RowsService, IIProtos.IIResponse>() {
                    public IIProtos.IIResponse call(IIProtos.RowsService rowsService) throws IOException {
                        ServerRpcController controller = new ServerRpcController();
                        BlockingRpcCallback<IIProtos.IIResponse> rpcCallback = new BlockingRpcCallback<>();
                        rowsService.getRows(controller, request, rpcCallback);
                        IIProtos.IIResponse response = rpcCallback.get();
                        if (controller.failedOnException()) {
                            throw controller.getFailedOn();
                        }

                        return response;
                    }
                });

        return results.values();
    }

    /**
     * Internal class to handle iterators for a single region's returned rows
     */
    class SingleRegionTupleIterator implements ITupleIterator {
        private List<IIProtos.IIResponseInternal.IIRow> rows;
        private int index = 0;

        //not thread safe!
        private TableRecord tableRecord;
        private List<Object> measureValues;
        private Tuple tuple;

        public SingleRegionTupleIterator(List<IIProtos.IIResponseInternal.IIRow> rows) {
            this.rows = rows;
            this.index = 0;
            this.tableRecord = tableRecordInfo.createTableRecord();
            this.tuple = new Tuple(tupleInfo);
        }

        @Override
        public boolean hasNext() {
            return index < rows.size();
        }

        @Override
        public ITuple next() {
            if (!hasNext()) {
                throw new IllegalStateException("No more Tuple in the SingleRegionTupleIterator");
            }

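            // Decode the dimension bytes into the shared TableRecord, then the serialized
            // measure values (if any), and convert them into the output tuple.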
            IIProtos.IIResponseInternal.IIRow currentRow = rows.get(index);
            byte[] columnsBytes = HBaseZeroCopyByteString.zeroCopyGetBytes(currentRow.getColumns());
            this.tableRecord.setBytes(columnsBytes, 0, columnsBytes.length);
            if (currentRow.hasMeasures()) {
                ByteBuffer buffer = currentRow.getMeasures().asReadOnlyByteBuffer();
                this.measureValues = pushedDownAggregators.deserializeMetricValues(buffer);
            }

            index++;

            return tupleConverter.makeTuple(this.tableRecord, this.measureValues, this.tuple);
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void close() {
        }

    }
}
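
Example

The sketch below shows, roughly, how a storage engine could drive this iterator. It is a minimal, hypothetical driver: the class EndpointTupleIteratorExample and its scan method are not part of Kylin, and all parameters are assumed to be prepared elsewhere by the query engine and storage layer.

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.client.HConnection;
import org.apache.kylin.invertedindex.IISegment;
import org.apache.kylin.metadata.filter.TupleFilter;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.metadata.tuple.ITuple;
import org.apache.kylin.metadata.tuple.ITupleIterator;
import org.apache.kylin.metadata.tuple.TupleInfo;
import org.apache.kylin.storage.StorageContext;
import org.apache.kylin.storage.hbase.ii.coprocessor.endpoint.EndpointTupleIterator;

// Hypothetical helper class, for illustration only.
public class EndpointTupleIteratorExample {

    /** All parameters are assumed to be supplied by the query engine and storage layer. */
    public static void scan(IISegment segment, TupleFilter filter, Collection<TblColRef> groupBy,
            List<FunctionDesc> measures, StorageContext context, HConnection conn,
            TupleInfo returnTupleInfo) throws Throwable {

        ITupleIterator iterator = new EndpointTupleIterator(segment, filter, groupBy, measures,
                context, conn, returnTupleInfo);
        try {
            while (iterator.hasNext()) {
                ITuple tuple = iterator.next(); // one converted row from the endpoint responses
                // consume the tuple here ...
            }
        } finally {
            iterator.close(); // releases the underlying HTableInterface
        }
    }
}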