org.apache.carbondata.core.datamap.DistributableDataMapFormat.java Source code

Introduction

Here is the source code for org.apache.carbondata.core.datamap.DistributableDataMapFormat.java. This class is a Hadoop input format for CarbonData datamaps: it wraps datamap pruning as input splits so the pruning work can be distributed across executors.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.carbondata.core.datamap;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.carbondata.core.datamap.dev.DataMap;
import org.apache.carbondata.core.datamap.dev.expr.DataMapDistributableWrapper;
import org.apache.carbondata.core.datamap.dev.expr.DataMapExprWrapper;
import org.apache.carbondata.core.datastore.block.SegmentPropertiesAndSchemaHolder;
import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
import org.apache.carbondata.core.indexstore.PartitionSpec;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

/**
 * Input format for datamaps; it makes datamap pruning distributable.
 */
public class DistributableDataMapFormat extends FileInputFormat<Void, ExtendedBlocklet> implements Serializable {

    private CarbonTable table;

    private DataMapExprWrapper dataMapExprWrapper;

    private List<Segment> validSegments;

    private List<Segment> invalidSegments;

    private List<PartitionSpec> partitions;

    private DataMapDistributableWrapper distributable;

    private boolean isJobToClearDataMaps = false;

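    /**
     * @param table                table whose datamaps are pruned
     * @param dataMapExprWrapper   wrapper over the datamap pruning expression
     * @param validSegments        segments to prune
     * @param invalidSegments      segments whose entries must be cleared from the cache
     * @param partitions           partitions used to restrict the pruning
     * @param isJobToClearDataMaps true if the job should only clear the datamap caches
     */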
    DistributableDataMapFormat(CarbonTable table, DataMapExprWrapper dataMapExprWrapper,
            List<Segment> validSegments, List<Segment> invalidSegments, List<PartitionSpec> partitions,
            boolean isJobToClearDataMaps) {
        this.table = table;
        this.dataMapExprWrapper = dataMapExprWrapper;
        this.validSegments = validSegments;
        this.invalidSegments = invalidSegments;
        this.partitions = partitions;
        this.isJobToClearDataMaps = isJobToClearDataMaps;
    }

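    /**
     * Each datamap distributable unit becomes one input split, so the pruning
     * of each unit can be scheduled as a separate task.
     */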
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        List<DataMapDistributableWrapper> distributables = dataMapExprWrapper.toDistributable(validSegments);
        List<InputSplit> inputSplits = new ArrayList<>(distributables.size());
        inputSplits.addAll(distributables);
        return inputSplits;
    }

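    /**
     * The returned reader does the actual work on the executor: it prunes the
     * blocklets for one distributable unit (or only clears the datamap cache
     * when isJobToClearDataMaps is set) and iterates over the pruned results.
     */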
    @Override
    public RecordReader<Void, ExtendedBlocklet> createRecordReader(InputSplit inputSplit,
            TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        return new RecordReader<Void, ExtendedBlocklet>() {
            private Iterator<ExtendedBlocklet> blockletIterator;
            private ExtendedBlocklet currBlocklet;
            private List<DataMap> dataMaps;

            @Override
            public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
                    throws IOException, InterruptedException {
                distributable = (DataMapDistributableWrapper) inputSplit;
                // clear the segment map and the datamap cache on the executor when there are invalid segments
                if (invalidSegments.size() > 0) {
                    DataMapStoreManager.getInstance().clearInvalidSegments(table, invalidSegments);
                }
                TableDataMap tableDataMap = DataMapStoreManager.getInstance().getDataMap(table,
                        distributable.getDistributable().getDataMapSchema());
                if (isJobToClearDataMaps) {
                    // if the job is only meant to clear datamaps, clear them from the cache and return
                    DataMapStoreManager.getInstance()
                            .clearDataMaps(table.getCarbonTableIdentifier().getTableUniqueName());
                    // clear the segment properties cache from executor
                    SegmentPropertiesAndSchemaHolder.getInstance().invalidate(table.getAbsoluteTableIdentifier());
                    blockletIterator = Collections.emptyIterator();
                    return;
                }
                dataMaps = tableDataMap.getTableDataMaps(distributable.getDistributable());
                List<ExtendedBlocklet> blocklets = tableDataMap.prune(dataMaps, distributable.getDistributable(),
                        dataMapExprWrapper.getFilterResolverIntf(distributable.getUniqueId()), partitions);
                for (ExtendedBlocklet blocklet : blocklets) {
                    blocklet.setDataMapUniqueId(distributable.getUniqueId());
                }
                blockletIterator = blocklets.iterator();
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                boolean hasNext = blockletIterator.hasNext();
                if (hasNext) {
                    currBlocklet = blockletIterator.next();
                } else {
                    // close all resources when all the results are returned
                    close();
                }
                return hasNext;
            }

            @Override
            public Void getCurrentKey() throws IOException, InterruptedException {
                // the key is unused; the pruned blocklet is exposed as the value
                return null;
            }

            @Override
            public ExtendedBlocklet getCurrentValue() throws IOException, InterruptedException {
                return currBlocklet;
            }

            @Override
            public float getProgress() throws IOException, InterruptedException {
                // progress reporting is not supported for datamap pruning
                return 0;
            }

            @Override
            public void close() throws IOException {
                if (null != dataMaps) {
                    for (DataMap dataMap : dataMaps) {
                        dataMap.finish();
                    }
                }
            }
        };
    }

}
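
Example

A minimal sketch of how the Hadoop InputFormat contract of this class could be driven end to end. The CarbonTable, DataMapExprWrapper, segment lists, and partition specs are assumed to be obtained elsewhere; the DistributableDataMapFormatExample class and its pruneAll helper are illustrative names, not part of CarbonData. Because the constructor above is package-private, the sketch has to live in the org.apache.carbondata.core.datamap package. It reads the splits sequentially in one JVM for illustration, whereas a real deployment would schedule each split as its own task.

package org.apache.carbondata.core.datamap;

import java.util.ArrayList;
import java.util.List;

import org.apache.carbondata.core.datamap.dev.expr.DataMapExprWrapper;
import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
import org.apache.carbondata.core.indexstore.PartitionSpec;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class DistributableDataMapFormatExample {

    static List<ExtendedBlocklet> pruneAll(CarbonTable table, DataMapExprWrapper wrapper,
            List<Segment> validSegments, List<Segment> invalidSegments,
            List<PartitionSpec> partitions) throws Exception {
        // false: this is a pruning job, not a cache-clearing job
        DistributableDataMapFormat format = new DistributableDataMapFormat(table, wrapper,
                validSegments, invalidSegments, partitions, false);
        Configuration conf = new Configuration();
        List<ExtendedBlocklet> prunedBlocklets = new ArrayList<>();
        // one split per datamap distributable unit
        for (InputSplit split : format.getSplits(Job.getInstance(conf))) {
            TaskAttemptContextImpl context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
            RecordReader<Void, ExtendedBlocklet> reader = format.createRecordReader(split, context);
            reader.initialize(split, context);
            // nextKeyValue() closes the reader itself once the iterator is drained
            while (reader.nextKeyValue()) {
                prunedBlocklets.add(reader.getCurrentValue());
            }
        }
        return prunedBlocklets;
    }
}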