Java tutorial: reading a CarbonData table with CarbonInputFormat

This tutorial walks through org.apache.carbondata.hadoop.CarbonInputFormat, a Hadoop FileInputFormat that exposes one CarbonData table to MapReduce jobs. The class serializes the table identity, projection, and filter into the job configuration, prunes data blocks against driver-side B-tree indexes when computing splits, and hands each split to a CarbonRecordReader on the task side. The full source follows, presented in parts; short usage sketches follow the parts they illustrate.
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.carbondata.hadoop;

import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import org.apache.carbondata.core.carbon.AbsoluteTableIdentifier;
import org.apache.carbondata.core.carbon.CarbonTableIdentifier;
import org.apache.carbondata.core.carbon.datastore.DataRefNode;
import org.apache.carbondata.core.carbon.datastore.DataRefNodeFinder;
import org.apache.carbondata.core.carbon.datastore.IndexKey;
import org.apache.carbondata.core.carbon.datastore.SegmentTaskIndexStore;
import org.apache.carbondata.core.carbon.datastore.block.AbstractIndex;
import org.apache.carbondata.core.carbon.datastore.block.SegmentProperties;
import org.apache.carbondata.core.carbon.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.carbon.datastore.exception.IndexBuilderException;
import org.apache.carbondata.core.carbon.datastore.impl.btree.BTreeDataRefNodeFinder;
import org.apache.carbondata.core.carbon.datastore.impl.btree.BlockBTreeLeafNode;
import org.apache.carbondata.core.carbon.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.carbon.path.CarbonStorePath;
import org.apache.carbondata.core.carbon.path.CarbonTablePath;
import org.apache.carbondata.core.carbon.querystatistics.QueryStatistic;
import org.apache.carbondata.core.carbon.querystatistics.QueryStatisticsRecorder;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.keygenerator.KeyGenException;
import org.apache.carbondata.core.util.CarbonProperties;
import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport;
import org.apache.carbondata.hadoop.readsupport.impl.DictionaryDecodedReadSupportImpl;
import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil;
import org.apache.carbondata.hadoop.util.ObjectSerializationUtil;
import org.apache.carbondata.hadoop.util.SchemaReader;
import org.apache.carbondata.lcm.status.SegmentStatusManager;
import org.apache.carbondata.scan.executor.exception.QueryExecutionException;
import org.apache.carbondata.scan.expression.Expression;
import org.apache.carbondata.scan.expression.exception.FilterUnsupportedException;
import org.apache.carbondata.scan.filter.FilterExpressionProcessor;
import org.apache.carbondata.scan.filter.FilterUtil;
import org.apache.carbondata.scan.filter.resolver.FilterResolverIntf;
import org.apache.carbondata.scan.model.CarbonQueryPlan;
import org.apache.carbondata.scan.model.QueryModel;

import static org.apache.carbondata.core.constants.CarbonCommonConstants.INVALID_SEGMENT_ID;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.util.StringUtils;
/**
 * Carbon InputFormat class representing one carbon table.
 */
public class CarbonInputFormat<T> extends FileInputFormat<Void, T> {

  // comma separated list of input segment numbers
  public static final String INPUT_SEGMENT_NUMBERS =
      "mapreduce.input.carboninputformat.segmentnumbers";
  private static final Log LOG = LogFactory.getLog(CarbonInputFormat.class);
  private static final String DATABASE_NAME = "mapreduce.input.carboninputformat.databasename";
  private static final String TABLE_NAME = "mapreduce.input.carboninputformat.tablename";
  private static final String FILTER_PREDICATE =
      "mapreduce.input.carboninputformat.filter.predicate";
  private static final String COLUMN_PROJECTION = "mapreduce.input.carboninputformat.projection";
  private static final String CARBON_TABLE = "mapreduce.input.carboninputformat.table";
  private static final String CARBON_READ_SUPPORT = "mapreduce.input.carboninputformat.readsupport";
  private static final String TABLE_ID = "mapreduce.input.carboninputformat.tableId";

  public static void setTableToAccess(Configuration configuration,
      CarbonTableIdentifier tableIdentifier) {
    configuration.set(CarbonInputFormat.DATABASE_NAME, tableIdentifier.getDatabaseName());
    configuration.set(CarbonInputFormat.TABLE_NAME, tableIdentifier.getTableName());
    configuration.set(CarbonInputFormat.TABLE_ID, tableIdentifier.getTableId());
  }

  /**
   * Get the CarbonTableIdentifier from the job configuration.
   */
  public static CarbonTableIdentifier getTableToAccess(Configuration configuration) {
    String databaseName = configuration.get(CarbonInputFormat.DATABASE_NAME);
    String tableName = configuration.get(CarbonInputFormat.TABLE_NAME);
    String tableId = configuration.get(CarbonInputFormat.TABLE_ID);
    if (databaseName != null && tableName != null) {
      return new CarbonTableIdentifier(databaseName, tableName, tableId);
    }
    // TODO: better to raise an exception here
    return null;
  }

  /**
   * Setting the CarbonTable is optional; if the user does not set it,
   * the schema is read from the store.
   *
   * @param configuration
   * @param carbonTable
   * @throws IOException
   */
  public static void setCarbonTable(Configuration configuration, CarbonTable carbonTable)
      throws IOException {
    if (null != carbonTable) {
      configuration.set(CARBON_TABLE, ObjectSerializationUtil.convertObjectToString(carbonTable));
    }
  }

  public static CarbonTable getCarbonTable(Configuration configuration) throws IOException {
    String carbonTableStr = configuration.get(CARBON_TABLE);
    if (carbonTableStr == null) {
      // not set by the user: read the schema from the store and cache it
      CarbonTable carbonTable = new SchemaReader()
          .readCarbonTableFromStore(getTablePath(configuration), getTableToAccess(configuration),
              getStorePathString(configuration));
      setCarbonTable(configuration, carbonTable);
      return carbonTable;
    }
    return (CarbonTable) ObjectSerializationUtil.convertStringToObject(carbonTableStr);
  }
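The setters above are the minimum a client needs before the format can locate a table: the input directory names the store, and setTableToAccess names the table inside it. A minimal driver-side sketch, using only the APIs shown so far plus standard Hadoop; the store path /opt/carbonStore, the database/table names, and the table id "1" are placeholder values, not taken from the listing:

import org.apache.carbondata.core.carbon.CarbonTableIdentifier;
import org.apache.carbondata.hadoop.CarbonInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class TableAccessExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "carbon-table-access");
    // INPUT_DIR must point at the store; getStorePathString() reads it back.
    FileInputFormat.addInputPath(job, new Path("/opt/carbonStore"));
    // Identify which table inside the store to read (placeholder identity).
    CarbonInputFormat.setTableToAccess(job.getConfiguration(),
        new CarbonTableIdentifier("default", "t1", "1"));
    job.setInputFormatClass(CarbonInputFormat.class);
  }
}

Setting the CarbonTable itself is optional: if it is absent from the configuration, getCarbonTable falls back to reading the schema from the store, then caches the result back into the configuration.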
  /**
   * Sets the unresolved filter expression.
   *
   * @param configuration
   * @param filterExpression
   */
  public static void setFilterPredicates(Configuration configuration,
      Expression filterExpression) {
    try {
      String filterString = ObjectSerializationUtil.convertObjectToString(filterExpression);
      configuration.set(FILTER_PREDICATE, filterString);
    } catch (Exception e) {
      throw new RuntimeException("Error while setting filter expression to Job", e);
    }
  }

  /**
   * Sets the already-resolved filter expression.
   *
   * @param configuration
   * @param filterExpression
   */
  public static void setFilterPredicates(Configuration configuration,
      FilterResolverIntf filterExpression) {
    try {
      if (filterExpression == null) {
        return;
      }
      String filterString = ObjectSerializationUtil.convertObjectToString(filterExpression);
      configuration.set(FILTER_PREDICATE, filterString);
    } catch (Exception e) {
      throw new RuntimeException("Error while setting filter expression to Job", e);
    }
  }

  public static void setColumnProjection(CarbonProjection projection,
      Configuration configuration) {
    if (projection == null || projection.isEmpty()) {
      return;
    }
    String[] allColumns = projection.getAllColumns();
    StringBuilder builder = new StringBuilder();
    for (String column : allColumns) {
      builder.append(column).append(",");
    }
    // drop the trailing comma
    String columnString = builder.toString();
    columnString = columnString.substring(0, columnString.length() - 1);
    configuration.set(COLUMN_PROJECTION, columnString);
  }

  public static void setCarbonReadSupport(Class<? extends CarbonReadSupport> readSupportClass,
      Configuration configuration) {
    if (readSupportClass != null) {
      configuration.set(CARBON_READ_SUPPORT, readSupportClass.getName());
    }
  }

  public static CarbonTablePath getTablePath(Configuration configuration) throws IOException {
    String storePathString = getStorePathString(configuration);
    CarbonTableIdentifier tableIdentifier = CarbonInputFormat.getTableToAccess(configuration);
    if (tableIdentifier == null) {
      throw new IOException("Could not find " + DATABASE_NAME + "," + TABLE_NAME);
    }
    return CarbonStorePath.getCarbonTablePath(storePathString, tableIdentifier);
  }

  private static String getStorePathString(Configuration configuration) throws IOException {
    String dirs = configuration.get(INPUT_DIR, "");
    String[] inputPaths = StringUtils.split(dirs);
    if (inputPaths.length == 0) {
      throw new IOException("No input paths specified in job");
    }
    return CarbonInputFormatUtil.processPath(inputPaths[0]);
  }

  /**
   * Set the list of segments to access.
   */
  private void setSegmentsToAccess(Configuration configuration, List<String> segmentNosList) {
    // serialize to a comma separated string
    StringBuilder stringSegmentsBuilder = new StringBuilder();
    for (int i = 0; i < segmentNosList.size(); i++) {
      String segmentNo = segmentNosList.get(i);
      stringSegmentsBuilder.append(segmentNo);
      if (i < segmentNosList.size() - 1) {
        stringSegmentsBuilder.append(",");
      }
    }
    configuration.set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, stringSegmentsBuilder.toString());
  }
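Projection and filter push-down are both optional. The sketch below selects two columns and pushes an equality filter (name = 'bob') down to the scan. Only setColumnProjection and setFilterPredicates are taken from the listing; CarbonProjection.addColumn, the expression classes, and the DataType enum are assumptions based on the CarbonData sources of this generation, and the column names are made up:

import org.apache.carbondata.core.carbon.metadata.datatype.DataType;
import org.apache.carbondata.hadoop.CarbonInputFormat;
import org.apache.carbondata.hadoop.CarbonProjection;
import org.apache.carbondata.scan.expression.ColumnExpression;
import org.apache.carbondata.scan.expression.Expression;
import org.apache.carbondata.scan.expression.LiteralExpression;
import org.apache.carbondata.scan.expression.conditional.EqualToExpression;
import org.apache.hadoop.conf.Configuration;

public class ProjectionAndFilterExample {
  public static void configure(Configuration conf) {
    // Read only the columns we need; the format stores this list under
    // COLUMN_PROJECTION as a comma separated string.
    CarbonProjection projection = new CarbonProjection();
    projection.addColumn("name");    // assumed column
    projection.addColumn("salary");  // assumed column
    CarbonInputFormat.setColumnProjection(projection, conf);

    // Push an unresolved filter; getSplits() resolves it against the
    // table schema before pruning blocks.
    Expression filter = new EqualToExpression(
        new ColumnExpression("name", DataType.STRING),
        new LiteralExpression("bob", DataType.STRING));
    CarbonInputFormat.setFilterPredicates(conf, filter);
  }
}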
  /**
   * {@inheritDoc}
   * The configuration FileInputFormat.INPUT_DIR is used to get the table path to read.
   *
   * @param job
   * @return List<InputSplit> list of CarbonInputSplit
   * @throws IOException
   */
  @Override public List<InputSplit> getSplits(JobContext job) throws IOException {
    try {
      CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
      Object filterPredicates = getFilterPredicates(job.getConfiguration());
      if (getValidSegments(job).length == 0) {
        // get the valid segments from the carbon store
        SegmentStatusManager.ValidSegmentsInfo validSegments =
            new SegmentStatusManager(getAbsoluteTableIdentifier(job.getConfiguration()))
                .getValidSegments();
        if (validSegments.listOfValidSegments.isEmpty()) {
          return new ArrayList<InputSplit>();
        }
        setSegmentsToAccess(job.getConfiguration(), validSegments.listOfValidSegments);
      }
      if (filterPredicates == null) {
        return getSplitsInternal(job);
      } else {
        if (filterPredicates instanceof Expression) {
          // process and resolve the expression
          CarbonInputFormatUtil
              .processFilterExpression((Expression) filterPredicates, carbonTable);
          return getSplits(job, CarbonInputFormatUtil.resolveFilter((Expression) filterPredicates,
              getAbsoluteTableIdentifier(job.getConfiguration())));
        } else {
          // the user has already set a resolved expression
          return getSplits(job, (FilterResolverIntf) filterPredicates);
        }
      }
    } catch (Exception ex) {
      throw new IOException(ex);
    }
  }

  private List<InputSplit> getSplitsInternal(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    List<InputSplit> carbonSplits = new ArrayList<InputSplit>(splits.size());
    // identify table blocks
    for (InputSplit inputSplit : splits) {
      FileSplit fileSplit = (FileSplit) inputSplit;
      String segmentId = CarbonTablePath.DataPathUtil.getSegmentId(fileSplit.getPath().toString());
      if (INVALID_SEGMENT_ID.equals(segmentId)) {
        continue;
      }
      carbonSplits.add(CarbonInputSplit.from(segmentId, fileSplit));
    }
    return carbonSplits;
  }

  /**
   * {@inheritDoc}
   * The configurations FileInputFormat.INPUT_DIR and CarbonInputFormat.INPUT_SEGMENT_NUMBERS
   * are used to get the table path to read.
   *
   * @return
   * @throws IOException
   */
  private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver)
      throws IOException, IndexBuilderException {
    List<InputSplit> result = new LinkedList<InputSplit>();
    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier absoluteTableIdentifier =
        getAbsoluteTableIdentifier(job.getConfiguration());
    // for each segment, fetch the blocks matching the filter in the driver-side B-tree
    for (String segmentNo : getValidSegments(job)) {
      List<DataRefNode> dataRefNodes =
          getDataBlocksOfSegment(job, filterExpressionProcessor, absoluteTableIdentifier,
              filterResolver, segmentNo);
      for (DataRefNode dataRefNode : dataRefNodes) {
        BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
        TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
        result.add(new CarbonInputSplit(segmentNo, new Path(tableBlockInfo.getFilePath()),
            tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(),
            tableBlockInfo.getLocations()));
      }
    }
    return result;
  }
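Each CarbonInputSplit wraps one data block of one segment, so listing the splits on the driver is a quick way to verify that filter push-down actually pruned blocks. A minimal sketch, assuming the configuration was prepared as in the earlier examples (in Hadoop 2, Job implements JobContext, so it can be passed directly):

import java.util.List;
import org.apache.carbondata.hadoop.CarbonInputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;

public class SplitInspectionExample {
  public static void printSplits(Job job) throws Exception {
    CarbonInputFormat<Object[]> format = new CarbonInputFormat<Object[]>();
    // Each returned split corresponds to one carbon data block of one segment.
    List<InputSplit> splits = format.getSplits(job);
    for (InputSplit split : splits) {
      System.out.println(split);
    }
  }
}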
  /**
   * Get the total number of rows; same as count(*).
   *
   * @throws IOException
   * @throws IndexBuilderException
   */
  public long getRowCount(JobContext job) throws IOException, IndexBuilderException {
    long rowCount = 0;
    AbsoluteTableIdentifier absoluteTableIdentifier =
        getAbsoluteTableIdentifier(job.getConfiguration());
    SegmentStatusManager.ValidSegmentsInfo validSegments =
        new SegmentStatusManager(absoluteTableIdentifier).getValidSegments();
    setSegmentsToAccess(job.getConfiguration(), validSegments.listOfValidSegments);
    // number of cores used to load the blocks in the driver
    int numberOfCores = CarbonCommonConstants.NUMBER_OF_CORE_TO_LOAD_DRIVER_SEGMENT_DEFAULT_VALUE;
    try {
      numberOfCores = Integer.parseInt(CarbonProperties.getInstance()
          .getProperty(CarbonCommonConstants.NUMBER_OF_CORE_TO_LOAD_DRIVER_SEGMENT));
    } catch (NumberFormatException e) {
      numberOfCores = CarbonCommonConstants.NUMBER_OF_CORE_TO_LOAD_DRIVER_SEGMENT_DEFAULT_VALUE;
    }
    // create a thread pool and load the B-tree index of every valid segment in parallel
    ExecutorService threadPool = Executors.newFixedThreadPool(numberOfCores);
    List<Future<Map<String, AbstractIndex>>> loadedBlocks =
        new ArrayList<Future<Map<String, AbstractIndex>>>();
    for (String segmentNo : getValidSegments(job)) {
      loadedBlocks
          .add(threadPool.submit(new BlocksLoaderThread(job, absoluteTableIdentifier, segmentNo)));
    }
    threadPool.shutdown();
    try {
      threadPool.awaitTermination(1, TimeUnit.HOURS);
    } catch (InterruptedException e) {
      throw new IndexBuilderException(e);
    }
    try {
      // add up the rows of every loaded block to get the total row count
      for (Future<Map<String, AbstractIndex>> block : loadedBlocks) {
        for (AbstractIndex abstractIndex : block.get().values()) {
          rowCount += abstractIndex.getTotalNumberOfRows();
        }
      }
    } catch (InterruptedException | ExecutionException e) {
      throw new IndexBuilderException(e);
    }
    return rowCount;
  }
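Because every segment's B-tree index already records the row count of each block, getRowCount can answer a count(*) from driver-side metadata alone, without scanning data files. A driver-side sketch, again assuming a prepared Job:

import org.apache.carbondata.hadoop.CarbonInputFormat;
import org.apache.hadoop.mapreduce.Job;

public class RowCountExample {
  public static long countAllRows(Job job) throws Exception {
    // Equivalent to select count(*): only index metadata is read.
    return new CarbonInputFormat<Object[]>().getRowCount(job);
  }
}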
  /**
   * Resolves the given filter expression against the table schema.
   * The configurations FileInputFormat.INPUT_DIR and CarbonInputFormat.INPUT_SEGMENT_NUMBERS
   * are used to get the table path to read.
   *
   * @return
   * @throws IOException
   */
  public FilterResolverIntf getResolvedFilter(Configuration configuration,
      Expression filterExpression)
      throws IOException, IndexBuilderException, QueryExecutionException {
    if (filterExpression == null) {
      return null;
    }
    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier absoluteTableIdentifier = getAbsoluteTableIdentifier(configuration);
    // get the resolved filter
    try {
      return filterExpressionProcessor
          .getFilterResolver(filterExpression, absoluteTableIdentifier);
    } catch (FilterUnsupportedException e) {
      throw new QueryExecutionException(e.getMessage());
    }
  }

  private AbsoluteTableIdentifier getAbsoluteTableIdentifier(Configuration configuration)
      throws IOException {
    return new AbsoluteTableIdentifier(getStorePathString(configuration),
        getTableToAccess(configuration));
  }

  private Object getFilterPredicates(Configuration configuration) {
    try {
      String filterExprString = configuration.get(FILTER_PREDICATE);
      if (filterExprString == null) {
        return null;
      }
      return ObjectSerializationUtil.convertStringToObject(filterExprString);
    } catch (IOException e) {
      throw new RuntimeException("Error while reading filter expression", e);
    }
  }

  /**
   * Get the data blocks of the given segment.
   */
  private List<DataRefNode> getDataBlocksOfSegment(JobContext job,
      FilterExpressionProcessor filterExpressionProcessor,
      AbsoluteTableIdentifier absoluteTableIdentifier, FilterResolverIntf resolver,
      String segmentId) throws IndexBuilderException, IOException {
    QueryStatisticsRecorder recorder = new QueryStatisticsRecorder("");
    QueryStatistic statistic = new QueryStatistic();
    Map<String, AbstractIndex> segmentIndexMap =
        getSegmentAbstractIndexs(job, absoluteTableIdentifier, segmentId);
    List<DataRefNode> resultFilterredBlocks = new LinkedList<DataRefNode>();
    // build the result
    for (AbstractIndex abstractIndex : segmentIndexMap.values()) {
      List<DataRefNode> filterredBlocks = null;
      if (null == resolver) {
        // if no filter is given, get all blocks from the B-tree index
        filterredBlocks = getDataBlocksOfIndex(abstractIndex);
      } else {
        // apply the filter and get the matching blocks
        try {
          filterredBlocks = filterExpressionProcessor
              .getFilterredBlocks(abstractIndex.getDataRefNode(), resolver, abstractIndex,
                  absoluteTableIdentifier);
        } catch (QueryExecutionException e) {
          throw new IndexBuilderException(e.getMessage());
        }
      }
      resultFilterredBlocks.addAll(filterredBlocks);
    }
    statistic.addStatistics("Time taken to load the Block(s) in Driver Side",
        System.currentTimeMillis());
    recorder.recordStatistics(statistic);
    recorder.logStatistics();
    return resultFilterredBlocks;
  }

  private Map<String, AbstractIndex> getSegmentAbstractIndexs(JobContext job,
      AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId)
      throws IOException, IndexBuilderException {
    Map<String, AbstractIndex> segmentIndexMap = SegmentTaskIndexStore.getInstance()
        .getSegmentBTreeIfExists(absoluteTableIdentifier, segmentId);
    // if the segment tree is not loaded yet, load it
    if (segmentIndexMap == null) {
      List<TableBlockInfo> tableBlockInfoList = new LinkedList<TableBlockInfo>();
      // get the file locations of all files of the given segment
      JobContext newJob =
          new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
      newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");
      // identify the table blocks
      for (InputSplit inputSplit : getSplitsInternal(newJob)) {
        CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
        tableBlockInfoList.add(
            new TableBlockInfo(carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(),
                segmentId, carbonInputSplit.getLocations(), carbonInputSplit.getLength()));
      }
      Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
      segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
      // load and get the B-tree blocks of the given segment
      segmentIndexMap = SegmentTaskIndexStore.getInstance()
          .loadAndGetTaskIdToSegmentsMap(segmentToTableBlocksInfos, absoluteTableIdentifier);
    }
    return segmentIndexMap;
  }
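Both setFilterPredicates overloads store their argument under the same FILTER_PREDICATE key, and getFilterPredicates hands back whichever type was stored. That means a driver can resolve the expression once with getResolvedFilter and publish the resolved tree, sparing getSplits (and every task) the resolution step. A sketch, with the filter built as in the earlier example:

import org.apache.carbondata.hadoop.CarbonInputFormat;
import org.apache.carbondata.scan.expression.Expression;
import org.apache.carbondata.scan.filter.resolver.FilterResolverIntf;
import org.apache.hadoop.conf.Configuration;

public class PreResolvedFilterExample {
  public static void configure(Configuration conf, Expression filter) throws Exception {
    CarbonInputFormat<Object[]> format = new CarbonInputFormat<Object[]>();
    // Resolve against the table schema once on the driver...
    FilterResolverIntf resolved = format.getResolvedFilter(conf, filter);
    // ...and publish the resolved tree instead of the raw expression.
    CarbonInputFormat.setFilterPredicates(conf, resolved);
  }
}

Note that getResolvedFilter does not run processFilterExpression first, so this shortcut suits expressions that need no further schema processing.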
  /**
   * Get all data blocks of the given B-tree.
   */
  private List<DataRefNode> getDataBlocksOfIndex(AbstractIndex abstractIndex) {
    List<DataRefNode> blocks = new LinkedList<DataRefNode>();
    SegmentProperties segmentProperties = abstractIndex.getSegmentProperties();
    try {
      IndexKey startIndexKey = FilterUtil.prepareDefaultStartIndexKey(segmentProperties);
      IndexKey endIndexKey = FilterUtil.prepareDefaultEndIndexKey(segmentProperties);
      // add all blocks of the B-tree to the result
      DataRefNodeFinder blockFinder =
          new BTreeDataRefNodeFinder(segmentProperties.getEachDimColumnValueSize());
      DataRefNode startBlock =
          blockFinder.findFirstDataBlock(abstractIndex.getDataRefNode(), startIndexKey);
      DataRefNode endBlock =
          blockFinder.findLastDataBlock(abstractIndex.getDataRefNode(), endIndexKey);
      while (startBlock != endBlock) {
        blocks.add(startBlock);
        startBlock = startBlock.getNextDataRefNode();
      }
      blocks.add(endBlock);
    } catch (KeyGenException e) {
      LOG.error("Could not generate start key", e);
    }
    return blocks;
  }

  @Override public RecordReader<Void, T> createRecordReader(InputSplit inputSplit,
      TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    Configuration configuration = taskAttemptContext.getConfiguration();
    CarbonTable carbonTable = getCarbonTable(configuration);
    QueryModel queryModel;
    try {
      CarbonQueryPlan queryPlan = CarbonInputFormatUtil
          .createQueryPlan(carbonTable, configuration.get(COLUMN_PROJECTION));
      queryModel = QueryModel
          .createModel(getAbsoluteTableIdentifier(configuration), queryPlan, carbonTable);
      Object filterPredicates = getFilterPredicates(configuration);
      if (filterPredicates != null) {
        if (filterPredicates instanceof Expression) {
          CarbonInputFormatUtil
              .processFilterExpression((Expression) filterPredicates, carbonTable);
          queryModel.setFilterExpressionResolverTree(CarbonInputFormatUtil
              .resolveFilter((Expression) filterPredicates,
                  getAbsoluteTableIdentifier(configuration)));
        } else {
          queryModel.setFilterExpressionResolverTree((FilterResolverIntf) filterPredicates);
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }
    CarbonReadSupport readSupport = getReadSupportClass(configuration);
    return new CarbonRecordReader<T>(queryModel, readSupport);
  }

  private CarbonReadSupport getReadSupportClass(Configuration configuration) {
    String readSupportClass = configuration.get(CARBON_READ_SUPPORT);
    // by default the dictionary-decoding read support is used
    CarbonReadSupport readSupport = null;
    if (readSupportClass != null) {
      try {
        Class<?> myClass = Class.forName(readSupportClass);
        Constructor<?> constructor = myClass.getConstructors()[0];
        Object object = constructor.newInstance();
        if (object instanceof CarbonReadSupport) {
          readSupport = (CarbonReadSupport) object;
        }
      } catch (ClassNotFoundException ex) {
        LOG.error("Class " + readSupportClass + " not found", ex);
      } catch (Exception ex) {
        LOG.error("Error while creating " + readSupportClass, ex);
      }
    } else {
      readSupport = new DictionaryDecodedReadSupportImpl();
    }
    return readSupport;
  }
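The read support decides what Java object each row becomes. getReadSupportClass instantiates the class named under CARBON_READ_SUPPORT through its first declared constructor with no arguments, so implementations should expose a public no-argument constructor; when nothing is configured, it falls back to DictionaryDecodedReadSupportImpl, which decodes dictionary-encoded columns back to their original values. Selecting the read support explicitly, using only APIs from the listing:

import org.apache.carbondata.hadoop.CarbonInputFormat;
import org.apache.carbondata.hadoop.readsupport.impl.DictionaryDecodedReadSupportImpl;
import org.apache.hadoop.conf.Configuration;

public class ReadSupportExample {
  public static void configure(Configuration conf) {
    // Explicitly choose the (default) dictionary-decoding read support;
    // any CarbonReadSupport implementation with a no-arg constructor works.
    CarbonInputFormat.setCarbonReadSupport(DictionaryDecodedReadSupportImpl.class, conf);
  }
}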
  @Override protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return super.computeSplitSize(blockSize, minSize, maxSize);
  }

  @Override protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
    return super.getBlockIndex(blkLocations, offset);
  }

  @Override protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    String[] segmentsToConsider = getValidSegments(job);
    if (segmentsToConsider.length == 0) {
      throw new IOException("No segments found");
    }
    getFileStatusOfSegments(job, segmentsToConsider, result);
    return result;
  }

  @Override protected boolean isSplitable(JobContext context, Path filename) {
    try {
      // don't split the file if it is on the local file system
      FileSystem fileSystem = filename.getFileSystem(context.getConfiguration());
      if (fileSystem instanceof LocalFileSystem) {
        return false;
      }
    } catch (Exception e) {
      return true;
    }
    return true;
  }

  private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider,
      List<FileStatus> result) throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
      throw new IOException("No partitions/data found");
    }
    PathFilter inputFilter = getDataFileFilter(job);
    CarbonTablePath tablePath = getTablePath(job.getConfiguration());
    // get tokens for all the required FileSystems for the table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath },
        job.getConfiguration());
    // get all data files of the valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
      String partition = partitionsToConsider[i];
      for (int j = 0; j < segmentsToConsider.length; ++j) {
        String segmentId = segmentsToConsider[j];
        Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
        FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());
        RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
        while (iter.hasNext()) {
          LocatedFileStatus stat = iter.next();
          if (inputFilter.accept(stat.getPath())) {
            if (stat.isDirectory()) {
              addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
            } else {
              result.add(stat);
            }
          }
        }
      }
    }
  }
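listStatus only enumerates files below the segment directories named in INPUT_SEGMENT_NUMBERS. getSplits fills that list with all valid segments automatically, but since the key is public, a job can also pin specific segments itself; a small sketch (segment ids 0 and 1 are illustrative):

import org.apache.carbondata.hadoop.CarbonInputFormat;
import org.apache.hadoop.conf.Configuration;

public class SegmentSelectionExample {
  public static void configure(Configuration conf) {
    // Restrict the scan to segments 0 and 1; listStatus() then only
    // enumerates files below those two segment directories.
    conf.set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, "0,1");
  }
}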
  /**
   * @param job
   * @return the PathFilter for fact files
   */
  public PathFilter getDataFileFilter(JobContext job) {
    return new CarbonPathFilter(getUpdateExtension());
  }

  /**
   * Needs to be moved to core.
   *
   * @return updateExtension
   */
  private String getUpdateExtension() {
    // TODO: needs to change when update is supported; most likely an update timestamp
    return "update";
  }

  /**
   * @return the segment ids set in the job configuration
   */
  private String[] getValidSegments(JobContext job) throws IOException {
    String segmentString = job.getConfiguration().get(INPUT_SEGMENT_NUMBERS, "");
    // no segments configured
    if (segmentString.trim().isEmpty()) {
      return new String[0];
    }
    String[] segments = segmentString.split(",");
    String[] segmentIds = new String[segments.length];
    int i = 0;
    try {
      for (; i < segments.length; i++) {
        Integer.parseInt(segments[i]); // validate that each segment id is an integer
        segmentIds[i] = segments[i];
      }
    } catch (NumberFormatException e) {
      throw new IOException("segment no:" + segments[i] + " should be integer");
    }
    return segmentIds;
  }

  /**
   * Needs to be moved to core.
   *
   * @return the valid partition ids
   */
  private String[] getValidPartitions(JobContext job) {
    // TODO: identify partitions by partition pruning
    return new String[] { "0" };
  }

  /**
   * Callable that loads the B-tree index blocks of one segment.
   */
  private class BlocksLoaderThread implements Callable<Map<String, AbstractIndex>> {
    // job context
    private JobContext job;
    // table identifier
    private AbsoluteTableIdentifier absoluteTableIdentifier;
    // segment id
    private String segmentId;

    private BlocksLoaderThread(JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier,
        String segmentId) {
      this.job = job;
      this.absoluteTableIdentifier = absoluteTableIdentifier;
      this.segmentId = segmentId;
    }

    @Override public Map<String, AbstractIndex> call() throws Exception {
      return getSegmentAbstractIndexs(job, absoluteTableIdentifier, segmentId);
    }
  }
}
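Putting it all together: a runnable sketch of a map-only job that dumps every row of a table as text. The store path, table identity, and output path are placeholders, and the mapper's value type rests on the assumption that the default read support hands each row to the task as an Object[]; none of these specifics are taken from the listing:

import java.io.IOException;
import java.util.Arrays;
import org.apache.carbondata.core.carbon.CarbonTableIdentifier;
import org.apache.carbondata.hadoop.CarbonInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class CarbonScanJob {

  // Assumes the configured read support materializes each row as Object[].
  public static class ScanMapper extends Mapper<Void, Object[], NullWritable, Text> {
    @Override protected void map(Void key, Object[] row, Context context)
        throws IOException, InterruptedException {
      context.write(NullWritable.get(), new Text(Arrays.toString(row)));
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "carbon-scan");
    job.setJarByClass(CarbonScanJob.class);
    // Point the format at the store and the table (placeholder identity).
    FileInputFormat.addInputPath(job, new Path("/opt/carbonStore"));
    CarbonInputFormat.setTableToAccess(job.getConfiguration(),
        new CarbonTableIdentifier("default", "t1", "1"));
    job.setInputFormatClass(CarbonInputFormat.class);
    job.setMapperClass(ScanMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/carbon-scan-out"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Projection, filter, read support, and segment selection can each be layered onto this driver with the sketches shown earlier; with none of them set, the job scans all columns of all valid segments.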