List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
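Before the project-specific examples below, here is a minimal sketch of the common pattern: a custom InputFormat reads job-level settings through JobContext.getConfiguration() when computing splits. The MyInputFormat class and the "my.table.path" property are hypothetical, used only to illustrate the call.

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;

// Hypothetical InputFormat showing how job settings are read through
// JobContext.getConfiguration() while computing splits.
public abstract class MyInputFormat<K, V> extends InputFormat<K, V> {

    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        Configuration conf = job.getConfiguration();
        // "my.table.path" is a made-up property, only for illustration
        String tablePath = conf.get("my.table.path");
        if (tablePath == null) {
            throw new IOException("my.table.path is not set in the job configuration");
        }
        // A real implementation would build splits from the files under tablePath.
        return Collections.emptyList();
    }
}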
From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java
License:Apache License
/**
 * Get data blocks of the given segment.
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable,
    Expression expression, BitSet matchedPartitions, List<Segment> segmentIds,
    PartitionInfo partitionInfo, List<Integer> oldPartitionIdList) throws IOException {
  QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
  QueryStatistic statistic = new QueryStatistic();
  // get tokens for all the required FileSystems for the table path
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
  List<ExtendedBlocklet> prunedBlocklets = getPrunedBlocklets(job, carbonTable, expression, segmentIds);
  List<CarbonInputSplit> resultFilteredBlocks = new ArrayList<>();
  int partitionIndex = 0;
  List<Integer> partitionIdList = new ArrayList<>();
  if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
    partitionIdList = partitionInfo.getPartitionIds();
  }
  for (ExtendedBlocklet blocklet : prunedBlocklets) {
    long partitionId = CarbonTablePath.DataFileUtil
        .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));
    // oldPartitionIdList is only used by the alter table partition command, because it changes
    // the partition info first and then reads data.
    // Other normal queries should use the newest partitionIdList.
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
      if (oldPartitionIdList != null) {
        partitionIndex = oldPartitionIdList.indexOf((int) partitionId);
      } else {
        partitionIndex = partitionIdList.indexOf((int) partitionId);
      }
    }
    if (partitionIndex != -1) {
      // matchedPartitions will be null in two cases:
      // 1. the table is not a partition table
      // 2. the table is a partition table and all partitions are matched by the query
      // For a partition table, the task id of the carbondata file name is the partition id;
      // if this partition is not required, skip it here.
      if (matchedPartitions == null || matchedPartitions.get(partitionIndex)) {
        CarbonInputSplit inputSplit = convertToCarbonInputSplit(blocklet);
        if (inputSplit != null) {
          resultFilteredBlocks.add(inputSplit);
        }
      }
    }
  }
  statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
  recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
  return resultFilteredBlocks;
}
From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java
License:Apache License
/**
 * Prune the blocklets using the filter expression with the available datamaps.
 * First prune with the default blocklet datamap, then prune with CG and FG datamaps.
 */
private List<ExtendedBlocklet> getPrunedBlocklets(JobContext job, CarbonTable carbonTable,
    Expression expression, List<Segment> segmentIds) throws IOException {
  ExplainCollector.addPruningInfo(carbonTable.getTableName());
  FilterResolverIntf resolver = null;
  if (expression != null) {
    carbonTable.processFilterExpression(expression, null, null);
    resolver = CarbonTable.resolveFilter(expression, carbonTable.getAbsoluteTableIdentifier());
    ExplainCollector.setFilterStatement(expression.getStatement());
  } else {
    ExplainCollector.setFilterStatement("none");
  }
  boolean distributedCG = Boolean.parseBoolean(
      CarbonProperties.getInstance().getProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP,
          CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP_DEFAULT));
  DataMapJob dataMapJob = DataMapUtil.getDataMapJob(job.getConfiguration());
  List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());
  // First prune using the default datamap on the driver side.
  TableDataMap defaultDataMap = DataMapStoreManager.getInstance().getDefaultDataMap(carbonTable);
  List<ExtendedBlocklet> prunedBlocklets = null;
  // Log the event so the user can follow the progress from the logs.
  LOG.info("Started block pruning ...");
  if (carbonTable.isTransactionalTable()) {
    prunedBlocklets = defaultDataMap.prune(segmentIds, resolver, partitionsToPrune);
  } else {
    prunedBlocklets = defaultDataMap.prune(segmentIds, expression, partitionsToPrune);
  }
  ExplainCollector.setDefaultDataMapPruningBlockHit(getBlockCount(prunedBlocklets));
  if (prunedBlocklets.size() == 0) {
    return prunedBlocklets;
  }
  DataMapChooser chooser = new DataMapChooser(getOrCreateCarbonTable(job.getConfiguration()));
  // Get the available CG datamaps and prune further.
  DataMapExprWrapper cgDataMapExprWrapper = chooser.chooseCGDataMap(resolver);
  if (cgDataMapExprWrapper != null) {
    // Prune segments from the already pruned blocklets
    pruneSegments(segmentIds, prunedBlocklets);
    List<ExtendedBlocklet> cgPrunedBlocklets;
    // Prune again with the CG datamap.
    if (distributedCG && dataMapJob != null) {
      cgPrunedBlocklets = DataMapUtil.executeDataMapJob(carbonTable, resolver, segmentIds,
          cgDataMapExprWrapper, dataMapJob, partitionsToPrune);
    } else {
      cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds, partitionsToPrune);
    }
    // Since index datamaps prune at segment scope, the result needs to be intersected
    // with the previously pruned result.
    prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
    ExplainCollector.recordCGDataMapPruning(
        DataMapWrapperSimpleInfo.fromDataMapWrapper(cgDataMapExprWrapper), prunedBlocklets.size(),
        getBlockCount(prunedBlocklets));
  }
  if (prunedBlocklets.size() == 0) {
    return prunedBlocklets;
  }
  // Now try to prune with the FG datamap.
  if (isFgDataMapPruningEnable(job.getConfiguration()) && dataMapJob != null) {
    DataMapExprWrapper fgDataMapExprWrapper = chooser.chooseFGDataMap(resolver);
    if (fgDataMapExprWrapper != null) {
      // Prune segments from the already pruned blocklets
      pruneSegments(segmentIds, prunedBlocklets);
      List<ExtendedBlocklet> fgPrunedBlocklets = DataMapUtil.executeDataMapJob(carbonTable,
          resolver, segmentIds, fgDataMapExprWrapper, dataMapJob, partitionsToPrune);
      // Note that 'fgPrunedBlocklets' has extra datamap-related info compared with
      // 'prunedBlocklets', so the intersection should keep the elements in 'fgPrunedBlocklets'.
      prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, fgPrunedBlocklets);
      ExplainCollector.recordFGDataMapPruning(
          DataMapWrapperSimpleInfo.fromDataMapWrapper(fgDataMapExprWrapper), prunedBlocklets.size(),
          getBlockCount(prunedBlocklets));
    }
  }
  LOG.info("Finished block pruning ...");
  return prunedBlocklets;
}
From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java
License:Apache License
@Override
protected boolean isSplitable(JobContext context, Path filename) {
  try {
    // Don't split the file if it is on the local file system
    FileSystem fileSystem = filename.getFileSystem(context.getConfiguration());
    if (fileSystem instanceof LocalFileSystem) {
      return false;
    }
  } catch (Exception e) {
    return true;
  }
  return true;
}
From source file:org.apache.carbondata.hadoop.api.CarbonOutputCommitter.java
License:Apache License
/**
 * Update the tablestatus as in-progress while setting up the job.
 *
 * @param context
 * @throws IOException
 */
@Override
public void setupJob(JobContext context) throws IOException {
  super.setupJob(context);
  boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
  CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
  if (loadModel.getSegmentId() == null) {
    CarbonLoaderUtil.readAndUpdateLoadProgressInTableMeta(loadModel, overwriteSet);
  }
  // Take segment lock
  segmentLock = CarbonLockFactory.getCarbonLockObj(
      loadModel.getCarbonDataLoadSchema().getCarbonTable().getAbsoluteTableIdentifier(),
      CarbonTablePath.addSegmentPrefix(loadModel.getSegmentId()) + LockUsage.LOCK);
  if (!segmentLock.lockWithRetries()) {
    throw new RuntimeException("Already segment is locked for loading, not supposed happen");
  }
  CarbonTableOutputFormat.setLoadModel(context.getConfiguration(), loadModel);
}
From source file:org.apache.carbondata.hadoop.api.CarbonOutputCommitter.java
License:Apache License
/**
 * Update the tablestatus as success after the job succeeds.
 *
 * @param context
 * @throws IOException
 */
@Override
public void commitJob(JobContext context) throws IOException {
  try {
    super.commitJob(context);
  } catch (IOException e) {
    // Ignore: during concurrent loads, an attempt to remove temporary folders created by
    // another load may cause a file-not-found exception. This does not impact the carbon load.
    LOGGER.warn(e.getMessage());
  }
  boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
  CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
  LoadMetadataDetails newMetaEntry = loadModel.getCurrentLoadMetadataDetail();
  String readPath = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
      + CarbonCommonConstants.FILE_SEPARATOR + loadModel.getSegmentId() + "_"
      + loadModel.getFactTimeStamp() + ".tmp";
  // Merge all partition files into a single file.
  String segmentFileName = SegmentFileStore.genSegmentFileName(loadModel.getSegmentId(),
      String.valueOf(loadModel.getFactTimeStamp()));
  SegmentFileStore.SegmentFile segmentFile = SegmentFileStore.mergeSegmentFiles(readPath,
      segmentFileName, CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()));
  if (segmentFile != null) {
    if (null == newMetaEntry) {
      throw new RuntimeException("Internal Error");
    }
    // Move all files from the temp directory of each segment to the partition directory
    SegmentFileStore.moveFromTempFolder(segmentFile,
        loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + ".tmp",
        loadModel.getTablePath());
    newMetaEntry.setSegmentFile(segmentFileName + CarbonTablePath.SEGMENT_EXT);
  }
  OperationContext operationContext = (OperationContext) getOperationContext();
  String uuid = "";
  if (loadModel.getCarbonDataLoadSchema().getCarbonTable().isChildDataMap()
      && operationContext != null) {
    uuid = operationContext.getProperty("uuid").toString();
  }
  CarbonLoaderUtil.populateNewLoadMetaEntry(newMetaEntry, SegmentStatus.SUCCESS,
      loadModel.getFactTimeStamp(), true);
  CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
  long segmentSize = CarbonLoaderUtil.addDataIndexSizeIntoMetaEntry(newMetaEntry,
      loadModel.getSegmentId(), carbonTable);
  if (segmentSize > 0 || overwriteSet) {
    if (operationContext != null && carbonTable.hasAggregationDataMap()) {
      operationContext.setProperty("current.segmentfile", newMetaEntry.getSegmentFile());
      LoadEvents.LoadTablePreStatusUpdateEvent event = new LoadEvents.LoadTablePreStatusUpdateEvent(
          carbonTable.getCarbonTableIdentifier(), loadModel);
      try {
        OperationListenerBus.getInstance().fireEvent(event, operationContext);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }
    String uniqueId = null;
    if (overwriteSet) {
      if (!loadModel.isCarbonTransactionalTable()) {
        CarbonLoaderUtil.deleteNonTransactionalTableForInsertOverwrite(loadModel);
      } else {
        if (segmentSize == 0) {
          newMetaEntry.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE);
        }
        uniqueId = overwritePartitions(loadModel, newMetaEntry, uuid);
      }
    } else {
      CarbonLoaderUtil.recordNewLoadMetadata(newMetaEntry, loadModel, false, false, uuid);
    }
    DataMapStatusManager.disableAllLazyDataMaps(carbonTable);
    if (operationContext != null) {
      LoadEvents.LoadTablePostStatusUpdateEvent postStatusUpdateEvent =
          new LoadEvents.LoadTablePostStatusUpdateEvent(loadModel);
      try {
        OperationListenerBus.getInstance().fireEvent(postStatusUpdateEvent, operationContext);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }
    String updateTime =
        context.getConfiguration().get(CarbonTableOutputFormat.UPADTE_TIMESTAMP, null);
    String segmentsToBeDeleted =
        context.getConfiguration().get(CarbonTableOutputFormat.SEGMENTS_TO_BE_DELETED, "");
    List<Segment> segmentDeleteList = Segment.toSegmentList(segmentsToBeDeleted.split(","), null);
    Set<Segment> segmentSet = new HashSet<>(
        new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier(), context.getConfiguration())
            .getValidAndInvalidSegments().getValidSegments());
    if (updateTime != null) {
      CarbonUpdateUtil.updateTableMetadataStatus(segmentSet, carbonTable, updateTime, true,
          segmentDeleteList);
    } else if (uniqueId != null) {
      CarbonUpdateUtil.updateTableMetadataStatus(segmentSet, carbonTable, uniqueId, true,
          segmentDeleteList);
    }
  } else {
    CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
  }
  if (segmentLock != null) {
    segmentLock.unlock();
  }
}
From source file:org.apache.carbondata.hadoop.api.CarbonOutputCommitter.java
License:Apache License
/**
 * Update the tablestatus as failed if any failure happens, and also clean up
 * any temp folders that exist.
 *
 * @param context
 * @param state
 * @throws IOException
 */
@Override
public void abortJob(JobContext context, JobStatus.State state) throws IOException {
  try {
    super.abortJob(context, state);
    CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
    CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
    String segmentFileName = loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp();
    LoadMetadataDetails metadataDetail = loadModel.getCurrentLoadMetadataDetail();
    if (metadataDetail != null) {
      // In case the segment file is already created for this job, just link it so that it
      // will be used while cleaning.
      if (!metadataDetail.getSegmentStatus().equals(SegmentStatus.SUCCESS)) {
        String readPath = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
            + CarbonCommonConstants.FILE_SEPARATOR + segmentFileName + CarbonTablePath.SEGMENT_EXT;
        if (FileFactory.getCarbonFile(readPath).exists()) {
          metadataDetail.setSegmentFile(segmentFileName + CarbonTablePath.SEGMENT_EXT);
        }
      }
    }
    // Clean the temp files
    CarbonFile segTmpFolder = FileFactory.getCarbonFile(
        CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
            + CarbonCommonConstants.FILE_SEPARATOR + segmentFileName + ".tmp");
    // delete the temp segment folder
    if (segTmpFolder.exists()) {
      FileFactory.deleteAllCarbonFilesOfDir(segTmpFolder);
    }
    CarbonFile segmentFilePath = FileFactory.getCarbonFile(
        CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
            + CarbonCommonConstants.FILE_SEPARATOR + segmentFileName + CarbonTablePath.SEGMENT_EXT);
    // Delete the temp data folders of this job if they exist
    if (segmentFilePath.exists()) {
      SegmentFileStore fileStore = new SegmentFileStore(loadModel.getTablePath(),
          segmentFileName + CarbonTablePath.SEGMENT_EXT);
      SegmentFileStore.removeTempFolder(fileStore.getLocationMap(), segmentFileName + ".tmp",
          loadModel.getTablePath());
    }
    LOGGER.error("Loading failed with job status : " + state);
  } finally {
    if (segmentLock != null) {
      segmentLock.unlock();
    }
  }
}
From source file:org.apache.carbondata.hadoop.api.CarbonTableInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  // Works in the following steps:
  // 1. get all currently valid segments
  // 2. for each segment, get all input splits
  List<InputSplit> output = new LinkedList<>();
  Expression filter = getFilter(job.getConfiguration());
  Segment[] segments = segmentManager.getAllValidSegments();
  FilterResolverIntf filterResolver = CarbonInputFormatUtil.resolveFilter(filter, null);
  for (Segment segment : segments) {
    List<InputSplit> splits = segment.getSplits(job, filterResolver);
    output.addAll(splits);
  }
  return output;
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
/**
 * {@inheritDoc}
 * The configuration FileInputFormat.INPUT_DIR is used to get the table path to read.
 *
 * @param job
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  try {
    CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
    Object filterPredicates = getFilterPredicates(job.getConfiguration());
    if (getValidSegments(job).length == 0) {
      // Get the valid segments from the carbon store.
      SegmentStatusManager.ValidSegmentsInfo validSegments = new SegmentStatusManager(
          getAbsoluteTableIdentifier(job.getConfiguration())).getValidSegments();
      if (validSegments.listOfValidSegments.isEmpty()) {
        return new ArrayList<InputSplit>();
      }
      setSegmentsToAccess(job.getConfiguration(), validSegments.listOfValidSegments);
    }
    if (filterPredicates == null) {
      return getSplitsInternal(job);
    } else {
      if (filterPredicates instanceof Expression) {
        // process and resolve the expression
        CarbonInputFormatUtil.processFilterExpression((Expression) filterPredicates, carbonTable);
        return getSplits(job, CarbonInputFormatUtil.resolveFilter((Expression) filterPredicates,
            getAbsoluteTableIdentifier(job.getConfiguration())));
      } else {
        // the user has set an already resolved expression
        return getSplits(job, (FilterResolverIntf) filterPredicates);
      }
    }
  } catch (Exception ex) {
    throw new IOException(ex);
  }
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
/**
 * {@inheritDoc}
 * The configurations FileInputFormat.INPUT_DIR and CarbonInputFormat.INPUT_SEGMENT_NUMBERS
 * are used to get the table path to read.
 *
 * @return
 * @throws IOException
 */
private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver)
    throws IOException, IndexBuilderException {
  List<InputSplit> result = new LinkedList<InputSplit>();
  FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
  AbsoluteTableIdentifier absoluteTableIdentifier =
      getAbsoluteTableIdentifier(job.getConfiguration());
  // for each segment, fetch the blocks matching the filter in the driver BTree
  for (String segmentNo : getValidSegments(job)) {
    List<DataRefNode> dataRefNodes = getDataBlocksOfSegment(job, filterExpressionProcessor,
        absoluteTableIdentifier, filterResolver, segmentNo);
    for (DataRefNode dataRefNode : dataRefNodes) {
      BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
      TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
      result.add(new CarbonInputSplit(segmentNo, new Path(tableBlockInfo.getFilePath()),
          tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(),
          tableBlockInfo.getLocations()));
    }
  }
  return result;
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
/**
 * Get the total number of rows. Same as count(*).
 *
 * @throws IOException
 * @throws IndexBuilderException
 */
public long getRowCount(JobContext job) throws IOException, IndexBuilderException {
  long rowCount = 0;
  AbsoluteTableIdentifier absoluteTableIdentifier =
      getAbsoluteTableIdentifier(job.getConfiguration());
  SegmentStatusManager.ValidSegmentsInfo validSegments = new SegmentStatusManager(
      getAbsoluteTableIdentifier(job.getConfiguration())).getValidSegments();
  setSegmentsToAccess(job.getConfiguration(), validSegments.listOfValidSegments);
  // number of cores used to load the blocks in the driver
  int numberOfCores = CarbonCommonConstants.NUMBER_OF_CORE_TO_LOAD_DRIVER_SEGMENT_DEFAULT_VALUE;
  try {
    numberOfCores = Integer.parseInt(CarbonProperties.getInstance()
        .getProperty(CarbonCommonConstants.NUMBER_OF_CORE_TO_LOAD_DRIVER_SEGMENT));
  } catch (NumberFormatException e) {
    numberOfCores = CarbonCommonConstants.NUMBER_OF_CORE_TO_LOAD_DRIVER_SEGMENT_DEFAULT_VALUE;
  }
  // create a thread pool
  ExecutorService threadPool = Executors.newFixedThreadPool(numberOfCores);
  List<Future<Map<String, AbstractIndex>>> loadedBlocks =
      new ArrayList<Future<Map<String, AbstractIndex>>>();
  // for each segment, fetch the blocks matching the filter in the driver BTree
  for (String segmentNo : getValidSegments(job)) {
    // submit the task
    loadedBlocks.add(threadPool.submit(new BlocksLoaderThread(job, absoluteTableIdentifier, segmentNo)));
  }
  threadPool.shutdown();
  try {
    threadPool.awaitTermination(1, TimeUnit.HOURS);
  } catch (InterruptedException e) {
    throw new IndexBuilderException(e);
  }
  try {
    // add up the rows of all blocks to get the total row count
    for (Future<Map<String, AbstractIndex>> block : loadedBlocks) {
      for (AbstractIndex abstractIndex : block.get().values()) {
        rowCount += abstractIndex.getTotalNumberOfRows();
      }
    }
  } catch (InterruptedException | ExecutionException e) {
    throw new IndexBuilderException(e);
  }
  return rowCount;
}