Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce JobContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.

Usage
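
Before the project-specific examples, here is a minimal, self-contained sketch of the typical pattern: a custom input format reads a job-scoped setting through JobContext.getConfiguration() before computing splits. The class name and the property "example.max.splits" are illustrative only, not part of Hadoop or CarbonData.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ConfiguredTextInputFormat extends TextInputFormat {

    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        // Read a job-scoped setting through the JobContext.
        Configuration conf = job.getConfiguration();
        int maxSplits = conf.getInt("example.max.splits", Integer.MAX_VALUE);
        List<InputSplit> splits = super.getSplits(job);
        // Cap the number of splits if the caller configured an upper bound.
        return splits.size() > maxSplits ? splits.subList(0, maxSplits) : splits;
    }
}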

From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java

License:Apache License

/**
 * get data blocks of given segment
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable,
        Expression expression, BitSet matchedPartitions, List<Segment> segmentIds, PartitionInfo partitionInfo,
        List<Integer> oldPartitionIdList) throws IOException {

    QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    QueryStatistic statistic = new QueryStatistic();

    // get tokens for all the FileSystems required for the table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
            new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
    List<ExtendedBlocklet> prunedBlocklets = getPrunedBlocklets(job, carbonTable, expression, segmentIds);

    List<CarbonInputSplit> resultFilteredBlocks = new ArrayList<>();
    int partitionIndex = 0;
    List<Integer> partitionIdList = new ArrayList<>();
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
        partitionIdList = partitionInfo.getPartitionIds();
    }
    for (ExtendedBlocklet blocklet : prunedBlocklets) {
        long partitionId = CarbonTablePath.DataFileUtil
                .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));

        // oldPartitionIdList is only used by the alter table partition command because it changes
        // the partition info first and then reads the data.
        // Other normal queries should use the newest partitionIdList.
        if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
            if (oldPartitionIdList != null) {
                partitionIndex = oldPartitionIdList.indexOf((int) partitionId);
            } else {
                partitionIndex = partitionIdList.indexOf((int) partitionId);
            }
        }
        if (partitionIndex != -1) {
            // matchedPartitions variable will be null in two cases as follows
            // 1. the table is not a partition table
            // 2. the table is a partition table, and all partitions are matched by query
            // For a partition table, the task id in the carbondata file name is the partition id.
            // If this partition is not required, it will be skipped here.
            if (matchedPartitions == null || matchedPartitions.get(partitionIndex)) {
                CarbonInputSplit inputSplit = convertToCarbonInputSplit(blocklet);
                if (inputSplit != null) {
                    resultFilteredBlocks.add(inputSplit);
                }
            }
        }
    }
    statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
    recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
    return resultFilteredBlocks;
}
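
The statistics call at the end reads "query.id" from the job configuration, so some caller must have put it there. How CarbonData actually populates that property is not shown on this page; the following driver-side snippet is only a hedged illustration of the round trip.

import java.io.IOException;
import java.util.UUID;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class QueryIdSetup {
    // Illustrative only: attach a query id to the job configuration so that
    // code calling job.getConfiguration().get("query.id") can find it later.
    public static Job newJobWithQueryId() throws IOException {
        Configuration conf = new Configuration();
        conf.set("query.id", UUID.randomUUID().toString()); // example value
        return Job.getInstance(conf); // Job implements JobContext
    }
}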

From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java

License:Apache License

/**
 * Prune the blocklets using the filter expression with available datamaps.
 * First pruned with default blocklet datamap, then pruned with CG and FG datamaps
 */
private List<ExtendedBlocklet> getPrunedBlocklets(JobContext job, CarbonTable carbonTable,
        Expression expression, List<Segment> segmentIds) throws IOException {
    ExplainCollector.addPruningInfo(carbonTable.getTableName());
    FilterResolverIntf resolver = null;
    if (expression != null) {
        carbonTable.processFilterExpression(expression, null, null);
        resolver = CarbonTable.resolveFilter(expression, carbonTable.getAbsoluteTableIdentifier());
        ExplainCollector.setFilterStatement(expression.getStatement());
    } else {
        ExplainCollector.setFilterStatement("none");
    }

    boolean distributedCG = Boolean.parseBoolean(
            CarbonProperties.getInstance().getProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP,
                    CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP_DEFAULT));
    DataMapJob dataMapJob = DataMapUtil.getDataMapJob(job.getConfiguration());
    List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());
    // First prune using default datamap on driver side.
    TableDataMap defaultDataMap = DataMapStoreManager.getInstance().getDefaultDataMap(carbonTable);
    List<ExtendedBlocklet> prunedBlocklets = null;
    // Log the event so that the user can see from the logs what is happening.
    LOG.info("Started block pruning ...");
    if (carbonTable.isTransactionalTable()) {
        prunedBlocklets = defaultDataMap.prune(segmentIds, resolver, partitionsToPrune);
    } else {
        prunedBlocklets = defaultDataMap.prune(segmentIds, expression, partitionsToPrune);
    }

    ExplainCollector.setDefaultDataMapPruningBlockHit(getBlockCount(prunedBlocklets));

    if (prunedBlocklets.size() == 0) {
        return prunedBlocklets;
    }

    DataMapChooser chooser = new DataMapChooser(getOrCreateCarbonTable(job.getConfiguration()));

    // Get the available CG datamaps and prune further.
    DataMapExprWrapper cgDataMapExprWrapper = chooser.chooseCGDataMap(resolver);
    if (cgDataMapExprWrapper != null) {
        // Prune segments from already pruned blocklets
        pruneSegments(segmentIds, prunedBlocklets);
        List<ExtendedBlocklet> cgPrunedBlocklets;
        // Again prune with CG datamap.
        if (distributedCG && dataMapJob != null) {
            cgPrunedBlocklets = DataMapUtil.executeDataMapJob(carbonTable, resolver, segmentIds,
                    cgDataMapExprWrapper, dataMapJob, partitionsToPrune);
        } else {
            cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds, partitionsToPrune);
        }
        // Since the index datamap prunes at segment scope,
        // the result needs to be intersected with the previously pruned result.
        prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
        ExplainCollector.recordCGDataMapPruning(
                DataMapWrapperSimpleInfo.fromDataMapWrapper(cgDataMapExprWrapper), prunedBlocklets.size(),
                getBlockCount(prunedBlocklets));
    }

    if (prunedBlocklets.size() == 0) {
        return prunedBlocklets;
    }
    // Now try to prune with FG DataMap.
    if (isFgDataMapPruningEnable(job.getConfiguration()) && dataMapJob != null) {
        DataMapExprWrapper fgDataMapExprWrapper = chooser.chooseFGDataMap(resolver);
        if (fgDataMapExprWrapper != null) {
            // Prune segments from already pruned blocklets
            pruneSegments(segmentIds, prunedBlocklets);
            List<ExtendedBlocklet> fgPrunedBlocklets = DataMapUtil.executeDataMapJob(carbonTable, resolver,
                    segmentIds, fgDataMapExprWrapper, dataMapJob, partitionsToPrune);
            // Note that 'fgPrunedBlocklets' carries extra datamap-related info compared with
            // 'prunedBlocklets', so the intersection should keep the elements from 'fgPrunedBlocklets'.
            prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, fgPrunedBlocklets);
            ExplainCollector.recordFGDataMapPruning(
                    DataMapWrapperSimpleInfo.fromDataMapWrapper(fgDataMapExprWrapper), prunedBlocklets.size(),
                    getBlockCount(prunedBlocklets));
        }
    }
    LOG.info("Finished block pruning ...");
    return prunedBlocklets;
}

From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java

License:Apache License

@Override
protected boolean isSplitable(JobContext context, Path filename) {
    try {
        // Don't split the file if it is on a local file system
        FileSystem fileSystem = filename.getFileSystem(context.getConfiguration());
        if (fileSystem instanceof LocalFileSystem) {
            return false;
        }
    } catch (Exception e) {
        return true;
    }
    return true;
}
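
A hedged variation on the same idea: splitability can also be driven by a user-set flag read from the JobContext configuration. The key "example.input.splittable" is made up for illustration and is not a real Hadoop or CarbonData property.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ToggleSplitInputFormat extends TextInputFormat {

    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        // Respect an explicit opt-out before falling back to the default
        // compression-codec based check in TextInputFormat.
        boolean splittable = context.getConfiguration()
                .getBoolean("example.input.splittable", true);
        return splittable && super.isSplitable(context, file);
    }
}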

From source file:org.apache.carbondata.hadoop.api.CarbonOutputCommitter.java

License:Apache License

/**
 * Update the table status to in-progress while setting up the job.
 *
 * @param context
 * @throws IOException
 */
@Override
public void setupJob(JobContext context) throws IOException {
    super.setupJob(context);
    boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
    CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
    if (loadModel.getSegmentId() == null) {
        CarbonLoaderUtil.readAndUpdateLoadProgressInTableMeta(loadModel, overwriteSet);
    }
    // Take segment lock
    segmentLock = CarbonLockFactory.getCarbonLockObj(
            loadModel.getCarbonDataLoadSchema().getCarbonTable().getAbsoluteTableIdentifier(),
            CarbonTablePath.addSegmentPrefix(loadModel.getSegmentId()) + LockUsage.LOCK);
    if (!segmentLock.lockWithRetries()) {
        throw new RuntimeException("Already segment is locked for loading, not supposed happen");
    }
    CarbonTableOutputFormat.setLoadModel(context.getConfiguration(), loadModel);
}
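
The committer above recovers the overwrite flag and the load model from the job configuration that the driver populated beforehand. A generic, hedged sketch of that hand-off pattern, using a made-up key rather than CarbonData's helpers, could look like this:

import java.io.IOException;

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class FlagAwareCommitter extends OutputCommitter {

    private boolean overwrite;

    @Override
    public void setupJob(JobContext context) throws IOException {
        // Read back a flag the driver placed on the job configuration.
        overwrite = context.getConfiguration().getBoolean("example.overwrite", false);
    }

    @Override
    public void commitJob(JobContext context) throws IOException {
        if (overwrite) {
            // ... overwrite-specific finalization would go here ...
        }
    }

    // Remaining OutputCommitter hooks are no-ops in this sketch.
    @Override
    public void setupTask(TaskAttemptContext context) { }

    @Override
    public boolean needsTaskCommit(TaskAttemptContext context) { return false; }

    @Override
    public void commitTask(TaskAttemptContext context) { }

    @Override
    public void abortTask(TaskAttemptContext context) { }
}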

From source file:org.apache.carbondata.hadoop.api.CarbonOutputCommitter.java

License:Apache License

/**
 * Update the table status to success after the job succeeds.
 *
 * @param context
 * @throws IOException
 */
@Override
public void commitJob(JobContext context) throws IOException {
    try {
        super.commitJob(context);
    } catch (IOException e) {
        // Ignore: in case of a concurrent load, an attempt by another load to remove temporary
        // folders may cause a file-not-found exception. This does not impact the carbon load.
        LOGGER.warn(e.getMessage());
    }
    boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
    CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
    LoadMetadataDetails newMetaEntry = loadModel.getCurrentLoadMetadataDetail();
    String readPath = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
            + CarbonCommonConstants.FILE_SEPARATOR + loadModel.getSegmentId() + "_"
            + loadModel.getFactTimeStamp() + ".tmp";
    // Merge all partition files into a single file.
    String segmentFileName = SegmentFileStore.genSegmentFileName(loadModel.getSegmentId(),
            String.valueOf(loadModel.getFactTimeStamp()));
    SegmentFileStore.SegmentFile segmentFile = SegmentFileStore.mergeSegmentFiles(readPath, segmentFileName,
            CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()));
    if (segmentFile != null) {
        if (null == newMetaEntry) {
            throw new RuntimeException("Internal Error");
        }
        // Move all files from temp directory of each segment to partition directory
        SegmentFileStore.moveFromTempFolder(segmentFile,
                loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + ".tmp",
                loadModel.getTablePath());
        newMetaEntry.setSegmentFile(segmentFileName + CarbonTablePath.SEGMENT_EXT);
    }
    OperationContext operationContext = (OperationContext) getOperationContext();
    String uuid = "";
    if (loadModel.getCarbonDataLoadSchema().getCarbonTable().isChildDataMap() && operationContext != null) {
        uuid = operationContext.getProperty("uuid").toString();
    }
    CarbonLoaderUtil.populateNewLoadMetaEntry(newMetaEntry, SegmentStatus.SUCCESS, loadModel.getFactTimeStamp(),
            true);
    CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    long segmentSize = CarbonLoaderUtil.addDataIndexSizeIntoMetaEntry(newMetaEntry, loadModel.getSegmentId(),
            carbonTable);
    if (segmentSize > 0 || overwriteSet) {
        if (operationContext != null && carbonTable.hasAggregationDataMap()) {
            operationContext.setProperty("current.segmentfile", newMetaEntry.getSegmentFile());
            LoadEvents.LoadTablePreStatusUpdateEvent event = new LoadEvents.LoadTablePreStatusUpdateEvent(
                    carbonTable.getCarbonTableIdentifier(), loadModel);
            try {
                OperationListenerBus.getInstance().fireEvent(event, operationContext);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
        String uniqueId = null;
        if (overwriteSet) {
            if (!loadModel.isCarbonTransactionalTable()) {
                CarbonLoaderUtil.deleteNonTransactionalTableForInsertOverwrite(loadModel);
            } else {
                if (segmentSize == 0) {
                    newMetaEntry.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE);
                }
                uniqueId = overwritePartitions(loadModel, newMetaEntry, uuid);
            }
        } else {
            CarbonLoaderUtil.recordNewLoadMetadata(newMetaEntry, loadModel, false, false, uuid);
        }
        DataMapStatusManager.disableAllLazyDataMaps(carbonTable);
        if (operationContext != null) {
            LoadEvents.LoadTablePostStatusUpdateEvent postStatusUpdateEvent = new LoadEvents.LoadTablePostStatusUpdateEvent(
                    loadModel);
            try {
                OperationListenerBus.getInstance().fireEvent(postStatusUpdateEvent, operationContext);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
        String updateTime = context.getConfiguration().get(CarbonTableOutputFormat.UPADTE_TIMESTAMP, null);
        String segmentsToBeDeleted = context.getConfiguration()
                .get(CarbonTableOutputFormat.SEGMENTS_TO_BE_DELETED, "");
        List<Segment> segmentDeleteList = Segment.toSegmentList(segmentsToBeDeleted.split(","), null);
        Set<Segment> segmentSet = new HashSet<>(
                new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier(), context.getConfiguration())
                        .getValidAndInvalidSegments().getValidSegments());
        if (updateTime != null) {
            CarbonUpdateUtil.updateTableMetadataStatus(segmentSet, carbonTable, updateTime, true,
                    segmentDeleteList);
        } else if (uniqueId != null) {
            CarbonUpdateUtil.updateTableMetadataStatus(segmentSet, carbonTable, uniqueId, true,
                    segmentDeleteList);
        }
    } else {
        CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
    }
    if (segmentLock != null) {
        segmentLock.unlock();
    }
}
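
A side note on the configuration reads above: the comma-separated segment list is fetched with get(...) and split manually; Hadoop's Configuration also offers a built-in helper for that pattern. The key below is a placeholder, not the actual value of CarbonTableOutputFormat.SEGMENTS_TO_BE_DELETED.

import org.apache.hadoop.conf.Configuration;

public class SegmentListRead {
    // getTrimmedStrings splits the property on commas, trims whitespace,
    // and returns an empty array when the property is unset.
    public static String[] segmentsToDelete(Configuration conf) {
        return conf.getTrimmedStrings("example.segments.to.be.deleted");
    }
}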

From source file:org.apache.carbondata.hadoop.api.CarbonOutputCommitter.java

License:Apache License

/**
 * Update the table status to failed if any failure happens, and also clean up the temp
 * folders if any exist.
 *
 * @param context
 * @param state
 * @throws IOException
 */
@Override
public void abortJob(JobContext context, JobStatus.State state) throws IOException {
    try {
        super.abortJob(context, state);
        CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
        CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
        String segmentFileName = loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp();
        LoadMetadataDetails metadataDetail = loadModel.getCurrentLoadMetadataDetail();
        if (metadataDetail != null) {
            // If the segment file is already created for this job, just link it so that it
            // will be used during cleanup.
            if (!metadataDetail.getSegmentStatus().equals(SegmentStatus.SUCCESS)) {
                String readPath = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
                        + CarbonCommonConstants.FILE_SEPARATOR + segmentFileName + CarbonTablePath.SEGMENT_EXT;
                if (FileFactory.getCarbonFile(readPath).exists()) {
                    metadataDetail.setSegmentFile(segmentFileName + CarbonTablePath.SEGMENT_EXT);
                }
            }
        }
        // Clean the temp files
        CarbonFile segTmpFolder = FileFactory
                .getCarbonFile(CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
                        + CarbonCommonConstants.FILE_SEPARATOR + segmentFileName + ".tmp");
        // delete temp segment folder
        if (segTmpFolder.exists()) {
            FileFactory.deleteAllCarbonFilesOfDir(segTmpFolder);
        }
        CarbonFile segmentFilePath = FileFactory
                .getCarbonFile(CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
                        + CarbonCommonConstants.FILE_SEPARATOR + segmentFileName + CarbonTablePath.SEGMENT_EXT);
        // Delete the temp data folders of this job if they exist
        if (segmentFilePath.exists()) {
            SegmentFileStore fileStore = new SegmentFileStore(loadModel.getTablePath(),
                    segmentFileName + CarbonTablePath.SEGMENT_EXT);
            SegmentFileStore.removeTempFolder(fileStore.getLocationMap(), segmentFileName + ".tmp",
                    loadModel.getTablePath());
        }
        LOGGER.error("Loading failed with job status : " + state);
    } finally {
        if (segmentLock != null) {
            segmentLock.unlock();
        }
    }
}

From source file:org.apache.carbondata.hadoop.api.CarbonTableInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {

    // Works in the following steps:
    // 1. get all currently valid segments
    // 2. for each segment, get all input splits

    List<InputSplit> output = new LinkedList<>();
    Expression filter = getFilter(job.getConfiguration());
    Segment[] segments = segmentManager.getAllValidSegments();
    FilterResolverIntf filterResolver = CarbonInputFormatUtil.resolveFilter(filter, null);
    for (Segment segment : segments) {
        List<InputSplit> splits = segment.getSplits(job, filterResolver);
        output.addAll(splits);
    }
    return output;
}

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

/**
 * {@inheritDoc}
 * The configuration FileInputFormat.INPUT_DIR
 * is used to get the table path to read.
 *
 * @param job
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    try {
        CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
        Object filterPredicates = getFilterPredicates(job.getConfiguration());
        if (getValidSegments(job).length == 0) {
            // Get the valid segments from the carbon store.
            SegmentStatusManager.ValidSegmentsInfo validSegments = new SegmentStatusManager(
                    getAbsoluteTableIdentifier(job.getConfiguration())).getValidSegments();
            if (validSegments.listOfValidSegments.isEmpty()) {
                return new ArrayList<InputSplit>();
            }
            setSegmentsToAccess(job.getConfiguration(), validSegments.listOfValidSegments);
        }

        if (filterPredicates == null) {
            return getSplitsInternal(job);
        } else {
            if (filterPredicates instanceof Expression) {
                // Process and resolve the expression.
                CarbonInputFormatUtil.processFilterExpression((Expression) filterPredicates, carbonTable);
                return getSplits(job, CarbonInputFormatUtil.resolveFilter((Expression) filterPredicates,
                        getAbsoluteTableIdentifier(job.getConfiguration())));
            } else {
                // The user has set an already resolved expression.
                return getSplits(job, (FilterResolverIntf) filterPredicates);
            }
        }
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}
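
The javadoc above notes that FileInputFormat.INPUT_DIR supplies the table path to read. A hedged driver-side sketch of how that property is typically populated (the class name is illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class InputDirSetup {
    public static Job configure(String tablePath) throws IOException {
        Job job = Job.getInstance(new Configuration());
        // addInputPath writes into FileInputFormat.INPUT_DIR, which getSplits
        // later reads back through job.getConfiguration().
        FileInputFormat.addInputPath(job, new Path(tablePath));
        return job;
    }
}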

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

/**
 * {@inheritDoc}
 * Configurations FileInputFormat.INPUT_DIR and CarbonInputFormat.INPUT_SEGMENT_NUMBERS
 * are used to get the table path and the segments to read.
 *
 * @return
 * @throws IOException
 */
private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver)
        throws IOException, IndexBuilderException {

    List<InputSplit> result = new LinkedList<InputSplit>();

    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();

    AbsoluteTableIdentifier absoluteTableIdentifier = getAbsoluteTableIdentifier(job.getConfiguration());

    // for each segment, fetch blocks matching the filter in the driver BTree
    for (String segmentNo : getValidSegments(job)) {
        List<DataRefNode> dataRefNodes = getDataBlocksOfSegment(job, filterExpressionProcessor,
                absoluteTableIdentifier, filterResolver, segmentNo);
        for (DataRefNode dataRefNode : dataRefNodes) {
            BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
            TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
            result.add(new CarbonInputSplit(segmentNo, new Path(tableBlockInfo.getFilePath()),
                    tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(),
                    tableBlockInfo.getLocations()));
        }
    }
    return result;
}

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

/**
 * Get the total number of rows. Same as count(*).
 *
 * @throws IOException
 * @throws IndexBuilderException
 */
public long getRowCount(JobContext job) throws IOException, IndexBuilderException {

    long rowCount = 0;
    AbsoluteTableIdentifier absoluteTableIdentifier = getAbsoluteTableIdentifier(job.getConfiguration());
    SegmentStatusManager.ValidSegmentsInfo validSegments = new SegmentStatusManager(
            getAbsoluteTableIdentifier(job.getConfiguration())).getValidSegments();
    setSegmentsToAccess(job.getConfiguration(), validSegments.listOfValidSegments);
    // number of cores to use for loading the blocks in the driver
    int numberOfCores = CarbonCommonConstants.NUMBER_OF_CORE_TO_LOAD_DRIVER_SEGMENT_DEFAULT_VALUE;
    try {
        numberOfCores = Integer.parseInt(CarbonProperties.getInstance()
                .getProperty(CarbonCommonConstants.NUMBER_OF_CORE_TO_LOAD_DRIVER_SEGMENT));
    } catch (NumberFormatException e) {
        numberOfCores = CarbonCommonConstants.NUMBER_OF_CORE_TO_LOAD_DRIVER_SEGMENT_DEFAULT_VALUE;
    }
    // creating a thread pool
    ExecutorService threadPool = Executors.newFixedThreadPool(numberOfCores);
    List<Future<Map<String, AbstractIndex>>> loadedBlocks = new ArrayList<Future<Map<String, AbstractIndex>>>();
    // for each segment, fetch blocks matching the filter in the driver BTree
    for (String segmentNo : getValidSegments(job)) {
        // submitting the task
        loadedBlocks.add(threadPool.submit(new BlocksLoaderThread(job, absoluteTableIdentifier, segmentNo)));
    }
    threadPool.shutdown();
    try {
        threadPool.awaitTermination(1, TimeUnit.HOURS);
    } catch (InterruptedException e) {
        throw new IndexBuilderException(e);
    }
    try {
        // add up the rows of all loaded blocks to get the total row count
        for (Future<Map<String, AbstractIndex>> block : loadedBlocks) {
            for (AbstractIndex abstractIndex : block.get().values()) {
                rowCount += abstractIndex.getTotalNumberOfRows();
            }
        }
    } catch (InterruptedException | ExecutionException e) {
        throw new IndexBuilderException(e);
    }
    return rowCount;
}