Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
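
Before the project examples below, here is a minimal sketch of the common pattern: a custom InputFormat reads job-level settings through JobContext.getConfiguration() inside getSplits(). The class name MyInputFormat and the property my.custom.input.path are hypothetical and are used only to illustrate the call.

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class MyInputFormat extends InputFormat<Object, Object> {

    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        // Job-level settings are exposed to the InputFormat through the JobContext's Configuration.
        Configuration conf = job.getConfiguration();
        String inputPath = conf.get("my.custom.input.path", "/tmp/input"); // hypothetical property
        FileSystem fs = new Path(inputPath).getFileSystem(conf);
        // A real implementation would list files under inputPath via fs and build one split per file/block.
        return Collections.emptyList();
    }

    @Override
    public RecordReader<Object, Object> createRecordReader(InputSplit split, TaskAttemptContext context) {
        // Not needed for this sketch; a real InputFormat must return a RecordReader here.
        throw new UnsupportedOperationException("not implemented in this sketch");
    }
}

As in the CarbonData and Cassandra examples below, the same Configuration object is typically also passed to Path.getFileSystem and to helper classes that read job properties.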

Usage

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

private Map<String, AbstractIndex> getSegmentAbstractIndexs(JobContext job,
        AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId)
        throws IOException, IndexBuilderException {
    Map<String, AbstractIndex> segmentIndexMap = SegmentTaskIndexStore.getInstance()
            .getSegmentBTreeIfExists(absoluteTableIdentifier, segmentId);

    // if segment tree is not loaded, load the segment tree
    if (segmentIndexMap == null) {
        // List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
        List<TableBlockInfo> tableBlockInfoList = new LinkedList<TableBlockInfo>();
        // getFileStatusOfSegments(job, new int[]{ segmentId }, fileStatusList);

        // get file location of all files of given segment
        JobContext newJob = new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
        newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");

        // identify table blocks
        for (InputSplit inputSplit : getSplitsInternal(newJob)) {
            CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
            tableBlockInfoList
                    .add(new TableBlockInfo(carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(),
                            segmentId, carbonInputSplit.getLocations(), carbonInputSplit.getLength()));
        }

        Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
        segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);

        // get Btree blocks for given segment
        segmentIndexMap = SegmentTaskIndexStore.getInstance()
                .loadAndGetTaskIdToSegmentsMap(segmentToTableBlocksInfos, absoluteTableIdentifier);

    }
    return segmentIndexMap;
}

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result)
        throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }

    PathFilter inputFilter = getDataFileFilter(job);
    CarbonTablePath tablePath = getTablePath(job.getConfiguration());

    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath }, job.getConfiguration());

    //get all data files of valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];

        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());

            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
            while (iter.hasNext()) {
                LocatedFileStatus stat = iter.next();
                if (inputFilter.accept(stat.getPath())) {
                    if (stat.isDirectory()) {
                        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                    } else {
                        result.add(stat);
                    }
                }
            }
        }
    }
}

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

/**
 * @return the valid segment ids configured for this job
 */
private String[] getValidSegments(JobContext job) throws IOException {
    String segmentString = job.getConfiguration().get(INPUT_SEGMENT_NUMBERS, "");
    // if no segments
    if (segmentString.trim().isEmpty()) {
        return new String[0];
    }

    String[] segments = segmentString.split(",");
    String[] segmentIds = new String[segments.length];
    int i = 0;
    try {
        for (; i < segments.length; i++) {
            // parse to validate that the segment id is an integer
            Integer.parseInt(segments[i]);
            segmentIds[i] = segments[i];
        }
    } catch (NumberFormatException e) {
        throw new IOException("segment no:" + segments[i] + " should be an integer");
    }
    return segmentIds;
}

From source file:org.apache.carbondata.hadoop.internal.segment.Segment.java

License:Apache License

/**
 * Return all InputSplits of this segment; each file is one InputSplit.
 * @param job job context
 * @return all InputSplits
 * @throws IOException
 */
public List<InputSplit> getAllSplits(JobContext job) throws IOException {
    List<InputSplit> result = new ArrayList<>();
    Path p = new Path(path);
    FileSystem fs = p.getFileSystem(job.getConfiguration());

    //TODO: filter out the hidden files
    FileStatus[] files = fs.globStatus(p);
    for (FileStatus file : files) {
        // make split and add to result
    }
    return result;
}

From source file:org.apache.cassandra.hadoop2.AbstractColumnFamilyInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    logger.info("-------------------- Getting input splits --------------------");
    Configuration conf = context.getConfiguration();

    validateConfiguration(conf);

    // Canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);
    logger.info("Got " + masterRangeNodes.size() + " master range nodes");

    keyspace = ConfigHelper.getInputKeyspace(context.getConfiguration());
    cfName = ConfigHelper.getInputColumnFamily(context.getConfiguration());
    partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());
    logger.debug("partitioner is " + partitioner);

    // Canonical ranges, split into pieces, fetching the splits in parallel
    int maxThreads = ConfigHelper.getMaxThreads(conf);
    logger.debug("Max threads: {}", maxThreads);
    ExecutorService executor = (maxThreads == 0) ? Executors.newCachedThreadPool()
            : Executors.newFixedThreadPool(maxThreads);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        Map<Future<List<InputSplit>>, SplitCallable> splitfutures = new HashMap<Future<List<InputSplit>>, SplitCallable>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key == null) {
                logger.warn("ignoring jobKeyRange specified without start_key");
            } else {
                if (!partitioner.preservesOrder()) {
                    throw new UnsupportedOperationException(
                            "KeyRange based on keys can only be used with a order preserving paritioner");
                }
                if (jobKeyRange.start_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                if (jobKeyRange.end_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                jobRange = new Range<Token>(partitioner.getToken(jobKeyRange.start_key),
                        partitioner.getToken(jobKeyRange.end_key), partitioner);
            }
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                //logger.info("Getting input splits for null jobRange (user did not supply a key range)");
                // for each range, pick a live owner and ask it to compute bite-sized splits
                SplitCallable callable = new SplitCallable(range, conf);
                Future<List<InputSplit>> future = executor.submit(callable);
                splitfutures.put(future, callable);
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        SplitCallable callable = new SplitCallable(range, conf);
                        Future<List<InputSplit>> future = executor.submit(callable);
                        splitfutures.put(future, callable);
                    }
                }
            }
        }

        logger.info("There are a total of " + splitfutures.size() + " splitFutures to turn into input splits!");

        // wait until we have all the results back
        int retries = 0;
        int maxRetries = ConfigHelper.getMaxRetries(conf);
        logger.debug("Max Retries: {}", maxRetries);

        while (!splitfutures.isEmpty()) {
            Iterator<Future<List<InputSplit>>> iterator = ImmutableList.copyOf(splitfutures.keySet())
                    .iterator();
            //noinspection WhileLoopReplaceableByForEach
            while (iterator.hasNext()) {
                Future<List<InputSplit>> split = iterator.next();
                try {
                    splits.addAll(split.get());
                    splitfutures.remove(split);
                } catch (Exception e) {
                    if (retries >= maxRetries) {
                        throw new IOException("Could not get input splits", e);
                    }
                    SplitCallable callable = splitfutures.get(split);
                    logger.error("Failed to fetch split: {} - retrying.", callable, e);

                    // Remove failed split future
                    splitfutures.remove(split);

                    Future<List<InputSplit>> future = executor.submit(callable);
                    splitfutures.put(future, callable);
                    retries += 1;
                }
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}

From source file:org.apache.cassandra.hadoop2.AbstractColumnFamilyOutputFormat.java

License:Apache License

/**
 * Check for validity of the output-specification for the job.
 *
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
public void checkOutputSpecs(JobContext context) {
    checkOutputSpecs(context.getConfiguration());
}

From source file:org.apache.cassandra.hadoop2.BulkOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext context) {
    checkOutputSpecs(context.getConfiguration());
}

From source file:org.apache.cassandra.hadoop2.multiquery.MultiQueryCqlInputFormat.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    return getSplitsFromConf(conf);
}

From source file:org.apache.crunch.impl.mr.run.CrunchCombineFileInputFormat.java

License:Apache License

public CrunchCombineFileInputFormat(JobContext jobContext) {
    if (getMaxSplitSize(jobContext) == Long.MAX_VALUE) {
        Configuration conf = jobContext.getConfiguration();
        if (conf.get(RuntimeParameters.COMBINE_FILE_BLOCK_SIZE) != null) {
            setMaxSplitSize(conf.getLong(RuntimeParameters.COMBINE_FILE_BLOCK_SIZE, 0));
        } else {
            setMaxSplitSize(jobContext.getConfiguration().getLong("dfs.block.size", 134217728L));
        }
    }
}