List of usage examples for org.apache.hadoop.mapreduce.JobContext#getConfiguration()
Method signature: public Configuration getConfiguration();
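Before the examples, a minimal sketch of the typical pattern: pull the job's Configuration from the JobContext and read a property from it. The property name example.min.records and the helper class are hypothetical illustrations, not part of the Hadoop API.

    // Hypothetical helper: read a job property through JobContext.getConfiguration().
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.JobContext;

    public class ExampleUsage {
        public static int readMinRecords(JobContext context) {
            Configuration conf = context.getConfiguration();
            // getInt(name, defaultValue) is the standard Configuration accessor
            return conf.getInt("example.min.records", 1);
        }
    }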
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private Map<String, AbstractIndex> getSegmentAbstractIndexs(JobContext job,
    AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId)
    throws IOException, IndexBuilderException {
  Map<String, AbstractIndex> segmentIndexMap = SegmentTaskIndexStore.getInstance()
      .getSegmentBTreeIfExists(absoluteTableIdentifier, segmentId);

  // if segment tree is not loaded, load the segment tree
  if (segmentIndexMap == null) {
    // List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
    List<TableBlockInfo> tableBlockInfoList = new LinkedList<TableBlockInfo>();
    // getFileStatusOfSegments(job, new int[]{ segmentId }, fileStatusList);

    // get file location of all files of the given segment
    JobContext newJob =
        new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
    newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");

    // identify table blocks
    for (InputSplit inputSplit : getSplitsInternal(newJob)) {
      CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
      tableBlockInfoList.add(new TableBlockInfo(carbonInputSplit.getPath().toString(),
          carbonInputSplit.getStart(), segmentId, carbonInputSplit.getLocations(),
          carbonInputSplit.getLength()));
    }

    Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
    segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);

    // get Btree blocks for the given segment
    segmentIndexMap = SegmentTaskIndexStore.getInstance()
        .loadAndGetTaskIdToSegmentsMap(segmentToTableBlocksInfos, absoluteTableIdentifier);
  }
  return segmentIndexMap;
}
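The snippet above copies the job's Configuration before overriding the segment filter, so the caller's JobContext is never mutated. A minimal sketch of that copy-then-override pattern, assuming the same CarbonInputFormat.INPUT_SEGMENT_NUMBERS constant and Hadoop's JobContextImpl used in the snippet ("2" is an illustrative segment id):

    // Clone the Configuration so the original JobContext is left untouched,
    // then scope the override to a new, temporary JobContext.
    Configuration copy = new Configuration(job.getConfiguration());
    copy.set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, "2"); // illustrative segment id
    JobContext scopedJob = new JobContextImpl(copy, job.getJobID());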
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider,
    List<FileStatus> result) throws IOException {
  String[] partitionsToConsider = getValidPartitions(job);
  if (partitionsToConsider.length == 0) {
    throw new IOException("No partitions/data found");
  }

  PathFilter inputFilter = getDataFileFilter(job);
  CarbonTablePath tablePath = getTablePath(job.getConfiguration());

  // get tokens for all the required FileSystems for the table path
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath },
      job.getConfiguration());

  // get all data files of valid partitions and segments
  for (int i = 0; i < partitionsToConsider.length; ++i) {
    String partition = partitionsToConsider[i];
    for (int j = 0; j < segmentsToConsider.length; ++j) {
      String segmentId = segmentsToConsider[j];
      Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
      FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());

      RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
      while (iter.hasNext()) {
        LocatedFileStatus stat = iter.next();
        if (inputFilter.accept(stat.getPath())) {
          if (stat.isDirectory()) {
            addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
          } else {
            result.add(stat);
          }
        }
      }
    }
  }
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
/**
 * @return the valid segment ids configured for the job
 */
private String[] getValidSegments(JobContext job) throws IOException {
  String segmentString = job.getConfiguration().get(INPUT_SEGMENT_NUMBERS, "");
  // if no segments are configured
  if (segmentString.trim().isEmpty()) {
    return new String[0];
  }

  String[] segments = segmentString.split(",");
  String[] segmentIds = new String[segments.length];
  int i = 0;
  try {
    for (; i < segments.length; i++) {
      // validate that each segment id is an integer before accepting it
      Integer.parseInt(segments[i]);
      segmentIds[i] = segments[i];
    }
  } catch (NumberFormatException e) {
    throw new IOException("segment no:" + segments[i] + " should be integer");
  }
  return segmentIds;
}
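getValidSegments reads INPUT_SEGMENT_NUMBERS as a comma-separated list of segment ids. A hypothetical driver-side counterpart, assuming the same constant on CarbonInputFormat (the ids "0,1,2" are illustrative), might set the property like this:

    // Sketch of configuring the segment filter before submitting the job.
    Job job = Job.getInstance(new Configuration());
    job.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, "0,1,2");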
From source file:org.apache.carbondata.hadoop.internal.segment.Segment.java
License:Apache License
/**
 * Return all InputSplits of this segment; each file is one InputSplit.
 * @param job job context
 * @return all InputSplits
 * @throws IOException
 */
public List<InputSplit> getAllSplits(JobContext job) throws IOException {
  List<InputSplit> result = new ArrayList<>();
  Path p = new Path(path);
  FileSystem fs = p.getFileSystem(job.getConfiguration());

  // TODO: filter out the hidden files
  FileStatus[] files = fs.globStatus(p);
  for (FileStatus file : files) {
    // make split and add to result
  }
  return result;
}
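The loop body above is left as a placeholder. One possible sketch of it, using Hadoop's generic FileSplit rather than any CarbonData-specific split type (an assumption for illustration, not the project's actual implementation):

    // Minimal sketch: turn each non-directory FileStatus into a FileSplit.
    for (FileStatus file : files) {
      if (!file.isDirectory()) {
        result.add(new org.apache.hadoop.mapreduce.lib.input.FileSplit(
            file.getPath(), 0, file.getLen(), null));
      }
    }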
From source file:org.apache.cassandra.hadoop2.AbstractColumnFamilyInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext context) throws IOException {
  logger.info("-------------------- Getting input splits --------------------");
  Configuration conf = context.getConfiguration();
  validateConfiguration(conf);

  // Canonical ranges and nodes holding replicas
  List<TokenRange> masterRangeNodes = getRangeMap(conf);
  logger.info("Got " + masterRangeNodes.size() + " master range nodes");

  keyspace = ConfigHelper.getInputKeyspace(context.getConfiguration());
  cfName = ConfigHelper.getInputColumnFamily(context.getConfiguration());
  partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());
  logger.debug("partitioner is " + partitioner);

  // Canonical ranges, split into pieces, fetching the splits in parallel
  int maxThreads = ConfigHelper.getMaxThreads(conf);
  logger.debug("Max threads: {}", maxThreads);
  ExecutorService executor = (maxThreads == 0)
      ? Executors.newCachedThreadPool()
      : Executors.newFixedThreadPool(maxThreads);

  List<InputSplit> splits = new ArrayList<InputSplit>();

  try {
    Map<Future<List<InputSplit>>, SplitCallable> splitfutures =
        new HashMap<Future<List<InputSplit>>, SplitCallable>();
    KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
    Range<Token> jobRange = null;
    if (jobKeyRange != null) {
      if (jobKeyRange.start_key == null) {
        logger.warn("ignoring jobKeyRange specified without start_key");
      } else {
        if (!partitioner.preservesOrder()) {
          throw new UnsupportedOperationException(
              "KeyRange based on keys can only be used with an order preserving partitioner");
        }
        if (jobKeyRange.start_token != null) {
          throw new IllegalArgumentException("only start_key supported");
        }
        if (jobKeyRange.end_token != null) {
          throw new IllegalArgumentException("only start_key supported");
        }
        jobRange = new Range<Token>(partitioner.getToken(jobKeyRange.start_key),
            partitioner.getToken(jobKeyRange.end_key), partitioner);
      }
    }

    for (TokenRange range : masterRangeNodes) {
      if (jobRange == null) {
        //logger.info("Getting input splits for null jobRange (user did not supply a key range)");
        // for each range, pick a live owner and ask it to compute bite-sized splits
        SplitCallable callable = new SplitCallable(range, conf);
        Future<List<InputSplit>> future = executor.submit(callable);
        splitfutures.put(future, callable);
      } else {
        Range<Token> dhtRange = new Range<Token>(
            partitioner.getTokenFactory().fromString(range.start_token),
            partitioner.getTokenFactory().fromString(range.end_token), partitioner);

        if (dhtRange.intersects(jobRange)) {
          for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
            range.start_token = partitioner.getTokenFactory().toString(intersection.left);
            range.end_token = partitioner.getTokenFactory().toString(intersection.right);
            // for each range, pick a live owner and ask it to compute bite-sized splits
            SplitCallable callable = new SplitCallable(range, conf);
            Future<List<InputSplit>> future = executor.submit(callable);
            splitfutures.put(future, callable);
          }
        }
      }
    }

    logger.info("There are a total of " + splitfutures.size()
        + " splitFutures to turn into input splits!");

    // wait until we have all the results back
    int retries = 0;
    int maxRetries = ConfigHelper.getMaxRetries(conf);
    logger.debug("Max Retries: {}", maxRetries);
    while (!splitfutures.isEmpty()) {
      Iterator<Future<List<InputSplit>>> iterator =
          ImmutableList.copyOf(splitfutures.keySet()).iterator();
      //noinspection WhileLoopReplaceableByForEach
      while (iterator.hasNext()) {
        Future<List<InputSplit>> split = iterator.next();
        try {
          splits.addAll(split.get());
          splitfutures.remove(split);
        } catch (Exception e) {
          if (retries >= maxRetries) {
            throw new IOException("Could not get input splits", e);
          }
          SplitCallable callable = splitfutures.get(split);
          logger.error("Failed to fetch split: {} - retrying.", callable, e);
          // Remove the failed split future and resubmit its callable
          splitfutures.remove(split);
          Future<List<InputSplit>> future = executor.submit(callable);
          splitfutures.put(future, callable);
          retries += 1;
        }
      }
    }
  } finally {
    executor.shutdownNow();
  }

  assert splits.size() > 0;
  Collections.shuffle(splits, new Random(System.nanoTime()));
  return splits;
}
From source file:org.apache.cassandra.hadoop2.AbstractColumnFamilyOutputFormat.java
License:Apache License
/**
 * Check for validity of the output-specification for the job.
 *
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
public void checkOutputSpecs(JobContext context) {
  checkOutputSpecs(context.getConfiguration());
}
From source file:org.apache.cassandra.hadoop2.BulkOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) {
  checkOutputSpecs(context.getConfiguration());
}
From source file:org.apache.cassandra.hadoop2.multiquery.MultiQueryCqlInputFormat.java
License:Apache License
/**
 * {@inheritDoc}
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  return getSplitsFromConf(conf);
}
From source file:org.apache.crunch.impl.mr.run.CrunchCombineFileInputFormat.java
License:Apache License
public CrunchCombineFileInputFormat(JobContext jobContext) {
  if (getMaxSplitSize(jobContext) == Long.MAX_VALUE) {
    Configuration conf = jobContext.getConfiguration();
    if (conf.get(RuntimeParameters.COMBINE_FILE_BLOCK_SIZE) != null) {
      setMaxSplitSize(conf.getLong(RuntimeParameters.COMBINE_FILE_BLOCK_SIZE, 0));
    } else {
      setMaxSplitSize(jobContext.getConfiguration().getLong("dfs.block.size", 134217728L));
    }
  }
}
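The constructor above prefers Crunch's own block-size parameter and only falls back to dfs.block.size (128 MB). A hedged driver-side sketch of setting that parameter, assuming the same RuntimeParameters.COMBINE_FILE_BLOCK_SIZE constant referenced in the snippet (the 256 MB value is illustrative):

    // Sketch: set Crunch's combine-file block size before planning the pipeline.
    Configuration conf = new Configuration();
    conf.setLong(RuntimeParameters.COMBINE_FILE_BLOCK_SIZE, 256L * 1024 * 1024);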