List of usage examples for org.apache.hadoop.mapreduce.JobContext#getConfiguration()
Method signature: public Configuration getConfiguration();
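Before the examples, a minimal sketch of the typical pattern: pull the job's Configuration from the JobContext and read a property from it. The property name example.min.records and the helper class are hypothetical illustrations, not part of the Hadoop API.

    // Hypothetical helper: read a job property through JobContext.getConfiguration().
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.JobContext;

    public class ExampleUsage {
        public static int readMinRecords(JobContext context) {
            Configuration conf = context.getConfiguration();
            // getInt(name, defaultValue) is the standard Configuration accessor
            return conf.getInt("example.min.records", 1);
        }
    }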
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private Map<String, AbstractIndex> getSegmentAbstractIndexs(JobContext job,
    AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId)
    throws IOException, IndexBuilderException {
  Map<String, AbstractIndex> segmentIndexMap = SegmentTaskIndexStore.getInstance()
      .getSegmentBTreeIfExists(absoluteTableIdentifier, segmentId);

  // if segment tree is not loaded, load the segment tree
  if (segmentIndexMap == null) {
    // List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
    List<TableBlockInfo> tableBlockInfoList = new LinkedList<TableBlockInfo>();
    // getFileStatusOfSegments(job, new int[]{ segmentId }, fileStatusList);

    // get file location of all files of the given segment
    JobContext newJob =
        new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
    newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");

    // identify table blocks
    for (InputSplit inputSplit : getSplitsInternal(newJob)) {
      CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
      tableBlockInfoList.add(new TableBlockInfo(carbonInputSplit.getPath().toString(),
          carbonInputSplit.getStart(), segmentId, carbonInputSplit.getLocations(),
          carbonInputSplit.getLength()));
    }

    Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
    segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);

    // get Btree blocks for the given segment
    segmentIndexMap = SegmentTaskIndexStore.getInstance()
        .loadAndGetTaskIdToSegmentsMap(segmentToTableBlocksInfos, absoluteTableIdentifier);
  }
  return segmentIndexMap;
}
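The snippet above copies the job's Configuration before overriding the segment filter, so the caller's JobContext is never mutated. A minimal sketch of that copy-then-override pattern, assuming the same CarbonInputFormat.INPUT_SEGMENT_NUMBERS constant and Hadoop's JobContextImpl used in the snippet ("2" is an illustrative segment id):

    // Clone the Configuration so the original JobContext is left untouched,
    // then scope the override to a new, temporary JobContext.
    Configuration copy = new Configuration(job.getConfiguration());
    copy.set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, "2"); // illustrative segment id
    JobContext scopedJob = new JobContextImpl(copy, job.getJobID());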
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider,
    List<FileStatus> result) throws IOException {
  String[] partitionsToConsider = getValidPartitions(job);
  if (partitionsToConsider.length == 0) {
    throw new IOException("No partitions/data found");
  }

  PathFilter inputFilter = getDataFileFilter(job);
  CarbonTablePath tablePath = getTablePath(job.getConfiguration());

  // get tokens for all the required FileSystems for the table path
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath },
      job.getConfiguration());

  // get all data files of valid partitions and segments
  for (int i = 0; i < partitionsToConsider.length; ++i) {
    String partition = partitionsToConsider[i];
    for (int j = 0; j < segmentsToConsider.length; ++j) {
      String segmentId = segmentsToConsider[j];
      Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
      FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());

      RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
      while (iter.hasNext()) {
        LocatedFileStatus stat = iter.next();
        if (inputFilter.accept(stat.getPath())) {
          if (stat.isDirectory()) {
            addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
          } else {
            result.add(stat);
          }
        }
      }
    }
  }
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
/**
 * @return the valid segment ids configured for the job
 */
private String[] getValidSegments(JobContext job) throws IOException {
  String segmentString = job.getConfiguration().get(INPUT_SEGMENT_NUMBERS, "");
  // if no segments are configured
  if (segmentString.trim().isEmpty()) {
    return new String[0];
  }

  String[] segments = segmentString.split(",");
  String[] segmentIds = new String[segments.length];
  int i = 0;
  try {
    for (; i < segments.length; i++) {
      // validate that each segment id is an integer before accepting it
      Integer.parseInt(segments[i]);
      segmentIds[i] = segments[i];
    }
  } catch (NumberFormatException e) {
    throw new IOException("segment no:" + segments[i] + " should be integer");
  }
  return segmentIds;
}
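getValidSegments reads INPUT_SEGMENT_NUMBERS as a comma-separated list of segment ids. A hypothetical driver-side counterpart, assuming the same constant on CarbonInputFormat (the ids "0,1,2" are illustrative), might set the property like this:

    // Sketch of configuring the segment filter before submitting the job.
    Job job = Job.getInstance(new Configuration());
    job.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, "0,1,2");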
From source file:org.apache.carbondata.hadoop.internal.segment.Segment.java
License:Apache License
/**
 * Return all InputSplits of this segment; each file is one InputSplit.
 * @param job job context
 * @return all InputSplits
 * @throws IOException
 */
public List<InputSplit> getAllSplits(JobContext job) throws IOException {
  List<InputSplit> result = new ArrayList<>();
  Path p = new Path(path);
  FileSystem fs = p.getFileSystem(job.getConfiguration());

  // TODO: filter out the hidden files
  FileStatus[] files = fs.globStatus(p);
  for (FileStatus file : files) {
    // make split and add to result
  }
  return result;
}
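The loop body above is left as a placeholder. One possible sketch of it, using Hadoop's generic FileSplit rather than any CarbonData-specific split type (an assumption for illustration, not the project's actual implementation):

    // Minimal sketch: turn each non-directory FileStatus into a FileSplit.
    for (FileStatus file : files) {
      if (!file.isDirectory()) {
        result.add(new org.apache.hadoop.mapreduce.lib.input.FileSplit(
            file.getPath(), 0, file.getLen(), null));
      }
    }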
From source file:org.apache.cassandra.hadoop2.AbstractColumnFamilyInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext context) throws IOException {
  logger.info("-------------------- Getting input splits --------------------");
  Configuration conf = context.getConfiguration();
  validateConfiguration(conf);

  // Canonical ranges and nodes holding replicas
  List<TokenRange> masterRangeNodes = getRangeMap(conf);
  logger.info("Got " + masterRangeNodes.size() + " master range nodes");

  keyspace = ConfigHelper.getInputKeyspace(context.getConfiguration());
  cfName = ConfigHelper.getInputColumnFamily(context.getConfiguration());
  partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());
  logger.debug("partitioner is " + partitioner);

  // Canonical ranges, split into pieces, fetching the splits in parallel
  int maxThreads = ConfigHelper.getMaxThreads(conf);
  logger.debug("Max threads: {}", maxThreads);
  ExecutorService executor = (maxThreads == 0)
      ? Executors.newCachedThreadPool()
      : Executors.newFixedThreadPool(maxThreads);

  List<InputSplit> splits = new ArrayList<InputSplit>();

  try {
    Map<Future<List<InputSplit>>, SplitCallable> splitfutures =
        new HashMap<Future<List<InputSplit>>, SplitCallable>();
    KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
    Range<Token> jobRange = null;
    if (jobKeyRange != null) {
      if (jobKeyRange.start_key == null) {
        logger.warn("ignoring jobKeyRange specified without start_key");
      } else {
        if (!partitioner.preservesOrder()) {
          throw new UnsupportedOperationException(
              "KeyRange based on keys can only be used with an order preserving partitioner");
        }
        if (jobKeyRange.start_token != null) {
          throw new IllegalArgumentException("only start_key supported");
        }
        if (jobKeyRange.end_token != null) {
          throw new IllegalArgumentException("only start_key supported");
        }
        jobRange = new Range<Token>(partitioner.getToken(jobKeyRange.start_key),
            partitioner.getToken(jobKeyRange.end_key), partitioner);
      }
    }

    for (TokenRange range : masterRangeNodes) {
      if (jobRange == null) {
        //logger.info("Getting input splits for null jobRange (user did not supply a key range)");
        // for each range, pick a live owner and ask it to compute bite-sized splits
        SplitCallable callable = new SplitCallable(range, conf);
        Future<List<InputSplit>> future = executor.submit(callable);
        splitfutures.put(future, callable);
      } else {
        Range<Token> dhtRange = new Range<Token>(
            partitioner.getTokenFactory().fromString(range.start_token),
            partitioner.getTokenFactory().fromString(range.end_token), partitioner);

        if (dhtRange.intersects(jobRange)) {
          for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
            range.start_token = partitioner.getTokenFactory().toString(intersection.left);
            range.end_token = partitioner.getTokenFactory().toString(intersection.right);
            // for each range, pick a live owner and ask it to compute bite-sized splits
            SplitCallable callable = new SplitCallable(range, conf);
            Future<List<InputSplit>> future = executor.submit(callable);
            splitfutures.put(future, callable);
          }
        }
      }
    }

    logger.info("There are a total of " + splitfutures.size()
        + " splitFutures to turn into input splits!");

    // wait until we have all the results back
    int retries = 0;
    int maxRetries = ConfigHelper.getMaxRetries(conf);
    logger.debug("Max Retries: {}", maxRetries);
    while (!splitfutures.isEmpty()) {
      Iterator<Future<List<InputSplit>>> iterator =
          ImmutableList.copyOf(splitfutures.keySet()).iterator();
      //noinspection WhileLoopReplaceableByForEach
      while (iterator.hasNext()) {
        Future<List<InputSplit>> split = iterator.next();
        try {
          splits.addAll(split.get());
          splitfutures.remove(split);
        } catch (Exception e) {
          if (retries >= maxRetries) {
            throw new IOException("Could not get input splits", e);
          }
          SplitCallable callable = splitfutures.get(split);
          logger.error("Failed to fetch split: {} - retrying.", callable, e);
          // Remove the failed split future and resubmit its callable
          splitfutures.remove(split);
          Future<List<InputSplit>> future = executor.submit(callable);
          splitfutures.put(future, callable);
          retries += 1;
        }
      }
    }
  } finally {
    executor.shutdownNow();
  }

  assert splits.size() > 0;
  Collections.shuffle(splits, new Random(System.nanoTime()));
  return splits;
}
From source file:org.apache.cassandra.hadoop2.AbstractColumnFamilyOutputFormat.java
License:Apache License
/**
 * Check for validity of the output-specification for the job.
 *
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
public void checkOutputSpecs(JobContext context) {
  checkOutputSpecs(context.getConfiguration());
}
From source file:org.apache.cassandra.hadoop2.BulkOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) {
  checkOutputSpecs(context.getConfiguration());
}
From source file:org.apache.cassandra.hadoop2.multiquery.MultiQueryCqlInputFormat.java
License:Apache License
/**
 * {@inheritDoc}
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  return getSplitsFromConf(conf);
}
From source file:org.apache.crunch.impl.mr.run.CrunchCombineFileInputFormat.java
License:Apache License
public CrunchCombineFileInputFormat(JobContext jobContext) {
  if (getMaxSplitSize(jobContext) == Long.MAX_VALUE) {
    Configuration conf = jobContext.getConfiguration();
    if (conf.get(RuntimeParameters.COMBINE_FILE_BLOCK_SIZE) != null) {
      setMaxSplitSize(conf.getLong(RuntimeParameters.COMBINE_FILE_BLOCK_SIZE, 0));
    } else {
      setMaxSplitSize(jobContext.getConfiguration().getLong("dfs.block.size", 134217728L));
    }
  }
}
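The constructor above prefers Crunch's own block-size parameter and only falls back to dfs.block.size (128 MB). A hedged driver-side sketch of setting that parameter, assuming the same RuntimeParameters.COMBINE_FILE_BLOCK_SIZE constant referenced in the snippet (the 256 MB value is illustrative):

    // Sketch: set Crunch's combine-file block size before planning the pipeline.
    Configuration conf = new Configuration();
    conf.setLong(RuntimeParameters.COMBINE_FILE_BLOCK_SIZE, 256L * 1024 * 1024);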