Usage examples for org.apache.hadoop.mapreduce.JobContext.getJobID()
public JobID getJobID();
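Before the project-specific examples, here is a minimal sketch of the common pattern they share: read the JobID from a JobContext and use it to derive a job-scoped name, falling back to a timestamp when no ID was set (the Jena example below uses the same fallback). The JobIdPaths helper class and the /tmp/scratch base directory are illustrative assumptions, not part of any project listed here.

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;

// Hypothetical helper showing typical getJobID() usage:
// derive a per-job scratch directory name from the job's identity.
public final class JobIdPaths {

    private JobIdPaths() {
    }

    /**
     * Builds a job-scoped path such as "/tmp/scratch/job_1700000000000_0042".
     * The base directory "/tmp/scratch" is only an example value.
     */
    public static String jobScopedPath(JobContext context) {
        JobID jobId = context.getJobID();
        // getJobID() can be null if the context was created without an ID,
        // so fall back to the current time in that case.
        String id = (jobId != null) ? jobId.toString() : String.valueOf(System.currentTimeMillis());
        return "/tmp/scratch/" + id;
    }
}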
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
  LOG.info("Abort Job [{0}]", jobContext.getJobID());
  Configuration configuration = jobContext.getConfiguration();
  Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
  makeSureNoEmptyShards(configuration, tableOutput);
  FileSystem fileSystem = tableOutput.getFileSystem(configuration);
  for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
    if (isShard(fileStatus)) {
      commitOrAbortJob(jobContext, fileStatus.getPath(), false);
    }
  }
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private Map<String, AbstractIndex> getSegmentAbstractIndexs(JobContext job,
    AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId)
    throws IOException, IndexBuilderException {
  Map<String, AbstractIndex> segmentIndexMap = SegmentTaskIndexStore.getInstance()
      .getSegmentBTreeIfExists(absoluteTableIdentifier, segmentId);
  // if segment tree is not loaded, load the segment tree
  if (segmentIndexMap == null) {
    // List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
    List<TableBlockInfo> tableBlockInfoList = new LinkedList<TableBlockInfo>();
    // getFileStatusOfSegments(job, new int[]{ segmentId }, fileStatusList);
    // get file location of all files of given segment
    JobContext newJob =
        new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
    newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");
    // identify table blocks
    for (InputSplit inputSplit : getSplitsInternal(newJob)) {
      CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
      tableBlockInfoList.add(new TableBlockInfo(carbonInputSplit.getPath().toString(),
          carbonInputSplit.getStart(), segmentId, carbonInputSplit.getLocations(),
          carbonInputSplit.getLength()));
    }
    Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
    segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
    // get Btree blocks for given segment
    segmentIndexMap = SegmentTaskIndexStore.getInstance()
        .loadAndGetTaskIdToSegmentsMap(segmentToTableBlocksInfos, absoluteTableIdentifier);
  }
  return segmentIndexMap;
}
From source file:org.apache.giraph.block_app.framework.output.BlockOutputFormat.java
License:Apache License
public static Map<String, BlockOutputDesc> createInitAndCheckOutputDescsMap(JobContext jobContext) {
  return createInitAndCheckOutputDescsMap(jobContext.getConfiguration(),
      jobContext.getJobID().toString());
}
From source file:org.apache.giraph.job.HadoopUtils.java
License:Apache License
/**
 * Create a JobContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param jobContext Use JobID from this object
 * @return JobContext
 */
public static JobContext makeJobContext(Configuration conf, JobContext jobContext) {
  return makeJobContext(conf, jobContext.getJobID());
}
From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
/**
 * Run to discover dynamic partitions available
 */
private void discoverPartitions(JobContext context) throws IOException {
  if (!partitionsDiscovered) {
    //   LOG.info("discover ptns called");
    OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context);
    harProcessor.setEnabled(jobInfo.getHarRequested());
    List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols();
    int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();
    Path loadPath = new Path(jobInfo.getLocation());
    FileSystem fs = loadPath.getFileSystem(context.getConfiguration());
    // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
    String dynPathSpec = loadPath.toUri().getPath();
    dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*");
    //   LOG.info("Searching for "+dynPathSpec);
    Path pathPattern = new Path(dynPathSpec);
    FileStatus[] status = fs.globStatus(pathPattern);
    partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>();
    contextDiscoveredByPath = new LinkedHashMap<String, JobContext>();
    if (status.length == 0) {
      //     LOG.warn("No partition found genereated by dynamic partitioning in ["
      //         +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize()
      //         +"], dynSpec["+dynPathSpec+"]");
    } else {
      if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) {
        this.partitionsDiscovered = true;
        throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
            "Number of dynamic partitions being created "
                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                + "] if needed.");
      }
      for (FileStatus st : status) {
        LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>();
        Warehouse.makeSpecFromName(fullPartSpec, st.getPath());
        partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec);
        JobConf jobConf = (JobConf) context.getConfiguration();
        JobContext currContext = HCatMapRedUtil.createJobContext(jobConf, context.getJobID(),
            InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf,
                HCatHadoopShims.Instance.get().createTaskAttemptID())));
        HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec);
        contextDiscoveredByPath.put(st.getPath().toString(), currContext);
      }
    }
    //  for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){
    //    LOG.info("Partition "+ spec.getKey());
    //    for (Entry<String,String> e : spec.getValue().entrySet()){
    //      LOG.info(e.getKey() + "=>" +e.getValue());
    //    }
    //  }
    this.partitionsDiscovered = true;
  }
}
From source file:org.apache.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
/**
 * Get the JobContext with the related OutputFormat configuration populated given the alias
 * and the actual JobContext
 * @param alias the name given to the OutputFormat configuration
 * @param context the JobContext
 * @return a copy of the JobContext with the alias configuration populated
 */
public static JobContext getJobContext(String alias, JobContext context) {
  String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
  JobContext aliasContext = HCatHadoopShims.Instance.get().createJobContext(context.getConfiguration(),
      context.getJobID());
  addToConfig(aliasConf, aliasContext.getConfiguration());
  return aliasContext;
}
From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
/**
 * Run to discover dynamic partitions available
 */
private void discoverPartitions(JobContext context) throws IOException {
  if (!partitionsDiscovered) {
    //   LOG.info("discover ptns called");
    OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
    harProcessor.setEnabled(jobInfo.getHarRequested());
    List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols();
    int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();
    Path loadPath = new Path(jobInfo.getLocation());
    FileSystem fs = loadPath.getFileSystem(context.getConfiguration());
    // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
    String dynPathSpec = loadPath.toUri().getPath();
    dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*");
    //   LOG.info("Searching for "+dynPathSpec);
    Path pathPattern = new Path(dynPathSpec);
    FileStatus[] status = fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER);
    partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>();
    contextDiscoveredByPath = new LinkedHashMap<String, JobContext>();
    if (status.length == 0) {
      //     LOG.warn("No partition found genereated by dynamic partitioning in ["
      //         +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize()
      //         +"], dynSpec["+dynPathSpec+"]");
    } else {
      if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) {
        this.partitionsDiscovered = true;
        throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
            "Number of dynamic partitions being created "
                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                + "] if needed.");
      }
      for (FileStatus st : status) {
        LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>();
        if (!customDynamicLocationUsed) {
          Warehouse.makeSpecFromName(fullPartSpec, st.getPath());
        } else {
          HCatFileUtil.getPartKeyValuesForCustomLocation(fullPartSpec, jobInfo,
              st.getPath().toString());
        }
        partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec);
        JobConf jobConf = (JobConf) context.getConfiguration();
        JobContext currContext = HCatMapRedUtil.createJobContext(jobConf, context.getJobID(),
            InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf,
                ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())));
        HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec);
        contextDiscoveredByPath.put(st.getPath().toString(), currContext);
      }
    }
    //  for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){
    //    LOG.info("Partition "+ spec.getKey());
    //    for (Entry<String,String> e : spec.getValue().entrySet()){
    //      LOG.info(e.getKey() + "=>" +e.getValue());
    //    }
    //  }
    this.partitionsDiscovered = true;
  }
}
From source file:org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
/**
 * Get the JobContext with the related OutputFormat configuration populated given the alias
 * and the actual JobContext
 * @param alias the name given to the OutputFormat configuration
 * @param context the JobContext
 * @return a copy of the JobContext with the alias configuration populated
 */
public static JobContext getJobContext(String alias, JobContext context) {
  String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
  JobContext aliasContext = ShimLoader.getHadoopShims().getHCatShim()
      .createJobContext(context.getConfiguration(), context.getJobID());
  addToConfig(aliasConf, aliasContext.getConfiguration());
  return aliasContext;
}
From source file:org.apache.jena.hadoop.rdf.io.input.util.RdfIOUtils.java
License:Apache License
/**
 * Selects a seed for use in generating blank node identifiers
 *
 * @param context
 *            Job Context
 * @param path
 *            File path
 * @return Seed
 */
public static UUID getSeed(JobContext context, Path path) {
  // This is to ensure that blank node allocation policy is constant when
  // subsequent MapReduce jobs need that
  String jobId = context.getJobID().toString();
  if (jobId == null) {
    jobId = String.valueOf(System.currentTimeMillis());
    LOGGER.warn(
        "Job ID was not set, using current milliseconds of {}. Sequence of MapReduce jobs must carefully handle blank nodes.",
        jobId);
  }
  if (!context.getConfiguration().getBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false)) {
    // Using normal file scoped blank node allocation
    LOGGER.debug("Generating Blank Node Seed from Job Details (ID={}, Input Path={})", jobId, path);
    // Form a reproducible seed for the run
    return new UUID(jobId.hashCode(), path.hashCode());
  } else {
    // Using globally scoped blank node allocation
    LOGGER.warn(
        "Using globally scoped blank node allocation policy from Job Details (ID={}) - this is unsafe if your RDF inputs did not originate from a previous job",
        jobId);
    return new UUID(jobId.hashCode(), 0);
  }
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public List<InputSplit> getSplits(JobContext jobcontext) throws IOException, InterruptedException {
  Configuration conf = jobcontext.getConfiguration();
  ArrayList<FileSpec> inputs;
  ArrayList<ArrayList<OperatorKey>> inpTargets;
  PigContext pigContext;
  try {
    inputs = (ArrayList<FileSpec>) ObjectSerializer.deserialize(conf.get("pig.inputs"));
    inpTargets = (ArrayList<ArrayList<OperatorKey>>) ObjectSerializer
        .deserialize(conf.get("pig.inpTargets"));
    pigContext = (PigContext) ObjectSerializer.deserialize(conf.get("pig.pigContext"));
    PigContext.setPackageImportList(
        (ArrayList<String>) ObjectSerializer.deserialize(conf.get("udf.import.list")));
    MapRedUtil.setupUDFContext(conf);
  } catch (Exception e) {
    int errCode = 2094;
    String msg = "Unable to deserialize object.";
    throw new ExecException(msg, errCode, PigException.BUG, e);
  }
  ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
  for (int i = 0; i < inputs.size(); i++) {
    try {
      Path path = new Path(inputs.get(i).getFileName());
      FileSystem fs;
      boolean isFsPath = true;
      try {
        fs = path.getFileSystem(conf);
      } catch (Exception e) {
        // If an application specific scheme was used
        // (e.g.: "hbase://table") we will fail getting the file system.
        // That's ok, we just use the dfs in that case.
        fs = new Path("/").getFileSystem(conf);
        isFsPath = false;
      }
      // if the execution is against Mapred DFS, set
      // working dir to /user/<userid>
      if (!Utils.isLocal(pigContext, conf)) {
        fs.setWorkingDirectory(jobcontext.getWorkingDirectory());
      }
      // first pass input location to the loader - for this send a
      // clone of the configuration we have - this is so that if the
      // loader (or the inputformat of the loader) decide to store the
      // input location into the configuration (for example,
      // FileInputFormat stores this in mapred.input.dir in the conf),
      // then for different inputs, the loader's don't end up
      // over-writing the same conf.
      FuncSpec loadFuncSpec = inputs.get(i).getFuncSpec();
      LoadFunc loadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(loadFuncSpec);
      boolean combinable = !(loadFunc instanceof MergeJoinIndexer
          || loadFunc instanceof IndexableLoadFunc
          || (loadFunc instanceof CollectableLoadFunc && loadFunc instanceof OrderedLoadFunc));
      if (combinable)
        combinable = !conf.getBoolean("pig.noSplitCombination", false);
      JobConf confClone = new JobConf(conf);
      Job inputSpecificJob = new Job(confClone);
      // Pass loader signature to LoadFunc and to InputFormat through the conf
      passLoadSignature(loadFunc, i, inputSpecificJob.getConfiguration());
      loadFunc.setLocation(inputs.get(i).getFileName(), inputSpecificJob);
      // The above setLocation call could write to the conf within
      // the inputSpecificJob - use this updated conf
      // get the InputFormat from it and ask for splits
      InputFormat inpFormat = loadFunc.getInputFormat();
      List<InputSplit> oneInputSplits = inpFormat.getSplits(
          HadoopShims.createJobContext(inputSpecificJob.getConfiguration(), jobcontext.getJobID()));
      List<InputSplit> oneInputPigSplits = getPigSplits(oneInputSplits, i, inpTargets.get(i),
          HadoopShims.getDefaultBlockSize(fs, isFsPath ? path : fs.getWorkingDirectory()),
          combinable, confClone);
      splits.addAll(oneInputPigSplits);
    } catch (ExecException ee) {
      throw ee;
    } catch (Exception e) {
      int errCode = 2118;
      String msg = "Unable to create input splits for: " + inputs.get(i).getFileName();
      if (e.getMessage() != null && (!e.getMessage().isEmpty())) {
        throw new ExecException(e.getMessage(), errCode, PigException.BUG, e);
      } else {
        throw new ExecException(msg, errCode, PigException.BUG, e);
      }
    }
  }
  // XXX hadoop 20 new API integration: get around a hadoop 20 bug by
  // passing total # of splits to each split so that it can be retrieved
  // in the RecordReader method when called by mapreduce framework later.
  int n = splits.size();
  // also passing the multi-input flag to the back-end so that
  // the multi-input record counters can be created
  int m = inputs.size();
  boolean disableCounter = conf.getBoolean("pig.disable.counter", false);
  if ((m > 1) && disableCounter) {
    log.info("Disable Pig custom input counters");
  }
  for (InputSplit split : splits) {
    ((PigSplit) split).setTotalSplits(n);
    if (m > 1)
      ((PigSplit) split).setMultiInputs(true);
    ((PigSplit) split).setDisableCounter(disableCounter);
  }
  return splits;
}