Usage examples for org.apache.hadoop.mapreduce.JobContext.getJobID()
public JobID getJobID();
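Before the project-specific examples, here is a minimal sketch of the common pattern they share: read the JobID from a JobContext and use it to derive a job-scoped name, falling back to a timestamp when no ID was set (the Jena example below uses the same fallback). The JobIdPaths helper class and the /tmp/scratch base directory are illustrative assumptions, not part of any project listed here.

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;

// Hypothetical helper showing typical getJobID() usage:
// derive a per-job scratch directory name from the job's identity.
public final class JobIdPaths {

    private JobIdPaths() {
    }

    /**
     * Builds a job-scoped path such as "/tmp/scratch/job_1700000000000_0042".
     * The base directory "/tmp/scratch" is only an example value.
     */
    public static String jobScopedPath(JobContext context) {
        JobID jobId = context.getJobID();
        // getJobID() can be null if the context was created without an ID,
        // so fall back to the current time in that case.
        String id = (jobId != null) ? jobId.toString() : String.valueOf(System.currentTimeMillis());
        return "/tmp/scratch/" + id;
    }
}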
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
  LOG.info("Abort Job [{0}]", jobContext.getJobID());
  Configuration configuration = jobContext.getConfiguration();
  Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
  makeSureNoEmptyShards(configuration, tableOutput);
  FileSystem fileSystem = tableOutput.getFileSystem(configuration);
  for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
    if (isShard(fileStatus)) {
      commitOrAbortJob(jobContext, fileStatus.getPath(), false);
    }
  }
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private Map<String, AbstractIndex> getSegmentAbstractIndexs(JobContext job,
    AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId)
    throws IOException, IndexBuilderException {
  Map<String, AbstractIndex> segmentIndexMap = SegmentTaskIndexStore.getInstance()
      .getSegmentBTreeIfExists(absoluteTableIdentifier, segmentId);
  // if segment tree is not loaded, load the segment tree
  if (segmentIndexMap == null) {
    // List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
    List<TableBlockInfo> tableBlockInfoList = new LinkedList<TableBlockInfo>();
    // getFileStatusOfSegments(job, new int[]{ segmentId }, fileStatusList);
    // get file location of all files of given segment
    JobContext newJob =
        new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
    newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");
    // identify table blocks
    for (InputSplit inputSplit : getSplitsInternal(newJob)) {
      CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
      tableBlockInfoList.add(new TableBlockInfo(carbonInputSplit.getPath().toString(),
          carbonInputSplit.getStart(), segmentId, carbonInputSplit.getLocations(),
          carbonInputSplit.getLength()));
    }
    Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
    segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
    // get Btree blocks for given segment
    segmentIndexMap = SegmentTaskIndexStore.getInstance()
        .loadAndGetTaskIdToSegmentsMap(segmentToTableBlocksInfos, absoluteTableIdentifier);
  }
  return segmentIndexMap;
}
From source file:org.apache.giraph.block_app.framework.output.BlockOutputFormat.java
License:Apache License
public static Map<String, BlockOutputDesc> createInitAndCheckOutputDescsMap(JobContext jobContext) {
  return createInitAndCheckOutputDescsMap(jobContext.getConfiguration(),
      jobContext.getJobID().toString());
}
From source file:org.apache.giraph.job.HadoopUtils.java
License:Apache License
/**
 * Create a JobContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param jobContext Use JobID from this object
 * @return JobContext
 */
public static JobContext makeJobContext(Configuration conf, JobContext jobContext) {
  return makeJobContext(conf, jobContext.getJobID());
}
From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
/**
 * Run to discover dynamic partitions available
 */
private void discoverPartitions(JobContext context) throws IOException {
  if (!partitionsDiscovered) {
    //   LOG.info("discover ptns called");
    OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context);
    harProcessor.setEnabled(jobInfo.getHarRequested());
    List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols();
    int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();
    Path loadPath = new Path(jobInfo.getLocation());
    FileSystem fs = loadPath.getFileSystem(context.getConfiguration());
    // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
    String dynPathSpec = loadPath.toUri().getPath();
    dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*");
    //   LOG.info("Searching for "+dynPathSpec);
    Path pathPattern = new Path(dynPathSpec);
    FileStatus[] status = fs.globStatus(pathPattern);
    partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>();
    contextDiscoveredByPath = new LinkedHashMap<String, JobContext>();
    if (status.length == 0) {
      //     LOG.warn("No partition found genereated by dynamic partitioning in ["
      //         +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize()
      //         +"], dynSpec["+dynPathSpec+"]");
    } else {
      if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) {
        this.partitionsDiscovered = true;
        throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
            "Number of dynamic partitions being created "
                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                + "] if needed.");
      }
      for (FileStatus st : status) {
        LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>();
        Warehouse.makeSpecFromName(fullPartSpec, st.getPath());
        partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec);
        JobConf jobConf = (JobConf) context.getConfiguration();
        JobContext currContext = HCatMapRedUtil.createJobContext(jobConf, context.getJobID(),
            InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf,
                HCatHadoopShims.Instance.get().createTaskAttemptID())));
        HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec);
        contextDiscoveredByPath.put(st.getPath().toString(), currContext);
      }
    }
    //  for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){
    //    LOG.info("Partition "+ spec.getKey());
    //    for (Entry<String,String> e : spec.getValue().entrySet()){
    //      LOG.info(e.getKey() + "=>" +e.getValue());
    //    }
    //  }
    this.partitionsDiscovered = true;
  }
}
From source file:org.apache.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
/**
 * Get the JobContext with the related OutputFormat configuration populated given the alias
 * and the actual JobContext
 * @param alias the name given to the OutputFormat configuration
 * @param context the JobContext
 * @return a copy of the JobContext with the alias configuration populated
 */
public static JobContext getJobContext(String alias, JobContext context) {
  String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
  JobContext aliasContext = HCatHadoopShims.Instance.get().createJobContext(context.getConfiguration(),
      context.getJobID());
  addToConfig(aliasConf, aliasContext.getConfiguration());
  return aliasContext;
}
From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
/**
 * Run to discover dynamic partitions available
 */
private void discoverPartitions(JobContext context) throws IOException {
  if (!partitionsDiscovered) {
    //   LOG.info("discover ptns called");
    OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
    harProcessor.setEnabled(jobInfo.getHarRequested());
    List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols();
    int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();
    Path loadPath = new Path(jobInfo.getLocation());
    FileSystem fs = loadPath.getFileSystem(context.getConfiguration());
    // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
    String dynPathSpec = loadPath.toUri().getPath();
    dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*");
    //   LOG.info("Searching for "+dynPathSpec);
    Path pathPattern = new Path(dynPathSpec);
    FileStatus[] status = fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER);
    partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>();
    contextDiscoveredByPath = new LinkedHashMap<String, JobContext>();
    if (status.length == 0) {
      //     LOG.warn("No partition found genereated by dynamic partitioning in ["
      //         +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize()
      //         +"], dynSpec["+dynPathSpec+"]");
    } else {
      if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) {
        this.partitionsDiscovered = true;
        throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
            "Number of dynamic partitions being created "
                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                + "] if needed.");
      }
      for (FileStatus st : status) {
        LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>();
        if (!customDynamicLocationUsed) {
          Warehouse.makeSpecFromName(fullPartSpec, st.getPath());
        } else {
          HCatFileUtil.getPartKeyValuesForCustomLocation(fullPartSpec, jobInfo,
              st.getPath().toString());
        }
        partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec);
        JobConf jobConf = (JobConf) context.getConfiguration();
        JobContext currContext = HCatMapRedUtil.createJobContext(jobConf, context.getJobID(),
            InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf,
                ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())));
        HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec);
        contextDiscoveredByPath.put(st.getPath().toString(), currContext);
      }
    }
    //  for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){
    //    LOG.info("Partition "+ spec.getKey());
    //    for (Entry<String,String> e : spec.getValue().entrySet()){
    //      LOG.info(e.getKey() + "=>" +e.getValue());
    //    }
    //  }
    this.partitionsDiscovered = true;
  }
}
From source file:org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
/**
 * Get the JobContext with the related OutputFormat configuration populated given the alias
 * and the actual JobContext
 * @param alias the name given to the OutputFormat configuration
 * @param context the JobContext
 * @return a copy of the JobContext with the alias configuration populated
 */
public static JobContext getJobContext(String alias, JobContext context) {
  String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
  JobContext aliasContext = ShimLoader.getHadoopShims().getHCatShim()
      .createJobContext(context.getConfiguration(), context.getJobID());
  addToConfig(aliasConf, aliasContext.getConfiguration());
  return aliasContext;
}
From source file:org.apache.jena.hadoop.rdf.io.input.util.RdfIOUtils.java
License:Apache License
/**
 * Selects a seed for use in generating blank node identifiers
 *
 * @param context
 *            Job Context
 * @param path
 *            File path
 * @return Seed
 */
public static UUID getSeed(JobContext context, Path path) {
  // This is to ensure that blank node allocation policy is constant when
  // subsequent MapReduce jobs need that
  String jobId = context.getJobID().toString();
  if (jobId == null) {
    jobId = String.valueOf(System.currentTimeMillis());
    LOGGER.warn(
        "Job ID was not set, using current milliseconds of {}. Sequence of MapReduce jobs must carefully handle blank nodes.",
        jobId);
  }
  if (!context.getConfiguration().getBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false)) {
    // Using normal file scoped blank node allocation
    LOGGER.debug("Generating Blank Node Seed from Job Details (ID={}, Input Path={})", jobId, path);
    // Form a reproducible seed for the run
    return new UUID(jobId.hashCode(), path.hashCode());
  } else {
    // Using globally scoped blank node allocation
    LOGGER.warn(
        "Using globally scoped blank node allocation policy from Job Details (ID={}) - this is unsafe if your RDF inputs did not originate from a previous job",
        jobId);
    return new UUID(jobId.hashCode(), 0);
  }
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public List<InputSplit> getSplits(JobContext jobcontext) throws IOException, InterruptedException {
  Configuration conf = jobcontext.getConfiguration();
  ArrayList<FileSpec> inputs;
  ArrayList<ArrayList<OperatorKey>> inpTargets;
  PigContext pigContext;
  try {
    inputs = (ArrayList<FileSpec>) ObjectSerializer.deserialize(conf.get("pig.inputs"));
    inpTargets = (ArrayList<ArrayList<OperatorKey>>) ObjectSerializer
        .deserialize(conf.get("pig.inpTargets"));
    pigContext = (PigContext) ObjectSerializer.deserialize(conf.get("pig.pigContext"));
    PigContext.setPackageImportList(
        (ArrayList<String>) ObjectSerializer.deserialize(conf.get("udf.import.list")));
    MapRedUtil.setupUDFContext(conf);
  } catch (Exception e) {
    int errCode = 2094;
    String msg = "Unable to deserialize object.";
    throw new ExecException(msg, errCode, PigException.BUG, e);
  }
  ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
  for (int i = 0; i < inputs.size(); i++) {
    try {
      Path path = new Path(inputs.get(i).getFileName());
      FileSystem fs;
      boolean isFsPath = true;
      try {
        fs = path.getFileSystem(conf);
      } catch (Exception e) {
        // If an application specific scheme was used
        // (e.g.: "hbase://table") we will fail getting the file system.
        // That's ok, we just use the dfs in that case.
        fs = new Path("/").getFileSystem(conf);
        isFsPath = false;
      }
      // if the execution is against Mapred DFS, set
      // working dir to /user/<userid>
      if (!Utils.isLocal(pigContext, conf)) {
        fs.setWorkingDirectory(jobcontext.getWorkingDirectory());
      }
      // first pass input location to the loader - for this send a
      // clone of the configuration we have - this is so that if the
      // loader (or the inputformat of the loader) decide to store the
      // input location into the configuration (for example,
      // FileInputFormat stores this in mapred.input.dir in the conf),
      // then for different inputs, the loader's don't end up
      // over-writing the same conf.
      FuncSpec loadFuncSpec = inputs.get(i).getFuncSpec();
      LoadFunc loadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(loadFuncSpec);
      boolean combinable = !(loadFunc instanceof MergeJoinIndexer
          || loadFunc instanceof IndexableLoadFunc
          || (loadFunc instanceof CollectableLoadFunc && loadFunc instanceof OrderedLoadFunc));
      if (combinable)
        combinable = !conf.getBoolean("pig.noSplitCombination", false);
      JobConf confClone = new JobConf(conf);
      Job inputSpecificJob = new Job(confClone);
      // Pass loader signature to LoadFunc and to InputFormat through the conf
      passLoadSignature(loadFunc, i, inputSpecificJob.getConfiguration());
      loadFunc.setLocation(inputs.get(i).getFileName(), inputSpecificJob);
      // The above setLocation call could write to the conf within
      // the inputSpecificJob - use this updated conf
      // get the InputFormat from it and ask for splits
      InputFormat inpFormat = loadFunc.getInputFormat();
      List<InputSplit> oneInputSplits = inpFormat.getSplits(
          HadoopShims.createJobContext(inputSpecificJob.getConfiguration(), jobcontext.getJobID()));
      List<InputSplit> oneInputPigSplits = getPigSplits(oneInputSplits, i, inpTargets.get(i),
          HadoopShims.getDefaultBlockSize(fs, isFsPath ? path : fs.getWorkingDirectory()),
          combinable, confClone);
      splits.addAll(oneInputPigSplits);
    } catch (ExecException ee) {
      throw ee;
    } catch (Exception e) {
      int errCode = 2118;
      String msg = "Unable to create input splits for: " + inputs.get(i).getFileName();
      if (e.getMessage() != null && (!e.getMessage().isEmpty())) {
        throw new ExecException(e.getMessage(), errCode, PigException.BUG, e);
      } else {
        throw new ExecException(msg, errCode, PigException.BUG, e);
      }
    }
  }
  // XXX hadoop 20 new API integration: get around a hadoop 20 bug by
  // passing total # of splits to each split so that it can be retrieved
  // in the RecordReader method when called by mapreduce framework later.
  int n = splits.size();
  // also passing the multi-input flag to the back-end so that
  // the multi-input record counters can be created
  int m = inputs.size();
  boolean disableCounter = conf.getBoolean("pig.disable.counter", false);
  if ((m > 1) && disableCounter) {
    log.info("Disable Pig custom input counters");
  }
  for (InputSplit split : splits) {
    ((PigSplit) split).setTotalSplits(n);
    if (m > 1)
      ((PigSplit) split).setMultiInputs(true);
    ((PigSplit) split).setDisableCounter(disableCounter);
  }
  return splits;
}