Example usage for org.apache.hadoop.mapreduce JobContext getJobID

List of usage examples for org.apache.hadoop.mapreduce JobContext getJobID

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce JobContext getJobID.

Prototype

public JobID getJobID();

Document

Get the unique ID for the job.
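
The JobID returned here uniquely identifies the job across the cluster (for example, something like job_1441234567890_0001) and is commonly used for logging or for deriving per-job paths and contexts, as the examples below show. A minimal sketch of a hypothetical helper (the class name and path prefix are illustrative, not taken from the examples below):

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;

public final class JobPaths {

    private JobPaths() {
    }

    /** Derive a per-job temporary directory name from the unique JobID. */
    public static String workingDirName(JobContext context) {
        // getJobID() returns the cluster-wide identifier of the job;
        // its toString() form is stable and safe to embed in a path.
        JobID jobId = context.getJobID();
        return "_tmp_" + jobId.toString();
    }
}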

Usage

From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java

License:Apache License

@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
    LOG.info("Abort Job [{0}]", jobContext.getJobID());
    Configuration configuration = jobContext.getConfiguration();
    Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
    makeSureNoEmptyShards(configuration, tableOutput);
    FileSystem fileSystem = tableOutput.getFileSystem(configuration);
    for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
        if (isShard(fileStatus)) {
            commitOrAbortJob(jobContext, fileStatus.getPath(), false);
        }
    }
}

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

private Map<String, AbstractIndex> getSegmentAbstractIndexs(JobContext job,
        AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId)
        throws IOException, IndexBuilderException {
    Map<String, AbstractIndex> segmentIndexMap = SegmentTaskIndexStore.getInstance()
            .getSegmentBTreeIfExists(absoluteTableIdentifier, segmentId);

    // if segment tree is not loaded, load the segment tree
    if (segmentIndexMap == null) {
        // List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
        List<TableBlockInfo> tableBlockInfoList = new LinkedList<TableBlockInfo>();
        // getFileStatusOfSegments(job, new int[]{ segmentId }, fileStatusList);

        // get file location of all files of given segment
        JobContext newJob = new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
        newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");

        // identify table blocks
        for (InputSplit inputSplit : getSplitsInternal(newJob)) {
            CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
            tableBlockInfoList
                    .add(new TableBlockInfo(carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(),
                            segmentId, carbonInputSplit.getLocations(), carbonInputSplit.getLength()));
        }

        Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
        segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);

        // get Btree blocks for given segment
        segmentIndexMap = SegmentTaskIndexStore.getInstance()
                .loadAndGetTaskIdToSegmentsMap(segmentToTableBlocksInfos, absoluteTableIdentifier);

    }
    return segmentIndexMap;
}

From source file:org.apache.giraph.block_app.framework.output.BlockOutputFormat.java

License:Apache License

public static Map<String, BlockOutputDesc> createInitAndCheckOutputDescsMap(JobContext jobContext) {
    return createInitAndCheckOutputDescsMap(jobContext.getConfiguration(), jobContext.getJobID().toString());
}

From source file:org.apache.giraph.job.HadoopUtils.java

License:Apache License

/**
 * Create a JobContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param jobContext Use JobID from this object
 * @return JobContext
 */
public static JobContext makeJobContext(Configuration conf, JobContext jobContext) {
    return makeJobContext(conf, jobContext.getJobID());
}

From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Run to discover dynamic partitions available
 */
private void discoverPartitions(JobContext context) throws IOException {
    if (!partitionsDiscovered) {
        //      LOG.info("discover ptns called");
        OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context);

        harProcessor.setEnabled(jobInfo.getHarRequested());

        List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols();
        int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();

        Path loadPath = new Path(jobInfo.getLocation());
        FileSystem fs = loadPath.getFileSystem(context.getConfiguration());

        // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
        String dynPathSpec = loadPath.toUri().getPath();
        dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*");

        //      LOG.info("Searching for "+dynPathSpec);
        Path pathPattern = new Path(dynPathSpec);
        FileStatus[] status = fs.globStatus(pathPattern);

        partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>();
        contextDiscoveredByPath = new LinkedHashMap<String, JobContext>();

        if (status.length == 0) {
            //        LOG.warn("No partition found genereated by dynamic partitioning in ["
            //            +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize()
            //            +"], dynSpec["+dynPathSpec+"]");
        } else {
            if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) {
                this.partitionsDiscovered = true;
                throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                        "Number of dynamic partitions being created "
                                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                                + "] if needed.");
            }

            for (FileStatus st : status) {
                LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>();
                Warehouse.makeSpecFromName(fullPartSpec, st.getPath());
                partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec);
                JobConf jobConf = (JobConf) context.getConfiguration();
                JobContext currContext = HCatMapRedUtil.createJobContext(jobConf, context.getJobID(),
                        InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf,
                                HCatHadoopShims.Instance.get().createTaskAttemptID())));
                HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec);
                contextDiscoveredByPath.put(st.getPath().toString(), currContext);
            }
        }

        //      for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){
        //        LOG.info("Partition "+ spec.getKey());
        //        for (Entry<String,String> e : spec.getValue().entrySet()){
        //          LOG.info(e.getKey() + "=>" +e.getValue());
        //        }
        //      }

        this.partitionsDiscovered = true;
    }
}

From source file:org.apache.hcatalog.mapreduce.MultiOutputFormat.java

License:Apache License

/**
 * Get the JobContext with the related OutputFormat configuration populated given the alias
 * and the actual JobContext
 * @param alias the name given to the OutputFormat configuration
 * @param context the JobContext
 * @return a copy of the JobContext with the alias configuration populated
 */
public static JobContext getJobContext(String alias, JobContext context) {
    String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
    JobContext aliasContext = HCatHadoopShims.Instance.get().createJobContext(context.getConfiguration(),
            context.getJobID());
    addToConfig(aliasConf, aliasContext.getConfiguration());
    return aliasContext;
}
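
A hypothetical caller sketch (the alias name "textOutput" is illustrative): fetch the alias-specific JobContext; its configuration carries only the settings registered for that alias, while the JobID is the same as the parent context's.

// Hypothetical caller sketch, not part of the HCatalog source above.
JobContext aliasContext = MultiOutputFormat.getJobContext("textOutput", context);
assert aliasContext.getJobID().equals(context.getJobID());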

From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Run to discover dynamic partitions available
 */
private void discoverPartitions(JobContext context) throws IOException {
    if (!partitionsDiscovered) {
        //      LOG.info("discover ptns called");
        OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());

        harProcessor.setEnabled(jobInfo.getHarRequested());

        List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols();
        int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();

        Path loadPath = new Path(jobInfo.getLocation());
        FileSystem fs = loadPath.getFileSystem(context.getConfiguration());

        // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
        String dynPathSpec = loadPath.toUri().getPath();
        dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*");

        //      LOG.info("Searching for "+dynPathSpec);
        Path pathPattern = new Path(dynPathSpec);
        FileStatus[] status = fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER);

        partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>();
        contextDiscoveredByPath = new LinkedHashMap<String, JobContext>();

        if (status.length == 0) {
            //        LOG.warn("No partition found genereated by dynamic partitioning in ["
            //            +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize()
            //            +"], dynSpec["+dynPathSpec+"]");
        } else {
            if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) {
                this.partitionsDiscovered = true;
                throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                        "Number of dynamic partitions being created "
                                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                                + "] if needed.");
            }

            for (FileStatus st : status) {
                LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>();
                if (!customDynamicLocationUsed) {
                    Warehouse.makeSpecFromName(fullPartSpec, st.getPath());
                } else {
                    HCatFileUtil.getPartKeyValuesForCustomLocation(fullPartSpec, jobInfo,
                            st.getPath().toString());
                }
                partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec);
                JobConf jobConf = (JobConf) context.getConfiguration();
                JobContext currContext = HCatMapRedUtil.createJobContext(jobConf, context.getJobID(),
                        InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf,
                                ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())));
                HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec);
                contextDiscoveredByPath.put(st.getPath().toString(), currContext);
            }
        }

        //      for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){
        //        LOG.info("Partition "+ spec.getKey());
        //        for (Entry<String,String> e : spec.getValue().entrySet()){
        //          LOG.info(e.getKey() + "=>" +e.getValue());
        //        }
        //      }

        this.partitionsDiscovered = true;
    }
}

From source file:org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.java

License:Apache License

/**
 * Get the JobContext with the related OutputFormat configuration populated given the alias
 * and the actual JobContext
 * @param alias the name given to the OutputFormat configuration
 * @param context the JobContext
 * @return a copy of the JobContext with the alias configuration populated
 */
public static JobContext getJobContext(String alias, JobContext context) {
    String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
    JobContext aliasContext = ShimLoader.getHadoopShims().getHCatShim()
            .createJobContext(context.getConfiguration(), context.getJobID());
    addToConfig(aliasConf, aliasContext.getConfiguration());
    return aliasContext;
}

From source file:org.apache.jena.hadoop.rdf.io.input.util.RdfIOUtils.java

License:Apache License

/**
 * Selects a seed for use in generating blank node identifiers
 *
 * @param context
 *            Job Context
 * @param path
 *            File path
 * @return Seed
 */
public static UUID getSeed(JobContext context, Path path) {
    // This is to ensure that blank node allocation policy is constant when
    // subsequent MapReduce jobs need that
    String jobId = context.getJobID().toString();
    if (jobId == null) {
        jobId = String.valueOf(System.currentTimeMillis());
        LOGGER.warn(
                "Job ID was not set, using current milliseconds of {}. Sequence of MapReduce jobs must carefully handle blank nodes.",
                jobId);
    }

    if (!context.getConfiguration().getBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false)) {
        // Using normal file scoped blank node allocation
        LOGGER.debug("Generating Blank Node Seed from Job Details (ID={}, Input Path={})", jobId, path);

        // Form a reproducible seed for the run
        return new UUID(jobId.hashCode(), path.hashCode());
    } else {
        // Using globally scoped blank node allocation
        LOGGER.warn(
                "Using globally scoped blank node allocation policy from Job Details (ID={}) - this is unsafe if your RDF inputs did not originate from a previous job",
                jobId);

        return new UUID(jobId.hashCode(), 0);
    }
}
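
A hypothetical call site sketch (it assumes the RdfIOUtils.getSeed method above; the variable names are illustrative): a record reader derives its blank node seed from the job and the split it is reading so that identifiers are reproducible across runs. A TaskAttemptContext can be passed directly since it extends JobContext.

// Hypothetical call site, not part of the Jena source above.
FileSplit fileSplit = (FileSplit) genericSplit;
UUID seed = RdfIOUtils.getSeed(context, fileSplit.getPath());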

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public List<InputSplit> getSplits(JobContext jobcontext) throws IOException, InterruptedException {

    Configuration conf = jobcontext.getConfiguration();

    ArrayList<FileSpec> inputs;
    ArrayList<ArrayList<OperatorKey>> inpTargets;
    PigContext pigContext;
    try {
        inputs = (ArrayList<FileSpec>) ObjectSerializer.deserialize(conf.get("pig.inputs"));
        inpTargets = (ArrayList<ArrayList<OperatorKey>>) ObjectSerializer
                .deserialize(conf.get("pig.inpTargets"));
        pigContext = (PigContext) ObjectSerializer.deserialize(conf.get("pig.pigContext"));
        PigContext.setPackageImportList(
                (ArrayList<String>) ObjectSerializer.deserialize(conf.get("udf.import.list")));
        MapRedUtil.setupUDFContext(conf);
    } catch (Exception e) {
        int errCode = 2094;
        String msg = "Unable to deserialize object.";
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }

    ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
    for (int i = 0; i < inputs.size(); i++) {
        try {
            Path path = new Path(inputs.get(i).getFileName());

            FileSystem fs;
            boolean isFsPath = true;
            try {
                fs = path.getFileSystem(conf);
            } catch (Exception e) {
                // If an application specific
                // scheme was used
                // (e.g.: "hbase://table") we will fail
                // getting the file system. That's
                // ok, we just use the dfs in that case.
                fs = new Path("/").getFileSystem(conf);
                isFsPath = false;
            }

            // if the execution is against Mapred DFS, set
            // working dir to /user/<userid>
            if (!Utils.isLocal(pigContext, conf)) {
                fs.setWorkingDirectory(jobcontext.getWorkingDirectory());
            }

            // first pass input location to the loader - for this send a
            // clone of the configuration we have - this is so that if the
            // loader (or the inputformat of the loader) decide to store the
            // input location into the configuration (for example,
            // FileInputFormat stores this in mapred.input.dir in the conf),
            // then for different inputs, the loader's don't end up
            // over-writing the same conf.
            FuncSpec loadFuncSpec = inputs.get(i).getFuncSpec();
            LoadFunc loadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(loadFuncSpec);
            boolean combinable = !(loadFunc instanceof MergeJoinIndexer || loadFunc instanceof IndexableLoadFunc
                    || (loadFunc instanceof CollectableLoadFunc && loadFunc instanceof OrderedLoadFunc));
            if (combinable)
                combinable = !conf.getBoolean("pig.noSplitCombination", false);
            JobConf confClone = new JobConf(conf);
            Job inputSpecificJob = new Job(confClone);
            // Pass loader signature to LoadFunc and to InputFormat through
            // the conf
            passLoadSignature(loadFunc, i, inputSpecificJob.getConfiguration());
            loadFunc.setLocation(inputs.get(i).getFileName(), inputSpecificJob);
            // The above setLocation call could write to the conf within
            // the inputSpecificJob - use this updated conf

            // get the InputFormat from it and ask for splits
            InputFormat inpFormat = loadFunc.getInputFormat();
            List<InputSplit> oneInputSplits = inpFormat.getSplits(
                    HadoopShims.createJobContext(inputSpecificJob.getConfiguration(), jobcontext.getJobID()));
            List<InputSplit> oneInputPigSplits = getPigSplits(oneInputSplits, i, inpTargets.get(i),
                    HadoopShims.getDefaultBlockSize(fs, isFsPath ? path : fs.getWorkingDirectory()), combinable,
                    confClone);
            splits.addAll(oneInputPigSplits);
        } catch (ExecException ee) {
            throw ee;
        } catch (Exception e) {
            int errCode = 2118;
            String msg = "Unable to create input splits for: " + inputs.get(i).getFileName();
            if (e.getMessage() != null && (!e.getMessage().isEmpty())) {
                throw new ExecException(e.getMessage(), errCode, PigException.BUG, e);
            } else {
                throw new ExecException(msg, errCode, PigException.BUG, e);
            }
        }
    }

    // XXX hadoop 20 new API integration: get around a hadoop 20 bug by
    // passing total # of splits to each split so that it can be retrieved
    // in the RecordReader method when called by mapreduce framework later.
    int n = splits.size();
    // also passing the multi-input flag to the back-end so that
    // the multi-input record counters can be created
    int m = inputs.size();

    boolean disableCounter = conf.getBoolean("pig.disable.counter", false);
    if ((m > 1) && disableCounter) {
        log.info("Disable Pig custom input counters");
    }

    for (InputSplit split : splits) {
        ((PigSplit) split).setTotalSplits(n);
        if (m > 1)
            ((PigSplit) split).setMultiInputs(true);
        ((PigSplit) split).setDisableCounter(disableCounter);
    }

    return splits;
}