Example usage for org.apache.hadoop.mapreduce JobContext getCredentials

List of usage examples for org.apache.hadoop.mapreduce JobContext getCredentials

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce JobContext getCredentials.

Prototype

public Credentials getCredentials();

Document

Get credentials for the job.
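
Before the examples, here is a minimal sketch of the pattern most of them share: getCredentials() returns the job's mutable Credentials object, which is then handed to TokenCache so that delegation tokens for the relevant file systems are obtained before the job runs. The class name CredentialsUsageSketch and the method prepareTokens are illustrative only and do not come from any of the sources quoted below.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;

public class CredentialsUsageSketch {

    // Obtain delegation tokens for the given paths and store them in the
    // job's Credentials, then print the services the job now carries tokens for.
    static void prepareTokens(JobContext job, Path... paths) throws IOException {
        // getCredentials() returns the Credentials object shared by the whole job
        Credentials creds = job.getCredentials();

        // TokenCache populates the Credentials with a token per required NameNode
        TokenCache.obtainTokensForNamenodes(creds, paths, job.getConfiguration());

        for (Token<?> token : creds.getAllTokens()) {
            System.out.println("Token obtained for service: " + token.getService());
        }
    }
}

The full examples below use the same call inside real InputFormat and OutputFormat implementations, either to obtain tokens as above or to merge credentials via jobContext.getCredentials().addAll(...).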

Usage

From source file:ml.shifu.shifu.guagua.ShifuInputFormat.java

License:Apache License

@SuppressWarnings("deprecation")
protected List<FileStatus> listCrossValidationStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // Whether we need to recursively look into the directory structure
    boolean recursive = job.getConfiguration().getBoolean("mapreduce.input.fileinputformat.input.dir.recursive",
            false);

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    FileStatus[] fss = fs.listStatus(globStat.getPath());
                    for (FileStatus fileStatus : fss) {
                        if (inputFilter.accept(fileStatus.getPath())) {
                            if (recursive && fileStatus.isDir()) {
                                addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter);
                            } else {
                                result.add(fileStatus);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total validation paths to process : " + result.size());
    return result;
}

From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.java

License:Apache License

/**
 * Unwraps the provided {@link AuthenticationToken} if it is an instance of {@link DelegationTokenStub}, reconstituting it from the provided {@link JobConf}.
 *
 * @param job
 *          The job
 * @param token
 *          The authentication token
 */
public static AuthenticationToken unwrapAuthenticationToken(JobContext job, AuthenticationToken token) {
    requireNonNull(job);
    requireNonNull(token);
    if (token instanceof DelegationTokenStub) {
        DelegationTokenStub delTokenStub = (DelegationTokenStub) token;
        Token<? extends TokenIdentifier> hadoopToken = job.getCredentials()
                .getToken(new Text(delTokenStub.getServiceName()));
        AuthenticationTokenIdentifier identifier = new AuthenticationTokenIdentifier();
        try {
            identifier.readFields(new DataInputStream(new ByteArrayInputStream(hadoopToken.getIdentifier())));
            return new DelegationTokenImpl(hadoopToken.getPassword(), identifier);
        } catch (IOException e) {
            throw new RuntimeException("Could not construct DelegationToken from JobConf Credentials", e);
        }
    }
    return token;
}

From source file:org.apache.accumulo.core.clientImpl.mapreduce.lib.ConfiguratorBase.java

License:Apache License

/**
 * Unwraps the provided {@link AuthenticationToken} if it is an instance of DelegationTokenStub,
 * reconstituting it from the provided {@link JobConf}.
 *
 * @param job
 *          The job
 * @param token
 *          The authentication token
 */
public static AuthenticationToken unwrapAuthenticationToken(JobContext job, AuthenticationToken token) {
    requireNonNull(job);
    requireNonNull(token);
    if (token instanceof org.apache.accumulo.core.clientImpl.mapreduce.DelegationTokenStub) {
        org.apache.accumulo.core.clientImpl.mapreduce.DelegationTokenStub delTokenStub = (org.apache.accumulo.core.clientImpl.mapreduce.DelegationTokenStub) token;
        Token<? extends TokenIdentifier> hadoopToken = job.getCredentials()
                .getToken(new Text(delTokenStub.getServiceName()));
        AuthenticationTokenIdentifier identifier = new AuthenticationTokenIdentifier();
        try {
            identifier.readFields(new DataInputStream(new ByteArrayInputStream(hadoopToken.getIdentifier())));
            return new DelegationTokenImpl(hadoopToken.getPassword(), identifier);
        } catch (IOException e) {
            throw new RuntimeException("Could not construct DelegationToken from JobConf Credentials", e);
        }
    }
    return token;
}

From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java

License:Apache License

/**
 * Get data blocks of the given segments.
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable,
        Expression expression, BitSet matchedPartitions, List<Segment> segmentIds, PartitionInfo partitionInfo,
        List<Integer> oldPartitionIdList) throws IOException {

    QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    QueryStatistic statistic = new QueryStatistic();

    // get tokens for all the required FileSystems for the table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
            new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
    List<ExtendedBlocklet> prunedBlocklets = getPrunedBlocklets(job, carbonTable, expression, segmentIds);

    List<CarbonInputSplit> resultFilteredBlocks = new ArrayList<>();
    int partitionIndex = 0;
    List<Integer> partitionIdList = new ArrayList<>();
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
        partitionIdList = partitionInfo.getPartitionIds();
    }
    for (ExtendedBlocklet blocklet : prunedBlocklets) {
        long partitionId = CarbonTablePath.DataFileUtil
                .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));

        // oldPartitionIdList is only used by the ALTER TABLE partition command, because that
        // command changes the partition info first and then reads data.
        // All other queries should use the newest partitionIdList.
        if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
            if (oldPartitionIdList != null) {
                partitionIndex = oldPartitionIdList.indexOf((int) partitionId);
            } else {
                partitionIndex = partitionIdList.indexOf((int) partitionId);
            }
        }
        if (partitionIndex != -1) {
            // The matchedPartitions variable is null in two cases:
            // 1. the table is not a partition table
            // 2. the table is a partition table and all partitions are matched by the query
            // For a partition table, the task id in the carbondata file name is the partition id,
            // so if this partition is not required it is skipped here.
            if (matchedPartitions == null || matchedPartitions.get(partitionIndex)) {
                CarbonInputSplit inputSplit = convertToCarbonInputSplit(blocklet);
                if (inputSplit != null) {
                    resultFilteredBlocks.add(inputSplit);
                }
            }
        }
    }
    statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
    recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
    return resultFilteredBlocks;
}

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result)
        throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }

    PathFilter inputFilter = getDataFileFilter(job);
    CarbonTablePath tablePath = getTablePath(job.getConfiguration());

    // get tokens for all the required FileSystems for the table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath }, job.getConfiguration());

    //get all data files of valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];

        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());

            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
            while (iter.hasNext()) {
                LocatedFileStatus stat = iter.next();
                if (inputFilter.accept(stat.getPath())) {
                    if (stat.isDirectory()) {
                        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                    } else {
                        result.add(stat);
                    }
                }
            }
        }
    }
}

From source file:org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormatBase.java

License:Apache License

@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

    JobContext jobContext;
    try {
        jobContext = HadoopUtils.instantiateJobContext(configuration, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }

    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        splits = this.mapreduceInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}

From source file:org.apache.flink.api.java.hadoop.mapreduce.HadoopOutputFormatBase.java

License:Apache License

@Override
public void finalizeGlobal(int parallelism) throws IOException {

    JobContext jobContext;
    TaskAttemptContext taskContext;
    try {
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(1) + "_0");

        jobContext = HadoopUtils.instantiateJobContext(this.configuration, new JobID());
        taskContext = HadoopUtils.instantiateTaskAttemptContext(this.configuration, taskAttemptID);
        this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(taskContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }

    // finalize HDFS output format
    if (this.outputCommitter != null) {
        this.outputCommitter.commitJob(jobContext);
    }
}

From source file:org.apache.hadoop.examples.terasort.TeraOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);

    try {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        //
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (TeraSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    } catch (FileNotFoundException ignored) {
    }
}

From source file:org.apache.hcatalog.hbase.ImportSequenceFile.java

License:Apache License

/**
 * Method to run the Importer MapReduce job. Normally called by another MR job
 * during OutputCommitter.commitJob().
 * @param parentContext JobContext of the parent job
 * @param tableName name of table to bulk load data into
 * @param InputDir path of the SequenceFile-formatted data to read
 * @param scratchDir temporary path where the Importer MR job builds the HFiles to be imported
 * @return true if the Importer job completed successfully, false otherwise
 */
static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) {
    Configuration parentConf = parentContext.getConfiguration();
    Configuration conf = new Configuration();
    for (Map.Entry<String, String> el : parentConf) {
        if (el.getKey().startsWith("hbase."))
            conf.set(el.getKey(), el.getValue());
        if (el.getKey().startsWith("mapred.cache.archives"))
            conf.set(el.getKey(), el.getValue());
    }

    // Inherit jar dependencies that the parent job added to the distributed cache
    conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", ""));
    conf.set("mapreduce.job.cache.archives.visibilities",
            parentConf.get("mapreduce.job.cache.archives.visibilities", ""));

    // Temporary fix until HBase security is ready.
    // The written HFiles need to be world readable so that the
    // HBase regionserver user has the privileges to perform an HDFS move.
    if (parentConf.getBoolean("hadoop.security.authorization", false)) {
        FsPermission.setUMask(conf, FsPermission.valueOf("----------"));
    }

    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false);

    boolean localMode = "local".equals(conf.get("mapred.job.tracker"));

    boolean success = false;
    try {
        FileSystem fs = FileSystem.get(parentConf);
        Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR);
        if (!fs.mkdirs(workDir))
            throw new IOException("Importer work directory already exists: " + workDir);
        Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode);
        job.setWorkingDirectory(workDir);
        job.getCredentials().addAll(parentContext.getCredentials());
        success = job.waitForCompletion(true);
        fs.delete(workDir, true);
        // We only clean up on success because a failure might have been caused by the target directory already existing
        if (localMode && success) {
            new ImporterOutputFormat().getOutputCommitter(
                    org.apache.hadoop.mapred.HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID()))
                    .commitJob(job);
        }
    } catch (InterruptedException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (ClassNotFoundException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (IOException e) {
        LOG.error("ImportSequenceFile Failed", e);
    }
    return success;
}

From source file:org.apache.hcatalog.mapreduce.MultiOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    for (String alias : getOutputFormatAliases(context)) {
        LOGGER.debug("Calling checkOutputSpecs for alias: " + alias);
        JobContext aliasContext = getJobContext(alias, context);
        OutputFormat<?, ?> outputFormat = getOutputFormatInstance(aliasContext);
        outputFormat.checkOutputSpecs(aliasContext);
        // Copy credentials and any new config added back to JobContext
        context.getCredentials().addAll(aliasContext.getCredentials());
        setAliasConf(alias, context, aliasContext);
    }
}