List of usage examples for org.apache.hadoop.mapreduce.JobContext#getCredentials(). The method returns the job's Credentials object, which holds the delegation tokens and secret keys the job uses to authenticate against secure services such as HDFS.
public Credentials getCredentials();
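Most of the examples below follow one of two patterns: passing the job's Credentials to TokenCache so namenode delegation tokens are obtained up front, or copying tokens between contexts with addAll(). As a minimal sketch of the first pattern (the class and method names here are illustrative only, not part of any project listed below):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.security.TokenCache;

public final class CredentialsExample {

    /**
     * Fetch namenode delegation tokens for the given paths and store them in the
     * job's Credentials, so tasks can read those paths on a secure cluster.
     */
    public static void addTokensForPaths(JobContext job, Path... paths) throws IOException {
        // getCredentials() exposes the job-wide store of tokens and secret keys;
        // TokenCache adds one delegation token per distinct namenode to it.
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), paths, job.getConfiguration());
    }
}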
From source file:ml.shifu.shifu.guagua.ShifuInputFormat.java
License:Apache License
@SuppressWarnings("deprecation") protected List<FileStatus> listCrossValidationStatus(JobContext job) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); }/*from ww w. j a va 2 s .c om*/ // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration()); // Whether we need to recursive look into the directory structure boolean recursive = job.getConfiguration().getBoolean("mapreduce.input.fileinputformat.input.dir.recursive", false); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); for (int i = 0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(job.getConfiguration()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDir()) { FileStatus[] fss = fs.listStatus(globStat.getPath()); for (FileStatus fileStatus : fss) { if (inputFilter.accept(fileStatus.getPath())) { if (recursive && fileStatus.isDir()) { addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter); } else { result.add(fileStatus); } } } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total validation paths to process : " + result.size()); return result; }
From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.java
License:Apache License
/**
 * Unwraps the provided {@link AuthenticationToken} if it is an instance of {@link DelegationTokenStub},
 * reconstituting it from the provided {@link JobConf}.
 *
 * @param job
 *          The job
 * @param token
 *          The authentication token
 */
public static AuthenticationToken unwrapAuthenticationToken(JobContext job, AuthenticationToken token) {
    requireNonNull(job);
    requireNonNull(token);
    if (token instanceof DelegationTokenStub) {
        DelegationTokenStub delTokenStub = (DelegationTokenStub) token;
        Token<? extends TokenIdentifier> hadoopToken = job.getCredentials()
                .getToken(new Text(delTokenStub.getServiceName()));
        AuthenticationTokenIdentifier identifier = new AuthenticationTokenIdentifier();
        try {
            identifier.readFields(new DataInputStream(new ByteArrayInputStream(hadoopToken.getIdentifier())));
            return new DelegationTokenImpl(hadoopToken.getPassword(), identifier);
        } catch (IOException e) {
            throw new RuntimeException("Could not construct DelegationToken from JobConf Credentials", e);
        }
    }
    return token;
}
From source file:org.apache.accumulo.core.clientImpl.mapreduce.lib.ConfiguratorBase.java
License:Apache License
/**
 * Unwraps the provided {@link AuthenticationToken} if it is an instance of DelegationTokenStub,
 * reconstituting it from the provided {@link JobConf}.
 *
 * @param job
 *          The job
 * @param token
 *          The authentication token
 */
public static AuthenticationToken unwrapAuthenticationToken(JobContext job, AuthenticationToken token) {
    requireNonNull(job);
    requireNonNull(token);
    if (token instanceof org.apache.accumulo.core.clientImpl.mapreduce.DelegationTokenStub) {
        org.apache.accumulo.core.clientImpl.mapreduce.DelegationTokenStub delTokenStub =
                (org.apache.accumulo.core.clientImpl.mapreduce.DelegationTokenStub) token;
        Token<? extends TokenIdentifier> hadoopToken = job.getCredentials()
                .getToken(new Text(delTokenStub.getServiceName()));
        AuthenticationTokenIdentifier identifier = new AuthenticationTokenIdentifier();
        try {
            identifier.readFields(new DataInputStream(new ByteArrayInputStream(hadoopToken.getIdentifier())));
            return new DelegationTokenImpl(hadoopToken.getPassword(), identifier);
        } catch (IOException e) {
            throw new RuntimeException("Could not construct DelegationToken from JobConf Credentials", e);
        }
    }
    return token;
}
From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java
License:Apache License
/**
 * get data blocks of given segment
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable,
        Expression expression, BitSet matchedPartitions, List<Segment> segmentIds,
        PartitionInfo partitionInfo, List<Integer> oldPartitionIdList) throws IOException {
    QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    QueryStatistic statistic = new QueryStatistic();

    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
            new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
    List<ExtendedBlocklet> prunedBlocklets = getPrunedBlocklets(job, carbonTable, expression, segmentIds);

    List<CarbonInputSplit> resultFilteredBlocks = new ArrayList<>();
    int partitionIndex = 0;
    List<Integer> partitionIdList = new ArrayList<>();
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
        partitionIdList = partitionInfo.getPartitionIds();
    }
    for (ExtendedBlocklet blocklet : prunedBlocklets) {
        long partitionId = CarbonTablePath.DataFileUtil
                .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));

        // oldPartitionIdList is only used in the alter table partition command because it changes
        // the partition info first and then reads data.
        // Other normal queries should use the newest partitionIdList.
        if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
            if (oldPartitionIdList != null) {
                partitionIndex = oldPartitionIdList.indexOf((int) partitionId);
            } else {
                partitionIndex = partitionIdList.indexOf((int) partitionId);
            }
        }
        if (partitionIndex != -1) {
            // matchedPartitions will be null in two cases:
            // 1. the table is not a partition table
            // 2. the table is a partition table and all partitions are matched by the query
            // For a partition table, the task id of the carbondata file name is the partition id;
            // if this partition is not required, it is skipped here.
            if (matchedPartitions == null || matchedPartitions.get(partitionIndex)) {
                CarbonInputSplit inputSplit = convertToCarbonInputSplit(blocklet);
                if (inputSplit != null) {
                    resultFilteredBlocks.add(inputSplit);
                }
            }
        }
    }
    statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
    recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
    return resultFilteredBlocks;
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result)
        throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }

    PathFilter inputFilter = getDataFileFilter(job);
    CarbonTablePath tablePath = getTablePath(job.getConfiguration());

    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath },
            job.getConfiguration());

    // get all data files of valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];

        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());

            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
            while (iter.hasNext()) {
                LocatedFileStatus stat = iter.next();
                if (inputFilter.accept(stat.getPath())) {
                    if (stat.isDirectory()) {
                        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                    } else {
                        result.add(stat);
                    }
                }
            }
        }
    }
}
From source file:org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormatBase.java
License:Apache License
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

    JobContext jobContext;
    try {
        jobContext = HadoopUtils.instantiateJobContext(configuration, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }

    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        splits = this.mapreduceInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
From source file:org.apache.flink.api.java.hadoop.mapreduce.HadoopOutputFormatBase.java
License:Apache License
@Override
public void finalizeGlobal(int parallelism) throws IOException {

    JobContext jobContext;
    TaskAttemptContext taskContext;
    try {
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(1) + "_0");

        jobContext = HadoopUtils.instantiateJobContext(this.configuration, new JobID());
        taskContext = HadoopUtils.instantiateTaskAttemptContext(this.configuration, taskAttemptID);
        this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(taskContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }

    // finalize HDFS output format
    if (this.outputCommitter != null) {
        this.outputCommitter.commitJob(jobContext);
    }
}
From source file:org.apache.hadoop.examples.terasort.TeraOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);

    try {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (TeraSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    } catch (FileNotFoundException ignored) {
    }
}
From source file:org.apache.hcatalog.hbase.ImportSequenceFile.java
License:Apache License
/**
 * Method to run the Importer MapReduce Job. Normally will be called by another MR job
 * during OutputCommitter.commitJob().
 * @param parentContext JobContext of the parent job
 * @param tableName name of table to bulk load data into
 * @param InputDir path of SequenceFile formatted data to read
 * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported
 * @return
 */
static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) {
    Configuration parentConf = parentContext.getConfiguration();
    Configuration conf = new Configuration();
    for (Map.Entry<String, String> el : parentConf) {
        if (el.getKey().startsWith("hbase."))
            conf.set(el.getKey(), el.getValue());
        if (el.getKey().startsWith("mapred.cache.archives"))
            conf.set(el.getKey(), el.getValue());
    }

    // Inherit jar dependencies added to distributed cache loaded by parent job
    conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", ""));
    conf.set("mapreduce.job.cache.archives.visibilities",
            parentConf.get("mapreduce.job.cache.archives.visibilities", ""));

    // Temporary fix until hbase security is ready
    // We need the written HFile to be world readable so
    // hbase regionserver user has the privileges to perform a hdfs move
    if (parentConf.getBoolean("hadoop.security.authorization", false)) {
        FsPermission.setUMask(conf, FsPermission.valueOf("----------"));
    }

    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false);

    boolean localMode = "local".equals(conf.get("mapred.job.tracker"));

    boolean success = false;
    try {
        FileSystem fs = FileSystem.get(parentConf);
        Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR);
        if (!fs.mkdirs(workDir))
            throw new IOException("Importer work directory already exists: " + workDir);
        Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode);
        job.setWorkingDirectory(workDir);
        job.getCredentials().addAll(parentContext.getCredentials());
        success = job.waitForCompletion(true);
        fs.delete(workDir, true);
        // We only cleanup on success because failure might've been caused by existence of target directory
        if (localMode && success) {
            new ImporterOutputFormat().getOutputCommitter(
                    org.apache.hadoop.mapred.HCatMapRedUtil.createTaskAttemptContext(conf,
                            new TaskAttemptID()))
                    .commitJob(job);
        }
    } catch (InterruptedException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (ClassNotFoundException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (IOException e) {
        LOG.error("ImportSequenceFile Failed", e);
    }
    return success;
}
From source file:org.apache.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    for (String alias : getOutputFormatAliases(context)) {
        LOGGER.debug("Calling checkOutputSpecs for alias: " + alias);
        JobContext aliasContext = getJobContext(alias, context);
        OutputFormat<?, ?> outputFormat = getOutputFormatInstance(aliasContext);
        outputFormat.checkOutputSpecs(aliasContext);
        // Copy credentials and any new config added back to JobContext
        context.getCredentials().addAll(aliasContext.getCredentials());
        setAliasConf(alias, context, aliasContext);
    }
}