Example usage for org.apache.hadoop.mapreduce JobContext getCredentials

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getCredentials(), collected from open-source projects.

Prototype

public Credentials getCredentials();

Document

Get credentials for the job.
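
All of the examples below share one pattern: obtain the job's Credentials with getCredentials() and hand them to TokenCache so that delegation tokens for the required file systems are collected before tasks run. Here is a minimal, self-contained sketch of that pattern; the class and method names are illustrative, not taken from any example below.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.security.Credentials;

public class CredentialsUsageSketch {
    /** Collects delegation tokens for every NameNode the given paths touch. */
    public static void obtainTokens(JobContext job, Path[] paths) throws IOException {
        // getCredentials() returns the job's Credentials: the container for
        // its secret keys and delegation tokens.
        Credentials credentials = job.getCredentials();
        // TokenCache populates that container; the framework later ships the
        // tokens to the tasks so they can authenticate to the file systems.
        TokenCache.obtainTokensForNamenodes(credentials, paths, job.getConfiguration());
    }
}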

Usage

From source file: com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License: Apache License

/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return a list of FileStatus objects for the input files
 * @throws IOException if no input paths are specified
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file: com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License: Apache License

/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return a list of FileStatus objects for the input files
 * @throws IOException if no input paths are specified
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // Whether we need to look recursively into the directory structure
    boolean recursive = getInputDirRecursive(job);

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    List<FileStatus> result = null;

    int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS, DEFAULT_LIST_STATUS_NUM_THREADS);
    Stopwatch sw = Stopwatch.createStarted();
    if (numThreads == 1) {
        result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
    } else {
        Iterable<FileStatus> locatedFiles = null;
        try {
            LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                    job.getConfiguration(), dirs, recursive, inputFilter, true);
            locatedFiles = locatedFileStatusFetcher.getFileStatuses();
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while getting file statuses");
        }
        result = Lists.newArrayList(locatedFiles);
    }

    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time taken to get FileStatuses: " + sw.elapsed(TimeUnit.MILLISECONDS));
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file: com.marklogic.contentpump.FileAndDirectoryInputFormat.java

License: Apache License

protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // Whether we need to look recursively into the directory structure
    boolean recursive = getInputDirRecursive(job);

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    List<FileStatus> result = simpleListStatus(job, dirs, inputFilter, recursive);

    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file: com.ning.metrics.serialization.hadoop.SmileInputFormat.java

License: Apache License

/**
 * List input directories.
 *
 * @param job the job to list input paths for
 * @return a list of FileStatus objects for the input files
 * @throws IOException if no input paths are specified
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Get tokens for all the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    List<IOException> errors = new ArrayList<IOException>();
    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        final SmilePathFilter filter = new SmilePathFilter();
        FileStatus[] matches = fs.globStatus(p, filter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    Collections.addAll(result, fs.listStatus(globStat.getPath(), filter));
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }

    return result;
}

From source file: com.sourcecode.FileInputFormat.java

License: Apache License

/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return a list of FileStatus objects for the input files
 * @throws IOException if no input paths are specified
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // Whether we need to look recursively into the directory structure
    boolean recursive = getInputDirRecursive(job);

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    List<FileStatus> result = null;

    int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS, DEFAULT_LIST_STATUS_NUM_THREADS);
    Stopwatch sw = new Stopwatch().start();
    if (numThreads == 1) {
        result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
    } else {
        Iterable<FileStatus> locatedFiles = null;
        try {
            LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                    job.getConfiguration(), dirs, recursive, inputFilter, true);
            locatedFiles = locatedFileStatusFetcher.getFileStatuses();
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while getting file statuses");
        }
        result = Lists.newArrayList(locatedFiles);
    }

    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time taken to get FileStatuses: " + sw.elapsedMillis());
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file: info.halo9pan.word2vec.hadoop.mr.SortOutputFormat.java

License: Apache License

@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);

    if (fs.exists(outDir)) {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        //
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && SortInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (WordSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    }
}

From source file: info.halo9pan.word2vec.hadoop.terasort.TeraOutputFormat.java

License: Apache License

@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);

    if (fs.exists(outDir)) {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        //
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (TeraSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    }
}

From source file: io.amient.kafka.hadoop.io.MultiOutputFormat.java

License: Apache License

public void checkOutputSpecs(JobContext job) throws IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    }

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());
}

From source file: kogiri.mapreduce.common.kmermatch.KmerMatchInputFormat.java

License: Open Source License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // creates a MultiPathFilter from the user-provided filter (if any)
    // and the k-mer index path filter.
    List<PathFilter> filters = new ArrayList<PathFilter>();
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    filters.add(new KmerIndexIndexPathFilter());
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        if (inputFilter.accept(p)) {
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            FileStatus status = fs.getFileStatus(p);
            result.add(status);
        }
    }

    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file: kogiri.mapreduce.preprocess.common.kmerindex.KmerIndexInputFormat.java

License: Open Source License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // creates a MultiPathFilter from the user-provided filter (if any)
    // and the k-mer index part path filter.
    List<PathFilter> filters = new ArrayList<PathFilter>();
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    filters.add(new KmerIndexPartPathFilter());
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        if (inputFilter.accept(p)) {
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            FileStatus status = fs.getFileStatus(p);
            result.add(status);
        }
    }

    LOG.info("Total input paths to process : " + result.size());
    return result;
}