Example usage for the org.apache.hadoop.mapred.InvalidInputException constructor InvalidInputException(List<IOException>)

List of usage examples for the org.apache.hadoop.mapred.InvalidInputException constructor InvalidInputException(List<IOException>)

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.InvalidInputException constructor InvalidInputException(List<IOException>).

Prototype

public InvalidInputException(List<IOException> probs) 

Source Link

Document

Create the exception with the given list.

Usage

From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License:Apache License

/**
 * Lists the input files for a job.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if no input paths are configured; an
 *         InvalidInputException collecting every per-path problem is thrown
 *         when a path yields no glob result or a pattern matches 0 files.
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    final Path[] inputDirs = getInputPaths(job);
    if (inputDirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), inputDirs, job);

    final List<FileStatus> found = new ArrayList<FileStatus>();
    final List<IOException> problems = new ArrayList<IOException>();

    // Chain the hiddenFileFilter with the user provided filter (if any)
    // into a single MultiPathFilter.
    final List<PathFilter> filterChain = new ArrayList<PathFilter>();
    filterChain.add(hiddenFileFilter);
    final PathFilter userFilter = getInputPathFilter(job);
    if (userFilter != null) {
        filterChain.add(userFilter);
    }
    final PathFilter combinedFilter = new MultiPathFilter(filterChain);

    for (Path dir : inputDirs) {
        final FileSystem fs = dir.getFileSystem(job);
        final FileStatus[] globbed = fs.globStatus(dir, combinedFilter);

        // Problems are collected rather than thrown immediately so that all
        // bad inputs are reported together.
        if (globbed == null) {
            problems.add(new IOException("Input path does not exist: " + dir));
            continue;
        }
        if (globbed.length == 0) {
            problems.add(new IOException("Input Pattern " + dir + " matches 0 files"));
            continue;
        }

        for (FileStatus entry : globbed) {
            if (!entry.isDir()) {
                found.add(entry);
                continue;
            }
            // Directories are expanded one level, applying the same filter.
            for (FileStatus child : fs.listStatus(entry.getPath(), combinedFilter)) {
                found.add(child);
            }
        }
    }

    if (!problems.isEmpty()) {
        throw new InvalidInputException(problems);
    }
    LOG.info("Total input paths to process : " + found.size());
    return found.toArray(new FileStatus[found.size()]);
}

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

/**
 * Schedules a FileGenerator for every input path, waits for the scheduled
 * tasks, and returns the splits accumulated on the context.
 *
 * @param conf the configuration used to look up input paths and file systems
 * @return the splits collected by the context
 * @throws IOException an InvalidInputException wrapping all recorded
 *         IOExceptions when any task reported an error; a recorded error
 *         that is not an IOException is rethrown as a RuntimeException.
 */
static List<OrcSplit> generateSplitsInfo(Configuration conf) throws IOException {
    // use threads to resolve directories into splits
    final Context ctx = new Context(conf);
    for (Path inputDir : getInputPaths(conf)) {
        ctx.schedule(new FileGenerator(ctx, inputDir.getFileSystem(conf), inputDir));
    }
    ctx.waitForTasks();

    // deal with exceptions
    if (!ctx.errors.isEmpty()) {
        final List<IOException> ioProblems = new ArrayList<IOException>(ctx.errors.size());
        for (Throwable failure : ctx.errors) {
            if (!(failure instanceof IOException)) {
                // Anything other than an IOException aborts immediately.
                throw new RuntimeException("serious problem", failure);
            }
            ioProblems.add((IOException) failure);
        }
        throw new InvalidInputException(ioProblems);
    }

    if (ctx.cacheStripeDetails) {
        LOG.info("FooterCacheHitRatio: " + ctx.cacheHitCounter.get() + "/" + ctx.numFilesCounter.get());
    }
    return ctx.splits;
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Sanity check for srcPath/* w  w w . j  av  a  2  s.  c  o  m*/
 */
private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths) throws IOException {
    List<IOException> rslt = new ArrayList<IOException>();

    Path[] ps = new Path[srcPaths.size()];
    ps = srcPaths.toArray(ps);
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf);

    for (Path p : srcPaths) {
        FileSystem fs = p.getFileSystem(jobConf);
        if (!fs.exists(p)) {
            rslt.add(new IOException("Input source " + p + " does not exist."));
        }
    }
    if (!rslt.isEmpty()) {
        throw new InvalidInputException(rslt);
    }
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/**
 * Sanity check for srcPath: every source path must exist on its file system.
 *
 * @param conf the configuration used to resolve each path's file system
 * @param srcPaths the source paths to check
 * @throws IOException an InvalidInputException listing every missing source
 *         path, or any IOException raised while resolving or probing a path
 */
private static void checkSrcPath(Configuration conf, List<Path> srcPaths) throws IOException {
    final List<IOException> missing = new ArrayList<IOException>();
    for (Path src : srcPaths) {
        final boolean present = src.getFileSystem(conf).exists(src);
        if (present) {
            continue;
        }
        missing.add(new IOException("Input source " + src + " does not exist."));
    }
    if (missing.isEmpty()) {
        return;
    }
    throw new InvalidInputException(missing);
}

From source file:edu.ucsb.cs.hadoop.CustomFileInputFormat.java

License:Apache License

/**
 * Lists the input files for a job. Subclasses may override to, e.g., select
 * only files matching a regular expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if no input paths are configured; an
 *         InvalidInputException collecting every per-path problem is thrown
 *         when a path yields no glob result or a pattern matches 0 files.
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    final Path[] inputDirs = getInputPaths(job);
    if (inputDirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    final List<FileStatus> found = new ArrayList<FileStatus>();
    final List<IOException> problems = new ArrayList<IOException>();

    // Chain the hiddenFileFilter with the user provided filter (if any)
    // into a single MultiPathFilter.
    final List<PathFilter> filterChain = new ArrayList<PathFilter>();
    filterChain.add(hiddenFileFilter);
    final PathFilter userFilter = getInputPathFilter(job);
    if (userFilter != null) {
        filterChain.add(userFilter);
    }
    final PathFilter combinedFilter = new MultiPathFilter(filterChain);

    for (Path dir : inputDirs) {
        final FileSystem fs = dir.getFileSystem(job);
        final FileStatus[] globbed = fs.globStatus(dir, combinedFilter);

        // Problems are collected rather than thrown immediately so that all
        // bad inputs are reported together.
        if (globbed == null) {
            problems.add(new IOException("Input path does not exist: " + dir));
            continue;
        }
        if (globbed.length == 0) {
            problems.add(new IOException("Input Pattern " + dir + " matches 0 files"));
            continue;
        }

        for (FileStatus entry : globbed) {
            if (!entry.isDir()) {
                found.add(entry);
                continue;
            }
            // Directories are expanded one level, applying the same filter.
            for (FileStatus child : fs.listStatus(entry.getPath(), combinedFilter)) {
                found.add(child);
            }
        }
    }

    if (!problems.isEmpty()) {
        throw new InvalidInputException(problems);
    }
    LOG.info("Total input paths to process : " + found.size());
    return found.toArray(new FileStatus[found.size()]);
}

From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java

License:LGPL

/**
 * Sanity check for srcPath: every source path must exist on its file system.
 *
 * @param conf the configuration used to resolve each path's file system
 * @param srcPaths the source paths to check
 * @throws IOException an InvalidInputException listing every missing source
 *         path, or any IOException raised while resolving or probing a path
 */
private static void checkSrcPath(final Configuration conf, final List<Path> srcPaths) throws IOException {
    final List<IOException> notFound = new ArrayList<>();
    for (final Path source : srcPaths) {
        final FileSystem sourceFs = source.getFileSystem(conf);
        if (sourceFs.exists(source)) {
            continue;
        }
        notFound.add(new IOException("Input source " + source + " does not exist."));
    }
    if (notFound.isEmpty()) {
        return;
    }
    throw new InvalidInputException(notFound);
}

From source file:org.apache.hama.bsp.FileInputFormat.java

License:Apache License

/**
 * List input directories. Subclasses may override to, e.g., select only files
 * matching a regular expression./*  www.j  av a  2 s  . c  o  m*/
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected FileStatus[] listStatus(BSPJob job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job.getConfiguration());

        FileStatus[] matches = null;
        try {
            matches = fs.globStatus(p, inputFilter);
        } catch (Exception e) {
            LOG.info(p + "\n" + e.toString());
        }

        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    Collections.addAll(result, fs.listStatus(globStat.getPath(), inputFilter));
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result.toArray(new FileStatus[result.size()]);
}