Example usage for org.apache.hadoop.mapreduce.lib.input InvalidInputException InvalidInputException

List of usage examples for org.apache.hadoop.mapreduce.lib.input InvalidInputException InvalidInputException

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce.lib.input InvalidInputException InvalidInputException.

Prototype

public InvalidInputException(List<IOException> probs) 

Source Link

Document

Create the exception with the given list.

Usage

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * List input directories. Subclasses may override to, e.g., select only files
 * matching a regular expression.//from w  ww . j a  v  a 2s.  com
 *
 * @param job
 *        the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException
 *         if zero items.
 */
protected List<FileStatus> listStatus(BSPJob job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    List<IOException> errors = new ArrayList<IOException>();
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(HIDDEN_FILE_FILTER);
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConf());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. // w  w  w .  ja  va2  s . co m
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];//from  www  .  ja v a2s  .c  om
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath());
                    while (iter.hasNext()) {
                        LocatedFileStatus stat = iter.next();
                        if (inputFilter.accept(stat.getPath())) {
                            if (recursive && stat.isDirectory()) {
                                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                            } else {
                                result.add(stat);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}

From source file:com.marklogic.contentpump.FileAndDirectoryInputFormat.java

License:Apache License

private List<FileStatus> simpleListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();
    Configuration conf = job.getConfiguration();
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];/* w  ww  .  ja v  a 2 s .c om*/
        FileSystem fs = p.getFileSystem(conf);
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    FileStatus[] files = fs.listStatus(globStat.getPath(), inputFilter);
                    for (int j = 0; j < files.length; j++) {
                        if (recursive && files[j].isDirectory()) {
                            simpleAddInputPathRecursively(result, fs, files[j].getPath(), inputFilter);
                        } else {
                            result.add(files[j]);
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}

From source file:com.ning.metrics.serialization.hadoop.SmileInputFormat.java

License:Apache License

/**
 * List input directories.//from   w  ww  . j  ava2 s.  c  o m
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    List<IOException> errors = new ArrayList<IOException>();
    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        final SmilePathFilter filter = new SmilePathFilter();
        FileStatus[] matches = fs.globStatus(p, filter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    Collections.addAll(result, fs.listStatus(globStat.getPath(), filter));
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }

    return result;
}

From source file:gr.ntua.h2rdf.inputFormat.MyFileInputFormat.java

License:Open Source License

/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. /* www .j  av a 2  s .co m*/
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:ml.shifu.shifu.guagua.ShifuInputFormat.java

License:Apache License

@SuppressWarnings("deprecation")
protected List<FileStatus> listCrossValidationStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }/*from   w  ww. j  a v a 2 s .  c  om*/

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // Whether we need to recursive look into the directory structure
    boolean recursive = job.getConfiguration().getBoolean("mapreduce.input.fileinputformat.input.dir.recursive",
            false);

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    FileStatus[] fss = fs.listStatus(globStat.getPath());
                    for (FileStatus fileStatus : fss) {
                        if (inputFilter.accept(fileStatus.getPath())) {
                            if (recursive && fileStatus.isDir()) {
                                addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter);
                            } else {
                                result.add(fileStatus);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total validation paths to process : " + result.size());
    return result;
}

From source file:org.apache.giraph.io.formats.GiraphFileInputFormat.java

License:Apache License

/**
 * Common method for listing vertex/edge input directories.
 *
 * @param job The job//from w  ww . j  a  v  a2  s.  c o  m
 * @param dirs list of vertex/edge input paths
 * @return Array of FileStatus objects
 * @throws IOException
 */
private List<FileStatus> listStatus(JobContext job, Path[] dirs) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    /*if[HADOOP_NON_SECURE]
    else[HADOOP_NON_SECURE]
        // get tokens for all the required FileSystems..
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
            job.getConfiguration());
    end[HADOOP_NON_SECURE]*/

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the HIDDEN_FILE_FILTER and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(HIDDEN_FILE_FILTER);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    Collections.addAll(result, fs.listStatus(globStat.getPath(), inputFilter));
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:org.mrgeo.hdfs.input.image.HdfsMrsImagePyramidInputFormat.java

License:Apache License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    HdfsMrsImageDataProvider dp = new HdfsMrsImageDataProvider(job.getConfiguration(), input, null);
    String inputWithZoom = getZoomName(dp, inputZoom);

    // We are going to read all of the input dirs
    Path[] dirs = new Path[] { new Path(inputWithZoom) };

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);/*from   w  w  w  . j a va  2  s. co  m*/
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                findInputs(globStat, fs, inputFilter, result);
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}