List of usage examples for the org.apache.hadoop.mapred.InvalidInputException constructor
public InvalidInputException(List<IOException> probs)
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/** List input directories. * Subclasses may override to, e.g., select only files matching a regular * expression. //from w ww . ja va2 s . com * * @param job the job to list input paths for * @return array of FileStatus objects * @throws IOException if zero items. */ protected FileStatus[] listStatus(JobConf job) throws IOException { Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job); List<FileStatus> result = new ArrayList<FileStatus>(); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); for (Path p : dirs) { FileSystem fs = p.getFileSystem(job); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDir()) { for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) { result.add(stat); } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result.toArray(new FileStatus[result.size()]); }
From source file:com.blm.orc.OrcInputFormat.java
License:Apache License
static List<OrcSplit> generateSplitsInfo(Configuration conf) throws IOException { // use threads to resolve directories into splits Context context = new Context(conf); for (Path dir : getInputPaths(conf)) { FileSystem fs = dir.getFileSystem(conf); context.schedule(new FileGenerator(context, fs, dir)); }// ww w.j av a 2 s. c o m context.waitForTasks(); // deal with exceptions if (!context.errors.isEmpty()) { List<IOException> errors = new ArrayList<IOException>(context.errors.size()); for (Throwable th : context.errors) { if (th instanceof IOException) { errors.add((IOException) th); } else { throw new RuntimeException("serious problem", th); } } throw new InvalidInputException(errors); } if (context.cacheStripeDetails) { LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/" + context.numFilesCounter.get()); } return context.splits; }
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/** * Sanity check for srcPath/* w w w . j av a 2 s. c o m*/ */ private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths) throws IOException { List<IOException> rslt = new ArrayList<IOException>(); Path[] ps = new Path[srcPaths.size()]; ps = srcPaths.toArray(ps); TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf); for (Path p : srcPaths) { FileSystem fs = p.getFileSystem(jobConf); if (!fs.exists(p)) { rslt.add(new IOException("Input source " + p + " does not exist.")); } } if (!rslt.isEmpty()) { throw new InvalidInputException(rslt); } }
From source file:com.pinterest.hdfsbackup.distcp.DistCp.java
License:Apache License
/** Sanity check for srcPath */ private static void checkSrcPath(Configuration conf, List<Path> srcPaths) throws IOException { List<IOException> rslt = new ArrayList<IOException>(); for (Path p : srcPaths) { FileSystem fs = p.getFileSystem(conf); if (!fs.exists(p)) { rslt.add(new IOException("Input source " + p + " does not exist.")); }// ww w .j a v a 2 s . c om } if (!rslt.isEmpty()) { throw new InvalidInputException(rslt); } }
From source file:edu.ucsb.cs.hadoop.CustomFileInputFormat.java
License:Apache License
/** * List input directories. Subclasses may override to, e.g., select only * files matching a regular expression.//from w ww .ja v a 2s.c om * * @param job the job to list input paths for * @return array of FileStatus objects * @throws IOException if zero items. */ protected FileStatus[] listStatus(JobConf job) throws IOException { Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<FileStatus> result = new ArrayList<FileStatus>(); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); for (Path p : dirs) { FileSystem fs = p.getFileSystem(job); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDir()) { for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) { result.add(stat); } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result.toArray(new FileStatus[result.size()]); }
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/** Sanity check for srcPath */ private static void checkSrcPath(final Configuration conf, final List<Path> srcPaths) throws IOException { List<IOException> rslt = new ArrayList<>(); for (Path p : srcPaths) { FileSystem fs = p.getFileSystem(conf); if (!fs.exists(p)) { rslt.add(new IOException("Input source " + p + " does not exist.")); }// ww w . j a va2s . c o m } if (!rslt.isEmpty()) { throw new InvalidInputException(rslt); } }
From source file:org.apache.hama.bsp.FileInputFormat.java
License:Apache License
/** * List input directories. Subclasses may override to, e.g., select only files * matching a regular expression./* www.j av a 2 s . c o m*/ * * @param job the job to list input paths for * @return array of FileStatus objects * @throws IOException if zero items. */ protected FileStatus[] listStatus(BSPJob job) throws IOException { Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<FileStatus> result = new ArrayList<FileStatus>(); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); for (Path p : dirs) { FileSystem fs = p.getFileSystem(job.getConfiguration()); FileStatus[] matches = null; try { matches = fs.globStatus(p, inputFilter); } catch (Exception e) { LOG.info(p + "\n" + e.toString()); } if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDir()) { Collections.addAll(result, fs.listStatus(globStat.getPath(), inputFilter)); } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result.toArray(new FileStatus[result.size()]); }