List of usage examples for org.apache.hadoop.fs.PathFilter
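org.apache.hadoop.fs.PathFilter is Hadoop's single-method callback for filtering directory listings: implement boolean accept(Path) and pass the filter to FileSystem.listStatus() or FileSystem.globStatus(). Many of the examples below use it to skip Hadoop bookkeeping output such as "_SUCCESS" and hidden "." files. Before the real-world examples, here is a minimal self-contained sketch; the class name PathFilterDemo, the directory /tmp, and the default Configuration are illustrative assumptions, not taken from any example on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterDemo {
    public static void main(String[] args) throws Exception {
        // Default configuration: local file system unless core-site.xml points to HDFS.
        FileSystem fs = FileSystem.get(new Configuration());
        // Keep only visible entries: skip bookkeeping files ("_SUCCESS", "_logs") and hidden "." files.
        PathFilter visibleFiles = new PathFilter() {
            @Override
            public boolean accept(Path path) {
                String name = path.getName();
                return !name.startsWith("_") && !name.startsWith(".");
            }
        };
        FileStatus[] statuses = fs.listStatus(new Path("/tmp"), visibleFiles);
        for (FileStatus status : statuses) {
            System.out.println(status.getPath());
        }
    }
}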
From source file:com.redsqirl.workflow.server.datatype.MapRedDir.java
License:Open Source License
public String isPathValid(String path, List<String> shouldNotHaveExt, List<String> shouldHaveExt,
        boolean fileExtension) throws RemoteException {
    String error = null;
    HdfsFileChecker hCh = new HdfsFileChecker(path);
    if (shouldHaveExt != null && !shouldHaveExt.isEmpty()) {
        boolean found = false;
        for (String extCur : shouldHaveExt) {
            found |= path.endsWith(extCur);
        }
        if (!found) {
            error = LanguageManagerWF.getText("mapredtexttype.shouldhaveextcompresssile",
                    new Object[] { path, shouldHaveExt });
        }
    } else if (shouldNotHaveExt != null && !shouldNotHaveExt.isEmpty()) {
        boolean found = false;
        for (String extCur : shouldNotHaveExt) {
            found |= path.endsWith(extCur);
        }
        if (found) {
            error = LanguageManagerWF.getText("mapredtexttype.shouldnothaveextcompresssile",
                    new Object[] { path, shouldNotHaveExt });
        }
    }
    if (!hCh.isInitialized() || hCh.isFile()) {
        error = LanguageManagerWF.getText("mapredtexttype.dirisfile");
    } else if (isPathExist()) {
        FileSystem fs;
        try {
            fs = NameNodeVar.getFS();
            hCh.setPath(new Path(path).getParent());
            if (!hCh.isDirectory()) {
                error = LanguageManagerWF.getText("mapredtexttype.nodir",
                        new String[] { hCh.getPath().toString() });
            }
            FileStatus[] stat = null;
            if (error == null) {
                try {
                    // Ignore Hadoop bookkeeping files ("_SUCCESS", "_logs") and hidden "." entries.
                    stat = fs.listStatus(new Path(path), new PathFilter() {
                        @Override
                        public boolean accept(Path arg0) {
                            return !arg0.getName().startsWith("_") && !arg0.getName().startsWith(".");
                        }
                    });
                } catch (Exception e) {
                    stat = null;
                    error = LanguageManagerWF.getText("mapredtexttype.notmrdir", new Object[] { path });
                }
            }
            if (stat != null) {
                for (int i = 0; i < stat.length && error == null; ++i) {
                    if (stat[i].isDir()) {
                        error = LanguageManagerWF.getText("mapredtexttype.notmrdir", new Object[] { path });
                    } else {
                        if (fileExtension) {
                            if (shouldHaveExt != null && !shouldHaveExt.isEmpty()) {
                                boolean found = false;
                                for (String extCur : shouldHaveExt) {
                                    found |= stat[i].getPath().getName().endsWith(extCur);
                                }
                                if (!found) {
                                    error = LanguageManagerWF.getText("mapredtexttype.shouldhaveextcompresssile",
                                            new Object[] { path, shouldHaveExt });
                                }
                            } else if (shouldNotHaveExt != null && !shouldNotHaveExt.isEmpty()) {
                                boolean found = false;
                                for (String extCur : shouldNotHaveExt) {
                                    found |= stat[i].getPath().getName().endsWith(extCur);
                                }
                                if (found) {
                                    error = LanguageManagerWF.getText("mapredtexttype.shouldnothaveextcompresssile",
                                            new Object[] { path, shouldNotHaveExt });
                                }
                            }
                        }
                        try {
                            hdfsInt.select(stat[i].getPath().toString(), "", 1);
                        } catch (Exception e) {
                            error = LanguageManagerWF.getText("mapredtexttype.notmrdir");
                            logger.error(error, e);
                        }
                    }
                }
            }
        } catch (IOException e) {
            error = LanguageManagerWF.getText("unexpectedexception", new Object[] { e.getMessage() });
            logger.error(error, e);
        }
    }
    // hCh.close();
    return error;
}
From source file:com.redsqirl.workflow.server.datatype.MapRedDir.java
License:Open Source License
public List<String> selectLine(int maxToRead) throws RemoteException {
    List<String> ans = null;
    if (isPathExist()) {
        try {
            FileSystem fs = NameNodeVar.getFS();
            // Ignore Hadoop bookkeeping files ("_SUCCESS", "_logs") and hidden "." entries.
            FileStatus[] stat = fs.listStatus(new Path(getPath()), new PathFilter() {
                @Override
                public boolean accept(Path arg0) {
                    return !arg0.getName().startsWith("_") && !arg0.getName().startsWith(".");
                }
            });
            if (stat != null && stat.length > 0) {
                ans = new ArrayList<String>(maxToRead);
                // Sort the files by size, biggest first.
                SortedSet<Map.Entry<FileStatus, Long>> filesSortedBySize = new TreeSet<Map.Entry<FileStatus, Long>>(
                        new Comparator<Map.Entry<FileStatus, Long>>() {
                            @Override
                            public int compare(Map.Entry<FileStatus, Long> e1, Map.Entry<FileStatus, Long> e2) {
                                return -e1.getValue().compareTo(e2.getValue());
                            }
                        });
                for (int k = 0; k < stat.length; ++k) {
                    filesSortedBySize.add(new AbstractMap.SimpleEntry<FileStatus, Long>(stat[k], stat[k].getLen()));
                }
                // Read the biggest files first, from at most NB_FILE_TO_READ_MAX (100 in the
                // original source) files, until maxToRead lines have been collected.
                Iterator<Map.Entry<FileStatus, Long>> fileIt = filesSortedBySize.iterator();
                int k = 0;
                while (fileIt.hasNext() && ans.size() < maxToRead && k < NB_FILE_TO_READ_MAX) {
                    Map.Entry<FileStatus, Long> cur = fileIt.next();
                    FileStatus file = cur.getKey();
                    logger.debug("Number of lines already read: " + ans.size());
                    ans.addAll(hdfsInt.select(file.getPath().toString(), ",", maxToRead - ans.size()));
                    ++k;
                }
                logger.debug("Number of lines read in " + getPath() + ": " + ans.size());
            }
        } catch (IOException e) {
            String error = "Unexpected error: " + e.getMessage();
            logger.error(error, e);
            ans = null;
        } catch (Exception e) {
            logger.error(e, e);
            ans = null;
        }
    }
    return ans;
}
From source file:com.redsqirl.workflow.server.OozieManager.java
License:Open Source License
/**
 * Clean the directory where the job details are stored.
 *
 * @param nameWf
 * @throws RemoteException
 */
public void cleanJobDirectory(final String nameWf) throws RemoteException {
    Path hdfsWfPath = new Path(WorkflowPrefManager.getHDFSPathJobs());
    FileSystem fs = null;
    int numberToKeep = WorkflowPrefManager.getNbOozieDirToKeep();
    try {
        fs = NameNodeVar.getFS();
        FileStatus[] children = fs.listStatus(hdfsWfPath, new PathFilter() {
            @Override
            public boolean accept(Path arg0) {
                return arg0.getName().startsWith(nameWf + "_");
            }
        });
        // Sort by modification time, oldest first; dividing keeps the long difference within int range.
        Arrays.sort(children, 0, children.length, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus arg0, FileStatus arg1) {
                return (int) ((arg0.getModificationTime() - arg1.getModificationTime()) / 10000);
            }
        });
        // Delete the oldest directories, keeping only the last numberToKeep.
        for (int i = 0; i < children.length - numberToKeep; ++i) {
            fs.delete(children[i].getPath(), true);
        }
    } catch (Exception e1) {
        logger.error(e1);
    }
}
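A side note on the comparator above: casting a long subtraction to int is a common overflow trap, and the division by 10000 avoids it at the cost of treating timestamps within 10 seconds of each other as equal. A sketch of a safer drop-in replacement (Java 7+, reusing the children array from the example above):

// Sort oldest first without the overflow risk or the 10-second bucketing.
Arrays.sort(children, new Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus a, FileStatus b) {
        return Long.compare(a.getModificationTime(), b.getModificationTime());
    }
});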
From source file:com.redsqirl.workflow.server.OozieManager.java
License:Open Source License
/**
 * Get a name for a directory to store all the job files and configuration.
 *
 * @param df
 * @return The name for a directory to store all the job files and configuration
 * @throws RemoteException
 */
protected String buildFileName(DataFlow df) throws RemoteException {
    final String nameWf = df.getName();
    if (nameWf == null) {
        logger.warn("The workflow to run has no name");
        df.setName(RandomString.getRandomName(8));
    }
    String ans = null;
    Path hdfsWfPath = new Path(WorkflowPrefManager.getHDFSPathJobs());
    FileSystem fs = null;
    int number = -1;
    try {
        fs = NameNodeVar.getFS();
        // Accept only entries named "<nameWf>_<number>".
        FileStatus[] children = fs.listStatus(hdfsWfPath, new PathFilter() {
            @Override
            public boolean accept(Path arg0) {
                if (arg0.getName().startsWith(nameWf)) {
                    try {
                        @SuppressWarnings("unused")
                        int i = Integer.valueOf(arg0.getName().substring(nameWf.length() + 1));
                        return true;
                    } catch (Exception e) {
                    }
                }
                return false;
            }
        });
        if (children != null && children.length > 0) {
            for (FileStatus child : children) {
                number = Math.max(number,
                        Integer.valueOf(child.getPath().getName().substring(nameWf.length() + 1)));
            }
        }
    } catch (Exception e) {
        logger.error(e, e);
    }
    ans = nameWf + "_" + (number + 1);
    return ans;
}
From source file:com.redsqirl.workflow.server.Workflow.java
License:Open Source License
/**
 * Clean the backup directory.
 *
 * @throws IOException
 */
public void cleanUpBackup() throws IOException {
    String path = WorkflowPrefManager.getBackupPath();
    int nbBackup = WorkflowPrefManager.getNbBackup();
    FileSystem fs = NameNodeVar.getFS();
    // FileStatus stat = fs.getFileStatus(new Path(path));
    // Match backup files ending with a 14-digit timestamp and a .rs or .srs extension.
    FileStatus[] fsA = fs.listStatus(new Path(path), new PathFilter() {
        @Override
        public boolean accept(Path arg0) {
            return arg0.getName().matches(".*[0-9]{14}(.rs|.srs)$");
        }
    });
    logger.debug("Backup directory: " + fsA.length + " files, " + nbBackup + " to keep, "
            + Math.max(0, fsA.length - nbBackup) + " to remove");
    if (fsA.length > nbBackup) {
        int numberToRemove = fsA.length - nbBackup;
        Map<Path, Long> pathToRemove = new HashMap<Path, Long>();
        Path pathMin = null;
        Long min = Long.MAX_VALUE;
        for (FileStatus stat : fsA) {
            if (pathToRemove.size() < numberToRemove) {
                pathToRemove.put(stat.getPath(), stat.getModificationTime());
            } else if (min > stat.getModificationTime()) {
                pathToRemove.remove(pathMin);
                pathToRemove.put(stat.getPath(), stat.getModificationTime());
            }
            if (min > stat.getModificationTime()) {
                min = stat.getModificationTime();
                pathMin = stat.getPath();
            }
        }
        for (Path pathDel : pathToRemove.keySet()) {
            fs.delete(pathDel, false);
        }
    }
    // fs.close();
}
From source file:com.ricemap.spateDB.core.SpatialSite.java
License:Apache License
public static GlobalIndex<Partition> getGlobalIndex(FileSystem fs, Path dir) throws IOException {
    // Retrieve the master file (the only file with the name _master in it)
    FileStatus[] masterFiles = fs.listStatus(dir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    // Check if the given file is indexed
    if (masterFiles.length == 0)
        return null;
    if (masterFiles.length > 1)
        throw new RuntimeException("Found more than one master file in " + dir);
    Path masterFile = masterFiles[0].getPath();
    ShapeRecordReader<Partition> reader = new ShapeRecordReader<Partition>(fs.open(masterFile), 0,
            fs.getFileStatus(masterFile).getLen());
    CellInfo dummy = new CellInfo();
    Partition partition = new Partition();
    ArrayList<Partition> partitions = new ArrayList<Partition>();
    while (reader.next(dummy, partition)) {
        partitions.add(partition.clone());
    }
    GlobalIndex<Partition> globalIndex = new GlobalIndex<Partition>();
    globalIndex.bulkLoad(partitions.toArray(new Partition[partitions.size()]));
    globalIndex.setCompact(masterFile.getName().endsWith("rtree") || masterFile.getName().endsWith("r+tree"));
    globalIndex.setReplicated(masterFile.getName().endsWith("r+tree") || masterFile.getName().endsWith("grid"));
    return globalIndex;
}
From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java
License:Apache License
public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape,
        String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite)
        throws IOException {
    JobConf job = new JobConf(RandomSpatialGenerator.class);
    job.setJobName("Generator");
    FileSystem outFs = file.getFileSystem(job);
    // Overwrite output file
    if (outFs.exists(file)) {
        if (overwrite)
            outFs.delete(file, true);
        else
            throw new RuntimeException("Output file '" + file + "' already exists and overwrite flag is not set");
    }
    // Set generation parameters in job
    job.setLong(RandomShapeGenerator.GenerationSize, size);
    SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr);
    if (seed != 0)
        job.setLong(RandomShapeGenerator.GenerationSeed, seed);
    if (rectsize != 0)
        job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize);
    if (type != null)
        job.set(RandomShapeGenerator.GenerationType, type.toString());
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));
    SpatialSite.setShapeClass(job, shape.getClass());
    if (blocksize != 0) {
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize);
    }
    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
        FileSystem fs = file.getFileSystem(job);
        if (blocksize == 0) {
            blocksize = fs.getDefaultBlockSize(file);
        }
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }
    SpatialSite.setCells(job, cells);
    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }
    // Set output path
    FileOutputFormat.setOutputPath(job, file);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }
    JobClient.runJob(job);
    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
            .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(file, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();
    // Plot an image for the partitions used in file
    Path imagePath = new Path(file, "_partitions.png");
    int imageSize = (int) (Math.sqrt(cells.length) * 300);
    Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false,
            false);
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License:Apache License
private int getFinalFileNameCount(FileSystem fs, Path dir, final String prefix) throws IOException {
    return fs.listStatus(dir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(prefix);
        }
    }).length;
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.spooler.HdfsFileSystem.java
License:Apache License
public void addFiles(WrappedFile dirFile, WrappedFile startingFile, List<WrappedFile> toProcess,
        boolean includeStartingFile, boolean useLastModified) throws IOException {
    final long scanTime = System.currentTimeMillis();
    // The filter collects matching files into toProcess as a side effect and always
    // returns false, so globStatus() itself returns an empty result.
    PathFilter pathFilter = new PathFilter() {
        @Override
        public boolean accept(Path entry) {
            try {
                FileStatus fileStatus = fs.getFileStatus(entry);
                if (fileStatus.isDirectory()) {
                    return false;
                }
                if (!patternMatches(entry.getName())) {
                    return false;
                }
                HdfsFile hdfsFile = new HdfsFile(fs, entry);
                // SDC-3551: Pick up only files with mtime strictly less than scan time.
                if (fileStatus.getModificationTime() < scanTime) {
                    if (startingFile == null || startingFile.toString().isEmpty()) {
                        toProcess.add(hdfsFile);
                    } else {
                        int compares = compare(hdfsFile, startingFile, useLastModified);
                        if (includeStartingFile) {
                            if (compares >= 0) {
                                toProcess.add(hdfsFile);
                            }
                        } else {
                            if (compares > 0) {
                                toProcess.add(hdfsFile);
                            }
                        }
                    }
                }
            } catch (IOException ex) {
                LOG.error("Failed to open file {}", entry.toString());
            }
            return false;
        }
    };
    fs.globStatus(new Path(dirFile.getAbsolutePath(), "*"), pathFilter);
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.spooler.HdfsFileSystem.java
License:Apache License
public void archiveFiles(WrappedFile archiveDirPath, List<WrappedFile> toProcess, long timeThreshold)
        throws IOException {
    // As in addFiles() above, the filter collects matches as a side effect and always returns false.
    PathFilter pathFilter = new PathFilter() {
        @Override
        public boolean accept(Path entry) {
            try {
                if (!patternMatches(entry.getName())) {
                    return false;
                }
                // Collect files whose modification time is older than the threshold.
                if (timeThreshold - fs.getFileStatus(entry).getModificationTime() > 0) {
                    toProcess.add(new HdfsFile(fs, entry));
                }
            } catch (IOException ex) {
                LOG.debug("Failed to open file {}", entry.toString());
            }
            return false;
        }
    };
    Path path = new Path(archiveDirPath.getAbsolutePath(), "*");
    fs.globStatus(path, pathFilter);
    if (processSubdirectories) {
        fs.globStatus(new Path(path, "*"), pathFilter);
    }
}
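Finally, since PathFilter declares a single abstract method, it can also be implemented as a lambda on Java 8+ instead of the anonymous classes used throughout these examples. A minimal sketch; the class name LambdaFilterDemo and the directory /data are illustrative assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LambdaFilterDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // PathFilter has a single abstract method, so a lambda compiles to it directly.
        FileStatus[] visible = fs.listStatus(new Path("/data"),
                path -> !path.getName().startsWith("_") && !path.getName().startsWith("."));
        for (FileStatus status : visible) {
            System.out.println(status.getPath() + "\t" + status.getLen());
        }
    }
}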