Example usage for org.apache.hadoop.fs PathFilter PathFilter

Introduction

This page collects example usages of org.apache.hadoop.fs.PathFilter, drawn from open source projects.

Prototype

public interface PathFilter {
    boolean accept(Path path);
}
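
PathFilter has a single abstract method: an implementation decides, per Path, whether an entry should be included. Callers typically pass an anonymous class (or, on Java 8+, a lambda) to methods such as FileSystem.listStatus and FileSystem.globStatus. Below is a minimal sketch of the pattern; the directory /tmp/data is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// List a directory, keeping only entries accepted by the filter.
FileSystem fs = FileSystem.get(new Configuration());
FileStatus[] visible = fs.listStatus(new Path("/tmp/data"), new PathFilter() {
    @Override
    public boolean accept(Path path) {
        // Skip hidden files and MapReduce bookkeeping output such as _SUCCESS.
        return !path.getName().startsWith("_") && !path.getName().startsWith(".");
    }
});

Because accept(Path) is the interface's only method, the same filter can also be written as a lambda: fs.listStatus(dir, p -> !p.getName().startsWith("_")).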

Usage

From source file:com.redsqirl.workflow.server.datatype.MapRedDir.java

License:Open Source License

public String isPathValid(String path, List<String> shouldNotHaveExt, List<String> shouldHaveExt,
        boolean fileExtension) throws RemoteException {
    String error = null;
    HdfsFileChecker hCh = new HdfsFileChecker(path);
    if (shouldHaveExt != null && !shouldHaveExt.isEmpty()) {
        boolean found = false;
        for (String extCur : shouldHaveExt) {
            found |= path.endsWith(extCur);
        }
        if (!found) {
            error = LanguageManagerWF.getText("mapredtexttype.shouldhaveextcompresssile",
                    new Object[] { path, shouldHaveExt });

        }
    } else if (shouldNotHaveExt != null && !shouldNotHaveExt.isEmpty()) {
        boolean found = false;
        for (String extCur : shouldNotHaveExt) {
            found |= path.endsWith(extCur);
        }
        if (found) {
            error = LanguageManagerWF.getText("mapredtexttype.shouldnothaveextcompresssile",
                    new Object[] { path, shouldNotHaveExt });

        }
    }

    if (!hCh.isInitialized() || hCh.isFile()) {
        error = LanguageManagerWF.getText("mapredtexttype.dirisfile");
    } else if (isPathExist()) {
        FileSystem fs;
        try {
            fs = NameNodeVar.getFS();
            hCh.setPath(new Path(path).getParent());
            if (!hCh.isDirectory()) {
                error = LanguageManagerWF.getText("mapredtexttype.nodir",
                        new String[] { hCh.getPath().toString() });
            }

            FileStatus[] stat = null;
            if (error == null) {
                try {
                    stat = fs.listStatus(new Path(path), new PathFilter() {

                        @Override
                        public boolean accept(Path arg0) {
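                            // Skip hidden files and bookkeeping output such as _SUCCESS or _logs.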
                            return !arg0.getName().startsWith("_") && !arg0.getName().startsWith(".");
                        }
                    });
                } catch (Exception e) {
                    stat = null;
                    error = LanguageManagerWF.getText("mapredtexttype.notmrdir", new Object[] { path });
                }
            }

            if (stat != null) {
                for (int i = 0; i < stat.length && error == null; ++i) {
                    if (stat[i].isDir()) {
                        error = LanguageManagerWF.getText("mapredtexttype.notmrdir", new Object[] { path });
                    } else {
                        if (fileExtension) {
                            if (shouldHaveExt != null && !shouldHaveExt.isEmpty()) {
                                boolean found = false;
                                for (String extCur : shouldHaveExt) {
                                    found |= stat[i].getPath().getName().endsWith(extCur);
                                }
                                if (!found) {
                                    error = LanguageManagerWF.getText(
                                            "mapredtexttype.shouldhaveextcompresssile",
                                            new Object[] { path, shouldHaveExt });

                                }
                            } else if (shouldNotHaveExt != null && !shouldNotHaveExt.isEmpty()) {
                                boolean found = false;
                                for (String extCur : shouldNotHaveExt) {
                                    found |= stat[i].getPath().getName().endsWith(extCur);
                                }
                                if (found) {
                                    error = LanguageManagerWF.getText(
                                            "mapredtexttype.shouldnothaveextcompresssile",
                                            new Object[] { path, shouldNotHaveExt });

                                }
                            }
                        }

                        try {
                            hdfsInt.select(stat[i].getPath().toString(), "", 1);
                        } catch (Exception e) {
                            error = LanguageManagerWF.getText("mapredtexttype.notmrdir");
                            logger.error(error, e);
                        }
                    }
                }
            }
        } catch (IOException e) {

            error = LanguageManagerWF.getText("unexpectedexception", new Object[] { e.getMessage() });

            logger.error(error, e);
        }

    }
    // hCh.close();
    return error;
}

From source file:com.redsqirl.workflow.server.datatype.MapRedDir.java

License:Open Source License

public List<String> selectLine(int maxToRead) throws RemoteException {
    List<String> ans = null;
    if (isPathExist()) {
        try {
            FileSystem fs = NameNodeVar.getFS();

            FileStatus[] stat = fs.listStatus(new Path(getPath()), new PathFilter() {

                @Override
                public boolean accept(Path arg0) {
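                    // Skip hidden files and bookkeeping output; read only real data files.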
                    return !arg0.getName().startsWith("_") && !arg0.getName().startsWith(".");
                }
            });

            if (stat != null && stat.length > 0) {
                ans = new ArrayList<String>(maxToRead);

                SortedSet<Map.Entry<FileStatus, Long>> filesSortedBySize = new TreeSet<Map.Entry<FileStatus, Long>>(
                        new Comparator<Map.Entry<FileStatus, Long>>() {
                            @Override
                            public int compare(Map.Entry<FileStatus, Long> e1, Map.Entry<FileStatus, Long> e2) {
                                return -e1.getValue().compareTo(e2.getValue());
                            }
                        });
                //Sort all files by size; at most NB_FILE_TO_READ_MAX of them are read below
                for (int k = 0; k < stat.length; ++k) {
                    filesSortedBySize
                            .add(new AbstractMap.SimpleEntry<FileStatus, Long>(stat[k], stat[k].getLen()));
                }

                //Read the biggest files first
                Iterator<Map.Entry<FileStatus, Long>> fileIt = filesSortedBySize.iterator();
                int k = 0;
                while (fileIt.hasNext() && ans.size() < maxToRead && k < NB_FILE_TO_READ_MAX) {
                    Map.Entry<FileStatus, Long> cur = fileIt.next();
                    FileStatus file = cur.getKey();
                    logger.debug("Number of line already read: " + ans.size());
                    ans.addAll(hdfsInt.select(file.getPath().toString(), ",", maxToRead - ans.size()));
                    ++k;
                }

                logger.debug("Number of line read in " + getPath() + ": " + ans.size());
            }
        } catch (IOException e) {
            String error = "Unexpected error: " + e.getMessage();
            logger.error(error, e);
            ans = null;
        } catch (Exception e) {
            logger.error(e, e);
            ans = null;
        }
    }

    return ans;
}

From source file:com.redsqirl.workflow.server.OozieManager.java

License:Open Source License

/**
 * Clean the directory where the job details are stored, keeping only the
 * most recent job directories for the given workflow.
 * 
 * @param nameWf the name of the workflow whose job directories are cleaned
 * @throws RemoteException
 */
public void cleanJobDirectory(final String nameWf) throws RemoteException {
    Path hdfsWfPath = new Path(WorkflowPrefManager.getHDFSPathJobs());
    FileSystem fs = null;
    int numberToKeep = WorkflowPrefManager.getNbOozieDirToKeep();
    try {
        fs = NameNodeVar.getFS();
        FileStatus[] children = fs.listStatus(hdfsWfPath, new PathFilter() {

            @Override
            public boolean accept(Path arg0) {
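                // Keep only the job directories created for this workflow (named <nameWf>_<n>).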
                return arg0.getName().startsWith(nameWf + "_");
            }
        });
        Arrays.sort(children, 0, children.length, new Comparator<FileStatus>() {

            @Override
            public int compare(FileStatus arg0, FileStatus arg1) {
                // Oldest first, so the surplus directories deleted below are the oldest.
                return Long.compare(arg0.getModificationTime(), arg1.getModificationTime());
            }
        });
        for (int i = 0; i < children.length - numberToKeep; ++i) {
            fs.delete(children[i].getPath(), true);
        }
    } catch (Exception e1) {
        logger.error(e1);
    }
}

From source file:com.redsqirl.workflow.server.OozieManager.java

License:Open Source License

/**
 * Get a name for a directory to store all the job files and configuration.
 * 
 * @param df the workflow to build the directory name for
 * @return The name for a directory to store all the job files and configuration
 * @throws RemoteException
 */
protected String buildFileName(DataFlow df) throws RemoteException {
    String name = df.getName();
    if (name == null) {
        logger.warn("The workflow to run has no name");
        df.setName(RandomString.getRandomName(8));
        name = df.getName();
    }
    final String nameWf = name;
    String ans = null;
    Path hdfsWfPath = new Path(WorkflowPrefManager.getHDFSPathJobs());
    FileSystem fs = null;
    int number = -1;
    try {
        fs = NameNodeVar.getFS();
        FileStatus[] children = fs.listStatus(hdfsWfPath, new PathFilter() {

            @Override
            public boolean accept(Path arg0) {
                if (arg0.getName().startsWith(nameWf)) {
                    try {
                        // Accept only names of the form <nameWf>_<number>.
                        Integer.parseInt(arg0.getName().substring(nameWf.length() + 1));
                        return true;
                    } catch (Exception e) {
                        // Suffix is not numeric: reject.
                    }
                }
                return false;
            }
        });

        if (children != null && children.length > 0) {
            for (FileStatus child : children) {
                number = Math.max(number,
                        Integer.valueOf(child.getPath().getName().substring(nameWf.length() + 1)));
            }
        }
    } catch (Exception e) {
        logger.error(e, e);
    }
    ans = nameWf + "_" + (number + 1);

    return ans;
}

From source file:com.redsqirl.workflow.server.Workflow.java

License:Open Source License

/**
 * Clean the backup directory, keeping only the most recent backups.
 * 
 * @throws IOException
 */
public void cleanUpBackup() throws IOException {
    String path = WorkflowPrefManager.getBackupPath();
    int nbBackup = WorkflowPrefManager.getNbBackup();

    FileSystem fs = NameNodeVar.getFS();
    // FileStatus stat = fs.getFileStatus(new Path(path));
    FileStatus[] fsA = fs.listStatus(new Path(path), new PathFilter() {

        @Override
        public boolean accept(Path arg0) {
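            // Backups are named with a 14-digit timestamp and a .rs or .srs extension.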
            return arg0.getName().matches(".*[0-9]{14}(.rs|.srs)$");
        }
    });
    logger.debug("Backup directory: " + fsA.length + " files, " + nbBackup + " to keep, "
            + Math.max(0, fsA.length - nbBackup) + " to remove");
    if (fsA.length > nbBackup) {
        int numberToRemove = fsA.length - nbBackup;
        // Sort the backups oldest first and delete the surplus.
        Arrays.sort(fsA, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus f1, FileStatus f2) {
                return Long.compare(f1.getModificationTime(), f2.getModificationTime());
            }
        });
        for (int i = 0; i < numberToRemove; ++i) {
            fs.delete(fsA[i].getPath(), false);
        }
    }
    // fs.close();
}

From source file:com.ricemap.spateDB.core.SpatialSite.java

License:Apache License

public static GlobalIndex<Partition> getGlobalIndex(FileSystem fs, Path dir) throws IOException {
    // Retrieve the master file (the only file with the name _master in it)
    FileStatus[] masterFiles = fs.listStatus(dir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
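            // The master file is the only file whose name contains "_master".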
            return path.getName().contains("_master");
        }
    });
    // Check if the given file is indexed
    if (masterFiles.length == 0)
        return null;
    if (masterFiles.length > 1)
        throw new RuntimeException("Found more than one master file in " + dir);
    Path masterFile = masterFiles[0].getPath();
    ShapeRecordReader<Partition> reader = new ShapeRecordReader<Partition>(fs.open(masterFile), 0,
            fs.getFileStatus(masterFile).getLen());
    CellInfo dummy = new CellInfo();
    Partition partition = new Partition();
    ArrayList<Partition> partitions = new ArrayList<Partition>();
    while (reader.next(dummy, partition)) {
        partitions.add(partition.clone());
    }
    GlobalIndex<Partition> globalIndex = new GlobalIndex<Partition>();
    globalIndex.bulkLoad(partitions.toArray(new Partition[partitions.size()]));
    globalIndex.setCompact(masterFile.getName().endsWith("rtree") || masterFile.getName().endsWith("r+tree"));
    globalIndex.setReplicated(masterFile.getName().endsWith("r+tree") || masterFile.getName().endsWith("grid"));
    return globalIndex;
}

From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java

License:Apache License

public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape,
        String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite)
        throws IOException {
    JobConf job = new JobConf(RandomSpatialGenerator.class);

    job.setJobName("Generator");
    FileSystem outFs = file.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(file)) {
        if (overwrite)
            outFs.delete(file, true);
        else
            throw new RuntimeException(
                    "Output file '" + file + "' already exists and overwrite flag is not set");
    }

    // Set generation parameters in job
    job.setLong(RandomShapeGenerator.GenerationSize, size);
    SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr);
    if (seed != 0)
        job.setLong(RandomShapeGenerator.GenerationSeed, seed);
    if (rectsize != 0)
        job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize);
    if (type != null)
        job.set(RandomShapeGenerator.GenerationType, type.toString());

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    SpatialSite.setShapeClass(job, shape.getClass());

    if (blocksize != 0) {
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize);
    }

    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
        FileSystem fs = file.getFileSystem(job);
        if (blocksize == 0) {
            blocksize = fs.getDefaultBlockSize(file);
        }
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path
    FileOutputFormat.setOutputPath(job, file);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobClient.runJob(job);

    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() {
        @Override
        public boolean accept(Path path) {
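            // Each reduce task wrote its own master file; collect them all for concatenation.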
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
            .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(file, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();

    // Plot an image for the partitions used in file
    Path imagePath = new Path(file, "_partitions.png");
    int imageSize = (int) (Math.sqrt(cells.length) * 300);
    Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false,
            false);
}

From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java

License:Apache License

private int getFinalFileNameCount(FileSystem fs, Path dir, final String prefix) throws IOException {
    return fs.listStatus(dir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
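            // Count only the final output files that carry the expected prefix.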
            return path.getName().startsWith(prefix);
        }
    }).length;
}

From source file:com.streamsets.pipeline.stage.origin.hdfs.spooler.HdfsFileSystem.java

License:Apache License

public void addFiles(WrappedFile dirFile, WrappedFile startingFile, List<WrappedFile> toProcess,
        boolean includeStartingFile, boolean useLastModified) throws IOException {
    final long scanTime = System.currentTimeMillis();

    PathFilter pathFilter = new PathFilter() {
        @Override
        public boolean accept(Path entry) {
            try {
                FileStatus fileStatus = fs.getFileStatus(entry);
                if (fileStatus.isDirectory()) {
                    return false;
                }

                if (!patternMatches(entry.getName())) {
                    return false;
                }

                HdfsFile hdfsFile = new HdfsFile(fs, entry);
                // SDC-3551: Pick up only files with mtime strictly less than scan time.
                if (fileStatus.getModificationTime() < scanTime) {
                    if (startingFile == null || startingFile.toString().isEmpty()) {
                        toProcess.add(hdfsFile);
                    } else {
                        int compares = compare(hdfsFile, startingFile, useLastModified);
                        if (includeStartingFile) {
                            if (compares >= 0) {
                                toProcess.add(hdfsFile);
                            }
                        } else {
                            if (compares > 0) {
                                toProcess.add(hdfsFile);
                            }
                        }
                    }
                }
            } catch (IOException ex) {
                LOG.error("Failed to open file {}", entry, ex);
            }
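            // Matches were collected above as a side effect; always returning false
            // keeps the globStatus result itself empty.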
            return false;
        }
    };

    fs.globStatus(new Path(dirFile.getAbsolutePath(), "*"), pathFilter);
}

From source file:com.streamsets.pipeline.stage.origin.hdfs.spooler.HdfsFileSystem.java

License:Apache License

public void archiveFiles(WrappedFile archiveDirPath, List<WrappedFile> toProcess, long timeThreshold)
        throws IOException {
    PathFilter pathFilter = new PathFilter() {
        @Override
        public boolean accept(Path entry) {
            try {
                if (!patternMatches(entry.getName())) {
                    return false;
                }

                if (timeThreshold - fs.getFileStatus(entry).getModificationTime() > 0) {
                    toProcess.add(new HdfsFile(fs, entry));
                }
            } catch (IOException ex) {
                LOG.debug("Failed to open file {}", entry, ex);
            }
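            // As in addFiles, candidates are gathered as a side effect and every entry is rejected.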
            return false;
        }
    };

    Path path = new Path(archiveDirPath.getAbsolutePath(), "*");
    fs.globStatus(path, pathFilter);

    if (processSubdirectories) {
        fs.globStatus(new Path(path, "*"), pathFilter);
    }
}