Example usage for org.apache.hadoop.fs.PathFilter

Introduction

This page collects example usages of the org.apache.hadoop.fs.PathFilter interface.

Prototype

PathFilter

Source Link

Usage

From source file:edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java

License:Open Source License

/**
 * Runs a MapReduce job that writes generated shapes to {@code outFile} and then
 * concatenates all files whose name contains {@code "_master"} into a single
 * {@code _master<ext>} file inside {@code outFile}.
 *
 * @param outFile output directory of the job
 * @param params  job parameters; this method reads {@code "shape"}, {@code "mbr"},
 *                {@code "sindex"} and, for the grid index, {@code "size"}
 * @throws IOException      if the job or any file system operation fails
 * @throws RuntimeException if {@code "sindex"} is set to anything other than {@code "grid"}
 */
private static void generateMapReduce(Path outFile, OperationsParams params) throws IOException {
    JobConf job = new JobConf(params, RandomSpatialGenerator.class);
    job.setJobName("Generator");
    Shape shape = params.getShape("shape");

    FileSystem outFs = outFile.getFileSystem(job);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    String sindex = params.get("sindex");
    Rectangle mbr = params.getShape("mbr").getMBR();

    CellInfo[] cells;
    if (sindex == null) {
        // No index requested: a single cell covering the whole MBR
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
        long blocksize = outFs.getDefaultBlockSize(outFile);
        long size = params.getSize("size");
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, outFs, outFile, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together. Use about 90% of the reduce slots, but
        // never more reducers than cells and never fewer than one.
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path. Any unsupported index value has already been rejected
    // above, so only the null and "grid" cases reach this point.
    FileOutputFormat.setOutputPath(job, outFile);
    job.setOutputFormat(GridOutputFormat.class);

    JobClient.runJob(job);

    // TODO move the following part to OutputCommitter
    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(outFile, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    if (resultFiles.length == 0) {
        // The job produced no master files, so there is nothing to concatenate
        return;
    }

    // Reuse the extension of the first master file; tolerate names without a dot
    String firstName = resultFiles[0].getPath().getName();
    int dotIndex = firstName.lastIndexOf('.');
    String ext = dotIndex == -1 ? "" : firstName.substring(dotIndex);
    Path masterPath = new Path(outFile, "_master" + ext);

    byte[] buffer = new byte[4096];
    // try-with-resources closes both streams even if a read or write fails
    try (OutputStream destOut = outFs.create(masterPath)) {
        for (FileStatus f : resultFiles) {
            try (InputStream in = outFs.open(f.getPath())) {
                int bytesRead;
                while ((bytesRead = in.read(buffer)) > 0) {
                    destOut.write(buffer, 0, bytesRead);
                }
            }
            // The partial master file has been copied and is no longer needed
            outFs.delete(f.getPath(), false);
        }
    }
}

From source file:edu.umn.cs.spatialHadoop.util.NASADatasetUtil.java

License:Open Source License

/**
 * Lists the entries directly under {@code path} whose name contains
 * {@code inputDateString}.
 *
 * @param path            directory to list
 * @param inputDateString text that an entry name must contain to be returned
 * @return the paths of all matching entries, in the order returned by the file system
 * @throws IOException if the file system cannot be obtained or listed
 */
public static Path[] getMatchingFilesInPath(Path path, final String inputDateString) throws IOException {
    FileSystem fs = path.getFileSystem(new Configuration());

    // Accept an entry when its last path component contains the date string
    PathFilter nameContainsDate = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            return candidate.getName().contains(inputDateString);
        }
    };

    FileStatus[] statuses = fs.listStatus(path, nameContainsDate);
    Path[] result = new Path[statuses.length];
    int next = 0;
    for (FileStatus status : statuses) {
        result[next++] = status.getPath();
    }
    return result;
}

From source file:edu.umn.cs.spatialHadoop.util.TemporalIndexManager.java

License:Open Source License

/**
 * Based on a certain time range, this method filters all directories and
* determines which files need to be indexed on daily, monthly and yearly
* levels. After calling this method, you need to call the daily, monthly
* and yearly getters to return paths required to be indexed.
 * @param timeRange/*w  ww . j a va  2 s. c  om*/
 * @throws IOException
 * @throws ParseException
 */
public void prepareNeededIndexes(String timeRange) throws IOException, ParseException {
    if (timeRange == null) {
        LOG.error("TimeRange is empty");
        return;
    }

    // Parse start and end dates
    final Date startDate, endDate;
    try {
        startDate = dayFormat.parse(timeRange.split("\\.\\.")[0]);
        endDate = dayFormat.parse(timeRange.split("\\.\\.")[1]);
    } catch (ArrayIndexOutOfBoundsException e) {
        LOG.error("Use the seperator two periods '..' to seperate from and to dates");
        return;
    } catch (ParseException e) {
        LOG.error("Illegal date format in " + timeRange);
        return;
    }

    // Filter all file/folder paths based on the start-end date range
    FileStatus[] matchingDirs = fileSystem.listStatus(datasetPath, new PathFilter() {
        @Override
        public boolean accept(Path p) {
            String dirName = p.getName();
            try {
                Date date = dayFormat.parse(dirName);
                return date.compareTo(startDate) >= 0 && date.compareTo(endDate) <= 0;
            } catch (ParseException e) {
                LOG.warn("Cannot parse directory name: " + dirName);
                return false;
            }
        }
    });
    if (matchingDirs.length == 0) {
        LOG.warn("No matching directories for the given input");
    }

    // Re-indexing check for each matching
    for (FileStatus matchingDir : matchingDirs) {
        String matchingDirDateString = NASADatasetUtil.extractDateStringFromFileStatus(matchingDir);
        if (existYearlyIndexes.containsKey(NASADatasetUtil.getYearFormat(matchingDirDateString))) {
            // needs to re-build year, month and year indexes
            existYearlyIndexes.put(NASADatasetUtil.getYearFormat(matchingDirDateString), true);
            existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true);
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
        } else if (existMonthlyIndexes.containsKey(NASADatasetUtil.getMonthFormat(matchingDirDateString))) {
            // needs to re-build month and day indexes
            existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true);
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
        } else if (existDailyIndexes.containsKey(NASADatasetUtil.getDayFormat(matchingDirDateString))) {
            // needs to re-build day index
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
        } else {
            // needs to build a new index
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);

            int daysCountInMonth = getMatchesCountFromMap(existDailyIndexes,
                    NASADatasetUtil.getMonthFormat(matchingDirDateString));

            if (daysCountInMonth >= getNumDaysPerMonth(
                    NASADatasetUtil.extractMonthFromDate(matchingDirDateString))) {
                existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true);

                int monthsCountInYear = getMatchesCountFromMap(existMonthlyIndexes,
                        NASADatasetUtil.getYearFormat(matchingDirDateString));
                if (monthsCountInYear >= getNumMonthsPerYear()) {
                    existYearlyIndexes.put(NASADatasetUtil.getYearFormat(matchingDirDateString), true);
                }
            }
        }

    }
    convertNeededIndexesListIntoArrays();
}

From source file:edu.umn.cs.spatialHadoop.visualization.CanvasOutputFormat.java

License:Open Source License

/**
 * Merges the partial canvases found under {@code outPath} into one canvas and
 * replaces {@code outPath} with the resulting image.
 * <p>
 * Reads {@code "width"} and {@code "height"} (default 1000), {@code "vflip"}
 * (default {@code true}) and {@code "parallel"} (default: number of available
 * processors) from {@code conf}. If no partial output is found, an error is
 * printed and nothing is written.
 *
 * @param conf    job configuration
 * @param outPath directory holding the partial output; it is deleted recursively
 *                and re-created as the final image file
 * @throws IOException          if listing, deleting or writing fails
 * @throws InterruptedException declared for the parallel merge step
 */
protected static void mergeImages(final Configuration conf, final Path outPath)
        throws IOException, InterruptedException {
    final int width = conf.getInt("width", 1000);
    final int height = conf.getInt("height", 1000);
    final Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, InputMBR);

    final boolean vflip = conf.getBoolean("vflip", true);

    // List all output files resulting from reducers
    // NOTE(review): the filter tests the full path, not just the file name, so
    // an outPath that itself contains "part-" would match every child - confirm.
    final FileSystem outFs = outPath.getFileSystem(conf);
    final FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.toUri().getPath().contains("part-");
        }
    });

    if (resultFiles.length == 0) {
        System.err.println("Error! Couldn't find any partial output. Exiting!");
        return;
    }
    System.out.println(System.currentTimeMillis() + ": Merging " + resultFiles.length + " layers into one");
    List<Canvas> intermediateLayers = Parallel.forEach(resultFiles.length,
            new Parallel.RunnableRange<Canvas>() {
                @Override
                public Canvas run(int i1, int i2) {
                    Plotter plotter = Plotter.getPlotter(conf);
                    // The canvas that contains the merge of all assigned layers
                    Canvas finalLayer = null;
                    Canvas tempLayer = plotter.createCanvas(1, 1, new Rectangle());
                    for (int i = i1; i < i2; i++) {
                        FileStatus resultFile = resultFiles[i];
                        // try-with-resources closes the stream even when reading or
                        // merging a layer fails part-way through the file
                        try (FSDataInputStream inputStream = outFs.open(resultFile.getPath())) {
                            while (inputStream.getPos() < resultFile.getLen()) {
                                if (tempLayer == finalLayer) {
                                    // More than one layer. Create a separate final layer to merge
                                    finalLayer = plotter.createCanvas(width, height, inputMBR);
                                    plotter.merge(finalLayer, tempLayer);
                                }
                                tempLayer.readFields(inputStream);

                                if (finalLayer == null) {
                                    // First layer. Treat it as a final layer to avoid merging
                                    // if it is the only layer
                                    finalLayer = tempLayer;
                                } else {
                                    // More than one layer. Merge into the final layer
                                    plotter.merge(finalLayer, tempLayer);
                                }
                            }
                        } catch (IOException e) {
                            // Best effort: report the unreadable file and continue with the rest
                            System.err.println("Error reading " + resultFile);
                            e.printStackTrace();
                        }
                    }
                    return finalLayer;
                }
            }, conf.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    // Merge all intermediate layers into one final layer
    Plotter plotter = Plotter.getPlotter(conf);
    Canvas finalLayer;
    if (intermediateLayers.size() == 1) {
        finalLayer = intermediateLayers.get(0);
    } else {
        finalLayer = plotter.createCanvas(width, height, inputMBR);
        for (Canvas intermediateLayer : intermediateLayers) {
            plotter.merge(finalLayer, intermediateLayer);
        }
    }

    // Finally, write the resulting image to the given output path
    System.out.println(System.currentTimeMillis() + ": Writing final image");
    outFs.delete(outPath, true); // Delete old (non-combined) images
    try (FSDataOutputStream outputFile = outFs.create(outPath)) {
        plotter.writeImage(finalLayer, outputFile, vflip);
    }
}

From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java

License:Open Source License

/**
 * Lists the contents of a directory and writes the listing to the response as JSON.
 * <p>
 * The directory is taken from the {@code path} request parameter; a missing or
 * empty parameter lists {@code "/"}. The JSON also reports a {@code _master*}
 * file and a {@code _data.png} file when the directory contains them. On any
 * failure the stack trace is written as plain text with status 500.
 *
 * @param request  the HTTP request carrying the optional {@code path} parameter
 * @param response the HTTP response the JSON (or the error text) is written to
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        if (pathStr == null) {
            // Treat a missing parameter like an empty one so the later
            // string operations on pathStr cannot throw a NullPointerException
            pathStr = "";
        }
        Path path = new Path(pathStr.isEmpty() ? "/" : pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        // Directories first, then case-insensitive order by name
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        if (pathStr.endsWith("/")) {
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        // Every string placed inside JSON quotes is escaped: the path comes from
        // the request and names come from the file system, so neither is trusted
        out.printf("\"BaseDir\":\"%s\",", escapeJson(pathStr));
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", escapeJson(path.getParent()));
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                    "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                            + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                            + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                            + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                    fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0,
                    escapeJson(fileStatus.getGroup()), fileStatus.getLen(),
                    fileStatus.getModificationTime(), escapeJson(fileStatus.getOwner()),
                    escapeJson(filename), escapeJson(fileStatus.getPermission()),
                    fileStatus.getReplication(), 0,
                    fileStatus.isDirectory() ? "DIRECTORY" : "FILE", escapeJson(extension.toLowerCase()));
        }
        out.print("]}");
        // Check if there is an image or master file
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", escapeJson(metaFileName));
                // The shape is detected from the first visible file; skip detection
                // when the directory holds a master file but no visible files
                if (fileStatuses.length > 0) {
                    String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                    if (shape != null)
                        out.printf(",\"Shape\":\"%s\"", escapeJson(shape));
                }
            } else if (metaFileName.equals("_data.png"))
                out.printf(",\"ImagePath\":\"%s\"", escapeJson(metaFileName));
        }
        out.print("}");

        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        // Set status and content type before writing the body; they cannot be
        // changed once the response has been committed
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }
}

/**
 * Escapes a value so it can be placed between double quotes in a JSON document.
 *
 * @param value the value to escape; converted with {@link String#valueOf(Object)},
 *              so {@code null} becomes the text {@code null}
 * @return the text with quotes, backslashes and control characters escaped
 */
private static String escapeJson(Object value) {
    String text = String.valueOf(value);
    StringBuilder escaped = new StringBuilder(text.length() + 8);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
        case '"':
            escaped.append("\\\"");
            break;
        case '\\':
            escaped.append("\\\\");
            break;
        case '\n':
            escaped.append("\\n");
            break;
        case '\r':
            escaped.append("\\r");
            break;
        case '\t':
            escaped.append("\\t");
            break;
        default:
            if (c < 0x20) {
                // Remaining control characters must be written as \\uXXXX in JSON
                escaped.append(String.format("\\u%04x", (int) c));
            } else {
                escaped.append(c);
            }
        }
    }
    return escaped.toString();
}

From source file:edu.umn.cs.spatialHadoop.visualization.RasterOutputFormat.java

License:Open Source License

/**
 * Merges the partial raster layers found under {@code outPath} into one layer
 * and replaces {@code outPath} with the resulting image.
 * <p>
 * Reads {@code "width"} and {@code "height"} (default 1000) and {@code "vflip"}
 * (default {@code true}) from {@code conf}. If no partial output is found, an
 * error is printed and nothing is written.
 *
 * @param conf    job configuration
 * @param outPath directory holding the partial output; it is deleted recursively
 *                and re-created as the final image file
 * @throws IOException          if listing, deleting or writing fails
 * @throws InterruptedException declared for the parallel merge step
 */
protected static void mergeImages(final Configuration conf, final Path outPath)
        throws IOException, InterruptedException {
    final int width = conf.getInt("width", 1000);
    final int height = conf.getInt("height", 1000);
    final Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, InputMBR);

    final boolean vflip = conf.getBoolean("vflip", true);

    // List all output files resulting from reducers
    // NOTE(review): the filter tests the full path, not just the file name, so
    // an outPath that itself contains "part-" would match every child - confirm.
    final FileSystem outFs = outPath.getFileSystem(conf);
    final FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.toUri().getPath().contains("part-");
        }
    });

    if (resultFiles.length == 0) {
        System.err.println("Error! Couldn't find any partial output. Exiting!");
        return;
    }
    System.out.println(System.currentTimeMillis() + ": Merging " + resultFiles.length + " layers into one");
    Vector<RasterLayer> intermediateLayers = Parallel.forEach(resultFiles.length,
            new Parallel.RunnableRange<RasterLayer>() {
                @Override
                public RasterLayer run(int i1, int i2) {
                    Rasterizer rasterizer = Rasterizer.getRasterizer(conf);
                    // The raster layer that contains the merge of all assigned layers
                    RasterLayer finalLayer = null;
                    RasterLayer tempLayer = rasterizer.createRaster(1, 1, new Rectangle());
                    for (int i = i1; i < i2; i++) {
                        FileStatus resultFile = resultFiles[i];
                        // try-with-resources closes the stream even when reading or
                        // merging a layer fails part-way through the file
                        try (FSDataInputStream inputStream = outFs.open(resultFile.getPath())) {
                            while (inputStream.getPos() < resultFile.getLen()) {
                                if (tempLayer == finalLayer) {
                                    // More than one layer. Create a separate final layer to merge
                                    finalLayer = rasterizer.createRaster(width, height, inputMBR);
                                    rasterizer.merge(finalLayer, tempLayer);
                                }
                                tempLayer.readFields(inputStream);

                                if (finalLayer == null) {
                                    // First layer. Treat it as a final layer to avoid merging
                                    // if it is the only layer
                                    finalLayer = tempLayer;
                                } else {
                                    // More than one layer. Merge into the final layer
                                    rasterizer.merge(finalLayer, tempLayer);
                                }
                            }
                        } catch (IOException e) {
                            // Best effort: report the unreadable file and continue with the rest
                            System.err.println("Error reading " + resultFile);
                            e.printStackTrace();
                        }
                    }
                    return finalLayer;
                }
            });

    // Merge all intermediate layers into one final layer
    Rasterizer rasterizer = Rasterizer.getRasterizer(conf);
    RasterLayer finalLayer;
    if (intermediateLayers.size() == 1) {
        finalLayer = intermediateLayers.elementAt(0);
    } else {
        finalLayer = rasterizer.createRaster(width, height, inputMBR);
        for (RasterLayer intermediateLayer : intermediateLayers) {
            rasterizer.merge(finalLayer, intermediateLayer);
        }
    }

    // Finally, write the resulting image to the given output path
    System.out.println(System.currentTimeMillis() + ": Writing final image");
    outFs.delete(outPath, true); // Delete old (non-combined) images
    try (FSDataOutputStream outputFile = outFs.create(outPath)) {
        rasterizer.writeImage(finalLayer, outputFile, vflip);
    }
}

From source file:ezbake.azkaban.job.HdfsCleaner.java

License:Apache License

/**
 * Deletes old run directories beneath each project directory under {@code path}.
 * <p>
 * For every project, run directories are the sub-directories whose name parses
 * as a {@code long}. The {@code lastN} highest-numbered runs are always kept.
 * Of the remaining runs, those with a value at or below {@code ageOffTimestamp}
 * are deleted recursively, unless their status file reports a running job or
 * cannot be read or understood.
 *
 * @param fs   file system to operate on
 * @param path directory whose children are the project directories
 * @throws IOException if listing or deleting fails
 */
private void pruneDirectories(final FileSystem fs, Path path) throws IOException {
    // For each of the projects, filter through each of its runs
    for (FileStatus projectNumber : fs.listStatus(path)) {

        // There SHOULD only be directories, but filter just to make sure
        final FileStatus[] runDirs = fs.listStatus(projectNumber.getPath(), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                try {
                    return fs.isDirectory(path);
                } catch (IOException e) {
                    logger.error("Error trying to filter directories", e);
                    return false;
                }
            }
        });

        // Directories are returned canonically. Though this should be good enough, let's sort the right way
        final SortedSet<Long> runSet = new TreeSet<>();
        for (FileStatus status : runDirs) {
            try {
                runSet.add(Long.valueOf(status.getPath().getName()));
            } catch (NumberFormatException ex) {
                logger.error("Directory {} is not a long and probably not a runtime dir. Skipping",
                        status.getPath().getName());
            }
        }

        // Keep the last N number of runs: only the first (size - lastN) entries of
        // the ascending set are candidates for deletion
        int i = 0;
        final int stopAt = runSet.size() - lastN;
        for (Long runtime : runSet) {
            if (i++ >= stopAt) {
                break;
            }

            final Path runtimeDir = new Path(projectNumber.getPath(), runtime.toString());
            logger.info("Checking dir <{}> vs ageoff of <{}>", runtimeDir, ageOffTimestamp);

            // Check if it meets the threshold for pruning and if so delete the dir
            if (runtime <= ageOffTimestamp) {
                // Check to make sure this isn't the dir of a currently running job
                try (FSDataInputStream pidStream = fs.open(new Path(runtimeDir, FrameworkDriver.STATUS_FILE))) {
                    if (FrameworkDriver.JobStatus
                            .valueOf(pidStream.readUTF()) == FrameworkDriver.JobStatus.RUNNING) {
                        logger.warn("Directory <{}> is for a currently running job.  Skipping", runtimeDir);
                        continue;
                    }
                } catch (IOException | IllegalArgumentException e) {
                    // An unknown status value (IllegalArgumentException from valueOf) is
                    // handled like an unreadable file: skip the directory rather than
                    // abort the whole cleanup. The exception is passed as the trailing
                    // argument so the logger records it; the message has one placeholder.
                    logger.warn("directory {} missing PID file or could not be read.  Skipping",
                            runtimeDir.getName(), e);
                    continue;
                }

                logger.info("Removing dir: {}", runtimeDir);
                fs.delete(runtimeDir, true);
            }
        }
    }
}

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java

License:LGPL

/**
 * Returns the paths of the entries directly under {@code path} whose name
 * starts with {@code part-} and ends with a digit, ordered by file name.
 *
 * @param fs   file system used for the listing
 * @param path directory to list
 * @return the matching paths sorted by name
 * @throws IOException if the directory cannot be listed
 */
private List<Path> getPathToConcat(final FileSystem fs, final Path path) throws IOException {

    // Select the files to concat by name
    final PathFilter partFileFilter = new PathFilter() {

        @Override
        public boolean accept(final Path candidate) {

            final String name = candidate.getName();
            return name.matches("^part-.*[0-9]+$");
        }
    };
    final FileStatus[] partFiles = fs.listStatus(path, partFileFilter);

    // Order the selected files by their name
    final Comparator<FileStatus> byFileName = new Comparator<FileStatus>() {

        @Override
        public int compare(final FileStatus left, final FileStatus right) {

            final String leftName = left.getPath().getName();
            final String rightName = right.getPath().getName();
            return leftName.compareTo(rightName);
        }
    };
    Arrays.sort(partFiles, byFileName);

    // Collect the paths in sorted order
    final List<Path> orderedPaths = new ArrayList<>(partFiles.length);
    for (int i = 0; i < partFiles.length; i++) {
        orderedPaths.add(partFiles[i].getPath());
    }

    return orderedPaths;
}

From source file:gaffer.accumulo.TestAccumuloBackedGraphUpdatingConf.java

License:Apache License

/**
 * Reads every sequence file under {@code path} whose name contains
 * {@code "part-m-"} and adds a copy of each key/value pair to {@code results}.
 *
 * @param fs      file system holding the files
 * @param path    directory to list
 * @param results set that receives one {@code GraphElementWithStatistics} per record read
 * @return the number of records read; this can exceed the number of elements
 *         added if {@code results} already contains an equal element
 * @throws IOException if listing or reading fails
 */
public static int readResults(FileSystem fs, Path path, Set<GraphElementWithStatistics> results)
        throws IOException {
    FileStatus[] fileStatus = fs.listStatus(path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("part-m-");
        }
    });
    int count = 0;
    for (FileStatus status : fileStatus) {
        // try-with-resources closes the reader even if reading or cloning a record fails
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), fs.getConf())) {
            GraphElement element = new GraphElement();
            SetOfStatistics statistics = new SetOfStatistics();
            while (reader.next(element, statistics)) {
                count++;
                // The reader refills the same two objects on each call, so store clones
                results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
            }
        }
    }
    return count;
}

From source file:gobblin.compaction.dataset.DatasetHelper.java

License:Apache License

/**
 * Recursively collects the files under {@code dataDir} whose name ends with
 * one of the configured {@code extensions}.
 *
 * @param dataDir directory to search
 * @return the matching file paths, or an empty list if {@code dataDir} does not exist
 * @throws IOException if the existence check or the listing fails
 */
private List<Path> getApplicableFilePaths(Path dataDir) throws IOException {
    List<Path> applicablePaths = Lists.newArrayList();
    if (!this.fs.exists(dataDir)) {
        return applicablePaths;
    }

    // Accept a path as soon as its name ends with any valid extension
    PathFilter extensionFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            boolean matches = false;
            for (String candidateExtension : extensions) {
                if (path.getName().endsWith(candidateExtension)) {
                    matches = true;
                    break;
                }
            }
            return matches;
        }
    };

    for (FileStatus status : FileListUtils.listFilesRecursively(this.fs, dataDir, extensionFilter)) {
        applicablePaths.add(status.getPath());
    }
    return applicablePaths;
}