Example usage for org.apache.hadoop.fs.PathFilter

Introduction

This page collects example usages of org.apache.hadoop.fs.PathFilter from open-source projects. In each example, PathFilter is implemented as an anonymous class and passed to a listing call such as FileSystem.listStatus to restrict which paths are returned.

Prototype

public interface PathFilter {
    boolean accept(Path path);
}

Usage
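
Before the project-specific examples below, here is a minimal, self-contained sketch of the common pattern (the directory /data and the underscore/dot hidden-file convention are illustrative assumptions, not taken from any example on this page):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // List /data, keeping only paths whose names are not "hidden" by convention
        FileStatus[] visible = fs.listStatus(new Path("/data"), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                String name = path.getName();
                return !name.startsWith("_") && !name.startsWith(".");
            }
        });
        for (FileStatus status : visible) {
            System.out.println(status.getPath());
        }
    }
}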

From source file:edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java

License:Open Source License

private static void generateMapReduce(Path outFile, OperationsParams params) throws IOException {
    JobConf job = new JobConf(params, RandomSpatialGenerator.class);
    job.setJobName("Generator");
    Shape shape = params.getShape("shape");

    FileSystem outFs = outFile.getFileSystem(job);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    String sindex = params.get("sindex");
    Rectangle mbr = params.getShape("mbr").getMBR();

    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
        FileSystem fs = outFile.getFileSystem(job);
        long blocksize = fs.getDefaultBlockSize(outFile);
        long size = params.getSize("size");
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, outFile, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path
    FileOutputFormat.setOutputPath(job, outFile);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobClient.runJob(job);

    // TODO move the following part to OutputCommitter
    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(outFile, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
            .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(outFile, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();
}

From source file:edu.umn.cs.spatialHadoop.util.NASADatasetUtil.java

License:Open Source License

public static Path[] getMatchingFilesInPath(Path path, final String inputDateString) throws IOException {
    FileSystem fileSystem = path.getFileSystem(new Configuration());
    FileStatus[] matchingDirs = fileSystem.listStatus(path, new PathFilter() {
        @Override
        public boolean accept(Path p) {
            return p.getName().contains(inputDateString);
        }
    });
    Path[] paths = new Path[matchingDirs.length];
    for (int i = 0; i < paths.length; i++) {
        paths[i] = matchingDirs[i].getPath();
    }
    return paths;
}

From source file:edu.umn.cs.spatialHadoop.util.TemporalIndexManager.java

License:Open Source License

/**
 * Based on a certain time range, this method filters all directories and
 * determines which files need to be indexed at the daily, monthly and
 * yearly levels. After calling this method, call the daily, monthly and
 * yearly getters to obtain the paths that need to be indexed.
 * @param timeRange
 * @throws IOException
 * @throws ParseException
 */
public void prepareNeededIndexes(String timeRange) throws IOException, ParseException {
    if (timeRange == null) {
        LOG.error("TimeRange is empty");
        return;
    }

    // Parse start and end dates
    final Date startDate, endDate;
    try {
        startDate = dayFormat.parse(timeRange.split("\\.\\.")[0]);
        endDate = dayFormat.parse(timeRange.split("\\.\\.")[1]);
    } catch (ArrayIndexOutOfBoundsException e) {
        LOG.error("Use the seperator two periods '..' to seperate from and to dates");
        return;
    } catch (ParseException e) {
        LOG.error("Illegal date format in " + timeRange);
        return;
    }

    // Filter all file/folder paths based on the start-end date range
    FileStatus[] matchingDirs = fileSystem.listStatus(datasetPath, new PathFilter() {
        @Override
        public boolean accept(Path p) {
            String dirName = p.getName();
            try {
                Date date = dayFormat.parse(dirName);
                return date.compareTo(startDate) >= 0 && date.compareTo(endDate) <= 0;
            } catch (ParseException e) {
                LOG.warn("Cannot parse directory name: " + dirName);
                return false;
            }
        }
    });
    if (matchingDirs.length == 0) {
        LOG.warn("No matching directories for the given input");
    }

    // Re-indexing check for each matching
    for (FileStatus matchingDir : matchingDirs) {
        String matchingDirDateString = NASADatasetUtil.extractDateStringFromFileStatus(matchingDir);
        if (existYearlyIndexes.containsKey(NASADatasetUtil.getYearFormat(matchingDirDateString))) {
            // needs to re-build year, month and year indexes
            existYearlyIndexes.put(NASADatasetUtil.getYearFormat(matchingDirDateString), true);
            existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true);
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
        } else if (existMonthlyIndexes.containsKey(NASADatasetUtil.getMonthFormat(matchingDirDateString))) {
            // needs to re-build month and day indexes
            existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true);
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
        } else if (existDailyIndexes.containsKey(NASADatasetUtil.getDayFormat(matchingDirDateString))) {
            // needs to re-build day index
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);
        } else {
            // needs to build a new index
            existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true);

            int daysCountInMonth = getMatchesCountFromMap(existDailyIndexes,
                    NASADatasetUtil.getMonthFormat(matchingDirDateString));

            if (daysCountInMonth >= getNumDaysPerMonth(
                    NASADatasetUtil.extractMonthFromDate(matchingDirDateString))) {
                existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true);

                int monthsCountInYear = getMatchesCountFromMap(existMonthlyIndexes,
                        NASADatasetUtil.getYearFormat(matchingDirDateString));
                if (monthsCountInYear >= getNumMonthsPerYear()) {
                    existYearlyIndexes.put(NASADatasetUtil.getYearFormat(matchingDirDateString), true);
                }
            }
        }

    }
    convertNeededIndexesListIntoArrays();
}

From source file:edu.umn.cs.spatialHadoop.visualization.CanvasOutputFormat.java

License:Open Source License

protected static void mergeImages(final Configuration conf, final Path outPath)
        throws IOException, InterruptedException {
    final int width = conf.getInt("width", 1000);
    final int height = conf.getInt("height", 1000);
    final Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, InputMBR);

    final boolean vflip = conf.getBoolean("vflip", true);

    // List all output files resulting from reducers
    final FileSystem outFs = outPath.getFileSystem(conf);
    final FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.toUri().getPath().contains("part-");
        }
    });

    if (resultFiles.length == 0) {
        System.err.println("Error! Couldn't find any partial output. Exiting!");
        return;
    }
    System.out.println(System.currentTimeMillis() + ": Merging " + resultFiles.length + " layers into one");
    List<Canvas> intermediateLayers = Parallel.forEach(resultFiles.length,
            new Parallel.RunnableRange<Canvas>() {
                @Override
                public Canvas run(int i1, int i2) {
                    Plotter plotter = Plotter.getPlotter(conf);
                    // The canvas that contains the merge of all assigned layers
                    Canvas finalLayer = null;
                    Canvas tempLayer = plotter.createCanvas(1, 1, new Rectangle());
                    for (int i = i1; i < i2; i++) {
                        FileStatus resultFile = resultFiles[i];
                        try {
                            FSDataInputStream inputStream = outFs.open(resultFile.getPath());
                            while (inputStream.getPos() < resultFile.getLen()) {
                                if (tempLayer == finalLayer) {
                                    // More than one layer. Create a separate final layer to merge
                                    finalLayer = plotter.createCanvas(width, height, inputMBR);
                                    plotter.merge(finalLayer, tempLayer);
                                }
                                tempLayer.readFields(inputStream);

                                if (finalLayer == null) {
                                    // First layer. Treat it as a final layer to avoid merging
                                    // if it is the only layer
                                    finalLayer = tempLayer;
                                } else {
                                    // More than one layer. Merge into the final layer
                                    plotter.merge(finalLayer, tempLayer);
                                }
                            }
                            inputStream.close();
                        } catch (IOException e) {
                            System.err.println("Error reading " + resultFile);
                            e.printStackTrace();
                        }
                    }
                    return finalLayer;
                }
            }, conf.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    // Merge all intermediate layers into one final layer
    Plotter plotter = Plotter.getPlotter(conf);
    Canvas finalLayer;
    if (intermediateLayers.size() == 1) {
        finalLayer = intermediateLayers.get(0);
    } else {
        finalLayer = plotter.createCanvas(width, height, inputMBR);
        for (Canvas intermediateLayer : intermediateLayers) {
            plotter.merge(finalLayer, intermediateLayer);
        }
    }

    // Finally, write the resulting image to the given output path
    System.out.println(System.currentTimeMillis() + ": Writing final image");
    outFs.delete(outPath, true); // Delete old (non-combined) images
    FSDataOutputStream outputFile = outFs.create(outPath);
    plotter.writeImage(finalLayer, outputFile, vflip);
    outputFile.close();
}

From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java

License:Open Source License

/**
 * Lists the contents of a directory.
 * @param request
 * @param response
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        if (pathStr == null || pathStr.isEmpty())
            pathStr = "/";
        Path path = new Path(pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        if (pathStr.endsWith("/")) {
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        out.printf("\"BaseDir\":\"%s\",", pathStr);
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", path.getParent());
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                    "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                            + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                            + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                            + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                    fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(),
                    fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(),
                    fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0,
                    fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase());
        }
        out.print("]}");
        // Check if there is an image or master file
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", metaFileName);
                String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                if (shape != null)
                    out.printf(",\"Shape\":\"%s\"", shape);
            } else if (metaFileName.equals("_data.png"))
                out.printf(",\"ImagePath\":\"%s\"", metaFileName);
        }
        out.print("}");

        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}

From source file:edu.umn.cs.spatialHadoop.visualization.RasterOutputFormat.java

License:Open Source License

protected static void mergeImages(final Configuration conf, final Path outPath)
        throws IOException, InterruptedException {
    final int width = conf.getInt("width", 1000);
    final int height = conf.getInt("height", 1000);
    final Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, InputMBR);

    final boolean vflip = conf.getBoolean("vflip", true);

    // List all output files resulting from reducers
    final FileSystem outFs = outPath.getFileSystem(conf);
    final FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.toUri().getPath().contains("part-");
        }
    });

    if (resultFiles.length == 0) {
        System.err.println("Error! Couldn't find any partial output. Exiting!");
        return;
    }
    System.out.println(System.currentTimeMillis() + ": Merging " + resultFiles.length + " layers into one");
    Vector<RasterLayer> intermediateLayers = Parallel.forEach(resultFiles.length,
            new Parallel.RunnableRange<RasterLayer>() {
                @Override
                public RasterLayer run(int i1, int i2) {
                    Rasterizer rasterizer = Rasterizer.getRasterizer(conf);
                    // The raster layer that contains the merge of all assigned layers
                    RasterLayer finalLayer = null;
                    RasterLayer tempLayer = rasterizer.createRaster(1, 1, new Rectangle());
                    for (int i = i1; i < i2; i++) {
                        FileStatus resultFile = resultFiles[i];
                        try {
                            FSDataInputStream inputStream = outFs.open(resultFile.getPath());
                            while (inputStream.getPos() < resultFile.getLen()) {
                                if (tempLayer == finalLayer) {
                                    // More than one layer. Create a separate final layer to merge
                                    finalLayer = rasterizer.createRaster(width, height, inputMBR);
                                    rasterizer.merge(finalLayer, tempLayer);
                                }
                                tempLayer.readFields(inputStream);

                                if (finalLayer == null) {
                                    // First layer. Treat it as a final layer to avoid merging
                                    // if it is the only layer
                                    finalLayer = tempLayer;
                                } else {
                                    // More than one layer. Merge into the final layer
                                    rasterizer.merge(finalLayer, tempLayer);
                                }
                            }
                            inputStream.close();
                        } catch (IOException e) {
                            System.err.println("Error reading " + resultFile);
                            e.printStackTrace();
                        }
                    }
                    return finalLayer;
                }
            });

    // Merge all intermediate layers into one final layer
    Rasterizer rasterizer = Rasterizer.getRasterizer(conf);
    RasterLayer finalLayer;
    if (intermediateLayers.size() == 1) {
        finalLayer = intermediateLayers.elementAt(0);
    } else {
        finalLayer = rasterizer.createRaster(width, height, inputMBR);
        for (RasterLayer intermediateLayer : intermediateLayers) {
            rasterizer.merge(finalLayer, intermediateLayer);
        }
    }

    // Finally, write the resulting image to the given output path
    System.out.println(System.currentTimeMillis() + ": Writing final image");
    outFs.delete(outPath, true); // Delete old (non-combined) images
    FSDataOutputStream outputFile = outFs.create(outPath);
    rasterizer.writeImage(finalLayer, outputFile, vflip);
    outputFile.close();
}

From source file:ezbake.azkaban.job.HdfsCleaner.java

License:Apache License

private void pruneDirectories(final FileSystem fs, Path path) throws IOException {
    // For each of the projects, filter through each of its runs
    for (FileStatus projectNumber : fs.listStatus(path)) {

        // There SHOULD only be directories, but filter just to make sure
        final FileStatus[] runDirs = fs.listStatus(projectNumber.getPath(), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                try {
                    return fs.isDirectory(path);
                } catch (IOException e) {
                    logger.error("Error trying to filter directories", e);
                    return false;
                }
            }
        });

        // Directory names come back in lexicographic order; sort them numerically to get the true run order
        final SortedSet<Long> runSet = new TreeSet<>();
        for (FileStatus status : runDirs) {
            try {
                runSet.add(Long.valueOf(status.getPath().getName()));
            } catch (NumberFormatException ex) {
                logger.error("Directory {} is not a long and probably not a runtime dir. Skipping",
                        status.getPath().getName());
            }
        }

        // Keep the last N number of runs
        int i = 0;
        final int stopAt = runSet.size() - lastN;
        for (Long runtime : runSet) {
            if (i++ >= stopAt) {
                break;
            }

            final Path runtimeDir = new Path(projectNumber.getPath(), runtime.toString());
            logger.info("Checking dir <{}> vs ageoff of <{}>", runtimeDir, ageOffTimestamp);

            // Check if it meets the threshold for pruning and if so delete the dir
            if (runtime <= ageOffTimestamp) {
                // Check to make sure this isn't the dir of a currently running job
                try (FSDataInputStream pidStream = fs.open(new Path(runtimeDir, FrameworkDriver.STATUS_FILE))) {
                    if (FrameworkDriver.JobStatus
                            .valueOf(pidStream.readUTF()) == FrameworkDriver.JobStatus.RUNNING) {
                        logger.warn("Directory <{}> is for a currently running job.  Skipping", runtimeDir);
                        continue;
                    }
                } catch (IOException e) {
                    logger.warn("directory {} missing PID file or could not be read.  Skipping",
                            runtimeDir.getName(), e.getMessage());
                    continue;
                }

                logger.info("Removing dir: {}", runtimeDir);
                fs.delete(runtimeDir, true);
            }
        }
    }
}

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java

License:LGPL

private List<Path> getPathToConcat(final FileSystem fs, final Path path) throws IOException {

    // Get the list of files to concatenate
    final FileStatus[] files = fs.listStatus(path, new PathFilter() {

        @Override
        public boolean accept(final Path p) {

            return p.getName().matches("^part-.*[0-9]+$");
        }
    });

    // Sort the list
    Arrays.sort(files, new Comparator<FileStatus>() {

        @Override
        public int compare(final FileStatus f1, final FileStatus f2) {

            return f1.getPath().getName().compareTo(f2.getPath().getName());
        }
    });

    // Create final result
    final List<Path> result = new ArrayList<>(files.length);
    for (FileStatus file : files) {
        result.add(file.getPath());
    }

    return result;
}

From source file:gaffer.accumulo.TestAccumuloBackedGraphUpdatingConf.java

License:Apache License

public static int readResults(FileSystem fs, Path path, Set<GraphElementWithStatistics> results)
        throws IOException {
    FileStatus[] fileStatus = fs.listStatus(path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("part-m-");
        }
    });
    int count = 0;
    for (int i = 0; i < fileStatus.length; i++) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, fileStatus[i].getPath(), fs.getConf());
        GraphElement element = new GraphElement();
        SetOfStatistics statistics = new SetOfStatistics();
        while (reader.next(element, statistics)) {
            count++;
            results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        }
        reader.close();
    }
    return count;
}

From source file:gobblin.compaction.dataset.DatasetHelper.java

License:Apache License

private List<Path> getApplicableFilePaths(Path dataDir) throws IOException {
    if (!this.fs.exists(dataDir)) {
        return Lists.newArrayList();
    }
    List<Path> paths = Lists.newArrayList();
    for (FileStatus fileStatus : FileListUtils.listFilesRecursively(this.fs, dataDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            for (String validExtension : extensions) {
                if (path.getName().endsWith(validExtension)) {
                    return true;
                }
            }
            return false;
        }
    })) {
        paths.add(fileStatus.getPath());
    }
    return paths;
}