List of usage examples for org.apache.hadoop.fs.PathFilter
PathFilter
From source file:edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java
License:Open Source License
private static void generateMapReduce(Path outFile, OperationsParams params) throws IOException { JobConf job = new JobConf(params, RandomSpatialGenerator.class); job.setJobName("Generator"); Shape shape = params.getShape("shape"); FileSystem outFs = outFile.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); // Set input format and map class job.setInputFormat(RandomInputFormat.class); job.setMapperClass(Repartition.RepartitionMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(shape.getClass()); job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks())); String sindex = params.get("sindex"); Rectangle mbr = params.getShape("mbr").getMBR(); CellInfo[] cells;/*from w w w . j a va 2 s . co m*/ if (sindex == null) { cells = new CellInfo[] { new CellInfo(1, mbr) }; } else if (sindex.equals("grid")) { GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); FileSystem fs = outFile.getFileSystem(job); long blocksize = fs.getDefaultBlockSize(outFile); long size = params.getSize("size"); int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, outFile, blocksize); gridInfo.calculateCellDimensions(numOfCells); cells = gridInfo.getAllCells(); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } SpatialSite.setCells(job, cells); // Do not set a reduce function. Use the default identity reduce function if (cells.length == 1) { // All objects are in one partition. No need for a reduce phase job.setNumReduceTasks(0); } else { // More than one partition. 
Need a reduce phase to group shapes of the // same partition together job.setReducerClass(RepartitionReduce.class); job.setNumReduceTasks( Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10))); } // Set output path FileOutputFormat.setOutputPath(job, outFile); if (sindex == null || sindex.equals("grid")) { job.setOutputFormat(GridOutputFormat.class); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } JobClient.runJob(job); // TODO move the following part to OutputCommitter // Concatenate all master files into one file FileStatus[] resultFiles = outFs.listStatus(outFile, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().contains("_master"); } }); String ext = resultFiles[0].getPath().getName() .substring(resultFiles[0].getPath().getName().lastIndexOf('.')); Path masterPath = new Path(outFile, "_master" + ext); OutputStream destOut = outFs.create(masterPath); byte[] buffer = new byte[4096]; for (FileStatus f : resultFiles) { InputStream in = outFs.open(f.getPath()); int bytes_read; do { bytes_read = in.read(buffer); if (bytes_read > 0) destOut.write(buffer, 0, bytes_read); } while (bytes_read > 0); in.close(); outFs.delete(f.getPath(), false); } destOut.close(); }
From source file:edu.umn.cs.spatialHadoop.util.NASADatasetUtil.java
License:Open Source License
public static Path[] getMatchingFilesInPath(Path path, final String inputDateString) throws IOException { FileSystem fileSystem = path.getFileSystem(new Configuration()); FileStatus[] matchingDirs = fileSystem.listStatus(path, new PathFilter() { @Override//from w w w .j a va 2 s. c o m public boolean accept(Path p) { String dirName = p.getName(); if (dirName.contains(inputDateString)) { return true; } else { return false; } } }); Path[] paths = new Path[matchingDirs.length]; for (int i = 0; i < paths.length; i++) { paths[i] = matchingDirs[i].getPath(); } return paths; }
From source file:edu.umn.cs.spatialHadoop.util.TemporalIndexManager.java
License:Open Source License
/** * Based on a certain time range, this method filters all directories and * determines which files need to be indexed on daily, monthly and yearly * levels. After calling this method, you need to call the daily, monthly * and yearly getters to return paths required to be indexed. * @param timeRange/*w ww . j a va 2 s. c om*/ * @throws IOException * @throws ParseException */ public void prepareNeededIndexes(String timeRange) throws IOException, ParseException { if (timeRange == null) { LOG.error("TimeRange is empty"); return; } // Parse start and end dates final Date startDate, endDate; try { startDate = dayFormat.parse(timeRange.split("\\.\\.")[0]); endDate = dayFormat.parse(timeRange.split("\\.\\.")[1]); } catch (ArrayIndexOutOfBoundsException e) { LOG.error("Use the seperator two periods '..' to seperate from and to dates"); return; } catch (ParseException e) { LOG.error("Illegal date format in " + timeRange); return; } // Filter all file/folder paths based on the start-end date range FileStatus[] matchingDirs = fileSystem.listStatus(datasetPath, new PathFilter() { @Override public boolean accept(Path p) { String dirName = p.getName(); try { Date date = dayFormat.parse(dirName); return date.compareTo(startDate) >= 0 && date.compareTo(endDate) <= 0; } catch (ParseException e) { LOG.warn("Cannot parse directory name: " + dirName); return false; } } }); if (matchingDirs.length == 0) { LOG.warn("No matching directories for the given input"); } // Re-indexing check for each matching for (FileStatus matchingDir : matchingDirs) { String matchingDirDateString = NASADatasetUtil.extractDateStringFromFileStatus(matchingDir); if (existYearlyIndexes.containsKey(NASADatasetUtil.getYearFormat(matchingDirDateString))) { // needs to re-build year, month and year indexes existYearlyIndexes.put(NASADatasetUtil.getYearFormat(matchingDirDateString), true); existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true); 
existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true); } else if (existMonthlyIndexes.containsKey(NASADatasetUtil.getMonthFormat(matchingDirDateString))) { // needs to re-build month and day indexes existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true); existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true); } else if (existDailyIndexes.containsKey(NASADatasetUtil.getDayFormat(matchingDirDateString))) { // needs to re-build day index existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true); } else { // needs to build a new index existDailyIndexes.put(NASADatasetUtil.getDayFormat(matchingDirDateString), true); int daysCountInMonth = getMatchesCountFromMap(existDailyIndexes, NASADatasetUtil.getMonthFormat(matchingDirDateString)); if (daysCountInMonth >= getNumDaysPerMonth( NASADatasetUtil.extractMonthFromDate(matchingDirDateString))) { existMonthlyIndexes.put(NASADatasetUtil.getMonthFormat(matchingDirDateString), true); int monthsCountInYear = getMatchesCountFromMap(existMonthlyIndexes, NASADatasetUtil.getYearFormat(matchingDirDateString)); if (monthsCountInYear >= getNumMonthsPerYear()) { existYearlyIndexes.put(NASADatasetUtil.getYearFormat(matchingDirDateString), true); } } } } convertNeededIndexesListIntoArrays(); }
From source file:edu.umn.cs.spatialHadoop.visualization.CanvasOutputFormat.java
License:Open Source License
protected static void mergeImages(final Configuration conf, final Path outPath) throws IOException, InterruptedException { final int width = conf.getInt("width", 1000); final int height = conf.getInt("height", 1000); final Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, InputMBR); final boolean vflip = conf.getBoolean("vflip", true); // List all output files resulting from reducers final FileSystem outFs = outPath.getFileSystem(conf); final FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() { @Override/* www . j a v a 2 s. c om*/ public boolean accept(Path path) { return path.toUri().getPath().contains("part-"); } }); if (resultFiles.length == 0) { System.err.println("Error! Couldn't find any partial output. Exiting!"); return; } System.out.println(System.currentTimeMillis() + ": Merging " + resultFiles.length + " layers into one"); List<Canvas> intermediateLayers = Parallel.forEach(resultFiles.length, new Parallel.RunnableRange<Canvas>() { @Override public Canvas run(int i1, int i2) { Plotter plotter = Plotter.getPlotter(conf); // The canvas that contains the merge of all assigned layers Canvas finalLayer = null; Canvas tempLayer = plotter.createCanvas(1, 1, new Rectangle()); for (int i = i1; i < i2; i++) { FileStatus resultFile = resultFiles[i]; try { FSDataInputStream inputStream = outFs.open(resultFile.getPath()); while (inputStream.getPos() < resultFile.getLen()) { if (tempLayer == finalLayer) { // More than one layer. Create a separate final layer to merge finalLayer = plotter.createCanvas(width, height, inputMBR); plotter.merge(finalLayer, tempLayer); } tempLayer.readFields(inputStream); if (finalLayer == null) { // First layer. Treat it as a final layer to avoid merging // if it is the only layer finalLayer = tempLayer; } else { // More than only layer. 
Merge into the final layer plotter.merge(finalLayer, tempLayer); } } inputStream.close(); } catch (IOException e) { System.err.println("Error reading " + resultFile); e.printStackTrace(); } } return finalLayer; } }, conf.getInt("parallel", Runtime.getRuntime().availableProcessors())); // Merge all intermediate layers into one final layer Plotter plotter = Plotter.getPlotter(conf); Canvas finalLayer; if (intermediateLayers.size() == 1) { finalLayer = intermediateLayers.get(0); } else { finalLayer = plotter.createCanvas(width, height, inputMBR); for (Canvas intermediateLayer : intermediateLayers) { plotter.merge(finalLayer, intermediateLayer); } } // Finally, write the resulting image to the given output path System.out.println(System.currentTimeMillis() + ": Writing final image"); outFs.delete(outPath, true); // Delete old (non-combined) images FSDataOutputStream outputFile = outFs.create(outPath); plotter.writeImage(finalLayer, outputFile, vflip); outputFile.close(); }
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/** * Lists the contents of a directory// w w w .j a v a 2 s . co m * @param request * @param response */ private void handleListFiles(HttpServletRequest request, HttpServletResponse response) { try { String pathStr = request.getParameter("path"); Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr); FileSystem fs = path.getFileSystem(commonParams); FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter); Arrays.sort(fileStatuses, new Comparator<FileStatus>() { @Override public int compare(FileStatus o1, FileStatus o2) { if (o1.isDirectory() && o2.isFile()) return -1; if (o1.isFile() && o2.isDirectory()) return 1; return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase()); } }); response.setContentType("application/json;charset=utf-8"); response.setStatus(HttpServletResponse.SC_OK); PrintWriter out = response.getWriter(); out.print("{\"FileStatuses\":{"); if (pathStr.endsWith("/")) { pathStr = pathStr.substring(0, pathStr.length() - 1); } out.printf("\"BaseDir\":\"%s\",", pathStr); if (path.getParent() != null) out.printf("\"ParentDir\":\"%s\",", path.getParent()); out.print("\"FileStatus\":["); for (int i = 0; i < fileStatuses.length; i++) { FileStatus fileStatus = fileStatuses[i]; if (i != 0) out.print(','); String filename = fileStatus.getPath().getName(); int idot = filename.lastIndexOf('.'); String extension = idot == -1 ? 
"" : filename.substring(idot + 1); out.printf( "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d," + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d," + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\"," + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}", fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(), fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(), fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0, fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase()); } out.print("]}"); // Check if there is an image or master file FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith("_master") || path.getName().equals("_data.png"); } }); for (FileStatus metaFile : metaFiles) { String metaFileName = metaFile.getPath().getName(); if (metaFileName.startsWith("_master")) { out.printf(",\"MasterPath\":\"%s\"", metaFileName); String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams); if (shape != null) out.printf(",\"Shape\":\"%s\"", shape); } else if (metaFileName.equals("_data.png")) out.printf(",\"ImagePath\":\"%s\"", metaFileName); } out.print("}"); out.close(); } catch (Exception e) { System.out.println("error happened"); e.printStackTrace(); try { e.printStackTrace(response.getWriter()); } catch (IOException ioe) { ioe.printStackTrace(); e.printStackTrace(); } response.setContentType("text/plain;charset=utf-8"); response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); } }
From source file:edu.umn.cs.spatialHadoop.visualization.RasterOutputFormat.java
License:Open Source License
protected static void mergeImages(final Configuration conf, final Path outPath) throws IOException, InterruptedException { final int width = conf.getInt("width", 1000); final int height = conf.getInt("height", 1000); final Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, InputMBR); final boolean vflip = conf.getBoolean("vflip", true); // List all output files resulting from reducers final FileSystem outFs = outPath.getFileSystem(conf); final FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() { @Override/*www . j a v a 2 s .c o m*/ public boolean accept(Path path) { return path.toUri().getPath().contains("part-"); } }); if (resultFiles.length == 0) { System.err.println("Error! Couldn't find any partial output. Exiting!"); return; } System.out.println(System.currentTimeMillis() + ": Merging " + resultFiles.length + " layers into one"); Vector<RasterLayer> intermediateLayers = Parallel.forEach(resultFiles.length, new Parallel.RunnableRange<RasterLayer>() { @Override public RasterLayer run(int i1, int i2) { Rasterizer rasterizer = Rasterizer.getRasterizer(conf); // The raster layer that contains the merge of all assigned layers RasterLayer finalLayer = null; RasterLayer tempLayer = rasterizer.createRaster(1, 1, new Rectangle()); for (int i = i1; i < i2; i++) { FileStatus resultFile = resultFiles[i]; try { FSDataInputStream inputStream = outFs.open(resultFile.getPath()); while (inputStream.getPos() < resultFile.getLen()) { if (tempLayer == finalLayer) { // More than one layer. Create a separate final layer to merge finalLayer = rasterizer.createRaster(width, height, inputMBR); rasterizer.merge(finalLayer, tempLayer); } tempLayer.readFields(inputStream); if (finalLayer == null) { // First layer. Treat it as a final layer to avoid merging // if it is the only layer finalLayer = tempLayer; } else { // More than only layer. 
Merge into the final layer rasterizer.merge(finalLayer, tempLayer); } } inputStream.close(); } catch (IOException e) { System.err.println("Error reading " + resultFile); e.printStackTrace(); } } return finalLayer; } }); // Merge all intermediate layers into one final layer Rasterizer rasterizer = Rasterizer.getRasterizer(conf); RasterLayer finalLayer; if (intermediateLayers.size() == 1) { finalLayer = intermediateLayers.elementAt(0); } else { finalLayer = rasterizer.createRaster(width, height, inputMBR); for (RasterLayer intermediateLayer : intermediateLayers) { rasterizer.merge(finalLayer, intermediateLayer); } } // Finally, write the resulting image to the given output path System.out.println(System.currentTimeMillis() + ": Writing final image"); outFs.delete(outPath, true); // Delete old (non-combined) images FSDataOutputStream outputFile = outFs.create(outPath); rasterizer.writeImage(finalLayer, outputFile, vflip); outputFile.close(); }
From source file:ezbake.azkaban.job.HdfsCleaner.java
License:Apache License
private void pruneDirectories(final FileSystem fs, Path path) throws IOException { // For each of the projects, filter through each of its runs for (FileStatus projectNumber : fs.listStatus(path)) { // There SHOULD only be directories, but filter just to make sure final FileStatus[] runDirs = fs.listStatus(projectNumber.getPath(), new PathFilter() { @Override/*ww w . j a va 2s.c o m*/ public boolean accept(Path path) { try { return fs.isDirectory(path); } catch (IOException e) { logger.error("Error trying to filter directories", e); return false; } } }); // Directories are returned canonically. Though this should be good enough, let's sort the right way final SortedSet<Long> runSet = new TreeSet<>(); for (FileStatus status : runDirs) { try { runSet.add(Long.valueOf(status.getPath().getName())); } catch (NumberFormatException ex) { logger.error("Directory {} is not a long and probably not a runtime dir. Skipping", status.getPath().getName()); } } // Keep the last N number of runs int i = 0; final int stopAt = runSet.size() - lastN; for (Long runtime : runSet) { if (i++ >= stopAt) { break; } final Path runtimeDir = new Path(projectNumber.getPath(), runtime.toString()); logger.info("Checking dir <{}> vs ageoff of <{}>", runtimeDir, ageOffTimestamp); // Check if it meets the threshold for pruning and if so delete the dir if (runtime <= ageOffTimestamp) { // Check to make sure this isn't the dir of a currently running job try (FSDataInputStream pidStream = fs.open(new Path(runtimeDir, FrameworkDriver.STATUS_FILE))) { if (FrameworkDriver.JobStatus .valueOf(pidStream.readUTF()) == FrameworkDriver.JobStatus.RUNNING) { logger.warn("Directory <{}> is for a currently running job. Skipping", runtimeDir); continue; } } catch (IOException e) { logger.warn("directory {} missing PID file or could not be read. Skipping", runtimeDir.getName(), e.getMessage()); continue; } logger.info("Removing dir: {}", runtimeDir); fs.delete(runtimeDir, true); } } } }
From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java
License:LGPL
private List<Path> getPathToConcat(final FileSystem fs, final Path path) throws IOException { // Get the list of files to contact final FileStatus[] files = fs.listStatus(path, new PathFilter() { @Override/*from www . ja v a 2s .c o m*/ public boolean accept(final Path p) { return p.getName().matches("^part-.*[0-9]+$"); } }); // Sort the list Arrays.sort(files, new Comparator<FileStatus>() { @Override public int compare(final FileStatus f1, final FileStatus f2) { return f1.getPath().getName().compareTo(f2.getPath().getName()); } }); // Create final result final List<Path> result = new ArrayList<>(files.length); for (FileStatus file : files) { result.add(file.getPath()); } return result; }
From source file:gaffer.accumulo.TestAccumuloBackedGraphUpdatingConf.java
License:Apache License
public static int readResults(FileSystem fs, Path path, Set<GraphElementWithStatistics> results) throws IOException { FileStatus[] fileStatus = fs.listStatus(path, new PathFilter() { @Override//from w w w . j a va 2s .c o m public boolean accept(Path path) { return path.getName().contains("part-m-"); } }); int count = 0; for (int i = 0; i < fileStatus.length; i++) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, fileStatus[i].getPath(), fs.getConf()); GraphElement element = new GraphElement(); SetOfStatistics statistics = new SetOfStatistics(); while (reader.next(element, statistics)) { count++; results.add(new GraphElementWithStatistics(element.clone(), statistics.clone())); } reader.close(); } return count; }
From source file:gobblin.compaction.dataset.DatasetHelper.java
License:Apache License
/**
 * Recursively collects the files under a directory whose name ends with one
 * of the configured extensions.
 *
 * @param dataDir directory to search
 * @return paths of the matching files; an empty list when the directory does
 *         not exist
 * @throws IOException if the file system cannot be queried
 */
private List<Path> getApplicableFilePaths(Path dataDir) throws IOException {
    if (!this.fs.exists(dataDir)) {
        return Lists.newArrayList();
    }

    // Accepts a path as soon as one configured extension matches its name
    PathFilter hasValidExtension = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            boolean matched = false;
            for (String validExtention : extensions) {
                if (path.getName().endsWith(validExtention)) {
                    matched = true;
                    break;
                }
            }
            return matched;
        }
    };

    List<Path> applicable = Lists.newArrayList();
    for (FileStatus found : FileListUtils.listFilesRecursively(this.fs, dataDir, hasValidExtension)) {
        applicable.add(found.getPath());
    }
    return applicable;
}