List of usage examples for the org.apache.hadoop.fs.Path method getParent()
public Path getParent()
From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java
License:Open Source License
protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter) throws IOException { GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir); if (gindex == null || filter == null) { // No global index which means we cannot use the filter function FileStatus[] listStatus;/*from w w w .j a va 2 s.c o m*/ if (OperationsParams.isWildcard(dir)) { // Wild card listStatus = fs.globStatus(dir); } else { listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter); } // Add all files under this directory for (FileStatus status : listStatus) { if (status.isDir()) { // Recursively go in subdir listStatus(fs, status.getPath(), result, filter); } else { // A file, just add it result.add(status); } } } else { final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir; // Use the global index to limit files filter.selectCells(gindex, new ResultCollector<Partition>() { @Override public void collect(Partition partition) { try { Path cell_path = new Path(indexDir, partition.filename); if (!fs.exists(cell_path)) LOG.warn("Matched file not found: " + cell_path); result.add(fs.getFileStatus(cell_path)); } catch (IOException e) { e.printStackTrace(); } } }); } }
From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License:Open Source License
/** * Creates a full spatio-temporal hierarchy for a source folder * @throws ParseException //from w w w. j ava 2s. c o m * @throws InterruptedException */ public static void directoryIndexer(final OperationsParams params) throws IOException, ParseException, InterruptedException { Path inputDir = params.getInputPath(); FileSystem sourceFs = inputDir.getFileSystem(params); final Path sourceDir = inputDir.makeQualified(sourceFs); Path destDir = params.getOutputPath(); final FileSystem destFs = destDir.getFileSystem(params); TimeRange timeRange = params.get("time") != null ? new TimeRange(params.get("time")) : null; // Create daily indexes that do not exist final Path dailyIndexDir = new Path(destDir, "daily"); FileStatus[] mathcingDays = timeRange == null ? sourceFs.listStatus(inputDir) : sourceFs.listStatus(inputDir, timeRange); final Vector<Path> sourceFiles = new Vector<Path>(); for (FileStatus matchingDay : mathcingDays) { for (FileStatus matchingTile : sourceFs.listStatus(matchingDay.getPath())) { sourceFiles.add(matchingTile.getPath()); } } // Shuffle the array for better load balancing across threads Collections.shuffle(sourceFiles); final String datasetName = params.get("dataset"); Parallel.forEach(sourceFiles.size(), new RunnableRange<Object>() { @Override public Object run(int i1, int i2) { LOG.info("Worker [" + i1 + "," + i2 + ") started"); for (int i = i1; i < i2; i++) { Path sourceFile = sourceFiles.get(i); try { Path relativeSourceFile = makeRelative(sourceDir, sourceFile); Path destFilePath = new Path(dailyIndexDir, relativeSourceFile); if (!destFs.exists(destFilePath)) { LOG.info("Worker [" + i1 + "," + i2 + ") indexing: " + sourceFile.getName()); Path tmpFile; do { tmpFile = new Path((int) (Math.random() * 1000000) + ".tmp"); } while (destFs.exists(tmpFile)); tmpFile = tmpFile.makeQualified(destFs); if (datasetName == null) throw new RuntimeException( "Please provide the name of dataset you would like to index"); AggregateQuadTree.build(params, 
sourceFile, datasetName, tmpFile); synchronized (destFs) { Path destDir = destFilePath.getParent(); if (!destFs.exists(destDir)) destFs.mkdirs(destDir); } destFs.rename(tmpFile, destFilePath); } } catch (IOException e) { throw new RuntimeException("Error building an index for " + sourceFile, e); } } LOG.info("Worker [" + i1 + "," + i2 + ") finished"); return null; } }); LOG.info("Done generating daily indexes"); // Merge daily indexes into monthly indexes Path monthlyIndexDir = new Path(destDir, "monthly"); final SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy.MM.dd"); final SimpleDateFormat monthFormat = new SimpleDateFormat("yyyy.MM"); mergeIndexes(destFs, dailyIndexDir, monthlyIndexDir, dayFormat, monthFormat, params); LOG.info("Done generating monthly indexes"); // Merge daily indexes into monthly indexes Path yearlyIndexDir = new Path(destDir, "yearly"); final SimpleDateFormat yearFormat = new SimpleDateFormat("yyyy"); mergeIndexes(destFs, monthlyIndexDir, yearlyIndexDir, monthFormat, yearFormat, params); LOG.info("Done generating yearly indexes"); }
From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License:Open Source License
/** * Merges a set of indexes into larger indexes * @param fs/* www . ja v a2s . co m*/ * @param srcIndexDir * @param dstIndexDir * @param srcFormat * @param dstFormat * @param params * @throws IOException * @throws ParseException * @throws InterruptedException */ private static void mergeIndexes(final FileSystem fs, Path srcIndexDir, Path dstIndexDir, SimpleDateFormat srcFormat, SimpleDateFormat dstFormat, final OperationsParams params) throws IOException, ParseException, InterruptedException { TimeRange timeRange = params.get("time") != null ? new TimeRange(params.get("time")) : null; final FileStatus[] sourceIndexes = timeRange == null ? fs.listStatus(srcIndexDir) : fs.listStatus(srcIndexDir, timeRange); Arrays.sort(sourceIndexes); // Alphabetical sort acts as sort-by-date here // Scan the source indexes and merge each consecutive run belonging to the // same unit int i1 = 0; while (i1 < sourceIndexes.length) { final String indexToCreate = dstFormat.format(srcFormat.parse(sourceIndexes[i1].getPath().getName())); int i2 = i1 + 1; // Keep scanning as long as the source index belongs to the same dest index while (i2 < sourceIndexes.length && dstFormat .format(srcFormat.parse(sourceIndexes[i2].getPath().getName())).equals(indexToCreate)) i2++; // Merge all source indexes in the range [i1, i2) into one dest index // Copy i1, i2 to other variables as final to be accessible from threads final int firstIndex = i1; final int lastIndex = i2; final Path destIndex = new Path(dstIndexDir, indexToCreate); // For each tile, merge all values in all source indexes /*A regular expression to catch the tile identifier of a MODIS grid cell*/ final Pattern MODISTileID = Pattern.compile("^.*(h\\d\\dv\\d\\d).*$"); final FileStatus[] tilesInFirstDay = fs.listStatus(sourceIndexes[i1].getPath()); // Shuffle the array for better load balancing across threads Random rand = new Random(); for (int i = 0; i < tilesInFirstDay.length - 1; i++) { // Swap the entry at i with any following entry 
int j = i + rand.nextInt(tilesInFirstDay.length - i - 1); FileStatus temp = tilesInFirstDay[i]; tilesInFirstDay[i] = tilesInFirstDay[j]; tilesInFirstDay[j] = temp; } Parallel.forEach(tilesInFirstDay.length, new RunnableRange<Object>() { @Override public Object run(int i_file1, int i_file2) { for (int i_file = i_file1; i_file < i_file2; i_file++) { try { FileStatus tileInFirstDay = tilesInFirstDay[i_file]; // Extract tile ID Matcher matcher = MODISTileID.matcher(tileInFirstDay.getPath().getName()); if (!matcher.matches()) { LOG.warn("Cannot extract tile id from file " + tileInFirstDay.getPath()); continue; } final String tileID = matcher.group(1); Path destIndexFile = new Path(destIndex, tileID); PathFilter tileFilter = new PathFilter() { @Override public boolean accept(Path path) { return path.getName().contains(tileID); } }; // Find matching tiles in all source indexes to merge Vector<Path> filesToMerge = new Vector<Path>(lastIndex - firstIndex); filesToMerge.add(tileInFirstDay.getPath()); for (int iDailyIndex = firstIndex + 1; iDailyIndex < lastIndex; iDailyIndex++) { FileStatus[] matchedTileFile = fs.listStatus(sourceIndexes[iDailyIndex].getPath(), tileFilter); if (matchedTileFile.length == 0) LOG.warn("Could not find tile " + tileID + " in dir " + sourceIndexes[iDailyIndex].getPath()); else if (matchedTileFile.length == 1) filesToMerge.add(matchedTileFile[0].getPath()); } if (fs.exists(destIndexFile)) { // Destination file already exists // Check the date of the destination and source files to see // whether it needs to be updated or not long destTimestamp = fs.getFileStatus(destIndexFile).getModificationTime(); boolean needsUpdate = false; for (Path fileToMerge : filesToMerge) { long sourceTimestamp = fs.getFileStatus(fileToMerge).getModificationTime(); if (sourceTimestamp > destTimestamp) { needsUpdate = true; break; } } if (!needsUpdate) continue; else LOG.info("Updating file " + destIndexFile.getName()); } // Do the merge Path tmpFile; do { tmpFile = new 
Path((int) (Math.random() * 1000000) + ".tmp"); } while (fs.exists(tmpFile)); tmpFile = tmpFile.makeQualified(fs); LOG.info("Merging tile " + tileID + " into file " + destIndexFile); AggregateQuadTree.merge(params, filesToMerge.toArray(new Path[filesToMerge.size()]), tmpFile); synchronized (fs) { Path destDir = destIndexFile.getParent(); if (!fs.exists(destDir)) fs.mkdirs(destDir); } fs.rename(tmpFile, destIndexFile); } catch (IOException e) { e.printStackTrace(); } } return null; } }); i1 = i2; } }
From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License:Open Source License
/** * Make a path relative to another path by removing all common ancestors * @param parent//from w ww.ja v a 2s .c om * @param descendant * @return */ private static Path makeRelative(Path parent, Path descendant) { Stack<String> components = new Stack<String>(); while (descendant.depth() > parent.depth()) { components.push(descendant.getName()); descendant = descendant.getParent(); } if (!descendant.equals(parent)) throw new RuntimeException("descendant not a child of parent"); if (components.isEmpty()) return new Path("."); Path relative = new Path(components.pop()); while (!components.isEmpty()) relative = new Path(relative, components.pop()); return relative; }
From source file:edu.umn.cs.spatialHadoop.nasa.HDFPlot.java
License:Open Source License
public static Job plotHeatMap(Path[] inFiles, Path outFile, OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException { if (params.get("shape") == null) { // Set the default shape value params.setClass("shape", NASARectangle.class, Shape.class); } else if (!(params.getShape("shape") instanceof NASAShape)) { System.err.println("The specified shape " + params.get("shape") + " in not an instance of NASAShape"); System.exit(1);/* ww w. j a v a 2 s. c o m*/ } if (params.get("mbr") == null) { // Set to the same value as query rectangle or the whole world params.set("mbr", params.get("rect", "-180,-90,180,90")); } // Restrict to HDF files if working on a directory for (int i = 0; i < inFiles.length; i++) { if (!inFiles[i].getName().toLowerCase().endsWith(".hdf")) inFiles[i] = new Path(inFiles[i], "*.hdf"); } String recover = params.get("recover", "none").toLowerCase(); if (recover.equals("none")) { // Don't recover holes params.setBoolean("recoverholes", false); } else if (recover.equals("read")) { // Recover holes on read params.setBoolean("recoverholes", true); } else if (recover.equals("write")) { // Recover holes upon writing the final image params.setBoolean("recoverholes", false); if (params.get(PREPROCESSED_WATERMARK) == null) { OperationsParams params2 = new OperationsParams(params); params2.setBoolean("background", false); Path wmImage = new Path(outFile.getParent(), outFile.getName() + "_WaterMask"); generateWaterMask(wmImage, params2); params.set(PREPROCESSED_WATERMARK, wmImage.toString()); } } if (params.getBoolean("pyramid", false)) return MultilevelPlot.plot(inFiles, outFile, HDFRasterizer.class, params); else return SingleLevelPlot.plot(inFiles, outFile, HDFRasterizer.class, params); }
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/** * Lists the contents of a directory//ww w .j av a 2s. c om * @param request * @param response */ private void handleListFiles(HttpServletRequest request, HttpServletResponse response) { try { String pathStr = request.getParameter("path"); Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr); FileSystem fs = path.getFileSystem(commonParams); FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter); Arrays.sort(fileStatuses, new Comparator<FileStatus>() { @Override public int compare(FileStatus o1, FileStatus o2) { if (o1.isDirectory() && o2.isFile()) return -1; if (o1.isFile() && o2.isDirectory()) return 1; return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase()); } }); response.setContentType("application/json;charset=utf-8"); response.setStatus(HttpServletResponse.SC_OK); PrintWriter out = response.getWriter(); out.print("{\"FileStatuses\":{"); if (pathStr.endsWith("/")) { pathStr = pathStr.substring(0, pathStr.length() - 1); } out.printf("\"BaseDir\":\"%s\",", pathStr); if (path.getParent() != null) out.printf("\"ParentDir\":\"%s\",", path.getParent()); out.print("\"FileStatus\":["); for (int i = 0; i < fileStatuses.length; i++) { FileStatus fileStatus = fileStatuses[i]; if (i != 0) out.print(','); String filename = fileStatus.getPath().getName(); int idot = filename.lastIndexOf('.'); String extension = idot == -1 ? 
"" : filename.substring(idot + 1); out.printf( "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d," + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d," + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\"," + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}", fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(), fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(), fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0, fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase()); } out.print("]}"); // Check if there is an image or master file FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith("_master") || path.getName().equals("_data.png"); } }); for (FileStatus metaFile : metaFiles) { String metaFileName = metaFile.getPath().getName(); if (metaFileName.startsWith("_master")) { out.printf(",\"MasterPath\":\"%s\"", metaFileName); String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams); if (shape != null) out.printf(",\"Shape\":\"%s\"", shape); } else if (metaFileName.equals("_data.png")) out.printf(",\"ImagePath\":\"%s\"", metaFileName); } out.print("}"); out.close(); } catch (Exception e) { System.out.println("error happened"); e.printStackTrace(); try { e.printStackTrace(response.getWriter()); } catch (IOException ioe) { ioe.printStackTrace(); e.printStackTrace(); } response.setContentType("text/plain;charset=utf-8"); response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); } }
From source file:edu.umn.cs.sthadoop.operations.STJoin.java
License:Open Source License
/** * @param args//ww w. j av a 2 s . c o m * @throws Exception */ public static void main(String[] args) throws Exception { // args = new String[10]; // args[0] = "/home/louai/nyc-taxi/yellowIndex"; // args[1] = "/home/louai/nyc-taxi/humanIndex"; // args[2] = "/home/louai/nyc-taxi/resultSTJoin"; // args[3] = "shape:edu.umn.cs.sthadoop.core.STPoint"; // args[4] = // "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391"; // args[5] = "interval:2015-01-01,2015-01-02"; // args[6] = "timeDistance:1,day"; // args[7] = "spaceDistance:2"; // args[8] = "-overwrite"; // args[9] = "-no-local"; OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); Path[] allFiles = params.getPaths(); if (allFiles.length < 2) { System.err.println("This operation requires at least two input files"); printUsage(); System.exit(1); } if (allFiles.length == 2 && !params.checkInput()) { // One of the input files does not exist printUsage(); System.exit(1); } if (allFiles.length > 2 && !params.checkInputOutput()) { printUsage(); System.exit(1); } if (params.get("timedistance") == null) { System.err.println("time distance is missing"); printUsage(); System.exit(1); } if (params.get("spacedistance") == null) { System.err.println("space distance is missing"); printUsage(); System.exit(1); } Path[] inputPaths = allFiles.length == 2 ? allFiles : params.getInputPaths(); Path outputPath = allFiles.length == 2 ? null : params.getOutputPath(); // modify the query range with new time interval to consider in join String[] value = params.get("timedistance").split(","); String[] date = params.get("interval").split(","); int interval = Integer.parseInt(value[0]); String start = addtimeSpaceToInterval(date[0], -interval); String end = addtimeSpaceToInterval(date[1], interval); params.set("interval", start + "," + end); // Query from the dataset. 
for (Path input : inputPaths) { args = new String[7]; args[0] = input.toString(); args[1] = outputPath.getParent().toString() + "candidatebuckets/" + input.getName(); args[2] = "shape:" + params.get("shape"); args[3] = "rect:" + params.get("rect"); args[4] = "interval:" + params.get("interval"); args[5] = "-overwrite"; args[6] = "-no-local"; for (String x : args) System.out.println(x); STRangeQuery.main(args); System.out.println("done with the STQuery from: " + input.toString() + "\n" + "candidate:" + args[1]); } // invoke the map-hash and reduce-join . FileSystem fs = outputPath.getFileSystem(params); Path inputstjoin; if (fs.exists(new Path(outputPath.getParent().toString() + "candidatebuckets/"))) { inputstjoin = new Path(outputPath.getParent().toString() + "candidatebuckets"); } else { inputstjoin = new Path(outputPath.getParent().toString() + "/candidatebuckets"); } Path hashedbucket = new Path(outputPath.getParent().toString() + "hashedbucket"); long t1 = System.currentTimeMillis(); // join hash step args = new String[7]; args[0] = inputstjoin.toString(); args[1] = hashedbucket.toString(); args[2] = "shape:" + params.get("shape"); args[3] = "rect:" + params.get("rect"); args[4] = "interval:" + params.get("interval"); args[5] = "-overwrite"; args[6] = "-no-local"; for (String x : args) System.out.println(x); STHash.main(args); // //join Step // if(fs.exists(new Path(outputPath.getParent().toString()+"hashedbucket"))){ // inputstjoin = new Path(outputPath.getParent().toString()+"hashedbucket"); // }else{ // inputstjoin = new Path(outputPath.getParent().toString()+"/hashedbucket"); // } //Join refinement Step stJoin(hashedbucket, outputPath, params); long t2 = System.currentTimeMillis(); System.out.println("Total join time: " + (t2 - t1) + " millis"); }
From source file:eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java
License:Apache License
public void commitTask(JobConf conf, TaskAttemptID taskAttemptID) throws IOException { Path taskOutputPath = getTempTaskOutputPath(conf, taskAttemptID); if (taskOutputPath != null) { FileSystem fs = taskOutputPath.getFileSystem(conf); if (fs.exists(taskOutputPath)) { Path jobOutputPath = taskOutputPath.getParent().getParent(); // Move the task outputs to their final place moveTaskOutputs(conf, taskAttemptID, fs, jobOutputPath, taskOutputPath); // Delete the temporary task-specific output directory if (!fs.delete(taskOutputPath, true)) { LOG.info("Failed to delete the temporary output" + " directory of task: " + taskAttemptID + " - " + taskOutputPath); }/*from w w w . ja v a2s .c o m*/ LOG.info("Saved output of task '" + taskAttemptID + "' to " + jobOutputPath); } } }
From source file:fr.ens.biologie.genomique.eoulsan.MainHadoop.java
License:LGPL
@Override protected Handler getLogHandler(final URI logFile) throws IOException { if (logFile == null) { throw new NullPointerException("The log file is null"); }/*from w w w.ja v a 2 s .co m*/ final Path loggerPath = new Path(logFile); final FileSystem loggerFs = loggerPath.getFileSystem(this.conf); final Path parentPath = loggerPath.getParent(); // Create parent directory if necessary if (!loggerFs.exists(loggerPath.getParent())) { if (!loggerFs.mkdirs(loggerPath.getParent())) { throw new IOException("Unable to create directory " + parentPath + " for log file:" + logFile); } } return new StreamHandler(loggerFs.create(loggerPath), Globals.LOG_FORMATTER); }
From source file:fr.ens.biologie.genomique.eoulsan.modules.expression.hadoop.ExpressionHadoopModule.java
License:LGPL
private static Job createJobPairedEnd(final Configuration parentConf, final TaskContext context, final Data alignmentsData, final Data genomeDescriptionData) throws IOException, BadBioEntryException { final Configuration jobConf = new Configuration(parentConf); // Get the source final DataFile inputDataFile = alignmentsData.getDataFile(); // Set input path final Path inputPath = new Path(inputDataFile.getSource()); // Set counter group jobConf.set(CommonHadoop.COUNTER_GROUP_KEY, COUNTER_GROUP); // Set Genome description path jobConf.set(GENOME_DESC_PATH_KEY, genomeDescriptionData.getDataFilename()); // Create the job and its name final Job job = Job.getInstance(jobConf, "Pretreatment for the expression estimation step (" + alignmentsData.getName() + ", " + inputDataFile.getSource() + ")"); // Set the jar job.setJarByClass(ExpressionHadoopModule.class); // Set input path FileInputFormat.addInputPath(job, inputPath); // Set the Mapper class job.setMapperClass(PreTreatmentExpressionMapper.class); // Set the Reducer class job.setReducerClass(PreTreatmentExpressionReducer.class); // Set the output key class job.setOutputKeyClass(Text.class); // Set the output value class job.setOutputValueClass(Text.class); // Output name String outputName = StringUtils.filenameWithoutExtension(inputPath.getName()); outputName = outputName.substring(0, outputName.length()); outputName += TSAM_EXTENSION;/*w ww . j a v a 2s .c om*/ // Set output path FileOutputFormat.setOutputPath(job, new Path(inputPath.getParent(), outputName)); return job; }