List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
From source file:datafu.hourglass.jobs.AbstractPartitionPreservingIncrementalJob.java
License:Apache License
/** * Moves files from the staging path to the final output path. * //w ww. j a va 2 s.c o m * @param report report to update with output paths * @param sourcePath source of data to move * @throws IOException */ private void moveStagedFiles(Report report, Path sourcePath) throws IOException { _log.info("Following files produced in staging path:"); for (FileStatus stat : getFileSystem().globStatus(new Path(sourcePath, "*.avro"))) { _log.info(String.format("* %s (%d bytes)", stat.getPath(), stat.getLen())); } FileStatus[] incrementalParts = getFileSystem().globStatus(new Path(sourcePath, "*"), new PathFilter() { @Override public boolean accept(Path path) { String[] pathParts = path.getName().split("-"); try { Long.parseLong(pathParts[0]); return true; } catch (NumberFormatException e) { return false; } } }); // collect the new incremental data from the temp folder and move to subfolders Map<String, Path> incrementalTargetPaths = new HashMap<String, Path>(); for (FileStatus stat : incrementalParts) { String[] pathParts = stat.getPath().getName().split("-"); try { String timestamp = pathParts[0]; if (!incrementalTargetPaths.containsKey(timestamp)) { Path parent = new Path(sourcePath, timestamp); if (!getFileSystem().exists(parent)) { getFileSystem().mkdirs(parent); } else { throw new RuntimeException("already exists: " + parent.toString()); } incrementalTargetPaths.put(timestamp, parent); } Path parent = incrementalTargetPaths.get(timestamp); _log.info(String.format("Moving %s to %s", stat.getPath().getName(), parent.toString())); getFileSystem().rename(stat.getPath(), new Path(parent, stat.getPath().getName())); } catch (NumberFormatException e) { throw new RuntimeException(e); } } for (Path src : incrementalTargetPaths.values()) { Date srcDate; try { srcDate = PathUtils.datedPathFormat.parse(src.getName()); } catch (ParseException e) { throw new RuntimeException(e); } Path target = new Path(getOutputPath(), PathUtils.nestedDatedPathFormat.format(srcDate)); 
_log.info(String.format("Moving %s to %s", src.getName(), target)); getFileSystem().mkdirs(target.getParent()); if (!getFileSystem().rename(src, target)) { throw new RuntimeException("Failed to rename " + src + " to " + target); } report.outputFiles.add(new DatePath(srcDate, target)); } }
From source file:de.huberlin.wbi.hiway.common.Data.java
License:Apache License
/**
 * Creates a data item from a local path, storing its directory and file name separately.
 *
 * @param localPath path whose parent becomes the local directory and whose last component
 *        becomes the file name
 * @param containerId identifier stored alongside the path components
 */
private Data(Path localPath, String containerId) {
    this.containerId = containerId;
    this.fileName = localPath.getName();
    this.localDirectory = localPath.getParent();
}
From source file:de.huberlin.wbi.hiway.common.Data.java
License:Apache License
/**
 * Recursively creates a directory and any missing ancestors in HDFS, setting
 * read/write/execute permission for user, group and other on each directory it creates.
 *
 * @param dir directory to create; {@code null} or an already existing directory is a no-op
 * @throws IOException if a file system operation fails
 */
private void mkHdfsDir(Path dir) throws IOException {
    if (dir == null) {
        return;
    }
    if (hdfs.isDirectory(dir)) {
        return;
    }

    // Handle ancestors first so that every newly created level receives the permission below.
    mkHdfsDir(dir.getParent());

    hdfs.mkdirs(dir);
    FsPermission openToAll = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
    hdfs.setPermission(dir, openToAll);
}
From source file:de.l3s.concatgz.io.ImmediateOutput.java
License:Open Source License
/**
 * Returns the output stream for the file {@code base + file + extension} inside {@code dir},
 * opening it on first use and reusing the same stream on later calls.
 *
 * @param base name prefix placed before the configured file name
 * @param extension name suffix placed after the configured file name
 * @return the cached stream for the resulting path, or a newly created one
 * @throws IOException if the parent directory or the file cannot be created
 */
public OutputStream stream(String base, String extension) throws IOException {
    Path target = new Path(dir, base + file + extension);
    String key = target.toString();

    OutputStream cached = streams.get(key);
    if (cached != null) {
        return cached;
    }

    fs.mkdirs(target.getParent());
    OutputStream opened = fs.create(target, true, bufferSize, replication, fs.getDefaultBlockSize(target));
    streams.put(key, opened);
    return opened;
}
From source file:edu.emory.bmi.medicurator.storage.HdfsStorage.java
License:Apache License
/** * save data from InputStream to a relative path * required parent directories will be created if not exists * @param path String path// w ww. j a va 2s . c o m * @param in InputStream * @return boolean */ public boolean saveToPath(String path, InputStream in) { try { Configuration conf = new Configuration(); FileSystem hdfs = FileSystem.get(new URI(Constants.HDFS_URI), conf); Path dst = new Path(hdfsPath + path); //Create a new file and write data to it. hdfs.mkdirs(dst.getParent()); FSDataOutputStream out = hdfs.create(dst, true); byte[] b = new byte[1024]; int numBytes = 0; while ((numBytes = in.read(b)) > 0) { out.write(b, 0, numBytes); } //Close all the file descripters out.close(); hdfs.close(); return true; } catch (Exception e) { System.out.println("[ERROR] Exception occurs when HdfsStorage saveToPath (" + path + ")" + e); } return false; }
From source file:edu.nyu.vida.data_polygamy.utils.GetMergeFiles.java
License:BSD License
public static void main(String[] args) throws IllegalArgumentException, IOException, URISyntaxException { String fromDirectory = args[0]; String toEventsDirectory = args[1]; String toOutliersDirectory = args[2]; String metadataFile = args[3]; // Detecting datasets. HashSet<String> datasets = new HashSet<String>(); FileReader fileReader = new FileReader(metadataFile); BufferedReader bufferedReader = new BufferedReader(fileReader); String line;//from w ww . j a va 2 s.c o m while ((line = bufferedReader.readLine()) != null) { String[] parts = line.split(","); datasets.add(parts[0]); } bufferedReader.close(); // Downloading relationships. String relationshipPatternStr = "([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})\\-([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})"; Pattern relationshipPattern = Pattern.compile(relationshipPatternStr); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); FileSystem localFS = FileSystem.getLocal(conf); for (FileStatus status : fs.listStatus(new Path(fs.getHomeDirectory() + "/" + fromDirectory))) { if (!status.isDirectory()) { continue; } Path file = status.getPath(); Matcher m = relationshipPattern.matcher(file.getName()); if (!m.find()) continue; String ds1 = m.group(1); String ds2 = m.group(2); if (!datasets.contains(ds1)) continue; if (!datasets.contains(ds2)) continue; for (FileStatus statusDir : fs.listStatus(file)) { if (!statusDir.isDirectory()) { continue; } Path fromPath = statusDir.getPath(); String toPathStr; if (fromPath.getName().contains("events")) { toPathStr = toEventsDirectory + "/" + fromPath.getParent().getName() + "-" + fromPath.getName(); } else { toPathStr = toOutliersDirectory + "/" + fromPath.getParent().getName() + "-" + fromPath.getName(); } Path toPath = new Path(toPathStr); System.out.println("Copying:"); System.out.println(" From: " + fromPath.toString()); System.out.println(" To: " + toPath.toString()); FileUtil.copyMerge(fs, // HDFS File System fromPath, // HDFS path localFS, // Local File System 
toPath, // Local Path false, // Do not delete HDFS path conf, // Configuration null); } } }
From source file:edu.ucsb.cs.partitioning.cosine.Partitioner.java
License:Apache License
/**
 * Lists the entries of a directory. When the given path is a file, the entries of its parent
 * directory are listed instead.
 *
 * @param hdfs file system to query
 * @param inputPath a file or a directory
 * @return the statuses returned by {@code listStatus} for the chosen directory
 * @throws IOException if the file system cannot be queried
 */
public static FileStatus[] setFiles(FileSystem hdfs, Path inputPath) throws IOException {
    Path directory = hdfs.isFile(inputPath) ? inputPath.getParent() : inputPath;
    return hdfs.listStatus(directory);
}
From source file:edu.ucsb.cs.partitioning.statistics.Collector.java
License:Apache License
public static String getNumMinMaxAvgVecLengthAvgPosting(FileSystem fs, Path inputPath, JobConf job) throws IOException { LongWritable key = new LongWritable(); FeatureWeightArrayWritable value = new FeatureWeightArrayWritable(); long numDocuments = 0, minDocLength = Long.MAX_VALUE, maxDocLength = 0; double avgDocLength = 0; int partitionSize; // remove HashMap<Long, Float> partitionfeaturesWeight = new HashMap<Long, Float>(); Iterator<Path> pathItr = getSortedFiles(inputPath, fs); if (!pathItr.hasNext()) return "0,0,0,0"; while (pathItr.hasNext()) { inputPath = pathItr.next();/*from w ww . j av a2 s .c o m*/ SequenceFile.Reader in = new SequenceFile.Reader(fs, inputPath, job); partitionSize = 0;// remove while (in.next(key, value)) { partitionSize++;// remove numDocuments++; avgDocLength += value.vectorSize; if (minDocLength > value.vectorSize) minDocLength = value.vectorSize; if (maxDocLength < value.vectorSize) maxDocLength = value.vectorSize; for (int j = 0; j < value.vectorSize; j++) { FeatureWeight current = value.vector[j]; updatePartitionBaraglia(partitionfeaturesWeight, current); } } System.out.println(inputPath.getName() + " has " + partitionSize + " vectors."); // remove partitionsWriter.append(new Text(inputPath.getName()), new LongWritable(partitionSize)); in.close(); writePartitionBaraglia(inputPath.getName(), partitionfeaturesWeight); } partitionsWriter.close(); maxWeightVector.clear(); String nFeaturesAvgPost = getNFeaturesAvgPosting(fs, inputPath.getParent(), job); avgDocLength = avgDocLength / numDocuments; return numDocuments + " , " + minDocLength + " , " + maxDocLength + " ," + avgDocLength + " ," + nFeaturesAvgPost; }
From source file:edu.umn.cs.spatialHadoop.core.RTreeGridRecordWriter.java
License:Open Source License
/**
 * Returns the path produced by the superclass for the given cell, with {@code ".rtree"}
 * appended to its file name.
 *
 * @param cellIndex index of the cell, passed through to the superclass
 * @return a path in the same directory as the superclass result, with the suffixed name
 * @throws IOException if the superclass call fails
 */
@Override
protected Path getFinalCellPath(int cellIndex) throws IOException {
    Path basePath = super.getFinalCellPath(cellIndex);
    Path directory = basePath.getParent();
    String rtreeName = basePath.getName() + ".rtree";
    return new Path(directory, rtreeName);
}
From source file:edu.umn.cs.spatialHadoop.mapred.SpatialInputFormat.java
License:Open Source License
protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter) throws IOException { GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir); if (gindex == null) { FileStatus[] listStatus;/* ww w . j a v a 2 s. c o m*/ if (OperationsParams.isWildcard(dir)) { // Wild card listStatus = fs.globStatus(dir); } else { listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter); } // Add all files under this directory for (FileStatus status : listStatus) { if (status.isDir()) { listStatus(fs, status.getPath(), result, filter); } else if (status.getPath().getName().toLowerCase().endsWith(".list")) { LineRecordReader in = new LineRecordReader(fs.open(status.getPath()), 0, status.getLen(), Integer.MAX_VALUE); LongWritable key = in.createKey(); Text value = in.createValue(); while (in.next(key, value)) { result.add(fs.getFileStatus(new Path(status.getPath().getParent(), value.toString()))); } in.close(); } else { result.add(status); } } } else { final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir; // Use the global index to limit files filter.selectCells(gindex, new ResultCollector<Partition>() { @Override public void collect(Partition partition) { try { Path cell_path = new Path(indexDir, partition.filename); if (!fs.exists(cell_path)) LOG.warn("Matched file not found: " + cell_path); result.add(fs.getFileStatus(cell_path)); } catch (IOException e) { e.printStackTrace(); } } }); } }