List of usage examples for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority
public static Path getPathWithoutSchemeAndAuthority(Path path)
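Before the examples, here is a minimal self-contained sketch (not taken from any of the sources below; the sample path is hypothetical) showing what the method does: it strips the scheme and authority from a fully qualified Path and keeps only the path component.

import org.apache.hadoop.fs.Path;

public class PathStripExample {
    public static void main(String[] args) {
        // Hypothetical fully qualified HDFS path, used only for illustration
        Path qualified = new Path("hdfs://namenode:8020/user/hive/warehouse/t1");
        // Drops the "hdfs://namenode:8020" scheme and authority, keeping only the path component
        Path stripped = Path.getPathWithoutSchemeAndAuthority(qualified);
        System.out.println(stripped); // prints: /user/hive/warehouse/t1
    }
}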
From source file:com.aliyun.odps.volume.VolumeFSUtil.java
License:Apache License
/**
 * Get volume name from a specific {@link Path}
 *
 * @param path
 * @throws VolumeException
 */
public static String getVolumeFromPath(Path path) throws VolumeException {
    path = Path.getPathWithoutSchemeAndAuthority(path);
    if (path.depth() == 0) {
        throw new VolumeException(VolumeFSErrorCode.VolumeMissing, "No volume found!");
    } else {
        String p = path.toUri().getPath();
        String volume = p.split(VolumeFSConstants.SEPARATOR)[1];
        return volume;
    }
}
From source file:com.datatorrent.flume.storage.HDFSStorage.java
License:Open Source License
/**
 * Writes the given bytes to the file at the specified path.
 *
 * @param path the file location
 * @param data the data to be written to the file
 * @return the output stream used to write the data
 * @throws IOException
 */
private FSDataOutputStream writeData(Path path, byte[] data) throws IOException {
    FSDataOutputStream fsOutputStream;
    if (fs.getScheme().equals("file")) {
        // local FS does not support hflush and does not flush native stream
        fsOutputStream = new FSDataOutputStream(
            new FileOutputStream(Path.getPathWithoutSchemeAndAuthority(path).toString()), null);
    } else {
        fsOutputStream = fs.create(path);
    }
    fsOutputStream.write(data);
    return fsOutputStream;
}
From source file:com.datatorrent.lib.io.fs.FileStitcher.java
License:Apache License
/**
 * Moves the temporary output file to the final destination file.
 *
 * @param tempOutFilePath Temporary output file
 * @param destination Destination directory path
 * @throws IOException
 */
protected void moveToFinalFile(Path tempOutFilePath, Path destination) throws IOException {
    Path src = Path.getPathWithoutSchemeAndAuthority(tempOutFilePath);
    Path dst = Path.getPathWithoutSchemeAndAuthority(destination);
    boolean moveSuccessful = false;
    if (!outputFS.exists(dst.getParent())) {
        outputFS.mkdirs(dst.getParent());
    }
    if (outputFS.exists(dst)) {
        outputFS.delete(dst, false);
    }
    moveSuccessful = outputFS.rename(src, dst);
    if (moveSuccessful) {
        LOG.debug("File {} moved successfully to destination folder.", dst);
    } else {
        throw new RuntimeException("Unable to move file from " + src + " to " + dst);
    }
}
From source file:com.datatorrent.stram.FSRecoveryHandler.java
License:Apache License
@Override
public DataOutputStream rotateLog() throws IOException {
    if (fs.exists(logBackupPath)) {
        // log backup is purged on snapshot/restore
        throw new AssertionError("Snapshot state prior to log rotation: " + logBackupPath);
    }
    if (fs.exists(logPath)) {
        LOG.debug("Creating log backup {}", logBackupPath);
        if (!fs.rename(logPath, logBackupPath)) {
            throw new IOException("Failed to rotate log: " + logPath);
        }
    }
    LOG.info("Creating {}", logPath);
    final FSDataOutputStream fsOutputStream;
    String scheme = null;
    try {
        scheme = fs.getScheme();
    } catch (UnsupportedOperationException e) {
        LOG.warn("{} doesn't implement getScheme() method", fs.getClass().getName());
    }
    if ("file".equals(scheme)) {
        // local FS does not support hflush and does not flush native stream
        FSUtil.mkdirs(fs, logPath.getParent());
        fsOutputStream = new FSDataOutputStream(
            new FileOutputStream(Path.getPathWithoutSchemeAndAuthority(logPath).toString()), null);
    } else {
        fsOutputStream = fs.create(logPath);
    }
    DataOutputStream osWrapper = new DataOutputStream(fsOutputStream) {
        @Override
        public void flush() throws IOException {
            super.flush();
            fsOutputStream.hflush();
        }

        @Override
        public void close() throws IOException {
            LOG.debug("Closing {}", logPath);
            super.close();
        }
    };
    return osWrapper;
}
From source file:com.uber.hoodie.common.table.view.IncrementalFSViewSyncTest.java
License:Apache License
/**
 * Check for equality of views.
 *
 * @param view1 View1
 * @param view2 View2
 * @param expectedTotalFileSlices Expected total number of file slices across all file groups
 */
private void areViewsConsistent(SyncableFileSystemView view1, SyncableFileSystemView view2,
    long expectedTotalFileSlices) {
    // Timeline check
    HoodieTimeline timeline1 = view1.getTimeline();
    HoodieTimeline timeline2 = view2.getTimeline();
    Assert.assertEquals(view1.getLastInstant(), view2.getLastInstant());
    Assert.assertTrue(
        Iterators.elementsEqual(timeline1.getInstants().iterator(), timeline2.getInstants().iterator()));

    // View checks
    Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap1 = partitions.stream()
        .flatMap(p -> view1.getAllFileGroups(p))
        .collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
    Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap2 = partitions.stream()
        .flatMap(p -> view2.getAllFileGroups(p))
        .collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
    Assert.assertEquals(fileGroupsMap1.keySet(), fileGroupsMap2.keySet());

    long gotSlicesCount = fileGroupsMap1.keySet().stream()
        .map(k -> Pair.of(fileGroupsMap1.get(k), fileGroupsMap2.get(k))).mapToLong(e -> {
            HoodieFileGroup fg1 = e.getKey();
            HoodieFileGroup fg2 = e.getValue();
            Assert.assertEquals(fg1.getFileGroupId(), fg2.getFileGroupId());
            List<FileSlice> slices1 = fg1.getAllRawFileSlices().collect(Collectors.toList());
            List<FileSlice> slices2 = fg2.getAllRawFileSlices().collect(Collectors.toList());
            Assert.assertEquals(slices1.size(), slices2.size());
            IntStream.range(0, slices1.size()).mapToObj(idx -> Pair.of(slices1.get(idx), slices2.get(idx)))
                .forEach(e2 -> {
                    FileSlice slice1 = e2.getKey();
                    FileSlice slice2 = e2.getValue();
                    Assert.assertEquals(slice1.getBaseInstantTime(), slice2.getBaseInstantTime());
                    Assert.assertEquals(slice1.getFileId(), slice2.getFileId());
                    Assert.assertEquals(slice1.getDataFile().isPresent(), slice2.getDataFile().isPresent());
                    if (slice1.getDataFile().isPresent()) {
                        HoodieDataFile df1 = slice1.getDataFile().get();
                        HoodieDataFile df2 = slice2.getDataFile().get();
                        Assert.assertEquals(df1.getCommitTime(), df2.getCommitTime());
                        Assert.assertEquals(df1.getFileId(), df2.getFileId());
                        Assert.assertEquals(df1.getFileName(), df2.getFileName());
                        // Compare data file paths with scheme and authority stripped
                        Assert.assertEquals(
                            Path.getPathWithoutSchemeAndAuthority(new Path(df1.getPath())),
                            Path.getPathWithoutSchemeAndAuthority(new Path(df2.getPath())));
                    }
                    List<Path> logPaths1 = slice1.getLogFiles()
                        .map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath()))
                        .collect(Collectors.toList());
                    List<Path> logPaths2 = slice2.getLogFiles()
                        .map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath()))
                        .collect(Collectors.toList());
                    Assert.assertEquals(logPaths1, logPaths2);
                });
            return slices1.size();
        }).sum();
    Assert.assertEquals(expectedTotalFileSlices, gotSlicesCount);

    // Pending compaction operations check
    Set<Pair<String, CompactionOperation>> ops1 = view1.getPendingCompactionOperations()
        .collect(Collectors.toSet());
    Set<Pair<String, CompactionOperation>> ops2 = view2.getPendingCompactionOperations()
        .collect(Collectors.toSet());
    Assert.assertEquals(ops1, ops2);
}
From source file:com.uber.hoodie.common.table.view.IncrementalTimelineSyncFileSystemView.java
License:Apache License
/**
 * Apply changes to the partition file-system view. The base implementation overwrites the entire partition's
 * view, assuming some sort of map (in-mem/disk-based) is used. For view implementations which support
 * fine-granular updates (e.g. RocksDB), override this method.
 *
 * @param partition PartitionPath
 * @param deltaFileGroups Changed file-slices aggregated as file-groups
 * @param mode Delta Apply mode
 */
protected void applyDeltaFileSlicesToPartitionView(String partition, List<HoodieFileGroup> deltaFileGroups,
    DeltaApplyMode mode) {
    if (deltaFileGroups.isEmpty()) {
        log.info("No delta file groups for partition :" + partition);
        return;
    }
    List<HoodieFileGroup> fileGroups = fetchAllStoredFileGroups(partition).collect(Collectors.toList());
    /*
     * Note that while finding the new data/log files added/removed, the path stored in metadata will be missing
     * the base-path, scheme and authority. Ensure the matching process takes care of this discrepancy.
     */
    Map<String, HoodieDataFile> viewDataFiles = fileGroups.stream()
        .flatMap(HoodieFileGroup::getAllRawFileSlices).map(FileSlice::getDataFile)
        .filter(Optional::isPresent).map(Optional::get)
        .map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    // Note: delta log files and data files can be empty when adding/removing pending compactions
    Map<String, HoodieDataFile> deltaDataFiles = deltaFileGroups.stream()
        .flatMap(HoodieFileGroup::getAllRawFileSlices).map(FileSlice::getDataFile)
        .filter(Optional::isPresent).map(Optional::get)
        .map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    Map<String, HoodieLogFile> viewLogFiles = fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
        .flatMap(FileSlice::getLogFiles)
        .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    Map<String, HoodieLogFile> deltaLogFiles = deltaFileGroups.stream()
        .flatMap(HoodieFileGroup::getAllRawFileSlices).flatMap(FileSlice::getLogFiles)
        .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    switch (mode) {
        case ADD:
            viewDataFiles.putAll(deltaDataFiles);
            viewLogFiles.putAll(deltaLogFiles);
            break;
        case REMOVE:
            deltaDataFiles.keySet().stream().forEach(p -> viewDataFiles.remove(p));
            deltaLogFiles.keySet().stream().forEach(p -> viewLogFiles.remove(p));
            break;
        default:
            throw new IllegalStateException("Unknown diff apply mode=" + mode);
    }
    HoodieTimeline timeline = deltaFileGroups.stream().map(df -> df.getTimeline()).findAny().get();
    List<HoodieFileGroup> fgs = buildFileGroups(viewDataFiles.values().stream(),
        viewLogFiles.values().stream(), timeline, true);
    storePartitionView(partition, fgs);
}
From source file:com.uber.hoodie.common.table.view.RocksDbBasedFileSystemView.java
License:Apache License
/**
 * This is overridden to incrementally apply file-slices to RocksDB.
 */
@Override
protected void applyDeltaFileSlicesToPartitionView(String partition, List<HoodieFileGroup> deltaFileGroups,
    DeltaApplyMode mode) {
    rocksDB.writeBatch(batch -> {
        deltaFileGroups.stream().forEach(fg -> {
            fg.getAllRawFileSlices().map(fs -> {
                FileSlice oldSlice = getFileSlice(partition, fs.getFileId(), fs.getBaseInstantTime());
                if (null == oldSlice) {
                    return fs;
                } else {
                    // First remove the file-slice
                    log.info("Removing old Slice in DB. FS=" + oldSlice);
                    rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForView(),
                        schemaHelper.getKeyForSliceView(fg, oldSlice));
                    rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForView(),
                        schemaHelper.getKeyForDataFileView(fg, oldSlice));

                    Map<String, HoodieLogFile> logFiles = oldSlice.getLogFiles()
                        .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
                        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
                    Map<String, HoodieLogFile> deltaLogFiles = fs.getLogFiles()
                        .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
                        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));

                    switch (mode) {
                        case ADD: {
                            FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(),
                                oldSlice.getBaseInstantTime());
                            oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                            fs.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                            Map<String, HoodieLogFile> newLogFiles = new HashMap<>(logFiles);
                            deltaLogFiles.entrySet().stream().filter(e -> !logFiles.containsKey(e.getKey()))
                                .forEach(p -> newLogFiles.put(p.getKey(), p.getValue()));
                            newLogFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
                            log.info("Adding back new File Slice after add FS=" + newFileSlice);
                            return newFileSlice;
                        }
                        case REMOVE: {
                            log.info("Removing old File Slice =" + fs);
                            FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(),
                                oldSlice.getBaseInstantTime());
                            fs.getDataFile().orElseGet(() -> {
                                oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                                return null;
                            });
                            deltaLogFiles.keySet().stream().forEach(p -> logFiles.remove(p));
                            // Add remaining log files back
                            logFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
                            if (newFileSlice.getDataFile().isPresent()
                                || (newFileSlice.getLogFiles().count() > 0)) {
                                log.info("Adding back new file-slice after remove FS=" + newFileSlice);
                                return newFileSlice;
                            }
                            return null;
                        }
                        default:
                            throw new IllegalStateException("Unknown diff apply mode=" + mode);
                    }
                }
            }).filter(Objects::nonNull).forEach(fs -> {
                rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(),
                    schemaHelper.getKeyForSliceView(fg, fs), fs);
                fs.getDataFile().ifPresent(df -> {
                    rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(),
                        schemaHelper.getKeyForDataFileView(fg, fs), df);
                });
            });
        });
    });
}
From source file:com.uber.hoodie.common.util.FailSafeConsistencyGuard.java
License:Apache License
/**
 * Helper function to wait for all files belonging to a single directory to appear or disappear.
 *
 * @param dirPath Dir Path
 * @param files Files to appear/disappear
 * @param event Appear/Disappear
 * @throws TimeoutException
 */
public void waitForFilesVisibility(String dirPath, List<String> files, FileVisibility event)
    throws TimeoutException {
    Path dir = new Path(dirPath);
    List<String> filesWithoutSchemeAndAuthority = files.stream()
        .map(f -> Path.getPathWithoutSchemeAndAuthority(new Path(f))).map(p -> p.toString())
        .collect(Collectors.toList());

    retryTillSuccess((retryNum) -> {
        try {
            log.info("Trying " + retryNum);
            FileStatus[] entries = fs.listStatus(dir);
            List<String> gotFiles = Arrays.stream(entries)
                .map(e -> Path.getPathWithoutSchemeAndAuthority(e.getPath())).map(p -> p.toString())
                .collect(Collectors.toList());
            List<String> candidateFiles = new ArrayList<>(filesWithoutSchemeAndAuthority);
            boolean altered = candidateFiles.removeAll(gotFiles);
            switch (event) {
                case DISAPPEAR:
                    log.info("Following files are visible" + candidateFiles);
                    // If no candidate file got removed, it means all of them have disappeared
                    return !altered;
                case APPEAR:
                default:
                    // If all files appear, the list is empty
                    return candidateFiles.isEmpty();
            }
        } catch (IOException ioe) {
            log.warn("Got IOException waiting for file event. Have tried " + retryNum + " time(s)", ioe);
        }
        return false;
    }, "Timed out waiting for files to become visible");
}
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs) {
    Preconditions.checkArgument(markerPath.endsWith(MARKER_EXTN));
    String markerRootPath = Path
        .getPathWithoutSchemeAndAuthority(new Path(
            String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs)))
        .toString();
    int begin = markerPath.indexOf(markerRootPath);
    Preconditions.checkArgument(begin >= 0,
        "Not in marker dir. Marker Path=" + markerPath + ", Expected Marker Root=" + markerRootPath);
    String rPath = markerPath.substring(begin + markerRootPath.length() + 1);
    return String.format("%s/%s%s", basePath, rPath.replace(MARKER_EXTN, ""),
        HoodieFileFormat.PARQUET.getFileExtension());
}
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Given a base path and a partition path, return the relative path of the partition path to the base path.
 */
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
    basePath = Path.getPathWithoutSchemeAndAuthority(basePath);
    partitionPath = Path.getPathWithoutSchemeAndAuthority(partitionPath);
    String partitionFullPath = partitionPath.toString();
    int partitionStartIndex = partitionFullPath.indexOf(basePath.getName(),
        basePath.getParent() == null ? 0 : basePath.getParent().toString().length());
    // Partition path could be empty for non-partitioned tables
    return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? ""
        : partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
}
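As a rough usage sketch of the helper above (hypothetical paths, assuming the FSUtils class shown here is on the classpath), fully qualified and unqualified inputs resolve to the same relative partition path because scheme and authority are stripped first:

import org.apache.hadoop.fs.Path;
import com.uber.hoodie.common.util.FSUtils;

public class RelativePartitionPathExample {
    public static void main(String[] args) {
        // Hypothetical table base path and partition path, for illustration only
        Path base = new Path("hdfs://nn:8020/data/hoodie/table");
        Path partition = new Path("hdfs://nn:8020/data/hoodie/table/2020/01/01");
        System.out.println(FSUtils.getRelativePartitionPath(base, partition)); // prints: 2020/01/01
        System.out.println(FSUtils.getRelativePartitionPath(base, base));      // prints: "" (non-partitioned table)
    }
}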