Example usage for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority

List of usage examples for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path#getPathWithoutSchemeAndAuthority.

Prototype

public static Path getPathWithoutSchemeAndAuthority(Path path) 

Source Link

Document

Return a version of the given Path without the scheme and authority information.
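
For illustration, here is a minimal, self-contained sketch of what the method does (the path below is made up for this example):

import org.apache.hadoop.fs.Path;

public class PathStripExample {
    public static void main(String[] args) {
        // Fully qualified path with scheme ("hdfs") and authority ("namenode:8020")
        Path qualified = new Path("hdfs://namenode:8020/user/data/part-0000");

        // Strips scheme and authority, leaving only the path component
        Path stripped = Path.getPathWithoutSchemeAndAuthority(qualified);

        System.out.println(stripped); // prints: /user/data/part-0000
    }
}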

Usage

From source file:com.aliyun.odps.volume.VolumeFSUtil.java

License:Apache License

/**
 * Get the volume name from a specific {@link Path}.
 *
 * @param path the path to extract the volume name from
 * @return the volume name (the first path component)
 * @throws VolumeException if no volume can be found in the path
 */
public static String getVolumeFromPath(Path path) throws VolumeException {
    path = Path.getPathWithoutSchemeAndAuthority(path);
    if (path.depth() == 0) {
        throw new VolumeException(VolumeFSErrorCode.VolumeMissing, "No volume found!");
    } else {
        String p = path.toUri().getPath();
        String volume = p.split(VolumeFSConstants.SEPARATOR)[1];
        return volume;
    }
}
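
A hedged sketch of the same idea (the URI below is illustrative and not from the original source): after stripping the scheme and authority, the volume is simply the first path component, assuming the separator is "/".

Path p = Path.getPathWithoutSchemeAndAuthority(new Path("hdfs://nn:8020/myVolume/dir/file"));
String volume = p.toUri().getPath().split("/")[1]; // "myVolume"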

From source file:com.datatorrent.flume.storage.HDFSStorage.java

License:Open Source License

/**
 * Writes the given bytes to the file specified by the path.
 *
 * @param path the file location
 * @param data the data to be written to the file
 * @return the output stream the data was written to (left open for the caller)
 * @throws IOException
 */
private FSDataOutputStream writeData(Path path, byte[] data) throws IOException {
    FSDataOutputStream fsOutputStream;
    if (fs.getScheme().equals("file")) {
        // local FS does not support hflush and does not flush native stream
        fsOutputStream = new FSDataOutputStream(
                new FileOutputStream(Path.getPathWithoutSchemeAndAuthority(path).toString()), null);
    } else {
        fsOutputStream = fs.create(path);
    }
    fsOutputStream.write(data);
    return fsOutputStream;
}
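
The local-filesystem branch works because stripping the scheme turns a file: URI into a plain path that java.io can open directly. A minimal sketch, assuming a local file: path (file name is illustrative):

// "file:///tmp/out.bin" -> "/tmp/out.bin", which FileOutputStream accepts
Path p = new Path("file:///tmp/out.bin");
String localPath = Path.getPathWithoutSchemeAndAuthority(p).toString();
try (FileOutputStream out = new FileOutputStream(localPath)) {
    out.write(new byte[] {1, 2, 3});
}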

From source file:com.datatorrent.lib.io.fs.FileStitcher.java

License:Apache License

/**
 * Moves the temporary output file to the final destination file.
 *
 * @param tempOutFilePath
 *          Temporary output file
 * @param destination
 *          Destination file path
 * @throws IOException
 */
protected void moveToFinalFile(Path tempOutFilePath, Path destination) throws IOException {
    Path src = Path.getPathWithoutSchemeAndAuthority(tempOutFilePath);
    Path dst = Path.getPathWithoutSchemeAndAuthority(destination);

    boolean moveSuccessful = false;
    if (!outputFS.exists(dst.getParent())) {
        outputFS.mkdirs(dst.getParent());
    }
    if (outputFS.exists(dst)) {
        outputFS.delete(dst, false);
    }
    moveSuccessful = outputFS.rename(src, dst);

    if (moveSuccessful) {
        LOG.debug("File {} moved successfully to destination folder.", dst);
    } else {
        throw new RuntimeException("Unable to move file from " + src + " to " + dst);
    }
}
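
A hedged usage sketch from within a subclass (paths are illustrative): fully qualified inputs are fine, since the method normalizes both arguments before resolving them against outputFS.

moveToFinalFile(new Path("hdfs://nn:8020/staging/part-0000.tmp"),
        new Path("hdfs://nn:8020/output/part-0000"));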

From source file:com.datatorrent.stram.FSRecoveryHandler.java

License:Apache License

@Override
public DataOutputStream rotateLog() throws IOException {

    if (fs.exists(logBackupPath)) {
        // log backup is purged on snapshot/restore
        throw new AssertionError("Snapshot state prior to log rotation: " + logBackupPath);
    }

    if (fs.exists(logPath)) {
        LOG.debug("Creating log backup {}", logBackupPath);
        if (!fs.rename(logPath, logBackupPath)) {
            throw new IOException("Failed to rotate log: " + logPath);
        }
    }

    LOG.info("Creating {}", logPath);
    final FSDataOutputStream fsOutputStream;
    String scheme = null;
    try {
        scheme = fs.getScheme();
    } catch (UnsupportedOperationException e) {
        LOG.warn("{} doesn't implement getScheme() method", fs.getClass().getName());
    }
    if ("file".equals(scheme)) {
        // local FS does not support hflush and does not flush native stream
        FSUtil.mkdirs(fs, logPath.getParent());
        fsOutputStream = new FSDataOutputStream(
                new FileOutputStream(Path.getPathWithoutSchemeAndAuthority(logPath).toString()), null);
    } else {
        fsOutputStream = fs.create(logPath);
    }

    DataOutputStream osWrapper = new DataOutputStream(fsOutputStream) {
        @Override
        public void flush() throws IOException {
            super.flush();
            fsOutputStream.hflush();
        }

        @Override
        public void close() throws IOException {
            LOG.debug("Closing {}", logPath);
            super.close();
        }
    };
    return osWrapper;
}

From source file:com.uber.hoodie.common.table.view.IncrementalFSViewSyncTest.java

License:Apache License

/**
 * Check that two views are consistent with each other.
 *
 * @param view1 View1
 * @param view2 View2
 * @param expectedTotalFileSlices expected total number of file slices across all file groups
 */
private void areViewsConsistent(SyncableFileSystemView view1, SyncableFileSystemView view2,
        long expectedTotalFileSlices) {
    //Timeline check
    HoodieTimeline timeline1 = view1.getTimeline();
    HoodieTimeline timeline2 = view2.getTimeline();
    Assert.assertEquals(view1.getLastInstant(), view2.getLastInstant());
    // Assert (not just compute) that both timelines contain the same instants
    Assert.assertTrue(
            Iterators.elementsEqual(timeline1.getInstants().iterator(), timeline2.getInstants().iterator()));

    //View Checks
    Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap1 = partitions.stream()
            .flatMap(p -> view1.getAllFileGroups(p))
            .collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
    Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap2 = partitions.stream()
            .flatMap(p -> view2.getAllFileGroups(p))
            .collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
    Assert.assertEquals(fileGroupsMap1.keySet(), fileGroupsMap2.keySet());
    long gotSlicesCount = fileGroupsMap1.keySet().stream()
            .map(k -> Pair.of(fileGroupsMap1.get(k), fileGroupsMap2.get(k))).mapToLong(e -> {
                HoodieFileGroup fg1 = e.getKey();
                HoodieFileGroup fg2 = e.getValue();
                Assert.assertEquals(fg1.getFileGroupId(), fg2.getFileGroupId());
                List<FileSlice> slices1 = fg1.getAllRawFileSlices().collect(Collectors.toList());
                List<FileSlice> slices2 = fg2.getAllRawFileSlices().collect(Collectors.toList());
                Assert.assertEquals(slices1.size(), slices2.size());
                IntStream.range(0, slices1.size()).mapToObj(idx -> Pair.of(slices1.get(idx), slices2.get(idx)))
                        .forEach(e2 -> {
                            FileSlice slice1 = e2.getKey();
                            FileSlice slice2 = e2.getValue();
                            Assert.assertEquals(slice1.getBaseInstantTime(), slice2.getBaseInstantTime());
                            Assert.assertEquals(slice1.getFileId(), slice2.getFileId());
                            Assert.assertEquals(slice1.getDataFile().isPresent(),
                                    slice2.getDataFile().isPresent());
                            if (slice1.getDataFile().isPresent()) {
                                HoodieDataFile df1 = slice1.getDataFile().get();
                                HoodieDataFile df2 = slice2.getDataFile().get();
                                Assert.assertEquals(df1.getCommitTime(), df2.getCommitTime());
                                Assert.assertEquals(df1.getFileId(), df2.getFileId());
                                Assert.assertEquals(df1.getFileName(), df2.getFileName());
                                Assert.assertEquals(
                                        Path.getPathWithoutSchemeAndAuthority(new Path(df1.getPath())),
                                        Path.getPathWithoutSchemeAndAuthority(new Path(df2.getPath())));
                            }
                            List<Path> logPaths1 = slice1.getLogFiles()
                                    .map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath()))
                                    .collect(Collectors.toList());
                            List<Path> logPaths2 = slice2.getLogFiles()
                                    .map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath()))
                                    .collect(Collectors.toList());
                            Assert.assertEquals(logPaths1, logPaths2);
                        });
                return slices1.size();
            }).sum();
    Assert.assertEquals(expectedTotalFileSlices, gotSlicesCount);

    // Pending Compaction Operations Check
    Set<Pair<String, CompactionOperation>> ops1 = view1.getPendingCompactionOperations()
            .collect(Collectors.toSet());
    Set<Pair<String, CompactionOperation>> ops2 = view2.getPendingCompactionOperations()
            .collect(Collectors.toSet());
    Assert.assertEquals(ops1, ops2);
}

From source file:com.uber.hoodie.common.table.view.IncrementalTimelineSyncFileSystemView.java

License:Apache License

/**
 * Apply changes to the partition file-system view. The base implementation overwrites the entire partition view,
 * assuming some sort of map (in-memory/disk-based) is used. View implementations that support fine-grained
 * updates (e.g. RocksDB) should override this method.
 *
 * @param partition PartitionPath
 * @param deltaFileGroups Changed file-slices aggregated as file-groups
 * @param mode Delta Apply mode
 */
protected void applyDeltaFileSlicesToPartitionView(String partition, List<HoodieFileGroup> deltaFileGroups,
        DeltaApplyMode mode) {
    if (deltaFileGroups.isEmpty()) {
        log.info("No delta file groups for partition :" + partition);
        return;
    }

    List<HoodieFileGroup> fileGroups = fetchAllStoredFileGroups(partition).collect(Collectors.toList());
    /**
     * Note that while finding the new data/log files added/removed, the path stored in metadata will be missing
     * the base-path, scheme and authority. Ensure the matching process takes care of this discrepancy.
     */
    Map<String, HoodieDataFile> viewDataFiles = fileGroups.stream()
            .flatMap(HoodieFileGroup::getAllRawFileSlices).map(FileSlice::getDataFile)
            .filter(Optional::isPresent).map(Optional::get)
            .map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    // Note: delta log files and data files can be empty when adding/removing pending compactions
    Map<String, HoodieDataFile> deltaDataFiles = deltaFileGroups.stream()
            .flatMap(HoodieFileGroup::getAllRawFileSlices).map(FileSlice::getDataFile)
            .filter(Optional::isPresent).map(Optional::get)
            .map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));

    Map<String, HoodieLogFile> viewLogFiles = fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
            .flatMap(FileSlice::getLogFiles)
            .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    Map<String, HoodieLogFile> deltaLogFiles = deltaFileGroups.stream()
            .flatMap(HoodieFileGroup::getAllRawFileSlices).flatMap(FileSlice::getLogFiles)
            .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));

    switch (mode) {
    case ADD:
        viewDataFiles.putAll(deltaDataFiles);
        viewLogFiles.putAll(deltaLogFiles);
        break;
    case REMOVE:
        deltaDataFiles.keySet().stream().forEach(p -> viewDataFiles.remove(p));
        deltaLogFiles.keySet().stream().forEach(p -> viewLogFiles.remove(p));
        break;
    default:
        throw new IllegalStateException("Unknown diff apply mode=" + mode);
    }

    HoodieTimeline timeline = deltaFileGroups.stream().map(df -> df.getTimeline()).findAny().get();
    List<HoodieFileGroup> fgs = buildFileGroups(viewDataFiles.values().stream(), viewLogFiles.values().stream(),
            timeline, true);
    storePartitionView(partition, fgs);
}

From source file:com.uber.hoodie.common.table.view.RocksDbBasedFileSystemView.java

License:Apache License

@Override
/**
 * This is overridden to incrementally apply file-slices to RocksDB.
 */
protected void applyDeltaFileSlicesToPartitionView(String partition, List<HoodieFileGroup> deltaFileGroups,
        DeltaApplyMode mode) {
    rocksDB.writeBatch(batch -> {
        deltaFileGroups.stream().forEach(fg -> {
            fg.getAllRawFileSlices().map(fs -> {
                FileSlice oldSlice = getFileSlice(partition, fs.getFileId(), fs.getBaseInstantTime());
                if (null == oldSlice) {
                    return fs;
                } else {
                    // First remove the file-slice
                    log.info("Removing old Slice in DB. FS=" + oldSlice);
                    rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForView(),
                            schemaHelper.getKeyForSliceView(fg, oldSlice));
                    rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForView(),
                            schemaHelper.getKeyForDataFileView(fg, oldSlice));

                    Map<String, HoodieLogFile> logFiles = oldSlice.getLogFiles().map(
                            lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
                            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
                    Map<String, HoodieLogFile> deltaLogFiles = fs.getLogFiles().map(
                            lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
                            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));

                    switch (mode) {
                    case ADD: {
                        FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(),
                                oldSlice.getBaseInstantTime());
                        oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                        fs.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                        Map<String, HoodieLogFile> newLogFiles = new HashMap<>(logFiles);
                        deltaLogFiles.entrySet().stream().filter(e -> !logFiles.containsKey(e.getKey()))
                                .forEach(p -> newLogFiles.put(p.getKey(), p.getValue()));
                        newLogFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
                        log.info("Adding back new File Slice after add FS=" + newFileSlice);
                        return newFileSlice;
                    }
                    case REMOVE: {
                        log.info("Removing old File Slice =" + fs);
                        FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(),
                                oldSlice.getBaseInstantTime());
                        fs.getDataFile().orElseGet(() -> {
                            oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                            return null;
                        });

                        deltaLogFiles.keySet().stream().forEach(p -> logFiles.remove(p));
                        //Add remaining log files back
                        logFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
                        if (newFileSlice.getDataFile().isPresent()
                                || (newFileSlice.getLogFiles().count() > 0)) {
                            log.info("Adding back new file-slice after remove FS=" + newFileSlice);
                            return newFileSlice;
                        }
                        return null;
                    }
                    default:
                        throw new IllegalStateException("Unknown diff apply mode=" + mode);
                    }
                }
            }).filter(Objects::nonNull).forEach(fs -> {
                rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(),
                        schemaHelper.getKeyForSliceView(fg, fs), fs);
                fs.getDataFile().ifPresent(df -> {
                    rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(),
                            schemaHelper.getKeyForDataFileView(fg, fs), df);
                });
            });
        });
    });
}

From source file:com.uber.hoodie.common.util.FailSafeConsistencyGuard.java

License:Apache License

/**
 * Helper function to wait for all files belonging to a single directory to appear or disappear.
 *
 * @param dirPath Dir Path
 * @param files Files to appear/disappear
 * @param event Appear/Disappear
 * @throws TimeoutException
 */
public void waitForFilesVisibility(String dirPath, List<String> files, FileVisibility event)
        throws TimeoutException {
    Path dir = new Path(dirPath);
    List<String> filesWithoutSchemeAndAuthority = files.stream()
            .map(f -> Path.getPathWithoutSchemeAndAuthority(new Path(f))).map(p -> p.toString())
            .collect(Collectors.toList());

    retryTillSuccess((retryNum) -> {
        try {
            log.info("Trying " + retryNum);
            FileStatus[] entries = fs.listStatus(dir);
            List<String> gotFiles = Arrays.stream(entries)
                    .map(e -> Path.getPathWithoutSchemeAndAuthority(e.getPath())).map(p -> p.toString())
                    .collect(Collectors.toList());
            List<String> candidateFiles = new ArrayList<>(filesWithoutSchemeAndAuthority);
            boolean altered = candidateFiles.removeAll(gotFiles);

            switch (event) {
            case DISAPPEAR:
                log.info("Following files are visible" + candidateFiles);
                // If no candidate files gets removed, it means all of them have disappeared
                return !altered;
            case APPEAR:
            default:
                // if all files appear, the list is empty
                return candidateFiles.isEmpty();
            }
        } catch (IOException ioe) {
            log.warn("Got IOException waiting for file event. Have tried " + retryNum + " time(s)", ioe);
        }
        return false;
    }, "Timed out waiting for filles to become visible");
}
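
A hedged usage sketch (the directory, file name, and the "guard" instance of FailSafeConsistencyGuard are illustrative): callers may pass fully qualified paths, since both the expected list and the listing results are normalized with getPathWithoutSchemeAndAuthority before comparison.

guard.waitForFilesVisibility("/data/table/2020/01",
        Arrays.asList("hdfs://nn:8020/data/table/2020/01/f1.parquet"),
        FileVisibility.APPEAR);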

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs) {
    Preconditions.checkArgument(markerPath.endsWith(MARKER_EXTN));
    String markerRootPath = Path
            .getPathWithoutSchemeAndAuthority(new Path(
                    String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs)))
            .toString();
    int begin = markerPath.indexOf(markerRootPath);
    Preconditions.checkArgument(begin >= 0,
            "Not in marker dir. Marker Path=" + markerPath + ", Expected Marker Root=" + markerRootPath);
    String rPath = markerPath.substring(begin + markerRootPath.length() + 1);
    return String.format("%s/%s%s", basePath, rPath.replace(MARKER_EXTN, ""),
            HoodieFileFormat.PARQUET.getFileExtension());
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Given a base path and a partition path, returns the
 * relative path of the partition path with respect to the base path.
 */
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
    basePath = Path.getPathWithoutSchemeAndAuthority(basePath);
    partitionPath = Path.getPathWithoutSchemeAndAuthority(partitionPath);
    String partitionFullPath = partitionPath.toString();
    int partitionStartIndex = partitionFullPath.indexOf(basePath.getName(),
            basePath.getParent() == null ? 0 : basePath.getParent().toString().length());
    // Partition-Path could be empty for non-partitioned tables
    return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? ""
            : partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
}