Example usage for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority

List of usage examples for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path#getPathWithoutSchemeAndAuthority.

Prototype

public static Path getPathWithoutSchemeAndAuthority(Path path) 

Source Link

Document

Return a version of the given Path without the scheme and authority information.
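
For illustration, here is a minimal, self-contained sketch of what the method does (the path below is made up for this example):

import org.apache.hadoop.fs.Path;

public class PathStripExample {
    public static void main(String[] args) {
        // Fully qualified path with scheme ("hdfs") and authority ("namenode:8020")
        Path qualified = new Path("hdfs://namenode:8020/user/data/part-0000");

        // Strips scheme and authority, leaving only the path component
        Path stripped = Path.getPathWithoutSchemeAndAuthority(qualified);

        System.out.println(stripped); // prints: /user/data/part-0000
    }
}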

Usage

From source file:com.aliyun.odps.volume.VolumeFSUtil.java

License:Apache License

/**
 * Get the volume name from a specific {@link Path}.
 *
 * @param path the path to extract the volume name from
 * @return the volume name (the first path component)
 * @throws VolumeException if no volume can be found in the path
 */
public static String getVolumeFromPath(Path path) throws VolumeException {
    path = Path.getPathWithoutSchemeAndAuthority(path);
    if (path.depth() == 0) {
        throw new VolumeException(VolumeFSErrorCode.VolumeMissing, "No volume found!");
    } else {
        String p = path.toUri().getPath();
        String volume = p.split(VolumeFSConstants.SEPARATOR)[1];
        return volume;
    }
}
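
A hedged sketch of the same idea (the URI below is illustrative and not from the original source): after stripping the scheme and authority, the volume is simply the first path component, assuming the separator is "/".

Path p = Path.getPathWithoutSchemeAndAuthority(new Path("hdfs://nn:8020/myVolume/dir/file"));
String volume = p.toUri().getPath().split("/")[1]; // "myVolume"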

From source file:com.datatorrent.flume.storage.HDFSStorage.java

License:Open Source License

/**
 * Writes the given bytes to the file specified by the path.
 *
 * @param path the file location
 * @param data the data to be written to the file
 * @return the output stream the data was written to (left open for the caller)
 * @throws IOException
 */
private FSDataOutputStream writeData(Path path, byte[] data) throws IOException {
    FSDataOutputStream fsOutputStream;
    if (fs.getScheme().equals("file")) {
        // local FS does not support hflush and does not flush native stream
        fsOutputStream = new FSDataOutputStream(
                new FileOutputStream(Path.getPathWithoutSchemeAndAuthority(path).toString()), null);
    } else {
        fsOutputStream = fs.create(path);
    }
    fsOutputStream.write(data);
    return fsOutputStream;
}
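
The local-filesystem branch works because stripping the scheme turns a file: URI into a plain path that java.io can open directly. A minimal sketch, assuming a local file: path (file name is illustrative):

// "file:///tmp/out.bin" -> "/tmp/out.bin", which FileOutputStream accepts
Path p = new Path("file:///tmp/out.bin");
String localPath = Path.getPathWithoutSchemeAndAuthority(p).toString();
try (FileOutputStream out = new FileOutputStream(localPath)) {
    out.write(new byte[] {1, 2, 3});
}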

From source file:com.datatorrent.lib.io.fs.FileStitcher.java

License:Apache License

/**
 * Moves the temporary output file to the final destination file.
 *
 * @param tempOutFilePath
 *          Temporary output file
 * @param destination
 *          Destination file path
 * @throws IOException
 */
protected void moveToFinalFile(Path tempOutFilePath, Path destination) throws IOException {
    Path src = Path.getPathWithoutSchemeAndAuthority(tempOutFilePath);
    Path dst = Path.getPathWithoutSchemeAndAuthority(destination);

    boolean moveSuccessful = false;
    if (!outputFS.exists(dst.getParent())) {
        outputFS.mkdirs(dst.getParent());
    }
    if (outputFS.exists(dst)) {
        outputFS.delete(dst, false);
    }
    moveSuccessful = outputFS.rename(src, dst);

    if (moveSuccessful) {
        LOG.debug("File {} moved successfully to destination folder.", dst);
    } else {
        throw new RuntimeException("Unable to move file from " + src + " to " + dst);
    }
}
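
A hedged usage sketch from within a subclass (paths are illustrative): fully qualified inputs are fine, since the method normalizes both arguments before resolving them against outputFS.

moveToFinalFile(new Path("hdfs://nn:8020/staging/part-0000.tmp"),
        new Path("hdfs://nn:8020/output/part-0000"));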

From source file:com.datatorrent.stram.FSRecoveryHandler.java

License:Apache License

@Override
public DataOutputStream rotateLog() throws IOException {

    if (fs.exists(logBackupPath)) {
        // log backup is purged on snapshot/restore
        throw new AssertionError("Snapshot state prior to log rotation: " + logBackupPath);
    }

    if (fs.exists(logPath)) {
        LOG.debug("Creating log backup {}", logBackupPath);
        if (!fs.rename(logPath, logBackupPath)) {
            throw new IOException("Failed to rotate log: " + logPath);
        }
    }

    LOG.info("Creating {}", logPath);
    final FSDataOutputStream fsOutputStream;
    String scheme = null;
    try {
        scheme = fs.getScheme();
    } catch (UnsupportedOperationException e) {
        LOG.warn("{} doesn't implement getScheme() method", fs.getClass().getName());
    }
    if ("file".equals(scheme)) {
        // local FS does not support hflush and does not flush native stream
        FSUtil.mkdirs(fs, logPath.getParent());
        fsOutputStream = new FSDataOutputStream(
                new FileOutputStream(Path.getPathWithoutSchemeAndAuthority(logPath).toString()), null);
    } else {
        fsOutputStream = fs.create(logPath);
    }

    DataOutputStream osWrapper = new DataOutputStream(fsOutputStream) {
        @Override
        public void flush() throws IOException {
            super.flush();
            fsOutputStream.hflush();
        }

        @Override
        public void close() throws IOException {
            LOG.debug("Closing {}", logPath);
            super.close();
        }
    };
    return osWrapper;
}

From source file:com.uber.hoodie.common.table.view.IncrementalFSViewSyncTest.java

License:Apache License

/**
 * Check that two views are consistent with each other.
 *
 * @param view1 View1
 * @param view2 View2
 * @param expectedTotalFileSlices expected total number of file slices across all file groups
 */
private void areViewsConsistent(SyncableFileSystemView view1, SyncableFileSystemView view2,
        long expectedTotalFileSlices) {
    //Timeline check
    HoodieTimeline timeline1 = view1.getTimeline();
    HoodieTimeline timeline2 = view2.getTimeline();
    Assert.assertEquals(view1.getLastInstant(), view2.getLastInstant());
    // Assert (not just compute) that both timelines contain the same instants
    Assert.assertTrue(
            Iterators.elementsEqual(timeline1.getInstants().iterator(), timeline2.getInstants().iterator()));

    //View Checks
    Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap1 = partitions.stream()
            .flatMap(p -> view1.getAllFileGroups(p))
            .collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
    Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap2 = partitions.stream()
            .flatMap(p -> view2.getAllFileGroups(p))
            .collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
    Assert.assertEquals(fileGroupsMap1.keySet(), fileGroupsMap2.keySet());
    long gotSlicesCount = fileGroupsMap1.keySet().stream()
            .map(k -> Pair.of(fileGroupsMap1.get(k), fileGroupsMap2.get(k))).mapToLong(e -> {
                HoodieFileGroup fg1 = e.getKey();
                HoodieFileGroup fg2 = e.getValue();
                Assert.assertEquals(fg1.getFileGroupId(), fg2.getFileGroupId());
                List<FileSlice> slices1 = fg1.getAllRawFileSlices().collect(Collectors.toList());
                List<FileSlice> slices2 = fg2.getAllRawFileSlices().collect(Collectors.toList());
                Assert.assertEquals(slices1.size(), slices2.size());
                IntStream.range(0, slices1.size()).mapToObj(idx -> Pair.of(slices1.get(idx), slices2.get(idx)))
                        .forEach(e2 -> {
                            FileSlice slice1 = e2.getKey();
                            FileSlice slice2 = e2.getValue();
                            Assert.assertEquals(slice1.getBaseInstantTime(), slice2.getBaseInstantTime());
                            Assert.assertEquals(slice1.getFileId(), slice2.getFileId());
                            Assert.assertEquals(slice1.getDataFile().isPresent(),
                                    slice2.getDataFile().isPresent());
                            if (slice1.getDataFile().isPresent()) {
                                HoodieDataFile df1 = slice1.getDataFile().get();
                                HoodieDataFile df2 = slice2.getDataFile().get();
                                Assert.assertEquals(df1.getCommitTime(), df2.getCommitTime());
                                Assert.assertEquals(df1.getFileId(), df2.getFileId());
                                Assert.assertEquals(df1.getFileName(), df2.getFileName());
                                Assert.assertEquals(
                                        Path.getPathWithoutSchemeAndAuthority(new Path(df1.getPath())),
                                        Path.getPathWithoutSchemeAndAuthority(new Path(df2.getPath())));
                            }
                            List<Path> logPaths1 = slice1.getLogFiles()
                                    .map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath()))
                                    .collect(Collectors.toList());
                            List<Path> logPaths2 = slice2.getLogFiles()
                                    .map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath()))
                                    .collect(Collectors.toList());
                            Assert.assertEquals(logPaths1, logPaths2);
                        });
                return slices1.size();
            }).sum();
    Assert.assertEquals(expectedTotalFileSlices, gotSlicesCount);

    // Pending Compaction Operations Check
    Set<Pair<String, CompactionOperation>> ops1 = view1.getPendingCompactionOperations()
            .collect(Collectors.toSet());
    Set<Pair<String, CompactionOperation>> ops2 = view2.getPendingCompactionOperations()
            .collect(Collectors.toSet());
    Assert.assertEquals(ops1, ops2);
}

From source file:com.uber.hoodie.common.table.view.IncrementalTimelineSyncFileSystemView.java

License:Apache License

/**
 * Apply changes to the partition file-system view. The base implementation overwrites the entire partition view,
 * assuming some sort of map (in-memory/disk-based) is used. View implementations that support fine-grained
 * updates (e.g. RocksDB) should override this method.
 *
 * @param partition PartitionPath
 * @param deltaFileGroups Changed file-slices aggregated as file-groups
 * @param mode Delta Apply mode
 */
protected void applyDeltaFileSlicesToPartitionView(String partition, List<HoodieFileGroup> deltaFileGroups,
        DeltaApplyMode mode) {
    if (deltaFileGroups.isEmpty()) {
        log.info("No delta file groups for partition :" + partition);
        return;
    }

    List<HoodieFileGroup> fileGroups = fetchAllStoredFileGroups(partition).collect(Collectors.toList());
    /**
     * Note that while finding the new data/log files added/removed, the path stored in metadata will be missing
     * the base-path, scheme and authority. Ensure the matching process takes care of this discrepancy.
     */
    Map<String, HoodieDataFile> viewDataFiles = fileGroups.stream()
            .flatMap(HoodieFileGroup::getAllRawFileSlices).map(FileSlice::getDataFile)
            .filter(Optional::isPresent).map(Optional::get)
            .map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    // Note: delta log files and data files can be empty when adding/removing pending compactions
    Map<String, HoodieDataFile> deltaDataFiles = deltaFileGroups.stream()
            .flatMap(HoodieFileGroup::getAllRawFileSlices).map(FileSlice::getDataFile)
            .filter(Optional::isPresent).map(Optional::get)
            .map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));

    Map<String, HoodieLogFile> viewLogFiles = fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
            .flatMap(FileSlice::getLogFiles)
            .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    Map<String, HoodieLogFile> deltaLogFiles = deltaFileGroups.stream()
            .flatMap(HoodieFileGroup::getAllRawFileSlices).flatMap(FileSlice::getLogFiles)
            .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));

    switch (mode) {
    case ADD:
        viewDataFiles.putAll(deltaDataFiles);
        viewLogFiles.putAll(deltaLogFiles);
        break;
    case REMOVE:
        deltaDataFiles.keySet().stream().forEach(p -> viewDataFiles.remove(p));
        deltaLogFiles.keySet().stream().forEach(p -> viewLogFiles.remove(p));
        break;
    default:
        throw new IllegalStateException("Unknown diff apply mode=" + mode);
    }

    HoodieTimeline timeline = deltaFileGroups.stream().map(df -> df.getTimeline()).findAny().get();
    List<HoodieFileGroup> fgs = buildFileGroups(viewDataFiles.values().stream(), viewLogFiles.values().stream(),
            timeline, true);
    storePartitionView(partition, fgs);
}

From source file:com.uber.hoodie.common.table.view.RocksDbBasedFileSystemView.java

License:Apache License

@Override
/**
 * This is overridden to incrementally apply file-slices to RocksDB.
 */
protected void applyDeltaFileSlicesToPartitionView(String partition, List<HoodieFileGroup> deltaFileGroups,
        DeltaApplyMode mode) {
    rocksDB.writeBatch(batch -> {
        deltaFileGroups.stream().forEach(fg -> {
            fg.getAllRawFileSlices().map(fs -> {
                FileSlice oldSlice = getFileSlice(partition, fs.getFileId(), fs.getBaseInstantTime());
                if (null == oldSlice) {
                    return fs;
                } else {
                    // First remove the file-slice
                    log.info("Removing old Slice in DB. FS=" + oldSlice);
                    rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForView(),
                            schemaHelper.getKeyForSliceView(fg, oldSlice));
                    rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForView(),
                            schemaHelper.getKeyForDataFileView(fg, oldSlice));

                    Map<String, HoodieLogFile> logFiles = oldSlice.getLogFiles().map(
                            lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
                            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
                    Map<String, HoodieLogFile> deltaLogFiles = fs.getLogFiles().map(
                            lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
                            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));

                    switch (mode) {
                    case ADD: {
                        FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(),
                                oldSlice.getBaseInstantTime());
                        oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                        fs.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                        Map<String, HoodieLogFile> newLogFiles = new HashMap<>(logFiles);
                        deltaLogFiles.entrySet().stream().filter(e -> !logFiles.containsKey(e.getKey()))
                                .forEach(p -> newLogFiles.put(p.getKey(), p.getValue()));
                        newLogFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
                        log.info("Adding back new File Slice after add FS=" + newFileSlice);
                        return newFileSlice;
                    }
                    case REMOVE: {
                        log.info("Removing old File Slice =" + fs);
                        FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(),
                                oldSlice.getBaseInstantTime());
                        fs.getDataFile().orElseGet(() -> {
                            oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                            return null;
                        });

                        deltaLogFiles.keySet().stream().forEach(p -> logFiles.remove(p));
                        //Add remaining log files back
                        logFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
                        if (newFileSlice.getDataFile().isPresent()
                                || (newFileSlice.getLogFiles().count() > 0)) {
                            log.info("Adding back new file-slice after remove FS=" + newFileSlice);
                            return newFileSlice;
                        }
                        return null;
                    }
                    default:
                        throw new IllegalStateException("Unknown diff apply mode=" + mode);
                    }
                }
            }).filter(Objects::nonNull).forEach(fs -> {
                rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(),
                        schemaHelper.getKeyForSliceView(fg, fs), fs);
                fs.getDataFile().ifPresent(df -> {
                    rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(),
                            schemaHelper.getKeyForDataFileView(fg, fs), df);
                });
            });
        });
    });
}

From source file:com.uber.hoodie.common.util.FailSafeConsistencyGuard.java

License:Apache License

/**
 * Helper function to wait for all files belonging to a single directory to appear or disappear.
 *
 * @param dirPath Dir Path
 * @param files Files to appear/disappear
 * @param event Appear/Disappear
 * @throws TimeoutException
 */
public void waitForFilesVisibility(String dirPath, List<String> files, FileVisibility event)
        throws TimeoutException {
    Path dir = new Path(dirPath);
    List<String> filesWithoutSchemeAndAuthority = files.stream()
            .map(f -> Path.getPathWithoutSchemeAndAuthority(new Path(f))).map(p -> p.toString())
            .collect(Collectors.toList());

    retryTillSuccess((retryNum) -> {
        try {
            log.info("Trying " + retryNum);
            FileStatus[] entries = fs.listStatus(dir);
            List<String> gotFiles = Arrays.stream(entries)
                    .map(e -> Path.getPathWithoutSchemeAndAuthority(e.getPath())).map(p -> p.toString())
                    .collect(Collectors.toList());
            List<String> candidateFiles = new ArrayList<>(filesWithoutSchemeAndAuthority);
            boolean altered = candidateFiles.removeAll(gotFiles);

            switch (event) {
            case DISAPPEAR:
                log.info("Following files are visible" + candidateFiles);
                // If no candidate files gets removed, it means all of them have disappeared
                return !altered;
            case APPEAR:
            default:
                // if all files appear, the list is empty
                return candidateFiles.isEmpty();
            }
        } catch (IOException ioe) {
            log.warn("Got IOException waiting for file event. Have tried " + retryNum + " time(s)", ioe);
        }
        return false;
    }, "Timed out waiting for filles to become visible");
}
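
A hedged usage sketch (the directory, file name, and the "guard" instance of FailSafeConsistencyGuard are illustrative): callers may pass fully qualified paths, since both the expected list and the listing results are normalized with getPathWithoutSchemeAndAuthority before comparison.

guard.waitForFilesVisibility("/data/table/2020/01",
        Arrays.asList("hdfs://nn:8020/data/table/2020/01/f1.parquet"),
        FileVisibility.APPEAR);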

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs) {
    Preconditions.checkArgument(markerPath.endsWith(MARKER_EXTN));
    String markerRootPath = Path
            .getPathWithoutSchemeAndAuthority(new Path(
                    String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs)))
            .toString();
    int begin = markerPath.indexOf(markerRootPath);
    Preconditions.checkArgument(begin >= 0,
            "Not in marker dir. Marker Path=" + markerPath + ", Expected Marker Root=" + markerRootPath);
    String rPath = markerPath.substring(begin + markerRootPath.length() + 1);
    return String.format("%s/%s%s", basePath, rPath.replace(MARKER_EXTN, ""),
            HoodieFileFormat.PARQUET.getFileExtension());
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Given a base path and a partition path, returns the
 * relative path of the partition path with respect to the base path.
 */
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
    basePath = Path.getPathWithoutSchemeAndAuthority(basePath);
    partitionPath = Path.getPathWithoutSchemeAndAuthority(partitionPath);
    String partitionFullPath = partitionPath.toString();
    int partitionStartIndex = partitionFullPath.indexOf(basePath.getName(),
            basePath.getParent() == null ? 0 : basePath.getParent().toString().length());
    // Partition-Path could be empty for non-partitioned tables
    return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? ""
            : partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
}