Example usage for org.apache.hadoop.fs FileStatus getModificationTime

List of usage examples for org.apache.hadoop.fs FileStatus getModificationTime

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getModificationTime().

Prototype

public long getModificationTime() 

Document

Get the modification time of the file.
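For orientation, here is a minimal, self-contained sketch of calling getModificationTime(). The local file system and the path /tmp/example.txt are placeholder assumptions for illustration, not part of the examples below; any Hadoop FileSystem and existing path will do.

import java.io.IOException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModificationTimeExample {
    public static void main(String[] args) throws IOException {
        // Use the local file system for simplicity; any Hadoop FileSystem works.
        FileSystem fs = FileSystem.getLocal(new Configuration());

        // Placeholder path; substitute a file that actually exists.
        Path path = new Path("/tmp/example.txt");

        // getModificationTime() returns milliseconds since the Unix epoch.
        FileStatus status = fs.getFileStatus(path);
        long modTime = status.getModificationTime();
        System.out.println("Last modified: " + new Date(modTime));
    }
}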

Usage

From source file: gobblin.compaction.mapreduce.MRCompactorTimeBasedJobPropCreator.java

License: Open Source License

/**
 * Check if inputFolder contains any files whose modification times are more recent
 * than the last compaction time stored in outputFolder, and return any such files.
 * An empty list is returned if all files are older than the last compaction time.
 */
private List<Path> getNewDataInFolder(Path inputFolder, Path outputFolder) throws IOException {
    List<Path> newFiles = Lists.newArrayList();

    Path filePath = new Path(outputFolder, ConfigurationKeys.COMPACTION_COMPLETE_FILE_NAME);
    Closer closer = Closer.create();
    try {
        FSDataInputStream completionFileStream = closer.register(this.fs.open(filePath));
        DateTime lastCompactionTime = new DateTime(completionFileStream.readLong(), this.timeZone);
        for (FileStatus fstat : HadoopUtils.listStatusRecursive(this.fs, inputFolder)) {
            DateTime fileModificationTime = new DateTime(fstat.getModificationTime(), this.timeZone);
            if (fileModificationTime.isAfter(lastCompactionTime)) {
                newFiles.add(fstat.getPath());
            }
        }
        if (!newFiles.isEmpty()) {
            LOG.info(String.format(
                    "Found %d new files within folder %s which are more recent than the previous "
                            + "compaction start time of %s.",
                    newFiles.size(), inputFolder, lastCompactionTime));
        }
    } catch (IOException e) {
        LOG.error("Failed to check for new data within folder: " + inputFolder, e);
    } catch (Throwable e) {
        throw closer.rethrow(e);
    } finally {
        closer.close();
    }
    return newFiles;
}

From source file: gobblin.data.management.copy.CopyableFileTest.java

License: Apache License

@Test
public void testCopyableFileBuilderMinimumConfiguration() throws IOException {
    // Source
    String datasetRootDir = "/data/databases/source";
    Path datasetRoot = new Path(datasetRootDir);
    FileSystem originFS = FileSystem.getLocal(new Configuration());
    Path originFile = new Path(datasetRootDir, "copyableFile");
    FileStatus origin = new FileStatus(0L, false, 0, 0L, System.currentTimeMillis(), originFile);
    PreserveAttributes preserveAttributes = PreserveAttributes.fromMnemonicString("ugp");

    // Target
    String targetRoot = "/data/databases/destination";
    Path relativePath = PathUtils.relativizePath(originFile, datasetRoot);
    Path targetPath = new Path(targetRoot, relativePath);

    Properties properties = new Properties();
    properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");
    CopyConfiguration copyConfiguration = CopyConfiguration
            .builder(FileSystem.getLocal(new Configuration()), properties).preserve(preserveAttributes).build();

    CopyableFile copyableFile = CopyableFile.builder(originFS, origin, datasetRoot, copyConfiguration)
            .destination(targetPath).ancestorsOwnerAndPermission(Lists.<OwnerAndPermission>newArrayList()) // not testing ancestors
            .build();

    // Making sure all fields are populated correctly via CopyableFile builder

    // Verify preserve attribute options
    Assert.assertEquals(copyableFile.getPreserve().toMnemonicString(), preserveAttributes.toMnemonicString());

    // Verify origin
    Assert.assertEquals(copyableFile.getFileSet(), "");
    Assert.assertEquals(copyableFile.getOrigin(), origin);

    // Verify destination target, permissions and other attributes
    Assert.assertEquals(copyableFile.getChecksum().length, 0);
    Assert.assertEquals(copyableFile.getDestination().toString(), targetPath.toString());
    Assert.assertEquals(copyableFile.getDestinationOwnerAndPermission().getGroup(), origin.getGroup());
    Assert.assertEquals(copyableFile.getDestinationOwnerAndPermission().getOwner(), origin.getOwner());
    Assert.assertEquals(copyableFile.getDestinationOwnerAndPermission().getFsPermission(),
            origin.getPermission());

    // Verify auto determined timestamp
    Assert.assertEquals(copyableFile.getOriginTimestamp(), origin.getModificationTime());
    Assert.assertEquals(copyableFile.getUpstreamTimestamp(), origin.getModificationTime());
}

From source file: gobblin.data.management.copy.hive.HiveCopyEntityHelper.java

License: Apache License

private static boolean shouldReplaceFile(FileStatus referencePath, FileStatus replacementFile) {
    return replacementFile.getLen() != referencePath.getLen()
            || referencePath.getModificationTime() < replacementFile.getModificationTime();
}

From source file: gobblin.data.management.copy.recovery.RecoveryHelper.java

License: Apache License

/**
 * Delete all persisted files older than the number of hours set by {@link #PERSIST_RETENTION_KEY}.
 * @throws IOException
 */
public void purgeOldPersistedFile() throws IOException {
    if (!this.persistDir.isPresent() || !this.fs.exists(this.persistDir.get())) {
        log.info("No persist directory to clean.");
        return;
    }

    long retentionMillis = TimeUnit.HOURS.toMillis(this.retentionHours);
    long now = System.currentTimeMillis();

    for (FileStatus fileStatus : this.fs.listStatus(this.persistDir.get())) {
        if (now - fileStatus.getModificationTime() > retentionMillis) {
            if (!this.fs.delete(fileStatus.getPath(), true)) {
                log.warn("Failed to delete path " + fileStatus.getPath());
            }
        }
    }
}

From source file: gobblin.data.management.copy.RecursiveCopyableDataset.java

License: Apache License

private static boolean sameFile(FileStatus fileInSource, FileStatus fileInTarget) {
    return fileInTarget.getLen() == fileInSource.getLen()
            && fileInSource.getModificationTime() <= fileInTarget.getModificationTime();
}

From source file: gobblin.data.management.copy.replication.ConfigBasedDataset.java

License: Apache License

@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs,
        CopyConfiguration copyConfiguration) throws IOException {
    List<CopyEntity> copyableFiles = Lists.newArrayList();
    EndPoint copyFromRaw = copyRoute.getCopyFrom();
    EndPoint copyToRaw = copyRoute.getCopyTo();
    if (!(copyFromRaw instanceof HadoopFsEndPoint && copyToRaw instanceof HadoopFsEndPoint)) {
        log.warn("Currently only handle the Hadoop Fs EndPoint replication");
        return copyableFiles;
    }

    if ((!copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent())
            || (copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent()
                    && copyFromRaw.getWatermark().get().compareTo(copyToRaw.getWatermark().get()) <= 0)) {
        log.info(
                "No need to copy as destination watermark >= source watermark with source watermark {}, for dataset with metadata {}",
                copyFromRaw.getWatermark().isPresent() ? copyFromRaw.getWatermark().get().toJson() : "N/A",
                this.rc.getMetaData());
        return copyableFiles;
    }

    HadoopFsEndPoint copyFrom = (HadoopFsEndPoint) copyFromRaw;
    HadoopFsEndPoint copyTo = (HadoopFsEndPoint) copyToRaw;
    Configuration conf = HadoopUtils.newConfiguration();
    FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf);
    FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);

    Collection<FileStatus> allFilesInSource = copyFrom.getFiles();
    Collection<FileStatus> allFilesInTarget = copyTo.getFiles();

    final PathFilter pathFilter = DatasetUtils.instantiatePathFilter(this.props);
    Predicate<FileStatus> predicate = new Predicate<FileStatus>() {
        @Override
        public boolean apply(FileStatus input) {
            return pathFilter.accept(input.getPath());
        }
    };

    Set<FileStatus> copyFromFileStatuses = Sets.newHashSet(Collections2.filter(allFilesInSource, predicate));
    Map<Path, FileStatus> copyToFileMap = Maps.newHashMap();
    for (FileStatus f : allFilesInTarget) {
        if (pathFilter.accept(f.getPath())) {
            copyToFileMap.put(PathUtils.getPathWithoutSchemeAndAuthority(f.getPath()), f);
        }
    }

    Collection<Path> deletedPaths = Lists.newArrayList();

    boolean watermarkMetadataCopied = false;

    boolean deleteTargetIfNotExistOnSource = rc.isDeleteTargetIfNotExistOnSource();

    for (FileStatus originFileStatus : copyFromFileStatuses) {
        Path relative = PathUtils.relativizePath(
                PathUtils.getPathWithoutSchemeAndAuthority(originFileStatus.getPath()),
                PathUtils.getPathWithoutSchemeAndAuthority(copyFrom.getDatasetPath()));
        // construct the new path in the target file system
        Path newPath = new Path(copyTo.getDatasetPath(), relative);

        if (relative.toString().equals(ReplicaHadoopFsEndPoint.WATERMARK_FILE)) {
            watermarkMetadataCopied = true;
        }

        // skip copy same file
        if (copyToFileMap.containsKey(newPath)
                && copyToFileMap.get(newPath).getLen() == originFileStatus.getLen()
                && copyToFileMap.get(newPath).getModificationTime() > originFileStatus.getModificationTime()) {
            log.debug(
                    "Copy from timestamp older than copy to timestamp, skipped copy {} for dataset with metadata {}",
                    originFileStatus.getPath(), this.rc.getMetaData());
        } else {
            // need to remove those files in the target File System
            if (copyToFileMap.containsKey(newPath)) {
                deletedPaths.add(newPath);
            }

            copyableFiles.add(CopyableFile
                    .fromOriginAndDestination(copyFromFs, originFileStatus, copyToFs.makeQualified(newPath),
                            copyConfiguration)
                    .fileSet(PathUtils.getPathWithoutSchemeAndAuthority(copyTo.getDatasetPath()).toString())
                    .build());

        }

        // clean up already checked paths
        copyToFileMap.remove(newPath);
    }

    // delete the paths on target directory if NOT exists on source
    if (deleteTargetIfNotExistOnSource) {
        deletedPaths.addAll(copyToFileMap.keySet());
    }

    // delete old files first
    if (!deletedPaths.isEmpty()) {
        DeleteFileCommitStep deleteCommitStep = DeleteFileCommitStep.fromPaths(copyToFs, deletedPaths,
                this.props);
        copyableFiles.add(new PrePublishStep(copyTo.getDatasetPath().toString(),
                Maps.<String, String>newHashMap(), deleteCommitStep, 0));
    }

    // generate the watermark file
    if ((!watermarkMetadataCopied) && copyFrom.getWatermark().isPresent()) {
        copyableFiles
                .add(new PostPublishStep(copyTo.getDatasetPath().toString(), Maps.<String, String>newHashMap(),
                        new WatermarkMetadataGenerationCommitStep(copyTo.getFsURI().toString(),
                                copyTo.getDatasetPath(), copyFrom.getWatermark().get()),
                        1));
    }

    return copyableFiles;
}

From source file: gobblin.data.management.copy.replication.SourceHadoopFsEndPoint.java

License: Apache License

@Override
public synchronized Optional<ComparableWatermark> getWatermark() {
    if (this.initialized) {
        return this.cachedWatermark;
    }

    this.initialized = true;

    try {
        long curTs = -1;
        FileSystem fs = FileSystem.get(rc.getFsURI(), new Configuration());

        Collection<Path> validPaths = ReplicationDataValidPathPicker.getValidPaths(this);
        for (Path p : validPaths) {
            this.allFileStatus.addAll(FileListUtils.listFilesRecursively(fs, p));
        }

        for (FileStatus f : this.allFileStatus) {
            if (f.getModificationTime() > curTs) {
                curTs = f.getModificationTime();
            }
        }

        ComparableWatermark result = new LongWatermark(curTs);
        this.cachedWatermark = Optional.of(result);
        return this.cachedWatermark;
    } catch (IOException e) {
        log.error("Error while retrieve the watermark for " + this);
        return this.cachedWatermark;
    }
}

From source file: gobblin.data.management.retention.policy.RawDatasetRetentionPolicy.java

License: Apache License

private Optional<Long> getLatestModTime(Iterable<Path> paths) throws IOException {
    long latestModTime = Long.MIN_VALUE;
    for (FileStatus status : FileListUtils.listMostNestedPathRecursively(this.fs, paths)) {
        latestModTime = Math.max(latestModTime, status.getModificationTime());
    }
    return latestModTime == Long.MIN_VALUE ? Optional.<Long>absent() : Optional.of(latestModTime);
}

From source file: gobblin.data.management.version.finder.FileLevelTimestampVersionFinder.java

License: Apache License

@Override
public Collection<TimestampedDatasetVersion> findDatasetVersions(Dataset dataset) {
    FileSystemDataset fsDataset = (FileSystemDataset) dataset;
    try {
        List<TimestampedDatasetVersion> timestampedVersions = Lists.newArrayList();
        for (FileStatus fileStatus : FileListUtils.listMostNestedPathRecursively(this.fs,
                fsDataset.datasetRoot())) {
            timestampedVersions.add(new TimestampedDatasetVersion(
                    new DateTime(fileStatus.getModificationTime()), fileStatus.getPath()));
        }
        return timestampedVersions;
    } catch (IOException e) {
        LOGGER.warn("Failed to get ModifiedTimeStamp for candidate dataset version at "
                + fsDataset.datasetRoot() + ". Ignoring.");
        return Lists.newArrayList();
    }
}

From source file: gobblin.data.management.version.finder.ModDateTimeDatasetVersionFinder.java

License: Apache License

@Override
public Collection<TimestampedDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
    FileSystemDataset fsDataset = (FileSystemDataset) dataset;
    FileStatus status = this.fs.getFileStatus(fsDataset.datasetRoot());
    return Lists.newArrayList(
            new TimestampedDatasetVersion(new DateTime(status.getModificationTime()), fsDataset.datasetRoot()));
}