List of usage examples for org.apache.hadoop.fs.FileStatus.getModificationTime
public long getModificationTime()
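FileStatus.getModificationTime() returns the file's last modification time as milliseconds since the Unix epoch. Before the project-specific examples below, here is a minimal stand-alone sketch of the call; the default Configuration and the path /tmp/example.txt are placeholder assumptions, not part of any of the listed projects.

import java.io.IOException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModificationTimeExample {
    public static void main(String[] args) throws IOException {
        // Placeholder configuration; adjust to point at a real file in your cluster.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/tmp/example.txt"); // hypothetical path
        FileStatus status = fs.getFileStatus(path);

        // getModificationTime() returns milliseconds since the Unix epoch.
        long modTime = status.getModificationTime();
        System.out.println(path + " was last modified at " + new Date(modTime));
    }
}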
From source file: org.apache.metron.maas.service.Client.java
License: Apache License

private Path addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
            scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
    return dst;
}
From source file: org.apache.mrql.HDFSFileInputStream.java
License: Apache License

private ArrayList<String> new_files() {
    try {
        long ct = System.currentTimeMillis();
        Path dpath = new Path(directory);
        final FileSystem fs = dpath.getFileSystem(Plan.conf);
        final FileStatus[] ds = fs.listStatus(dpath, new PathFilter() {
            public boolean accept(Path path) {
                return !path.getName().startsWith("_") && !path.getName().endsWith(".type");
            }
        });
        ArrayList<String> s = new ArrayList<String>();
        for (FileStatus d : ds) {
            String name = d.getPath().toString();
            if (file_modification_times.get(name) == null
                    || d.getModificationTime() > file_modification_times.get(name)) {
                file_modification_times.put(name, new Long(ct));
                s.add(name);
            }
        }
        return s;
    } catch (Exception ex) {
        throw new Error("Cannot open a new file from the directory " + directory + ": " + ex);
    }
}
From source file: org.apache.nifi.processors.hadoop.GetHDFS.java
License: Apache License

/**
 * Poll HDFS for files to process that match the configured file filters.
 *
 * @param hdfs hdfs
 * @param dir dir
 * @param filesVisited filesVisited
 * @return files to process
 * @throws java.io.IOException ex
 */
protected Set<Path> selectFiles(final FileSystem hdfs, final Path dir, Set<Path> filesVisited)
        throws IOException, InterruptedException {
    if (null == filesVisited) {
        filesVisited = new HashSet<>();
    }

    if (!hdfs.exists(dir)) {
        throw new IOException("Selection directory " + dir.toString() + " doesn't appear to exist!");
    }

    final Set<Path> files = new HashSet<>();

    FileStatus[] fileStatuses = getUserGroupInformation()
            .doAs((PrivilegedExceptionAction<FileStatus[]>) () -> hdfs.listStatus(dir));
    for (final FileStatus file : fileStatuses) {
        if (files.size() >= MAX_WORKING_QUEUE_SIZE) {
            // no need to make the files set larger than what we would queue anyway
            break;
        }

        final Path canonicalFile = file.getPath();

        if (!filesVisited.add(canonicalFile)) {
            // skip files we've already seen (may be looping directory links)
            continue;
        }

        if (file.isDirectory() && processorConfig.getRecurseSubdirs()) {
            files.addAll(selectFiles(hdfs, canonicalFile, filesVisited));
        } else if (!file.isDirectory() && processorConfig.getPathFilter(dir).accept(canonicalFile)) {
            final long fileAge = System.currentTimeMillis() - file.getModificationTime();
            if (processorConfig.getMinimumAge() < fileAge && fileAge < processorConfig.getMaximumAge()) {
                files.add(canonicalFile);

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
                }
            }
        }
    }
    return files;
}
From source file: org.apache.nifi.processors.hadoop.ListHDFS.java
License: Apache License

/**
 * Determines which of the given FileStatus's describes a File that should be listed.
 *
 * @param statuses the eligible FileStatus objects that we could potentially list
 * @param context processor context with properties values
 * @return a Set containing only those FileStatus objects that we want to list
 */
Set<FileStatus> determineListable(final Set<FileStatus> statuses, ProcessContext context) {
    final long minTimestamp = this.latestTimestampListed;
    final TreeMap<Long, List<FileStatus>> orderedEntries = new TreeMap<>();

    final Long minAgeProp = context.getProperty(MIN_AGE).asTimePeriod(TimeUnit.MILLISECONDS);
    // NIFI-4144 - setting to MIN_VALUE so that in case the file modification time is in
    // the future relative to the nifi instance, files are not skipped.
    final long minimumAge = (minAgeProp == null) ? Long.MIN_VALUE : minAgeProp;
    final Long maxAgeProp = context.getProperty(MAX_AGE).asTimePeriod(TimeUnit.MILLISECONDS);
    final long maximumAge = (maxAgeProp == null) ? Long.MAX_VALUE : maxAgeProp;

    // Build a sorted map to determine the latest possible entries
    for (final FileStatus status : statuses) {
        if (status.getPath().getName().endsWith("_COPYING_")) {
            continue;
        }

        final long fileAge = System.currentTimeMillis() - status.getModificationTime();
        if (minimumAge > fileAge || fileAge > maximumAge) {
            continue;
        }

        final long entityTimestamp = status.getModificationTime();

        if (entityTimestamp > latestTimestampListed) {
            latestTimestampListed = entityTimestamp;
        }

        // New entries are all those that occur at or after the associated timestamp
        final boolean newEntry = entityTimestamp >= minTimestamp && entityTimestamp > latestTimestampEmitted;

        if (newEntry) {
            List<FileStatus> entitiesForTimestamp = orderedEntries.get(status.getModificationTime());
            if (entitiesForTimestamp == null) {
                entitiesForTimestamp = new ArrayList<FileStatus>();
                orderedEntries.put(status.getModificationTime(), entitiesForTimestamp);
            }
            entitiesForTimestamp.add(status);
        }
    }

    final Set<FileStatus> toList = new HashSet<>();

    if (orderedEntries.size() > 0) {
        long latestListingTimestamp = orderedEntries.lastKey();

        // If the last listing time is equal to the newest entries previously seen,
        // another iteration has occurred without new files and special handling is needed to avoid starvation
        if (latestListingTimestamp == minTimestamp) {
            // We are done if the latest listing timestamp is equal to the last processed time,
            // meaning we handled those items originally passed over
            if (latestListingTimestamp == latestTimestampEmitted) {
                return Collections.emptySet();
            }
        } else {
            // Otherwise, newest entries are held back one cycle to avoid issues in writes occurring
            // exactly when the listing is being performed to avoid missing data
            orderedEntries.remove(latestListingTimestamp);
        }

        for (List<FileStatus> timestampEntities : orderedEntries.values()) {
            for (FileStatus status : timestampEntities) {
                toList.add(status);
            }
        }
    }

    return toList;
}
From source file: org.apache.nifi.processors.hadoop.ListHDFS.java
License: Apache License

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // We have to ensure that we don't continually perform listings, because if we perform two listings within
    // the same millisecond, our algorithm for comparing timestamps will not work. So we ensure here that we do
    // not let that happen.
    final long now = System.nanoTime();
    if (now - lastRunTimestamp < LISTING_LAG_NANOS) {
        lastRunTimestamp = now;
        context.yield();
        return;
    }
    lastRunTimestamp = now;

    final String directory = context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue();

    // Ensure that we are using the latest listing information before we try to perform a listing of HDFS files.
    try {
        final StateMap stateMap = context.getStateManager().getState(Scope.CLUSTER);
        if (stateMap.getVersion() == -1L) {
            latestTimestampEmitted = -1L;
            latestTimestampListed = -1L;
            getLogger().debug("Found no state stored");
        } else {
            // Determine if state is stored in the 'new' format or the 'old' format
            final String emittedString = stateMap.get(EMITTED_TIMESTAMP_KEY);
            if (emittedString == null) {
                latestTimestampEmitted = -1L;
                latestTimestampListed = -1L;
                getLogger().debug(
                        "Found no recognized state keys; assuming no relevant state and resetting listing/emitted time to -1");
            } else {
                // state is stored in the new format, using just two timestamps
                latestTimestampEmitted = Long.parseLong(emittedString);
                final String listingTimestampString = stateMap.get(LISTING_TIMESTAMP_KEY);
                if (listingTimestampString != null) {
                    latestTimestampListed = Long.parseLong(listingTimestampString);
                }

                getLogger().debug(
                        "Found new-style state stored, latest timestamp emitted = {}, latest listed = {}",
                        new Object[] { latestTimestampEmitted, latestTimestampListed });
            }
        }
    } catch (final IOException ioe) {
        getLogger().error(
                "Failed to retrieve timestamp of last listing from Distributed Cache Service. Will not perform listing until this is accomplished.");
        context.yield();
        return;
    }

    // Pull in any file that is newer than the timestamp that we have.
    final FileSystem hdfs = getFileSystem();
    final boolean recursive = context.getProperty(RECURSE_SUBDIRS).asBoolean();

    final Set<FileStatus> statuses;
    try {
        final Path rootPath = new Path(directory);
        statuses = getStatuses(rootPath, recursive, hdfs, createPathFilter(context));
        getLogger().debug("Found a total of {} files in HDFS", new Object[] { statuses.size() });
    } catch (final IOException | IllegalArgumentException e) {
        getLogger().error("Failed to perform listing of HDFS due to {}", new Object[] { e });
        return;
    } catch (final InterruptedException e) {
        Thread.currentThread().interrupt();
        getLogger().error("Interrupted while performing listing of HDFS", e);
        return;
    }

    final Set<FileStatus> listable = determineListable(statuses, context);
    getLogger().debug("Of the {} files found in HDFS, {} are listable",
            new Object[] { statuses.size(), listable.size() });

    for (final FileStatus status : listable) {
        final Map<String, String> attributes = createAttributes(status);
        FlowFile flowFile = session.create();
        flowFile = session.putAllAttributes(flowFile, attributes);
        session.transfer(flowFile, REL_SUCCESS);

        final long fileModTime = status.getModificationTime();
        if (fileModTime > latestTimestampEmitted) {
            latestTimestampEmitted = fileModTime;
        }
    }

    final int listCount = listable.size();
    if (listCount > 0) {
        getLogger().info("Successfully created listing with {} new files from HDFS", new Object[] { listCount });
        session.commit();
    } else {
        getLogger().debug("There is no data to list. Yielding.");
        context.yield();
    }

    final Map<String, String> updatedState = new HashMap<>(1);
    updatedState.put(LISTING_TIMESTAMP_KEY, String.valueOf(latestTimestampListed));
    updatedState.put(EMITTED_TIMESTAMP_KEY, String.valueOf(latestTimestampEmitted));
    getLogger().debug("New state map: {}", new Object[] { updatedState });

    try {
        context.getStateManager().setState(updatedState, Scope.CLUSTER);
    } catch (final IOException ioe) {
        getLogger().warn("Failed to save cluster-wide state. If NiFi is restarted, data duplication may occur", ioe);
    }
}
From source file: org.apache.nifi.processors.hadoop.ListHDFS.java
License: Apache License

private Map<String, String> createAttributes(final FileStatus status) {
    final Map<String, String> attributes = new HashMap<>();
    attributes.put(CoreAttributes.FILENAME.key(), status.getPath().getName());
    attributes.put(CoreAttributes.PATH.key(), getAbsolutePath(status.getPath().getParent()));

    attributes.put("hdfs.owner", status.getOwner());
    attributes.put("hdfs.group", status.getGroup());
    attributes.put("hdfs.lastModified", String.valueOf(status.getModificationTime()));
    attributes.put("hdfs.length", String.valueOf(status.getLen()));
    attributes.put("hdfs.replication", String.valueOf(status.getReplication()));

    final FsPermission permission = status.getPermission();
    final String perms = getPerms(permission.getUserAction()) + getPerms(permission.getGroupAction())
            + getPerms(permission.getOtherAction());
    attributes.put("hdfs.permissions", perms);
    return attributes;
}
From source file: org.apache.oozie.tools.diag.OozieLauncherLogFetcher.java
License: Apache License

public int dumpAllContainersLogs(ApplicationId appId, String appOwner, PrintStream out) throws IOException {
    Path remoteRootLogDir = new Path(hadoopConfig.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
            YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
    String logDirSuffix = getRemoteNodeLogDirSuffix(hadoopConfig);
    Path remoteAppLogDir = getRemoteAppLogDir(remoteRootLogDir, appId, appOwner, logDirSuffix);

    RemoteIterator nodeFiles;
    try {
        Path qualifiedLogDir = FileContext.getFileContext(hadoopConfig).makeQualified(remoteAppLogDir);
        nodeFiles = FileContext.getFileContext(qualifiedLogDir.toUri(), hadoopConfig)
                .listStatus(remoteAppLogDir);
    } catch (FileNotFoundException fileNotFoundException) {
        logDirNotExist(remoteAppLogDir.toString());
        return -1;
    }

    boolean foundAnyLogs = false;

    while (true) {
        FileStatus thisNodeFile;
        do {
            if (!nodeFiles.hasNext()) {
                if (!foundAnyLogs) {
                    emptyLogDir(remoteAppLogDir.toString());
                    return -1;
                }
                return 0;
            }
            thisNodeFile = (FileStatus) nodeFiles.next();
        } while (thisNodeFile.getPath().getName().endsWith(TMP_FILE_SUFFIX));

        AggregatedLogFormat.LogReader reader =
                new AggregatedLogFormat.LogReader(hadoopConfig, thisNodeFile.getPath());

        try {
            AggregatedLogFormat.LogKey key = new AggregatedLogFormat.LogKey();
            DataInputStream valueStream = reader.next(key);

            while (valueStream != null) {
                String containerString = "\n\nContainer: " + key + " on " + thisNodeFile.getPath().getName();
                out.println(containerString);
                out.println(StringUtils.repeat("=", containerString.length()));

                while (true) {
                    try {
                        AggregatedLogFormat.LogReader.readAContainerLogsForALogType(valueStream, out,
                                thisNodeFile.getModificationTime());
                        foundAnyLogs = true;
                    } catch (EOFException eofException) {
                        key = new AggregatedLogFormat.LogKey();
                        valueStream = reader.next(key);
                        break;
                    }
                }
            }
        } finally {
            reader.close();
        }
    }
}
From source file: org.apache.orc.impl.ReaderImpl.java
License: Apache License

protected OrcTail extractFileTail(FileSystem fs, Path path, long maxFileLength) throws IOException {
    FSDataInputStream file = fs.open(path);
    ByteBuffer buffer;
    OrcProto.PostScript ps;
    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
    long modificationTime;
    try {
        // figure out the size of the file using the option or filesystem
        long size;
        if (maxFileLength == Long.MAX_VALUE) {
            FileStatus fileStatus = fs.getFileStatus(path);
            size = fileStatus.getLen();
            modificationTime = fileStatus.getModificationTime();
        } else {
            size = maxFileLength;
            modificationTime = -1;
        }
        fileTailBuilder.setFileLength(size);

        // read last bytes into buffer to get PostScript
        int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
        buffer = ByteBuffer.allocate(readSize);
        assert buffer.position() == 0;
        file.readFully((size - readSize), buffer.array(), buffer.arrayOffset(), readSize);
        buffer.position(0);

        // read the PostScript
        // get length of PostScript
        int psLen = buffer.get(readSize - 1) & 0xff;
        ensureOrcFooter(file, path, psLen, buffer);
        int psOffset = readSize - 1 - psLen;
        ps = extractPostScript(buffer, path, psLen, psOffset);
        bufferSize = (int) ps.getCompressionBlockSize();
        codec = WriterImpl.createCodec(CompressionKind.valueOf(ps.getCompression().name()));
        fileTailBuilder.setPostscriptLength(psLen).setPostscript(ps);

        int footerSize = (int) ps.getFooterLength();
        int metadataSize = (int) ps.getMetadataLength();

        // check if extra bytes need to be read
        int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
        int tailSize = 1 + psLen + footerSize + metadataSize;
        if (extra > 0) {
            // more bytes need to be read, seek back to the right place and read extra bytes
            ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
            file.readFully((size - readSize - extra), extraBuf.array(),
                    extraBuf.arrayOffset() + extraBuf.position(), extra);
            extraBuf.position(extra);
            // append with already read bytes
            extraBuf.put(buffer);
            buffer = extraBuf;
            buffer.position(0);
            buffer.limit(tailSize);
            readSize += extra;
            psOffset = readSize - 1 - psLen;
        } else {
            // footer is already in the bytes in buffer, just adjust position, length
            buffer.position(psOffset - footerSize - metadataSize);
            buffer.limit(buffer.position() + tailSize);
        }
        buffer.mark();

        int footerOffset = psOffset - footerSize;
        buffer.position(footerOffset);
        ByteBuffer footerBuffer = buffer.slice();
        buffer.reset();
        OrcProto.Footer footer = extractFooter(footerBuffer, 0, footerSize, codec, bufferSize);
        fileTailBuilder.setFooter(footer);
    } finally {
        try {
            file.close();
        } catch (IOException ex) {
            LOG.error("Failed to close the file after another error", ex);
        }
    }

    ByteBuffer serializedTail = ByteBuffer.allocate(buffer.remaining());
    serializedTail.put(buffer.slice());
    serializedTail.rewind();
    return new OrcTail(fileTailBuilder.build(), serializedTail, modificationTime);
}
From source file: org.apache.pig.backend.hadoop.datastorage.HPath.java
License: Apache License

public Map<String, Object> getStatistics() throws IOException {
    HashMap<String, Object> props = new HashMap<String, Object>();

    FileStatus fileStatus = fs.getHFS().getFileStatus(path);

    props.put(BLOCK_SIZE_KEY, fileStatus.getBlockSize());
    props.put(BLOCK_REPLICATION_KEY, fileStatus.getReplication());
    props.put(LENGTH_KEY, fileStatus.getLen());
    props.put(MODIFICATION_TIME_KEY, fileStatus.getModificationTime());

    return props;
}
From source file: org.apache.pig.backend.hadoop.executionengine.tez.TezResourceManager.java
License: Apache License

public Map<String, LocalResource> getTezResources(Set<String> resourceNames) throws Exception {
    Map<String, LocalResource> tezResources = new HashMap<String, LocalResource>();
    for (String resourceName : resourceNames) {
        // The resource name will be symlinked to the resource path in the
        // container's working directory.
        Path resourcePath = resources.get(resourceName);
        FileStatus fstat = remoteFs.getFileStatus(resourcePath);

        LocalResource tezResource = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(fstat.getPath()),
                LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
                fstat.getLen(), fstat.getModificationTime());

        tezResources.put(resourceName, tezResource);
    }
    return tezResources;
}