List of usage examples for org.apache.hadoop.fs.FileStatus.getModificationTime
public long getModificationTime()
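FileStatus.getModificationTime() returns the file's last modification time as milliseconds since the Unix epoch. Before the project-specific examples below, here is a minimal stand-alone sketch of the call; the default Configuration and the path /tmp/example.txt are placeholder assumptions, not part of any of the listed projects.

import java.io.IOException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModificationTimeExample {
    public static void main(String[] args) throws IOException {
        // Placeholder configuration; adjust to point at a real file in your cluster.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/tmp/example.txt"); // hypothetical path
        FileStatus status = fs.getFileStatus(path);

        // getModificationTime() returns milliseconds since the Unix epoch.
        long modTime = status.getModificationTime();
        System.out.println(path + " was last modified at " + new Date(modTime));
    }
}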
From source file: org.apache.metron.maas.service.Client.java
License: Apache License

private Path addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
            scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
    return dst;
}
From source file: org.apache.mrql.HDFSFileInputStream.java
License: Apache License

private ArrayList<String> new_files() {
    try {
        long ct = System.currentTimeMillis();
        Path dpath = new Path(directory);
        final FileSystem fs = dpath.getFileSystem(Plan.conf);
        final FileStatus[] ds = fs.listStatus(dpath, new PathFilter() {
            public boolean accept(Path path) {
                return !path.getName().startsWith("_") && !path.getName().endsWith(".type");
            }
        });
        ArrayList<String> s = new ArrayList<String>();
        for (FileStatus d : ds) {
            String name = d.getPath().toString();
            if (file_modification_times.get(name) == null
                    || d.getModificationTime() > file_modification_times.get(name)) {
                file_modification_times.put(name, new Long(ct));
                s.add(name);
            }
        }
        return s;
    } catch (Exception ex) {
        throw new Error("Cannot open a new file from the directory " + directory + ": " + ex);
    }
}
From source file: org.apache.nifi.processors.hadoop.GetHDFS.java
License: Apache License

/**
 * Poll HDFS for files to process that match the configured file filters.
 *
 * @param hdfs hdfs
 * @param dir dir
 * @param filesVisited filesVisited
 * @return files to process
 * @throws java.io.IOException ex
 */
protected Set<Path> selectFiles(final FileSystem hdfs, final Path dir, Set<Path> filesVisited)
        throws IOException, InterruptedException {
    if (null == filesVisited) {
        filesVisited = new HashSet<>();
    }

    if (!hdfs.exists(dir)) {
        throw new IOException("Selection directory " + dir.toString() + " doesn't appear to exist!");
    }

    final Set<Path> files = new HashSet<>();

    FileStatus[] fileStatuses = getUserGroupInformation()
            .doAs((PrivilegedExceptionAction<FileStatus[]>) () -> hdfs.listStatus(dir));
    for (final FileStatus file : fileStatuses) {
        if (files.size() >= MAX_WORKING_QUEUE_SIZE) {
            // no need to make the files set larger than what we would queue anyway
            break;
        }

        final Path canonicalFile = file.getPath();

        if (!filesVisited.add(canonicalFile)) {
            // skip files we've already seen (may be looping directory links)
            continue;
        }

        if (file.isDirectory() && processorConfig.getRecurseSubdirs()) {
            files.addAll(selectFiles(hdfs, canonicalFile, filesVisited));
        } else if (!file.isDirectory() && processorConfig.getPathFilter(dir).accept(canonicalFile)) {
            final long fileAge = System.currentTimeMillis() - file.getModificationTime();
            if (processorConfig.getMinimumAge() < fileAge && fileAge < processorConfig.getMaximumAge()) {
                files.add(canonicalFile);

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
                }
            }
        }
    }
    return files;
}
From source file: org.apache.nifi.processors.hadoop.ListHDFS.java
License: Apache License

/**
 * Determines which of the given FileStatus's describes a File that should be listed.
 *
 * @param statuses the eligible FileStatus objects that we could potentially list
 * @param context processor context with properties values
 * @return a Set containing only those FileStatus objects that we want to list
 */
Set<FileStatus> determineListable(final Set<FileStatus> statuses, ProcessContext context) {
    final long minTimestamp = this.latestTimestampListed;
    final TreeMap<Long, List<FileStatus>> orderedEntries = new TreeMap<>();

    final Long minAgeProp = context.getProperty(MIN_AGE).asTimePeriod(TimeUnit.MILLISECONDS);
    // NIFI-4144 - setting to MIN_VALUE so that in case the file modification time is in
    // the future relative to the nifi instance, files are not skipped.
    final long minimumAge = (minAgeProp == null) ? Long.MIN_VALUE : minAgeProp;
    final Long maxAgeProp = context.getProperty(MAX_AGE).asTimePeriod(TimeUnit.MILLISECONDS);
    final long maximumAge = (maxAgeProp == null) ? Long.MAX_VALUE : maxAgeProp;

    // Build a sorted map to determine the latest possible entries
    for (final FileStatus status : statuses) {
        if (status.getPath().getName().endsWith("_COPYING_")) {
            continue;
        }

        final long fileAge = System.currentTimeMillis() - status.getModificationTime();
        if (minimumAge > fileAge || fileAge > maximumAge) {
            continue;
        }

        final long entityTimestamp = status.getModificationTime();

        if (entityTimestamp > latestTimestampListed) {
            latestTimestampListed = entityTimestamp;
        }

        // New entries are all those that occur at or after the associated timestamp
        final boolean newEntry = entityTimestamp >= minTimestamp && entityTimestamp > latestTimestampEmitted;

        if (newEntry) {
            List<FileStatus> entitiesForTimestamp = orderedEntries.get(status.getModificationTime());
            if (entitiesForTimestamp == null) {
                entitiesForTimestamp = new ArrayList<FileStatus>();
                orderedEntries.put(status.getModificationTime(), entitiesForTimestamp);
            }
            entitiesForTimestamp.add(status);
        }
    }

    final Set<FileStatus> toList = new HashSet<>();

    if (orderedEntries.size() > 0) {
        long latestListingTimestamp = orderedEntries.lastKey();

        // If the last listing time is equal to the newest entries previously seen,
        // another iteration has occurred without new files and special handling is needed to avoid starvation
        if (latestListingTimestamp == minTimestamp) {
            // We are done if the latest listing timestamp is equal to the last processed time,
            // meaning we handled those items originally passed over
            if (latestListingTimestamp == latestTimestampEmitted) {
                return Collections.emptySet();
            }
        } else {
            // Otherwise, newest entries are held back one cycle to avoid issues in writes occurring
            // exactly when the listing is being performed to avoid missing data
            orderedEntries.remove(latestListingTimestamp);
        }

        for (List<FileStatus> timestampEntities : orderedEntries.values()) {
            for (FileStatus status : timestampEntities) {
                toList.add(status);
            }
        }
    }

    return toList;
}
From source file: org.apache.nifi.processors.hadoop.ListHDFS.java
License: Apache License

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // We have to ensure that we don't continually perform listings, because if we perform two listings within
    // the same millisecond, our algorithm for comparing timestamps will not work. So we ensure here that we do
    // not let that happen.
    final long now = System.nanoTime();
    if (now - lastRunTimestamp < LISTING_LAG_NANOS) {
        lastRunTimestamp = now;
        context.yield();
        return;
    }
    lastRunTimestamp = now;

    final String directory = context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue();

    // Ensure that we are using the latest listing information before we try to perform a listing of HDFS files.
    try {
        final StateMap stateMap = context.getStateManager().getState(Scope.CLUSTER);
        if (stateMap.getVersion() == -1L) {
            latestTimestampEmitted = -1L;
            latestTimestampListed = -1L;
            getLogger().debug("Found no state stored");
        } else {
            // Determine if state is stored in the 'new' format or the 'old' format
            final String emittedString = stateMap.get(EMITTED_TIMESTAMP_KEY);
            if (emittedString == null) {
                latestTimestampEmitted = -1L;
                latestTimestampListed = -1L;
                getLogger().debug(
                        "Found no recognized state keys; assuming no relevant state and resetting listing/emitted time to -1");
            } else {
                // state is stored in the new format, using just two timestamps
                latestTimestampEmitted = Long.parseLong(emittedString);
                final String listingTimestampString = stateMap.get(LISTING_TIMESTAMP_KEY);
                if (listingTimestampString != null) {
                    latestTimestampListed = Long.parseLong(listingTimestampString);
                }

                getLogger().debug(
                        "Found new-style state stored, latest timestamp emitted = {}, latest listed = {}",
                        new Object[] { latestTimestampEmitted, latestTimestampListed });
            }
        }
    } catch (final IOException ioe) {
        getLogger().error(
                "Failed to retrieve timestamp of last listing from Distributed Cache Service. Will not perform listing until this is accomplished.");
        context.yield();
        return;
    }

    // Pull in any file that is newer than the timestamp that we have.
    final FileSystem hdfs = getFileSystem();
    final boolean recursive = context.getProperty(RECURSE_SUBDIRS).asBoolean();

    final Set<FileStatus> statuses;
    try {
        final Path rootPath = new Path(directory);
        statuses = getStatuses(rootPath, recursive, hdfs, createPathFilter(context));
        getLogger().debug("Found a total of {} files in HDFS", new Object[] { statuses.size() });
    } catch (final IOException | IllegalArgumentException e) {
        getLogger().error("Failed to perform listing of HDFS due to {}", new Object[] { e });
        return;
    } catch (final InterruptedException e) {
        Thread.currentThread().interrupt();
        getLogger().error("Interrupted while performing listing of HDFS", e);
        return;
    }

    final Set<FileStatus> listable = determineListable(statuses, context);
    getLogger().debug("Of the {} files found in HDFS, {} are listable",
            new Object[] { statuses.size(), listable.size() });

    for (final FileStatus status : listable) {
        final Map<String, String> attributes = createAttributes(status);
        FlowFile flowFile = session.create();
        flowFile = session.putAllAttributes(flowFile, attributes);
        session.transfer(flowFile, REL_SUCCESS);

        final long fileModTime = status.getModificationTime();
        if (fileModTime > latestTimestampEmitted) {
            latestTimestampEmitted = fileModTime;
        }
    }

    final int listCount = listable.size();
    if (listCount > 0) {
        getLogger().info("Successfully created listing with {} new files from HDFS", new Object[] { listCount });
        session.commit();
    } else {
        getLogger().debug("There is no data to list. Yielding.");
        context.yield();
    }

    final Map<String, String> updatedState = new HashMap<>(1);
    updatedState.put(LISTING_TIMESTAMP_KEY, String.valueOf(latestTimestampListed));
    updatedState.put(EMITTED_TIMESTAMP_KEY, String.valueOf(latestTimestampEmitted));
    getLogger().debug("New state map: {}", new Object[] { updatedState });

    try {
        context.getStateManager().setState(updatedState, Scope.CLUSTER);
    } catch (final IOException ioe) {
        getLogger().warn("Failed to save cluster-wide state. If NiFi is restarted, data duplication may occur", ioe);
    }
}
From source file: org.apache.nifi.processors.hadoop.ListHDFS.java
License: Apache License

private Map<String, String> createAttributes(final FileStatus status) {
    final Map<String, String> attributes = new HashMap<>();
    attributes.put(CoreAttributes.FILENAME.key(), status.getPath().getName());
    attributes.put(CoreAttributes.PATH.key(), getAbsolutePath(status.getPath().getParent()));

    attributes.put("hdfs.owner", status.getOwner());
    attributes.put("hdfs.group", status.getGroup());
    attributes.put("hdfs.lastModified", String.valueOf(status.getModificationTime()));
    attributes.put("hdfs.length", String.valueOf(status.getLen()));
    attributes.put("hdfs.replication", String.valueOf(status.getReplication()));

    final FsPermission permission = status.getPermission();
    final String perms = getPerms(permission.getUserAction()) + getPerms(permission.getGroupAction())
            + getPerms(permission.getOtherAction());
    attributes.put("hdfs.permissions", perms);
    return attributes;
}
From source file: org.apache.oozie.tools.diag.OozieLauncherLogFetcher.java
License: Apache License

public int dumpAllContainersLogs(ApplicationId appId, String appOwner, PrintStream out) throws IOException {
    Path remoteRootLogDir = new Path(hadoopConfig.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
            YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
    String logDirSuffix = getRemoteNodeLogDirSuffix(hadoopConfig);
    Path remoteAppLogDir = getRemoteAppLogDir(remoteRootLogDir, appId, appOwner, logDirSuffix);

    RemoteIterator nodeFiles;
    try {
        Path qualifiedLogDir = FileContext.getFileContext(hadoopConfig).makeQualified(remoteAppLogDir);
        nodeFiles = FileContext.getFileContext(qualifiedLogDir.toUri(), hadoopConfig)
                .listStatus(remoteAppLogDir);
    } catch (FileNotFoundException fileNotFoundException) {
        logDirNotExist(remoteAppLogDir.toString());
        return -1;
    }

    boolean foundAnyLogs = false;

    while (true) {
        FileStatus thisNodeFile;
        do {
            if (!nodeFiles.hasNext()) {
                if (!foundAnyLogs) {
                    emptyLogDir(remoteAppLogDir.toString());
                    return -1;
                }
                return 0;
            }
            thisNodeFile = (FileStatus) nodeFiles.next();
        } while (thisNodeFile.getPath().getName().endsWith(TMP_FILE_SUFFIX));

        AggregatedLogFormat.LogReader reader =
                new AggregatedLogFormat.LogReader(hadoopConfig, thisNodeFile.getPath());

        try {
            AggregatedLogFormat.LogKey key = new AggregatedLogFormat.LogKey();
            DataInputStream valueStream = reader.next(key);

            while (valueStream != null) {
                String containerString = "\n\nContainer: " + key + " on " + thisNodeFile.getPath().getName();
                out.println(containerString);
                out.println(StringUtils.repeat("=", containerString.length()));

                while (true) {
                    try {
                        AggregatedLogFormat.LogReader.readAContainerLogsForALogType(valueStream, out,
                                thisNodeFile.getModificationTime());
                        foundAnyLogs = true;
                    } catch (EOFException eofException) {
                        key = new AggregatedLogFormat.LogKey();
                        valueStream = reader.next(key);
                        break;
                    }
                }
            }
        } finally {
            reader.close();
        }
    }
}
From source file: org.apache.orc.impl.ReaderImpl.java
License: Apache License

protected OrcTail extractFileTail(FileSystem fs, Path path, long maxFileLength) throws IOException {
    FSDataInputStream file = fs.open(path);
    ByteBuffer buffer;
    OrcProto.PostScript ps;
    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
    long modificationTime;
    try {
        // figure out the size of the file using the option or filesystem
        long size;
        if (maxFileLength == Long.MAX_VALUE) {
            FileStatus fileStatus = fs.getFileStatus(path);
            size = fileStatus.getLen();
            modificationTime = fileStatus.getModificationTime();
        } else {
            size = maxFileLength;
            modificationTime = -1;
        }
        fileTailBuilder.setFileLength(size);

        // read last bytes into buffer to get PostScript
        int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
        buffer = ByteBuffer.allocate(readSize);
        assert buffer.position() == 0;
        file.readFully((size - readSize), buffer.array(), buffer.arrayOffset(), readSize);
        buffer.position(0);

        // read the PostScript
        // get length of PostScript
        int psLen = buffer.get(readSize - 1) & 0xff;
        ensureOrcFooter(file, path, psLen, buffer);
        int psOffset = readSize - 1 - psLen;
        ps = extractPostScript(buffer, path, psLen, psOffset);
        bufferSize = (int) ps.getCompressionBlockSize();
        codec = WriterImpl.createCodec(CompressionKind.valueOf(ps.getCompression().name()));
        fileTailBuilder.setPostscriptLength(psLen).setPostscript(ps);

        int footerSize = (int) ps.getFooterLength();
        int metadataSize = (int) ps.getMetadataLength();

        // check if extra bytes need to be read
        int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
        int tailSize = 1 + psLen + footerSize + metadataSize;
        if (extra > 0) {
            // more bytes need to be read, seek back to the right place and read extra bytes
            ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
            file.readFully((size - readSize - extra), extraBuf.array(),
                    extraBuf.arrayOffset() + extraBuf.position(), extra);
            extraBuf.position(extra);
            // append with already read bytes
            extraBuf.put(buffer);
            buffer = extraBuf;
            buffer.position(0);
            buffer.limit(tailSize);
            readSize += extra;
            psOffset = readSize - 1 - psLen;
        } else {
            // footer is already in the bytes in buffer, just adjust position, length
            buffer.position(psOffset - footerSize - metadataSize);
            buffer.limit(buffer.position() + tailSize);
        }
        buffer.mark();

        int footerOffset = psOffset - footerSize;
        buffer.position(footerOffset);
        ByteBuffer footerBuffer = buffer.slice();
        buffer.reset();
        OrcProto.Footer footer = extractFooter(footerBuffer, 0, footerSize, codec, bufferSize);
        fileTailBuilder.setFooter(footer);
    } finally {
        try {
            file.close();
        } catch (IOException ex) {
            LOG.error("Failed to close the file after another error", ex);
        }
    }

    ByteBuffer serializedTail = ByteBuffer.allocate(buffer.remaining());
    serializedTail.put(buffer.slice());
    serializedTail.rewind();
    return new OrcTail(fileTailBuilder.build(), serializedTail, modificationTime);
}
From source file: org.apache.pig.backend.hadoop.datastorage.HPath.java
License: Apache License

public Map<String, Object> getStatistics() throws IOException {
    HashMap<String, Object> props = new HashMap<String, Object>();

    FileStatus fileStatus = fs.getHFS().getFileStatus(path);

    props.put(BLOCK_SIZE_KEY, fileStatus.getBlockSize());
    props.put(BLOCK_REPLICATION_KEY, fileStatus.getReplication());
    props.put(LENGTH_KEY, fileStatus.getLen());
    props.put(MODIFICATION_TIME_KEY, fileStatus.getModificationTime());

    return props;
}
From source file: org.apache.pig.backend.hadoop.executionengine.tez.TezResourceManager.java
License: Apache License

public Map<String, LocalResource> getTezResources(Set<String> resourceNames) throws Exception {
    Map<String, LocalResource> tezResources = new HashMap<String, LocalResource>();
    for (String resourceName : resourceNames) {
        // The resource name will be symlinked to the resource path in the
        // container's working directory.
        Path resourcePath = resources.get(resourceName);
        FileStatus fstat = remoteFs.getFileStatus(resourcePath);

        LocalResource tezResource = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(fstat.getPath()),
                LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
                fstat.getLen(), fstat.getModificationTime());

        tezResources.put(resourceName, tezResource);
    }
    return tezResources;
}