List of usage examples for org.apache.hadoop.fs.FileStatus#getModificationTime
public long getModificationTime()
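The method returns the file's last-modification time in milliseconds since the epoch. A minimal standalone sketch, assuming a reachable default filesystem; the path "/tmp/example.txt" is hypothetical and used only for illustration:

import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModificationTimeExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // hypothetical path; replace with a file that exists on your cluster
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        long mtimeMs = status.getModificationTime(); // milliseconds since the epoch
        System.out.println("Last modified: " + new Date(mtimeMs));
    }
}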
From source file:com.stumbleupon.hbaseadmin.ClusterUtils.java
License:Open Source License
/**
 * Remove any regions that do not qualify for compaction.
 *
 * @param admin The HBase admin
 * @param serverName The server name
 * @param server The HRegion interface
 * @return The next eligible region, or null if none qualifies
 * @throws IOException
 */
private HRegionInfo getNextEligibleRegion(HBaseAdmin admin, ServerName serverName, HRegionInterface server)
        throws IOException {
    HRegionInfo ret = null;
    List<HRegionInfo> onlineRegions = server.getOnlineRegions();
    String hostport = serverName.getHostAndPort();
    HServerLoad serverLoad = clusterStatus.getLoad(serverName);
    if (serverLoad == null) {
        LOG.warn("Skipping server {} because could not get server load", hostport);
    } else {
        List<String> tableNames = compact.getTableNames();
        boolean excludeFromList = compact.getExcludeTables();
        Map<byte[], RegionLoad> regionLoadMap = serverLoad.getRegionsLoad();
        List<String> reasons = new ArrayList<String>();

        for (HRegionInfo region : onlineRegions) {
            String regionName = region.getRegionNameAsString();
            String tableName = region.getTableNameAsString();
            reasons.clear();

            // Ignore any regions in tables that are marked as excluded
            if (tableNames.size() > 0) {
                if (excludeFromList && tableNames.contains(tableName)) {
                    continue;
                } else if (!excludeFromList && !tableNames.contains(tableName)) {
                    continue;
                } else if (LOG.isDebugEnabled()) {
                    reasons.add(hostport + " [" + regionName + "] qualifies because its table '" + tableName
                            + "' has NOT been excluded");
                }
            }

            // Ignore any regions that we have already visited/compacted
            if (visitedRegions.isRegionVisited(hostport, regionName)) {
                continue;
            } else if (LOG.isDebugEnabled()) {
                reasons.add(hostport + " [" + regionName + "] qualifies because it has NOT been visited");
            }

            // Remove any regions that do not have enough store files to qualify for compaction
            RegionLoad regionLoad = regionLoadMap.get(region.getRegionName());
            boolean isRegionEligible = true;
            if (regionLoad == null) {
                LOG.warn("Could not get region load for '{}'. Skipping region...", regionName);
                continue;
            } else {
                try {
                    int numFamilies = getTableDescriptor(admin, region).getColumnFamilies().length;
                    int numRegionStoreFiles = regionLoad.getStorefiles();
                    int minStoreFilesNeeded = compact.getNumStoreFiles() * numFamilies;
                    if (numRegionStoreFiles >= minStoreFilesNeeded) {
                        isRegionEligible = true;
                        if (LOG.isDebugEnabled()) {
                            reasons.add(hostport + " [" + regionName + "] qualifies because it has a total of "
                                    + numRegionStoreFiles + " store files in " + numFamilies + " families");
                        }
                    } else {
                        if (LOG.isDebugEnabled()) {
                            reasons.add(hostport + " [" + regionName
                                    + "] does not qualify because it has a total of " + numRegionStoreFiles
                                    + " store files in " + numFamilies + " families. Needs at least "
                                    + minStoreFilesNeeded);
                        }
                        isRegionEligible = false;
                    }
                } catch (Exception e) {
                    // TableNotFoundException, IOException, and any other failure
                    // are all handled the same way: log and skip the region.
                    LOG.error("Could not determine if region '{}' is eligible. Skipping region.", regionName, e);
                    continue;
                }
            }

            // If enabled, force compaction of any regions that contain store files
            // older than maxStoreFileAge
            if (!isRegionEligible && compact.getMaxStoreFileAge() > 0) {
                List<String> files = server.getStoreFileList(region.getRegionName());
                FileSystem fs = FileSystem.get(admin.getConfiguration());
                if (files != null) {
                    Path[] filePaths = new Path[files.size()];
                    for (int i = 0; i < files.size(); i++) {
                        filePaths[i] = new Path(files.get(i));
                    }
                    long maxStoreFileAge = compact.getMaxStoreFileAge();
                    long now = System.currentTimeMillis();
                    FileStatus[] storeFilesStatus = fs.listStatus(filePaths);
                    for (FileStatus fileStatus : storeFilesStatus) {
                        long storeFileAge = now - fileStatus.getModificationTime();
                        if (storeFileAge > maxStoreFileAge) {
                            isRegionEligible = true;
                            if (LOG.isDebugEnabled()) {
                                reasons.add(hostport + " [" + regionName + "] forced to qualify because "
                                        + "at least one store file is older than the specified maxStoreFileAge");
                            }
                            break;
                        }
                    }
                }
            }

            if (isRegionEligible) {
                if (reasons.size() > 0) {
                    for (String reason : reasons) {
                        LOG.debug(reason);
                    }
                }
                ret = region;
                break;
            }
        }
    }
    return ret;
}
From source file:com.sun.kohsuke.hadoop.importer.App.java
License:Open Source License
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("Usage: java -jar importer.jar [HDFS URL] [local directory] [HDFS directory]");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    conf.set("fs.default.name", args[0]);
    DFSClient dfs = new DFSClient(conf);

    File in = new File(args[1]);
    String out = args[2];

    File[] children = in.listFiles(new FileFilter() {
        public boolean accept(File child) {
            return child.isFile();
        }
    });
    if (children == null) {
        System.out.println("No such directory exists: " + in);
        System.exit(-1);
    }

    int cnt = 1;
    for (File f : children) {
        String dest = out + '/' + f.getName();
        FileStatus i = dfs.getFileInfo(dest);
        // Re-import when the remote copy is missing, or its mtime or length differ
        if (i == null || i.getModificationTime() != f.lastModified() || i.getLen() != f.length()) {
            System.out.printf("(%d/%d) Importing %s\n", cnt, children.length, f);
            try {
                IOUtils.copyBytes(new FileInputStream(f), dfs.create(dest, true), conf);
                // Sync the HDFS mtime with the local file so the next run can skip it
                dfs.setTimes(dest, f.lastModified(), f.lastModified());
            } catch (RemoteException e) {
                // failure to create
                e.printStackTrace();
            }
        } else {
            System.out.printf("(%d/%d) Skipping %s\n", cnt, children.length, f);
        }
        cnt++;
    }
}
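The freshness test in the loop above can be factored into a small helper. A sketch under the same assumption the importer makes, namely that setTimes() keeps the HDFS mtime in sync with the local file; needsImport is a hypothetical name:

// Re-import when the remote copy is missing, or its mtime or length
// no longer matches the local file.
static boolean needsImport(FileStatus remote, File local) {
    return remote == null
            || remote.getModificationTime() != local.lastModified()
            || remote.getLen() != local.length();
}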
From source file:com.talis.hadoop.rdf.ZipUtils.java
License:Apache License
/**
 * Write a file to a zip output stream, removing leading path name components
 * from the actual file name when creating the zip file entry.
 *
 * The entry placed in the zip file is <code>baseName</code>/<code>relativePath</code>,
 * where <code>relativePath</code> is constructed by removing a leading
 * <code>root</code> from the path for <code>itemToZip</code>.
 *
 * If <code>itemToZip</code> is an empty directory, it is ignored. If
 * <code>itemToZip</code> is a directory, the contents of the directory are
 * added recursively.
 *
 * @param zos The zip output stream
 * @param baseName The base name to use for the file name entry in the zip file
 * @param root The path to remove from <code>itemToZip</code> to make a relative path name
 * @param itemToZip The path to the file to be added to the zip file
 * @return the number of entries added
 * @throws IOException
 */
static public int zipDirectory(final Configuration conf, final ZipOutputStream zos, final String baseName,
        final String root, final Path itemToZip) throws IOException {
    LOG.info("zipDirectory: {} {} {}", new Object[] { baseName, root, itemToZip });
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    int count = 0;

    final FileStatus itemStatus = localFs.getFileStatus(itemToZip);
    if (itemStatus.isDir()) {
        final FileStatus[] statai = localFs.listStatus(itemToZip);

        // Add a directory entry to the zip file
        final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
        final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR);
        LOG.info(String.format("Adding directory %s to zip", zipDirName));
        zos.putNextEntry(dirZipEntry);
        zos.closeEntry();
        count++;

        if (statai == null || statai.length == 0) {
            LOG.info(String.format("Skipping empty directory %s", itemToZip));
            return count;
        }
        for (FileStatus status : statai) {
            count += zipDirectory(conf, zos, baseName, root, status.getPath());
        }
        LOG.info(String.format("Wrote %d entries for directory %s", count, itemToZip));
        return count;
    }

    final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
    if (inZipPath.length() == 0) {
        LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName));
        return 0;
    }

    // Take empty files in case the placeholder is needed
    FSDataInputStream in = null;
    try {
        in = localFs.open(itemToZip);
        final ZipEntry ze = new ZipEntry(inZipPath);
        // Carry the source file's mtime into the archive; ZipEntry.setTime()
        // takes milliseconds since the epoch, the same unit that
        // FileStatus.getModificationTime() returns.
        ze.setTime(itemStatus.getModificationTime());
        // Comments confuse looking at the zip file
        // ze.setComment(itemToZip.toString());
        zos.putNextEntry(ze);
        IOUtils.copyBytes(in, zos, conf, false);
        zos.closeEntry();
        LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip));
        return 1;
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.HighWaterMarkInputFormat.java
License:Apache License
@Nonnull
@Override
public List<FileStatus> listStatus(@Nonnull final JobContext job) throws IOException {
    // Get job configuration
    long highWaterMark = Math.max(lastHighWaterMark, getHighWaterMark(job));
    final long maxAge = HighWaterMarkInputFormat.getMaxFileAge(job);
    final long minAge = HighWaterMarkInputFormat.getMinFileAge(job);
    if (minAge > maxAge) {
        throw new IOException(MIN_FILE_AGE + " cannot be greater than " + MAX_FILE_AGE);
    }

    // List and filter files
    final List<FileStatus> allFiles = super.listStatus(job);
    final List<FileStatus> jobFiles = new ArrayList<>(allFiles.size());
    final long currentTime = currentTimeMillis();

    for (final FileStatus file : allFiles) {
        final long fileTime = file.getModificationTime();
        final long fileAge = currentTime - fileTime;
        if (!file.isDirectory() && fileAge >= minAge && fileAge <= maxAge && fileTime > highWaterMark) {
            jobFiles.add(file);
            if (fileTime > lastHighWaterMark) {
                lastHighWaterMark = fileTime;
            }
        }
    }
    lastHighWaterMark = Math.max(lastHighWaterMark, highWaterMark);
    return jobFiles;
}
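Stripped of the job-configuration plumbing, the high-water-mark idea is: accept only files modified strictly after the last recorded mark, then advance the mark to the newest accepted file. A standalone sketch, assuming fs, dir, and a persisted previousMark are in scope:

long mark = previousMark;
List<FileStatus> fresh = new ArrayList<>();
for (FileStatus f : fs.listStatus(dir)) {
    if (!f.isDirectory() && f.getModificationTime() > mark) {
        fresh.add(f);
    }
}
// advance the mark only after the listing completes, so a failure
// mid-listing does not lose files on the next run
for (FileStatus f : fresh) {
    mark = Math.max(mark, f.getModificationTime());
}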
From source file:com.toy.Client.java
License:Apache License
private static void registerLocalResource(Map<String, LocalResource> localResources, ApplicationId appId,
        FileSystem fs, Path src) throws IOException {
    String pathSuffix = Constants.TOY_PREFIX + appId.toString() + "/" + src.getName();
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    LOG.info("Copy {} from local filesystem to {} and add to local environment", src.getName(), dst.toUri());
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);

    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(src.getName(), amJarRsrc);
}
From source file:com.toy.Client.java
License:Apache License
private void uploadDepAndRegister(Map<String, LocalResource> localResources, ApplicationId appId,
        FileSystem fs, String depname) throws IOException {
    File dep = new File(depname);
    if (!dep.exists()) {
        throw new IOException(dep.getAbsolutePath() + " does not exist");
    }
    Path dst = new Path(fs.getHomeDirectory(), Constants.TOY_PREFIX + appId.toString() + "/" + dep.getName());
    LOG.info("Copy {} from local filesystem to {} and add to local environment", dep.getName(), dst.toUri());

    FileInputStream input = new FileInputStream(dep);
    final FSDataOutputStream outputStream = fs.create(dst, true);
    ByteStreams.copy(input, outputStream);
    input.close();
    outputStream.close();

    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    FileStatus destStatus = fs.getFileStatus(dst);
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(dep.getName(), amJarRsrc);
}
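In both Client.java methods the timestamp is not decorative: the YARN NodeManager is expected to compare the registered timestamp against the file's actual modification time during localization and refuse the resource if they differ, so the value should come from a getFileStatus() call made after the upload finishes. A minimal sketch of that ordering, with fs and dst assumed in scope:

// Upload first, then read the status, then register -- querying the
// status before the copy completes would record a stale mtime.
FileStatus uploaded = fs.getFileStatus(dst);
LocalResource resource = Records.newRecord(LocalResource.class);
resource.setType(LocalResourceType.FILE);
resource.setVisibility(LocalResourceVisibility.APPLICATION);
resource.setResource(ConverterUtils.getYarnUrlFromPath(dst));
resource.setTimestamp(uploaded.getModificationTime()); // must match the HDFS mtime
resource.setSize(uploaded.getLen());                   // must match the HDFS length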
From source file:com.trendmicro.hdfs.webdav.HDFSResource.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" })
private void populateProperties() {
    if (properties != null) {
        return;
    }
    properties = new DavPropertySet();
    FileStatus stat = null;
    try {
        stat = user.doAs(new PrivilegedExceptionAction<FileStatus>() {
            public FileStatus run() throws Exception {
                return FileSystem.get(conf).getFileStatus(getPath());
            }
        });
    } catch (IOException ex) {
        LOG.warn(StringUtils.stringifyException(ex));
    } catch (InterruptedException e) {
        LOG.warn(StringUtils.stringifyException(e));
    }
    if (stat != null) {
        properties.add(new DefaultDavProperty(DavPropertyName.GETCONTENTLENGTH, stat.getLen()));
        SimpleDateFormat simpleFormat = (SimpleDateFormat) DavConstants.modificationDateFormat.clone();
        simpleFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
        Date date = new Date(stat.getModificationTime());
        properties.add(new DefaultDavProperty(DavPropertyName.GETLASTMODIFIED, simpleFormat.format(date)));
        properties.add(new DefaultDavProperty(SecurityConstants.OWNER, stat.getOwner()));
        properties.add(new DefaultDavProperty(SecurityConstants.GROUP, stat.getGroup()));
        // TODO: Populate DAV property SecurityConstants.CURRENT_USER_PRIVILEGE_SET
    }
    if (getDisplayName() != null) {
        properties.add(new DefaultDavProperty(DavPropertyName.DISPLAYNAME, getDisplayName()));
    }
    if (isCollection()) {
        properties.add(new ResourceType(ResourceType.COLLECTION));
        // Windows XP support
        properties.add(new DefaultDavProperty(DavPropertyName.ISCOLLECTION, "1"));
    } else {
        properties.add(new ResourceType(ResourceType.DEFAULT_RESOURCE));
        // Windows XP support
        properties.add(new DefaultDavProperty(DavPropertyName.ISCOLLECTION, "0"));
    }
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/**
 * Method to move files from HDFS to the local filesystem.
 *
 * localPath: path on the machine's filesystem
 * preservePath: where existing local copies are moved before being overwritten
 * fs: FileSystem object from HDFS
 * pathList: list of paths for files that might need to be backed up
 * size: max size in bytes to be backed up
 *
 * Returns: date of the last file backed up if the size limit was
 * reached, else zero.
 **/
public long backupFiles(String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList,
        long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
        try {
            long nFileSize = fs.getContentSummary(hdfsPath).getLength();
            tmpSize = tmpSize + nFileSize;

            if ((tmpSize <= size) || (size == 0)) {
                FileStatus stat = fs.getFileStatus(hdfsPath);

                System.err.println("File " + hdfsPath.toUri().getPath() + " " + nFileSize + " bytes, "
                        + "perms: " + stat.getOwner() + "/" + stat.getGroup() + ", "
                        + stat.getPermission().toString());

                tmpDate = stat.getModificationTime() / 1000;

                String sFsPath = localPath + hdfsPath.toUri().getPath();
                fsPath = new Path(sFsPath);

                File f = new File(sFsPath);

                // COMMENTED OUT: until a few backup cycles run
                // and the mtime gets in fact set on all copied
                // files.
                //
                // ignore it if the file exists and has the same mtime
                // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime())
                // {
                //     System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
                //     continue;
                // }

                if (false == m_bDryRun) {
                    // check if we need to back up the local file
                    // (not directory), if it already exists.
                    if (f.exists() && f.isFile()) {
                        // ignore files with substrings in the
                        // no-preserve file
                        if (true == doPreserveFile(sFsPath)) {
                            // move it to the backup path
                            String sNewPath = preservePath + hdfsPath.toUri().getPath();
                            File newFile = new File(sNewPath);

                            // create directory structure for new file?
                            if (false == newFile.getParentFile().exists()) {
                                if (false == newFile.getParentFile().mkdirs()) {
                                    System.err.println("Failed to mkdirs " + newFile.getParentFile().toString());
                                    System.exit(1);
                                }
                            }

                            // rename existing file to new location
                            if (false == f.renameTo(newFile)) {
                                System.err.println("Failed to renameTo " + f.toString() + " to " + newFile.toString());
                                System.exit(1);
                            }

                            System.out.println("preserved " + f.toString() + " into " + newFile.toString());
                        } else {
                            System.out.println("skipped preservation of " + f.toString());
                        }
                    }

                    // copy from hdfs to local filesystem
                    fs.copyToLocalFile(hdfsPath, fsPath);

                    // set the mtime to match hdfs file
                    f.setLastModified(stat.getModificationTime());

                    // compare checksums on both files
                    compareChecksums(fs, hdfsPath, sFsPath);
                }

                // don't print the progress after every file -- go
                // by at least 1% increments
                long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
                if (nPercentDone > m_nLastPercentBytesDone) {
                    System.out.println("progress: copied " + prettyPrintBytes(tmpSize) + ", " + nPercentDone
                            + "% done" + ", tstamp=" + tmpDate);
                    m_nLastPercentBytesDone = nPercentDone;
                }

                if (m_nSleepSeconds > 0) {
                    try {
                        Thread.sleep(1000 * m_nSleepSeconds);
                    } catch (Exception e2) {
                        // ignore
                    }
                }
            } else {
                return tmpDate;
            }
        } catch (IOException e) {
            System.err.println("FATAL ERROR: Something wrong with the file");
            System.err.println(e);
            System.out.println(tmpDate);
            System.exit(1);
            return 0;
        }
    }
    return 0;
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/**
 * Method to go through the HDFS filesystem in a DFS to find all files.
 *
 * fs: FileSystem object from HDFS
 * minDate: oldest date for files to be backed up
 * maxDate: newest date for files to be backed up
 * p: path in HDFS to look for files
 * pathList: will be filled with all files in p
 * hmTimestamps: hashmap of timestamps for later sorting
 **/
public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList,
        HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            // dump the mkdir and chmod commands for this
            // directory -- skip root directory only
            {
                FileStatus stat = fs.getFileStatus(p);

                if (!sPath.equals("/")) {
                    m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
                }

                m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

                Short sh = new Short(stat.getPermission().toShort());
                m_wrChmods.println("hadoop fs -chmod " + Long.toOctalString(sh.longValue()) + " " + sPath);
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // another database to regular hive tables to
            // partitioned hive tables. We use table names to
            // both exclude some from the backup, and for the rest
            // to dump out the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                m_nIgnoredTables++;
                if (m_nIgnoredTables < 5) {
                    System.out.println("Skipping ignore-table file: " + sPath);
                } else if (m_nIgnoredTables == 5) {
                    System.out.println("(...not showing other skipped tables...)");
                }
                return;
            }

            FileStatus stat = fs.getFileStatus(p);

            // getModificationTime() is in milliseconds; the cutoff dates
            // are in seconds, so convert before comparing
            tmpDate = stat.getModificationTime() / 1000;

            // store the chmods/chowns for all files
            m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);
            m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

            // check dates. is the file older than the backup window?
            if (tmpDate < minDate) {
                return;
            }

            // is the file too recent?
            if (tmpDate > maxDate) {
                //System.out.println("file too recent: " + sPath);
                return;
            }

            // file timestamp is ok
            pathList.add(p);
            hmTimestamps.put(p, new Long(tmpDate));

            // store info about total bytes needed to backup
            m_nTotalBytes += fs.getContentSummary(p).getLength();
        }
    } catch (IOException e) {
        System.err.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1);
    }
}
From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java
License:Apache License
/**
 * Method to go through the HDFS filesystem in a DFS to find all files.
 *
 * fs: FileSystem object from HDFS
 * maxDate: newest date for files to be backed up
 * p: path in HDFS to look for files
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables. We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size. The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/" + stat.getGroup()
                    + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());

                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }
            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (false == sCkShort.equals(sCkLocalShort)) {
                System.out.println("ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= "
                        + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1);
    }
}