List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(final JobContext job) throws IOException { final Configuration configuration = job.getConfiguration(); final List<InputSplit> result = Lists.newArrayList(); final List<FileStatus> files = listStatus(job); LOG.debug("Initial file list: {} {}", files.size(), files); for (final FileStatus fileStatus : files) { final Path dataFile = fileStatus.getPath(); final FileSystem fileSystem = dataFile.getFileSystem(configuration); final BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());/*from www . ja v a2 s .co m*/ // Data file, try to split if the .index file was found final SSTableIndexIndex index = indexes.get(dataFile); if (index == null) { throw new IOException("Index not found for " + dataFile); } for (final SSTableIndexIndex.Chunk chunk : index.getOffsets()) { // This isn't likely to work well because we are dealing with the index into uncompressed data... final int blockIndex = getBlockIndex(blockLocations, chunk.getStart() / COMPRESSION_RATIO_ASSUMPTION); final SSTableSplit split = new SSTableSplit(dataFile, chunk.getStart(), chunk.getEnd(), chunk.getEnd() - chunk.getStart(), blockLocations[blockIndex].getHosts()); result.add(split); } } LOG.debug("Splits calculated: {} {}", result.size(), result); return result; }
From source file:com.fullcontact.sstable.index.SSTableIndexIndexer.java
License:Apache License
public void index(final Path sstablePath) throws IOException { final FileSystem fileSystem = FileSystem.get(URI.create(sstablePath.toString()), configuration); final FileStatus fileStatus = fileSystem.getFileStatus(sstablePath); if (fileStatus.isDir()) { LOG.info("SSTable Indexing directory {}", sstablePath); final FileStatus[] statuses = fileSystem.listStatus(sstablePath); for (final FileStatus childStatus : statuses) { index(childStatus.getPath()); }/*from w w w. j av a2s .com*/ } else if (sstablePath.toString().endsWith(SST_EXTENSION)) { final Path sstableIndexPath = new Path(sstablePath.toString() + SSTableIndexIndex.SSTABLE_INDEX_SUFFIX); if (fileSystem.exists(sstableIndexPath)) { LOG.info("Skipping as SSTable index file already exists for {}", sstablePath); } else { // Kick a thread for the index. final ListenableFuture<IndexRequest> indexFuture = service.submit(new Callable<IndexRequest>() { @Override public IndexRequest call() throws Exception { final long startTime = System.currentTimeMillis(); final long fileSize = fileStatus.getLen(); LOG.info("Indexing SSTABLE Indexing file {}, size {} GB...", sstablePath, decimalFormat.format(fileSize / (1024.0 * 1024.0 * 1024.0))); indexSingleFile(fileSystem, sstablePath); return new IndexRequest(sstableIndexPath, startTime, fileSize); } }); Futures.addCallback(indexFuture, new FutureCallback<IndexRequest>() { public void onSuccess(final IndexRequest indexRequest) { long indexSize = 0; try { indexSize = fileSystem.getFileStatus(indexRequest.getIndexPath()).getLen(); } catch (IOException e) { LOG.error("Error getting file status for index path: {}", indexRequest.getIndexPath()); } final double elapsed = (System.currentTimeMillis() - indexRequest.getStartTime()) / 1000.0; LOG.info("Completed SSTABLE Indexing in {} seconds ({} MB/s). 
Index size is {} KB.", decimalFormat.format(elapsed), decimalFormat.format(indexRequest.getFileSize() / (1024.0 * 1024.0 * elapsed)), decimalFormat.format(indexSize / 1024.0)); } public void onFailure(Throwable e) { LOG.error("Failed to index.", e); } }); } } }
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.AbstractHoplogOrganizer.java
License:Apache License
/**
 * Utility method to remove a file from the valid file list if an expired marker
 * for the file exists. A marker matches a file when the marker's name equals the
 * file's name followed by {@code HdfsSortedOplogOrganizer.EXPIRED_HOPLOG_EXTENSION}.
 *
 * @param valid
 *          list of valid files
 * @param expired
 *          list of expired file markers
 * @return list of valid files that do not have an expired file marker;
 *         {@code null} when {@code valid} is {@code null}, and {@code valid}
 *         itself when {@code expired} is {@code null}
 */
public static FileStatus[] filterValidHoplogs(FileStatus[] valid, FileStatus[] expired) {
    if (valid == null) {
        return null;
    }
    if (expired == null) {
        return valid;
    }

    ArrayList<FileStatus> result = new ArrayList<FileStatus>();
    for (FileStatus vs : valid) {
        boolean found = false;
        for (FileStatus ex : expired) {
            if (ex.getPath().getName()
                    .equals(vs.getPath().getName() + HdfsSortedOplogOrganizer.EXPIRED_HOPLOG_EXTENSION)) {
                found = true;
                // One marker is enough; no need to look at the remaining markers.
                break;
            }
        }
        if (!found) {
            result.add(vs);
        }
    }
    return result.toArray(new FileStatus[result.size()]);
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java
License:Apache License
/**
 * Deletes the given files one by one and subtracts each deleted file's length from the
 * tracked disk usage.
 *
 * <p>After every delete the organizer is checked with {@code isClosed()}; if it reports
 * closed, the method stops immediately and returns 0 without adjusting disk usage for the
 * file just deleted and without updating the cleanup timestamp.
 *
 * @param targets files to delete; may be {@code null}
 * @return {@code targets.size()} when every file was processed, otherwise 0
 * @throws IOException if the file system delete fails
 */
protected int deleteExpiredFiles(List<FileStatus> targets) throws IOException {
    if (targets == null) {
        return 0;
    }

    for (FileStatus expiredFile : targets) {
        if (logger.isDebugEnabled()) {
            logger.debug("{}Deleting file: " + expiredFile.getPath(), logPrefix);
        }
        store.getFileSystem().delete(expiredFile.getPath(), false);

        if (isClosed()) {
            if (logger.isDebugEnabled()) {
                logger.debug("{}Expiry file cleanup interupted by bucket close", logPrefix);
            }
            return 0;
        }

        incrementDiskUsage(-expiredFile.getLen());
    }

    previousCleanupTimestamp.set(System.currentTimeMillis());
    return targets.size();
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java
License:Apache License
/** * @param ts/*from ww w.ja v a 2s.com*/ * target timestamp * @return list of hoplogs, whose expiry markers were created before target * timestamp, and the expiry marker itself. * @throws IOException */ protected List<FileStatus> getOptimizationTargets(long ts) throws IOException { if (logger.isDebugEnabled()) { logger.debug("{}Identifying optimization targets " + ts, logPrefix); } List<FileStatus> deleteTargets = new ArrayList<FileStatus>(); FileStatus[] markers = getExpiryMarkers(); if (markers != null) { for (FileStatus marker : markers) { String name = truncateExpiryExtension(marker.getPath().getName()); long timestamp = marker.getModificationTime(); // expired minor compacted files are not being used anywhere. These can // be removed immediately. All the other expired files should be removed // when the files have aged boolean isTarget = false; if (name.endsWith(MINOR_HOPLOG_EXTENSION)) { isTarget = true; } else if (timestamp < ts && name.endsWith(FLUSH_HOPLOG_EXTENSION)) { isTarget = true; } else if (timestamp < ts && name.endsWith(MAJOR_HOPLOG_EXTENSION)) { long majorCInterval = ((long) store.getMajorCompactionInterval()) * 60 * 1000; if (timestamp < (System.currentTimeMillis() - majorCInterval)) { isTarget = true; } } if (!isTarget) { continue; } // if the file is still being read, do not delete or rename it TrackedReference<Hoplog> used = hoplogReadersController.getInactiveHoplog(name); if (used != null) { if (used.inUse() && logger.isDebugEnabled()) { logger.debug("{}Optimizer: found active expired hoplog:" + name, logPrefix); } else if (logger.isDebugEnabled()) { logger.debug("{}Optimizer: found open expired hoplog:" + name, logPrefix); } continue; } if (logger.isDebugEnabled()) { logger.debug("{}Delete target identified " + marker.getPath(), logPrefix); } deleteTargets.add(marker); Path hoplogPath = new Path(bucketPath, name); if (store.getFileSystem().exists(hoplogPath)) { FileStatus hoplog = store.getFileSystem().getFileStatus(hoplogPath); 
deleteTargets.add(hoplog); } } } return deleteTargets; }
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java
License:Apache License
/** * Returns a list of of hoplogs present in the bucket's directory, expected to be called during * hoplog set initialization//w w w. j a va 2 s.c o m */ List<Hoplog> identifyAndLoadSortedOplogs(boolean countSize) throws IOException { FileSystem fs = store.getFileSystem(); if (!fs.exists(bucketPath)) { return new ArrayList<Hoplog>(); } FileStatus allFiles[] = fs.listStatus(bucketPath); ArrayList<FileStatus> validFiles = new ArrayList<FileStatus>(); for (FileStatus file : allFiles) { // All hoplog files contribute to disk usage Matcher matcher = HOPLOG_NAME_PATTERN.matcher(file.getPath().getName()); if (!matcher.matches()) { // not a hoplog continue; } // account for the disk used by this file if (countSize) { incrementDiskUsage(file.getLen()); } // All valid hoplog files must match the regex matcher = SORTED_HOPLOG_PATTERN.matcher(file.getPath().getName()); if (matcher.matches()) { validFiles.add(file); } } FileStatus[] markers = getExpiryMarkers(); FileStatus[] validHoplogs = filterValidHoplogs(validFiles.toArray(new FileStatus[validFiles.size()]), markers); ArrayList<Hoplog> results = new ArrayList<Hoplog>(); if (validHoplogs == null || validHoplogs.length == 0) { return results; } for (int i = 0; i < validHoplogs.length; i++) { // Skip directories if (validHoplogs[i].isDirectory()) { continue; } final Path p = validHoplogs[i].getPath(); // skip empty file if (fs.getFileStatus(p).getLen() <= 0) { continue; } Hoplog hoplog = new HFileSortedOplog(store, p, store.getBlockCache(), stats, store.getStats()); results.add(hoplog); } return results; }
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java
License:Apache License
/**
 * Removes temporary files that are no longer needed.
 *
 * <p>Entries of {@code oldTmpFiles} are deleted, and dropped from that collection, once their
 * modification time is more than {@code TMP_FILE_EXPIRATION_TIME_MS} in the past. Every hoplog
 * registered in {@code tmpFiles} is handed to {@code deleteTmpFile}.
 *
 * @throws IOException if a file system delete fails
 */
private void cleanupTmpFiles() throws IOException {
    if (oldTmpFiles == null && tmpFiles == null) {
        return;
    }

    if (oldTmpFiles != null) {
        FileSystem fs = store.getFileSystem();
        long now = System.currentTimeMillis();
        for (Iterator<FileStatus> itr = oldTmpFiles.iterator(); itr.hasNext();) {
            FileStatus file = itr.next();
            // Delete only files that have outlived the expiration window. The previous test
            // (modificationTime + expiration > now) selected the files that had NOT expired
            // yet and left the old ones in place forever. Comparing the age also avoids
            // overflow when the expiration constant is very large.
            // NOTE(review): assumes TMP_FILE_EXPIRATION_TIME_MS is a maximum age - confirm.
            if (now - file.getModificationTime() > TMP_FILE_EXPIRATION_TIME_MS) {
                if (logger.isDebugEnabled()) {
                    logger.debug("{}Deleting temporary file:" + file.getPath(), logPrefix);
                }
                fs.delete(file.getPath(), false);
                itr.remove();
            }
        }
    }

    if (tmpFiles != null) {
        for (Hoplog so : tmpFiles.keySet()) {
            if (logger.isDebugEnabled()) {
                logger.debug("{}Deleting temporary file:" + so.getFileName(), logPrefix);
            }
            deleteTmpFile(null, so);
        }
    }
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSUnsortedHoplogOrganizer.java
License:Apache License
/** * locks sorted oplogs collection, removes oplog and renames for deletion later * @throws IOException /*from w w w .j a v a2 s . c o m*/ */ private void markHoplogsForDeletion() throws IOException { ArrayList<IOException> errors = new ArrayList<IOException>(); FileStatus validHoplogs[] = FSUtils.listStatus(fileSystem, bucketPath, new PathFilter() { @Override public boolean accept(Path file) { // All valid hoplog files must match the regex Matcher matcher = HOPLOG_PATTERN.matcher(file.getName()); return matcher.matches(); } }); FileStatus[] expired = getExpiredHoplogs(); validHoplogs = filterValidHoplogs(validHoplogs, expired); if (validHoplogs == null || validHoplogs.length == 0) { return; } for (FileStatus fileStatus : validHoplogs) { try { addExpiryMarkerForAFile(getHoplog(fileStatus.getPath())); } catch (IOException e) { // even if there is an IO error continue removing other hoplogs and // notify at the end errors.add(e); } } if (!errors.isEmpty()) { for (IOException e : errors) { logger.warn(LocalizedStrings.HOPLOG_HOPLOG_REMOVE_FAILED, e); } } }
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
public static Collection<Collection<FileStatus>> getBucketHoplogs(Path regionPath, FileSystem fs, String type, long start, long end) throws IOException { Collection<Collection<FileStatus>> allBuckets = new ArrayList<Collection<FileStatus>>(); // hoplog files names follow this pattern String HOPLOG_NAME_REGEX = AbstractHoplogOrganizer.HOPLOG_NAME_REGEX + type; String EXPIRED_HOPLOG_NAME_REGEX = HOPLOG_NAME_REGEX + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION; final Pattern pattern = Pattern.compile(HOPLOG_NAME_REGEX); final Pattern expiredPattern = Pattern.compile(EXPIRED_HOPLOG_NAME_REGEX); Path cleanUpIntervalPath = new Path(regionPath.getParent(), HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME); long intervalDurationMillis = readCleanUpIntervalMillis(fs, cleanUpIntervalPath); // a region directory contains directories for individual buckets. A bucket // has a integer name. FileStatus[] bucketDirs = fs.listStatus(regionPath); for (FileStatus bucket : bucketDirs) { if (!bucket.isDirectory()) { continue; }/*from w ww . j a va 2 s . 
c o m*/ try { Integer.valueOf(bucket.getPath().getName()); } catch (NumberFormatException e) { continue; } ArrayList<FileStatus> bucketHoplogs = new ArrayList<FileStatus>(); // identify all the flush hoplogs and seq hoplogs by visiting all the // bucket directories FileStatus[] bucketFiles = fs.listStatus(bucket.getPath()); Map<String, Long> expiredHoplogs = getExpiredHoplogs(fs, bucketFiles, expiredPattern); FileStatus oldestHopAfterEndTS = null; long oldestHopTS = Long.MAX_VALUE; long currentTimeStamp = System.currentTimeMillis(); for (FileStatus file : bucketFiles) { if (!file.isFile()) { continue; } Matcher match = pattern.matcher(file.getPath().getName()); if (!match.matches()) { continue; } long timeStamp = AbstractHoplogOrganizer.getHoplogTimestamp(match); if (start > 0 && timeStamp < start) { // this hoplog contains records less than the start time stamp continue; } if (end > 0 && timeStamp > end) { // this hoplog contains records mutated after end time stamp. Ignore // this hoplog if it is not the oldest. if (oldestHopTS > timeStamp) { oldestHopTS = timeStamp; oldestHopAfterEndTS = file; } continue; } long expiredTimeStamp = expiredTime(file, expiredHoplogs); if (expiredTimeStamp > 0 && intervalDurationMillis > 0) { if ((currentTimeStamp - expiredTimeStamp) > 0.8 * intervalDurationMillis) { continue; } } bucketHoplogs.add(file); } if (oldestHopAfterEndTS != null) { long expiredTimeStamp = expiredTime(oldestHopAfterEndTS, expiredHoplogs); if (expiredTimeStamp <= 0 || intervalDurationMillis <= 0 || (currentTimeStamp - expiredTimeStamp) <= 0.8 * intervalDurationMillis) { bucketHoplogs.add(oldestHopAfterEndTS); } } if (bucketHoplogs.size() > 0) { allBuckets.add(bucketHoplogs); } } return allBuckets; }
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
private static Map<String, Long> getExpiredHoplogs(FileSystem fs, FileStatus[] bucketFiles, Pattern expiredPattern) throws IOException { Map<String, Long> expiredHoplogs = new HashMap<String, Long>(); for (FileStatus file : bucketFiles) { if (!file.isFile()) { continue; }//from w w w .j a v a 2 s . c o m String fileName = file.getPath().getName(); Matcher match = expiredPattern.matcher(fileName); if (!match.matches()) { continue; } expiredHoplogs.put(fileName, file.getModificationTime()); } return expiredHoplogs; }