Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.FileStatus.getPath() method.

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java

License:Apache License

/**
 * Builds one {@link SSTableSplit} per index chunk for every input file of the job.
 *
 * <p>For each file returned by {@code listStatus(job)} the block locations of the whole file are
 * fetched and the file's entry in {@code indexes} is looked up; every chunk of that index becomes
 * a split whose hosts are taken from the block selected by {@code getBlockIndex}.
 *
 * @param job the job context supplying the configuration and the input files
 * @return the splits for all input files, in file order and then chunk order
 * @throws IOException if block locations cannot be read or no index exists for an input file
 */
@Override
public List<InputSplit> getSplits(final JobContext job) throws IOException {
    final Configuration configuration = job.getConfiguration();
    final List<InputSplit> splits = Lists.newArrayList();
    final List<FileStatus> inputFiles = listStatus(job);

    LOG.debug("Initial file list: {} {}", inputFiles.size(), inputFiles);

    for (final FileStatus inputFile : inputFiles) {
        final Path dataFile = inputFile.getPath();
        final BlockLocation[] locations = dataFile.getFileSystem(configuration)
                .getFileBlockLocations(inputFile, 0, inputFile.getLen());

        // A data file can only be split when its index is available.
        final SSTableIndexIndex index = indexes.get(dataFile);
        if (index == null) {
            throw new IOException("Index not found for " + dataFile);
        }

        for (final SSTableIndexIndex.Chunk chunk : index.getOffsets()) {
            // Chunk offsets refer to uncompressed data, so the block is only estimated by
            // scaling the offset with the assumed compression ratio.
            final int blockIndex = getBlockIndex(locations,
                    chunk.getStart() / COMPRESSION_RATIO_ASSUMPTION);
            final String[] hosts = locations[blockIndex].getHosts();
            splits.add(new SSTableSplit(dataFile, chunk.getStart(), chunk.getEnd(),
                    chunk.getEnd() - chunk.getStart(), hosts));
        }
    }

    LOG.debug("Splits calculated: {} {}", splits.size(), splits);

    return splits;
}

From source file:com.fullcontact.sstable.index.SSTableIndexIndexer.java

License:Apache License

/**
 * Indexes the SSTable at the given path, descending into directories.
 *
 * <p>A directory is walked recursively. A path ending in {@code SST_EXTENSION} is indexed on the
 * executor {@code service} unless its index file already exists; any other path is ignored.
 * Completion and failure of the submitted task are reported through the log only.
 *
 * @param sstablePath file or directory to index
 * @throws IOException if the file system cannot be reached or the path status cannot be read
 */
public void index(final Path sstablePath) throws IOException {

    final FileSystem fileSystem = FileSystem.get(URI.create(sstablePath.toString()), configuration);
    final FileStatus fileStatus = fileSystem.getFileStatus(sstablePath);

    if (fileStatus.isDir()) {
        LOG.info("SSTable Indexing directory {}", sstablePath);
        final FileStatus[] statuses = fileSystem.listStatus(sstablePath);
        for (final FileStatus childStatus : statuses) {
            index(childStatus.getPath());
        }
    } else if (sstablePath.toString().endsWith(SST_EXTENSION)) {
        final Path sstableIndexPath = new Path(sstablePath.toString() + SSTableIndexIndex.SSTABLE_INDEX_SUFFIX);
        if (fileSystem.exists(sstableIndexPath)) {
            LOG.info("Skipping as SSTable index file already exists for {}", sstablePath);
        } else {
            // Kick a thread for the index.
            final ListenableFuture<IndexRequest> indexFuture = service.submit(new Callable<IndexRequest>() {
                @Override
                public IndexRequest call() throws Exception {
                    final long startTime = System.currentTimeMillis();
                    final long fileSize = fileStatus.getLen();

                    LOG.info("Indexing SSTABLE Indexing file {}, size {} GB...", sstablePath,
                            decimalFormat.format(fileSize / (1024.0 * 1024.0 * 1024.0)));

                    indexSingleFile(fileSystem, sstablePath);

                    return new IndexRequest(sstableIndexPath, startTime, fileSize);
                }
            });

            Futures.addCallback(indexFuture, new FutureCallback<IndexRequest>() {
                public void onSuccess(final IndexRequest indexRequest) {
                    // Stays 0 when the index size cannot be read; the summary is still logged.
                    long indexSize = 0;

                    try {
                        indexSize = fileSystem.getFileStatus(indexRequest.getIndexPath()).getLen();
                    } catch (IOException e) {
                        // Pass the exception as the last argument so the cause is not lost.
                        LOG.error("Error getting file status for index path: {}", indexRequest.getIndexPath(),
                                e);
                    }

                    final double elapsed = (System.currentTimeMillis() - indexRequest.getStartTime()) / 1000.0;

                    LOG.info("Completed SSTABLE Indexing in {} seconds ({} MB/s).  Index size is {} KB.",
                            decimalFormat.format(elapsed),
                            decimalFormat.format(indexRequest.getFileSize() / (1024.0 * 1024.0 * elapsed)),
                            decimalFormat.format(indexSize / 1024.0));
                }

                public void onFailure(Throwable e) {
                    LOG.error("Failed to index.", e);
                }
            });

        }
    }
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.AbstractHoplogOrganizer.java

License:Apache License

/**
 * Utility method to remove a file from the valid file list if an expired
 * marker for the file exists. A marker belongs to a file when the marker's
 * name equals the file's name followed by
 * {@code HdfsSortedOplogOrganizer.EXPIRED_HOPLOG_EXTENSION}.
 * 
 * @param valid
 *          list of valid files, may be {@code null}
 * @param expired
 *          list of expired file markers, may be {@code null}
 * @return list of valid files that do not have an expired file marker, in
 *         their original order; {@code null} if {@code valid} is
 *         {@code null}; {@code valid} itself if {@code expired} is
 *         {@code null}
 */
public static FileStatus[] filterValidHoplogs(FileStatus[] valid, FileStatus[] expired) {
    if (valid == null) {
        return null;
    }

    if (expired == null) {
        return valid;
    }

    ArrayList<FileStatus> result = new ArrayList<FileStatus>(valid.length);
    for (FileStatus vs : valid) {
        // The marker name depends only on the valid file, so build it once
        // instead of once per expired marker.
        String markerName = vs.getPath().getName() + HdfsSortedOplogOrganizer.EXPIRED_HOPLOG_EXTENSION;
        boolean found = false;
        for (FileStatus ex : expired) {
            if (ex.getPath().getName().equals(markerName)) {
                found = true;
                // one matching marker is enough, no need to scan the rest
                break;
            }
        }
        if (!found) {
            result.add(vs);
        }
    }

    return result.toArray(new FileStatus[result.size()]);
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java

License:Apache License

/**
 * Deletes the given files one by one and reduces the tracked disk usage by
 * the length of each deleted file.
 *
 * <p>After every delete the closed state is checked; if the organizer is
 * closed, the method stops immediately without adjusting disk usage for the
 * file just deleted and without updating the cleanup timestamp.
 *
 * @param targets
 *          files to delete, may be {@code null}
 * @return the number of targets when all of them were processed; 0 when
 *         {@code targets} is {@code null} or processing stopped because the
 *         organizer was closed
 * @throws IOException
 *           if the file system fails to delete a file
 */
protected int deleteExpiredFiles(List<FileStatus> targets) throws IOException {
    if (targets == null) {
        return 0;
    }

    for (FileStatus target : targets) {
        if (logger.isDebugEnabled()) {
            logger.debug("{}Deleting file: " + target.getPath(), logPrefix);
        }
        store.getFileSystem().delete(target.getPath(), false);

        if (!isClosed()) {
            // still open: account for the space that was just released
            incrementDiskUsage(-target.getLen());
            continue;
        }

        if (logger.isDebugEnabled()) {
            logger.debug("{}Expiry file cleanup interupted by bucket close", logPrefix);
        }
        return 0;
    }

    previousCleanupTimestamp.set(System.currentTimeMillis());
    return targets.size();
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java

License:Apache License

/**
 * @param ts/*from   ww  w.ja v  a 2s.com*/
 *          target timestamp
 * @return list of hoplogs, whose expiry markers were created before target
 *         timestamp, and the expiry marker itself.
 * @throws IOException
 */
protected List<FileStatus> getOptimizationTargets(long ts) throws IOException {
    if (logger.isDebugEnabled()) {
        logger.debug("{}Identifying optimization targets " + ts, logPrefix);
    }

    List<FileStatus> deleteTargets = new ArrayList<FileStatus>();
    FileStatus[] markers = getExpiryMarkers();
    if (markers != null) {
        for (FileStatus marker : markers) {
            String name = truncateExpiryExtension(marker.getPath().getName());
            long timestamp = marker.getModificationTime();

            // expired minor compacted files are not being used anywhere. These can
            // be removed immediately. All the other expired files should be removed
            // when the files have aged
            boolean isTarget = false;

            if (name.endsWith(MINOR_HOPLOG_EXTENSION)) {
                isTarget = true;
            } else if (timestamp < ts && name.endsWith(FLUSH_HOPLOG_EXTENSION)) {
                isTarget = true;
            } else if (timestamp < ts && name.endsWith(MAJOR_HOPLOG_EXTENSION)) {
                long majorCInterval = ((long) store.getMajorCompactionInterval()) * 60 * 1000;
                if (timestamp < (System.currentTimeMillis() - majorCInterval)) {
                    isTarget = true;
                }
            }
            if (!isTarget) {
                continue;
            }

            // if the file is still being read, do not delete or rename it
            TrackedReference<Hoplog> used = hoplogReadersController.getInactiveHoplog(name);
            if (used != null) {
                if (used.inUse() && logger.isDebugEnabled()) {
                    logger.debug("{}Optimizer: found active expired hoplog:" + name, logPrefix);
                } else if (logger.isDebugEnabled()) {
                    logger.debug("{}Optimizer: found open expired hoplog:" + name, logPrefix);
                }
                continue;
            }

            if (logger.isDebugEnabled()) {
                logger.debug("{}Delete target identified " + marker.getPath(), logPrefix);
            }

            deleteTargets.add(marker);
            Path hoplogPath = new Path(bucketPath, name);
            if (store.getFileSystem().exists(hoplogPath)) {
                FileStatus hoplog = store.getFileSystem().getFileStatus(hoplogPath);
                deleteTargets.add(hoplog);
            }
        }
    }
    return deleteTargets;
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java

License:Apache License

/**
 * Lists the bucket directory and returns a hoplog for every non-empty file
 * that matches the sorted hoplog pattern and has no expiry marker. Expected
 * to be called during hoplog set initialization.
 *
 * @param countSize
 *          when {@code true}, the length of every file matching the hoplog
 *          name pattern is added to the tracked disk usage
 * @return the loaded hoplogs; empty if the bucket directory does not exist
 *         or holds no valid hoplogs
 * @throws IOException
 *           if the bucket directory or a file status cannot be read
 */
List<Hoplog> identifyAndLoadSortedOplogs(boolean countSize) throws IOException {
    final FileSystem fs = store.getFileSystem();
    final ArrayList<Hoplog> loaded = new ArrayList<Hoplog>();
    if (!fs.exists(bucketPath)) {
        return loaded;
    }

    final ArrayList<FileStatus> sortedCandidates = new ArrayList<FileStatus>();
    for (FileStatus entry : fs.listStatus(bucketPath)) {
        final String entryName = entry.getPath().getName();
        if (!HOPLOG_NAME_PATTERN.matcher(entryName).matches()) {
            // not a hoplog
            continue;
        }

        // every hoplog file contributes to disk usage, valid or not
        if (countSize) {
            incrementDiskUsage(entry.getLen());
        }

        // only files matching the sorted hoplog regex are candidates
        if (SORTED_HOPLOG_PATTERN.matcher(entryName).matches()) {
            sortedCandidates.add(entry);
        }
    }

    final FileStatus[] markers = getExpiryMarkers();
    final FileStatus[] validHoplogs = filterValidHoplogs(
            sortedCandidates.toArray(new FileStatus[sortedCandidates.size()]), markers);
    if (validHoplogs == null) {
        return loaded;
    }

    for (FileStatus candidate : validHoplogs) {
        if (candidate.isDirectory()) {
            continue;
        }

        final Path p = candidate.getPath();
        // the status is fetched again here; empty files are skipped
        if (fs.getFileStatus(p).getLen() > 0) {
            loaded.add(new HFileSortedOplog(store, p, store.getBlockCache(), stats, store.getStats()));
        }
    }

    return loaded;
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java

License:Apache License

/**
 * Removes temporary files: entries of {@code oldTmpFiles} whose age exceeds
 * {@code TMP_FILE_EXPIRATION_TIME_MS}, and every hoplog in {@code tmpFiles}.
 *
 * @throws IOException
 *           if a temporary file cannot be deleted
 */
private void cleanupTmpFiles() throws IOException {
    if (oldTmpFiles == null && tmpFiles == null) {
        return;
    }

    if (oldTmpFiles != null) {
        FileSystem fs = store.getFileSystem();
        long now = System.currentTimeMillis();
        for (Iterator<FileStatus> itr = oldTmpFiles.iterator(); itr.hasNext();) {
            FileStatus file = itr.next();
            // Delete only files whose expiration time has passed. The previous
            // test (">") removed files that had NOT yet expired and, because
            // time only moves forward, never removed the ones that had.
            if (file.getModificationTime() + TMP_FILE_EXPIRATION_TIME_MS < now) {
                if (logger.isDebugEnabled()) {
                    logger.debug("{}Deleting temporary file:" + file.getPath(), logPrefix);
                }
                fs.delete(file.getPath(), false);
                itr.remove();
            }
        }
    }
    if (tmpFiles != null) {
        for (Hoplog so : tmpFiles.keySet()) {
            if (logger.isDebugEnabled()) {
                logger.debug("{}Deleting temporary file:" + so.getFileName(), logPrefix);
            }
            deleteTmpFile(null, so);
        }
    }
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSUnsortedHoplogOrganizer.java

License:Apache License

/**
 * locks sorted oplogs collection, removes oplog and renames for deletion later
 * @throws IOException /*from   w  w  w  .j  a v a2  s  .  c o  m*/
 */
private void markHoplogsForDeletion() throws IOException {

    ArrayList<IOException> errors = new ArrayList<IOException>();
    FileStatus validHoplogs[] = FSUtils.listStatus(fileSystem, bucketPath, new PathFilter() {
        @Override
        public boolean accept(Path file) {
            // All valid hoplog files must match the regex
            Matcher matcher = HOPLOG_PATTERN.matcher(file.getName());
            return matcher.matches();
        }
    });

    FileStatus[] expired = getExpiredHoplogs();
    validHoplogs = filterValidHoplogs(validHoplogs, expired);

    if (validHoplogs == null || validHoplogs.length == 0) {
        return;
    }
    for (FileStatus fileStatus : validHoplogs) {
        try {
            addExpiryMarkerForAFile(getHoplog(fileStatus.getPath()));
        } catch (IOException e) {
            // even if there is an IO error continue removing other hoplogs and
            // notify at the end
            errors.add(e);
        }
    }

    if (!errors.isEmpty()) {
        for (IOException e : errors) {
            logger.warn(LocalizedStrings.HOPLOG_HOPLOG_REMOVE_FAILED, e);
        }
    }
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java

License:Apache License

/**
 * Collects, per bucket directory of a region, the hoplog files of the given
 * type whose timestamps fall inside the requested window.
 *
 * <p>Only sub-directories with an integer name are treated as buckets. A
 * hoplog is dropped when its timestamp is before {@code start} (if
 * {@code start > 0}) or after {@code end} (if {@code end > 0}); of the
 * hoplogs after {@code end}, the one with the smallest timestamp is still
 * included. A hoplog with an expiry marker is dropped once more than 80% of
 * the clean-up interval has elapsed since the marker time.
 *
 * @param regionPath directory holding the bucket directories
 * @param fs file system used for listing
 * @param type suffix appended to the hoplog name regex
 * @param start lower timestamp bound, ignored unless positive
 * @param end upper timestamp bound, ignored unless positive
 * @return one collection of hoplog files per bucket that has at least one
 * @throws IOException if a directory listing or the clean-up interval read fails
 */
public static Collection<Collection<FileStatus>> getBucketHoplogs(Path regionPath, FileSystem fs, String type,
        long start, long end) throws IOException {
    final Collection<Collection<FileStatus>> perBucket = new ArrayList<Collection<FileStatus>>();

    // hoplog file names follow this pattern; expiry markers append a fixed extension
    final String hoplogRegex = AbstractHoplogOrganizer.HOPLOG_NAME_REGEX + type;
    final Pattern pattern = Pattern.compile(hoplogRegex);
    final Pattern expiredPattern = Pattern
            .compile(hoplogRegex + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION);

    final long intervalDurationMillis = readCleanUpIntervalMillis(fs,
            new Path(regionPath.getParent(), HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME));

    // a region directory contains one directory per bucket, named by an integer
    for (FileStatus bucket : fs.listStatus(regionPath)) {
        if (!bucket.isDirectory()) {
            continue;
        }
        final Path bucketDir = bucket.getPath();
        try {
            Integer.parseInt(bucketDir.getName());
        } catch (NumberFormatException notABucket) {
            continue;
        }

        final ArrayList<FileStatus> selected = new ArrayList<FileStatus>();

        // visit every file of the bucket to find the flush and seq hoplogs
        final FileStatus[] bucketFiles = fs.listStatus(bucketDir);
        final Map<String, Long> expiredHoplogs = getExpiredHoplogs(fs, bucketFiles, expiredPattern);

        FileStatus oldestAfterEnd = null;
        long oldestAfterEndTs = Long.MAX_VALUE;
        final long now = System.currentTimeMillis();

        for (FileStatus file : bucketFiles) {
            if (!file.isFile()) {
                continue;
            }

            final Matcher match = pattern.matcher(file.getPath().getName());
            if (!match.matches()) {
                continue;
            }

            final long timeStamp = AbstractHoplogOrganizer.getHoplogTimestamp(match);
            final boolean beforeWindow = start > 0 && timeStamp < start;
            if (beforeWindow) {
                // holds only records older than the start time stamp
                continue;
            }

            final boolean afterWindow = end > 0 && timeStamp > end;
            if (afterWindow) {
                // holds records mutated after the end time stamp; remember it
                // only if it is the oldest such hoplog seen so far
                if (timeStamp < oldestAfterEndTs) {
                    oldestAfterEndTs = timeStamp;
                    oldestAfterEnd = file;
                }
                continue;
            }

            final long expiredAt = expiredTime(file, expiredHoplogs);
            final boolean pastGrace = expiredAt > 0 && intervalDurationMillis > 0
                    && (now - expiredAt) > 0.8 * intervalDurationMillis;
            if (!pastGrace) {
                selected.add(file);
            }
        }

        if (oldestAfterEnd != null) {
            final long expiredAt = expiredTime(oldestAfterEnd, expiredHoplogs);
            final boolean pastGrace = expiredAt > 0 && intervalDurationMillis > 0
                    && (now - expiredAt) > 0.8 * intervalDurationMillis;
            if (!pastGrace) {
                selected.add(oldestAfterEnd);
            }
        }

        if (!selected.isEmpty()) {
            perBucket.add(selected);
        }
    }

    return perBucket;
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java

License:Apache License

/**
 * Maps the name of every regular file matching the expired-hoplog pattern to
 * that file's modification time.
 *
 * @param fs file system handle; not used by this method
 * @param bucketFiles files of one bucket directory
 * @param expiredPattern pattern an expiry marker's file name must match in full
 * @return marker file name to marker modification time
 * @throws IOException declared by the signature; nothing in this body throws it explicitly
 */
private static Map<String, Long> getExpiredHoplogs(FileSystem fs, FileStatus[] bucketFiles,
        Pattern expiredPattern) throws IOException {
    final Map<String, Long> markerTimes = new HashMap<String, Long>();

    for (FileStatus candidate : bucketFiles) {
        if (candidate.isFile()) {
            final String markerName = candidate.getPath().getName();
            if (expiredPattern.matcher(markerName).matches()) {
                markerTimes.put(markerName, candidate.getModificationTime());
            }
        }
    }
    return markerTimes;
}