List of usage examples for org.apache.hadoop.fs FileSystem getContentSummary
public ContentSummary getContentSummary(Path f) throws IOException
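For orientation, here is a minimal sketch of the call itself before the project-specific examples. The path /tmp/data and the class name ContentSummaryExample are placeholders, and the accessors used (getLength, getFileCount, getDirectoryCount, getSpaceConsumed) are the same ContentSummary getters the examples on this page rely on.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryExample {
    public static void main(String[] args) throws Exception {
        // Placeholder path; replace with a file or directory that exists on your cluster.
        Path path = new Path(args.length > 0 ? args[0] : "/tmp/data");

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getContentSummary() aggregates length, file/directory counts and
        // space consumed (including replication) for the whole subtree under 'path'.
        ContentSummary summary = fs.getContentSummary(path);

        System.out.println("length (bytes): " + summary.getLength());
        System.out.println("files:          " + summary.getFileCount());
        System.out.println("directories:    " + summary.getDirectoryCount());
        System.out.println("space consumed: " + summary.getSpaceConsumed());

        fs.close();
    }
}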
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/**
 * Method to go through the HDFS filesystem in a DFS to find all files
 *
 * fs: FileSystem object from HDFS
 * minDate: Oldest date for files to be backed up
 * maxDate: Newest date for files to be backed up
 * p: Path in HDFS to look for files
 * pathList: Will be filled with all files in p
 * hmTimestamps: hashmap of timestamps for later sorting
 **/
public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList,
        HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            // dump the mkdir and chmod commands for this
            // directory -- skip root directory only
            {
                FileStatus stat = fs.getFileStatus(p);

                if (!sPath.equals("/")) {
                    m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
                }

                m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

                Short sh = new Short(stat.getPermission().toShort());
                m_wrChmods.println("hadoop fs -chmod " + Long.toOctalString(sh.longValue()) + " " + sPath);
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // another database to regular hive tables to
            // partitioned hive tables. We use table names to
            // both exclude some from the backup, and for the rest
            // to dump out the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                m_nIgnoredTables++;

                if (m_nIgnoredTables < 5) {
                    System.out.println("Skipping ignore-table file: " + sPath);
                } else if (m_nIgnoredTables == 5) {
                    System.out.println("(...not showing other skipped tables...)");
                }
                return;
            }

            FileStatus stat = fs.getFileStatus(p);

            tmpDate = stat.getModificationTime() / 1000;

            // store the chmods/chowns for all files
            m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);
            m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

            // check dates. is the file too old?
            if (tmpDate < minDate) {
                return;
            }

            // is the file too recent?
            if (tmpDate > maxDate) {
                //System.out.println("file too recent: " + sPath);
                return;
            }

            // file timestamp is ok
            pathList.add(p);
            hmTimestamps.put(p, new Long(tmpDate));

            // store info about total bytes needed to backup
            m_nTotalBytes += fs.getContentSummary(p).getLength();
        }
    } catch (IOException e) {
        System.err.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1) ;
    }
}
From source file:com.twitter.hraven.etl.JobFilePreprocessor.java
License:Apache License
@Override
public int run(String[] args) throws Exception {

    // When we started processing. This is also the upper limit of files we
    // accept, next run will pick up the new incoming files.
    long processingStartMillis = System.currentTimeMillis();

    Configuration hbaseConf = HBaseConfiguration.create(getConf());

    // Grab input args and allow for -Dxyz style arguments
    String[] otherArgs = new GenericOptionsParser(hbaseConf, args).getRemainingArgs();

    // Grab the arguments we're looking for.
    CommandLine commandLine = parseArgs(otherArgs);

    // Output should be an hdfs path.
    FileSystem hdfs = FileSystem.get(hbaseConf);

    // Grab the output path argument
    String output = commandLine.getOptionValue("o");
    LOG.info(" output=" + output);
    Path outputPath = new Path(output);
    FileStatus outputFileStatus = hdfs.getFileStatus(outputPath);

    if (!outputFileStatus.isDir()) {
        throw new IOException("Output is not a directory: " + outputFileStatus.getPath().getName());
    }

    // Grab the input path argument
    String input;
    if (commandLine.hasOption("i")) {
        input = commandLine.getOptionValue("i");
    } else {
        input = hbaseConf.get("mapred.job.tracker.history.completed.location");
    }
    LOG.info("input=" + input);

    // Grab the batch-size argument
    int batchSize;
    if (commandLine.hasOption("b")) {
        try {
            batchSize = Integer.parseInt(commandLine.getOptionValue("b"));
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    "batch size option -b is not a valid number: " + commandLine.getOptionValue("b"), nfe);
        }
        // Additional check
        if (batchSize < 1) {
            throw new IllegalArgumentException(
                    "Cannot process files in batches smaller than 1. Specified batch size option -b is: "
                            + commandLine.getOptionValue("b"));
        }
    } else {
        batchSize = DEFAULT_BATCH_SIZE;
    }

    boolean forceAllFiles = commandLine.hasOption("f");
    LOG.info("forceAllFiles: " + forceAllFiles);

    Path inputPath = new Path(input);
    FileStatus inputFileStatus = hdfs.getFileStatus(inputPath);

    if (!inputFileStatus.isDir()) {
        throw new IOException("Input is not a directory: " + inputFileStatus.getPath().getName());
    }

    // Grab the cluster argument
    String cluster = commandLine.getOptionValue("c");
    LOG.info("cluster=" + cluster);

    /**
     * Grab the size of huge files to be moved argument
     * hbase cell can't store files bigger than
     * maxFileSize, hence no need to consider them for rawloading
     * Reference:
     * {@link https://github.com/twitter/hraven/issues/59}
     */
    String maxFileSizeStr = commandLine.getOptionValue("s");
    LOG.info("maxFileSize=" + maxFileSizeStr);
    long maxFileSize = DEFAULT_RAW_FILE_SIZE_LIMIT;
    try {
        maxFileSize = Long.parseLong(maxFileSizeStr);
    } catch (NumberFormatException nfe) {
        throw new ProcessingException(
                "Caught NumberFormatException during conversion of maxFileSize to long", nfe);
    }

    ProcessRecordService processRecordService = new ProcessRecordService(hbaseConf);

    boolean success = true;
    try {
        // Figure out where we last left off (if anywhere at all)
        ProcessRecord lastProcessRecord = null;

        if (!forceAllFiles) {
            lastProcessRecord = processRecordService.getLastSuccessfulProcessRecord(cluster);
        }

        long minModificationTimeMillis = 0;
        if (lastProcessRecord != null) {
            // Start of this time period is the end of the last period.
            minModificationTimeMillis = lastProcessRecord.getMaxModificationTimeMillis();
        }

        // Do a sanity check. The end time of the last scan better not be later
        // than when we started processing.
        if (minModificationTimeMillis > processingStartMillis) {
            throw new RuntimeException("The last processing record has maxModificationMillis later than now: "
                    + lastProcessRecord);
        }

        // Accept only jobFiles and only those that fall in the desired range of
        // modification time.
        JobFileModifiedRangePathFilter jobFileModifiedRangePathFilter = new JobFileModifiedRangePathFilter(
                hbaseConf, minModificationTimeMillis);

        String timestamp = Constants.TIMESTAMP_FORMAT.format(new Date(minModificationTimeMillis));

        ContentSummary contentSummary = hdfs.getContentSummary(inputPath);
        LOG.info("Listing / filtering (" + contentSummary.getFileCount() + ") files in: " + inputPath
                + " that are modified since " + timestamp);

        // get the files in the done folder,
        // need to traverse dirs under done recursively for versions
        // that include MAPREDUCE-323: on/after hadoop 0.20.203.0
        // on/after cdh3u5
        FileStatus[] jobFileStatusses = FileLister.getListFilesToProcess(maxFileSize, true, hdfs, inputPath,
                jobFileModifiedRangePathFilter);

        LOG.info("Sorting " + jobFileStatusses.length + " job files.");

        Arrays.sort(jobFileStatusses, new FileStatusModificationComparator());

        // Process these files in batches at a time.
        int batchCount = BatchUtil.getBatchCount(jobFileStatusses.length, batchSize);
        LOG.info("Batch count: " + batchCount);
        for (int b = 0; b < batchCount; b++) {
            processBatch(jobFileStatusses, b, batchSize, processRecordService, cluster, outputPath);
        }

    } finally {
        processRecordService.close();
    }

    Statistics statistics = FileSystem.getStatistics(inputPath.toUri().getScheme(), hdfs.getClass());
    if (statistics != null) {
        LOG.info("HDFS bytes read: " + statistics.getBytesRead());
        LOG.info("HDFS bytes written: " + statistics.getBytesWritten());
        LOG.info("HDFS read ops: " + statistics.getReadOps());
        LOG.info("HDFS large read ops: " + statistics.getLargeReadOps());
        LOG.info("HDFS write ops: " + statistics.getWriteOps());
    }

    // Return the status
    return success ? 0 : 1;
}
From source file:org.apache.accumulo.monitor.servlets.DefaultServlet.java
License:Apache License
private void doAccumuloTable(StringBuilder sb) throws IOException {
    // Accumulo
    VolumeManager vm = VolumeManagerImpl.get(ServerConfiguration.getSiteConfiguration());
    MasterMonitorInfo info = Monitor.getMmi();
    sb.append("<table>\n");
    sb.append("<tr><th colspan='2'><a href='/master'>Accumulo Master</a></th></tr>\n");
    if (info == null) {
        sb.append("<tr><td colspan='2'><span class='error'>Master is Down</span></td></tr>\n");
    } else {
        long totalAcuBytesUsed = 0l;
        long totalHdfsBytesUsed = 0l;

        try {
            for (String baseDir : VolumeConfiguration
                    .getVolumeUris(ServerConfiguration.getSiteConfiguration())) {
                final Path basePath = new Path(baseDir);
                final FileSystem fs = vm.getVolumeByPath(basePath).getFileSystem();

                try {
                    // Calculate the amount of space used by Accumulo on the FileSystem
                    ContentSummary accumuloSummary = fs.getContentSummary(basePath);
                    long bytesUsedByAcuOnFs = accumuloSummary.getSpaceConsumed();
                    totalAcuBytesUsed += bytesUsedByAcuOnFs;

                    // Catch the overflow -- this is big data
                    if (totalAcuBytesUsed < bytesUsedByAcuOnFs) {
                        log.debug("Overflowed long in bytes used by Accumulo for " + baseDir);
                        totalAcuBytesUsed = 0l;
                        break;
                    }

                    // Calculate the total amount of space used on the FileSystem
                    ContentSummary volumeSummary = fs.getContentSummary(new Path("/"));
                    long bytesUsedOnVolume = volumeSummary.getSpaceConsumed();
                    totalHdfsBytesUsed += bytesUsedOnVolume;

                    // Catch the overflow -- this is big data
                    if (totalHdfsBytesUsed < bytesUsedOnVolume) {
                        log.debug("Overflowed long in bytes used in HDFS for " + baseDir);
                        totalHdfsBytesUsed = 0;
                        break;
                    }
                } catch (Exception ex) {
                    log.trace("Unable to get disk usage information for " + baseDir, ex);
                }
            }

            String diskUsed = "Unknown";
            String consumed = null;
            if (totalAcuBytesUsed > 0) {
                // Convert Accumulo usage to a readable String
                diskUsed = bytes(totalAcuBytesUsed);

                if (totalHdfsBytesUsed > 0) {
                    // Compute amount of space used by Accumulo as a percentage of total space usage.
                    consumed = String.format("%.2f%%", totalAcuBytesUsed * 100. / totalHdfsBytesUsed);
                }
            }

            boolean highlight = false;
            tableRow(sb, (highlight = !highlight), "Disk Used", diskUsed);
            if (null != consumed)
                tableRow(sb, (highlight = !highlight), "% of Used DFS", consumed);
            tableRow(sb, (highlight = !highlight), "<a href='/tables'>Tables</a>",
                    NumberType.commas(Monitor.getTotalTables()));
            tableRow(sb, (highlight = !highlight), "<a href='/tservers'>Tablet Servers</a>",
                    NumberType.commas(info.tServerInfo.size(), 1, Long.MAX_VALUE));
            tableRow(sb, (highlight = !highlight), "<a href='/tservers'>Dead Tablet Servers</a>",
                    NumberType.commas(info.deadTabletServers.size(), 0, 0));
            tableRow(sb, (highlight = !highlight), "Tablets",
                    NumberType.commas(Monitor.getTotalTabletCount(), 1, Long.MAX_VALUE));
            tableRow(sb, (highlight = !highlight), "Entries", NumberType.commas(Monitor.getTotalEntries()));
            tableRow(sb, (highlight = !highlight), "Lookups", NumberType.commas(Monitor.getTotalLookups()));
            tableRow(sb, (highlight = !highlight), "Uptime",
                    Duration.format(System.currentTimeMillis() - Monitor.getStartTime()));
        } catch (Exception e) {
            log.debug(e, e);
        }
    }
    sb.append("</table>\n");
}
From source file:org.apache.accumulo.server.client.BulkImporter.java
License:Apache License
private Map<Path, List<AssignmentInfo>> estimateSizes(final AccumuloConfiguration acuConf,
        final Configuration conf, final VolumeManager vm, Map<Path, List<TabletLocation>> assignments,
        Collection<Path> paths, int numThreads) {

    long t1 = System.currentTimeMillis();

    final Map<Path, Long> mapFileSizes = new TreeMap<>();

    try {
        for (Path path : paths) {
            FileSystem fs = vm.getVolumeByPath(path).getFileSystem();
            mapFileSizes.put(path, fs.getContentSummary(path).getLength());
        }
    } catch (IOException e) {
        log.error("Failed to get map files in for {}: {}", paths, e.getMessage(), e);
        throw new RuntimeException(e);
    }

    final Map<Path, List<AssignmentInfo>> ais = Collections
            .synchronizedMap(new TreeMap<Path, List<AssignmentInfo>>());

    ExecutorService threadPool = Executors.newFixedThreadPool(numThreads,
            new NamingThreadFactory("estimateSizes"));

    for (final Entry<Path, List<TabletLocation>> entry : assignments.entrySet()) {
        if (entry.getValue().size() == 1) {
            TabletLocation tabletLocation = entry.getValue().get(0);

            // if the tablet completely contains the map file, there is no
            // need to estimate its size
            ais.put(entry.getKey(), Collections.singletonList(
                    new AssignmentInfo(tabletLocation.tablet_extent, mapFileSizes.get(entry.getKey()))));
            continue;
        }

        Runnable estimationTask = new Runnable() {
            @Override
            public void run() {
                Map<KeyExtent, Long> estimatedSizes = null;

                try {
                    estimatedSizes = FileUtil.estimateSizes(acuConf, entry.getKey(),
                            mapFileSizes.get(entry.getKey()), extentsOf(entry.getValue()), conf, vm);
                } catch (IOException e) {
                    log.warn("Failed to estimate map file sizes {}", e.getMessage());
                }

                if (estimatedSizes == null) {
                    // estimation failed, do a simple estimation
                    estimatedSizes = new TreeMap<>();
                    long estSize = (long) (mapFileSizes.get(entry.getKey()) / (double) entry.getValue().size());
                    for (TabletLocation tl : entry.getValue())
                        estimatedSizes.put(tl.tablet_extent, estSize);
                }

                List<AssignmentInfo> assignmentInfoList = new ArrayList<>(estimatedSizes.size());

                for (Entry<KeyExtent, Long> entry2 : estimatedSizes.entrySet())
                    assignmentInfoList.add(new AssignmentInfo(entry2.getKey(), entry2.getValue()));

                ais.put(entry.getKey(), assignmentInfoList);
            }
        };

        threadPool.submit(new TraceRunnable(new LoggingRunnable(log, estimationTask)));
    }

    threadPool.shutdown();

    while (!threadPool.isTerminated()) {
        try {
            threadPool.awaitTermination(60, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            log.error("Encountered InterruptedException while waiting for the threadPool to terminate.", e);
            throw new RuntimeException(e);
        }
    }

    long t2 = System.currentTimeMillis();

    log.debug(String.format("Estimated map files sizes in %6.2f secs", (t2 - t1) / 1000.0));

    return ais;
}
From source file:org.apache.accumulo.server.monitor.servlets.DefaultServlet.java
License:Apache License
private void doAccumuloTable(StringBuilder sb) throws IOException {
    // Accumulo
    Configuration conf = CachedConfiguration.getInstance();
    FileSystem fs = TraceFileSystem
            .wrap(FileUtil.getFileSystem(conf, ServerConfiguration.getSiteConfiguration()));
    MasterMonitorInfo info = Monitor.getMmi();
    sb.append("<table>\n");
    sb.append("<tr><th colspan='2'><a href='/master'>Accumulo Master</a></th></tr>\n");
    if (info == null) {
        sb.append("<tr><td colspan='2'><span class='error'>Master is Down</span></td></tr>\n");
    } else {
        String consumed = "Unknown";
        String diskUsed = "Unknown";
        try {
            Path path = new Path(Monitor.getSystemConfiguration().get(Property.INSTANCE_DFS_DIR));
            log.debug("Reading the content summary for " + path);
            try {
                ContentSummary acu = fs.getContentSummary(path);
                ContentSummary rootSummary = fs.getContentSummary(new Path("/"));
                consumed = String.format("%.2f%%",
                        acu.getSpaceConsumed() * 100. / rootSummary.getSpaceConsumed());
                diskUsed = bytes(acu.getSpaceConsumed());
            } catch (Exception ex) {
                log.trace("Unable to get disk usage information from hdfs", ex);
            }

            boolean highlight = false;
            tableRow(sb, (highlight = !highlight), "Disk Used", diskUsed);
            if (fs.getUsed() != 0)
                tableRow(sb, (highlight = !highlight), "% of Used DFS", consumed);
            tableRow(sb, (highlight = !highlight), "<a href='/tables'>Tables</a>",
                    NumberType.commas(Monitor.getTotalTables()));
            tableRow(sb, (highlight = !highlight), "<a href='/tservers'>Tablet Servers</a>",
                    NumberType.commas(info.tServerInfo.size(), 1, Long.MAX_VALUE));
            tableRow(sb, (highlight = !highlight), "<a href='/tservers'>Dead Tablet Servers</a>",
                    NumberType.commas(info.deadTabletServers.size(), 0, 0));
            tableRow(sb, (highlight = !highlight), "Tablets",
                    NumberType.commas(Monitor.getTotalTabletCount(), 1, Long.MAX_VALUE));
            tableRow(sb, (highlight = !highlight), "Entries", NumberType.commas(Monitor.getTotalEntries()));
            tableRow(sb, (highlight = !highlight), "Lookups", NumberType.commas(Monitor.getTotalLookups()));
            tableRow(sb, (highlight = !highlight), "Uptime",
                    Duration.format(System.currentTimeMillis() - Monitor.getStartTime()));
        } catch (Exception e) {
            log.debug(e, e);
        }
    }
    sb.append("</table>\n");
}
From source file:org.apache.carbondata.core.datastorage.store.impl.FileFactory.java
License:Apache License
/**
 * It computes size of directory
 *
 * @param filePath
 * @return size in bytes
 * @throws IOException
 */
public static long getDirectorySize(String filePath) throws IOException {
    FileType fileType = getFileType(filePath);
    switch (fileType) {
    case HDFS:
    case VIEWFS:
        Path path = new Path(filePath);
        FileSystem fs = path.getFileSystem(configuration);
        return fs.getContentSummary(path).getLength();
    case LOCAL:
    default:
        File file = new File(filePath);
        return FileUtils.sizeOfDirectory(file);
    }
}
From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java
License:Apache License
/**
 * It computes size of directory
 *
 * @param filePath
 * @return size in bytes
 * @throws IOException
 */
public static long getDirectorySize(String filePath) throws IOException {
    FileType fileType = getFileType(filePath);
    switch (fileType) {
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path path = new Path(filePath);
        FileSystem fs = path.getFileSystem(configuration);
        return fs.getContentSummary(path).getLength();
    case LOCAL:
    default:
        filePath = getUpdatedFilePath(filePath, fileType);
        File file = new File(filePath);
        return FileUtils.sizeOfDirectory(file);
    }
}
From source file:org.apache.falcon.entity.FileSystemStorage.java
License:Apache License
@Override
@SuppressWarnings("MagicConstant")
public List<FeedInstanceStatus> getListing(Feed feed, String clusterName, LocationType locationType,
        Date start, Date end) throws FalconException {

    Calendar calendar = Calendar.getInstance();
    List<Location> clusterSpecificLocation = FeedHelper
            .getLocations(FeedHelper.getCluster(feed, clusterName), feed);
    Location location = getLocation(clusterSpecificLocation, locationType);
    try {
        FileSystem fileSystem = HadoopClientFactory.get().createProxiedFileSystem(getConf());
        Cluster cluster = ClusterHelper.getCluster(clusterName);
        Properties baseProperties = FeedHelper.getClusterProperties(cluster);
        baseProperties.putAll(FeedHelper.getFeedProperties(feed));
        List<FeedInstanceStatus> instances = new ArrayList<FeedInstanceStatus>();
        Date feedStart = FeedHelper.getCluster(feed, clusterName).getValidity().getStart();
        TimeZone tz = feed.getTimezone();
        Date alignedStart = EntityUtil.getNextStartTime(feedStart, feed.getFrequency(), tz, start);

        String basePath = location.getPath();
        while (!end.before(alignedStart)) {
            Properties allProperties = ExpressionHelper.getTimeVariables(alignedStart, tz);
            allProperties.putAll(baseProperties);
            String feedInstancePath = ExpressionHelper.substitute(basePath, allProperties);
            FileStatus fileStatus = getFileStatus(fileSystem, new Path(feedInstancePath));
            FeedInstanceStatus instance = new FeedInstanceStatus(feedInstancePath);

            Date date = FeedHelper.getDate(basePath, new Path(feedInstancePath), tz);
            instance.setInstance(SchemaHelper.formatDateUTC(date));
            if (fileStatus != null) {
                instance.setCreationTime(fileStatus.getModificationTime());
                ContentSummary contentSummary = fileSystem.getContentSummary(fileStatus.getPath());
                if (contentSummary != null) {
                    long size = contentSummary.getSpaceConsumed();
                    instance.setSize(size);
                    if (!StringUtils.isEmpty(feed.getAvailabilityFlag())) {
                        FileStatus doneFile = getFileStatus(fileSystem,
                                new Path(fileStatus.getPath(), feed.getAvailabilityFlag()));
                        if (doneFile != null) {
                            instance.setStatus(FeedInstanceStatus.AvailabilityStatus.AVAILABLE);
                        } else {
                            instance.setStatus(FeedInstanceStatus.AvailabilityStatus.PARTIAL);
                        }
                    } else {
                        instance.setStatus(size > 0 ? FeedInstanceStatus.AvailabilityStatus.AVAILABLE
                                : FeedInstanceStatus.AvailabilityStatus.EMPTY);
                    }
                }
            }
            instances.add(instance);
            calendar.setTime(alignedStart);
            calendar.add(feed.getFrequency().getTimeUnit().getCalendarUnit(),
                    feed.getFrequency().getFrequencyAsInt());
            alignedStart = calendar.getTime();
        }
        return instances;
    } catch (IOException e) {
        LOG.error("Unable to retrieve listing for {}:{}", locationType, getStorageUrl(), e);
        throw new FalconException("Unable to retrieve listing for (URI " + getStorageUrl() + ")", e);
    }
}
From source file:org.apache.falcon.latedata.LateDataHandler.java
License:Apache License
private long usage(Path inPath, Configuration conf) throws IOException, FalconException {
    FileSystem fs = HadoopClientFactory.get().createFileSystem(inPath.toUri(), conf);
    FileStatus[] fileStatuses = fs.globStatus(inPath);
    if (fileStatuses == null || fileStatuses.length == 0) {
        return 0;
    }
    long totalSize = 0;
    for (FileStatus fileStatus : fileStatuses) {
        totalSize += fs.getContentSummary(fileStatus.getPath()).getLength();
    }
    return totalSize;
}
From source file:org.apache.falcon.regression.core.util.AssertUtil.java
License:Apache License
/**
 * Checks that the content at two locations has the same size.
 *
 * @param firstPath path to the first location
 * @param secondPath path to the second location
 * @param fs hadoop file system for the locations
 * @throws IOException
 */
public static void checkContentSize(String firstPath, String secondPath, FileSystem fs) throws IOException {
    final ContentSummary firstSummary = fs.getContentSummary(new Path(firstPath));
    final ContentSummary secondSummary = fs.getContentSummary(new Path(secondPath));
    LOGGER.info(firstPath + " : firstSummary = " + firstSummary.toString(false));
    LOGGER.info(secondPath + " : secondSummary = " + secondSummary.toString(false));
    Assert.assertEquals(firstSummary.getLength(), secondSummary.getLength(),
            "Contents at the two locations don't have same size.");
}