Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Source Link

Document

Returns all the files that match pathPattern and are not checksum files.

Usage

From source file:org.apache.falcon.cleanup.AbstractCleanupHandler.java

License:Apache License

/**
 * Looks up the log path for the given entity on the given cluster and returns
 * whatever {@code fs.globStatus} reports for that path.
 *
 * @param fs file system to query
 * @param cluster cluster handed to {@code getLogPath}
 * @param entity entity handed to {@code getLogPath}
 * @return the {@code globStatus} result for the log path, passed through unchanged
 * @throws FalconException wrapping any {@link IOException} raised during the lookup
 */
protected FileStatus[] getAllLogs(FileSystem fs, Cluster cluster, Entity entity) throws FalconException {
    try {
        return fs.globStatus(getLogPath(cluster, entity));
    } catch (IOException ioe) {
        throw new FalconException(ioe);
    }
}

From source file:org.apache.falcon.cleanup.FeedCleanupHandler.java

License:Apache License

/**
 * Delete the staging area used for replicating tables.
 *
 * <p>Does nothing when the feed's storage type is {@code FILESYSTEM}. Otherwise
 * globs three levels below the replication staging directory and hands the
 * matches to {@code delete} together with the retention limit.
 *
 * @param cluster cluster hosting the staged data
 * @param feed feed entity
 * @param retention retention limit
 * @throws FalconException wrapping any {@link IOException} raised while globbing or deleting
 */
private void deleteStagedData(Cluster cluster, Feed feed, long retention) throws FalconException {
    final Storage feedStorage = FeedHelper.createStorage(cluster, feed);
    if (Storage.TYPE.FILESYSTEM == feedStorage.getType()) {
        // FS does NOT use staging dirs, so there is nothing to clean up.
        return;
    }

    final String stagingRoot = FeedHelper.getStagingDir(cluster, feed, (CatalogStorage) feedStorage,
            Tag.REPLICATION);
    // Glob layout: stagingDir/dataOutPartitionValue/nominal-time/data
    final Path stagedGlob = new Path(stagingRoot + "/*/*/*");
    final FileSystem fs = getFileSystem(cluster);
    try {
        delete(cluster, feed, retention, fs.globStatus(stagedGlob));
    } catch (IOException ioe) {
        throw new FalconException(ioe);
    }
}

From source file:org.apache.falcon.entity.FileSystemStorage.java

License:Apache License

/**
 * Turns a feed base path into a glob by replacing every substring matched by
 * {@code FeedDataPath.PATTERN} with {@code *}, then lists the matching files.
 *
 * @param fs file system to glob against
 * @param feedBasePath feed path that may contain pattern variables
 * @return the {@code globStatus} result for the rewritten path, or {@code null}
 *         when the path contained no pattern match (a static path)
 * @throws IOException if the glob lookup fails
 */
private FileStatus[] findFilesForFeed(FileSystem fs, String feedBasePath) throws IOException {
    String globPath = feedBasePath;
    boolean substituted = false;

    // Restart matching after each substitution because the string has changed.
    Matcher variable = FeedDataPath.PATTERN.matcher(globPath);
    while (variable.find()) {
        substituted = true;
        // Literal replacement of every occurrence of the matched text.
        globPath = globPath.replace(variable.group(), "*");
        variable = FeedDataPath.PATTERN.matcher(globPath);
    }

    if (!substituted) {
        LOG.info("Ignoring static path {}", globPath);
        return null;
    }

    LOG.info("Searching for {}", globPath);
    return fs.globStatus(new Path(globPath));
}

From source file:org.apache.falcon.latedata.LateDataHandler.java

License:Apache License

/**
 * Sums the content-summary length of every path matched by {@code inPath}.
 *
 * @param inPath path (possibly a glob) to measure
 * @param conf configuration used to create the file system
 * @return the summed length, or 0 when the glob yields {@code null} or no matches
 * @throws IOException if a file system call fails
 * @throws FalconException declared by the signature; presumably raised by file system creation
 */
private long usage(Path inPath, Configuration conf) throws IOException, FalconException {
    final FileSystem fs = HadoopClientFactory.get().createFileSystem(inPath.toUri(), conf);
    final FileStatus[] matches = fs.globStatus(inPath);

    long bytes = 0;
    if (matches != null) {
        for (FileStatus match : matches) {
            bytes += fs.getContentSummary(match.getPath()).getLength();
        }
    }
    return bytes;
}

From source file:org.apache.falcon.logging.LogProvider.java

License:Apache License

/**
 * Fills in the log URLs of an instance from the files found in its run log directory.
 *
 * <p>Every file under {@code <logPath>/job-<date>/<formattedRunId>/} is turned into a
 * DFS browser URL. The file named {@code oozie.log} becomes {@code instance.logFile};
 * every other file becomes an {@link InstanceAction} in {@code instance.actions}.
 * When the glob returns {@code null} or nothing, the instance is left untouched.
 *
 * @param fs file system holding the logs
 * @param cluster cluster whose storage URL and log path are used
 * @param entity entity whose log path is used
 * @param instance instance to populate; also the return value
 * @param formattedRunId run id component of the log directory
 * @return the same {@code instance} that was passed in
 * @throws FalconException declared by the signature
 * @throws OozieClientException declared by the signature
 * @throws IOException if the glob lookup fails
 */
private Instance populateActionLogUrls(FileSystem fs, Cluster cluster, Entity entity, Instance instance,
        String formattedRunId) throws FalconException, OozieClientException, IOException {

    // Directory of this run's logs; identical for the glob and for every generated URL.
    String runLogDir = EntityUtil.getLogPath(cluster, entity) + "/job-"
            + EntityUtil.fromUTCtoURIDate(instance.instance) + "/" + formattedRunId;

    Path actionPaths = new Path(ClusterHelper.getStorageUrl(cluster), runLogDir + "/*");
    FileStatus[] actions = fs.globStatus(actionPaths);
    if (actions != null && actions.length > 0) {
        // Size for the worst case (no oozie.log present) and trim afterwards, so a
        // missing oozie.log can no longer overflow the array.
        InstanceAction[] collected = new InstanceAction[actions.length];
        int count = 0;
        for (FileStatus file : actions) {
            String fileName = file.getPath().getName();
            String dfsBrowserUrl = getDFSbrowserUrl(ClusterHelper.getStorageUrl(cluster), runLogDir, fileName);
            if (fileName.equals("oozie.log")) {
                instance.logFile = dfsBrowserUrl;
                continue;
            }

            collected[count++] = new InstanceAction(getActionName(fileName), getActionStatus(fileName),
                    dfsBrowserUrl);
        }

        InstanceAction[] instanceActions = new InstanceAction[count];
        System.arraycopy(collected, 0, instanceActions, 0, count);
        instance.actions = instanceActions;
    }
    return instance;

}

From source file:org.apache.falcon.logging.LogProvider.java

License:Apache License

/**
 * Returns the last directory among the paths matched by {@code jobLogPath}.
 *
 * <p>In case of multiple runs, dirs are sorted in increasing order of runs, so the
 * last directory seen is the one for the max runId (whose dir exists).
 *
 * @param fs file system to query
 * @param jobLogPath path or glob covering the per-run log directories
 * @return the last matching directory, or {@code null} when nothing matches or no
 *         match is a directory
 * @throws IOException if a file system call fails
 */
private Path findMaxRunIdPath(FileSystem fs, Path jobLogPath) throws IOException {
    FileStatus[] candidates = fs.globStatus(jobLogPath);
    if (candidates == null) {
        // globStatus returns null for a non-glob path that does not exist;
        // treat that the same as "no run directory found".
        return null;
    }

    Path maxRunIdPath = null;
    for (FileStatus fileStatus : candidates) {
        if (fs.isDirectory(fileStatus.getPath())) {
            maxRunIdPath = fileStatus.getPath();
        }
    }
    return maxRunIdPath;
}

From source file:org.apache.falcon.replication.FeedReplicator.java

License:Apache License

/**
 * Creates the availability flag file after replication.
 *
 * <p>The flag name is read from {@code falcon.feed.availability.flag} in {@code conf}.
 * An empty flag file is created inside every path matched by the DistCp target path;
 * when the glob returns {@code null}, the flag is created directly under the target path.
 *
 * @param conf configuration supplying the availability flag name
 * @param options DistCp options supplying the target path
 * @throws IOException if a file system call fails
 * @throws FalconException declared by the signature; presumably raised by file system creation
 */
private void executePostProcessing(Configuration conf, DistCpOptions options)
        throws IOException, FalconException {
    final Path target = options.getTargetPath();
    final FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(target.toUri(), getConf());

    final String availabilityFlag = conf.get("falcon.feed.availability.flag");
    final FileStatus[] matched = fs.globStatus(target);
    if (matched == null) {
        // As distcp is not copying empty directories we are creating availabilityFlag file here
        fs.create(new Path(target, availabilityFlag)).close();
        LOG.info("No files present in path: {}", target);
        return;
    }

    for (FileStatus entry : matched) {
        final Path flagFile = new Path(entry.getPath(), availabilityFlag);
        fs.create(flagFile).close();
        LOG.info("Created {}", flagFile);
    }
}

From source file:org.apache.falcon.workflow.LateDataHandler.java

License:Apache License

/**
 * Computes the total content-summary length of the paths matched by {@code inPath},
 * using a proxied file system.
 *
 * @param inPath path (possibly a glob) to measure
 * @param conf configuration used to create the proxied file system
 * @return the summed length, or 0 when the glob yields {@code null} or no matches
 * @throws IOException if a file system call fails
 * @throws FalconException declared by the signature; presumably raised by file system creation
 */
private long usage(Path inPath, Configuration conf) throws IOException, FalconException {
    final FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(inPath.toUri(), conf);
    final FileStatus[] matched = fs.globStatus(inPath);
    if (matched == null || matched.length == 0) {
        return 0;
    }

    long sum = 0;
    for (int idx = 0; idx < matched.length; idx++) {
        sum += fs.getContentSummary(matched[idx].getPath()).getLength();
    }
    return sum;
}

From source file:org.apache.hama.bsp.TestBSPMasterGroomServer.java

License:Apache License

/**
 * Verifies the sequence-file output written under {@code OUTPUT_PATH} and then
 * deletes {@code TMP_OUTPUT}.
 *
 * <p>Asserts that the number of {@code part-*} entries equals {@code tasks}. For every
 * entry that is not a directory, it reads all records and asserts that each key equals
 * the current superstep, where the superstep advances after every {@code tasks}
 * records. The final superstep must equal {@code ClassSerializePrinting.NUM_SUPERSTEPS}.
 *
 * @param fileSys file system holding the output
 * @param conf configuration used to open the sequence files
 * @param tasks expected number of part files, and records per superstep
 * @throws Exception if reading fails or the file system reports an error
 */
public static void checkOutput(FileSystem fileSys, Configuration conf, int tasks) throws Exception {
    FileStatus[] listStatus = fileSys.globStatus(new Path(OUTPUT_PATH + "/part-*"));

    // Expected value goes first so a failure message reads correctly.
    assertEquals(tasks, listStatus.length);
    for (FileStatus status : listStatus) {
        if (!status.isDir()) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, status.getPath(), conf);
            int superStep = 0;
            try {
                int taskstep = 0;
                IntWritable key = new IntWritable();
                Text value = new Text();
                /*
                 * The serialize printing task should write in each superstep
                 * "tasks"-times its superstep, along with the hostname.
                 */
                while (reader.next(key, value)) {
                    assertEquals(superStep, key.get());
                    taskstep++;
                    if (taskstep % tasks == 0) {
                        superStep++;
                    }
                }
            } finally {
                // Close even when an assertion above fails, so the reader is not leaked.
                reader.close();
            }
            // the maximum should be the number of supersteps defined in the task
            assertEquals(ClassSerializePrinting.NUM_SUPERSTEPS, superStep);
        }
    }

    // NOTE(review): output is read from OUTPUT_PATH but cleanup removes TMP_OUTPUT — confirm intended.
    fileSys.delete(new Path(TMP_OUTPUT), true);
}

From source file:org.apache.hama.examples.FastGraphGenTest.java

License:Apache License

/**
 * Runs {@code FastGraphGen} and checks every line of its {@code part-*} output:
 * each line's second tab-separated field holds at most 10 space-separated ids,
 * and every id lies in the range [0, 20). Deletes {@code TEST_OUTPUT} afterwards.
 */
@Test
public void testGraphGenerator() throws Exception {
    Configuration conf = new Configuration();

    // vertex size : 20
    // maximum edges : 10
    // output path : TEST_OUTPUT
    // tasks num : 3
    FastGraphGen.main(new String[] { "-v", "20", "-e", "10", "-o", TEST_OUTPUT, "-t", "3" });
    FileSystem fs = FileSystem.get(conf);

    for (FileStatus part : fs.globStatus(new Path(TEST_OUTPUT + "/part-*"))) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(part.getPath())));
        try {
            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
                // Field 0 is the key; field 1 is the space-separated outlink list.
                String[] outlinks = row.split("\t")[1].split(" ");
                assertTrue(outlinks.length <= 10);
                for (String outlink : outlinks) {
                    int targetId = Integer.parseInt(outlink);
                    assertTrue(targetId < 20);
                    assertTrue(targetId >= 0);
                }
            }
        } finally {
            reader.close();
        }
    }

    fs.delete(new Path(TEST_OUTPUT), true);
}