Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.isDirectory().

Prototype

public boolean isDirectory() 

Document

Is this a directory?
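
Before the collected examples below, here is a minimal, self-contained sketch of the call. It assumes a default Hadoop Configuration on the classpath; the class name IsDirectoryExample and the path /tmp/example are purely illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        // Assumed setup: default configuration, illustrative path.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example");

        // getFileStatus throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(path);
        if (status.isDirectory()) {
            System.out.println(path + " is a directory");
        } else {
            System.out.println(path + " is a regular file or symlink");
        }
    }
}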

Usage

From source file:org.apache.nifi.processors.hadoop.ListHDFS.java

License:Apache License

private Set<FileStatus> getStatuses(final Path path, final boolean recursive, final FileSystem hdfs,
        final PathFilter filter) throws IOException, InterruptedException {
    final Set<FileStatus> statusSet = new HashSet<>();

    getLogger().debug("Fetching listing for {}", new Object[] { path });
    final FileStatus[] statuses = getUserGroupInformation()
            .doAs((PrivilegedExceptionAction<FileStatus[]>) () -> hdfs.listStatus(path, filter));

    for (final FileStatus status : statuses) {
        if (status.isDirectory()) {
            if (recursive) {
                try {
                    statusSet.addAll(getStatuses(status.getPath(), recursive, hdfs, filter));
                } catch (final IOException ioe) {
                    getLogger().error(
                            "Failed to retrieve HDFS listing for subdirectory {} due to {}; will continue listing others",
                            new Object[] { status.getPath(), ioe });
                }
            }
        } else {
            statusSet.add(status);
        }
    }

    return statusSet;
}

From source file:org.apache.nifi.processors.hadoop.MoveHDFS.java

License:Apache License

protected Set<Path> selectFiles(final FileSystem hdfs, final Path inputPath, Set<Path> filesVisited)
        throws IOException {
    if (null == filesVisited) {
        filesVisited = new HashSet<>();
    }

    if (!hdfs.exists(inputPath)) {
        throw new IOException("Selection directory " + inputPath.toString() + " doesn't appear to exist!");
    }

    final Set<Path> files = new HashSet<>();

    FileStatus inputStatus = hdfs.getFileStatus(inputPath);

    if (inputStatus.isDirectory()) {
        for (final FileStatus file : hdfs.listStatus(inputPath)) {
            final Path canonicalFile = file.getPath();

            if (!filesVisited.add(canonicalFile)) { // skip files we've already seen (may be looping directory links)
                continue;
            }

            if (!file.isDirectory() && processorConfig.getPathFilter(inputPath).accept(canonicalFile)) {
                files.add(canonicalFile);

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
                }
            }
        }
    } else if (inputStatus.isFile()) {
        files.add(inputPath);
    }
    return files;
}

From source file:org.apache.rya.accumulo.mr.merge.CopyTool.java

License:Apache License

private int runQueryCopy() throws Exception {
    log.info("Setting up Copy Tool with a query-based ruleset...");
    setup();
    if (!useCopyFileOutput) {
        createChildInstance(conf);
    }

    // Set up the configuration
    final AccumuloRdfConfiguration aconf = new AccumuloRdfConfiguration(conf);
    aconf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    aconf.setTablePrefix(tablePrefix);
    aconf.setFlush(false);
    ConfigUtils.setIndexers(aconf);

    // Since we're copying at the statement-level, ignore any given list of tables and determine
    // which tables we might need to create based on which indexers are desired.
    final TablePrefixLayoutStrategy prefixStrategy = new TablePrefixLayoutStrategy(tablePrefix);
    tables.clear();
    // Always include core tables
    tables.add(prefixStrategy.getSpo());
    tables.add(prefixStrategy.getOsp());
    tables.add(prefixStrategy.getPo());
    // Copy namespaces if they exist
    tables.add(prefixStrategy.getNs());
    // Add tables associated with any configured indexers
    /* TODO: SEE RYA-160
    if (aconf.getBoolean(ConfigUtils.USE_FREETEXT, false)) {
        tables.add(ConfigUtils.getFreeTextDocTablename(conf));
        tables.add(ConfigUtils.getFreeTextTermTablename(conf));
    }
    if (aconf.getBoolean(ConfigUtils.USE_GEO, false)) {
        tables.add(ConfigUtils.getGeoTablename(conf));
    }
    if (aconf.getBoolean(ConfigUtils.USE_TEMPORAL, false)) {
        tables.add(ConfigUtils.getTemporalTableName(conf));
    }
    if (aconf.getBoolean(ConfigUtils.USE_ENTITY, false)) {
        tables.add(ConfigUtils.getEntityTableName(conf));
    }
    */
    // Ignore anything else, e.g. statistics -- must be recalculated for the child if desired

    // Extract the ruleset, and copy the namespace table directly
    final AccumuloQueryRuleset ruleset = new AccumuloQueryRuleset(aconf);
    ruleset.addTable(prefixStrategy.getNs());
    for (final String line : ruleset.toString().split("\n")) {
        log.info(line);
    }

    // Create a Job and configure its input and output
    final Job job = Job.getInstance(aconf);
    job.setJarByClass(this.getClass());
    setupMultiTableInputFormat(job, ruleset);
    setupAccumuloOutput(job, "");

    if (useCopyFileOutput) {
        // Configure job for file output
        job.setJobName("Ruleset-based export to file: " + tablePrefix + " -> " + localBaseOutputDir);
        // Map (row) to (table+key, key+value)
        job.setMapperClass(RowRuleMapper.class);
        job.setMapOutputKeyClass(GroupedRow.class);
        job.setMapOutputValueClass(GroupedRow.class);
        // Group according to table and sort according to key
        job.setGroupingComparatorClass(GroupedRow.GroupComparator.class);
        job.setSortComparatorClass(GroupedRow.SortComparator.class);
        // Reduce ([table+row], rows): output each row to the file for that table, in sorted order
        job.setReducerClass(MultipleFileReducer.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);
    } else {
        // Configure job for table output
        job.setJobName("Ruleset-based copy: " + tablePrefix + " -> " + childTablePrefix);
        // Map (row): convert to statement, insert to child (for namespace table, output row directly)
        job.setMapperClass(AccumuloRyaRuleMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Mutation.class);
        job.setNumReduceTasks(0);
        // Create the child tables, so mappers don't try to do this in parallel
        for (final String parentTable : tables) {
            final String childTable = parentTable.replaceFirst(tablePrefix, childTablePrefix);
            createTableIfNeeded(childTable);
        }
    }

    // Run the job and copy files to local filesystem if needed
    final Date beginTime = new Date();
    log.info("Job started: " + beginTime);
    final boolean success = job.waitForCompletion(true);
    if (success) {
        if (useCopyFileOutput) {
            log.info("Moving data from HDFS to the local file system");
            final Path baseOutputPath = new Path(baseOutputDir);
            for (final FileStatus status : FileSystem.get(conf).listStatus(baseOutputPath)) {
                if (status.isDirectory()) {
                    final String tableName = status.getPath().getName();
                    final Path hdfsPath = getPath(baseOutputDir, tableName);
                    final Path localPath = getPath(localBaseOutputDir, tableName);
                    log.info("HDFS directory: " + hdfsPath.toString());
                    log.info("Local directory: " + localPath.toString());
                    copyHdfsToLocal(hdfsPath, localPath);
                }
            }
        }
        final Date endTime = new Date();
        log.info("Job finished: " + endTime);
        log.info("The job took " + (endTime.getTime() - beginTime.getTime()) / 1000 + " seconds.");
        return 0;
    } else {
        log.error("Job failed!!!");
        return 1;
    }
}

From source file:org.apache.samza.system.hdfs.partitioner.HdfsFileSystemAdapter.java

License:Apache License

public List<FileMetadata> getAllFiles(String streamName) {
    List<FileMetadata> ret = new ArrayList<>();
    try {
        Path streamPath = new Path(streamName);
        FileSystem fileSystem = streamPath.getFileSystem(new Configuration());
        FileStatus[] fileStatuses = fileSystem.listStatus(streamPath);
        for (FileStatus fileStatus : fileStatuses) {
            if (!fileStatus.isDirectory()) {
                ret.add(new FileMetadata(fileStatus.getPath().toString(), fileStatus.getLen()));
            } else {
                ret.addAll(getAllFiles(fileStatus.getPath().toString()));
            }
        }
    } catch (IOException e) {
        LOG.error("Failed to get the list of files for " + streamName, e);
        throw new SamzaException(e);
    }
    return ret;
}

From source file:org.apache.sentry.tests.e2e.hdfs.TestHDFSIntegration.java

License:Apache License

private void verifyOnAllSubDirs(Path p, FsAction fsAction, String group, boolean groupShouldExist,
        boolean recurse, int retry) throws Throwable {
    FileStatus fStatus = null;
    try {
        fStatus = miniDFS.getFileSystem().getFileStatus(p);
        if (groupShouldExist) {
            Assert.assertEquals("Error at verifying Path action : " + p + " ;", fsAction,
                    getAcls(p).get(group));
        } else {
            Assert.assertFalse("Error at verifying Path : " + p + " ," + " group : " + group + " ;",
                    getAcls(p).containsKey(group));
        }
    } catch (Throwable th) {
        if (retry > 0) {
            Thread.sleep(RETRY_WAIT);
            verifyOnAllSubDirs(p, fsAction, group, groupShouldExist, recurse, retry - 1);
            // The retried call has completed verification (including children); fStatus is
            // still null in this frame, so return rather than falling through below.
            return;
        } else {
            throw th;
        }
    }
    if (recurse && fStatus.isDirectory()) {
        FileStatus[] children = miniDFS.getFileSystem().listStatus(p);
        for (FileStatus fs : children) {
            verifyOnAllSubDirs(fs.getPath(), fsAction, group, groupShouldExist, recurse, NUM_RETRIES);
        }
    }
}

From source file:org.apache.sentry.tests.e2e.hdfs.TestHDFSIntegrationBase.java

License:Apache License

private void verifyOnAllSubDirsHelper(Path p, FsAction fsAction, String user, String group, boolean shouldExist,
        boolean recurse, int retry) throws Throwable {
    FileStatus fStatus = null;
    // validate parent dir's acls
    try {
        fStatus = miniDFS.getFileSystem().getFileStatus(p);
        if (shouldExist) {
            if (!Strings.isNullOrEmpty(group)) {
                Assert.assertEquals("Error at verifying Path action : " + p + " ;", fsAction,
                        getAcls(AclEntryType.GROUP, p).get(group));
            }
            if (!Strings.isNullOrEmpty(user)) {
                Assert.assertEquals("Error at verifying Path action : " + p + " ;", fsAction,
                        getAcls(AclEntryType.USER, p).get(user));
            }
        } else {
            if (!Strings.isNullOrEmpty(group)) {
                assertFalse("Error at verifying Path : " + p + " ," + " group : " + group + " ;",
                        getAcls(AclEntryType.GROUP, p).containsKey(group));
            }
            if (!Strings.isNullOrEmpty(user)) {
                assertFalse("Error at verifying Path : " + p + " ," + " user : " + user + " ;",
                        getAcls(AclEntryType.USER, p).containsKey(user));
            }
        }
        LOGGER.info("Successfully found acls for path = " + p.getName());
    } catch (Throwable th) {
        if (retry > 0) {
            LOGGER.info("Retry: " + retry);
            Thread.sleep(RETRY_WAIT);
            verifyOnAllSubDirsHelper(p, fsAction, user, group, shouldExist, recurse, retry - 1);
            // The retried call has completed verification (including children); fStatus is
            // still null in this frame, so return rather than falling through below.
            return;
        } else {
            throw th;
        }
    }
    // validate children dirs
    if (recurse && fStatus.isDirectory()) {
        FileStatus[] children = miniDFS.getFileSystem().listStatus(p);
        for (FileStatus fs : children) {
            verifyOnAllSubDirsHelper(fs.getPath(), fsAction, user, group, shouldExist, recurse, NUM_RETRIES);
        }
    }
}

From source file:org.apache.slider.common.tools.CoreFileSystem.java

License:Apache License

/**
 * List all application instances persisted for this user, giving the 
 * path. The instance name is the last element in the path.
 * @return a possibly empty map of application instance names to paths
 */
public Map<String, Path> listPersistentInstances() throws IOException {
    FileSystem fs = getFileSystem();
    Path path = new Path(getBaseApplicationPath(), SliderKeys.CLUSTER_DIRECTORY);
    log.debug("Looking for all persisted application at {}", path.toString());
    if (!fs.exists(path)) {
        // special case: no instances have ever been created
        return new HashMap<String, Path>(0);
    }
    FileStatus[] statuses = fs.listStatus(path);
    Map<String, Path> instances = new HashMap<String, Path>(statuses.length);

    // enum the child entries
    for (FileStatus status : statuses) {
        if (status.isDirectory()) {
            // for directories, look for an internal.json underneath
            Path child = status.getPath();
            Path internalJson = new Path(child, Filenames.INTERNAL);
            if (fs.exists(internalJson)) {
                // success => this is an instance
                instances.put(child.getName(), child);
            } else {
                log.info("Malformed cluster found at {}. It does not appear to be a valid persisted instance.",
                        child.toString());
            }
        }
    }
    return instances;
}

From source file:org.apache.slider.test.ContractTestUtils.java

License:Apache License

/**
 * Assert that a path refers to a directory
 * @param fileStatus stats to check
 */
public static void assertIsDirectory(FileStatus fileStatus) {
    assertTrue("Should be a directory -but isn't: " + fileStatus, fileStatus.isDirectory());
}

From source file:org.apache.slider.test.ContractTestUtils.java

License:Apache License

/**
 * Assert that a file exists and whose {@link FileStatus} entry
 * declares that this is a file and not a symlink or directory.
 * @param filename name of the file
 * @param status file status
 */
public static void assertIsFile(Path filename, FileStatus status) {
    String fileInfo = filename + "  " + status;
    assertFalse("File claims to be a directory " + fileInfo, status.isDirectory());
    assertFalse("File claims to be a symlink " + fileInfo, status.isSymlink());
}

From source file:org.apache.solr.hadoop.ForkedMapReduceIndexerTool.java

License:Apache License

public static int runIndexingPipeline(Job job, JobProcessCallback callback, Configuration conf, Options options,
        long programStartTime, FileSystem fs, Path fullInputList, long numFiles, int realMappers, int reducers)
        throws IOException, KeeperException, InterruptedException, ClassNotFoundException,
        FileNotFoundException {
    long startTime;
    float secs;

    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");
    Path outputTreeMergeStep = new Path(options.outputDir, "mtree-merge-output");

    FileOutputFormat.setOutputPath(job, outputReduceDir);

    if (job.getConfiguration().get(JobContext.REDUCE_CLASS_ATTR) == null) { // enable customization
        job.setReducerClass(SolrReducer.class);
    }
    if (options.updateConflictResolver == null) {
        throw new IllegalArgumentException("updateConflictResolver must not be null");
    }
    job.getConfiguration().set(SolrReducer.UPDATE_CONFLICT_RESOLVER, options.updateConflictResolver);
    job.getConfiguration().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    if (options.zkHost != null) {
        assert options.collection != null;
        /*
         * MapReduce partitioner that partitions the Mapper output such that each
         * SolrInputDocument gets sent to the SolrCloud shard that it would have
         * been sent to if the document were ingested via the standard SolrCloud
         * Near Real Time (NRT) API.
         * 
         * In other words, this class implements the same partitioning semantics
         * as the standard SolrCloud NRT API. This enables mixing batch updates
         * from MapReduce ingestion with updates from standard NRT ingestion on
         * the same SolrCloud cluster, using identical unique document keys.
         */
        if (job.getConfiguration().get(JobContext.PARTITIONER_CLASS_ATTR) == null) { // enable customization
            job.setPartitionerClass(ForkedSolrCloudPartitioner.class);
        }
        job.getConfiguration().set(ForkedSolrCloudPartitioner.ZKHOST, options.zkHost);
        job.getConfiguration().set(ForkedSolrCloudPartitioner.COLLECTION, options.collection);
    }
    job.getConfiguration().setInt(ForkedSolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    if (options.solrHomeDir != null) {
        SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);
    } else {
        assert options.zkHost != null;
        // use the config that this collection uses for the SolrHomeCache.
        ForkedZooKeeperInspector zki = new ForkedZooKeeperInspector();
        SolrZkClient zkClient = zki.getZkClient(options.zkHost);
        try {
            String configName = zki.readConfigName(zkClient, options.collection);
            File tmpSolrHomeDir = zki.downloadConfigDir(zkClient, configName);
            SolrOutputFormat.setupSolrHomeCache(tmpSolrHomeDir, job);
            LOG.debug("Using " + tmpSolrHomeDir + " as solr home");
            options.solrHomeDir = tmpSolrHomeDir;
        } finally {
            zkClient.close();
        }
    }

    //    MorphlineMapRunner runner = setupMorphline(job, options);
    //    if (options.isDryRun && runner != null) {
    //      LOG.info("Indexing {} files in dryrun mode", numFiles);
    //      startTime = System.currentTimeMillis();
    //      dryRun(job, runner, fs, fullInputList);
    //      secs = (System.currentTimeMillis() - startTime) / 1000.0f;
    //      LOG.info("Done. Indexing {} files in dryrun mode took {} secs", numFiles, secs);
    //      goodbye(null, programStartTime);
    //      return 0;
    //    }
    //    job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getName());

    job.setNumReduceTasks(reducers);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    LOG.info("Indexing data into {} reducers", new Object[] { reducers });
    startTime = System.currentTimeMillis();
    job.submit();
    callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
    if (!waitForCompletion(job, options.isVerbose)) {
        return -1; // job failed
    }

    secs = (System.currentTimeMillis() - startTime) / 1000.0f;
    LOG.info("Done. Indexing data into {} reducers took {} secs", new Object[] { reducers, secs });

    int mtreeMergeIterations = 0;
    if (reducers > options.shards) {
        mtreeMergeIterations = (int) Math.round(log(options.fanout, reducers / options.shards));
    }
    LOG.debug("MTree merge iterations to do: {}", mtreeMergeIterations);
    int mtreeMergeIteration = 1;
    while (reducers > options.shards) { // run a mtree merge iteration
        job = Job.getInstance(conf);
        job.setJarByClass(ForkedMapReduceIndexerTool.class);
        job.setJobName(ForkedMapReduceIndexerTool.class.getName() + "/"
                + Utils.getShortClassName(ForkedTreeMergeMapper.class));
        job.setMapperClass(ForkedTreeMergeMapper.class);
        job.setOutputFormatClass(ForkedTreeMergeOutputFormat.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setInputFormatClass(NLineInputFormat.class);

        Path inputStepDir = new Path(options.outputDir, "mtree-merge-input-iteration" + mtreeMergeIteration);
        fullInputList = new Path(inputStepDir, FULL_INPUT_LIST);
        LOG.debug("MTree merge iteration {}/{}: Creating input list file for mappers {}",
                new Object[] { mtreeMergeIteration, mtreeMergeIterations, fullInputList });
        numFiles = createTreeMergeInputDirList(job, outputReduceDir, fs, fullInputList);
        if (numFiles != reducers) {
            throw new IllegalStateException("Not same reducers: " + reducers + ", numFiles: " + numFiles);
        }
        NLineInputFormat.addInputPath(job, fullInputList);
        NLineInputFormat.setNumLinesPerSplit(job, options.fanout);
        FileOutputFormat.setOutputPath(job, outputTreeMergeStep);

        LOG.info("MTree merge iteration {}/{}: Merging {} shards into {} shards using fanout {}",
                new Object[] { mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout),
                        options.fanout });
        startTime = System.currentTimeMillis();
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!waitForCompletion(job, options.isVerbose)) {
            return -1; // job failed
        }
        if (!renameTreeMergeShardDirs(outputTreeMergeStep, job, fs)) {
            return -1;
        }
        secs = (System.currentTimeMillis() - startTime) / 1000.0f;
        LOG.info(
                "MTree merge iteration {}/{}: Done. Merging {} shards into {} shards using fanout {} took {} secs",
                new Object[] { mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout),
                        options.fanout, secs });

        if (!delete(outputReduceDir, true, fs)) {
            return -1;
        }
        if (!rename(outputTreeMergeStep, outputReduceDir, fs)) {
            return -1;
        }
        assert reducers % options.fanout == 0;
        reducers = reducers / options.fanout;
        mtreeMergeIteration++;
    }
    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}