Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileStatus.isDirectory().

Prototype

public boolean isDirectory() 

Document

Is this a directory?
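
Before the project examples below, here is a minimal, self-contained sketch of a typical call. It is an illustration only: the path /tmp/data and the class name IsDirectoryExample are hypothetical placeholders, not taken from any of the sources listed under Usage.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/data"); // hypothetical path
        FileSystem fs = path.getFileSystem(conf);

        // getFileStatus throws FileNotFoundException if the path does not exist
        FileStatus status = fs.getFileStatus(path);
        if (status.isDirectory()) {
            System.out.println(path + " is a directory");
        } else {
            System.out.println(path + " is a file");
        }
    }
}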

Usage

From source file: com.marklogic.contentpump.RDFReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (version == null)
        throw new IOException("Server Version is null");
    String majorVersion = version.substring(0, version.indexOf('.'));
    graphSupported = Integer.valueOf(majorVersion) >= 8;
    conf = context.getConfiguration();

    String rdfopt = conf.get(ConfigConstants.RDF_STREAMING_MEMORY_THRESHOLD);
    if (rdfopt != null) {
        INMEMORYTHRESHOLD = Long.parseLong(rdfopt);
    }

    rdfopt = conf.get(ConfigConstants.RDF_TRIPLES_PER_DOCUMENT);
    if (rdfopt != null) {
        MAXTRIPLESPERDOCUMENT = Integer.parseInt(rdfopt);
    }

    String fnAsColl = conf.get(ConfigConstants.CONF_OUTPUT_FILENAME_AS_COLLECTION);
    if (fnAsColl != null) {
        LOG.warn(
                "The -filename_as_collection has no effect with input_type RDF, use -output_collections instead.");
    }

    String[] collections = conf.getStrings(MarkLogicConstants.OUTPUT_COLLECTION);
    outputGraph = conf.get(MarkLogicConstants.OUTPUT_GRAPH);
    outputOverrideGraph = conf.get(MarkLogicConstants.OUTPUT_OVERRIDE_GRAPH);
    // if no default graph is set and output_collections is set
    ignoreCollectionQuad = (outputGraph == null && collections != null) || outputOverrideGraph != null;
    hasOutputCol = (collections != null);

    Class<? extends Writable> valueClass = RDFWritable.class;

    @SuppressWarnings("unchecked")
    VALUEIN localValue = (VALUEIN) ReflectionUtils.newInstance(valueClass, conf);

    value = localValue;
    encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING, DEFAULT_ENCODING);

    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());

    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }

    try {
        initStream(inSplit);
    } catch (IOException e) {
        LOG.error("Invalid input: " + file.getName() + ": " + e.getMessage());
        throw e;
    }
    String[] perms = conf.getStrings(MarkLogicConstants.OUTPUT_PERMISSION);
    if (perms != null) {
        defaultPerms = PermissionUtil.getPermissions(perms).toArray(new ContentPermission[perms.length >> 1]);
    } else {
        List<ContentPermission> tmp = PermissionUtil.getDefaultPermissions(conf, roleMap);
        if (tmp != null)
            defaultPerms = tmp.toArray(new ContentPermission[tmp.size()]);
    }

    if (roleMapExists)
        initExistingMapPerms();
}

From source file: com.marklogic.contentpump.SequenceFileReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    batchSize = conf.getInt(MarkLogicConstants.BATCH_SIZE, MarkLogicConstants.DEFAULT_BATCH_SIZE);

    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());
    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }

    initReader(inSplit);
}

From source file: com.marklogic.contentpump.utilities.FileIterator.java

License: Apache License

@Override
public FileSplit next() {
    while (iterator.hasNext() || !fileDirSplits.isEmpty()) {
        try {
            if (iterator.hasNext()) {
                // If the next split is a regular file, return it; if it is a
                // directory, queue its children and keep looping.
                FileSplit split = iterator.next();
                Path file = split.getPath();

                FileSystem fs = file.getFileSystem(conf);

                FileStatus status = fs.getFileStatus(file);
                if (status.isDirectory()) {
                    FileStatus[] children = fs.listStatus(status.getPath(), inputFilter);
                    for (FileStatus stat : children) {
                        FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                        fileDirSplits.add(child);
                    }
                } else
                    return split;

            } else if (!fileDirSplits.isEmpty()) {
                // Dequeue a pending child split; a directory child is expanded
                // into a fresh iterator before the loop continues.
                FileSplit split = (FileSplit) fileDirSplits.remove(0);
                Path file = split.getPath();
                FileSystem fs = file.getFileSystem(conf);
                FileStatus status = fs.getFileStatus(file);

                if (!status.isDirectory()) {
                    return split;
                }
                FileStatus[] children = fs.listStatus(status.getPath(), inputFilter);

                List<FileSplit> expdFileSpts = new LinkedList<FileSplit>();
                for (FileStatus stat : children) {
                    FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                    expdFileSpts.add(child);
                }
                iterator = expdFileSpts.iterator();
                continue;
            }
        } catch (IOException e) {
            LOG.error("Invalid next file", e);
        }
    }
    return null;
}

From source file: com.marklogic.mapreduce.ForestInputFormat.java

License: Apache License

protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = super.listStatus(job);
    for (Iterator<FileStatus> it = result.iterator(); it.hasNext();) {
        FileStatus file = it.next();
        String fileName = file.getPath().getName();
        if (!file.isDirectory() && fileName.equals("Obsolete")) {
            LOG.warn("Obsolete file found.  The forest is either live or isn't "
                    + "dismounted cleanly.  Ignoring forest " + file.getPath().getParent());
            return Collections.emptyList();
        }
        if (!file.isDirectory() || fileName.equals("Journals") || fileName.equals("Large")) {
            it.remove();
        }
    }
    return result;
}

From source file: com.ngdata.hbaseindexer.mr.TestUtils.java

License: Apache License

public static void validateSolrServerDocumentCount(File solrHomeDir, FileSystem fs, Path outDir,
        int expectedDocs, int expectedShards) throws IOException, SolrServerException {

    long actualDocs = 0;
    int actualShards = 0;
    for (FileStatus dir : fs.listStatus(outDir)) { // for each shard
        if (dir.getPath().getName().startsWith("part") && dir.isDirectory()) {
            actualShards++;
            EmbeddedSolrServer solr = createEmbeddedSolrServer(solrHomeDir, fs, dir.getPath());

            try {
                SolrQuery query = new SolrQuery();
                query.setQuery("*:*");
                QueryResponse resp = solr.query(query);
                long numDocs = resp.getResults().getNumFound();
                actualDocs += numDocs;
            } finally {
                solr.close();
            }
        }
    }
    assertEquals(expectedShards, actualShards);
    assertEquals(expectedDocs, actualDocs);
}

From source file: com.pivotal.hawq.mapreduce.parquet.HAWQParquetInputFormat.java

License: Apache License

@Override
protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
    List<FileStatus> result = Lists.newArrayList();
    for (HAWQFileStatus hawqFileStatus : hawqFileStatuses) {
        if (hawqFileStatus.getFileLength() == 0)
            continue; // skip empty file

        Path path = new Path(hawqFileStatus.getFilePath());
        FileSystem fs = path.getFileSystem(jobContext.getConfiguration());
        FileStatus dfsStat = fs.getFileStatus(path);
        // rewrite file length because HAWQ records the logicalEOF of file, which may
        // be smaller than the file's actual EOF
        FileStatus hawqStat = new FileStatus(hawqFileStatus.getFileLength(), // rewrite to logicalEOF
                dfsStat.isDirectory(), dfsStat.getReplication(), dfsStat.getBlockSize(),
                dfsStat.getModificationTime(), dfsStat.getAccessTime(), dfsStat.getPermission(),
                dfsStat.getOwner(), dfsStat.getGroup(), dfsStat.getPath());
        result.add(hawqStat);
    }

    return result;
}

From source file: com.quixey.hadoop.fs.oss.OSSFileSystem.java

License: Apache License

@Override
    @Nonnull
public FSDataInputStream open(Path path, int bufferSize) throws IOException {
    path = checkNotNull(path);
    FileStatus fs = getFileStatus(path); // will throw if the file doesn't exist

    if (fs.isDirectory())
        throw new FileNotFoundException("'" + path + "' is a directory");
    LOG.info("Opening '{}' for reading", path);

    Path absolutePath = makeAbsolute(path);
    String key = pathToKey(absolutePath);
    return new FSDataInputStream(
            new BufferedFSInputStream(new OSSFileInputStream(store, key, of(statistics)), bufferSize));
}

From source file: com.quixey.hadoop.fs.oss.OSSFileSystem.java

License: Apache License

@Override
public boolean delete(Path path, boolean recursive) throws IOException {
    path = checkNotNull(path);

    FileStatus status;
    try {
        status = getFileStatus(path);
    } catch (FileNotFoundException e) {
        LOG.debug("Delete called for '{}', but file does not exist, so returning false.", path);
        return false;
    }

    Path absolutePath = makeAbsolute(path);
    String key = pathToKey(absolutePath);

    if (status.isDirectory()) {
        if (!recursive && listStatus(path).length > 0) {
            throw new IOException(
                    "Cannot delete '" + path + "' as it is not an empty directory and recurse option is false");
        }

        // recursively delete directory
        createParent(path);

        LOG.debug("Deleting directory '{}'", path);
        String marker = null;
        do {
            PartialListing listing = store.list(key, maxListingLength, marker, true);
            for (FileMetadata file : listing.getFiles())
                store.delete(file.getKey());
            marker = listing.getMarker();
        } while (null != marker);

        rmdir(key);
    } else {
        LOG.debug("Deleting file '{}'", path);
        createParent(path);
        store.delete(key);
    }

    return true;
}

From source file: com.ruizhan.hadoop.hdfs.Ls.java

License: Apache License

@Override
protected void processPath(PathData item) throws IOException {
    FileStatus stat = item.stat;
    String line = String.format(lineFormat, (stat.isDirectory() ? "d" : "-"), stat.getPermission(),
            (stat.isFile() ? stat.getReplication() : "-"), stat.getOwner(), stat.getGroup(),
            formatSize(stat.getLen()), dateFormat.format(new Date(stat.getModificationTime())), item);
    out.println(line);
}

From source file: com.run.mapred.hbase2tsv.HFileInputFormat_mr1.java

License: Apache License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();

    // Explode out directories that match the original FileInputFormat
    // filters since HFiles are written to directories where the
    // directory name is the column name
    for (FileStatus status : super.listStatus(job)) {
        if (status.isDirectory()) {
            FileSystem fs = status.getPath().getFileSystem(job.getConfiguration());
            for (FileStatus match : fs.listStatus(status.getPath(), HIDDEN_FILE_FILTER)) {
                result.add(match);
            }
        } else {
            result.add(status);
        }
    }

    return result;
}
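
Several of the examples above (FileIterator.next, HFileInputFormat_mr1.listStatus) use isDirectory() to expand a directory into its children. The following is a generic sketch of that pattern using only the public FileSystem API; the class and method names are hypothetical and not taken from any source above.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RecursiveLister {
    // Collect every regular file under root, descending into subdirectories.
    public static List<FileStatus> listFiles(FileSystem fs, Path root) throws IOException {
        List<FileStatus> files = new ArrayList<FileStatus>();
        for (FileStatus status : fs.listStatus(root)) {
            if (status.isDirectory()) {
                files.addAll(listFiles(fs, status.getPath()));
            } else {
                files.add(status);
            }
        }
        return files;
    }
}

Unlike the FileIterator example, which expands directories lazily one split at a time, this version eagerly collects the whole tree into memory; that is simpler, but unsuitable for very large input directories.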