Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.isDirectory().

Prototype

public boolean isDirectory() 

Document

Is this a directory?
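
Before the usage examples below, here is a minimal standalone sketch of checking a path with isDirectory(). The path "/tmp/example" and the class name IsDirectoryExample are placeholders for illustration, not part of any example on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        // Placeholder path; replace with a real file or directory.
        Path path = new Path("/tmp/example");
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(path);
        if (status.isDirectory()) {
            // Directory: list its immediate children.
            for (FileStatus child : fs.listStatus(path)) {
                System.out.println("child: " + child.getPath());
            }
        } else {
            // Regular file: report its length.
            System.out.println("file length: " + status.getLen());
        }
    }
}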

Usage

From source file: com.marklogic.contentpump.AggregateXMLReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    initAggConf(context);

    f = XMLInputFactory.newInstance();
    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());
    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }
    initStreamReader(inSplit);
}

From source file: com.marklogic.contentpump.ArchiveRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    allowEmptyMeta = conf.getBoolean(CONF_INPUT_ARCHIVE_METADATA_OPTIONAL, false);

    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());
    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }
    initStream(inSplit);
}

From source file: com.marklogic.contentpump.CompressedAggXMLReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    initAggConf(context);
    f = XMLInputFactory.newInstance();
    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());

    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }
    initStreamReader(inSplit);
}

From source file: com.marklogic.contentpump.CompressedDelimitedTextReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    initDocType();
    initDelimConf();

    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());
    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }

    initStream(inSplit);
}

From source file: com.marklogic.contentpump.CompressedDocumentReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    batchSize = conf.getInt(MarkLogicConstants.BATCH_SIZE, MarkLogicConstants.DEFAULT_BATCH_SIZE);
    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }
    initStream(inSplit);
}

From source file: com.marklogic.contentpump.DelimitedJSONReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    /* Initialization in super class */
    initConfig(context);
    /*  Get file(s) in input split */
    setFile(((FileSplit) inSplit).getPath());
    // Initialize reader properties
    generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false);
    if (generateId) {
        idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
    } else {
        uriName = conf.get(CONF_INPUT_URI_ID, null);
        mapper = new ObjectMapper();
    }
    bytesRead = 0;
    totalBytes = inSplit.getLength();
    /* Check file status */
    fs = file.getFileSystem(context.getConfiguration());
    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }
    /* Initialize buffered reader */
    initFileStream(inSplit);
}

From source file: com.marklogic.contentpump.DelimitedTextReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    initDocType();
    initDelimConf();
    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());
    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }
    initParser(inSplit);
}

From source file: com.marklogic.contentpump.DocumentPathFilter.java

License: Apache License

@Override
public boolean accept(Path inPath) {
    String filename = inPath.getName();
    if (filename.matches(pattern) == true) {
        return true;
    }
    // take care of the case when INPUT_FILE_PATH is a DIR
    try {
        FileStatus[] status = fs.globStatus(inPath);
        if (status == null) {
            throw new IOException("Path in input_file_path doesn't exist: " + inPath);
        }
        for (FileStatus s : status) {
            if (s.isDirectory()) {
                return true;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return false;
}

From source file: com.marklogic.contentpump.FileAndDirectoryInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Configuration conf = job.getConfiguration();
    try {
        List<FileStatus> files = listStatus(job);

        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
        long maxSize = getMaxSplitSize(job);
        for (FileStatus child : files) {
            Path path = child.getPath();
            FileSystem fs = path.getFileSystem(conf);
            // length is 0 for dir according to FSDirectory.java in 0.20
            // however, w/ Hadoop2, dir in local fs has non-zero length
            long length = child.getLen();
            BlockLocation[] blkLocations = null;
            if (!child.isDirectory() || fs instanceof DistributedFileSystem == false) {
                blkLocations = fs.getFileBlockLocations(child, 0, length);
            } else if (length != 0) {
                throw new IOException("non-zero length directory on HDFS:" + path.toUri().toString());
            }

            if ((length != 0) && isSplitable(job, path)) {
                long blockSize = child.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkLocations.length - 1].getHosts()));
                }
            } else if (length != 0) {
                splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
            } else {
                // Create empty hosts array for zero length files
                splits.add(new FileSplit(path, 0, length, new String[0]));
            }
        }
    } catch (InvalidInputException ex) {
        String inPath = conf.get(ConfigConstants.CONF_INPUT_DIRECTORY);
        String pattern = conf.get(ConfigConstants.CONF_INPUT_FILE_PATTERN, ".*");
        throw new IOException("No input files found with the specified input path " + inPath
                + " and input file pattern " + pattern, ex);
    }

    PathFilter jobFilter = getInputPathFilter(job);
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);
    // take a second pass of the splits generated to extract files from
    // directories
    int count = 0;
    // flatten directories until reaching SPLIT_COUNT_LIMIT
    while (count < splits.size() && splits.size() < SPLIT_COUNT_LIMIT) {
        FileSplit split = (FileSplit) splits.get(count);
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(file);
        if (status.isDirectory()) {
            FileStatus[] children = fs.listStatus(file, inputFilter);
            if (children.length + count < SPLIT_COUNT_LIMIT) {
                splits.remove(count);
                for (FileStatus stat : children) {
                    FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                    splits.add(child);
                }
            } else {
                count++;
            }
        } else {
            count++;
        }
    }
    return splits;
}

From source file: com.marklogic.contentpump.FileAndDirectoryInputFormat.java

License: Apache License

private List<FileStatus> simpleListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();
    Configuration conf = job.getConfiguration();
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(conf);
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    FileStatus[] files = fs.listStatus(globStat.getPath(), inputFilter);
                    for (int j = 0; j < files.length; j++) {
                        if (recursive && files[j].isDirectory()) {
                            simpleAddInputPathRecursively(result, fs, files[j].getPath(), inputFilter);
                        } else {
                            result.add(files[j]);
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}