List of usage examples for org.apache.hadoop.fs FileStatus isDirectory
public boolean isDirectory()
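Before the per-project excerpts, a minimal sketch of the call in isolation. This is not taken from any project below; the path and class name are illustrative, and it assumes a default-configured FileSystem.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // getFileStatus() throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(new Path("/tmp/example")); // illustrative path
        if (status.isDirectory()) {
            System.out.println(status.getPath() + " is a directory");
        } else {
            System.out.println(status.getPath() + " is a file of " + status.getLen() + " bytes");
        }
    }
}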
From source file:com.uber.hoodie.common.table.timeline.dto.FileStatusDTO.java
License:Apache License
public static FileStatusDTO fromFileStatus(FileStatus fileStatus) {
    if (null == fileStatus) {
        return null;
    }

    FileStatusDTO dto = new FileStatusDTO();
    try {
        dto.path = FilePathDTO.fromPath(fileStatus.getPath());
        dto.length = fileStatus.getLen();
        dto.isdir = fileStatus.isDirectory();
        dto.blockReplication = fileStatus.getReplication();
        dto.blocksize = fileStatus.getBlockSize();
        dto.modificationTime = fileStatus.getModificationTime();
        dto.accessTime = fileStatus.getAccessTime();
        dto.symlink = fileStatus.isSymlink() ? FilePathDTO.fromPath(fileStatus.getSymlink()) : null;
        safeReadAndSetMetadata(dto, fileStatus);
    } catch (IOException ioe) {
        throw new HoodieException(ioe);
    }
    return dto;
}
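A hedged usage sketch for the conversion above; the table path is illustrative and not taken from the Hudi sources:

FileSystem fs = FileSystem.get(new Configuration());
FileStatus status = fs.getFileStatus(new Path("/tmp/hoodie_table/.hoodie")); // illustrative path
FileStatusDTO dto = FileStatusDTO.fromFileStatus(status); // dto.isdir mirrors status.isDirectory()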
From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java
License:Apache License
/**
 * Add files in the input path recursively into the results.
 * @param result the List to store all files
 * @param fs the FileSystem
 * @param path the input path
 * @param inputFilter the input filter that can be used to filter files/dirs
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter inputFilter) throws IOException {
    for (FileStatus stat : fs.listStatus(path, inputFilter)) {
        if (stat.isDirectory()) {
            addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
        } else {
            result.add(stat);
        }
    }
}
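A hedged sketch of driving this recursive walker (assumes a configured FileSystem; the root path is illustrative, and the filter here simply accepts everything):

List<FileStatus> result = new ArrayList<FileStatus>();
FileSystem fs = FileSystem.get(new Configuration());
PathFilter acceptAll = path -> true; // hypothetical filter: accept all paths
addInputPathRecursively(result, fs, new Path("/data/input"), acceptAll); // illustrative root
// result now holds every non-directory FileStatus under /data/input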
From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java
License:Apache License
/**
 * List input directories.
 * Subclasses may override to, e.g., select only files matching a regular expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job);

    // Whether we need to recursively look into the directory structure
    boolean recursive = job.getBoolean("mapred.input.dir.recursive", false);

    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user-provided one (if any)
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job);
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        if (recursive && stat.isDirectory()) {
                            addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                        } else {
                            result.add(stat);
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result.toArray(new FileStatus[result.size()]);
}
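Note that the recursion above is opt-in via the mapred.input.dir.recursive key. A hedged sketch of enabling it on a JobConf (the input path is illustrative; the key is the one read in the listing above):

JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/data/in")); // illustrative input path
job.setBoolean("mapred.input.dir.recursive", true); // same key the listing reads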
From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java
License:Apache License
/**
 * Splits files returned by {@link #listStatus(JobConf)} when they're too big.
 */
@SuppressWarnings("deprecation")
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);

    // Save the number of input files for metrics/loadgen
    job.setLong(NUM_INPUT_FILES, files.length);

    // compute total size, checking that each entry is a valid file
    long totalSize = 0;
    for (FileStatus file : files) {
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(
            job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1),
            minSplitSize);

    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(goalSize, minSize, blockSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, splitSize,
                        clusterMap);
                splits.add(makeSplit(path, length - bytesRemaining, splitSize, splitHosts));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, bytesRemaining,
                        clusterMap);
                splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining, splitHosts));
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            splits.add(makeSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero-length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    LOG.debug("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
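For orientation, computeSplitSize in Hadoop's FileInputFormat resolves to Math.max(minSize, Math.min(goalSize, blockSize)). A worked illustration of the values involved (the numbers are hypothetical):

long goalSize = 200L << 20;  // totalSize / numSplits, e.g. 200 MB
long minSize = 1L;           // SPLIT_MINSIZE default
long blockSize = 128L << 20; // HDFS block size, e.g. 128 MB
long splitSize = Math.max(minSize, Math.min(goalSize, blockSize)); // 128 MB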
From source file:com.wandisco.s3hdfs.rewrite.filter.TestCurlCommands.java
License:Apache License
@Test
public void testCurlCreateBucket1()
        throws IOException, URISyntaxException, S3ServiceException, InterruptedException {
    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("rewrite", null, "flavaflav");

    ProcessBuilder pb = new ProcessBuilder("curl", "-v", "-L", "-X", "PUT",
            "http://" + hostName + ":" + PROXY_PORT + "/rewrite?user.name=flavaflav");
    Process proc = pb.start();
    proc.waitFor();

    String out = testUtil.readInputStream(proc.getInputStream());
    String out2 = testUtil.readInputStream(proc.getErrorStream());
    System.err.println(out);
    System.err.println(out2);

    FileStatus retVal = hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0];
    System.out.println(retVal.getPath());
    assertEquals("rewrite", retVal.getPath().getName());
    assertEquals("flavaflav", retVal.getOwner());
    assertTrue(retVal.isDirectory());

    FileStatus[] inside = hdfs.listStatus(new Path(retVal.getPath().toString()));
    assertEquals(0, inside.length);
}
From source file:com.wandisco.s3hdfs.rewrite.filter.TestCurlCommands.java
License:Apache License
@Test
public void testCurlCreateBucket2()
        throws IOException, URISyntaxException, S3ServiceException, InterruptedException {
    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("rewrite", null, "flavaflav");

    ProcessBuilder pb = new ProcessBuilder("curl", "-v", "-L", "-X", "PUT", "-H",
            "Host: rewrite." + hostName,
            "http://" + hostName + ":" + PROXY_PORT + "/?user.name=flavaflav");
    Process proc = pb.start();
    proc.waitFor();

    String out = testUtil.readInputStream(proc.getInputStream());
    String out2 = testUtil.readInputStream(proc.getErrorStream());
    System.out.println(out);
    System.out.println(out2);

    FileStatus retVal = hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0];
    System.out.println(retVal.getPath());
    assertEquals("rewrite", retVal.getPath().getName());
    assertEquals("flavaflav", retVal.getOwner());
    assertTrue(retVal.isDirectory());

    FileStatus[] inside = hdfs.listStatus(new Path(retVal.getPath().toString()));
    assertEquals(0, inside.length);
}
From source file:com.wandisco.s3hdfs.rewrite.filter.TestCurlCommands.java
License:Apache License
@Test
public void testCurlGetNonExistantObject()
        throws IOException, URISyntaxException, S3ServiceException, InterruptedException {
    // WITHOUT BUCKET
    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("myBucket",
            "S3HDFS%2Fslot%2D01special%2Dtapestart", "flavaflav");

    ProcessBuilder pb = new ProcessBuilder("curl", "-v", "-L", "-X", "GET",
            "http://" + hostName + ":" + PROXY_PORT + "/myBucket/" + s3HdfsPath.getObjectName()
                    + "?user.name=flavaflav");
    Process proc = pb.start();
    proc.waitFor();

    String out = testUtil.readInputStream(proc.getInputStream());
    String out2 = testUtil.readInputStream(proc.getErrorStream());
    System.out.println(out);
    System.out.println(out2);
    assert out2.contains("HTTP/1.1 404 Not Found");

    // MAKE BUCKET
    ProcessBuilder pb2 = new ProcessBuilder("curl", "-v", "-L", "-X", "PUT", "-H",
            "Host: myBucket." + hostName,
            "http://" + hostName + ":" + PROXY_PORT + "/?user.name=flavaflav");
    Process proc2 = pb2.start();
    proc2.waitFor();

    out = testUtil.readInputStream(proc2.getInputStream());
    out2 = testUtil.readInputStream(proc2.getErrorStream());
    System.out.println(out);
    System.out.println(out2);

    FileStatus retVal = hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0];
    System.out.println(retVal.getPath());
    assertEquals("myBucket", retVal.getPath().getName());
    assertEquals("flavaflav", retVal.getOwner());
    assertTrue(retVal.isDirectory());

    FileStatus[] inside = hdfs.listStatus(new Path(retVal.getPath().toString()));
    assertEquals(0, inside.length);

    // WITH BUCKET
    ProcessBuilder pb3 = new ProcessBuilder("curl", "-v", "-L", "-X", "GET",
            "http://" + hostName + ":" + PROXY_PORT + "/myBucket/" + s3HdfsPath.getObjectName()
                    + "?user.name=flavaflav");
    Process proc3 = pb3.start();
    proc3.waitFor();

    out = testUtil.readInputStream(proc3.getInputStream());
    out2 = testUtil.readInputStream(proc3.getErrorStream());
    System.out.println("LAST: " + out);
    System.out.println("LAST: " + out2);
    assert out2.contains("HTTP/1.1 404 Not Found");
}
From source file:com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory.java
License:Apache License
/**
 * Constructor
 * @param fs
 * @param directory
 * @param create
 * @param conf
 * @throws IOException
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf)
        throws IOException {
    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDirectory();
        }
    } catch (IOException e) {
        // file does not exist; isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}
From source file:com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory.java
License:Apache License
private void create() throws IOException {
    if (!fs.exists(directory)) {
        fs.mkdirs(directory);
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDirectory();
        }
    } catch (IOException e) {
        // file does not exist; isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }

    // clear old index files
    FileStatus[] fileStatus = fs.listStatus(directory);
    for (int i = 0; i < fileStatus.length; i++) {
        if (!fs.delete(fileStatus[i].getPath(), true)) {
            throw new IOException("Cannot delete index file " + fileStatus[i].getPath());
        }
    }
}
From source file:com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java
License:Open Source License
public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
    this.fileSystem = fileSystem;

    CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
    codecIfAny = factory.getCodec(srcPath);

    FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
    if (srcFileStatus.isDirectory()) {
        // globStatus() returns FileStatus objects sorted by filename.
        String partFilenamePattern = "part-?-?????";
        if (codecIfAny != null) {
            partFilenamePattern += codecIfAny.getDefaultExtension();
        }
        Path partPathGlob = new Path(srcPath, partFilenamePattern);
        partFileStatuses = fileSystem.globStatus(partPathGlob);
    } else {
        partFileStatuses = new FileStatus[] { srcFileStatus };
    }
}
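Classes like this are typically consumed through java.io.SequenceInputStream, which concatenates the per-part streams. A hedged sketch, assuming the class implements Enumeration<InputStream> (its name suggests so, but the excerpt does not show the interface), with an illustrative output path:

FileSystem fs = FileSystem.get(new Configuration());
Enumeration<InputStream> parts =
        new MapReducePartInputStreamEnumeration(fs, new Path("/user/out")); // illustrative path
InputStream concatenated = new SequenceInputStream(parts); // reads the part files back to back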