Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

This page lists example usages of org.apache.hadoop.fs.FileStatus.isDirectory().

Prototype

public boolean isDirectory() 

Document

Is this a directory?
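
For orientation, here is a minimal, self-contained sketch of the call; the path /tmp/example is a placeholder and the default FileSystem from the Configuration is assumed:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // getFileStatus throws FileNotFoundException if the path does not exist
        FileStatus status = fs.getFileStatus(new Path("/tmp/example"));
        if (status.isDirectory()) {
            System.out.println(status.getPath() + " is a directory");
        } else {
            System.out.println(status.getPath() + " is not a directory");
        }
    }
}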

Usage

From source file:com.uber.hoodie.common.table.timeline.dto.FileStatusDTO.java

License:Apache License

public static FileStatusDTO fromFileStatus(FileStatus fileStatus) {
    if (null == fileStatus) {
        return null;
    }

    FileStatusDTO dto = new FileStatusDTO();
    try {
        dto.path = FilePathDTO.fromPath(fileStatus.getPath());
        dto.length = fileStatus.getLen();
        dto.isdir = fileStatus.isDirectory();
        dto.blockReplication = fileStatus.getReplication();
        dto.blocksize = fileStatus.getBlockSize();
        dto.modificationTime = fileStatus.getModificationTime();
        dto.accessTime = fileStatus.getModificationTime();
        dto.symlink = fileStatus.isSymlink() ? FilePathDTO.fromPath(fileStatus.getSymlink()) : null;
        safeReadAndSetMetadata(dto, fileStatus);
    } catch (IOException ioe) {
        throw new HoodieException(ioe);
    }
    return dto;
}
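
A hypothetical call site, assuming a live FileSystem handle named fs and an existing path (the path itself is a placeholder); as shown above, fromFileStatus simply returns null for null input:

FileStatus status = fs.getFileStatus(new Path("/tmp/hoodie/some-file"));
FileStatusDTO dto = FileStatusDTO.fromFileStatus(status);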

From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java

License:Apache License

/**
 * Add files in the input path recursively into the results.
 * @param result
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs. 
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter inputFilter) throws IOException {
    for (FileStatus stat : fs.listStatus(path, inputFilter)) {
        if (stat.isDirectory()) {
            addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
        } else {
            result.add(stat);
        }
    }
}
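
A hypothetical invocation from inside the class (or a subclass, since the method is protected) that gathers every regular file under an input root; the fs handle, the path, and the accept-everything filter are placeholders:

List<FileStatus> result = new ArrayList<FileStatus>();
addInputPathRecursively(result, fs, new Path("/data/input"), p -> true);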

From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java

License:Apache License

/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job);

    // Whether we need to recursively look into the directory structure
    boolean recursive = job.getBoolean("mapred.input.dir.recursive", false);

    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job);
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        if (recursive && stat.isDirectory()) {
                            addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                        } else {
                            result.add(stat);
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result.toArray(new FileStatus[result.size()]);
}
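
A hypothetical job setup showing how the input paths and the recursion flag consumed by this method are set; the property name mapred.input.dir.recursive is taken from the code above, and the input root is a placeholder:

JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/data/input"));
job.setBoolean("mapred.input.dir.recursive", true); // descend into subdirectories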

From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java

License:Apache License

/** Splits files returned by {@link #listStatus(JobConf)} when
 * they're too big.*/
@SuppressWarnings("deprecation")
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);

    // Save the number of input files for metrics/loadgen
    job.setLong(NUM_INPUT_FILES, files.length);
    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1),
            minSplitSize);

    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(goalSize, minSize, blockSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, splitSize,
                        clusterMap);
                splits.add(makeSplit(path, length - bytesRemaining, splitSize, splitHosts));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, bytesRemaining,
                        clusterMap);
                splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining, splitHosts));
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            splits.add(makeSplit(path, 0, length, splitHosts));
        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    LOG.debug("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}

From source file:com.wandisco.s3hdfs.rewrite.filter.TestCurlCommands.java

License:Apache License

@Test
public void testCurlCreateBucket1()
        throws IOException, URISyntaxException, S3ServiceException, InterruptedException {
    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("rewrite", null, "flavaflav");

    ProcessBuilder pb = new ProcessBuilder("curl", "-v", "-L", "-X", "PUT",
            "http://" + hostName + ":" + PROXY_PORT + "/rewrite?user.name=flavaflav");
    Process proc = pb.start();
    proc.waitFor();

    String out = testUtil.readInputStream(proc.getInputStream());
    String out2 = testUtil.readInputStream(proc.getErrorStream());
    System.err.println(out);
    System.err.println(out2);

    FileStatus retVal = hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0];
    System.out.println(hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0].getPath());

    assertEquals("rewrite", retVal.getPath().getName());
    assertEquals("flavaflav", retVal.getOwner());
    assertTrue(retVal.isDirectory());

    FileStatus[] inside = hdfs.listStatus(new Path(retVal.getPath().toString()));
    assertEquals(0, inside.length);
}

From source file:com.wandisco.s3hdfs.rewrite.filter.TestCurlCommands.java

License:Apache License

@Test
public void testCurlCreateBucket2()
        throws IOException, URISyntaxException, S3ServiceException, InterruptedException {
    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("rewrite", null, "flavaflav");

    ProcessBuilder pb = new ProcessBuilder("curl", "-v", "-L", "-X", "PUT", "-H", "Host: rewrite." + hostName,
            "http://" + hostName + ":" + PROXY_PORT + "/?user.name=flavaflav");
    Process proc = pb.start();
    proc.waitFor();

    String out = testUtil.readInputStream(proc.getInputStream());
    String out2 = testUtil.readInputStream(proc.getErrorStream());
    System.out.println(out);
    System.out.println(out2);

    FileStatus retVal = hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0];
    System.out.println(hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0].getPath());

    assertEquals("rewrite", retVal.getPath().getName());
    assertEquals("flavaflav", retVal.getOwner());
    assertTrue(retVal.isDirectory());

    FileStatus[] inside = hdfs.listStatus(new Path(retVal.getPath().toString()));
    assertEquals(0, inside.length);
}

From source file:com.wandisco.s3hdfs.rewrite.filter.TestCurlCommands.java

License:Apache License

@Test
public void testCurlGetNonExistantObject()
        throws IOException, URISyntaxException, S3ServiceException, InterruptedException {
    // WITHOUT BUCKET
    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("myBucket", "S3HDFS%2Fslot%2D01special%2Dtapestart",
            "flavaflav");

    ProcessBuilder pb = new ProcessBuilder("curl", "-v", "-L", "-X", "GET", "http://" + hostName + ":"
            + PROXY_PORT + "/myBucket/" + s3HdfsPath.getObjectName() + "?user.name=flavaflav");
    Process proc = pb.start();
    proc.waitFor();

    String out = testUtil.readInputStream(proc.getInputStream());
    String out2 = testUtil.readInputStream(proc.getErrorStream());
    System.out.println(out);
    System.out.println(out2);

    assert out2.contains("HTTP/1.1 404 Not Found");

    // MAKE BUCKET
    ProcessBuilder pb2 = new ProcessBuilder("curl", "-v", "-L", "-X", "PUT", "-H", "Host: myBucket." + hostName,
            "http://" + hostName + ":" + PROXY_PORT + "/?user.name=flavaflav");
    Process proc2 = pb2.start();
    proc2.waitFor();

    out = testUtil.readInputStream(proc2.getInputStream());
    out2 = testUtil.readInputStream(proc2.getErrorStream());
    System.out.println(out);
    System.out.println(out2);

    FileStatus retVal = hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0];
    System.out.println(hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0].getPath());

    assertEquals("myBucket", retVal.getPath().getName());
    assertEquals("flavaflav", retVal.getOwner());
    assertTrue(retVal.isDirectory());

    FileStatus[] inside = hdfs.listStatus(new Path(retVal.getPath().toString()));
    assertEquals(0, inside.length);

    //WITH BUCKET
    ProcessBuilder pb3 = new ProcessBuilder("curl", "-v", "-L", "-X", "GET", "http://" + hostName + ":"
            + PROXY_PORT + "/myBucket/" + s3HdfsPath.getObjectName() + "?user.name=flavaflav");
    Process proc3 = pb3.start();
    proc3.waitFor();

    out = testUtil.readInputStream(proc3.getInputStream());
    out2 = testUtil.readInputStream(proc3.getErrorStream());
    System.out.println("LAST: " + out);
    System.out.println("LAST: " + out2);

    assert out2.contains("HTTP/1.1 404 Not Found");
}

From source file:com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory.java

License:Apache License

/**
 * Constructor
 * @param fs
 * @param directory
 * @param create
 * @param conf
 * @throws IOException
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf)
        throws IOException {

    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDirectory();
        }
    } catch (IOException e) {
        // file does not exist, isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}

From source file:com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory.java

License:Apache License

private void create() throws IOException {
    if (!fs.exists(directory)) {
        fs.mkdirs(directory);
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDirectory();
        }
    } catch (IOException e) {
        // file does not exist, isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }

    // clear old index files
    FileStatus[] fileStatus = fs.listStatus(directory);
    for (int i = 0; i < fileStatus.length; i++) {
        if (!fs.delete(fileStatus[i].getPath(), true)) {
            throw new IOException("Cannot delete index file " + fileStatus[i].getPath());
        }
    }
}

From source file:com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java

License:Open Source License

public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
    this.fileSystem = fileSystem;

    CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
    codecIfAny = factory.getCodec(srcPath);

    FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
    if (srcFileStatus.isDirectory()) {
        // returns FileStatus objects sorted by filename.
        String partFilenamePattern = "part-?-?????";
        if (codecIfAny != null) {
            partFilenamePattern += codecIfAny.getDefaultExtension();
        }
        Path partPathGlob = new Path(srcPath, partFilenamePattern);
        partFileStatuses = fileSystem.globStatus(partPathGlob);
    } else {
        partFileStatuses = new FileStatus[] { srcFileStatus };
    }

}