Example usage for org.apache.hadoop.fs Path getFileSystem

List of usage examples for org.apache.hadoop.fs Path getFileSystem

Introduction

This page collects usage examples for the getFileSystem method of org.apache.hadoop.fs.Path.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Source Link

Document

Return the FileSystem that owns this Path.

Usage

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Counts the visible (non-hidden) files located directly inside a directory.
 * Entries that are not regular files, and files whose name is reported as hidden
 * by isHiddenFile(), are not counted. The precondition check fails if the given
 * path is not a directory.
 */
public static int getTotalNumVisibleFiles(Path directory) throws IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    FileStatus dirStatus = fs.getFileStatus(directory);
    Preconditions.checkState(dirStatus.isDirectory());

    int visibleCount = 0;
    for (FileStatus entry : fs.listStatus(directory)) {
        // Only regular files are candidates for counting.
        if (!entry.isFile()) {
            continue;
        }
        // Hidden files are excluded from the total.
        if (isHiddenFile(entry.getPath().getName())) {
            continue;
        }
        visibleCount++;
    }
    return visibleCount;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Moves all visible (non-hidden) files from a source directory to a destination
 * directory. Any sub-directories within the source directory are skipped.
 * When a file with the same name already exists in the destination, the moved file
 * gets a UUID appended to its base file name; one UUID is shared by every conflict
 * resolved during a single call.
 *
 * @param sourceDir directory whose visible files are moved; must be a directory
 * @param destDir directory receiving the files; must be a directory
 * @return the number of files moved as part of this operation
 * @throws IOException if a file system operation fails
 */
public static int moveAllVisibleFiles(Path sourceDir, Path destDir) throws IOException {
    // Resolve each directory against the file system that owns it. The source must be
    // checked and listed through its own file system, which may differ from the
    // destination's.
    FileSystem destFs = destDir.getFileSystem(CONF);
    FileSystem sourceFs = sourceDir.getFileSystem(CONF);
    Preconditions.checkState(destFs.isDirectory(destDir));
    Preconditions.checkState(sourceFs.isDirectory(sourceDir));

    // Use the same UUID to resolve all file name conflicts. This helps mitigate problems
    // that might happen if there is a conflict moving a set of files that have
    // dependent file names. For example, foo.lzo and foo.lzo_index.
    UUID uuid = UUID.randomUUID();

    // Enumerate all the files in the source
    int numFilesMoved = 0;
    for (FileStatus fStatus : sourceFs.listStatus(sourceDir)) {
        if (fStatus.isDirectory()) {
            LOG.debug("Skipping copy of directory: " + fStatus.getPath());
            continue;
        } else if (isHiddenFile(fStatus.getPath().getName())) {
            continue;
        }

        Path destFile = new Path(destDir, fStatus.getPath().getName());
        if (destFs.exists(destFile)) {
            // Name conflict in the destination: disambiguate with the shared UUID.
            destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), uuid.toString()));
        }
        FileSystemUtil.moveFile(fStatus.getPath(), destFile, false);
        ++numFilesMoved;
    }
    return numFilesMoved;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Moves (renames) the given file to a new location (either another directory or a
 * file). If renameIfAlreadyExists is true, no error will be thrown if a file with the
 * same name already exists in the destination location. Instead, a UUID will be
 * appended to the base file name, preserving the existing file extension.
 * If the underlying rename reports failure (for example because of a file name
 * conflict when renameIfAlreadyExists is false), an IOException is thrown.
 *
 * @param sourceFile the file to move
 * @param dest the target directory or target file path
 * @param renameIfAlreadyExists whether to pick a unique name on a name conflict
 * @throws IOException if the file system cannot be accessed or the rename fails
 */
public static void moveFile(Path sourceFile, Path dest, boolean renameIfAlreadyExists) throws IOException {
    FileSystem fs = dest.getFileSystem(CONF);

    // Determine once whether the destination is a directory; both the target file
    // name and the conflict-resolution directory depend on it.
    boolean destIsDirectory = fs.isDirectory(dest);
    Path destFile = destIsDirectory ? new Path(dest, sourceFile.getName()) : dest;
    // If a file with the same name does not already exist in the destination location
    // then use the same file name. Otherwise, generate a unique file name.
    if (renameIfAlreadyExists && fs.exists(destFile)) {
        Path destDir = destIsDirectory ? dest : dest.getParent();
        destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), UUID.randomUUID().toString()));
    }
    LOG.debug(String.format("Moving '%s' to '%s'", sourceFile.toString(), destFile.toString()));
    // Move (rename) the file. rename() signals failure through its return value, so
    // surface it instead of silently leaving the source file in place.
    if (!fs.rename(sourceFile, destFile)) {
        throw new IOException(
                String.format("Failed to move '%s' to '%s'", sourceFile.toString(), destFile.toString()));
    }
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Opens the file at the given path, reads it fully and returns its contents as a
 * string. The stream is closed quietly once reading finishes, whether or not the
 * read succeeded.
 */
public static String readFile(Path file) throws IOException {
    InputStream in = file.getFileSystem(CONF).open(file);
    try {
        String contents = IOUtils.toString(in);
        return contents;
    } finally {
        IOUtils.closeQuietly(in);
    }
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Reports whether at least one direct child of the given path is a directory.
 * Returns true on the first sub-directory encountered, false if there is none.
 */
public static boolean containsSubdirectory(Path directory) throws FileNotFoundException, IOException {
    FileStatus[] children = directory.getFileSystem(CONF).listStatus(directory);
    boolean foundSubdirectory = false;
    // Stop scanning as soon as a directory entry has been seen.
    for (int i = 0; i < children.length && !foundSubdirectory; i++) {
        foundSubdirectory = children[i].isDirectory();
    }
    return foundSubdirectory;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Creates a uniquely named temporary directory inside the given directory and
 * returns its path. The name is ".tmp_" followed by a random UUID. The boolean
 * result of mkdirs() is not inspected.
 */
public static Path makeTmpSubdirectory(Path directory) throws IOException {
    FileSystem owner = directory.getFileSystem(CONF);
    String tmpName = ".tmp_" + UUID.randomUUID().toString();
    Path tmpPath = new Path(directory, tmpName);
    owner.mkdirs(tmpPath);
    return tmpPath;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Tells whether the file system that owns the given path is a
 * DistributedFileSystem instance.
 */
public static boolean isDistributedFileSystem(Path path) throws IOException {
    return path.getFileSystem(CONF) instanceof DistributedFileSystem;
}

From source file:com.cloudera.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Returns the file system that owns the given path, cast to DistributedFileSystem.
 * The precondition check fails if the owning file system is of any other type.
 */
public static DistributedFileSystem getDistributedFileSystem(Path path) throws IOException {
    FileSystem owner = path.getFileSystem(CONF);
    boolean ownerIsDfs = owner instanceof DistributedFileSystem;
    Preconditions.checkState(ownerIsDfs);
    return (DistributedFileSystem) owner;
}

From source file:com.cloudera.impala.util.AvroSchemaUtils.java

License:Apache License

/**
 * Gets an Avro table's JSON schema from the list of given table property search
 * locations. The schema may be specified as a string literal or provided as a
 * Hadoop FileSystem or http URL that points to the schema. Apart from ensuring
 * that the JSON schema is not SCHEMA_NONE, this function does not perform any
 * additional validation on the returned string (e.g., it may not be a valid
 * schema). Returns the Avro schema or null if none was specified in the search
 * locations. Throws an AnalysisException if a schema was specified, but could not
 * be retrieved, e.g., because of an invalid URL.
 *
 * @param schemaSearchLocations property maps to search in order; null entries are skipped
 * @return the schema literal or the contents read from the schema URL, or null if
 *         no location specifies a schema
 * @throws AnalysisException if a schema URL was specified but could not be read
 */
public static String getAvroSchema(List<Map<String, String>> schemaSearchLocations) throws AnalysisException {
    String url = null;
    // Search all locations and break out on the first valid schema found.
    for (Map<String, String> schemaLocation : schemaSearchLocations) {
        if (schemaLocation == null) {
            continue;
        }

        String literal = schemaLocation.get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName());
        if (literal != null && !literal.equals(AvroSerdeUtils.SCHEMA_NONE)) {
            return literal;
        }

        // Validate the candidate before keeping it, so that a SCHEMA_NONE URL in the
        // last examined location is never mistaken for a real schema location.
        String candidateUrl = schemaLocation.get(AvroSerdeUtils.AvroTableProperties.SCHEMA_URL.getPropName());
        if (candidateUrl != null && !candidateUrl.equals(AvroSerdeUtils.SCHEMA_NONE)) {
            url = candidateUrl.trim();
            break;
        }
    }
    if (url == null) {
        return null;
    }

    String schema = null;
    InputStream urlStream = null;
    try {
        // TODO: Add support for https:// here.
        if (url.toLowerCase().startsWith("http://")) {
            urlStream = new URL(url).openStream();
            schema = IOUtils.toString(urlStream);
        } else {
            Path path = new Path(url);
            FileSystem fs = path.getFileSystem(FileSystemUtil.getConfiguration());
            StringBuilder errorMsg = new StringBuilder();
            if (!FileSystemUtil.isPathReachable(path, fs, errorMsg)) {
                throw new AnalysisException(String.format("Invalid avro.schema.url: %s. %s", url, errorMsg));
            }
            schema = FileSystemUtil.readFile(path);
        }
    } catch (AnalysisException e) {
        // Rethrow unchanged so the generic handler below does not re-wrap it.
        throw e;
    } catch (IOException e) {
        throw new AnalysisException(
                String.format("Failed to read Avro schema at: %s. %s ", url, e.getMessage()));
    } catch (Exception e) {
        throw new AnalysisException(String.format("Invalid avro.schema.url: %s. %s", url, e.getMessage()));
    } finally {
        if (urlStream != null) {
            IOUtils.closeQuietly(urlStream);
        }
    }
    return schema;
}

From source file:com.cloudera.kitten.appmaster.util.HDFSFileFinder.java

License:Open Source License

/**
 * Sums, per block-location name, the length of all blocks belonging to the files
 * matched by a path or glob pattern.
 *
 * @param p path or glob pattern selecting the files to inspect
 * @param conf configuration used to resolve the file system that owns p
 * @return map from each name reported by BlockLocation.getNames() to the total
 *         length of the blocks reported at that name; empty if nothing matches
 * @throws IOException if the file system cannot be accessed
 */
public static Map<String, Long> getNumBytesOfGlobHeldByDatanodes(Path p, Configuration conf)
        throws IOException {
    FileSystem fs = p.getFileSystem(conf);

    HashMap<String, Long> bytesHeld = Maps.newHashMap();
    FileStatus[] matches = fs.globStatus(p);
    if (matches == null) {
        // globStatus() may return null when nothing matches; report no bytes held.
        return bytesHeld;
    }
    for (FileStatus f : matches) {
        // Ask for the blocks of the matched file itself, not of the glob pattern.
        BlockLocation[] bls = fs.getFileBlockLocations(f, 0, f.getLen());
        if (bls == null) {
            continue;
        }
        for (BlockLocation bl : bls) {
            long l = bl.getLength();
            for (String name : bl.getNames()) {
                Long held = bytesHeld.get(name);
                bytesHeld.put(name, held == null ? l : held + l);
            }
        }
    }

    return bytesHeld;
}