Example usage for org.apache.hadoop.fs FileSystem isFile

List of usage examples for org.apache.hadoop.fs FileSystem isFile


On this page you can find example usages of org.apache.hadoop.fs.FileSystem.isFile.


public boolean isFile(Path f) throws IOException 

Source Link


True iff the named path is a regular file.


From source file:io.amient.yarn1.YarnClient.java

License:Open Source License

/**
 * Distribute all dependencies in a single jar both from Client to Master as well as Master to Container(s)
 */
public static void distributeResources(Configuration yarnConf, Properties appConf, String appName)
        throws IOException {
    // NOTE(review): this excerpt appears to have lost several closing braces and statements;
    // the comments below describe only the lines that are visible.
    // distFs is obtained from yarnConf via FileSystem.get; localFs is the local file system.
    final FileSystem distFs = FileSystem.get(yarnConf);
    final FileSystem localFs = FileSystem.getLocal(yarnConf);
    try {

        // Distribute configuration: store appConf as "<appName>.configuration" in the
        // home directory of distFs.
        final Path dstConfig = new Path(distFs.getHomeDirectory(), appName + ".configuration");
        // NOTE(review): no close() of this stream is visible in the excerpt - confirm it is closed.
        final FSDataOutputStream fs = distFs.create(dstConfig);
        appConf.store(fs, "Yarn1 Application Config for " + appName);
        log.info("Updated resource " + dstConfig);

        // Distribute main jar: localPath is the code-source location of YarnClient, with a
        // ".jar/" occurrence rewritten to ".jar".
        final String localPath = YarnClient.class.getProtectionDomain().getCodeSource().getLocation().getFile()
                .replace(".jar/", ".jar");
        final Path src;
        final String jarName = appName + ".jar";
        if (localPath.endsWith(".jar")) {
            // The code source is already a jar file: use it as the upload source directly.
            log.info("Distributing local jar : " + localPath);
            src = new Path(localPath);
        } else {
            // The code source is not a jar: delete any previous "<appName>.jar" under localPath,
            // then run the mvn and jar shell commands below to build a fresh archive.
            try {
                String localArchive = localPath + appName + ".jar";
                localFs.delete(new Path(localArchive), false);
                log.info("Unpacking compile scope dependencies: " + localPath);
                executeShell("mvn -f " + localPath + "/../.. generate-resources");
                log.info("Preparing application main jar " + localArchive);
                executeShell("jar cMf " + localArchive + " -C " + localPath + " ./");
                src = new Path(localArchive);

            } catch (InterruptedException e) {
                // An interrupted shell command is reported to the caller as an IOException.
                throw new IOException(e);

        // Compute the MD5 digest of the local jar.
        byte[] digest;
        final MessageDigest md = MessageDigest.getInstance("MD5");
        try (InputStream is = new FileInputStream(src.toString())) {
            // NOTE(review): a DigestInputStream updates md as bytes are read through it (unless
            // switched off), and md.update() below is fed the same bytes again - confirm that
            // this double update is intended.
            DigestInputStream dis = new DigestInputStream(is, md);
            byte[] buffer = new byte[8192];
            int numOfBytesRead;
            while ((numOfBytesRead = dis.read(buffer)) > 0) {
                md.update(buffer, 0, numOfBytesRead);
            digest = md.digest();
        log.info("Local check sum: " + Hex.encodeHexString(digest));

        // The jar and its ".md5" checksum file are both placed in the home directory of distFs.
        final Path dst = new Path(distFs.getHomeDirectory(), jarName);
        Path remoteChecksumFile = new Path(distFs.getHomeDirectory(), jarName + ".md5");
        boolean checksumMatches = false;
        // The remote checksum is only read when the path is a regular file; otherwise
        // checksumMatches keeps its initial value of false.
        if (distFs.isFile(remoteChecksumFile)) {
            try (InputStream r = distFs.open(remoteChecksumFile)) {
                // Read the whole remote checksum file into memory.
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                int nRead;
                byte[] data = new byte[1024];
                while ((nRead = r.read(data, 0, data.length)) != -1) {
                    buffer.write(data, 0, nRead);
                byte[] remoteDigest = buffer.toByteArray();
                log.info("Remote check sum: " + Hex.encodeHexString(remoteDigest));
                // The remote file content is compared byte-for-byte with the local digest.
                checksumMatches = Arrays.equals(digest, remoteDigest);

        if (!checksumMatches) {
            log.info("Updating resource " + dst + " ...");
            // copyFromLocalFile(delSrc=false, overwrite=true, src, dst): the local jar is kept
            // and an existing remote jar is replaced.
            distFs.copyFromLocalFile(false, true, src, dst);
            try (FSDataOutputStream remoteChecksumStream = distFs.create(remoteChecksumFile)) {
                log.info("Updating checksum " + remoteChecksumFile + " ...");
                // NOTE(review): the statement that writes the digest to remoteChecksumStream is
                // not visible in this excerpt.
            FileStatus scFileStatus = distFs.getFileStatus(dst);
            log.info("Updated resource " + dst + " " + scFileStatus.getLen());
    } catch (NoSuchAlgorithmException e) {
        // A failure to obtain the "MD5" MessageDigest is reported as an IOException.
        throw new IOException(e);

From source file:io.transwarp.flume.sink.HDFSCompressedDataStream.java

License:Apache License

/**
 * Opens a compressed output stream on {@code filePath} and sets up the event serializer on it.
 *
 * NOTE(review): this excerpt appears to have lost several closing braces and statements;
 * the documentation covers only the visible lines.
 *
 * @param filePath destination path; its file system is resolved from a fresh Configuration
 * @param codec codec used to obtain the compressor and to wrap the raw output stream
 * @param cType not referenced by any visible line of this method
 * @throws IOException if appending to an existing file but the serializer does not support reopen
 */
public void open(String filePath, CompressionCodec codec, CompressionType cType) throws IOException {
    Configuration conf = new Configuration();
    Path dstPath = new Path(filePath);
    FileSystem hdfs = dstPath.getFileSystem(conf);
    // When requested, unwrap a LocalFileSystem to its raw file system; any other file system
    // type is kept as is and only a warning is logged.
    if (useRawLocalFileSystem) {
        if (hdfs instanceof LocalFileSystem) {
            hdfs = ((LocalFileSystem) hdfs).getRaw();
        } else {
            logger.warn("useRawLocalFileSystem is set to true but file system "
                    + "is not of type LocalFileSystem: " + hdfs.getClass().getName());
    boolean appending = false;
    // Append only when "hdfs.append.support" is enabled in conf (default false) and the
    // destination is already a regular file; otherwise create the file.
    if (conf.getBoolean("hdfs.append.support", false) == true && hdfs.isFile(dstPath)) {
        fsOut = hdfs.append(dstPath);
        appending = true;
    } else {
        fsOut = hdfs.create(dstPath);
    // Obtain a compressor from the CodecPool only if none is held yet.
    if (compressor == null) {
        compressor = CodecPool.getCompressor(codec, conf);
    cmpOut = codec.createOutputStream(fsOut, compressor);
    serializer = EventSerializerFactory.getInstance(serializerType, serializerContext, cmpOut);
    // Appending requires a serializer that supports reopen; otherwise the serializer is
    // dropped and the open fails.
    if (appending && !serializer.supportsReopen()) {
        serializer = null;
        throw new IOException("serializer (" + serializerType + ") does not support append");

    registerCurrentStream(fsOut, hdfs, dstPath);

    // NOTE(review): the bodies of both branches below are not visible in this excerpt.
    if (appending) {
    } else {
    isFinished = false;

From source file:jp.ac.u.tokyo.m.pig.udf.load.LoadDataWithSchema.java

License:Apache License

/**
 * Loads the row schema stored next to the data at {@code aLocation} and converts its columns
 * into Pig ResourceFieldSchema entries.
 *
 * NOTE(review): the closing brace of this method is missing from the excerpt, and no visible
 * line attaches tResourceFieldSchemas to the returned tResourceSchema - confirm against the
 * full source.
 *
 * @param aLocation data location; may be a file or a directory
 * @param aJob job whose configuration is used to resolve the file system
 * @return the ResourceSchema instance created in this method
 * @throws IOException declared by the signature
 */
public ResourceSchema getSchema(String aLocation, Job aJob) throws IOException {
    Configuration tConfiguration = aJob.getConfiguration();
    Path tDataPath = new Path(aLocation);
    FileSystem tFileSystem = tDataPath.getFileSystem(tConfiguration);
    // If the location is a regular file the schema file is looked up in its parent directory,
    // otherwise directly under the location itself.
    Path tSchemaFilePath = tFileSystem.isFile(tDataPath)
            ? new Path(tDataPath.getParent(), StoreConstants.STORE_FILE_NAME_SCHEMA)
            : new Path(tDataPath, StoreConstants.STORE_FILE_NAME_SCHEMA);
    RowSchema tRowSchema = LoadSchemaUtil.loadSchemaFile(tFileSystem, tSchemaFilePath, mEncoding);

    ResourceSchema tResourceSchema = new ResourceSchema();
    TypeStringCasterPigToPigTypeByte tTypeCaster = TypeStringCasterPigToPigTypeByte.INSTANCE;
    List<ColumnSchema> tColumnSchemaList = tRowSchema.getColumnSchemaList();
    int tSize = tColumnSchemaList.size();
    ResourceFieldSchema[] tResourceFieldSchemas = new ResourceFieldSchema[tSize];
    int tIndex = 0;
    // One ResourceFieldSchema per column, built from the column name and its type string
    // passed through tTypeCaster.castTypeString.
    for (ColumnSchema tCurrentColumnSchema : tColumnSchemaList) {
        tResourceFieldSchemas[tIndex++] = new ResourceFieldSchema(new FieldSchema(
                tCurrentColumnSchema.getName(), tTypeCaster.castTypeString(tCurrentColumnSchema.getType())));
    }
    return tResourceSchema;

From source file:net.sf.katta.node.ShardManager.java

License:Apache License

/**
 * Downloads the shard at {@code shardPath} into a local temporary folder, retrying on failure.
 *
 * NOTE(review): this excerpt appears to have lost several closing braces and statements
 * (for example the body of the finally block); the documentation covers only the visible lines.
 *
 * @param shardName shard name, used in the log message only in the visible lines
 * @param shardPath URI of the shard; a regular file ending in ".zip" is unzipped, anything else is copied
 * @param localShardFolder local shard folder; the visible code derives the temporary folder
 *        "&lt;localShardFolder&gt;_tmp" from it
 * @throws KattaException if shardPath is not a valid URI, or if the last attempt fails
 */
private void installShard(String shardName, String shardPath, File localShardFolder) throws KattaException {
    LOG.info("install shard '" + shardName + "' from " + shardPath);
    // TODO sg: to fix HADOOP-4422 we try to download the shard 5 times
    int maxTries = 5;
    for (int i = 0; i < maxTries; i++) {
        URI uri;
        try {
            uri = new URI(shardPath);
            FileSystem fileSystem = FileSystem.get(uri, new Configuration());
            // Wrap the file system in a ThrottledFileSystem when a throttle semaphore is configured.
            if (_throttleSemaphore != null) {
                fileSystem = new ThrottledFileSystem(fileSystem, _throttleSemaphore);
            final Path path = new Path(shardPath);
            // A shard counts as a zip only if it is a regular file AND its path ends with ".zip".
            boolean isZip = fileSystem.isFile(path) && shardPath.endsWith(".zip");

            File shardTmpFolder = new File(localShardFolder.getAbsolutePath() + "_tmp");
            try {

                if (isZip) {
                    // The last argument is true only when the system property
                    // "katta.spool.zip.shards" equals "true" (case-insensitive, default "false").
                    FileUtil.unzip(path, shardTmpFolder, fileSystem,
                            System.getProperty("katta.spool.zip.shards", "false").equalsIgnoreCase("true"));
                } else {
                    fileSystem.copyToLocalFile(path, new Path(shardTmpFolder.getAbsolutePath()));
            } finally {
                // Ensure that the tmp folder is deleted on an error
            // Looks like we were successful.
            // NOTE(review): a success after at least one failed attempt is logged at error level.
            if (i > 0) {
                LOG.error("Loaded shard:" + shardPath);
        } catch (final URISyntaxException e) {
            // An unparsable URI is rethrown immediately as a KattaException.
            throw new KattaException("Can not parse uri for path: " + shardPath, e);
        } catch (final Exception e) {
            // NOTE(review): i is zero-based, so the first attempt is logged as "try 0 of 5".
            LOG.error(String.format("Error loading shard: %s (try %d of %d)", shardPath, i, maxTries), e);
            // Give up with a KattaException once the final attempt has failed.
            if (i >= maxTries - 1) {
                throw new KattaException("Can not load shard: " + shardPath, e);

From source file:net.team1.dev.HousingAnalysis.java

License:Apache License

/**
 * The main entry point for the map/reduce runner.
 * @param args 2 args: &lt;input dir&gt; &lt;output dir&gt;
 * @throws Exception Throws IOException
 */
public static void main(String[] args) throws Exception {
    // NOTE(review): this excerpt appears to have lost closing braces and the statement that
    // starts the job; the comments below describe only the visible lines.
    // args[0] is used as the input path and args[1] as the output path.
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    FileSystem fs = FileSystem.get(new Configuration());

    // The input path must exist and must not be a regular file.
    if (!fs.exists(inputDir))
        throw new IOException("The input path does not exist.");
    if (fs.isFile(inputDir))
        throw new IOException("The input path is a file.");
    // An existing output path is deleted recursively.
    if (fs.exists(outputDir))
        fs.delete(outputDir, true);

    // set job configuration
    JobConf conf = new JobConf(HousingAnalysis.class);

    // set multiple input files: every path returned by getInputFilePaths is registered with
    // TextInputFormat and the mapper class mapped to it
    HashMap<Path, Class<? extends Mapper>> inputMappers = getInputFilePaths(inputDir, fs);
    for (Path p : inputMappers.keySet()) {
        MultipleInputs.addInputPath(conf, p, TextInputFormat.class, inputMappers.get(p));
        LOG.info(p.getName() + ": " + inputMappers.get(p).getName());

    // set output
    FileOutputFormat.setOutputPath(conf, outputDir);

    // start the job

From source file:org.ankus.mapreduce.algorithms.clustering.kmeans.KMeansDriver.java

License:Apache License

/**
 * Reads lines from the input path, turns each line into a KMeansClusterInfoMgr and writes the
 * resulting cluster descriptions to {@code clusterOutputPath + "/part-r-00000"}.
 *
 * NOTE(review): this excerpt appears to have lost several closing braces and statements
 * (for example the increment of index and the statement guarded by the final index check),
 * and the block comment after the signature has lost its delimiters; the documentation covers
 * only the visible lines.
 *
 * @param conf configuration; the visible code reads CLUSTER_COUNT (default "1"), INPUT_PATH
 *        and DELIMITER (default tab) from it
 * @param clusterOutputPath directory under which the "part-r-00000" file is created (overwriting)
 * @throws Exception declared by the signature
 */
private void setInitialClusterCenter(Configuration conf, String clusterOutputPath) throws Exception {
    /**/*from   w w  w. ja  va  2 s .c  om*/
     * TODO:
     * Current Process
     *       - get top n data (n is defined cluster count)
     *       - set each data to initial cluster center
     * Following Process is reasonable. => MR Job
     *       1. Distribution
     *          - get statistics(distribution) for all attributes
     *          - use min/max and freq for initial cluster center setting
     *       numeric => (max-min) / cluster count
     *       nominal => each value (freq sort) 
    FileSystem fs = FileSystem.get(conf);

    String readStr, tokens[];
    int index = 0;
    int clusterCnt = Integer.parseInt(conf.get(ArgumentsConstants.CLUSTER_COUNT, "1"));
    KMeansClusterInfoMgr clusters[] = new KMeansClusterInfoMgr[clusterCnt];

    Path inputPath = new Path(conf.get(ArgumentsConstants.INPUT_PATH, null));
    // If the input path is not a regular file, keep descending into the first entry returned
    // by listStatus until that entry is a regular file.
    // NOTE(review): status[0] is accessed without a length check - confirm that an empty
    // directory cannot occur here.
    if (!fs.isFile(inputPath)) {
        boolean isFile = false;
        while (!isFile) {
            FileStatus[] status = fs.listStatus(inputPath);
            if (fs.isFile(status[0].getPath()))
                isFile = true;

            inputPath = status[0].getPath();

    FSDataInputStream fin = fs.open(inputPath);
    BufferedReader br = new BufferedReader(new InputStreamReader(fin, Constants.UTF8));

    // Each input line becomes one cluster entry; the line is split on the configured delimiter.
    while ((readStr = br.readLine()) != null) {
        clusters[index] = new KMeansClusterInfoMgr();

        tokens = readStr.split(conf.get(ArgumentsConstants.DELIMITER, "\t"));
        for (int i = 0; i < tokens.length; i++) {
            // Only columns accepted by mIndexArr and not listed in mExceptionIndexArr are used;
            // columns listed in mNominalIndexArr are added as nominal, all others as numeric.
            if (CommonMethods.isContainIndex(mIndexArr, i, true)
                    && !CommonMethods.isContainIndex(mExceptionIndexArr, i, false)) {
                if (CommonMethods.isContainIndex(mNominalIndexArr, i, false)) {
                    clusters[index].addAttributeValue(i, tokens[i], ConfigurationVariable.NOMINAL_ATTRIBUTE);
                } else
                    clusters[index].addAttributeValue(i, tokens[i], ConfigurationVariable.NUMERIC_ATTRIBUTE);

        // NOTE(review): the statement guarded by this check is not visible in the excerpt.
        if (index >= clusterCnt)


    FSDataOutputStream fout = fs.create(new Path(clusterOutputPath + "/part-r-00000"), true);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fout, Constants.UTF8));

    // One output line per cluster entry.
    for (int i = 0; i < clusters.length; i++) {
        bw.write(clusters[i].getClusterInfoString(conf.get(ArgumentsConstants.DELIMITER, "\t"),
                mNominalDelimiter) + "\n");


From source file:org.apache.accumulo.core.client.mock.MockTableOperations.java

License:Apache License

/**
 * Imports the files found in {@code dir} into the mock table {@code tableName}; a file whose
 * import throws is copied into {@code failureDir}.
 *
 * NOTE(review): this excerpt appears to have lost many closing braces and statements, and two
 * block comments have lost their delimiters; the documentation covers only the visible lines.
 *
 * @param tableName name of the target table; must be present in acu.tables
 * @param dir import directory; must not be a regular file
 * @param failureDir failure directory; must not be a regular file, must accept a new file
 *        and must have no children
 * @param setTime tested for every entry; the statement it guards is not visible in this excerpt
 * @throws IOException if one of the directory preconditions fails
 * @throws AccumuloException declared by the signature
 * @throws AccumuloSecurityException declared by the signature
 * @throws TableNotFoundException if no table named tableName exists
 */
public void importDirectory(String tableName, String dir, String failureDir, boolean setTime)
        throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
    long time = System.currentTimeMillis();
    MockTable table = acu.tables.get(tableName);
    if (table == null) {
        throw new TableNotFoundException(null, tableName, "The table was not found");
    }
    Path importPath = new Path(dir);
    Path failurePath = new Path(failureDir);

    FileSystem fs = acu.getFileSystem();
     * check preconditions
    // directories are directories
    if (fs.isFile(importPath)) {
        throw new IOException("Import path must be a directory.");
    if (fs.isFile(failurePath)) {
        throw new IOException("Failure path must be a directory.");
    // failures are writable: probe by creating (and afterwards deleting) a ".createFile" entry
    Path createPath = failurePath.suffix("/.createFile");
    FSDataOutputStream createStream = null;
    try {
        createStream = fs.create(createPath);
    } catch (IOException e) {
        // NOTE(review): the original exception is not passed on as the cause.
        throw new IOException("Error path is not writable.");
    } finally {
        if (createStream != null) {
    fs.delete(createPath, false);
    // failures are empty
    FileStatus[] failureChildStats = fs.listStatus(failurePath);
    if (failureChildStats.length > 0) {
        throw new IOException("Error path must be empty.");
     * Begin the import - iterate the files in the path
    for (FileStatus importStatus : fs.listStatus(importPath)) {
        try {
            FileSKVIterator importIterator = FileOperations.getInstance().newReaderBuilder()
                    .forFile(importStatus.getPath().toString(), fs, fs.getConf())
            while (importIterator.hasTop()) {
                Key key = importIterator.getTopKey();
                Value value = importIterator.getTopValue();
                if (setTime) {
                // One mutation per entry: a put for live keys, a putDelete for deleted keys.
                Mutation mutation = new Mutation(key.getRow());
                if (!key.isDeleted()) {
                    mutation.put(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()), key.getTimestamp(),
                } else {
                    mutation.putDelete(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()), key.getTimestamp());
        } catch (Exception e) {
            // On any failure, copy the import file byte-for-byte into the failure directory
            // under the same file name.
            FSDataOutputStream failureWriter = null;
            DataInputStream failureReader = null;
            try {
                failureWriter = fs.create(failurePath.suffix("/" + importStatus.getPath().getName()));
                failureReader = fs.open(importStatus.getPath());
                int read = 0;
                byte[] buffer = new byte[1024];
                while (-1 != (read = failureReader.read(buffer))) {
                    failureWriter.write(buffer, 0, read);
            } finally {
                if (failureReader != null)
                if (failureWriter != null)
        // NOTE(review): judging by indentation this runs once per import file, after the
        // try/catch - confirm against the full source.
        fs.delete(importStatus.getPath(), true);

From source file:org.apache.accumulo.core.client.mock.MockTableOperationsImpl.java

License:Apache License

/**
 * Imports the files found in {@code dir} into the mock table {@code tableName}; a file whose
 * import throws is copied into {@code failureDir}.
 *
 * NOTE(review): this excerpt appears to have lost many closing braces and statements, and two
 * block comments have lost their delimiters; the documentation covers only the visible lines.
 *
 * @param tableName name of the target table; must be present in acu.tables
 * @param dir import directory; must not be a regular file
 * @param failureDir failure directory; must not be a regular file, must accept a new file
 *        and must have no children
 * @param setTime tested for every entry; the statement it guards is not visible in this excerpt
 * @throws IOException if one of the directory preconditions fails
 * @throws AccumuloException declared by the signature
 * @throws AccumuloSecurityException declared by the signature
 * @throws TableNotFoundException if no table named tableName exists
 */
public void importDirectory(String tableName, String dir, String failureDir, boolean setTime)
        throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
    long time = System.currentTimeMillis();
    MockTable table = acu.tables.get(tableName);
    if (table == null) {
        throw new TableNotFoundException(null, tableName, "The table was not found");
    }
    Path importPath = new Path(dir);
    Path failurePath = new Path(failureDir);

    FileSystem fs = acu.getFileSystem();
     * check preconditions
    // directories are directories
    if (fs.isFile(importPath)) {
        throw new IOException("Import path must be a directory.");
    if (fs.isFile(failurePath)) {
        throw new IOException("Failure path must be a directory.");
    // failures are writable: probe by creating (and afterwards deleting) a ".createFile" entry
    Path createPath = failurePath.suffix("/.createFile");
    FSDataOutputStream createStream = null;
    try {
        createStream = fs.create(createPath);
    } catch (IOException e) {
        // NOTE(review): the original exception is not passed on as the cause.
        throw new IOException("Error path is not writable.");
    } finally {
        if (createStream != null) {
    fs.delete(createPath, false);
    // failures are empty
    FileStatus[] failureChildStats = fs.listStatus(failurePath);
    if (failureChildStats.length > 0) {
        throw new IOException("Error path must be empty.");
     * Begin the import - iterate the files in the path
    for (FileStatus importStatus : fs.listStatus(importPath)) {
        try {
            // This variant opens the reader through FileOperations.openReader.
            FileSKVIterator importIterator = FileOperations.getInstance().openReader(
                    importStatus.getPath().toString(), true, fs, fs.getConf(),
            while (importIterator.hasTop()) {
                Key key = importIterator.getTopKey();
                Value value = importIterator.getTopValue();
                if (setTime) {
                // One mutation per entry: a put for live keys, a putDelete for deleted keys.
                Mutation mutation = new Mutation(key.getRow());
                if (!key.isDeleted()) {
                    mutation.put(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()), key.getTimestamp(),
                } else {
                    mutation.putDelete(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()), key.getTimestamp());
        } catch (Exception e) {
            // On any failure, copy the import file byte-for-byte into the failure directory
            // under the same file name.
            FSDataOutputStream failureWriter = null;
            DataInputStream failureReader = null;
            try {
                failureWriter = fs.create(failurePath.suffix("/" + importStatus.getPath().getName()));
                failureReader = fs.open(importStatus.getPath());
                int read = 0;
                byte[] buffer = new byte[1024];
                while (-1 != (read = failureReader.read(buffer))) {
                    failureWriter.write(buffer, 0, read);
            } finally {
                if (failureReader != null)
                if (failureWriter != null)
        // NOTE(review): judging by indentation this runs once per import file, after the
        // try/catch - confirm against the full source.
        fs.delete(importStatus.getPath(), true);

From source file:org.apache.ambari.fast_hdfs_resource.Resource.java

License:Apache License

/**
 * Validates a resource description against the supported actions/types and against what
 * already exists at its target (in {@code dfs}) and at its source (on the local disk).
 *
 * NOTE(review): the closing brace of this method is missing from the excerpt, and no visible
 * line adds entries to actionsAvailable or typesAvailable - as shown, every non-null action
 * would be rejected. Confirm against the full source.
 *
 * @param resource resource whose target, action, type and source are checked
 * @param dfs file system used to inspect the target path
 * @throws IllegalArgumentException if the target is null, the action or type is null or not
 *         in the supported lists, or the declared type contradicts the kind of the existing
 *         target or source
 * @throws IOException declared by the signature
 */
public static void checkResourceParameters(Resource resource, FileSystem dfs)
        throws IllegalArgumentException, IOException {

    ArrayList<String> actionsAvailable = new ArrayList<String>();
    ArrayList<String> typesAvailable = new ArrayList<String>();

    if (resource.getTarget() == null)
        throw new IllegalArgumentException("Path to resource in HadoopFs must be filled.");

    if (resource.getAction() == null || !actionsAvailable.contains(resource.getAction()))
        throw new IllegalArgumentException("Action is not supported.");

    if (resource.getType() == null || !typesAvailable.contains(resource.getType()))
        throw new IllegalArgumentException("Type is not supported.");

    // Check consistency for ("type":"file" == file in hadoop)
    if (dfs.isFile(new Path(resource.getTarget())) && !"file".equals(resource.getType()))
        throw new IllegalArgumentException("Cannot create a directory " + resource.getTarget()
                + " because file is present on the given path.");
    // Check consistency for ("type":"directory" == directory in hadoop)
    else if (dfs.isDirectory(new Path(resource.getTarget())) && !"directory".equals(resource.getType()))
        throw new IllegalArgumentException("Cannot create a file " + resource.getTarget()
                + " because directory is present on the given path.");

    // The same consistency rule is applied to the local source, when one is given.
    // NOTE(review): both messages below lack a space between the source path and "is a ...".
    if (resource.getSource() != null) {
        File source = new File(resource.getSource());
        if (source.isFile() && !"file".equals(resource.getType()))
            throw new IllegalArgumentException("Cannot create a directory " + resource.getTarget()
                    + " because source " + resource.getSource() + "is a file");
        else if (source.isDirectory() && !"directory".equals(resource.getType()))
            throw new IllegalArgumentException("Cannot create a file " + resource.getTarget()
                    + " because source " + resource.getSource() + "is a directory");
    }

From source file:org.apache.avro.tool.Util.java

License:Apache License

/**
 * If pathname is a file, this method returns a list with a single absolute Path to that file;
 * if pathname is a directory, this method returns a list of Paths to all the files within
 * this directory.
 * Only files inside that directory are included; no subdirectories or files in subdirectories
 * will be added.
 * The List is sorted alphabetically.
 * @param fileOrDirName filename or directory name
 * @return A Path List
 * @throws IOException
 */
static List<Path> getFiles(String fileOrDirName) throws IOException {
    // NOTE(review): this excerpt appears to have lost closing braces and the statements that
    // add entries to pathList (and any sorting); the comments describe only the visible lines.
    List<Path> pathList = new ArrayList<Path>();
    Path path = new Path(fileOrDirName);
    // The file system is resolved from the path itself, using a fresh Configuration.
    FileSystem fs = path.getFileSystem(new Configuration());

    // Regular file: handled on its own; directory: its direct children are listed and
    // only non-directory entries are considered.
    if (fs.isFile(path)) {
    } else if (fs.getFileStatus(path).isDir()) {
        for (FileStatus status : fs.listStatus(path)) {
            if (!status.isDir()) {
    return pathList;