List of usage examples for org.apache.hadoop.fs.FileSystem#isFile
@Deprecated public boolean isFile(Path f) throws IOException
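isFile returns true only when the path exists and names a regular file; it returns false (rather than throwing) for directories and for nonexistent paths. The method is deprecated, and the Hadoop javadoc points to getFileStatus(Path) as the replacement. Below is a minimal sketch of both forms, assuming a default Configuration; the path is hypothetical:

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path("/tmp/example.txt"); // hypothetical path

        // Deprecated convenience method: false for directories and missing paths.
        boolean viaIsFile = fs.isFile(p);

        // Recommended replacement: getFileStatus() throws FileNotFoundException
        // for a missing path, so the existence check becomes explicit.
        boolean viaStatus;
        try {
            viaStatus = fs.getFileStatus(p).isFile();
        } catch (FileNotFoundException e) {
            viaStatus = false;
        }

        System.out.println(viaIsFile + " " + viaStatus);
    }
}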
From source file: io.amient.yarn1.YarnClient.java
License: Open Source License
/**
 * Distribute all dependencies in a single jar both from Client to Master as well as Master to Container(s)
 */
public static void distributeResources(Configuration yarnConf, Properties appConf, String appName)
        throws IOException {
    final FileSystem distFs = FileSystem.get(yarnConf);
    final FileSystem localFs = FileSystem.getLocal(yarnConf);
    try {
        // distribute configuration
        final Path dstConfig = new Path(distFs.getHomeDirectory(), appName + ".configuration");
        final FSDataOutputStream fs = distFs.create(dstConfig);
        appConf.store(fs, "Yarn1 Application Config for " + appName);
        fs.close();
        log.info("Updated resource " + dstConfig);

        // distribute main jar
        final String localPath = YarnClient.class.getProtectionDomain().getCodeSource().getLocation().getFile()
                .replace(".jar/", ".jar");
        final Path src;
        final String jarName = appName + ".jar";
        if (localPath.endsWith(".jar")) {
            log.info("Distributing local jar : " + localPath);
            src = new Path(localPath);
        } else {
            try {
                String localArchive = localPath + appName + ".jar";
                localFs.delete(new Path(localArchive), false);
                log.info("Unpacking compile scope dependencies: " + localPath);
                executeShell("mvn -f " + localPath + "/../.. generate-resources");
                log.info("Preparing application main jar " + localArchive);
                executeShell("jar cMf " + localArchive + " -C " + localPath + " ./");
                src = new Path(localArchive);
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }

        byte[] digest;
        final MessageDigest md = MessageDigest.getInstance("MD5");
        try (InputStream is = new FileInputStream(src.toString())) {
            DigestInputStream dis = new DigestInputStream(is, md);
            byte[] buffer = new byte[8192];
            int numOfBytesRead;
            while ((numOfBytesRead = dis.read(buffer)) > 0) {
                md.update(buffer, 0, numOfBytesRead);
            }
            digest = md.digest();
        }
        log.info("Local check sum: " + Hex.encodeHexString(digest));

        final Path dst = new Path(distFs.getHomeDirectory(), jarName);
        Path remoteChecksumFile = new Path(distFs.getHomeDirectory(), jarName + ".md5");
        boolean checksumMatches = false;
        if (distFs.isFile(remoteChecksumFile)) {
            try (InputStream r = distFs.open(remoteChecksumFile)) {
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                int nRead;
                byte[] data = new byte[1024];
                while ((nRead = r.read(data, 0, data.length)) != -1) {
                    buffer.write(data, 0, nRead);
                }
                buffer.flush();
                byte[] remoteDigest = buffer.toByteArray();
                log.info("Remote check sum: " + Hex.encodeHexString(remoteDigest));
                checksumMatches = Arrays.equals(digest, remoteDigest);
            }
        }
        if (!checksumMatches) {
            log.info("Updating resource " + dst + " ...");
            distFs.copyFromLocalFile(false, true, src, dst);
            try (FSDataOutputStream remoteChecksumStream = distFs.create(remoteChecksumFile)) {
                log.info("Updating checksum " + remoteChecksumFile + " ...");
                remoteChecksumStream.write(digest);
            }
            FileStatus scFileStatus = distFs.getFileStatus(dst);
            log.info("Updated resource " + dst + " " + scFileStatus.getLen());
        }
    } catch (NoSuchAlgorithmException e) {
        throw new IOException(e);
    }
}
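In this example isFile doubles as an existence probe: because it returns false for a nonexistent path, the remote .md5 marker is read only when it is actually present, and a missing or mismatching checksum simply triggers a fresh copyFromLocalFile plus a rewrite of the marker file. Note that the jar bytes are hashed twice (once implicitly by the DigestInputStream, once by the explicit md.update call), but since the stored checksum is produced by the same routine, the comparison remains self-consistent.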
From source file: io.transwarp.flume.sink.HDFSCompressedDataStream.java
License: Apache License
@Override
public void open(String filePath, CompressionCodec codec, CompressionType cType) throws IOException {
    Configuration conf = new Configuration();
    Path dstPath = new Path(filePath);
    FileSystem hdfs = dstPath.getFileSystem(conf);
    if (useRawLocalFileSystem) {
        if (hdfs instanceof LocalFileSystem) {
            hdfs = ((LocalFileSystem) hdfs).getRaw();
        } else {
            logger.warn("useRawLocalFileSystem is set to true but file system "
                    + "is not of type LocalFileSystem: " + hdfs.getClass().getName());
        }
    }
    boolean appending = false;
    if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
        fsOut = hdfs.append(dstPath);
        appending = true;
    } else {
        fsOut = hdfs.create(dstPath);
    }
    if (compressor == null) {
        compressor = CodecPool.getCompressor(codec, conf);
    }
    cmpOut = codec.createOutputStream(fsOut, compressor);
    serializer = EventSerializerFactory.getInstance(serializerType, serializerContext, cmpOut);
    if (appending && !serializer.supportsReopen()) {
        cmpOut.close();
        serializer = null;
        throw new IOException("serializer (" + serializerType + ") does not support append");
    }
    registerCurrentStream(fsOut, hdfs, dstPath);
    if (appending) {
        serializer.afterReopen();
    } else {
        serializer.afterCreate();
    }
    isFinished = false;
}
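Here isFile gates the append path of this Flume sink: the stream is reopened with hdfs.append only when hdfs.append.support is enabled and the target already exists as a regular file; in every other case a new file is created.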
From source file: jp.ac.u.tokyo.m.pig.udf.load.LoadDataWithSchema.java
License: Apache License
@Override
public ResourceSchema getSchema(String aLocation, Job aJob) throws IOException {
    Configuration tConfiguration = aJob.getConfiguration();
    Path tDataPath = new Path(aLocation);
    FileSystem tFileSystem = tDataPath.getFileSystem(tConfiguration);
    Path tSchemaFilePath = tFileSystem.isFile(tDataPath)
            ? new Path(tDataPath.getParent(), StoreConstants.STORE_FILE_NAME_SCHEMA)
            : new Path(tDataPath, StoreConstants.STORE_FILE_NAME_SCHEMA);
    RowSchema tRowSchema = LoadSchemaUtil.loadSchemaFile(tFileSystem, tSchemaFilePath, mEncoding);
    ResourceSchema tResourceSchema = new ResourceSchema();
    TypeStringCasterPigToPigTypeByte tTypeCaster = TypeStringCasterPigToPigTypeByte.INSTANCE;
    List<ColumnSchema> tColumnSchemaList = tRowSchema.getColumnSchemaList();
    int tSize = tColumnSchemaList.size();
    ResourceFieldSchema[] tResourceFieldSchemas = new ResourceFieldSchema[tSize];
    int tIndex = 0;
    for (ColumnSchema tCurrentColumnSchema : tColumnSchemaList) {
        tResourceFieldSchemas[tIndex++] = new ResourceFieldSchema(new FieldSchema(
                tCurrentColumnSchema.getName(), tTypeCaster.castTypeString(tCurrentColumnSchema.getType())));
    }
    tResourceSchema.setFields(tResourceFieldSchemas);
    return tResourceSchema;
}
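In this Pig loader, isFile decides where the schema file is looked up: next to a single data file (in its parent directory), or inside the load location when it is a directory.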
From source file: net.sf.katta.node.ShardManager.java
License: Apache License
private void installShard(String shardName, String shardPath, File localShardFolder) throws KattaException {
    LOG.info("install shard '" + shardName + "' from " + shardPath);
    // TODO sg: to fix HADOOP-4422 we try to download the shard 5 times
    int maxTries = 5;
    for (int i = 0; i < maxTries; i++) {
        URI uri;
        try {
            uri = new URI(shardPath);
            FileSystem fileSystem = FileSystem.get(uri, new Configuration());
            if (_throttleSemaphore != null) {
                fileSystem = new ThrottledFileSystem(fileSystem, _throttleSemaphore);
            }
            final Path path = new Path(shardPath);
            boolean isZip = fileSystem.isFile(path) && shardPath.endsWith(".zip");
            File shardTmpFolder = new File(localShardFolder.getAbsolutePath() + "_tmp");
            try {
                FileUtil.deleteFolder(localShardFolder);
                FileUtil.deleteFolder(shardTmpFolder);
                if (isZip) {
                    FileUtil.unzip(path, shardTmpFolder, fileSystem,
                            System.getProperty("katta.spool.zip.shards", "false").equalsIgnoreCase("true"));
                } else {
                    fileSystem.copyToLocalFile(path, new Path(shardTmpFolder.getAbsolutePath()));
                }
                shardTmpFolder.renameTo(localShardFolder);
            } finally {
                // Ensure that the tmp folder is deleted on an error
                FileUtil.deleteFolder(shardTmpFolder);
            }
            // Looks like we were successful.
            if (i > 0) {
                LOG.error("Loaded shard:" + shardPath);
            }
            return;
        } catch (final URISyntaxException e) {
            throw new KattaException("Can not parse uri for path: " + shardPath, e);
        } catch (final Exception e) {
            LOG.error(String.format("Error loading shard: %s (try %d of %d)", shardPath, i, maxTries), e);
            if (i >= maxTries - 1) {
                throw new KattaException("Can not load shard: " + shardPath, e);
            }
        }
    }
}
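isFile combined with the .zip suffix distinguishes a packed shard from a shard directory: an archive is unzipped into the temporary folder, while anything else is fetched with copyToLocalFile, which copies directories recursively.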
From source file: net.team1.dev.HousingAnalysis.java
License: Apache License
/**
 * The main entry point for the map/reduce runner.
 *
 * @param args 2 args: <input dir> <output dir>
 * @throws Exception Throws IOException
 */
public static void main(String[] args) throws Exception {
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    FileSystem fs = FileSystem.get(new Configuration());
    if (!fs.exists(inputDir))
        throw new IOException("The input path does not exist.");
    if (fs.isFile(inputDir))
        throw new IOException("The input path is a file.");
    if (fs.exists(outputDir))
        fs.delete(outputDir, true);

    // set job configuration
    JobConf conf = new JobConf(HousingAnalysis.class);
    conf.setJobName("housinganalysis");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setCombinerClass(HousingReducer.class);
    conf.setReducerClass(HousingReducer.class);

    // set multiple input files
    HashMap<Path, Class<? extends Mapper>> inputMappers = getInputFilePaths(inputDir, fs);
    for (Path p : inputMappers.keySet()) {
        MultipleInputs.addInputPath(conf, p, TextInputFormat.class, inputMappers.get(p));
        LOG.info(p.getName() + ": " + inputMappers.get(p).getName());
    }

    // set output
    FileOutputFormat.setOutputPath(conf, outputDir);

    // start the job
    JobClient.runJob(conf);
}
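exists and isFile together form the input validation here: the job aborts early when the input path is missing or names a file instead of a directory, and any pre-existing output directory is deleted before the job starts.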
From source file: org.ankus.mapreduce.algorithms.clustering.kmeans.KMeansDriver.java
License: Apache License
private void setInitialClusterCenter(Configuration conf, String clusterOutputPath) throws Exception {
    /**
     * TODO:
     * Current Process
     *  - get top n data (n is defined cluster count)
     *  - set each data to initial cluster center
     *
     * Following Process is reasonable. => MR Job
     * 1. Distribution
     *  - get statistics(distribution) for all attributes
     *  - use min/max and freq for initial cluster center setting
     *    numeric => (max-min) / cluster count
     *    nominal => each value (freq sort)
     */
    FileSystem fs = FileSystem.get(conf);
    String readStr;
    String[] tokens;
    int index = 0;
    int clusterCnt = Integer.parseInt(conf.get(ArgumentsConstants.CLUSTER_COUNT, "1"));
    KMeansClusterInfoMgr[] clusters = new KMeansClusterInfoMgr[clusterCnt];

    Path inputPath = new Path(conf.get(ArgumentsConstants.INPUT_PATH, null));
    if (!fs.isFile(inputPath)) {
        boolean isFile = false;
        while (!isFile) {
            FileStatus[] status = fs.listStatus(inputPath);
            if (fs.isFile(status[0].getPath()))
                isFile = true;
            inputPath = status[0].getPath();
        }
    }

    FSDataInputStream fin = fs.open(inputPath);
    BufferedReader br = new BufferedReader(new InputStreamReader(fin, Constants.UTF8));
    while ((readStr = br.readLine()) != null) {
        clusters[index] = new KMeansClusterInfoMgr();
        clusters[index].setClusterID(index);
        tokens = readStr.split(conf.get(ArgumentsConstants.DELIMITER, "\t"));
        for (int i = 0; i < tokens.length; i++) {
            if (CommonMethods.isContainIndex(mIndexArr, i, true)
                    && !CommonMethods.isContainIndex(mExceptionIndexArr, i, false)) {
                if (CommonMethods.isContainIndex(mNominalIndexArr, i, false)) {
                    clusters[index].addAttributeValue(i, tokens[i], ConfigurationVariable.NOMINAL_ATTRIBUTE);
                } else {
                    clusters[index].addAttributeValue(i, tokens[i], ConfigurationVariable.NUMERIC_ATTRIBUTE);
                }
            }
        }
        index++;
        if (index >= clusterCnt)
            break;
    }
    br.close();
    fin.close();

    FSDataOutputStream fout = fs.create(new Path(clusterOutputPath + "/part-r-00000"), true);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fout, Constants.UTF8));
    for (int i = 0; i < clusters.length; i++) {
        bw.write(clusters[i].getClusterInfoString(conf.get(ArgumentsConstants.DELIMITER, "\t"),
                mNominalDelimiter) + "\n");
    }
    bw.close();
    fout.close();
}
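When the configured input path is not a regular file, the loop descends into the first listStatus entry at each level until it reaches a file (e.g. the part-r-00000 of an earlier job) and reads its first clusterCnt lines as initial cluster centers; note that it only ever follows the first child at each level.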
From source file: org.apache.accumulo.core.client.mock.MockTableOperations.java
License: Apache License
@Override
public void importDirectory(String tableName, String dir, String failureDir, boolean setTime)
        throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
    long time = System.currentTimeMillis();
    MockTable table = acu.tables.get(tableName);
    if (table == null) {
        throw new TableNotFoundException(null, tableName, "The table was not found");
    }
    Path importPath = new Path(dir);
    Path failurePath = new Path(failureDir);
    FileSystem fs = acu.getFileSystem();

    /*
     * check preconditions
     */
    // directories are directories
    if (fs.isFile(importPath)) {
        throw new IOException("Import path must be a directory.");
    }
    if (fs.isFile(failurePath)) {
        throw new IOException("Failure path must be a directory.");
    }
    // failures are writable
    Path createPath = failurePath.suffix("/.createFile");
    FSDataOutputStream createStream = null;
    try {
        createStream = fs.create(createPath);
    } catch (IOException e) {
        throw new IOException("Error path is not writable.");
    } finally {
        if (createStream != null) {
            createStream.close();
        }
    }
    fs.delete(createPath, false);
    // failures are empty
    FileStatus[] failureChildStats = fs.listStatus(failurePath);
    if (failureChildStats.length > 0) {
        throw new IOException("Error path must be empty.");
    }

    /*
     * Begin the import - iterate the files in the path
     */
    for (FileStatus importStatus : fs.listStatus(importPath)) {
        try {
            FileSKVIterator importIterator = FileOperations.getInstance().newReaderBuilder()
                    .forFile(importStatus.getPath().toString(), fs, fs.getConf())
                    .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration())
                    .seekToBeginning().build();
            while (importIterator.hasTop()) {
                Key key = importIterator.getTopKey();
                Value value = importIterator.getTopValue();
                if (setTime) {
                    key.setTimestamp(time);
                }
                Mutation mutation = new Mutation(key.getRow());
                if (!key.isDeleted()) {
                    mutation.put(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()),
                            key.getTimestamp(), value);
                } else {
                    mutation.putDelete(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()),
                            key.getTimestamp());
                }
                table.addMutation(mutation);
                importIterator.next();
            }
        } catch (Exception e) {
            FSDataOutputStream failureWriter = null;
            DataInputStream failureReader = null;
            try {
                failureWriter = fs.create(failurePath.suffix("/" + importStatus.getPath().getName()));
                failureReader = fs.open(importStatus.getPath());
                int read = 0;
                byte[] buffer = new byte[1024];
                while (-1 != (read = failureReader.read(buffer))) {
                    failureWriter.write(buffer, 0, read);
                }
            } finally {
                if (failureReader != null)
                    failureReader.close();
                if (failureWriter != null)
                    failureWriter.close();
            }
        }
        fs.delete(importStatus.getPath(), true);
    }
}
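Both precondition checks use isFile negatively ("must be a directory"): since isFile is also false for a nonexistent path, a missing directory passes this check and only surfaces later, e.g. when listStatus is called.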
From source file: org.apache.accumulo.core.client.mock.MockTableOperationsImpl.java
License: Apache License
@Override
public void importDirectory(String tableName, String dir, String failureDir, boolean setTime)
        throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
    long time = System.currentTimeMillis();
    MockTable table = acu.tables.get(tableName);
    if (table == null) {
        throw new TableNotFoundException(null, tableName, "The table was not found");
    }
    Path importPath = new Path(dir);
    Path failurePath = new Path(failureDir);
    FileSystem fs = acu.getFileSystem();

    /*
     * check preconditions
     */
    // directories are directories
    if (fs.isFile(importPath)) {
        throw new IOException("Import path must be a directory.");
    }
    if (fs.isFile(failurePath)) {
        throw new IOException("Failure path must be a directory.");
    }
    // failures are writable
    Path createPath = failurePath.suffix("/.createFile");
    FSDataOutputStream createStream = null;
    try {
        createStream = fs.create(createPath);
    } catch (IOException e) {
        throw new IOException("Error path is not writable.");
    } finally {
        if (createStream != null) {
            createStream.close();
        }
    }
    fs.delete(createPath, false);
    // failures are empty
    FileStatus[] failureChildStats = fs.listStatus(failurePath);
    if (failureChildStats.length > 0) {
        throw new IOException("Error path must be empty.");
    }

    /*
     * Begin the import - iterate the files in the path
     */
    for (FileStatus importStatus : fs.listStatus(importPath)) {
        try {
            FileSKVIterator importIterator = FileOperations.getInstance().openReader(
                    importStatus.getPath().toString(), true, fs, fs.getConf(),
                    AccumuloConfiguration.getDefaultConfiguration());
            while (importIterator.hasTop()) {
                Key key = importIterator.getTopKey();
                Value value = importIterator.getTopValue();
                if (setTime) {
                    key.setTimestamp(time);
                }
                Mutation mutation = new Mutation(key.getRow());
                if (!key.isDeleted()) {
                    mutation.put(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()),
                            key.getTimestamp(), value);
                } else {
                    mutation.putDelete(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()),
                            key.getTimestamp());
                }
                table.addMutation(mutation);
                importIterator.next();
            }
        } catch (Exception e) {
            FSDataOutputStream failureWriter = null;
            DataInputStream failureReader = null;
            try {
                failureWriter = fs.create(failurePath.suffix("/" + importStatus.getPath().getName()));
                failureReader = fs.open(importStatus.getPath());
                int read = 0;
                byte[] buffer = new byte[1024];
                while (-1 != (read = failureReader.read(buffer))) {
                    failureWriter.write(buffer, 0, read);
                }
            } finally {
                if (failureReader != null)
                    failureReader.close();
                if (failureWriter != null)
                    failureWriter.close();
            }
        }
        fs.delete(importStatus.getPath(), true);
    }
}
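This older variant performs the same isFile preconditions; it differs from the previous example only in constructing the reader via FileOperations.getInstance().openReader(...) instead of the newer builder API.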
From source file: org.apache.ambari.fast_hdfs_resource.Resource.java
License: Apache License
public static void checkResourceParameters(Resource resource, FileSystem dfs)
        throws IllegalArgumentException, IOException {
    ArrayList<String> actionsAvailable = new ArrayList<String>();
    actionsAvailable.add("create");
    actionsAvailable.add("delete");
    ArrayList<String> typesAvailable = new ArrayList<String>();
    typesAvailable.add("file");
    typesAvailable.add("directory");

    if (resource.getTarget() == null)
        throw new IllegalArgumentException("Path to resource in HadoopFs must be filled.");
    if (resource.getAction() == null || !actionsAvailable.contains(resource.getAction()))
        throw new IllegalArgumentException("Action is not supported.");
    if (resource.getType() == null || !typesAvailable.contains(resource.getType()))
        throw new IllegalArgumentException("Type is not supported.");

    // Check consistency for ("type":"file" == file in hadoop)
    if (dfs.isFile(new Path(resource.getTarget())) && !"file".equals(resource.getType()))
        throw new IllegalArgumentException("Cannot create a directory " + resource.getTarget()
                + " because file is present on the given path.");
    // Check consistency for ("type":"directory" == directory in hadoop)
    else if (dfs.isDirectory(new Path(resource.getTarget())) && !"directory".equals(resource.getType()))
        throw new IllegalArgumentException("Cannot create a file " + resource.getTarget()
                + " because directory is present on the given path.");

    if (resource.getSource() != null) {
        File source = new File(resource.getSource());
        if (source.isFile() && !"file".equals(resource.getType()))
            throw new IllegalArgumentException("Cannot create a directory " + resource.getTarget()
                    + " because source " + resource.getSource() + " is a file");
        else if (source.isDirectory() && !"directory".equals(resource.getType()))
            throw new IllegalArgumentException("Cannot create a file " + resource.getTarget()
                    + " because source " + resource.getSource() + " is a directory");
    }
}
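isFile and isDirectory enforce consistency between the declared resource type and whatever already exists at the target path (and, when a source is given, the type of the local source). Both convenience methods are deprecated on FileSystem in the same way; getFileStatus(path).isFile() and .isDirectory() are the non-deprecated equivalents.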
From source file: org.apache.avro.tool.Util.java
License: Apache License
/**
 * If pathname is a file, this method returns a list with a single absolute Path to that file;
 * if pathname is a directory, this method returns a list of Paths to all the files within
 * this directory.
 * Only files inside that directory are included, no subdirectories or files in subdirectories
 * will be added.
 * The List is sorted alphabetically.
 * @param fileOrDirName filename or directoryname
 * @return A Path List
 * @throws IOException
 */
static List<Path> getFiles(String fileOrDirName) throws IOException {
    List<Path> pathList = new ArrayList<Path>();
    Path path = new Path(fileOrDirName);
    FileSystem fs = path.getFileSystem(new Configuration());
    if (fs.isFile(path)) {
        pathList.add(path);
    } else if (fs.getFileStatus(path).isDir()) {
        for (FileStatus status : fs.listStatus(path)) {
            if (!status.isDir()) {
                pathList.add(status.getPath());
            }
        }
    }
    Collections.sort(pathList);
    return pathList;
}
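isFile selects the single-file branch here; the directory branch goes through getFileStatus(path).isDir(), a FileStatus method that newer Hadoop versions deprecate in favor of isDirectory().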