Example usage for org.apache.hadoop.fs FileSystem isFile

List of usage examples for org.apache.hadoop.fs FileSystem isFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem isFile.

Prototype

@Deprecated
public boolean isFile(Path f) throws IOException 

Source Link

Document

True iff the named path is a regular file.

Usage

From source file:com.cloudera.impala.analysis.CreateTableLikeFileStmt.java

License:Apache License

/**
 * Infers the Parquet schema of a file by reading its footer.
 * The path is first checked to be a regular file, then the footer is read and
 * the schema stored in its file metadata is returned.
 *
 * @param pathToFile location of the candidate Parquet file
 * @return the schema recorded in the footer's file metadata
 * @throws AnalysisException if the path is not a file, the file system or the
 *         file cannot be read, or the file is not a Parquet file
 */
private static parquet.schema.MessageType loadParquetSchema(Path pathToFile) throws AnalysisException {
    // Step 1: make sure the path points at a regular file.
    try {
        final FileSystem fileSystem = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
        final boolean regularFile = fileSystem.isFile(pathToFile);
        if (!regularFile) {
            throw new AnalysisException("Cannot infer schema, path is not a file: " + pathToFile);
        }
    } catch (IOException ioe) {
        throw new AnalysisException("Failed to connect to HDFS:" + ioe);
    }

    // Step 2: read the footer; every failure path below throws, so the footer
    // is always assigned once this try statement completes normally.
    final ParquetMetadata footer;
    try {
        footer = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(), pathToFile);
    } catch (FileNotFoundException missing) {
        throw new AnalysisException("File not found: " + missing);
    } catch (IOException ioe) {
        throw new AnalysisException("Failed to open HDFS file as a parquet file: " + ioe);
    } catch (RuntimeException unexpected) {
        // Parquet reports a non-parquet input through a generic RuntimeException,
        // so the only way to recognise it is by its text.
        final boolean notParquet = unexpected.toString().contains("is not a Parquet file");
        if (notParquet) {
            throw new AnalysisException("File is not a parquet file: " + pathToFile);
        }
        // Anything else is unknown: propagate it unchanged.
        throw unexpected;
    }

    return footer.getFileMetaData().getSchema();
}

From source file:com.dalabs.droop.util.password.FilePasswordLoader.java

License:Apache License

/**
 * Verify that given path leads to a file that we can read.
 *
 * @param fs Associated FileSystem/*from ww  w  .j  ava 2s.  c o  m*/
 * @param path Path
 * @throws IOException
 */
protected void verifyPath(FileSystem fs, Path path) throws IOException {
    if (!fs.exists(path)) {
        throw new IOException("The provided password file " + path + " does not exist!");
    }

    if (!fs.isFile(path)) {
        throw new IOException("The provided password file " + path + " is a directory!");
    }
}

From source file:com.dasasian.chok.node.ShardManager.java

License:Apache License

/**
 * Installs a shard into {@code localShardFolder}, retrying a fixed number of times.
 * <p>
 * Each attempt clears the local folder and a sibling {@code _tmp} folder, fetches
 * the shard into the temporary folder (unzipping it when the source is a file whose
 * path ends with {@code .zip}, copying it otherwise) and finally renames the
 * temporary folder to the local shard folder. A rename that fails is treated like
 * any other failed attempt instead of being reported as a successful install.
 *
 * @param shardName name of the shard, used for logging only
 * @param shardPath URI of the shard source
 * @param localShardFolder local folder the shard is installed into
 * @throws ChokException if {@code shardPath} is not a valid URI or if every
 *         attempt failed
 */
private void installShard(String shardName, String shardPath, File localShardFolder) throws ChokException {
    LOG.info("install shard '" + shardName + "' from " + shardPath);
    // TODO sg: to fix HADOOP-4422 we try to download the shard 5 times
    int maxTries = 5;
    for (int i = 0; i < maxTries; i++) {
        URI uri;
        try {
            uri = new URI(shardPath);
            FileSystem fileSystem = FileSystem.get(uri, new Configuration());
            if (throttleSemaphore != null) {
                fileSystem = new ThrottledFileSystem(fileSystem, throttleSemaphore);
            }
            final Path path = new Path(shardPath);
            boolean isZip = fileSystem.isFile(path) && shardPath.endsWith(".zip");

            File shardTmpFolder = new File(localShardFolder.getAbsolutePath() + "_tmp");
            try {
                FileUtil.deleteFolder(localShardFolder);
                FileUtil.deleteFolder(shardTmpFolder);

                if (isZip) {
                    FileUtil.unzip(path, shardTmpFolder, fileSystem,
                            "true".equalsIgnoreCase(System.getProperty("chok.spool.zip.shards", "false")));
                } else {
                    fileSystem.copyToLocalFile(path, new Path(shardTmpFolder.getAbsolutePath()));
                }
                // File.renameTo reports failure through its return value only; without
                // this check a failed rename would look like a successful install.
                if (!shardTmpFolder.renameTo(localShardFolder)) {
                    throw new IllegalStateException("Can not rename " + shardTmpFolder.getAbsolutePath()
                            + " to " + localShardFolder.getAbsolutePath());
                }
            } finally {
                // Ensure that the tmp folder is deleted on an error
                FileUtil.deleteFolder(shardTmpFolder);
            }
            // Looks like we were successful.
            if (i > 0) {
                // Logged at error level so the recovery shows up next to the failed attempts.
                LOG.error("Loaded shard:" + shardPath);
            }
            return;
        } catch (final URISyntaxException e) {
            // A malformed URI will not get better by retrying.
            throw new ChokException("Can not parse uri for path: " + shardPath, e);
        } catch (final Exception e) {
            // Report the attempt one-based so the message reads "try 1 of 5" .. "try 5 of 5".
            LOG.error(String.format("Error loading shard: %s (try %d of %d)", shardPath, i + 1, maxTries), e);
            if (i >= maxTries - 1) {
                throw new ChokException("Can not load shard: " + shardPath, e);
            }
        }
    }
}

From source file:com.datasayer.meerkat.MeerJobRunner.java

License:Apache License

/**
 * Repeatedly reads newly appended lines from the log file at {@code logPath},
 * passes each line to {@code guardMeer}, and synchronises with the other peers
 * once {@code aggregationInterval} milliseconds have passed since the last
 * aggregation. After a sync, the peer whose name equals {@code masterName}
 * hands the received messages to {@code bossMeer}.
 * <p>
 * The loop only ends when {@code logPath} is not a file or when a non-I/O
 * exception propagates; I/O errors are printed and the loop continues.
 *
 * @param peer the BSP peer used for syncing and for reading messages
 * @throws IOException declared by the overridden method; I/O errors raised in
 *         the loop body are caught and printed
 * @throws SyncException if {@code peer.sync()} fails
 * @throws InterruptedException if {@code peer.sync()} is interrupted
 */
@SuppressWarnings("unchecked")
@Override
public void bsp(final BSPPeer<Writable, Writable, Writable, Writable, Writable> peer)
        throws IOException, SyncException, InterruptedException {

    while (true) {
        try {
            long currentTime = System.currentTimeMillis();
            FileSystem fs = FileSystem.get(conf);
            if (!fs.isFile(logPath)) {
                System.out.println("can not read input file");
                return;
            }
            RandomAccessFile file = new RandomAccessFile(logPath.toString(), "r");
            try {
                // Only read when the file grew past the position reached last time.
                if (file.length() > filePointer) {
                    file.seek(filePointer);
                    while (file.length() > file.getFilePointer()) {
                        String line = file.readLine();
                        // readLine maps each byte to one char; re-decode those bytes as UTF-8.
                        line = new String(line.getBytes("8859_1"), "utf-8");
                        guardMeer.observe(line);
                    }
                    filePointer = file.getFilePointer();
                }
            } finally {
                // Close the handle even when reading or observing fails, so that a
                // failing iteration of this endless loop does not leak a file handle.
                file.close();
            }

            long timeDiff = currentTime - this.lastAggregatedTime;
            if (timeDiff >= this.aggregationInterval) {

                peer.sync();

                if (peer.getPeerName().equals(masterName)) {
                    bossMeer.masterCompute(new Iterator<Writable>() {

                        private final int producedMessages = peer.getNumCurrentMessages();
                        private int consumedMessages = 0;

                        @Override
                        public boolean hasNext() {
                            return producedMessages > consumedMessages;
                        }

                        @Override
                        public Writable next() throws NoSuchElementException {
                            if (consumedMessages >= producedMessages) {
                                throw new NoSuchElementException();
                            }

                            try {
                                consumedMessages++;
                                return peer.getCurrentMessage();
                            } catch (IOException e) {
                                // Keep the I/O failure as the cause instead of discarding it.
                                NoSuchElementException failure = new NoSuchElementException();
                                failure.initCause(e);
                                throw failure;
                            }
                        }

                        @Override
                        public void remove() {
                            // BSPPeer.getCurrentMessage originally deletes a message.
                            // Thus, it doesn't need to throw exception.
                        }

                    }, signalMeer);
                    this.lastAggregatedTime = currentTime;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:com.ebay.erl.mobius.core.builder.AbstractDatasetBuilder.java

License:Apache License

/**
 * Adds the <code>paths</code> to the underlying dataset. The boolean
 * flag <code>validatePathExistance</code> specifies whether Mobius
 * needs to verify that the specified <code>paths</code> exist.
 * <p>
 *
 * Each path is resolved with <code>FileSystem.globStatus</code>. A path
 * that resolves to nothing (a <code>null</code> result, or an empty result
 * for a glob pattern without matches) is treated as non-existent: if
 * <code>validatePathExistance</code> is true a
 * <code>FileNotFoundException</code> is thrown, otherwise the path is
 * added as given.
 * <p>
 *
 * If a path exists and it's a folder, {@link #checkTouchFile(FileSystem, Path)}
 * is called for it, and when that function returns false an
 * <code>IllegalStateException</code> is thrown for that specific path.
 *
 * @param validatePathExistance whether non-existent paths are an error
 * @param paths the input paths or glob patterns, at least one
 * @return this builder
 * @throws IllegalArgumentException if no path is given
 * @throws FileNotFoundException if validation is requested and a path resolves to nothing
 * @throws IllegalStateException if the touch file check fails for a folder
 * @throws IOException if the file system cannot be accessed
 */
protected ACTUAL_BUILDER_IMPL addInputPath(boolean validatePathExistance, Path... paths) throws IOException {
    if (paths == null || paths.length == 0) {
        throw new IllegalArgumentException("Please specify at least one path");
    }

    FileSystem fs = FileSystem.get(this.mobiusJob.getConf());

    for (Path aPath : paths) {
        FileStatus[] fileStatus = null;

        try {
            fileStatus = fs.globStatus(aPath);
        } catch (NullPointerException e) {
            // An NPE from globStatus is handled like a path that resolved to nothing.
            LOGGER.warn("FileSystem list globStatus thrown NPE", e);
        }

        // globStatus returns null for a missing non-glob path and an empty array
        // for a glob pattern that matches nothing; both mean there is no input yet.
        if (fileStatus == null || fileStatus.length == 0) {
            if (validatePathExistance) {
                throw new FileNotFoundException(aPath.toString() + " doesn't exist on file system.");
            }
            // no need to validate, as the input
            // for this dataset is coming from
            // the output of the other dataset.
            this.getDataset().addInputs(aPath);
            continue;
        }

        // file(s) exists, add inputs
        for (FileStatus aFileStatus : fileStatus) {
            Path p = aFileStatus.getPath();
            // Only paths that are not files are subject to the touch file check.
            if (!fs.isFile(p) && !this.checkTouchFile(fs, p)) {
                throw new IllegalStateException(
                        "No touch file under " + p.toString() + ", this dataset is not ready.");
            }
            this.getDataset().addInputs(p);
        }
    }

    return (ACTUAL_BUILDER_IMPL) this;
}

From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java

License:Apache License

/**
 * Loads every vector stored under the given path into a row matrix.
 * <p>
 * When the path is a file it is read directly; otherwise the entries returned
 * by listing the path with {@code PathFilters.logsCRCFilter()} are read. Each
 * input is iterated as a sequence file of {@code IntWritable} keys and
 * {@code VectorWritable} values, and the vectors become the matrix rows in the
 * order they are read. The column count and the sequential-access flag are
 * taken from the first vector.
 *
 * @param vectorPathString path of a sequence file or of a directory of them
 * @param conf Hadoop configuration used to resolve and read the path
 * @return a matrix holding one row per vector read
 * @throws IOException if the input cannot be read or contains no vectors
 */
private static Matrix loadVectors(String vectorPathString, Configuration conf) throws IOException {
    Path vectorPath = new Path(vectorPathString);
    FileSystem fs = vectorPath.getFileSystem(conf);
    List<Path> subPaths = Lists.newArrayList();
    if (fs.isFile(vectorPath)) {
        subPaths.add(vectorPath);
    } else {
        for (FileStatus fileStatus : fs.listStatus(vectorPath, PathFilters.logsCRCFilter())) {
            subPaths.add(fileStatus.getPath());
        }
    }
    List<Vector> vectorList = Lists.newArrayList();
    for (Path subPath : subPaths) {
        for (Pair<IntWritable, VectorWritable> record : new SequenceFileIterable<IntWritable, VectorWritable>(
                subPath, true, conf)) {
            vectorList.add(record.getSecond().get());
        }
    }
    // The matrix shape is derived from the first vector, so an empty input cannot
    // be represented; fail with a message naming the path rather than with an
    // IndexOutOfBoundsException from get(0).
    if (vectorList.isEmpty()) {
        throw new IOException("No vectors found under " + vectorPath);
    }
    Vector firstVector = vectorList.get(0);
    int numRows = vectorList.size();
    int numCols = firstVector.size();
    return new SparseRowMatrix(numRows, numCols, vectorList.toArray(new Vector[vectorList.size()]), true,
            firstVector.isSequentialAccess());
}

From source file:com.facebook.hiveio.common.FileSystems.java

License:Apache License

/**
 * Move a file or directory from source to destination, recursively copying
 * subdirectories./*  w  w w  .  ja  v a  2s . c om*/
 *
 * @param fs FileSystem
 * @param file path to copy (file or directory)
 * @param src path to source directory
 * @param dest path to destination directory
 * @throws IOException I/O problems
 */
public static void move(FileSystem fs, Path file, Path src, Path dest) throws IOException {
    Path destFilePath = pathInDestination(file, src, dest);
    if (fs.isFile(file)) {
        if (fs.exists(destFilePath)) {
            if (!fs.delete(destFilePath, true)) {
                throw new IllegalArgumentException("Could not remove existing file " + destFilePath);
            }
        }
        if (!fs.rename(file, destFilePath)) {
            throw new IllegalArgumentException("Could not move " + file + " to " + destFilePath);
        }
    } else if (fs.getFileStatus(file).isDir()) {
        FileStatus[] statuses = fs.listStatus(file);
        fs.mkdirs(destFilePath);
        if (statuses != null) {
            for (FileStatus status : statuses) {
                move(fs, status.getPath(), src, dest);
            }
        }
    }
}

From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java

License:Open Source License

/**
 * Copies the {@code part-r-00000} file found under the given path to a freshly
 * named temporary file and returns that file.
 * <p>
 * Precondition violations (missing input, input not a file, output already
 * present) are only logged; the copy is attempted regardless. Whatever the
 * outcome, {@code path} is deleted recursively once the source file system has
 * been obtained.
 *
 * @param path directory containing the {@code part-r-00000} output file
 * @return the copied file, or {@code null} if an {@code IOException} occurred
 */
public File handleFileResult(Path path) {
    FileSystem sourceFs = null;
    try {
        final Path partFile = new Path(path, "part-r-00000");
        final Logger featureLog = Logger.getLogger(FeaturesByFilterPlugin.class.getName());
        // copy file from HDFS to local temporary file
        featureLog.info("Source file is " + partFile.toString());

        final Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);
        sourceFs = FileSystem.get(conf);

        // Only the unique name is wanted: the temp file is removed again so the
        // copy below can create it.
        final File tempFile = File.createTempFile("vcf", "out");
        tempFile.delete();
        final Path targetPath = new Path(tempFile.toURI());
        // NOTE(review): obtained from a default Configuration — presumably meant
        // to be the local file system; confirm.
        final FileSystem targetFs = FileSystem.get(new Configuration());

        featureLog.info("Destination file is " + targetPath.toString());

        final boolean sourceExists = sourceFs.exists(partFile);
        if (!sourceExists) {
            featureLog.fatal("Input file not found");
        }

        final boolean sourceIsFile = sourceFs.isFile(partFile);
        if (!sourceIsFile) {
            featureLog.fatal("Input should be a file");
        }

        final boolean targetExists = targetFs.exists(targetPath);
        if (targetExists) {
            featureLog.fatal("Output already exists");
        }
        // doesn't quite work yet, no time to finish before poster, check results manually on hdfs

        FileUtil.copy(sourceFs, partFile, targetFs, targetPath, true, true, conf);
        return new File(targetPath.toUri());
    } catch (IOException copyFailure) {
        Logger.getLogger(VCFDumperPlugin.class.getName()).fatal(null, copyFailure);
    } finally {
        if (sourceFs != null) {
            try {
                sourceFs.delete(path, true);
            } catch (IOException cleanupFailure) {
                Logger.getLogger(VCFDumperPlugin.class.getName())
                        .warn("IOException when clearing after text output", cleanupFailure);
            }
        }
    }

    return null;
}

From source file:com.huayu.metis.flume.sink.hdfs.HDFSSequenceFile.java

License:Apache License

/**
 * Opens the output stream and the sequence file writer for the given path.
 * An existing file is opened for append; otherwise the file is created.
 *
 * @param filePath destination path of the sequence file
 * @throws IOException if the file system, the stream or the writer cannot be opened
 */
@Override
public void open(String filePath) throws IOException {
    final Configuration hadoopConf = new Configuration();
    final Path target = new Path(filePath);
    final FileSystem targetFs = target.getFileSystem(hadoopConf);

    // NOTE(review): the original comment here mentioned Hadoop 2.2 and
    // dfs.append.support — confirm that append is enabled on the target cluster.
    final boolean appendToExisting = targetFs.exists(target) && targetFs.isFile(target);
    outStream = appendToExisting ? targetFs.append(target) : targetFs.create(target);

    writer = SequenceFile.createWriter(hadoopConf, SequenceFile.Writer.stream(outStream),
            SequenceFile.Writer.keyClass(serializer.getKeyClass()),
            SequenceFile.Writer.valueClass(serializer.getValueClass()));
    registerCurrentStream(outStream, targetFs, target);
}

From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java

License:Open Source License

/**
 * Writes a MatrixMarket coordinate file consisting of a header followed by the
 * raw bytes of the source.
 * <p>
 * An existing target is deleted first. The header is the MatrixMarket banner
 * line plus a line with the row, column and non-zero counts. If the source is a
 * directory, every non-directory entry in it is appended in listing order; if it
 * is a file, that file is appended. The output stream is closed on every path,
 * including failures.
 *
 * @param srcFileName path of the source file or directory
 * @param fileName path of the merged output file
 * @param rlen number of rows written to the header
 * @param clen number of columns written to the header
 * @param nnz number of non-zeros written to the header
 * @throws IOException if reading or writing fails, or if the source is neither
 *         a directory nor a file
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }

    OutputStream out = hdfs.create(merge, true);
    try {
        // write out the header first
        StringBuilder sb = new StringBuilder();
        sb.append("%%MatrixMarket matrix coordinate real general\n");

        // output number of rows, number of columns and number of nnz
        sb.append(rlen + " " + clen + " " + nnz + "\n");
        out.write(sb.toString().getBytes());

        if (hdfs.getFileStatus(src).isDirectory()) {
            // if the source is a directory, append every plain file in it
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        // keep the output open for the remaining parts
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } else if (hdfs.isFile(src)) {
            InputStream in = null;
            try {
                in = hdfs.open(src);
                // closes both streams once the copy is done
                IOUtils.copyBytes(in, out, conf, true);
            } finally {
                IOUtilFunctions.closeSilently(in);
            }
        } else {
            throw new IOException(src.toString() + ": No such file or directory");
        }
    } finally {
        // Covers every exit, including a failed header write, a failed status
        // lookup and the error branch above, none of which closed the stream before.
        IOUtilFunctions.closeSilently(out);
    }
}