Example usage for org.apache.hadoop.fs FileSystem isDirectory

List of usage examples for org.apache.hadoop.fs FileSystem isDirectory

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem isDirectory.

Prototype

@Deprecated
public boolean isDirectory(Path f) throws IOException 

Document

True iff the named path is a directory.
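
Before the collected examples, here is a minimal, self-contained sketch of a typical call. It is not taken from any of the source files below; the path /tmp/example and the class name IsDirectoryExample are placeholders. Since isDirectory(Path) is deprecated, the sketch also shows the FileStatus-based check that the Hadoop javadoc recommends instead.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example");

        // Deprecated convenience call: returns true only if the path exists
        // and is a directory; it returns false (rather than throwing) when missing.
        boolean isDir = fs.isDirectory(path);

        // Non-deprecated equivalent: getFileStatus(path) throws
        // FileNotFoundException if the path does not exist, so guard with exists().
        if (fs.exists(path)) {
            FileStatus status = fs.getFileStatus(path);
            isDir = status.isDirectory();
        }

        System.out.println(path + " is a directory: " + isDir);
        fs.close();
    }
}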

Usage

From source file:org.apache.vxquery.metadata.VXQueryCollectionOperatorDescriptor.java

License:Apache License

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
            recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
    final int fieldOutputCount = recordDescProvider.getOutputRecordDescriptor(getActivityId(), 0)
            .getFieldCount();
    final ByteBuffer frame = ctx.allocateFrame();
    final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize(), fieldOutputCount);
    final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition();
    final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, dataSourceId,
            totalDataSources);
    final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
    final DynamicContext dCtx = (DynamicContext) ctx.getJobletContext().getGlobalJobData();

    final String collectionName = collectionPartitions[partition % collectionPartitions.length];
    final XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId, frame, appender, childSeq,
            dCtx.getStaticContext());

    return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
        @Override
        public void open() throws HyracksDataException {
            appender.reset(frame, true);
            writer.open();
            hdfs = new HDFSFunctions();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            fta.reset(buffer);
            String collectionModifiedName = collectionName.replace("${nodeId}", nodeId);
            if (!collectionModifiedName.contains("hdfs:/")) {
                File collectionDirectory = new File(collectionModifiedName);
                //check if directory is in the local file system
                if (collectionDirectory.exists()) {
                    // Go through each tuple.
                    if (collectionDirectory.isDirectory()) {
                        for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                            Iterator<File> it = FileUtils.iterateFiles(collectionDirectory,
                                    new VXQueryIOFileFilter(), TrueFileFilter.INSTANCE);
                            while (it.hasNext()) {
                                File xmlDocument = it.next();
                                if (LOGGER.isLoggable(Level.FINE)) {
                                    LOGGER.fine(
                                            "Starting to read XML document: " + xmlDocument.getAbsolutePath());
                                }
                                parser.parseElements(xmlDocument, writer, fta, tupleIndex);
                            }
                        }
                    } else {
                        throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":"
                                + collectionDirectory.getAbsolutePath() + ") passed to collection.");
                    }
                }
            } else {
                // Else check in HDFS file system
                // Get instance of the HDFS filesystem
                FileSystem fs = hdfs.getFileSystem();
                if (fs != null) {
                    collectionModifiedName = collectionModifiedName.replaceAll("hdfs:/", "");
                    Path directory = new Path(collectionModifiedName);
                    Path xmlDocument;
                    if (tag != null) {
                        hdfs.setJob(directory.getName(), tag);
                        tag = "<" + tag + ">";
                        Job job = hdfs.getJob();
                        InputFormat inputFormat = hdfs.getinputFormat();
                        try {
                            hdfs.scheduleSplits();
                            ArrayList<Integer> schedule = hdfs
                                    .getScheduleForNode(InetAddress.getLocalHost().getHostName());
                            List<InputSplit> splits = hdfs.getSplits();
                            List<FileSplit> fileSplits = new ArrayList<FileSplit>();
                            for (int i : schedule) {
                                fileSplits.add((FileSplit) splits.get(i));
                            }
                            FileSplitsFactory splitsFactory = new FileSplitsFactory(fileSplits);
                            List<FileSplit> inputSplits = splitsFactory.getSplits();
                            ContextFactory ctxFactory = new ContextFactory();
                            int size = inputSplits.size();
                            InputStream stream;
                            String value;
                            RecordReader reader;
                            TaskAttemptContext context;
                            for (int i = 0; i < size; i++) {
                                //read split
                                context = ctxFactory.createContext(job.getConfiguration(), i);
                                try {
                                    reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                                    reader.initialize(inputSplits.get(i), context);
                                    while (reader.nextKeyValue()) {
                                        value = reader.getCurrentValue().toString();
                                        //Split value if it contains more than one item with the tag
                                        if (StringUtils.countMatches(value, tag) > 1) {
                                            String items[] = value.split(tag);
                                            for (String item : items) {
                                                if (item.length() > 0) {
                                                    item = START_TAG + tag + item;
                                                    stream = new ByteArrayInputStream(
                                                            item.getBytes(StandardCharsets.UTF_8));
                                                    parser.parseHDFSElements(stream, writer, fta, i);
                                                }
                                            }
                                        } else {
                                            value = START_TAG + value;
                                            //create an input stream to the file currently reading and send it to parser
                                            stream = new ByteArrayInputStream(
                                                    value.getBytes(StandardCharsets.UTF_8));
                                            parser.parseHDFSElements(stream, writer, fta, i);
                                        }
                                    }

                                } catch (InterruptedException e) {
                                    if (LOGGER.isLoggable(Level.SEVERE)) {
                                        LOGGER.severe(e.getMessage());
                                    }
                                }
                            }

                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (ParserConfigurationException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (SAXException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    } else {
                        try {
                            //check if the path exists and is a directory
                            if (fs.exists(directory) && fs.isDirectory(directory)) {
                                for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                                    //read every file in the directory
                                    RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
                                    while (it.hasNext()) {
                                        xmlDocument = it.next().getPath();
                                        if (fs.isFile(xmlDocument)) {
                                            if (LOGGER.isLoggable(Level.FINE)) {
                                                LOGGER.fine("Starting to read XML document: "
                                                        + xmlDocument.getName());
                                            }
                                            //create an input stream to the file currently reading and send it to parser
                                            InputStream in = fs.open(xmlDocument).getWrappedStream();
                                            parser.parseHDFSElements(in, writer, fta, tupleIndex);
                                        }
                                    }
                                }
                            } else {
                                throw new HyracksDataException("Invalid HDFS directory parameter (" + nodeId
                                        + ":" + directory + ") passed to collection.");
                            }
                        } catch (FileNotFoundException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    }
                    try {
                        fs.close();
                    } catch (IOException e) {
                        if (LOGGER.isLoggable(Level.SEVERE)) {
                            LOGGER.severe(e.getMessage());
                        }
                    }
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            writer.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            // Check if needed?
            fta.reset(frame);
            if (fta.getTupleCount() > 0) {
                FrameUtils.flushFrame(frame, writer);
            }
            writer.close();
        }
    };
}

From source file:org.commoncrawl.util.SequenceFileUtils.java

License:Open Source License

@SuppressWarnings({ "unchecked", "deprecation" })
public static Class sniffValueTypeFromSequenceFile(FileSystem fs, Configuration conf, Path path)
        throws IOException {
    if (fs.isDirectory(path)) {
        path = new Path(path, "part-00000");
    }

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
        return reader.getValueClass();
    } finally {
        reader.close();
    }
}

From source file:org.datacleaner.spark.utils.HdfsHelper.java

License:Open Source License

public boolean isDirectory(final URI path) {
    final Resource resource = getResourceToUse(path);
    if (!resource.isExists()) {
        return false;
    }
    if (resource instanceof FileResource) {
        return ((FileResource) resource).getFile().isDirectory();
    }
    if (resource instanceof HdfsResource) {
        final FileSystem fileSystem = ((HdfsResource) resource).getHadoopFileSystem();
        final Path hadoopPath = ((HdfsResource) resource).getHadoopPath();
        try {
            return fileSystem.isDirectory(hadoopPath);
        } catch (final IOException e) {
            throw new IllegalStateException(e);
        }
    }
    // actually we don't know, but most likely it's not a directory
    return false;
}

From source file:org.deeplearning4j.hadoop.datasetiterator.BaseHdfsDataSetIterator.java

License:Apache License

/**
 * Opens an input stream for the given path. It is unclear whether the file
 * system needs to be closed here.
 * @param path the path to open
 * @return the input stream for the path
 * @throws Exception if one occurs
 */
public InputStream openInputStream(Path path) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(path))
        throw new FileNotFoundException("File does not exist");
    if (fs.isDirectory(path))
        throw new IllegalArgumentException("Not a file");

    InputStream is = fs.open(path);
    return is;

}

From source file:org.godhuli.rhipe.FileUtils.java

License:Apache License

private void delete(Path src, FileSystem srcFs, boolean recursive) throws IOException {
    if (srcFs.isDirectory(src) && !recursive) {
        throw new IOException("Cannot remove directory \"" + src + "\", use -rmr instead");
    }
    Trash trashTmp = new Trash(srcFs, getConf());
    if (trashTmp.moveToTrash(src)) {
        System.out.println("Moved to trash: " + src);
        return;
    }
    if (srcFs.delete(src, true)) {
        System.out.println("Deleted " + src);
    } else {
        if (!srcFs.exists(src)) {
            throw new FileNotFoundException("cannot remove " + src + ": No such file or directory.");
        }
        throw new IOException("Delete failed " + src);
    }
}

From source file:org.icgc.dcc.release.core.hadoop.FileGlobInputStream.java

License:Open Source License

private static Collection<Path> getPaths(FileSystem fileSystem, Path pathPattern)
        throws IOException, FileNotFoundException {
    FileStatus[] matches = fileSystem.globStatus(pathPattern, HIDDEN_PATH_FILTER);
    val paths = Lists.<Path>newArrayList();
    for (val match : matches) {
        if (fileSystem.isDirectory(match.getPath())) {
            FileStatus[] partFiles = fileSystem.listStatus(match.getPath(), HIDDEN_PATH_FILTER);
            for (val partFile : partFiles) {
                paths.add(partFile.getPath());
            }
        } else {
            paths.add(match.getPath());
        }
    }

    return paths;
}

From source file:org.kepler.ddp.actor.pattern.DDPDataSink.java

License:Open Source License

/** Merge a directory containing multiple output files into a single file.
 *  This method deletes the directory when finished.
 *
 * TODO move to parent class?
 */
private void _mergeMultiPartOutputs(String pathStr) throws IllegalActionException {

    Configuration configuration = new Configuration();

    Path srcPath = new Path(pathStr);

    try {
        FileSystem srcPathFileSystem = srcPath.getFileSystem(configuration);
        // only merge if the output is a directory.
        if (srcPathFileSystem.isDirectory(srcPath)) {

            Path destPath = new Path(pathStr + "-TMP1234");

            try {
                // TODO if there is only one part-r-nnnnnn file, copyMerge() will still
                // copy it instead of simply renaming it. 
                if (!FileUtil.copyMerge(srcPath.getFileSystem(configuration), srcPath,
                        destPath.getFileSystem(configuration), destPath, true, configuration, "")) {
                    throw new IllegalActionException(this, "Unable to merge output files in " + srcPath + "/.");
                }
            } catch (IOException e) {
                throw new IllegalActionException(this, e,
                        "Error merging multi-part output files in " + srcPath + "/.");
            }

            try {
                if (!destPath.getFileSystem(configuration).rename(destPath, srcPath)) {
                    throw new IllegalActionException(this, "Unable to rename " + destPath + " to " + srcPath);
                }
            } catch (IOException e) {
                throw new IllegalActionException(this, e,
                        "Error renaming " + destPath + " to " + srcPath + ".");
            }
        }
    } catch (IOException e) {
        throw new IllegalActionException(this, e, "Error accessing output file " + srcPath);
    }
}

From source file:org.kitesdk.data.spi.filesystem.FileSystemPartitionIterator.java

License:Apache License

@SuppressWarnings("deprecation")
FileSystemPartitionIterator(FileSystem fs, Path root, PartitionStrategy strategy, Schema schema,
        final Predicate<StorageKey> predicate) throws IOException {
    Preconditions.checkArgument(fs.isDirectory(root));
    this.fs = fs;
    this.rootDirectory = root;
    this.iterator = Iterators.filter(Iterators.transform(
            new FileSystemIterator(Accessor.getDefault().getFieldPartitioners(strategy).size()),
            new MakeKey(strategy, schema)), predicate);
}

From source file:org.kitesdk.data.spi.filesystem.TestFileSystemView.java

License:Apache License

@SuppressWarnings("deprecation")
public static void assertDirectoriesExist(FileSystem fs, Path... dirs) throws IOException {
    for (Path path : dirs) {
        assertTrue("Directory should exist: " + path, fs.exists(path) && fs.isDirectory(path));
    }
}

From source file:org.lab41.mapreduce.BlueprintsGraphDriver.java

License:Apache License

private Job configureJob2(Configuration baseConfiguration, FaunusGraph faunusGraph, FileSystem fs)
        throws IOException {
    Configuration job2Config = new Configuration(baseConfiguration);
    /** Job  2 Configuration **/
    Job job2 = new Job(job2Config);
    job2.setInputFormatClass(SequenceFileInputFormat.class);
    job2.setOutputFormatClass(faunusGraph.getGraphOutputFormat());
    job2.setJobName("BluePrintsGraphDriver Job2: " + faunusGraph.getInputLocation());
    job2.setJarByClass(BlueprintsGraphDriver.class);
    job2.setMapperClass(BlueprintsGraphOutputMapReduce.EdgeMap.class);
    job2.setMapOutputKeyClass(NullWritable.class);
    job2.setMapOutputValueClass(FaunusVertex.class);

    Path inputPath = faunusGraph.getInputLocation();

    FileInputFormat.setInputPaths(job2, inputPath);
    job2.setNumReduceTasks(0);

    String strJob2OutputPath = faunusGraph.getOutputLocation().toString();
    Path job2Path = new Path(strJob2OutputPath + "/job2");

    if (fs.isDirectory(job2Path)) {
        logger.info("Exists" + strJob2OutputPath + " --deleteing");
        fs.delete(job2Path, true);
    }

    FileOutputFormat.setOutputPath(job2, job2Path);

    //TODO -- I don't think this actually does anything
    //reduce the size of the splits:
    long splitSize = (long) job2.getConfiguration().getLong("mapred.max.split.size", 67108864);
    job2.getConfiguration().setLong("mapred.max.split.size", splitSize / 2);

    return job2;
}