Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

This page collects example usages of the org.apache.hadoop.fs.FileStatus.isDirectory() method, drawn from open source projects.

Prototype

public boolean isDirectory() 

Document

Is this a directory?
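
Before the project examples, here is a minimal, self-contained sketch of the call in isolation (not taken from any of the sources below): it lists a path and prints only the entries for which isDirectory() returns true. The default Configuration and the path /tmp are placeholder assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class ListSubDirectories {

    public static void main(final String[] args) throws Exception {
        // A default Configuration resolves the file system from fs.defaultFS
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);

        // isDirectory() distinguishes directory entries from files and symlinks
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            if (status.isDirectory()) {
                System.out.println(status.getPath());
            }
        }
    }
}

Because FileStatus is a plain value object returned by listStatus() or getFileStatus(), calling isDirectory() does not trigger another round trip to the NameNode.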

Usage

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java

License:LGPL

@Override
public List<DataFile> list(final DataFile file) throws IOException {

    final Path path = getPath(file);

    if (path == null) {
        throw new NullPointerException("Path to delete is null");
    }//w  w  w. java  2  s.  c om
    if (this.conf == null) {
        throw new NullPointerException("The configuration object is null");
    }

    final FileSystem fs = path.getFileSystem(this.conf);

    if (fs == null) {
        throw new IOException("Unable to delete the file, The FileSystem is null");
    }

    if (!fs.exists(path)) {
        throw new FileNotFoundException("File not found: " + file);
    }

    FileStatus fileStatus = fs.getFileStatus(path);

    if (!fileStatus.isDirectory()) {
        throw new IOException("The file is not a directory: " + file);
    }

    // List directory
    final FileStatus[] files = fs.listStatus(path);

    // Convert the File array to a list of DataFile
    final List<DataFile> result = new ArrayList<>(files.length);
    for (FileStatus f : files) {
        result.add(new DataFile(f.getPath().toUri().toString()));
    }

    // Return an unmodifiable list
    return Collections.unmodifiableList(result);
}

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.PathDataProtocol.java

License:LGPL

@Override
public DataFileMetadata getMetadata(final DataFile src) throws IOException {

    if (!exists(src, true)) {
        throw new FileNotFoundException("File not found: " + src);
    }

    final Path path = getPath(src);
    final FileStatus status = path.getFileSystem(this.conf).getFileStatus(path);

    final SimpleDataFileMetadata result = new SimpleDataFileMetadata();
    result.setContentLength(status.getLen());
    result.setLastModified(status.getModificationTime());
    result.setDataFormat(DataFormatRegistry.getInstance().getDataFormatFromFilename(src.getName()));

    final CompressionType ct = CompressionType.getCompressionTypeByFilename(src.getSource());

    if (ct != null) {
        result.setContentEncoding(ct.getContentEncoding());
    }

    if (status.isDirectory()) {
        result.setDirectory(true);
    }

    if (status.isSymlink()) {
        result.setSymbolicLink(new DataFile(status.getSymlink().toUri()));
    }

    return result;
}

From source file:fuse4j.hadoopfs.HdfsClientReal.java

License:Apache License

/**
  * getFileInfo()
  */
public HdfsFileAttr getFileInfo(String path) {

    try {
        FileStatus dfsStat = dfs.getFileStatus(new Path(path));

        final boolean directory = dfsStat.isDirectory();
        final int inode = 0;
        final int mode = dfsStat.getPermission().toShort();
        final int uid = userCache.getUid(dfsStat.getOwner());
        final int gid = 0;

        // TODO: per-file block-size can't be retrieved correctly,
        //       using default block size for now.
        final long size = dfsStat.getLen();
        final int blocks = (int) Math.ceil(((double) size) / dfs.getDefaultBlockSize());

        // modification/create-times are the same as access-time
        final int modificationTime = (int) (dfsStat.getModificationTime() / 1000);

        HdfsFileAttr hdfsFileAttr = new HdfsFileAttr(directory, inode, mode, uid, gid, 1);
        hdfsFileAttr.setSize(size, blocks);
        hdfsFileAttr.setTime(modificationTime);

        // TODO Hack to set inode;
        hdfsFileAttr.inode = hdfsFileAttr.hashCode();

        return hdfsFileAttr;
    } catch (IOException ioe) {
        // fall through to failure
    }

    // failed
    return null;
}

From source file:fuse4j.hadoopfs.HdfsClientReal.java

License:Apache License

private HdfsDirEntry newHdfsDirEntry(FileStatus fileStatus) {
    final boolean directory = fileStatus.isDirectory();
    final String name = fileStatus.getPath().getName();
    final FsPermission permission = fileStatus.getPermission();

    return new HdfsDirEntry(directory, name, permission.toShort());
}

From source file:gaffer.analytic.impl.GraphStatistics.java

License:Apache License

public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo, so we can check connection and check that the
    // table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create graph and update configuration based on the view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Running MapReduce against Gaffer data in Accumulo: input = " + tableName + ", output = "
            + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates fewer
    // splits); otherwise use ElementInputFormat which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(GraphStatisticsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Combiner
    job.setCombinerClass(GraphStatisticsReducer.class);

    // Reducer
    job.setReducerClass(GraphStatisticsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Write results out
    System.out.println("Summary of graph");
    for (FileStatus file : fs.listStatus(outputPath)) {
        if (!file.isDirectory() && !file.getPath().getName().contains("_SUCCESS")) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
            Text text = new Text();
            SetOfStatistics stats = new SetOfStatistics();
            while (reader.next(text, stats)) {
                System.out.println(text + ", " + stats);
            }
            reader.close();
        }
    }

    return 0;
}

From source file:gobblin.compaction.dataset.TimeBasedSubDirDatasetsFinder.java

License:Apache License

/**
 * Each subdir in {@link DatasetsFinder#inputDir} is considered a dataset, if it satisfies blacklist and whitelist.
 */
@Override
public Set<Dataset> findDistinctDatasets() throws IOException {
    Set<Dataset> datasets = Sets.newHashSet();
    for (FileStatus datasetsFileStatus : this.fs.globStatus(new Path(inputDir, subDirPattern))) {
        log.info("Scanning directory : " + datasetsFileStatus.getPath().toString());
        if (datasetsFileStatus.isDirectory()) {
            String datasetName = getDatasetName(datasetsFileStatus.getPath().toString(), inputDir);
            if (DatasetFilterUtils.survived(datasetName, this.blacklist, this.whitelist)) {
                log.info("Found dataset: " + datasetName);
                Path inputPath = new Path(this.inputDir, new Path(datasetName, this.inputSubDir));
                Path inputLatePath = new Path(this.inputDir, new Path(datasetName, this.inputLateSubDir));
                Path outputPath = new Path(this.destDir, new Path(datasetName, this.destSubDir));
                Path outputLatePath = new Path(this.destDir, new Path(datasetName, this.destLateSubDir));
                Path outputTmpPath = new Path(this.tmpOutputDir, new Path(datasetName, this.destSubDir));
                double priority = this.getDatasetPriority(datasetName);

                String folderStructure = getFolderStructure();
                for (FileStatus status : this.fs.globStatus(new Path(inputPath, folderStructure))) {
                    Path jobInputPath = status.getPath();
                    DateTime folderTime = null;
                    try {
                        folderTime = getFolderTime(jobInputPath, inputPath);
                    } catch (RuntimeException e) {
                        log.warn("{} is not a valid folder. Will be skipped due to exception.", jobInputPath,
                                e);
                        continue;
                    }

                    if (folderWithinAllowedPeriod(jobInputPath, folderTime)) {
                        Path jobInputLatePath = appendFolderTime(inputLatePath, folderTime);
                        Path jobOutputPath = appendFolderTime(outputPath, folderTime);
                        Path jobOutputLatePath = appendFolderTime(outputLatePath, folderTime);
                        Path jobOutputTmpPath = appendFolderTime(outputTmpPath, folderTime);

                        Dataset timeBasedDataset = new Dataset.Builder().withPriority(priority)
                                .withDatasetName(datasetName)
                                .addInputPath(this.recompactDatasets ? jobOutputPath : jobInputPath)
                                .addInputLatePath(this.recompactDatasets ? jobOutputLatePath : jobInputLatePath)
                                .withOutputPath(jobOutputPath).withOutputLatePath(jobOutputLatePath)
                                .withOutputTmpPath(jobOutputTmpPath).build();
                        // Stores the extra information for timeBasedDataset
                        timeBasedDataset.setJobProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION,
                                folderTime.toString(this.timeFormatter));
                        timeBasedDataset.setJobProp(MRCompactor.COMPACTION_INPUT_PATH_TIME,
                                folderTime.getMillis());
                        datasets.add(timeBasedDataset);
                    }
                }
            }
        }
    }
    return datasets;
}

From source file:gobblin.compaction.hive.HdfsReader.java

License:Apache License

public static String getFirstDataFilePathInDir(String dirInHdfs) throws IOException {
    FileStatus[] fileStatuses = getFileSystem().listStatus(new Path(dirInHdfs));
    for (FileStatus fileStatus : fileStatuses) {
        Path dataFilePath = fileStatus.getPath();
        if (!fileStatus.isDirectory() && !dataFilePath.getName().startsWith("_")) {
            return dataFilePath.toString();
        }
    }
    String message = dirInHdfs + " does not contain a valid data file.";
    LOG.error(message);
    throw new RuntimeException(message);
}

From source file:gobblin.compaction.hive.HdfsWriter.java

License:Apache License

public static void moveSelectFiles(String extension, String source, String destination) throws IOException {
    FileSystem fs = getFileSystem();
    fs.mkdirs(new Path(destination));
    FileStatus[] fileStatuses = fs.listStatus(new Path(source));
    for (FileStatus fileStatus : fileStatuses) {
        Path path = fileStatus.getPath();
        if (!fileStatus.isDirectory() && path.toString().toLowerCase().endsWith(extension.toLowerCase())) {
            HadoopUtils.deleteIfExists(fs, new Path(destination), true);
            HadoopUtils.copyPath(fs, path, fs, new Path(destination), getConfiguration());
        }
    }
}

From source file:gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner.java

License:Apache License

public static Schema getNewestSchemaFromSource(Path sourceDir, FileSystem fs) throws IOException {
    FileStatus[] files = fs.listStatus(sourceDir);
    Arrays.sort(files, new LastModifiedDescComparator());
    for (FileStatus status : files) {
        if (status.isDirectory()) {
            Schema schema = getNewestSchemaFromSource(status.getPath(), fs);
            if (schema != null)
                return schema;
        } else if (FilenameUtils.isExtension(status.getPath().getName(), AVRO)) {
            return AvroUtils.getSchemaFromDataFile(status.getPath(), fs);
        }
    }
    return null;
}

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java

License:Apache License

/**
 * Retrieves all the children of the given {@link ConfigKeyPath} by doing a {@code ls} on the {@link Path} specified
 * by the {@link ConfigKeyPath}. If the {@link Path} described by the {@link ConfigKeyPath} does not exist, an empty
 * {@link Collection} is returned.
 *
 * @param  configKey      the config key path whose children are necessary.
 * @param  version        specify the configuration version in the configuration store.
 *
 * @return a {@link Collection} of {@link ConfigKeyPath} where each entry is a child of the given configKey.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 */
@Override
public Collection<ConfigKeyPath> getChildren(ConfigKeyPath configKey, String version)
        throws VersionDoesNotExistException {
    Preconditions.checkNotNull(configKey, "configKey cannot be null!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

    List<ConfigKeyPath> children = new ArrayList<>();
    Path datasetDir = getDatasetDirForKey(configKey, version);

    try {
        if (!this.fs.exists(datasetDir)) {
            return children;
        }

        for (FileStatus fileStatus : this.fs.listStatus(datasetDir)) {
            if (fileStatus.isDirectory()) {
                children.add(configKey.createChild(fileStatus.getPath().getName()));
            }
        }
        return children;
    } catch (IOException e) {
        throw new RuntimeException(
                String.format("Error while getting children for configKey: \"%s\"", configKey), e);
    }
}