Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

This page collects example usages of the org.apache.hadoop.fs.FileStatus.isDirectory() method, drawn from open source projects.

Prototype

public boolean isDirectory() 

Document

Is this a directory?
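
Before the project examples, here is a minimal, self-contained sketch of the call in isolation (not taken from any of the sources below): it lists a path and prints only the entries for which isDirectory() returns true. The default Configuration and the path /tmp are placeholder assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class ListSubDirectories {

    public static void main(final String[] args) throws Exception {
        // A default Configuration resolves the file system from fs.defaultFS
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);

        // isDirectory() distinguishes directory entries from files and symlinks
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            if (status.isDirectory()) {
                System.out.println(status.getPath());
            }
        }
    }
}

Because FileStatus is a plain value object returned by listStatus() or getFileStatus(), calling isDirectory() does not trigger another round trip to the NameNode.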

Usage

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java

License:LGPL

@Override
public List<DataFile> list(final DataFile file) throws IOException {

    final Path path = getPath(file);

    if (path == null) {
        throw new NullPointerException("Path to delete is null");
    }//w  w  w. java  2  s.  c om
    if (this.conf == null) {
        throw new NullPointerException("The configuration object is null");
    }

    final FileSystem fs = path.getFileSystem(this.conf);

    if (fs == null) {
        throw new IOException("Unable to delete the file, The FileSystem is null");
    }

    if (!fs.exists(path)) {
        throw new FileNotFoundException("File not found: " + file);
    }

    FileStatus fileStatus = fs.getFileStatus(path);

    if (!fileStatus.isDirectory()) {
        throw new IOException("The file is not a directory: " + file);
    }

    // List directory
    final FileStatus[] files = fs.listStatus(path);

    // Convert the File array to a list of DataFile
    final List<DataFile> result = new ArrayList<>(files.length);
    for (FileStatus f : files) {
        result.add(new DataFile(f.getPath().toUri().toString()));
    }

    // Return an unmodifiable list
    return Collections.unmodifiableList(result);
}

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.PathDataProtocol.java

License:LGPL

@Override
public DataFileMetadata getMetadata(final DataFile src) throws IOException {

    if (!exists(src, true)) {
        throw new FileNotFoundException("File not found: " + src);
    }

    final Path path = getPath(src);
    final FileStatus status = path.getFileSystem(this.conf).getFileStatus(path);

    final SimpleDataFileMetadata result = new SimpleDataFileMetadata();
    result.setContentLength(status.getLen());
    result.setLastModified(status.getModificationTime());
    result.setDataFormat(DataFormatRegistry.getInstance().getDataFormatFromFilename(src.getName()));

    final CompressionType ct = CompressionType.getCompressionTypeByFilename(src.getSource());

    if (ct != null) {
        result.setContentEncoding(ct.getContentEncoding());
    }

    if (status.isDirectory()) {
        result.setDirectory(true);
    }

    if (status.isSymlink()) {
        result.setSymbolicLink(new DataFile(status.getSymlink().toUri()));
    }

    return result;
}

From source file:fuse4j.hadoopfs.HdfsClientReal.java

License:Apache License

/**
  * getFileInfo()
  */
public HdfsFileAttr getFileInfo(String path) {

    try {
        FileStatus dfsStat = dfs.getFileStatus(new Path(path));

        final boolean directory = dfsStat.isDirectory();
        final int inode = 0;
        final int mode = dfsStat.getPermission().toShort();
        final int uid = userCache.getUid(dfsStat.getOwner());
        final int gid = 0;

        // TODO: per-file block-size can't be retrieved correctly,
        //       using default block size for now.
        final long size = dfsStat.getLen();
        final int blocks = (int) Math.ceil(((double) size) / dfs.getDefaultBlockSize());

        // modification/create-times are the same as access-time
        final int modificationTime = (int) (dfsStat.getModificationTime() / 1000);

        HdfsFileAttr hdfsFileAttr = new HdfsFileAttr(directory, inode, mode, uid, gid, 1);
        hdfsFileAttr.setSize(size, blocks);
        hdfsFileAttr.setTime(modificationTime);

        // TODO Hack to set inode;
        hdfsFileAttr.inode = hdfsFileAttr.hashCode();

        return hdfsFileAttr;
    } catch (IOException ioe) {
        // fall through to failure
    }

    // failed
    return null;
}

From source file:fuse4j.hadoopfs.HdfsClientReal.java

License:Apache License

private HdfsDirEntry newHdfsDirEntry(FileStatus fileStatus) {
    final boolean directory = fileStatus.isDirectory();
    final String name = fileStatus.getPath().getName();
    final FsPermission permission = fileStatus.getPermission();

    return new HdfsDirEntry(directory, name, permission.toShort());
}

From source file:gaffer.analytic.impl.GraphStatistics.java

License:Apache License

public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo, so we can check connection and check that the
    // table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create graph and update configuration based on the view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Running MapReduce against Gaffer data in Accumulo: input = " + tableName + ", output = "
            + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates fewer
    // splits); otherwise use ElementInputFormat which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(GraphStatisticsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Combiner
    job.setCombinerClass(GraphStatisticsReducer.class);

    // Reducer
    job.setReducerClass(GraphStatisticsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Write results out
    System.out.println("Summary of graph");
    for (FileStatus file : fs.listStatus(outputPath)) {
        if (!file.isDirectory() && !file.getPath().getName().contains("_SUCCESS")) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
            Text text = new Text();
            SetOfStatistics stats = new SetOfStatistics();
            while (reader.next(text, stats)) {
                System.out.println(text + ", " + stats);
            }
            reader.close();
        }
    }

    return 0;
}

From source file:gobblin.compaction.dataset.TimeBasedSubDirDatasetsFinder.java

License:Apache License

/**
 * Each subdir in {@link DatasetsFinder#inputDir} is considered a dataset, if it satisfies blacklist and whitelist.
 */
@Override
public Set<Dataset> findDistinctDatasets() throws IOException {
    Set<Dataset> datasets = Sets.newHashSet();
    for (FileStatus datasetsFileStatus : this.fs.globStatus(new Path(inputDir, subDirPattern))) {
        log.info("Scanning directory : " + datasetsFileStatus.getPath().toString());
        if (datasetsFileStatus.isDirectory()) {
            String datasetName = getDatasetName(datasetsFileStatus.getPath().toString(), inputDir);
            if (DatasetFilterUtils.survived(datasetName, this.blacklist, this.whitelist)) {
                log.info("Found dataset: " + datasetName);
                Path inputPath = new Path(this.inputDir, new Path(datasetName, this.inputSubDir));
                Path inputLatePath = new Path(this.inputDir, new Path(datasetName, this.inputLateSubDir));
                Path outputPath = new Path(this.destDir, new Path(datasetName, this.destSubDir));
                Path outputLatePath = new Path(this.destDir, new Path(datasetName, this.destLateSubDir));
                Path outputTmpPath = new Path(this.tmpOutputDir, new Path(datasetName, this.destSubDir));
                double priority = this.getDatasetPriority(datasetName);

                String folderStructure = getFolderStructure();
                for (FileStatus status : this.fs.globStatus(new Path(inputPath, folderStructure))) {
                    Path jobInputPath = status.getPath();
                    DateTime folderTime = null;
                    try {
                        folderTime = getFolderTime(jobInputPath, inputPath);
                    } catch (RuntimeException e) {
                        log.warn("{} is not a valid folder. Will be skipped due to exception.", jobInputPath,
                                e);
                        continue;
                    }

                    if (folderWithinAllowedPeriod(jobInputPath, folderTime)) {
                        Path jobInputLatePath = appendFolderTime(inputLatePath, folderTime);
                        Path jobOutputPath = appendFolderTime(outputPath, folderTime);
                        Path jobOutputLatePath = appendFolderTime(outputLatePath, folderTime);
                        Path jobOutputTmpPath = appendFolderTime(outputTmpPath, folderTime);

                        Dataset timeBasedDataset = new Dataset.Builder().withPriority(priority)
                                .withDatasetName(datasetName)
                                .addInputPath(this.recompactDatasets ? jobOutputPath : jobInputPath)
                                .addInputLatePath(this.recompactDatasets ? jobOutputLatePath : jobInputLatePath)
                                .withOutputPath(jobOutputPath).withOutputLatePath(jobOutputLatePath)
                                .withOutputTmpPath(jobOutputTmpPath).build();
                        // Stores the extra information for timeBasedDataset
                        timeBasedDataset.setJobProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION,
                                folderTime.toString(this.timeFormatter));
                        timeBasedDataset.setJobProp(MRCompactor.COMPACTION_INPUT_PATH_TIME,
                                folderTime.getMillis());
                        datasets.add(timeBasedDataset);
                    }
                }
            }
        }
    }
    return datasets;
}

From source file:gobblin.compaction.hive.HdfsReader.java

License:Apache License

public static String getFirstDataFilePathInDir(String dirInHdfs) throws IOException {
    FileStatus[] fileStatuses = getFileSystem().listStatus(new Path(dirInHdfs));
    for (FileStatus fileStatus : fileStatuses) {
        Path dataFilePath = fileStatus.getPath();
        if (!fileStatus.isDirectory() && !dataFilePath.getName().startsWith("_")) {
            return dataFilePath.toString();
        }
    }
    String message = dirInHdfs + " does not contain a valid data file.";
    LOG.error(message);
    throw new RuntimeException(message);
}

From source file:gobblin.compaction.hive.HdfsWriter.java

License:Apache License

public static void moveSelectFiles(String extension, String source, String destination) throws IOException {
    FileSystem fs = getFileSystem();
    fs.mkdirs(new Path(destination));
    FileStatus[] fileStatuses = fs.listStatus(new Path(source));
    for (FileStatus fileStatus : fileStatuses) {
        Path path = fileStatus.getPath();
        if (!fileStatus.isDirectory() && path.toString().toLowerCase().endsWith(extension.toLowerCase())) {
            HadoopUtils.deleteIfExists(fs, new Path(destination), true);
            HadoopUtils.copyPath(fs, path, fs, new Path(destination), getConfiguration());
        }
    }
}

From source file:gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner.java

License:Apache License

public static Schema getNewestSchemaFromSource(Path sourceDir, FileSystem fs) throws IOException {
    FileStatus[] files = fs.listStatus(sourceDir);
    Arrays.sort(files, new LastModifiedDescComparator());
    for (FileStatus status : files) {
        if (status.isDirectory()) {
            Schema schema = getNewestSchemaFromSource(status.getPath(), fs);
            if (schema != null)
                return schema;
        } else if (FilenameUtils.isExtension(status.getPath().getName(), AVRO)) {
            return AvroUtils.getSchemaFromDataFile(status.getPath(), fs);
        }
    }
    return null;
}

From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java

License:Apache License

/**
 * Retrieves all the children of the given {@link ConfigKeyPath} by doing a {@code ls} on the {@link Path} specified
 * by the {@link ConfigKeyPath}. If the {@link Path} described by the {@link ConfigKeyPath} does not exist, an empty
 * {@link Collection} is returned.
 *
 * @param  configKey      the config key path whose children are necessary.
 * @param  version        specify the configuration version in the configuration store.
 *
 * @return a {@link Collection} of {@link ConfigKeyPath} where each entry is a child of the given configKey.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 */
@Override
public Collection<ConfigKeyPath> getChildren(ConfigKeyPath configKey, String version)
        throws VersionDoesNotExistException {
    Preconditions.checkNotNull(configKey, "configKey cannot be null!");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

    List<ConfigKeyPath> children = new ArrayList<>();
    Path datasetDir = getDatasetDirForKey(configKey, version);

    try {
        if (!this.fs.exists(datasetDir)) {
            return children;
        }

        for (FileStatus fileStatus : this.fs.listStatus(datasetDir)) {
            if (fileStatus.isDirectory()) {
                children.add(configKey.createChild(fileStatus.getPath().getName()));
            }
        }
        return children;
    } catch (IOException e) {
        throw new RuntimeException(
                String.format("Error while getting children for configKey: \"%s\"", configKey), e);
    }
}