Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

This page collects example usages of the getPath() method of org.apache.hadoop.fs.FileStatus.

Prototype

public Path getPath()

Source Link

Usage

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

/**
 * Checks whether every given file can be opened by the ORC reader.
 *
 * <p>When {@code Utilities.isVectorMode(conf)} is true the whole check is handed to a
 * new {@code VectorizedOrcInputFormat}.
 *
 * @param fs the file system passed to the ORC reader options
 * @param conf the configuration used to build the ORC reader options
 * @param files the files to probe
 * @return {@code false} when {@code files} is empty or opening any file raises an
 *         {@link IOException}; {@code true} otherwise
 * @throws IOException may propagate from the vectorized delegate; reader failures in
 *         the non-vectorized path are caught and reported as {@code false}
 */
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws IOException {
    if (Utilities.isVectorMode(conf)) {
        return new VectorizedOrcInputFormat().validateInput(fs, conf, files);
    }

    if (files.isEmpty()) {
        return false;
    }

    try {
        // The reader is created only to prove the file opens; the first failure ends the scan.
        for (FileStatus candidate : files) {
            OrcFile.createReader(candidate.getPath(), OrcFile.readerOptions(conf).filesystem(fs));
        }
    } catch (IOException unreadable) {
        return false;
    }
    return true;
}

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

/**
 * Finds the entry of {@code directory} whose file name starts with a numeric prefix,
 * terminated by {@code '_'}, that equals {@code bucket}.
 *
 * <p>Entries whose name contains no {@code '_'}, whose prefix is not numeric, or whose
 * numeric prefix does not fit in an {@code int} are skipped instead of aborting the search.
 *
 * @param fs the file system used to list {@code directory}
 * @param directory the directory to search
 * @param bucket the bucket number to look for
 * @return the path of the first listed entry whose numeric prefix equals {@code bucket}
 * @throws IOException if listing {@code directory} fails
 * @throws IllegalArgumentException if no entry matches {@code bucket}
 */
static Path findOriginalBucket(FileSystem fs, Path directory, int bucket) throws IOException {
    for (FileStatus stat : fs.listStatus(directory)) {
        String name = stat.getPath().getName();
        int separator = name.indexOf('_');
        if (separator < 0) {
            // Without a '_' there is no prefix to parse; substring(0, -1) would throw.
            continue;
        }
        String numberPart = name.substring(0, separator);
        if (!org.apache.commons.lang3.StringUtils.isNumeric(numberPart)) {
            continue;
        }
        try {
            if (Integer.parseInt(numberPart) == bucket) {
                return stat.getPath();
            }
        } catch (NumberFormatException ignored) {
            // All digits but too large for an int, so it cannot equal the requested bucket.
        }
    }
    throw new IllegalArgumentException("Can't find bucket " + bucket + " in " + directory);
}

From source file:com.blm.orc.VectorizedOrcInputFormat.java

License:Apache License

/**
 * Checks whether every given file can be opened by the ORC reader.
 *
 * @param fs the file system passed to the ORC reader options
 * @param conf the configuration used to build the ORC reader options
 * @param files the files to probe
 * @return {@code false} when {@code files} is empty or opening any file raises an
 *         {@link IOException}; {@code true} otherwise
 * @throws IOException declared by the signature; reader failures are caught here and
 *         reported as {@code false}
 */
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws IOException {
    if (files.isEmpty()) {
        return false;
    }
    try {
        for (FileStatus candidate : files) {
            // Opening the reader is the validation; the reader itself is not used.
            OrcFile.createReader(candidate.getPath(), OrcFile.readerOptions(conf).filesystem(fs));
        }
        return true;
    } catch (IOException unreadable) {
        return false;
    }
}

From source file:com.btoddb.chronicle.apps.AvroTools.java

License:Open Source License

/**
 * Runs {@code testFileAndFix} on every {@code .avro} file listed directly under
 * {@code srcDir}, skipping names that start with {@code "_"}.
 *
 * <p>Progress is printed to standard output. A failure on one file is printed and does
 * not stop the remaining files from being processed.
 *
 * @param srcDir the directory to scan; also used as the URI that selects the file system
 * @throws URISyntaxException if {@code srcDir} is not a valid URI
 * @throws IOException if the file system cannot be obtained or the directory cannot be
 *         checked or listed
 */
private void go(String srcDir) throws URISyntaxException, IOException {
    hdfsFs = FileSystem.get(new URI(srcDir), hdfsConfig);

    System.out.println();
    System.out.println("Processing files from " + srcDir);
    System.out.println();

    logger.debug("Searching for files in {}", srcDir);
    Path sourcePath = new Path(srcDir);
    boolean sourceExists = hdfsFs.exists(sourcePath);
    if (!sourceExists) {
        System.out.println("The path does not exist - cannot continue : " + sourcePath.toString());
        return;
    }

    // Accept only *.avro files whose name does not begin with an underscore.
    PathFilter avroFilesOnly = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            String fileName = candidate.getName();
            if (fileName.startsWith("_")) {
                return false;
            }
            return fileName.endsWith(".avro");
        }
    };
    FileStatus[] avroStatuses = hdfsFs.listStatus(sourcePath, avroFilesOnly);

    for (FileStatus status : avroStatuses) {
        try {
            Path inPath = status.getPath();
            long fileSize = hdfsFs.getFileStatus(inPath).getLen();
            String banner = String.format("Processing file, %s (%d)", inPath.toString(), fileSize);
            System.out.println(banner);

            testFileAndFix(inPath);
        } catch (Exception e) {
            // don't care about the cause, the test should be able to read all files it cares about
            e.printStackTrace();
        }
    }
}

From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java

License:Apache License

/**
 * Deletes the file or directory at {@code path}.
 *
 * <p>For a file, the inode is removed from the store and then each of its blocks. For a
 * directory, every listed child is deleted first and the directory inode last.
 *
 * @param path the path to delete; made absolute before use
 * @param recursive whether a non-empty directory may be deleted
 * @return {@code false} if no inode exists for the path, if listing the directory
 *         raises {@link FileNotFoundException}, or if deleting any child returns
 *         {@code false}; {@code true} otherwise
 * @throws IOException if the directory is not empty and {@code recursive} is false, or
 *         if a store or listing call fails
 */
@Override
public boolean delete(Path path, boolean recursive) throws IOException {
    Path absolutePath = makeAbsolute(path);
    INode inode = store.retrieveINode(absolutePath);
    if (inode == null) {
        return false;
    }

    if (inode.isFile()) {
        // Same order as before: drop the inode first, then its blocks.
        store.deleteINode(absolutePath);
        for (Block block : inode.getBlocks()) {
            store.deleteBlock(block);
        }
        return true;
    }

    FileStatus[] children;
    try {
        children = listStatus(absolutePath);
    } catch (FileNotFoundException missing) {
        return false;
    }

    if (children.length != 0 && !recursive) {
        throw new IOException("Directory " + path.toString() + " is not empty.");
    }
    for (FileStatus child : children) {
        boolean childDeleted = delete(child.getPath(), recursive);
        if (!childDeleted) {
            return false;
        }
    }
    store.deleteINode(absolutePath);
    return true;
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java

License:Apache License

/**
 * Read the Frequent Patterns generated from Text
 * /*from w  w  w .j a v a  2  s. c  o m*/
 * @return List of TopK patterns for each string frequent feature
 */
public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Parameters params) throws IOException {

    Configuration conf = new Configuration();

    Path frequentPatternsPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
    FileSystem fs = FileSystem.get(frequentPatternsPath.toUri(), conf);
    FileStatus[] outputFiles = fs.globStatus(new Path(frequentPatternsPath, FILE_PATTERN));

    List<Pair<String, TopKStringPatterns>> ret = Lists.newArrayList();
    for (FileStatus fileStatus : outputFiles) {
        ret.addAll(FPGrowth.readFrequentPattern(conf, fileStatus.getPath()));
    }
    return ret;
}

From source file:com.chinamobile.bcbsp.bspcontroller.HDFSOperator.java

License:Apache License

/**
 * Lists the entries of the given directory and prints each entry's path to standard
 * output.
 *
 * <p>The directory is listed once, so the printed paths and the returned array always
 * describe the same listing.
 *
 * @param dir
 *        file directory path
 * @return
 *        the statuses of the entries listed under {@code dir}
 * @throws IOException
 *         exceptions during handle BSP file
 */
public FileStatus[] listDirAll(String dir) throws IOException {
    FileStatus[] statuses = bspfs.listStatus(new BSPHdfsImpl().newPath(dir));
    for (FileStatus s : statuses) {
        System.out.println(s.getPath());
    }
    return statuses;
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job/*from w  ww  .ja v  a  2s  .  co m*/
 *        The current BSPJob job
 * @return input splits
 */
@Override
public List<InputSplit> getSplits(BSPJob job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConf());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = 0L;
            if (job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1) == 1) {
                if (job.getSplitSize() == 0L) {
                    splitSize = blockSize;
                } else {
                    splitSize = job.getSplitSize();
                }
            } else {
                if (job.getSplitSize() == 0L) {
                    splitSize = blockSize * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1);
                } else {
                    splitSize = job.getSplitSize() * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1);
                }
            }
            LOG.info("[Split Size] " + (splitSize / (1024 * 1024)) + " MB");
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }
    LOG.info("[Split Number] " + splits.size());
    return splits;
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * List input directories. Subclasses may override to, e.g., select only files
 * matching a regular expression./*from w ww.j a va 2  s .c  o  m*/
 *
 * @param job
 *        the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException
 *         if zero items.
 */
protected List<FileStatus> listStatus(BSPJob job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    List<IOException> errors = new ArrayList<IOException>();
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(HIDDEN_FILE_FILTER);
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConf());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:com.cloudera.cdk.data.filesystem.FileSystemDataset.java

License:Apache License

/**
 * Returns one dataset per non-hidden entry listed under this dataset's directory.
 *
 * <p>Each partition dataset reuses this dataset's name and file system, is located at
 * the entry's qualified path, and uses the sub-partition strategy obtained from
 * {@code Accessor.getDefault().getSubpartitionStrategy(partitionStrategy, 1)}.
 *
 * @return the partition datasets, in listing order
 * @throws IllegalStateException if the descriptor is not partitioned
 * @throws DatasetException if the directory cannot be listed
 */
@Override
@Deprecated
public Iterable<Dataset<E>> getPartitions() {
    Preconditions.checkState(descriptor.isPartitioned(),
            "Attempt to get partitions on a non-partitioned dataset (name:%s)", name);

    FileStatus[] partitionDirs;
    try {
        partitionDirs = fileSystem.listStatus(directory, PathFilters.notHidden());
    } catch (IOException e) {
        throw new DatasetException("Unable to list partition directory for directory " + directory, e);
    }

    List<Dataset<E>> partitions = Lists.newArrayList();
    for (FileStatus partitionDir : partitionDirs) {
        Path qualified = fileSystem.makeQualified(partitionDir.getPath());
        PartitionKey key = fromDirectoryName(qualified);
        PartitionStrategy subPartitionStrategy = Accessor.getDefault()
                .getSubpartitionStrategy(partitionStrategy, 1);

        // Descriptor for the partition: a copy of this dataset's descriptor, relocated.
        DatasetDescriptor partitionDescriptor = new DatasetDescriptor.Builder(descriptor)
                .location(qualified)
                .partitionStrategy(subPartitionStrategy)
                .build();

        Builder partitionBuilder = new FileSystemDataset.Builder()
                .name(name)
                .fileSystem(fileSystem)
                .descriptor(partitionDescriptor)
                .partitionKey(key);
        partitions.add(partitionBuilder.<E>build());
    }

    return partitions;
}