Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of the getPath() method of org.apache.hadoop.fs.FileStatus.

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

/**
 * Checks whether every given file can be opened with {@code OrcFile.createReader}.
 *
 * <p>When {@code Utilities.isVectorMode(conf)} is true the decision is delegated to a
 * new {@code VectorizedOrcInputFormat}.
 *
 * @param fs    file system passed to the reader options
 * @param conf  configuration used to build the reader options
 * @param files files to probe
 * @return {@code false} if {@code files} is empty or opening any file throws an
 *         {@link IOException}; {@code true} otherwise
 */
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws IOException {
    if (Utilities.isVectorMode(conf)) {
        VectorizedOrcInputFormat vectorized = new VectorizedOrcInputFormat();
        return vectorized.validateInput(fs, conf, files);
    }

    if (files.isEmpty()) {
        return false;
    }

    for (int i = 0; i < files.size(); i++) {
        FileStatus candidate = files.get(i);
        try {
            // Only the ability to open the file matters; the reader itself is discarded.
            OrcFile.createReader(candidate.getPath(), OrcFile.readerOptions(conf).filesystem(fs));
        } catch (IOException unreadable) {
            // A file that cannot be opened makes the whole input invalid.
            return false;
        }
    }
    return true;
}

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

/**
 * Scans {@code directory} for an entry whose name begins with a numeric prefix,
 * terminated by the first {@code '_'}, that equals {@code bucket}.
 *
 * <p>Entries whose names contain no underscore, or whose prefix is not a number
 * that fits in an {@code int}, are skipped rather than aborting the scan.
 *
 * @param fs        file system used to list the directory
 * @param directory directory to scan
 * @param bucket    bucket number to look for
 * @return the path of the first matching entry
 * @throws IOException              if listing the directory fails
 * @throws IllegalArgumentException if no entry matches the bucket
 */
static Path findOriginalBucket(FileSystem fs, Path directory, int bucket) throws IOException {
    for (FileStatus stat : fs.listStatus(directory)) {
        String name = stat.getPath().getName();
        int underscore = name.indexOf('_');
        if (underscore < 0) {
            // No separator: substring(0, -1) would throw, and the name cannot be a bucket file.
            continue;
        }
        String numberPart = name.substring(0, underscore);
        if (!org.apache.commons.lang3.StringUtils.isNumeric(numberPart)) {
            continue;
        }
        try {
            if (Integer.parseInt(numberPart) == bucket) {
                return stat.getPath();
            }
        } catch (NumberFormatException tooLarge) {
            // All digits but outside the int range, so it cannot equal the requested bucket.
            continue;
        }
    }
    throw new IllegalArgumentException("Can't find bucket " + bucket + " in " + directory);
}

From source file:com.blm.orc.VectorizedOrcInputFormat.java

License:Apache License

/**
 * Checks whether every given file can be opened with {@code OrcFile.createReader}.
 *
 * @param fs    file system passed to the reader options
 * @param conf  configuration used to build the reader options
 * @param files files to probe
 * @return {@code false} if {@code files} is empty or opening any file throws an
 *         {@link IOException}; {@code true} otherwise
 */
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws IOException {
    if (files.isEmpty()) {
        return false;
    }

    for (int index = 0; index < files.size(); index++) {
        FileStatus current = files.get(index);
        try {
            // The reader is created purely as a readability probe.
            OrcFile.createReader(current.getPath(), OrcFile.readerOptions(conf).filesystem(fs));
        } catch (IOException notReadable) {
            return false;
        }
    }
    return true;
}

From source file:com.btoddb.chronicle.apps.AvroTools.java

License:Open Source License

/**
 * Lists the entries directly under {@code srcDir} whose names end with ".avro" and do
 * not start with "_", prints each one with its length, and passes it to
 * {@code testFileAndFix}.
 *
 * <p>A failure on one file is printed and does not stop processing of the others.
 *
 * @param srcDir URI of the directory to scan
 * @throws URISyntaxException if {@code srcDir} is not a valid URI
 * @throws IOException        if the file system cannot be obtained or listed
 */
private void go(String srcDir) throws URISyntaxException, IOException {
    hdfsFs = FileSystem.get(new URI(srcDir), hdfsConfig);

    System.out.println();
    System.out.println("Processing files from " + srcDir);
    System.out.println();

    logger.debug("Searching for files in {}", srcDir);
    Path sourceRoot = new Path(srcDir);
    if (!hdfsFs.exists(sourceRoot)) {
        System.out.println("The path does not exist - cannot continue : " + sourceRoot.toString());
        return;
    }

    PathFilter avroFilesOnly = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            String fileName = candidate.getName();
            if (fileName.startsWith("_")) {
                return false;
            }
            return fileName.endsWith(".avro");
        }
    };

    for (FileStatus status : hdfsFs.listStatus(sourceRoot, avroFilesOnly)) {
        try {
            Path inPath = status.getPath();
            long fileSize = hdfsFs.getFileStatus(inPath).getLen();
            System.out.println(String.format("Processing file, %s (%d)", inPath.toString(), fileSize));

            testFileAndFix(inPath);
        } catch (Exception e) {
            // The cause is irrelevant here: report it and move on to the next file.
            e.printStackTrace();
        }
    }
}

From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java

License:Apache License

/**
 * Deletes the inode at {@code path}, together with its blocks (for a file) or its
 * children (for a directory).
 *
 * @param path      path to delete; made absolute before use
 * @param recursive whether a non-empty directory may be deleted
 * @return {@code false} if no inode exists at the path, if listing the directory
 *         raises {@link FileNotFoundException}, or if deleting any child returns
 *         {@code false}; {@code true} otherwise
 * @throws IOException if the directory is not empty and {@code recursive} is false
 */
@Override
public boolean delete(Path path, boolean recursive) throws IOException {
    final Path target = makeAbsolute(path);
    final INode node = store.retrieveINode(target);
    if (node == null) {
        return false;
    }

    if (node.isFile()) {
        // Remove the inode first, then the blocks it referenced.
        store.deleteINode(target);
        for (Block dataBlock : node.getBlocks()) {
            store.deleteBlock(dataBlock);
        }
        return true;
    }

    final FileStatus[] children;
    try {
        children = listStatus(target);
    } catch (FileNotFoundException fnfe) {
        return false;
    }

    if ((children.length != 0) && (!recursive)) {
        throw new IOException("Directory " + path.toString() + " is not empty.");
    }
    for (FileStatus child : children) {
        boolean childDeleted = delete(child.getPath(), recursive);
        if (!childDeleted) {
            return false;
        }
    }
    store.deleteINode(target);
    return true;
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java

License:Apache License

/**
 * Read the Frequent Patterns generated from Text
 * /*from w  w  w .j a v a  2  s. c  o m*/
 * @return List of TopK patterns for each string frequent feature
 */
public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Parameters params) throws IOException {

    Configuration conf = new Configuration();

    Path frequentPatternsPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
    FileSystem fs = FileSystem.get(frequentPatternsPath.toUri(), conf);
    FileStatus[] outputFiles = fs.globStatus(new Path(frequentPatternsPath, FILE_PATTERN));

    List<Pair<String, TopKStringPatterns>> ret = Lists.newArrayList();
    for (FileStatus fileStatus : outputFiles) {
        ret.addAll(FPGrowth.readFrequentPattern(conf, fileStatus.getPath()));
    }
    return ret;
}

From source file:com.chinamobile.bcbsp.bspcontroller.HDFSOperator.java

License:Apache License

/**
 * Lists all entries of the given directory, printing each entry's path to standard
 * output.
 *
 * @param dir
 *        directory path
 * @return
 *        the listed entries
 * @throws IOException
 *         if listing the directory fails
 */
public FileStatus[] listDirAll(String dir) throws IOException {
    // List once and reuse the result, so the printed entries are exactly the returned
    // ones and the file system is not queried twice.
    FileStatus[] entries = bspfs.listStatus(new BSPHdfsImpl().newPath(dir));
    for (FileStatus entry : entries) {
        System.out.println(entry.getPath());
    }
    return entries;
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job/*from w  ww  .ja v  a  2s  .  co m*/
 *        The current BSPJob job
 * @return input splits
 */
@Override
public List<InputSplit> getSplits(BSPJob job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConf());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = 0L;
            if (job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1) == 1) {
                if (job.getSplitSize() == 0L) {
                    splitSize = blockSize;
                } else {
                    splitSize = job.getSplitSize();
                }
            } else {
                if (job.getSplitSize() == 0L) {
                    splitSize = blockSize * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1);
                } else {
                    splitSize = job.getSplitSize() * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1);
                }
            }
            LOG.info("[Split Size] " + (splitSize / (1024 * 1024)) + " MB");
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }
    LOG.info("[Split Number] " + splits.size());
    return splits;
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * List input directories. Subclasses may override to, e.g., select only files
 * matching a regular expression./*from w ww.j a va 2  s .c  o  m*/
 *
 * @param job
 *        the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException
 *         if zero items.
 */
protected List<FileStatus> listStatus(BSPJob job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    List<IOException> errors = new ArrayList<IOException>();
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(HIDDEN_FILE_FILTER);
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConf());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:com.cloudera.cdk.data.filesystem.FileSystemDataset.java

License:Apache License

/**
 * Returns one dataset per non-hidden entry of this dataset's directory.
 *
 * <p>Each returned dataset reuses this dataset's name and file system, is located at
 * the qualified entry path, and carries the partition key derived from that path by
 * {@code fromDirectoryName}.
 *
 * @return the partition datasets
 * @throws IllegalStateException if the descriptor is not partitioned
 * @throws DatasetException      if the directory cannot be listed
 */
@Override
@Deprecated
public Iterable<Dataset<E>> getPartitions() {
    Preconditions.checkState(descriptor.isPartitioned(),
            "Attempt to get partitions on a non-partitioned dataset (name:%s)", name);

    final FileStatus[] children;
    try {
        children = fileSystem.listStatus(directory, PathFilters.notHidden());
    } catch (IOException e) {
        throw new DatasetException("Unable to list partition directory for directory " + directory, e);
    }

    final List<Dataset<E>> result = Lists.newArrayList();
    for (FileStatus child : children) {
        Path location = fileSystem.makeQualified(child.getPath());
        PartitionKey key = fromDirectoryName(location);
        PartitionStrategy subStrategy = Accessor.getDefault()
                .getSubpartitionStrategy(partitionStrategy, 1);

        DatasetDescriptor childDescriptor = new DatasetDescriptor.Builder(descriptor)
                .location(location)
                .partitionStrategy(subStrategy)
                .build();
        Builder childBuilder = new FileSystemDataset.Builder()
                .name(name)
                .fileSystem(fileSystem)
                .descriptor(childDescriptor)
                .partitionKey(key);

        result.add(childBuilder.<E>build());
    }
    return result;
}