Example usage for org.apache.hadoop.fs.FileStatus.getPath()

Introduction

On this page you can find example usages of the getPath() method of org.apache.hadoop.fs.FileStatus.

Prototype

public Path getPath()

Source Link

Usage

From source file:cc.solr.lucene.store.hdfs.HdfsDirectory.java

License:Apache License

/**
 * Lists the names of the non-directory entries found directly under this
 * directory's HDFS path. The collected names are handed to
 * {@code getNormalNames} and its result is returned.
 *
 * @return the result of {@code getNormalNames} for the collected names, or an
 *         empty array when the file system returns {@code null} for the listing
 * @throws IOException if the listing call fails
 */
@Override
public String[] listAll() throws IOException {
    final FileStatus[] entries = getFileSystem().listStatus(_hdfsDirPath);
    if (entries == null) {
        return new String[0];
    }

    final List<String> names = new ArrayList<String>();
    for (final FileStatus entry : entries) {
        if (entry.isDir()) {
            // Only plain files are reported; sub-directories are ignored.
            continue;
        }
        names.add(entry.getPath().getName());
    }
    return getNormalNames(names);
}

From source file:ch.cern.db.hdfs.Main.java

License:GNU General Public License

/**
 * Prints the metadata carried by the given file status to standard output,
 * one attribute per line, framed by an empty line before and after.
 *
 * @param status the file status whose attributes are printed
 */
private void printFileStatus(FileStatus status) {
    // Attributes are read in display order, then written line by line.
    final String[] report = {
        "",
        "Showing metadata for: " + status.getPath(),
        "   isDirectory: " + status.isDirectory(),
        "   isFile: " + status.isFile(),
        "   isSymlink: " + status.isSymlink(),
        "   encrypted: " + status.isEncrypted(),
        "   length: " + status.getLen(),
        "   replication: " + status.getReplication(),
        "   blocksize: " + status.getBlockSize(),
        "   modification_time: " + new Date(status.getModificationTime()),
        "   access_time: " + new Date(status.getAccessTime()),
        "   owner: " + status.getOwner(),
        "   group: " + status.getGroup(),
        "   permission: " + status.getPermission(),
        ""
    };
    for (final String line : report) {
        System.out.println(line);
    }
}

From source file:ch.ethz.las.wikimining.mr.io.IntegerSequenceFileReader.java

/**
 * Reads every record of the sequence file located at the path of the given
 * status and stores each (key, value) pair as plain ints in {@code map}.
 * Keys and values are cast to {@code IntWritable}.
 *
 * @param status status of the sequence file to read
 * @throws IOException if the reader cannot be opened or a record cannot be read
 */
@Override
protected void readContent(FileStatus status) throws IOException {
    try (SequenceFile.Reader sequenceReader = new SequenceFile.Reader(fs, status.getPath(), config)) {
        // The writable holders are instantiated once and refilled by every next() call.
        final IntWritable currentKey =
                (IntWritable) ReflectionUtils.newInstance(sequenceReader.getKeyClass(), config);
        final IntWritable currentValue =
                (IntWritable) ReflectionUtils.newInstance(sequenceReader.getValueClass(), config);

        boolean hasRecord = sequenceReader.next(currentKey, currentValue);
        while (hasRecord) {
            map.put(currentKey.get(), currentValue.get());
            hasRecord = sequenceReader.next(currentKey, currentValue);
        }
    }
}

From source file:ch.ethz.las.wikimining.mr.io.SequenceFileReader.java

/**
 * Reads the content of every data file directly under {@code path} by
 * delegating to {@code readContent}, then returns the accumulated map.
 * Directories and the {@code _SUCCESS} marker file are skipped.
 *
 * @return the map filled by the {@code readContent} calls
 * @throws IOException if {@code path} does not exist or a file system call fails
 */
public HashMap<E, V> read() throws IOException {
    if (!fs.exists(path)) {
        throw new IOException(path + " does not exist!");
    }

    for (final FileStatus entry : fs.listStatus(path)) {
        final boolean isDataFile = !entry.isDirectory()
                && !"_SUCCESS".equals(entry.getPath().getName());
        if (isDataFile) {
            readContent(entry);
        }
    }

    return map;
}

From source file:cmd.download.java

License:Apache License

/**
 * Concatenates the {@code Names.indexId2Node + ".dat"} files found in the
 * selected children of {@code src} into a single local file of the same name
 * inside {@code outPath}.
 *
 * <p>Only children of {@code src} whose name starts with
 * {@code Constants.NAME_SECOND} are used. They are processed in ascending name
 * order (a {@link TreeMap} is used to sort them).
 *
 * <p>Both the local output stream and every remote input stream are closed
 * even when opening or copying fails.
 *
 * @param fs            file system holding {@code src}
 * @param src           remote directory whose children are scanned
 * @param outPath       local directory receiving the merged file
 * @param configuration configuration passed to {@code IOUtils.copyBytes}
 * @throws FileNotFoundException if the local output file cannot be created
 * @throws IOException           if listing, opening or copying fails
 */
private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    FileStatus[] status = fs.listStatus(src);
    // Sorted by name so the concatenation order is deterministic.
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_SECOND)) {
            paths.put(pathName, path);
        }
    }

    File outFile = new File(outPath, Names.indexId2Node + ".dat");
    OutputStream out = new FileOutputStream(outFile);
    try {
        for (Path selected : paths.values()) {
            Path path = new Path(src, selected);
            log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile());
            InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat"));
            try {
                // 'false': copyBytes must not close the streams, 'out' is reused for the next part.
                IOUtils.copyBytes(in, out, configuration, false);
            } finally {
                in.close();
            }
        }
    } finally {
        out.close();
    }
}

From source file:cmd.download.java

License:Apache License

/**
 * Downloads index fragments from {@code src}, merges them locally into one
 * gzip file per index and builds the indexes from the merged files.
 *
 * <ol>
 *   <li>Every file inside the children of {@code src} whose name starts with
 *       {@code Constants.NAME_FOURTH} is copied to the local directory
 *       {@code outPath}.</li>
 *   <li>The local {@code *.gz} files are sorted by name. Consecutive files
 *       sharing the same prefix (the text before the first {@code '_'}) are
 *       decompressed and appended to {@code <prefix>.gz}; each fragment is
 *       deleted once merged. Files without a {@code '_'} are skipped.</li>
 *   <li>For every name in {@code Constants.indexNames} that has a merged gzip
 *       file, stale {@code .dat}/{@code .idn} files are removed,
 *       {@code CmdIndexBuild.main} is invoked and the gzip file is deleted.</li>
 * </ol>
 *
 * <p>All local streams are closed even when a copy fails.
 *
 * @param fs            file system holding {@code src}
 * @param src           remote directory whose children are scanned
 * @param outPath       local working/output directory
 * @param configuration not used by this method
 * @throws FileNotFoundException if a local file cannot be opened
 * @throws IOException           if a file system call or copy fails, or if
 *                               {@code outPath} cannot be listed
 */
private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }

    for (Path selected : paths.values()) {
        Path path = new Path(src, selected);
        for (FileStatus fileStatus : fs.listStatus(path)) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    if (files == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IOException("Cannot list local directory " + outPath);
    }
    Arrays.sort(files);
    String prevIndexName = null;
    OutputStream out = null;
    try {
        for (File file : files) {
            log.debug("Processing {}... ", file.getName());
            int separator = file.getName().indexOf("_");
            if (separator < 0) {
                // Not a fragment (e.g. a merged file left by an earlier run): nothing to merge.
                log.debug("Skipping {}: no index name prefix", file.getName());
                continue;
            }
            String indexName = file.getName().substring(0, separator);
            if (out == null) {
                out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
            } else if (!indexName.equals(prevIndexName)) {
                // Clear the field first so the finally block never closes this stream twice.
                OutputStream finished = out;
                out = null;
                finished.close();
                log.debug("Index name set to {}", indexName);
                out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
            }
            InputStream in = new GZIPInputStream(new FileInputStream(file));
            try {
                log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
                IOUtils.copyBytes(in, out, 8192, false);
            } finally {
                in.close();
            }
            // The fragment is only removed after it has been merged successfully.
            file.delete();
            prevIndexName = indexName;
        }
    } finally {
        if (out != null) {
            out.close();
        }
    }

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}

From source file:cmd.tdbloader4.java

License:Apache License

/**
 * Builds the offsets file from the first line of every {@code part-r-*} file
 * under {@code input}.
 *
 * <p>The first line of each part file is split on whitespace; token 0 is parsed
 * as the partition and token 1 as its offset. The pairs are written, sorted by
 * partition, as {@code partition<TAB>offset} lines to
 * {@code Constants.OFFSETS_FILENAME} under {@code output}.
 *
 * <p>Empty part files are skipped. Readers and the writer are always closed,
 * and a write failure swallowed by the {@link PrintWriter} is reported as an
 * {@link IOException}.
 *
 * @param fs     file system holding both {@code input} and {@code output}
 * @param input  directory containing the {@code part-r-*} files
 * @param output directory in which the offsets file is created
 * @throws IOException if listing, reading or writing fails
 */
private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException {
    log.debug("Creating offsets file...");
    // Sorted by partition so the output lines are in ascending partition order.
    Map<Long, Long> offsets = new TreeMap<Long, Long>();
    FileStatus[] status = fs.listStatus(new Path(input));
    for (FileStatus fileStatus : status) {
        Path file = fileStatus.getPath();
        if (file.getName().startsWith("part-r-")) {
            log.debug("Processing: {}", file.getName());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file)));
            try {
                String line = in.readLine();
                if (line == null) {
                    // An empty part file carries no offset information.
                    log.debug("Skipping empty file: {}", file.getName());
                    continue;
                }
                String[] tokens = line.split("\\s");
                long partition = Long.parseLong(tokens[0]);
                long offset = Long.parseLong(tokens[1]);
                log.debug("Partition {} has offset {}", partition, offset);
                offsets.put(partition, offset);
            } finally {
                in.close();
            }
        }
    }

    Path outputPath = new Path(output, Constants.OFFSETS_FILENAME);
    PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath)));
    try {
        for (Map.Entry<Long, Long> entry : offsets.entrySet()) {
            out.println(entry.getKey() + "\t" + entry.getValue());
        }
        // PrintWriter never throws on write errors; checkError() flushes and reports them.
        if (out.checkError()) {
            throw new IOException("Failed to write offsets file " + outputPath);
        }
    } finally {
        out.close();
    }
    log.debug("Offset file created.");
}

From source file:cn.lhfei.hadoop.ch03.ShowFileStatusTest.java

License:Apache License

/**
 * Checks the status reported for {@code /dir/file}: the URI path of the
 * status must match the requested path and the entry must be a file.
 */
@Test
public void fileStatusForFile() throws IOException {
    final FileStatus stat = fs.getFileStatus(new Path("/dir/file"));

    final String reportedPath = stat.getPath().toUri().getPath();
    Assert.assertEquals(reportedPath, "/dir/file");

    final boolean regularFile = stat.isFile();
    Assert.assertTrue(regularFile);

    // The block size is queried but its value is not checked.
    stat.getBlockSize();
}

From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java

License:Apache License

/**
 * Get the input splits for a stream./*from   w  w  w . jav a 2  s  .co  m*/
 *
 * @param conf Configuration of the filesystem the stream resides on.
 * @return List of input splits for the stream.
 * @throws IOException
 */
public List<T> getSplits(Configuration conf) throws IOException {
    List<T> splits = Lists.newArrayList();

    // Collects all stream event files timestamp, size and block locations information

    // First grab all directories (partition) that matches with the time range.
    FileSystem fs = path.getFileSystem(conf);
    for (FileStatus partitionStatus : fs.listStatus(path)) {

        // partition should be directory
        String pathName = partitionStatus.getPath().getName();
        if (!partitionStatus.isDirectory() || !StreamUtils.isPartition(pathName)) {
            continue;
        }

        // Match the time range
        long partitionStartTime = StreamUtils.getPartitionStartTime(pathName);
        long partitionEndTime = StreamUtils.getPartitionEndTime(pathName);
        if (partitionStartTime > endTime || partitionEndTime <= startTime) {
            continue;
        }

        // Collects all bucket file status in the partition.
        Collection<StreamDataFileSplitter> eventFiles = collectBuckets(fs, partitionStatus.getPath());

        // For each bucket inside the partition directory, compute the splits
        for (StreamDataFileSplitter splitter : eventFiles) {
            splitter.computeSplits(fs, minSplitSize, maxSplitSize, startTime, endTime, splits, splitFactory);
        }
    }

    return splits;
}

From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java

License:Apache License

/**
 * Gathers one splitter per event file found directly under a partition.
 *
 * @param fs            file system used to list the partition
 * @param partitionPath directory of the partition to scan
 * @return an immutable collection with a splitter for every file whose name
 *         matches {@code StreamFileType.EVENT}
 * @throws IOException if the partition cannot be listed
 */
private Collection<StreamDataFileSplitter> collectBuckets(FileSystem fs, Path partitionPath)
        throws IOException {
    final ImmutableList.Builder<StreamDataFileSplitter> splitters = ImmutableList.builder();

    for (final FileStatus candidate : fs.listStatus(partitionPath)) {
        final String fileName = candidate.getPath().getName();
        if (!StreamFileType.EVENT.isMatched(fileName)) {
            continue;
        }
        splitters.add(new StreamDataFileSplitter(candidate));
    }
    return splitters.build();
}