Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

This page collects example usages of the org.apache.hadoop.fs FileStatus getPath method, drawn from open-source projects.

Prototype

public Path getPath() 
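getPath() returns the Path of the file or directory that a FileStatus describes; calling getName() on that Path yields its last component. Below is a minimal, self-contained sketch of the pattern used throughout the examples on this page. The directory /user/demo and the default-configured FileSystem are illustrative assumptions, not part of the API.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample {
    public static void main(String[] args) throws IOException {
        // Assumes fs.defaultFS is picked up from the classpath (core-site.xml);
        // /user/demo is a hypothetical directory used only for illustration.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        for (FileStatus status : fs.listStatus(new Path("/user/demo"))) {
            // getPath() returns the qualified Path of each entry;
            // getName() extracts its final path component.
            System.out.println(status.getPath() + " -> " + status.getPath().getName());
        }
    }
}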

Usage

From source file: cc.solr.lucene.store.hdfs.HdfsDirectory.java

License: Apache License

@Override
public String[] listAll() throws IOException {
    FileStatus[] listStatus = getFileSystem().listStatus(_hdfsDirPath);
    List<String> files = new ArrayList<String>();
    if (listStatus == null) {
        return new String[] {};
    }
    for (FileStatus status : listStatus) {
        if (!status.isDirectory()) { // isDirectory() supersedes the deprecated isDir()
            files.add(status.getPath().getName());
        }
    }
    return getNormalNames(files);
}

From source file: ch.cern.db.hdfs.Main.java

License: GNU General Public License

private void printFileStatus(FileStatus status) {
    System.out.println();
    System.out.println("Showing metadata for: " + status.getPath());
    System.out.println("   isDirectory: " + status.isDirectory());
    System.out.println("   isFile: " + status.isFile());
    System.out.println("   isSymlink: " + status.isSymlink());
    System.out.println("   encrypted: " + status.isEncrypted());
    System.out.println("   length: " + status.getLen());
    System.out.println("   replication: " + status.getReplication());
    System.out.println("   blocksize: " + status.getBlockSize());
    System.out.println("   modification_time: " + new Date(status.getModificationTime()));
    System.out.println("   access_time: " + new Date(status.getAccessTime()));
    System.out.println("   owner: " + status.getOwner());
    System.out.println("   group: " + status.getGroup());
    System.out.println("   permission: " + status.getPermission());
    System.out.println();
}

From source file: ch.ethz.las.wikimining.mr.io.IntegerSequenceFileReader.java

@Override
protected void readContent(FileStatus status) throws IOException {
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), config)) {
        IntWritable key = (IntWritable) ReflectionUtils.newInstance(reader.getKeyClass(), config);
        IntWritable value = (IntWritable) ReflectionUtils.newInstance(reader.getValueClass(), config);
        while (reader.next(key, value)) {
            map.put(key.get(), value.get());
        }
    }
}

From source file: ch.ethz.las.wikimining.mr.io.SequenceFileReader.java

public HashMap<E, V> read() throws IOException {
    if (!fs.exists(path)) {
        throw new IOException(path + " does not exist!");
    }

    final FileStatus[] statuses = fs.listStatus(path);
    for (FileStatus status : statuses) {
        if (status.isDirectory()) {
            continue;
        }
        if ("_SUCCESS".equals(status.getPath().getName())) {
            continue;
        }

        readContent(status);
    }

    return map;
}

From source file: cmd.download.java

License: Apache License

private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_SECOND)) {
            paths.put(pathName, path);
        }
    }

    File outFile = new File(outPath, Names.indexId2Node + ".dat");
    OutputStream out = new FileOutputStream(outFile);
    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile());
        InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat"));
        IOUtils.copyBytes(in, out, configuration, false);
        in.close();
    }
    out.close();
}

From source file: cmd.download.java

License: Apache License

private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }

    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        status = fs.listStatus(path);
        for (FileStatus fileStatus : status) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    Arrays.sort(files);
    String prevIndexName = null;
    OutputStream out = null;
    for (File file : files) {
        log.debug("Processing {}... ", file.getName());
        String indexName = file.getName().substring(0, file.getName().indexOf("_"));
        if (prevIndexName == null)
            prevIndexName = indexName;
        if (out == null)
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        if (!prevIndexName.equals(indexName)) {
            if (out != null)
                out.close();
            log.debug("Index name set to {}", indexName);
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        }
        InputStream in = new GZIPInputStream(new FileInputStream(file));
        log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
        IOUtils.copyBytes(in, out, 8192, false);
        in.close();
        file.delete();
        prevIndexName = indexName;
    }
    if (out != null)
        out.close();

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}

From source file: cmd.tdbloader4.java

License: Apache License

private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException {
    log.debug("Creating offsets file...");
    Map<Long, Long> offsets = new TreeMap<Long, Long>();
    FileStatus[] status = fs.listStatus(new Path(input));
    for (FileStatus fileStatus : status) {
        Path file = fileStatus.getPath();
        if (file.getName().startsWith("part-r-")) {
            log.debug("Processing: {}", file.getName());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file)));
            String line = in.readLine();
            String[] tokens = line.split("\\s");
            long partition = Long.valueOf(tokens[0]);
            long offset = Long.valueOf(tokens[1]);
            log.debug("Partition {} has offset {}", partition, offset);
            offsets.put(partition, offset);
        }
    }

    Path outputPath = new Path(output, Constants.OFFSETS_FILENAME);
    PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath)));
    for (Long partition : offsets.keySet()) {
        out.println(partition + "\t" + offsets.get(partition));
    }
    out.close();
    log.debug("Offset file created.");
}

From source file: cn.lhfei.hadoop.ch03.ShowFileStatusTest.java

License: Apache License

@Test
public void fileStatusForFile() throws IOException {
    Path path = new Path("/dir/file");
    FileStatus stat = fs.getFileStatus(path);

    Assert.assertEquals("/dir/file", stat.getPath().toUri().getPath());
    Assert.assertTrue(stat.isFile());

    stat.getBlockSize(); // value not asserted here; getBlockSize() reports the file's HDFS block size

}

From source file: co.cask.cdap.data.stream.StreamInputSplitFinder.java

License: Apache License

/**
 * Get the input splits for a stream.
 *
 * @param conf Configuration of the filesystem the stream resides on.
 * @return List of input splits for the stream.
 * @throws IOException
 */
public List<T> getSplits(Configuration conf) throws IOException {
    List<T> splits = Lists.newArrayList();

    // Collects all stream event files timestamp, size and block locations information

    // First grab all directories (partition) that matches with the time range.
    FileSystem fs = path.getFileSystem(conf);
    for (FileStatus partitionStatus : fs.listStatus(path)) {

        // partition should be directory
        String pathName = partitionStatus.getPath().getName();
        if (!partitionStatus.isDirectory() || !StreamUtils.isPartition(pathName)) {
            continue;
        }

        // Match the time range
        long partitionStartTime = StreamUtils.getPartitionStartTime(pathName);
        long partitionEndTime = StreamUtils.getPartitionEndTime(pathName);
        if (partitionStartTime > endTime || partitionEndTime <= startTime) {
            continue;
        }

        // Collects all bucket file status in the partition.
        Collection<StreamDataFileSplitter> eventFiles = collectBuckets(fs, partitionStatus.getPath());

        // For each bucket inside the partition directory, compute the splits
        for (StreamDataFileSplitter splitter : eventFiles) {
            splitter.computeSplits(fs, minSplitSize, maxSplitSize, startTime, endTime, splits, splitFactory);
        }
    }

    return splits;
}

From source file: co.cask.cdap.data.stream.StreamInputSplitFinder.java

License: Apache License

/**
 * Collects file status of all buckets under a given partition.
 */
private Collection<StreamDataFileSplitter> collectBuckets(FileSystem fs, Path partitionPath)
        throws IOException {
    ImmutableList.Builder<StreamDataFileSplitter> builder = ImmutableList.builder();

    for (FileStatus fileStatus : fs.listStatus(partitionPath)) {
        if (StreamFileType.EVENT.isMatched(fileStatus.getPath().getName())) {
            builder.add(new StreamDataFileSplitter(fileStatus));
        }
    }
    return builder.build();
}