Example usage for org.apache.hadoop.fs.FileStatus#getPath()

List of usage examples for org.apache.hadoop.fs.FileStatus#getPath()

Introduction

On this page you can find usage examples for the getPath() method of org.apache.hadoop.fs.FileStatus.

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java

License:Apache License

/**
 * Returns the all transaction info files.
 * @param conf the current configuration
 * @return target path/*from   w w w . j  a va  2s.co m*/
 * @throws IOException if failed to find files by I/O error
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public static Collection<FileStatus> findAllTransactionInfoFiles(Configuration conf) throws IOException {
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    Path dir = getTransactionInfoDir(conf);
    FileSystem fs = dir.getFileSystem(conf);
    FileStatus[] statusArray;
    try {
        statusArray = fs.listStatus(dir);
    } catch (FileNotFoundException e) {
        statusArray = null;
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Target file is not found: {0}", dir), e); //$NON-NLS-1$
        }
    }
    if (statusArray == null || statusArray.length == 0) {
        return Collections.emptyList();
    }
    Collection<FileStatus> results = new ArrayList<>();
    for (FileStatus stat : statusArray) {
        if (getTransactionInfoExecutionId(stat.getPath()) != null) {
            results.add(stat);
        }
    }
    return results;
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java

License:Apache License

/**
 * Collects the given entries together with every entry found beneath the directories among them.
 * Entries are processed first-in first-out, and each path is reported at most once.
 * @param fs the file system used to list directories
 * @param current the entries to start from
 * @return the start entries and all entries discovered below them, in visiting order
 * @throws IOException if listing a directory failed for a reason other than a missing path
 */
private static List<FileStatus> recursiveStep(FileSystem fs, List<FileStatus> current) throws IOException {
    assert fs != null;
    assert current != null;
    Set<Path> visited = new HashSet<>();
    List<FileStatus> collected = new ArrayList<>();
    LinkedList<FileStatus> queue = new LinkedList<>(current);
    while (!queue.isEmpty()) {
        FileStatus entry = queue.removeFirst();
        Path location = entry.getPath();
        if (!visited.add(location)) {
            // this path has already been reported
            continue;
        }
        collected.add(entry);
        if (!FileSystemCompatibility.isDirectory(entry)) {
            continue;
        }
        FileStatus[] children = null;
        try {
            children = fs.listStatus(location);
        } catch (FileNotFoundException e) {
            // the path was not found while listing: the entry itself stays in the result
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("Target file is not found: {0}", location), e); //$NON-NLS-1$
            }
        }
        if (children != null) {
            for (FileStatus child : children) {
                queue.addLast(child);
            }
        }
    }
    return collected;
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java

License:Apache License

/**
 * Expands each glob expression relative to every directory in {@code current}.
 * Non-directory entries are skipped, and each matched path is reported at most once.
 * @param fs the file system used to expand the globs
 * @param current the entries whose directories serve as glob bases
 * @param expressions the glob expressions, resolved against each base directory
 * @return the matched entries in the order they were first found
 * @throws IOException if glob expansion failed
 */
private static List<FileStatus> globStep(FileSystem fs, List<FileStatus> current, List<Path> expressions)
        throws IOException {
    assert fs != null;
    assert current != null;
    assert expressions != null;
    Set<Path> seen = new HashSet<>();
    List<FileStatus> matched = new ArrayList<>();
    for (FileStatus base : current) {
        if (FileSystemCompatibility.isDirectory(base)) {
            for (Path expression : expressions) {
                FileStatus[] expanded = fs.globStatus(new Path(base.getPath(), expression));
                if (expanded == null) {
                    // nothing matched this expression
                    continue;
                }
                for (FileStatus hit : expanded) {
                    if (seen.add(hit.getPath())) {
                        matched.add(hit);
                    }
                }
            }
        }
    }
    return matched;
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java

License:Apache License

/**
 * Applies the {@code Path}-based {@code contains} check to the paths of the two statuses.
 * @param dir the status whose path is passed as the parent; must be a directory
 * @param target the status whose path is passed as the child
 * @return the result of the {@code Path}-based check
 */
private static boolean contains(FileStatus dir, FileStatus target) {
    assert dir != null;
    assert target != null;
    assert FileSystemCompatibility.isDirectory(dir);
    return contains(dir.getPath(), target.getPath());
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java

License:Apache License

/**
 * Lists every non-directory entry at or below {@code source} as a path relative to {@code source}.
 * The counter is advanced once after the source is resolved and once per listed file.
 * @param counter the progress counter
 * @param fs the file system that holds {@code source}
 * @param source the absolute path to scan
 * @return the sorted relative paths, or an empty list if {@code source} does not exist
 * @throws IOException if scanning failed, or a relative path could not be computed
 */
@SuppressWarnings("unchecked")
private static List<Path> createFileListRelative(Counter counter, FileSystem fs, Path source)
        throws IOException {
    assert counter != null;
    assert fs != null;
    assert source != null;
    assert source.isAbsolute();
    URI sourceUri = source.toUri();
    FileStatus sourceStatus;
    try {
        sourceStatus = fs.getFileStatus(source);
    } catch (FileNotFoundException e) {
        LOG.warn(MessageFormat.format("Source path is not found: {0} (May be already moved)", sourceUri));
        return Collections.emptyList();
    }
    counter.add(1);
    List<FileStatus> entries = recursiveStep(fs, Collections.singletonList(sourceStatus));
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Source path contains {1} files/directories: {0}", //$NON-NLS-1$
                sourceUri, entries.size()));
    }
    List<Path> relativePaths = new ArrayList<>();
    for (FileStatus entry : entries) {
        if (FileSystemCompatibility.isDirectory(entry)) {
            continue;
        }
        URI entryUri = entry.getPath().toUri();
        URI relativeUri = sourceUri.relativize(entryUri);
        // relativize() returning its argument unchanged is treated as a failure
        if (relativeUri.equals(entryUri)) {
            throw new IOException(MessageFormat.format("Failed to compute relative path: base={0}, target={1}",
                    sourceUri, entryUri));
        }
        relativePaths.add(new Path(relativeUri));
        counter.add(1);
    }
    Collections.sort(relativePaths);
    return relativePaths;
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtilTest.java

License:Apache License

/**
 * Test for transaction info.
 * Creates two transaction info files and one commit mark, then checks that only
 * the two transaction info files are found.
 * @throws Exception if failed
 */
@Test
public void transactionInfo() throws Exception {
    Configuration conf = new Configuration();
    String systemDir = folder.getRoot().getAbsoluteFile().toURI().toString();
    conf.set(HadoopDataSourceUtil.KEY_SYSTEM_DIR, systemDir);

    // nothing has been created yet
    assertThat("empty system dir", folder.getRoot().listFiles(), is(new File[0]));
    assertThat(HadoopDataSourceUtil.findAllTransactionInfoFiles(conf).size(), is(0));

    // first transaction info file
    Path firstInfo = HadoopDataSourceUtil.getTransactionInfoPath(conf, "ex1");
    assertThat(HadoopDataSourceUtil.getTransactionInfoExecutionId(firstInfo), is("ex1"));
    firstInfo.getFileSystem(conf).create(firstInfo).close();

    assertThat(folder.getRoot().listFiles().length, is(greaterThan(0)));

    // second transaction info file
    Path secondInfo = HadoopDataSourceUtil.getTransactionInfoPath(conf, "ex2");
    assertThat(secondInfo, is(not(firstInfo)));
    assertThat(HadoopDataSourceUtil.getTransactionInfoExecutionId(secondInfo), is("ex2"));
    secondInfo.getFileSystem(conf).create(secondInfo).close();

    // a commit mark must not be reported as a transaction info file
    Path commitMark = HadoopDataSourceUtil.getCommitMarkPath(conf, "ex2");
    assertThat(commitMark, is(not(secondInfo)));
    commitMark.getFileSystem(conf).create(commitMark).close();

    List<Path> found = new ArrayList<>();
    for (FileStatus status : HadoopDataSourceUtil.findAllTransactionInfoFiles(conf)) {
        found.add(status.getPath());
    }
    assertThat(found.size(), is(2));
    assertThat(found, hasItem(firstInfo));
    assertThat(found, hasItem(secondInfo));
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtilTest.java

License:Apache License

/**
 * check covered./*  w  w w .j a v a 2  s  . c o  m*/
 * @throws Exception if failed
 */
@Test
public void minimalCovered_deep() throws Exception {
    touch("dir/a.csv");
    touch("dir/a/b.csv");
    touch("dir/a/b/c.csv");
    FileSystem fs = getTempFileSystem();
    List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("dir/**"));
    for (Iterator<FileStatus> iterator = raw.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = iterator.next();
        if (fileStatus.getPath().getName().equals("dir")) {
            iterator.remove();
        }
    }
    assertThat(raw.size(), is(5));
    List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw);
    assertThat(normalize(results), is(path("dir/a.csv", "dir/a")));
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtilTest.java

License:Apache License

/**
 * Converts file statuses into sorted paths relative to the temporary folder root.
 * Each result uses {@code '/'} as separator and has no leading separator.
 * @param stats the statuses to convert; each must lie under the temporary folder root
 * @return the sorted relative paths
 * @throws IOException if the canonical form of the folder root cannot be resolved
 */
private List<String> normalize(List<FileStatus> stats) throws IOException {
    File base = folder.getRoot().getCanonicalFile();
    List<String> relativePaths = new ArrayList<>();
    for (FileStatus stat : stats) {
        URI location = stat.getPath().toUri();
        try {
            String absolute = new File(location).getCanonicalFile().getAbsolutePath();
            String prefix = base.getAbsolutePath();
            assertThat(absolute, startsWith(prefix));
            String relative = absolute.substring(prefix.length());
            // strip the separator(s) left over after removing the base prefix
            while (relative.startsWith(File.separator)) {
                relative = relative.substring(1);
            }
            relativePaths.add(File.separatorChar == '/'
                    ? relative
                    : relative.replace(File.separatorChar, '/'));
        } catch (IOException e) {
            throw new AssertionError(e);
        }
    }
    Collections.sort(relativePaths);
    return relativePaths;
}

From source file:com.asakusafw.runtime.io.sequencefile.SequenceFileUtil.java

License:Apache License

/**
 * Opens a sequence file reader over the given stream.
 * The length of the sequence file is taken from {@code status}.
 * @param in the source
 * @param status target file status
 * @param conf current configuration
 * @return the created sequence file reader
 * @throws IOException if failed to open the sequence file
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public static SequenceFile.Reader openReader(InputStream in, FileStatus status, Configuration conf)
        throws IOException {
    // arguments are validated in declaration order
    if (in == null) {
        throw new IllegalArgumentException("in must not be null"); //$NON-NLS-1$
    }
    if (status == null) {
        throw new IllegalArgumentException("status must not be null"); //$NON-NLS-1$
    }
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    if (LOG.isDebugEnabled()) {
        String message = MessageFormat.format("Creating sequence file reader for {0}", //$NON-NLS-1$
                status.getPath());
        LOG.debug(message);
    }
    long length = status.getLen();
    return SequenceFileCompatibility.openReader(in, length, conf);
}

From source file:com.asakusafw.runtime.stage.input.TemporaryInputFormat.java

License:Apache License

/**
 * Computes the input splits for every file matched by the given path patterns.
 * The split size is read from the configuration under {@code KEY_DEFAULT_SPLIT_SIZE},
 * falling back to {@code DEFAULT_SPLIT_SIZE}.
 * @param configuration the current configuration
 * @param paths the path patterns to expand
 * @return the splits of all matched files, in pattern order
 * @throws IOException if expanding a pattern or computing blocks failed
 */
private List<InputSplit> getSplits(Configuration configuration, List<Path> paths) throws IOException {
    long splitSize = configuration.getLong(KEY_DEFAULT_SPLIT_SIZE, DEFAULT_SPLIT_SIZE);
    List<InputSplit> splits = new ArrayList<>();
    for (Path pattern : paths) {
        FileSystem fs = pattern.getFileSystem(configuration);
        FileStatus[] matches = fs.globStatus(pattern);
        if (matches != null) {
            for (FileStatus match : matches) {
                Path file = match.getPath();
                BlockMap blockMap = BlockMap.create(file.toString(), match.getLen(),
                        BlockMap.computeBlocks(fs, match), false);
                splits.addAll(computeSplits(file, blockMap, splitSize));
            }
        }
    }
    return splits;
}