List of usage examples for org.apache.hadoop.fs.FileStatus#getPath()
public Path getPath()
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java
License:Apache License
/** * Returns the all transaction info files. * @param conf the current configuration * @return target path/*from w w w . j a va 2s.co m*/ * @throws IOException if failed to find files by I/O error * @throws IllegalArgumentException if some parameters were {@code null} */ public static Collection<FileStatus> findAllTransactionInfoFiles(Configuration conf) throws IOException { if (conf == null) { throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$ } Path dir = getTransactionInfoDir(conf); FileSystem fs = dir.getFileSystem(conf); FileStatus[] statusArray; try { statusArray = fs.listStatus(dir); } catch (FileNotFoundException e) { statusArray = null; if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Target file is not found: {0}", dir), e); //$NON-NLS-1$ } } if (statusArray == null || statusArray.length == 0) { return Collections.emptyList(); } Collection<FileStatus> results = new ArrayList<>(); for (FileStatus stat : statusArray) { if (getTransactionInfoExecutionId(stat.getPath()) != null) { results.add(stat); } } return results; }
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java
License:Apache License
private static List<FileStatus> recursiveStep(FileSystem fs, List<FileStatus> current) throws IOException { assert fs != null; assert current != null; Set<Path> paths = new HashSet<>(); List<FileStatus> results = new ArrayList<>(); LinkedList<FileStatus> work = new LinkedList<>(current); while (work.isEmpty() == false) { FileStatus next = work.removeFirst(); Path path = next.getPath(); if (paths.contains(path) == false) { paths.add(path);//from w ww .j a v a 2s. co m results.add(next); if (FileSystemCompatibility.isDirectory(next)) { FileStatus[] children; try { children = fs.listStatus(path); } catch (FileNotFoundException e) { children = null; if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Target file is not found: {0}", path), e); //$NON-NLS-1$ } } if (children != null) { Collections.addAll(work, children); } } } } return results; }
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java
License:Apache License
private static List<FileStatus> globStep(FileSystem fs, List<FileStatus> current, List<Path> expressions) throws IOException { assert fs != null; assert current != null; assert expressions != null; Set<Path> paths = new HashSet<>(); List<FileStatus> results = new ArrayList<>(); for (FileStatus status : current) { if (FileSystemCompatibility.isDirectory(status) == false) { continue; }//from w w w .j a va2 s. c o m for (Path expression : expressions) { Path path = new Path(status.getPath(), expression); FileStatus[] expanded = fs.globStatus(path); if (expanded != null) { for (FileStatus s : expanded) { Path p = s.getPath(); if (paths.contains(p) == false) { paths.add(p); results.add(s); } } } } } return results; }
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java
License:Apache License
/**
 * Tests containment between two entries by delegating to the path-based
 * {@code contains} with the paths of both statuses.
 * @param dir the candidate container; asserted to be a directory
 * @param target the entry to test
 * @return the result of the path-based {@code contains} for the two paths
 */
private static boolean contains(FileStatus dir, FileStatus target) {
    assert dir != null;
    assert target != null;
    assert FileSystemCompatibility.isDirectory(dir);
    return contains(dir.getPath(), target.getPath());
}
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java
License:Apache License
@SuppressWarnings("unchecked") private static List<Path> createFileListRelative(Counter counter, FileSystem fs, Path source) throws IOException { assert counter != null; assert fs != null; assert source != null; assert source.isAbsolute(); URI baseUri = source.toUri(); FileStatus root;/*from w w w . j a v a2 s .c om*/ try { root = fs.getFileStatus(source); } catch (FileNotFoundException e) { LOG.warn(MessageFormat.format("Source path is not found: {0} (May be already moved)", baseUri)); return Collections.emptyList(); } counter.add(1); List<FileStatus> all = recursiveStep(fs, Collections.singletonList(root)); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Source path contains {1} files/directories: {0}", //$NON-NLS-1$ baseUri, all.size())); } List<Path> results = new ArrayList<>(); for (FileStatus stat : all) { if (FileSystemCompatibility.isDirectory(stat)) { continue; } Path path = stat.getPath(); URI uri = path.toUri(); URI relative = baseUri.relativize(uri); if (relative.equals(uri) == false) { results.add(new Path(relative)); } else { throw new IOException(MessageFormat.format("Failed to compute relative path: base={0}, target={1}", baseUri, uri)); } counter.add(1); } Collections.sort(results); return results; }
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtilTest.java
License:Apache License
/**
 * Test for transaction info.
 * <p>
 * Creates two transaction info files and one commit mark under the system directory,
 * then checks that only the two transaction info files are found.
 * </p>
 * @throws Exception if failed
 */
@Test
public void transactionInfo() throws Exception {
    Configuration conf = new Configuration();
    String systemDir = folder.getRoot().getAbsoluteFile().toURI().toString();
    conf.set(HadoopDataSourceUtil.KEY_SYSTEM_DIR, systemDir);

    // nothing has been created yet
    assertThat("empty system dir", folder.getRoot().listFiles(), is(new File[0]));
    assertThat(HadoopDataSourceUtil.findAllTransactionInfoFiles(conf).size(), is(0));

    Path firstInfo = HadoopDataSourceUtil.getTransactionInfoPath(conf, "ex1");
    assertThat(HadoopDataSourceUtil.getTransactionInfoExecutionId(firstInfo), is("ex1"));
    firstInfo.getFileSystem(conf).create(firstInfo).close();
    assertThat(folder.getRoot().listFiles().length, is(greaterThan(0)));

    Path secondInfo = HadoopDataSourceUtil.getTransactionInfoPath(conf, "ex2");
    assertThat(secondInfo, is(not(firstInfo)));
    assertThat(HadoopDataSourceUtil.getTransactionInfoExecutionId(secondInfo), is("ex2"));
    secondInfo.getFileSystem(conf).create(secondInfo).close();

    // the commit mark is a separate file and must not be counted below
    Path secondCommit = HadoopDataSourceUtil.getCommitMarkPath(conf, "ex2");
    assertThat(secondCommit, is(not(secondInfo)));
    secondCommit.getFileSystem(conf).create(secondCommit).close();

    List<Path> found = new ArrayList<>();
    for (FileStatus status : HadoopDataSourceUtil.findAllTransactionInfoFiles(conf)) {
        found.add(status.getPath());
    }
    assertThat(found.size(), is(2));
    assertThat(found, hasItem(firstInfo));
    assertThat(found, hasItem(secondInfo));
}
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtilTest.java
License:Apache License
/** * check covered./* w w w .j a v a 2 s . c o m*/ * @throws Exception if failed */ @Test public void minimalCovered_deep() throws Exception { touch("dir/a.csv"); touch("dir/a/b.csv"); touch("dir/a/b/c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("dir/**")); for (Iterator<FileStatus> iterator = raw.iterator(); iterator.hasNext();) { FileStatus fileStatus = iterator.next(); if (fileStatus.getPath().getName().equals("dir")) { iterator.remove(); } } assertThat(raw.size(), is(5)); List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw); assertThat(normalize(results), is(path("dir/a.csv", "dir/a"))); }
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtilTest.java
License:Apache License
private List<String> normalize(List<FileStatus> stats) throws IOException { File base = folder.getRoot().getCanonicalFile(); List<String> normalized = new ArrayList<>(); for (FileStatus stat : stats) { URI uri = stat.getPath().toUri(); try {// ww w . j a v a2s . c o m File file = new File(uri).getCanonicalFile(); String f = file.getAbsolutePath(); String b = base.getAbsolutePath(); assertThat(f, startsWith(b)); String r = f.substring(b.length()); while (r.startsWith(File.separator)) { r = r.substring(1); } if (File.separatorChar != '/') { r = r.replace(File.separatorChar, '/'); } normalized.add(r); } catch (IOException e) { throw new AssertionError(e); } } Collections.sort(normalized); return normalized; }
From source file:com.asakusafw.runtime.io.sequencefile.SequenceFileUtil.java
License:Apache License
/** * Creates a new reader.//from ww w .j a va 2 s . co m * @param in the source * @param status target file status * @param conf current configuration * @return the created sequence file reader * @throws IOException if failed to open the sequence file * @throws IllegalArgumentException if some parameters were {@code null} */ public static SequenceFile.Reader openReader(InputStream in, FileStatus status, Configuration conf) throws IOException { if (in == null) { throw new IllegalArgumentException("in must not be null"); //$NON-NLS-1$ } if (status == null) { throw new IllegalArgumentException("status must not be null"); //$NON-NLS-1$ } if (conf == null) { throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$ } if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Creating sequence file reader for {0}", //$NON-NLS-1$ status.getPath())); } return SequenceFileCompatibility.openReader(in, status.getLen(), conf); }
From source file:com.asakusafw.runtime.stage.input.TemporaryInputFormat.java
License:Apache License
/**
 * Computes the input splits for every file matched by the given path patterns.
 * <p>
 * Each path is expanded as a glob on its own file system. A pattern whose expansion
 * yields {@code null} is skipped.
 * </p>
 * @param configuration the current configuration; also supplies the split size
 * @param paths the path patterns to expand
 * @return the splits of all matched files, in pattern order
 * @throws IOException if resolving a file system, a glob, or block information fails
 */
private List<InputSplit> getSplits(Configuration configuration, List<Path> paths) throws IOException {
    long splitSize = configuration.getLong(KEY_DEFAULT_SPLIT_SIZE, DEFAULT_SPLIT_SIZE);
    List<InputSplit> splits = new ArrayList<>();
    for (Path pattern : paths) {
        FileSystem fs = pattern.getFileSystem(configuration);
        FileStatus[] matched = fs.globStatus(pattern);
        if (matched != null) {
            for (FileStatus file : matched) {
                Path location = file.getPath();
                BlockMap blockMap = BlockMap.create(
                        location.toString(), file.getLen(), BlockMap.computeBlocks(fs, file), false);
                splits.addAll(computeSplits(location, blockMap, splitSize));
            }
        }
    }
    return splits;
}