List of usage examples for the org.apache.hadoop.fs.PathFilter interface
PathFilter
From source file:org.mule.modules.hdfs.automation.functional.GlobStatusTestCases.java
License:Open Source License
/**
 * Checks that globStatus returns a non-null, empty list when the supplied
 * filter rejects every candidate path.
 */
@Test
public void testGlobStatusWhenNoFileMatches() throws Exception {
    // A filter that never accepts anything, so no status can match.
    PathFilter rejectEverything = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return false;
        }
    };

    List<FileStatus> matches = getConnector().globStatus(PARENT_DIRECTORY + "/2013/*/*", rejectEverything);

    Assert.assertThat(matches, notNullValue());
    Assert.assertThat(matches, empty());
}
From source file:org.mule.modules.hdfs.automation.unit.HDFSConnectorTest.java
License:Open Source License
@Test public void testGlobStatus() { try {//from www . j a v a 2s.c o m when(fileSystem.globStatus(any(Path.class), any(PathFilter.class))).thenReturn(new FileStatus[0]); connector.globStatus("foo", new PathFilter() { @Override public boolean accept(Path path) { return false; } }); } catch (Exception e) { fail(ConnectorTestUtils.getStackTrace(e)); } }
From source file:org.mule.modules.hdfs.HDFSConnector.java
License:Open Source License
/** * List the statuses of the files/directories in the given path if the path is a directory * * @param path//from w w w . ja v a 2 s . co m * the given path * @param filter * the user supplied path filter * @return FileStatus the statuses of the files/directories in the given path * @throws HDFSConnectorException * if any issue occurs during the execution. */ @Processor public List<FileStatus> listStatus(final String path, @Optional final String filter) throws HDFSConnectorException { try { return runHdfsPathAction(path, new HdfsPathAction<List<FileStatus>>() { public List<FileStatus> run(final Path hdfsPath) throws Exception { // NOSONAR if (StringUtils.isNotEmpty(filter)) { final Pattern pattern = Pattern.compile(filter); PathFilter pathFilter = new PathFilter() { @Override public boolean accept(Path path) { return isDirectory(path, pattern); } }; return Arrays.asList(fileSystem.listStatus(hdfsPath, pathFilter)); } return Arrays.asList(fileSystem.listStatus(hdfsPath)); } }); } catch (Exception e) { throw new HDFSConnectorException(e); } }
From source file:org.mule.modules.hdfs.HDFSConnector.java
License:Open Source License
/** * Return all the files that match file pattern and are not checksum files. Results are sorted by their names. * * @param pathPattern/* w w w.j a v a 2 s . c o m*/ * a regular expression specifying the path pattern. * @param filter * the user supplied path filter * @return FileStatus an array of paths that match the path pattern. * @throws HDFSConnectorException * if any issue occurs during the execution. */ @Processor public List<FileStatus> globStatus(final String pathPattern, @Optional final PathFilter filter) throws HDFSConnectorException { try { return runHdfsPathAction(pathPattern, new HdfsPathAction<List<FileStatus>>() { public List<FileStatus> run(final Path hdfsPath) throws Exception { // NOSONAR PathFilter nonNullPathFilter = (filter != null) ? filter : new PathFilter() { @Override public boolean accept(Path path) { return true; } }; FileStatus[] fileStatusesAsArray = fileSystem.globStatus(hdfsPath, nonNullPathFilter); return (fileStatusesAsArray != null) ? Arrays.asList(fileStatusesAsArray) : new ArrayList<FileStatus>(); } }); } catch (Exception e) { throw new HDFSConnectorException(e); } }
From source file:org.oclc.firefly.hadoop.backup.BackupUtils.java
License:Apache License
/** * Looks under the table directory in the filesystem for files with a '.tableinfo' prefix. Returns * reference to the 'latest' instance.// w w w . j ava2 s . co m * * @param fs The filesytem where to look * @param tableDirPath the hdfs table directory * @return The 'current' tableinfo file. * @throws IOException If failed to read from file system */ public static FileStatus getTableInfoPath(final FileSystem fs, final Path tableDirPath) throws IOException { FileStatus ret = null; FileStatus[] status = FSUtils.listStatus(fs, tableDirPath, new PathFilter() { @Override public boolean accept(Path p) { // Accept any file that starts with TABLEINFO_NAME return p.getName().startsWith(FSTableDescriptors.TABLEINFO_NAME); } }); if (status != null && status.length > 0) { Arrays.sort(status, new Comparator<FileStatus>() { @Override public int compare(FileStatus left, FileStatus right) { return -left.compareTo(right); } }); if (status.length > 1) { // Clean away old versions of .tableinfo for (int i = 1; i < status.length; i++) { Path p = status[i].getPath(); // Clean up old versions if (!fs.delete(p, false)) { LOG.warn("Failed cleanup of " + status); } else { LOG.debug("Cleaned up old tableinfo file " + p); } } } ret = status[0]; } return ret; }
From source file:org.openflamingo.fs.hdfs.HdfsFileSystemProvider.java
License:Apache License
@Override public List<FileInfo> list(String path) { List<FileInfo> fileInfos = new ArrayList<FileInfo>(); try {/*from w w w. j a va2 s . c o m*/ FileStatus[] files = fs.listStatus(new Path(path), new PathFilter() { @Override public boolean accept(Path path) { try { return fs.isFile(path); } catch (IOException e) { // Hadoop FileSystem Access Error } return false; } }); for (FileStatus file : files) { fileInfos.add(new HdfsFileInfo(file)); } return fileInfos; } catch (Exception ex) { throw new FileSystemException(bundle.message("S_FS", "CANNOT_GET_LIST", path), ex); } }
From source file:org.pentaho.hadoop.shim.common.format.orc.PentahoOrcInputFormat.java
License:Apache License
/**
 * Opens an ORC reader for {@code fileName}, running inside {@code inClassloader}.
 * <p>
 * If the path is a directory, the first entry whose name ends with ".orc" (in the order
 * returned by {@code listStatus}) is opened instead. Any {@link IOException}, including
 * the {@link NoSuchFileException} raised for a missing path or a directory without
 * ".orc" files, is rethrown wrapped in a {@link RuntimeException}.
 *
 * @return the ORC reader for the resolved file
 * @throws Exception as declared by the original signature
 */
private Reader getReader() throws Exception {
    return inClassloader(() -> {
        checkNullFileName();
        try {
            Path target = new Path(fileName);
            FileSystem targetFs = FileSystem.get(target.toUri(), conf);

            if (!targetFs.exists(target)) {
                throw new NoSuchFileException(fileName);
            }

            if (targetFs.getFileStatus(target).isDirectory()) {
                FileStatus[] orcFiles = targetFs.listStatus(target, new PathFilter() {
                    public boolean accept(Path file) {
                        return file.getName().endsWith(".orc");
                    }
                });
                if (orcFiles.length == 0) {
                    throw new NoSuchFileException(fileName);
                }
                // Read the first matching file of the directory.
                target = orcFiles[0].getPath();
            }

            return OrcFile.createReader(target, OrcFile.readerOptions(conf).filesystem(targetFs));
        } catch (IOException e) {
            throw new RuntimeException("Unable to read data from file " + fileName, e);
        }
    });
}
From source file:org.pentaho.hadoop.shim.common.format.orc.PentahoOrcRecordReader.java
License:Apache License
/**
 * Creates a record reader over an ORC file.
 * <p>
 * Steps performed, in order:
 * <ol>
 * <li>Resolve {@code fileName}; if it is a directory, use the first entry whose name ends
 * with ".orc".</li>
 * <li>Open the ORC reader and its row reader.</li>
 * <li>Build the input fields from the file schema, read the metadata into them, and create
 * the row batch.</li>
 * <li>Map every non-null dialog input field to the position of its ORC column.</li>
 * <li>Load the first batch via {@code setNextBatch()}.</li>
 * </ol>
 *
 * @param fileName path of the ORC file, or of a directory containing ".orc" files
 * @param conf Hadoop configuration used to obtain the file system and reader options
 * @param dialogInputFields the fields requested by the caller; null entries are skipped
 * @throws IllegalArgumentException if the file cannot be read, the row reader cannot be
 *         obtained, a requested column is missing from the ORC schema, or the first batch
 *         cannot be loaded
 */
public PentahoOrcRecordReader(String fileName, Configuration conf,
        List<? extends IOrcInputField> dialogInputFields) {
    this.conf = conf;
    this.dialogInputFields = dialogInputFields;

    Reader orcReader;
    try {
        filePath = new Path(fileName);
        fs = FileSystem.get(filePath.toUri(), conf);
        if (!fs.exists(filePath)) {
            throw new NoSuchFileException(fileName);
        }
        if (fs.getFileStatus(filePath).isDirectory()) {
            FileStatus[] orcFiles = fs.listStatus(filePath, new PathFilter() {
                public boolean accept(Path file) {
                    return file.getName().endsWith(".orc");
                }
            });
            if (orcFiles.length == 0) {
                throw new NoSuchFileException(fileName);
            }
            // Read the first matching file of the directory.
            filePath = orcFiles[0].getPath();
        }
        orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf).filesystem(fs));
    } catch (IOException e) {
        throw new IllegalArgumentException("Unable to read data from file " + fileName, e);
    }

    try {
        recordReader = orcReader.rows();
    } catch (IOException e) {
        throw new IllegalArgumentException("Unable to get record reader for file " + fileName, e);
    }

    typeDescription = orcReader.getSchema();
    orcInputFields = new OrcSchemaConverter().buildInputFields(typeDescription);
    IOrcMetaData.Reader metaDataReader = new OrcMetaDataReader(orcReader);
    metaDataReader.read(orcInputFields);
    batch = typeDescription.createRowBatch();

    // Create a map of orc fields to meta columns
    Map<String, Integer> orcIndexByName = new HashMap<String, Integer>();
    int nextIndex = 0;
    for (String orcName : typeDescription.getFieldNames()) {
        orcIndexByName.put(orcName, nextIndex);
        nextIndex++;
    }

    // Create a map of input fields to Orc Column numbers
    schemaToOrcSubcripts = new HashMap<String, Integer>();
    for (IOrcInputField inputField : dialogInputFields) {
        if (inputField == null) {
            continue;
        }
        Integer orcIndex = orcIndexByName.get(inputField.getFormatFieldName());
        if (orcIndex == null) {
            throw new IllegalArgumentException("Column " + inputField.getFormatFieldName()
                    + " does not exist in the ORC file.  Please use the getFields button");
        }
        schemaToOrcSubcripts.put(inputField.getPentahoFieldName(), orcIndex);
    }

    try {
        setNextBatch();
    } catch (IOException e) {
        throw new IllegalArgumentException("No rows to read in " + fileName, e);
    }
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected void loadFromDir(TransientElement part, long numentries, FileSystem fs, Path path) throws IOException { PathFilter filter = new PathFilter() { @Override//from ww w . jav a 2s.c om public boolean accept(Path path) { return !path.getName().startsWith("_"); } }; FileStatus[] status = fs.listStatus(path, filter); if (status.length == 0) { System.out.println("Path [" + path + "] has no files. Initializing section."); part.initialize(0); } else { Arrays.sort(status, new FileStatusComparator()); System.out.println("Initializing section " + path); part.initialize(numentries); for (FileStatus file : status) { System.out.println("Reading file [" + file.getPath() + "]"); SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), this.conf.getConfigurationObject()); part.load(reader, this.listener); reader.close(); } System.out.println("Closing section " + path); part.close(); } }
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
/**
 * Loads the triples section from every sequence file found directly under {@code path}.
 * <p>
 * Entries whose name starts with "_" are skipped. If nothing remains, the section is
 * initialized via {@code initialize(0, 0)}. Otherwise the files are sorted with
 * {@code FileStatusComparator}, the section is initialized with the supplied sizes, each
 * file is loaded in turn, and the section is closed.
 *
 * @param part the triples section to populate
 * @param numentries number of entries used to initialize the section when files exist
 * @param maxpredicate maximum predicate value passed to the section's initializer
 * @param maxobject maximum object value passed to the section's initializer
 * @param fs the file system holding the files
 * @param path the directory to read from
 * @throws IOException if listing, opening, loading or closing a file fails
 */
protected void loadFromDir(TransientBitMapTriples part, long numentries, long maxpredicate, long maxobject,
        FileSystem fs, Path path) throws IOException {
    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };

    FileStatus[] status = fs.listStatus(path, filter);
    if (status.length == 0) {
        System.out.println("Path [" + path + "] has no files. Initializing section.");
        part.initialize(0, 0);
    } else {
        Arrays.sort(status, new FileStatusComparator());
        System.out.println("Initializing section " + path);
        part.initialize(numentries, maxpredicate, maxobject);
        for (FileStatus file : status) {
            System.out.println("Reading file [" + file.getPath() + "]");
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(),
                    this.conf.getConfigurationObject());
            try {
                part.load(reader, this.listener);
            } finally {
                // Always release the reader, even when loading fails part-way through.
                reader.close();
            }
        }
        System.out.println("Closing section " + path);
        part.close();
    }
}