Example usage for org.apache.hadoop.fs.PathFilter

Introduction

This page collects example usages of org.apache.hadoop.fs.PathFilter, drawn from open source projects.

Prototype

public interface PathFilter {
    boolean accept(Path path);
}
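
PathFilter has a single accept method, so callers usually supply an implementation inline. Below is a minimal sketch of filtering a directory listing by file suffix; the method name listTextFiles and the ".txt" suffix are illustrative, not taken from the examples below. It assumes imports from org.apache.hadoop.conf (Configuration), org.apache.hadoop.fs (FileSystem, FileStatus, Path, PathFilter) and java.io.IOException.

public static FileStatus[] listTextFiles(Configuration conf, String dir) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    // Keep only entries whose name ends with ".txt"
    return fs.listStatus(new Path(dir), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(".txt");
        }
    });
}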

Usage

From source file:org.mule.modules.hdfs.automation.functional.GlobStatusTestCases.java

License:Open Source License

@Test
public void testGlobStatusWhenNoFileMatches() throws Exception {
    List<FileStatus> fileStatuses = getConnector().globStatus(PARENT_DIRECTORY + "/2013/*/*", new PathFilter() {

        @Override
        public boolean accept(Path path) {
            return false;
        }
    });
    Assert.assertThat(fileStatuses, notNullValue());
    Assert.assertThat(fileStatuses, empty());
}

From source file:org.mule.modules.hdfs.automation.unit.HDFSConnectorTest.java

License:Open Source License

@Test
public void testGlobStatus() {
    try {
        when(fileSystem.globStatus(any(Path.class), any(PathFilter.class))).thenReturn(new FileStatus[0]);
        connector.globStatus("foo", new PathFilter() {

            @Override
            public boolean accept(Path path) {
                return false;
            }
        });
    } catch (Exception e) {
        fail(ConnectorTestUtils.getStackTrace(e));
    }
}

From source file:org.mule.modules.hdfs.HDFSConnector.java

License:Open Source License

/**
 * List the statuses of the files/directories in the given path if the path is a directory
 *
 * @param path
 *            the given path
 * @param filter
 *            the user supplied path filter
 * @return FileStatus the statuses of the files/directories in the given path
 * @throws HDFSConnectorException
 *             if any issue occurs during the execution.
 */
@Processor
public List<FileStatus> listStatus(final String path, @Optional final String filter)
        throws HDFSConnectorException {
    try {
        return runHdfsPathAction(path, new HdfsPathAction<List<FileStatus>>() {

            public List<FileStatus> run(final Path hdfsPath) throws Exception { // NOSONAR
                if (StringUtils.isNotEmpty(filter)) {
                    final Pattern pattern = Pattern.compile(filter);
                    PathFilter pathFilter = new PathFilter() {

                        @Override
                        public boolean accept(Path path) {
                            return isDirectory(path, pattern);
                        }
                    };
                    return Arrays.asList(fileSystem.listStatus(hdfsPath, pathFilter));
                }
                return Arrays.asList(fileSystem.listStatus(hdfsPath));
            }
        });
    } catch (Exception e) {
        throw new HDFSConnectorException(e);
    }
}
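
The processor above compiles the user-supplied filter string into a java.util.regex.Pattern and delegates the match to an isDirectory helper that is not shown here. A minimal standalone sketch of the same idea, simply matching the pattern against the full path string (the helper name listMatching is illustrative):

public static FileStatus[] listMatching(FileSystem fs, Path dir, String regex) throws IOException {
    final Pattern pattern = Pattern.compile(regex);
    return fs.listStatus(dir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // Accept entries whose full path matches the supplied expression
            return pattern.matcher(path.toString()).matches();
        }
    });
}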

From source file:org.mule.modules.hdfs.HDFSConnector.java

License:Open Source License

/**
 * Return all the files that match the file pattern and are not checksum files. Results are sorted by their names.
 *
 * @param pathPattern
 *            a regular expression specifying the path pattern.
 * @param filter
 *            the user supplied path filter
 * @return FileStatus a list of statuses for the paths that match the pattern.
 * @throws HDFSConnectorException
 *             if any issue occurs during the execution.
 */
@Processor
public List<FileStatus> globStatus(final String pathPattern, @Optional final PathFilter filter)
        throws HDFSConnectorException {
    try {
        return runHdfsPathAction(pathPattern, new HdfsPathAction<List<FileStatus>>() {

            public List<FileStatus> run(final Path hdfsPath) throws Exception { // NOSONAR
                PathFilter nonNullPathFilter = (filter != null) ? filter : new PathFilter() {

                    @Override
                    public boolean accept(Path path) {
                        return true;
                    }
                };
                FileStatus[] fileStatusesAsArray = fileSystem.globStatus(hdfsPath, nonNullPathFilter);
                return (fileStatusesAsArray != null) ? Arrays.asList(fileStatusesAsArray)
                        : new ArrayList<FileStatus>();
            }
        });
    } catch (Exception e) {
        throw new HDFSConnectorException(e);
    }
}

From source file:org.oclc.firefly.hadoop.backup.BackupUtils.java

License:Apache License

/**
 * Looks under the table directory in the filesystem for files with a '.tableinfo' prefix. Returns a
 * reference to the 'latest' instance.
 *
 * @param fs The filesystem to look in
 * @param tableDirPath the hdfs table directory
 * @return The 'current' tableinfo file.
 * @throws IOException if reading from the file system fails
 */
public static FileStatus getTableInfoPath(final FileSystem fs, final Path tableDirPath) throws IOException {
    FileStatus ret = null;
    FileStatus[] status = FSUtils.listStatus(fs, tableDirPath, new PathFilter() {
        @Override
        public boolean accept(Path p) {
            // Accept any file that starts with TABLEINFO_NAME
            return p.getName().startsWith(FSTableDescriptors.TABLEINFO_NAME);
        }
    });

    if (status != null && status.length > 0) {
        Arrays.sort(status, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus left, FileStatus right) {
                return -left.compareTo(right);
            }
        });

        if (status.length > 1) {
            // Clean away old versions of .tableinfo
            for (int i = 1; i < status.length; i++) {
                Path p = status[i].getPath();

                // Clean up old versions
                if (!fs.delete(p, false)) {
                    LOG.warn("Failed cleanup of " + status);
                } else {
                    LOG.debug("Cleaned up old tableinfo file " + p);
                }
            }
        }

        ret = status[0];
    }

    return ret;
}
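
The anonymous Comparator negates compareTo to sort the statuses in descending order; since FileStatus's natural ordering follows the path, the 'latest' tableinfo file sorts to index 0. On Java 8 and later the same descending sort can be written more compactly; a minimal sketch, assuming java.util.Collections is imported:

// Reverse of FileStatus's natural (path-based) ordering
Arrays.sort(status, Collections.reverseOrder());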

From source file:org.openflamingo.fs.hdfs.HdfsFileSystemProvider.java

License:Apache License

@Override
public List<FileInfo> list(String path) {
    List<FileInfo> fileInfos = new ArrayList<FileInfo>();
    try {
        FileStatus[] files = fs.listStatus(new Path(path), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                try {
                    return fs.isFile(path);
                } catch (IOException e) {
                    // Hadoop FileSystem access error; treat the entry as not a file
                }
                return false;
            }
        });

        for (FileStatus file : files) {
            fileInfos.add(new HdfsFileInfo(file));
        }
        return fileInfos;
    } catch (Exception ex) {
        throw new FileSystemException(bundle.message("S_FS", "CANNOT_GET_LIST", path), ex);
    }
}
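
Calling fs.isFile(path) inside accept triggers an extra filesystem lookup for every candidate entry. An alternative sketch that filters on the FileStatus objects already returned by listStatus, avoiding the per-path calls; it assumes Hadoop 2 or later (FileStatus.isFile()), and the helper name listFilesOnly is illustrative:

public static List<FileInfo> listFilesOnly(FileSystem fs, String path) throws IOException {
    List<FileInfo> fileInfos = new ArrayList<FileInfo>();
    for (FileStatus status : fs.listStatus(new Path(path))) {
        if (status.isFile()) { // the status already knows whether the entry is a file
            fileInfos.add(new HdfsFileInfo(status));
        }
    }
    return fileInfos;
}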

From source file:org.pentaho.hadoop.shim.common.format.orc.PentahoOrcInputFormat.java

License:Apache License

private Reader getReader() throws Exception {
    return inClassloader(() -> {
        checkNullFileName();
        Path filePath;
        FileSystem fs;
        Reader orcReader;
        try {
            filePath = new Path(fileName);
            fs = FileSystem.get(filePath.toUri(), conf);
            if (!fs.exists(filePath)) {
                throw new NoSuchFileException(fileName);
            }

            if (fs.getFileStatus(filePath).isDirectory()) {
                PathFilter pathFilter = new PathFilter() {
                    public boolean accept(Path file) {
                        return file.getName().endsWith(".orc");
                    }
                };

                FileStatus[] fileStatuses = fs.listStatus(filePath, pathFilter);
                if (fileStatuses.length == 0) {
                    throw new NoSuchFileException(fileName);
                }

                filePath = fileStatuses[0].getPath();
            }

            orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf).filesystem(fs));
        } catch (IOException e) {
            throw new RuntimeException("Unable to read data from file " + fileName, e);
        }
        return orcReader;
    });
}
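
PathFilter declares exactly one abstract method, so on Java 8 and later the suffix check used by both Pentaho ORC readers can also be written as a lambda; a minimal sketch:

PathFilter orcOnly = file -> file.getName().endsWith(".orc");
FileStatus[] orcFiles = fs.listStatus(filePath, orcOnly);   // same call as above, shorter filter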

From source file:org.pentaho.hadoop.shim.common.format.orc.PentahoOrcRecordReader.java

License:Apache License

public PentahoOrcRecordReader(String fileName, Configuration conf,
        List<? extends IOrcInputField> dialogInputFields) {
    this.conf = conf;
    this.dialogInputFields = dialogInputFields;

    Reader reader = null;
    try {
        filePath = new Path(fileName);
        fs = FileSystem.get(filePath.toUri(), conf);
        if (!fs.exists(filePath)) {
            throw new NoSuchFileException(fileName);
        }

        if (fs.getFileStatus(filePath).isDirectory()) {
            PathFilter pathFilter = new PathFilter() {
                public boolean accept(Path file) {
                    return file.getName().endsWith(".orc");
                }
            };

            FileStatus[] fileStatuses = fs.listStatus(filePath, pathFilter);
            if (fileStatuses.length == 0) {
                throw new NoSuchFileException(fileName);
            }

            filePath = fileStatuses[0].getPath();
        }

        reader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf).filesystem(fs));
    } catch (IOException e) {
        throw new IllegalArgumentException("Unable to read data from file " + fileName, e);
    }
    try {
        recordReader = reader.rows();
    } catch (IOException e) {
        throw new IllegalArgumentException("Unable to get record reader for file " + fileName, e);
    }
    typeDescription = reader.getSchema();
    OrcSchemaConverter orcSchemaConverter = new OrcSchemaConverter();
    orcInputFields = orcSchemaConverter.buildInputFields(typeDescription);
    IOrcMetaData.Reader orcMetaDataReader = new OrcMetaDataReader(reader);
    orcMetaDataReader.read(orcInputFields);
    batch = typeDescription.createRowBatch();

    //Create a map of orc fields to meta columns
    Map<String, Integer> orcColumnNumberMap = new HashMap<String, Integer>();
    int orcFieldNumber = 0;
    for (String orcFieldName : typeDescription.getFieldNames()) {
        orcColumnNumberMap.put(orcFieldName, orcFieldNumber++);
    }

    //Create a map of input fields to Orc Column numbers
    schemaToOrcSubcripts = new HashMap<String, Integer>();
    for (IOrcInputField inputField : dialogInputFields) {
        if (inputField != null) {
            Integer colNumber = orcColumnNumberMap.get(inputField.getFormatFieldName());
            if (colNumber == null) {
                throw new IllegalArgumentException("Column " + inputField.getFormatFieldName()
                        + " does not exist in the ORC file.  Please use the getFields button");
            } else {
                schemaToOrcSubcripts.put(inputField.getPentahoFieldName(), colNumber);
            }
        }
    }

    try {
        setNextBatch();
    } catch (IOException e) {
        throw new IllegalArgumentException("No rows to read in " + fileName, e);
    }
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected void loadFromDir(TransientElement part, long numentries, FileSystem fs, Path path)
        throws IOException {
    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };
    FileStatus[] status = fs.listStatus(path, filter);

    if (status.length == 0) {
        System.out.println("Path [" + path + "] has no files. Initializing section.");
        part.initialize(0);
    } else {
        Arrays.sort(status, new FileStatusComparator());

        System.out.println("Initializing section " + path);
        part.initialize(numentries);
        for (FileStatus file : status) {
            System.out.println("Reading file [" + file.getPath() + "]");
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(),
                    this.conf.getConfigurationObject());
            part.load(reader, this.listener);
            reader.close();
        }
        System.out.println("Closing section " + path);
        part.close();
    }
}
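
The filter skips names that start with "_", which is how MapReduce marks bookkeeping output such as _SUCCESS and _logs alongside the actual part files. A slightly broader sketch that also skips dot-prefixed entries, matching Hadoop's usual hidden-file convention; the extension beyond "_" is an assumption about the job output, not part of the original code:

PathFilter visibleFilesOnly = new PathFilter() {
    @Override
    public boolean accept(Path path) {
        String name = path.getName();
        // Skip "_SUCCESS", "_logs" and other entries whose names start with "_" or "."
        return !name.startsWith("_") && !name.startsWith(".");
    }
};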

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected void loadFromDir(TransientBitMapTriples part, long numentries, long maxpredicate, long maxobject,
        FileSystem fs, Path path) throws IOException {
    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };
    FileStatus[] status = fs.listStatus(path, filter);

    if (status.length == 0) {
        System.out.println("Path [" + path + "] has no files. Initializing section.");
        part.initialize(0, 0);
    } else {
        Arrays.sort(status, new FileStatusComparator());

        System.out.println("Initializing section " + path);
        part.initialize(numentries, maxpredicate, maxobject);
        for (FileStatus file : status) {
            System.out.println("Reading file [" + file.getPath() + "]");
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(),
                    this.conf.getConfigurationObject());
            part.load(reader, this.listener);
            reader.close();
        }
        System.out.println("Closing section " + path);
        part.close();
    }
}