List of usage examples for org.apache.hadoop.fs.PathFilter
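PathFilter is a one-method callback, boolean accept(Path path), that FileSystem.listStatus() (and globStatus()) invokes for each candidate path so the caller can keep or drop it. Before the project-specific examples below, here is a minimal, self-contained sketch of the pattern they all share; the directory /tmp/data and the .tmp suffix are illustrative assumptions, not taken from any of the projects.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Illustrative directory; replace with a real HDFS or local path.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(conf);

        // listStatus() calls accept() once per child path and keeps only the matches.
        FileStatus[] matches = fs.listStatus(dir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                // keep everything except temporary files (illustrative rule)
                return !path.getName().endsWith(".tmp");
            }
        });

        for (FileStatus status : matches) {
            System.out.println(status.getPath());
        }
    }
}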
From source file:org.talend.components.simplefileio.runtime.utils.FileSystemUtil.java
License:Open Source License
/**
 * Return the files in this folder, but do not return hidden files (those starting with '_' or '.').
 * @param fs
 * @param folder
 */
public static FileStatus[] listSubFiles(FileSystem fs, Path folder) throws IOException {
    return fs.listStatus(folder, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    });
}
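Filtering out names that begin with '_' or '.' is the conventional way to skip Hadoop's bookkeeping outputs such as _SUCCESS and _logs; it mirrors the default hidden-file filter that FileInputFormat applies when listing job input.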
From source file:org.terrier.indexing.HadoopIndexerReducer.java
License:Mozilla Public License
protected LinkedList<MapData> loadRunData(Context context) throws IOException {
    // Load in Run Data
    ArrayList<String> mapTaskIDs = new ArrayList<String>();
    final LinkedList<MapData> runData = new LinkedList<MapData>();
    DataInputStream runDataIn;

    final String jobId = context.getTaskAttemptID().getJobID().toString().replaceAll("job", "task");

    final FileStatus[] files = FileSystem.get(context.getConfiguration())
            .listStatus(FileOutputFormat.getOutputPath(context), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    final String name = path.getName();
                    // 1. is this a run file
                    if (!(name.startsWith(jobId) && name.endsWith(".runs")))
                        return false;
                    return true;
                }
            });

    if (files == null || files.length == 0) {
        throw new IOException("No run status files found in " + FileOutputFormat.getOutputPath(context));
    }

    final int thisPartition = context.getTaskAttemptID().getTaskID().getId();
    final NewSplitEmittedTerm.SETPartitioner partitionChecker = new NewSplitEmittedTerm.SETPartitioner();
    partitionChecker.setConf(context.getConfiguration());

    MapData tempHRD;
    for (FileStatus file : files) {
        ExtensibleSinglePassIndexer.logger.info("Run data file " + file.getPath().toString() + " has length "
                + Files.length(file.getPath().toString()));
        runDataIn = new DataInputStream(Files.openFileStream(file.getPath().toString()));
        tempHRD = new MapData(runDataIn);
        // check to see if this file contained our split information
        if (mutipleIndices && partitionChecker.calculatePartition(tempHRD.getSplitnum(),
                context.getNumReduceTasks()) != thisPartition)
            continue;

        mapTaskIDs.add(tempHRD.getMap());
        runData.add(tempHRD);
        runDataIn.close();
    }

    // Sort by splitnum
    Collections.sort(runData);
    Collections.sort(mapTaskIDs, new IDComparator(runData));
    // A list of the index shards
    MapIndexPrefixes = mapTaskIDs.toArray(new String[0]);
    return runData;
}
From source file:org.wso2.carbon.hdfs.mgt.HDFSAdmin.java
License:Open Source License
/**
 * Mgt service that returns the file and folder list of the given HDFS path.
 *
 * @param fsObjectPath
 *            file system path for which the user needs information about files and folders
 * @return list with files and folders in the given path
 * @throws HDFSServerManagementException
 */
public FolderInformation[] getCurrentUserFSObjects(String fsObjectPath) throws HDFSServerManagementException {

    boolean isCurrentUserSuperTenant = false;
    // Checks if the current user has a role assigned. Else throws an error.
    try {
        checkCurrentTenantUserHasRole();
        isCurrentUserSuperTenant = hdfsAdminHelperInstance.isCurrentUserSuperTenant();
    } catch (HDFSServerManagementException e) {
        throw e;
    } catch (UserStoreException e) {
        handleException(" User store exception", e);
    }

    FileSystem hdfsFS = null;
    // The folder path is filtered so that only items under the /user/ directory are returned.
    if (fsObjectPath == null
            || (!isCurrentUserSuperTenant && fsObjectPath.equals(HDFSConstants.HDFS_ROOT_FOLDER))) {
        fsObjectPath = HDFSConstants.HDFS_USER_ROOT;
    }

    try {
        hdfsFS = hdfsAdminHelperInstance.getFSforUser();
    } catch (IOException e1) {
        String msg = "Error occurred while trying to get File system instance";
        handleException(msg, e1);
    }

    FileStatus[] fileStatusList = null;
    List<FolderInformation> folderInfo = new ArrayList<FolderInformation>();
    try {
        if (hdfsFS != null && hdfsFS.exists(new Path(fsObjectPath))) {
            if (hdfsAdminHelperInstance.isCurrentUserSuperTenant()) {
                fileStatusList = hdfsFS.listStatus(new Path(fsObjectPath));
            } else {
                fileStatusList = hdfsFS.listStatus(new Path(fsObjectPath), new PathFilter() {
                    // the filter to be applied when retrieving the file paths
                    @Override
                    public boolean accept(Path path) {
                        String filter = null;
                        CarbonContext carbonContext = CarbonContext.getThreadLocalCarbonContext();
                        if (hdfsAdminHelperInstance.isCurrentUserTenantAdmin()) {
                            filter = carbonContext.getTenantDomain();
                        } else {
                            filter = carbonContext.getTenantDomain() + HDFSConstants.UNDERSCORE
                                    + carbonContext.getUsername();
                        }
                        return path.toString().contains(filter);
                    }
                });
            }
            // List the statuses of the files/directories in the given path if the path is a directory.
            if (fileStatusList != null) {
                for (FileStatus fileStatus : fileStatusList) {
                    FolderInformation folder = new FolderInformation();
                    folder.setFolder(fileStatus.isDir());
                    folder.setName(fileStatus.getPath().getName());
                    folder.setFolderPath(fileStatus.getPath().toUri().getPath());
                    folder.setOwner(fileStatus.getOwner());
                    folder.setGroup(fileStatus.getGroup());
                    folder.setPermissions(fileStatus.getPermission().toString());
                    folderInfo.add(folder);
                }
                return folderInfo.toArray(new FolderInformation[folderInfo.size()]);
            }
        }
    } catch (Exception e) {
        String msg = "Error occurred while retrieving folder information";
        handleException(msg, e);
    }
    return null;
}
From source file:parquet.hadoop.PrintFooter.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage PrintFooter <path>");
        return;
    }
    Path path = new Path(new URI(args[0]));
    final Configuration configuration = new Configuration();

    final FileSystem fs = path.getFileSystem(configuration);
    FileStatus fileStatus = fs.getFileStatus(path);
    Path summary = new Path(fileStatus.getPath(), PARQUET_METADATA_FILE);
    if (fileStatus.isDir() && fs.exists(summary)) {
        System.out.println("reading summary file");
        FileStatus summaryStatus = fs.getFileStatus(summary);
        List<Footer> readSummaryFile = ParquetFileReader.readSummaryFile(configuration, summaryStatus);
        for (Footer footer : readSummaryFile) {
            add(footer.getParquetMetadata());
        }
    } else {
        List<FileStatus> statuses;
        if (fileStatus.isDir()) {
            System.out.println("listing files in " + fileStatus.getPath());
            statuses = Arrays.asList(fs.listStatus(fileStatus.getPath(), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return !path.getName().startsWith("_");
                }
            }));
        } else {
            statuses = new ArrayList<FileStatus>();
            statuses.add(fileStatus);
        }
        System.out.println("opening " + statuses.size() + " files");
        int i = 0;
        ExecutorService threadPool = Executors.newFixedThreadPool(5);
        try {
            long t0 = System.currentTimeMillis();
            Deque<Future<ParquetMetadata>> footers = new LinkedBlockingDeque<Future<ParquetMetadata>>();
            for (final FileStatus currentFile : statuses) {
                footers.add(threadPool.submit(new Callable<ParquetMetadata>() {
                    @Override
                    public ParquetMetadata call() throws Exception {
                        try {
                            ParquetMetadata footer = ParquetFileReader.readFooter(configuration, currentFile);
                            return footer;
                        } catch (Exception e) {
                            throw new ParquetDecodingException("could not read footer", e);
                        }
                    }
                }));
            }
            int previousPercent = 0;
            int n = 60;
            System.out.print("0% [");
            for (int j = 0; j < n; j++) {
                System.out.print(" ");
            }
            System.out.print("] 100%");
            for (int j = 0; j < n + 6; j++) {
                System.out.print('\b');
            }
            while (!footers.isEmpty()) {
                Future<ParquetMetadata> futureFooter = footers.removeFirst();
                if (!futureFooter.isDone()) {
                    footers.addLast(futureFooter);
                    continue;
                }
                ParquetMetadata footer = futureFooter.get();
                int currentPercent = (++i * n / statuses.size());
                while (currentPercent > previousPercent) {
                    System.out.print("*");
                    previousPercent++;
                }
                add(footer);
            }
            System.out.println("");
            long t1 = System.currentTimeMillis();
            System.out.println("read all footers in " + (t1 - t0) + " ms");
        } finally {
            threadPool.shutdownNow();
        }
    }
    Set<Entry<ColumnDescriptor, ColStats>> entries = stats.entrySet();
    long total = 0;
    long totalUnc = 0;
    for (Entry<ColumnDescriptor, ColStats> entry : entries) {
        ColStats colStats = entry.getValue();
        total += colStats.allStats.total;
        totalUnc += colStats.uncStats.total;
    }
    for (Entry<ColumnDescriptor, ColStats> entry : entries) {
        ColStats colStats = entry.getValue();
        System.out.println(entry.getKey() + " " + percent(colStats.allStats.total, total) + "% of all space "
                + colStats);
    }
    System.out.println("number of blocks: " + blockCount);
    System.out.println("total data size: " + humanReadable(total) + " (raw " + humanReadable(totalUnc) + ")");
    System.out.println("total record: " + humanReadable(recordCount));
    System.out.println("average block size: " + humanReadable(total / blockCount) + " (raw "
            + humanReadable(totalUnc / blockCount) + ")");
    System.out.println("average record count: " + humanReadable(recordCount / blockCount));
}
From source file:parquet.hadoop.TestParquetFileWriter.java
License:Apache License
@Test
public void testMetaDataFile() throws Exception {
    File testDir = new File("target/test/TestParquetFileWriter/testMetaDataFileDir").getAbsoluteFile();
    Path testDirPath = new Path(testDir.toURI());
    Configuration configuration = new Configuration();

    final FileSystem fs = testDirPath.getFileSystem(configuration);
    fs.delete(testDirPath, true);
    fs.mkdirs(testDirPath);

    MessageType schema = MessageTypeParser.parseMessageType(
            "message m { required group a {required binary b;} required group c { required int64 d; }}");
    createFile(configuration, new Path(testDirPath, "part0"), schema);
    createFile(configuration, new Path(testDirPath, "part1"), schema);
    createFile(configuration, new Path(testDirPath, "part2"), schema);

    FileStatus outputStatus = fs.getFileStatus(testDirPath);
    List<Footer> footers = ParquetFileReader.readAllFootersInParallel(configuration, outputStatus);
    validateFooters(footers);
    ParquetFileWriter.writeMetadataFile(configuration, testDirPath, footers);

    footers = ParquetFileReader.readFooters(configuration, outputStatus);
    validateFooters(footers);

    footers = ParquetFileReader.readFooters(configuration, fs.getFileStatus(new Path(testDirPath, "part0")));
    assertEquals(1, footers.size());

    final FileStatus metadataFile = fs
            .getFileStatus(new Path(testDirPath, ParquetFileWriter.PARQUET_METADATA_FILE));
    final List<Footer> metadata = ParquetFileReader.readSummaryFile(configuration, metadataFile);
    validateFooters(metadata);

    footers = ParquetFileReader.readAllFootersInParallelUsingSummaryFiles(configuration,
            Arrays.asList(fs.listStatus(testDirPath, new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    return !p.getName().startsWith("_");
                }
            })));
    validateFooters(footers);

    fs.delete(metadataFile.getPath(), false);

    footers = ParquetFileReader.readAllFootersInParallelUsingSummaryFiles(configuration,
            Arrays.asList(fs.listStatus(testDirPath)));
    validateFooters(footers);
}
From source file:ph.fingra.hadoop.mapred.common.CopyToLocalFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {

    FileSystem fs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem local = FileSystem.getLocal(getConf());

    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete the destination local file if it already exists
    if (local.exists(dstPath)) {
        local.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = fs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create local output stream
        FSDataOutputStream out = local.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = fs.open(listedPaths[i]);
            byte buffer[] = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}
From source file:ph.fingra.hadoop.mapred.common.CopyWithinHdfsFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {

    FileSystem shfs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem thfs = FileSystem.get(URI.create(dstfile), getConf());

    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete the destination file if it already exists
    if (thfs.exists(dstPath)) {
        thfs.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = shfs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create hdfs output stream
        FSDataOutputStream out = thfs.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = shfs.open(listedPaths[i]);
            byte buffer[] = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}
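Both dirToFile variants above concatenate the filtered files with a hand-rolled 256-byte copy loop. Hadoop ships org.apache.hadoop.io.IOUtils for exactly this task, so the inner loop could be reduced to a copyBytes call; the helper below is only a sketch of that simplification, not code from either project.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Hypothetical helper: concatenate the listed paths into one already-open output stream.
public final class CopyHelper {

    private CopyHelper() {
    }

    static void concatTo(FileSystem fs, Path[] listedPaths, FSDataOutputStream out) throws IOException {
        for (Path src : listedPaths) {
            FSDataInputStream in = fs.open(src);
            try {
                // copy with a 4 KB buffer; close=false so the output stream stays open for the caller
                IOUtils.copyBytes(in, out, 4096, false);
            } finally {
                in.close();
            }
        }
    }
}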
From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java
License:Apache License
public static int getDateMatchedFileCount(Path srcpath) throws IOException {

    int count = 0;
    Path parentPath = null;
    String date_ext = null;

    // directory path
    parentPath = srcpath.getParent();

    // date pattern
    Pattern p = Pattern.compile("([0-9]{4})\\-([0-9]{2})\\-([0-9]{2})");
    Matcher m = p.matcher(srcpath.getName());
    if (m.find()) {
        // suffix part like "yyyy-MM-dd.txt" in the file name
        date_ext = srcpath.getName().substring(m.start()/*, m.end()*/);
    }

    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);

    // get matched file list
    final String suffix = date_ext;
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(suffix);
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(parentPath, resultFileFilter);
        if (status != null) {
            Path[] listedPaths = FileUtil.stat2Paths(status);
            if (listedPaths != null) {
                count = listedPaths.length;
            }
        }
    } catch (FileNotFoundException ignore) {
    } catch (InvalidInputException ignore) {
        ; // Hadoop 1.x throws InvalidInputException instead of FileNotFoundException here
    }

    return count;
}
From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java
License:Apache License
public static boolean deleteNBackupFile(String srcdir, String srcfile, int maxcount, String runday,
        final String dbfnameprefix) throws IOException {

    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);

    Path targetPath = null;
    Path rootPath = new Path(srcdir);
    Path sourcePath = new Path(srcfile);
    String target_day = "";
    String target_file = "";
    boolean success = false;

    // if srcfile does not exist, stop the backup and return true
    if (hdfs.exists(sourcePath) == false) {
        return true;
    }

    // name the backup file with yesterday's date
    target_day = DateTimeUtil.addDays(runday, -1, "yyyyMMdd");
    target_file = srcfile + "-" + target_day;
    //System.out.println("target_file - " + target_file);
    targetPath = new Path(target_file);

    // delete any backup file with the same name, then rename the source file to the backup file
    if (hdfs.exists(new Path(target_file))) {
        hdfs.delete(targetPath, true);
    }
    success = hdfs.rename(sourcePath, targetPath);

    // get the backup file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dbfnameprefix + "-");
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(rootPath, resultFileFilter);
        Path[] listedPaths = FileUtil.stat2Paths(status);

        // delete files beyond the maximum number of backup files
        if (listedPaths.length > maxcount) {

            Comparator<Path> c = new Comparator<Path>() {
                public int compare(Path o1, Path o2) {
                    int ret = 0;
                    ret = o1.getName().compareTo(o2.getName());
                    return -(ret); // order by reverse of the period
                }
            };

            Arrays.sort(listedPaths, c);

            for (int i = maxcount; i < listedPaths.length; i++) {
                Path path = listedPaths[i];
                hdfs.delete(path, true);
            }
        }
    } catch (FileNotFoundException ignore) {
    } catch (InvalidInputException ignore) {
        ; // Hadoop 1.x throws InvalidInputException instead of FileNotFoundException here
    }

    return success;
}
From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java
License:Apache License
public static boolean deleteOriginFiles(FingraphConfig config, String year, String month, String day)
        throws IOException {

    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);

    String root_uri = config.getHadoop_user_path() + (config.getHadoop_user_path().endsWith("/") ? "" : "/")
            + config.getSetting().getHfs_input_path()
            + (config.getSetting().getHfs_input_path().endsWith("/") ? "" : "/");
    root_uri = root_uri.replaceAll("\\{yyyy\\}", year);
    root_uri = root_uri.replaceAll("\\{MM\\}", month);
    root_uri = root_uri.replaceAll("\\{dd\\}", day);

    String file_uri = config.getSetting().getOrigin_input_file();
    file_uri = file_uri.replaceAll("\\{yyyy\\}", year);
    file_uri = file_uri.replaceAll("\\{MM\\}", month);
    file_uri = file_uri.replaceAll("\\{dd\\}", day);
    file_uri = file_uri.replace("*", "[\\w]*");
    final String patt = "^" + file_uri + "$";
    //System.out.println(patt);

    Path rootPath = new Path(root_uri);
    boolean success = false;

    // get matched file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches(patt);
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(rootPath, resultFileFilter);
        if (status != null) {
            Path[] listedPaths = FileUtil.stat2Paths(status);
            if (listedPaths != null) {
                for (Path path : listedPaths) {
                    success = hdfs.delete(path, true);
                }
            }
        }
    } catch (FileNotFoundException ignore) {
    } catch (InvalidInputException ignore) {
        ; // Hadoop 1.x throws InvalidInputException instead of FileNotFoundException here
    }

    return success;
}
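The last example converts a shell-style wildcard in the configured file name into a regular expression by hand and matches it inside the PathFilter. Hadoop's FileSystem also understands glob patterns natively through globStatus(Path, PathFilter), so a variant of the same deletion could let the file system do the pattern matching. The sketch below assumes a fully resolved glob URI is passed in; the example path and pattern are illustrative, not taken from the project.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class GlobDeleteSketch {

    private GlobDeleteSketch() {
    }

    // Hypothetical variant: delete every non-hidden file matching a glob such as
    // "/user/fingra/input/2014/01/01/origin_*.log" (path and pattern are illustrative).
    public static boolean deleteMatching(Configuration conf, String globUri) throws IOException {
        FileSystem hdfs = FileSystem.get(conf);
        boolean success = false;
        try {
            // globStatus expands the wildcard against the file system; the PathFilter then skips hidden files.
            FileStatus[] status = hdfs.globStatus(new Path(globUri), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    String name = path.getName();
                    return !name.startsWith("_") && !name.startsWith(".");
                }
            });
            if (status != null) {
                for (FileStatus st : status) {
                    success = hdfs.delete(st.getPath(), true);
                }
            }
        } catch (FileNotFoundException ignore) {
            // no matching files: nothing to delete
        }
        return success;
    }
}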