Example usage for org.apache.hadoop.fs.PathFilter

Introduction

On this page you can find example usages of org.apache.hadoop.fs.PathFilter.

Prototype

public interface PathFilter {
    boolean accept(Path path);
}
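
PathFilter is a single-method interface, so it is usually implemented inline as an anonymous class and passed to FileSystem.listStatus. A minimal sketch of that pattern is shown below; the /data/input directory is a made-up example, and only the filtering idiom itself is drawn from the usages on this page:

FileSystem fs = FileSystem.get(new Configuration());
// "/data/input" is a hypothetical directory used purely for illustration.
FileStatus[] visible = fs.listStatus(new Path("/data/input"), new PathFilter() {
    @Override
    public boolean accept(Path path) {
        String name = path.getName();
        // skip hidden entries such as _SUCCESS, _logs and .crc files
        return !name.startsWith("_") && !name.startsWith(".");
    }
});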

Usage

From source file:org.talend.components.simplefileio.runtime.utils.FileSystemUtil.java

License:Open Source License

/**
 * Return the files in this folder, excluding hidden files (names starting with '_' or '.').
 * @param fs the file system to list from
 * @param folder the folder whose contents are listed
 * @return the statuses of the non-hidden entries in the folder
 */
public static FileStatus[] listSubFiles(FileSystem fs, Path folder) throws IOException {
    return fs.listStatus(folder, new PathFilter() {

        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    });
}
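
A brief usage sketch for the helper above; the FileSystem setup and the /tmp/job-output path are assumptions added for illustration:

FileSystem fs = FileSystem.get(new Configuration());
// list only the visible (non-hidden) entries under a hypothetical job output folder
FileStatus[] visible = FileSystemUtil.listSubFiles(fs, new Path("/tmp/job-output"));
for (FileStatus status : visible) {
    System.out.println(status.getPath().getName());
}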

From source file:org.terrier.indexing.HadoopIndexerReducer.java

License:Mozilla Public License

protected LinkedList<MapData> loadRunData(Context context) throws IOException {
    // Load in Run Data
    ArrayList<String> mapTaskIDs = new ArrayList<String>();
    final LinkedList<MapData> runData = new LinkedList<MapData>();
    DataInputStream runDataIn;

    final String jobId = context.getTaskAttemptID().getJobID().toString().replaceAll("job", "task");

    final FileStatus[] files = FileSystem.get(context.getConfiguration())
            .listStatus(FileOutputFormat.getOutputPath(context), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    final String name = path.getName();
                    // accept only the run files written by this job
                    return name.startsWith(jobId) && name.endsWith(".runs");
                }
            });

    if (files == null || files.length == 0) {
        throw new IOException("No run status files found in " + FileOutputFormat.getOutputPath(context));
    }

    final int thisPartition = context.getTaskAttemptID().getTaskID().getId();
    final NewSplitEmittedTerm.SETPartitioner partitionChecker = new NewSplitEmittedTerm.SETPartitioner();
    partitionChecker.setConf(context.getConfiguration());

    MapData tempHRD;
    for (FileStatus file : files) {
        ExtensibleSinglePassIndexer.logger.info("Run data file " + file.getPath().toString() + " has length "
                + Files.length(file.getPath().toString()));
        runDataIn = new DataInputStream(Files.openFileStream(file.getPath().toString()));
        tempHRD = new MapData(runDataIn);
        // close the stream as soon as the run data has been read, even if this split is skipped below
        runDataIn.close();
        //check to see if this file contained our split information
        if (mutipleIndices && partitionChecker.calculatePartition(tempHRD.getSplitnum(),
                context.getNumReduceTasks()) != thisPartition)
            continue;

        mapTaskIDs.add(tempHRD.getMap());
        runData.add(tempHRD);
    }

    // Sort by splitnum
    Collections.sort(runData);
    Collections.sort(mapTaskIDs, new IDComparator(runData));

    // A list of the index shards
    MapIndexPrefixes = mapTaskIDs.toArray(new String[0]);
    return runData;
}

From source file:org.wso2.carbon.hdfs.mgt.HDFSAdmin.java

License:Open Source License

/**
 * Management service that returns the file and folder list for the given HDFS path.
 *
 * @param fsObjectPath
 *            the file system path for which file and folder information is requested
 * @return the list of files and folders in the given path
 * @throws HDFSServerManagementException
 */
public FolderInformation[] getCurrentUserFSObjects(String fsObjectPath) throws HDFSServerManagementException {

    boolean isCurrentUserSuperTenant = false;
    //Check whether the current user has a role assigned; throw an error otherwise.
    try {
        checkCurrentTenantUserHasRole();
        isCurrentUserSuperTenant = hdfsAdminHelperInstance.isCurrentUserSuperTenant();

    } catch (HDFSServerManagementException e) {
        throw e;
    } catch (UserStoreException e) {
        handleException(" User store exception", e);
    }
    FileSystem hdfsFS = null;

    //The folder path is restricted so that only items under the /user/ directory are returned.
    if (fsObjectPath == null
            || (!isCurrentUserSuperTenant && fsObjectPath.equals(HDFSConstants.HDFS_ROOT_FOLDER))) {
        fsObjectPath = HDFSConstants.HDFS_USER_ROOT;
    }

    try {
        hdfsFS = hdfsAdminHelperInstance.getFSforUser();
    } catch (IOException e1) {
        String msg = "Error occurred while trying to get File system instance";
        handleException(msg, e1);
    }
    FileStatus[] fileStatusList = null;
    List<FolderInformation> folderInfo = new ArrayList<FolderInformation>();
    try {
        if (hdfsFS != null && hdfsFS.exists(new Path(fsObjectPath))) {
            if (hdfsAdminHelperInstance.isCurrentUserSuperTenant()) {
                fileStatusList = hdfsFS.listStatus(new Path(fsObjectPath));
            } else {
                fileStatusList = hdfsFS.listStatus(new Path(fsObjectPath), new PathFilter() {

                    //Filter the listing to paths that belong to the current tenant (or tenant user).
                    @Override
                    public boolean accept(Path path) {
                        String filter = null;
                        CarbonContext carbonContext = CarbonContext.getThreadLocalCarbonContext();
                        if (hdfsAdminHelperInstance.isCurrentUserTenantAdmin()) {
                            filter = carbonContext.getTenantDomain();
                        } else {
                            filter = carbonContext.getTenantDomain() + HDFSConstants.UNDERSCORE
                                    + carbonContext.getUsername();
                        }
                        return path.toString().contains(filter);
                    }
                });
            }
            //Convert the listed statuses into FolderInformation entries.
            if (fileStatusList != null) {
                for (FileStatus fileStatus : fileStatusList) {
                    FolderInformation folder = new FolderInformation();
                    folder.setFolder(fileStatus.isDir());
                    folder.setName(fileStatus.getPath().getName());
                    folder.setFolderPath(fileStatus.getPath().toUri().getPath());
                    folder.setOwner(fileStatus.getOwner());
                    folder.setGroup(fileStatus.getGroup());
                    folder.setPermissions(fileStatus.getPermission().toString());
                    folderInfo.add(folder);
                }
                return folderInfo.toArray(new FolderInformation[folderInfo.size()]);
            }
        }
    } catch (Exception e) {
        String msg = "Error occurred while retrieving folder information";
        handleException(msg, e);
    }
    return null;

}

From source file:parquet.hadoop.PrintFooter.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage PrintFooter <path>");
        return;
    }
    Path path = new Path(new URI(args[0]));
    final Configuration configuration = new Configuration();

    final FileSystem fs = path.getFileSystem(configuration);
    FileStatus fileStatus = fs.getFileStatus(path);
    Path summary = new Path(fileStatus.getPath(), PARQUET_METADATA_FILE);
    if (fileStatus.isDir() && fs.exists(summary)) {
        System.out.println("reading summary file");
        FileStatus summaryStatus = fs.getFileStatus(summary);
        List<Footer> readSummaryFile = ParquetFileReader.readSummaryFile(configuration, summaryStatus);
        for (Footer footer : readSummaryFile) {
            add(footer.getParquetMetadata());
        }
    } else {
        List<FileStatus> statuses;
        if (fileStatus.isDir()) {
            System.out.println("listing files in " + fileStatus.getPath());
            statuses = Arrays.asList(fs.listStatus(fileStatus.getPath(), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return !path.getName().startsWith("_");
                }
            }));
        } else {
            statuses = new ArrayList<FileStatus>();
            statuses.add(fileStatus);
        }
        System.out.println("opening " + statuses.size() + " files");
        int i = 0;
        ExecutorService threadPool = Executors.newFixedThreadPool(5);
        try {
            long t0 = System.currentTimeMillis();
            Deque<Future<ParquetMetadata>> footers = new LinkedBlockingDeque<Future<ParquetMetadata>>();
            for (final FileStatus currentFile : statuses) {
                footers.add(threadPool.submit(new Callable<ParquetMetadata>() {
                    @Override
                    public ParquetMetadata call() throws Exception {
                        try {
                            ParquetMetadata footer = ParquetFileReader.readFooter(configuration, currentFile);
                            return footer;
                        } catch (Exception e) {
                            throw new ParquetDecodingException("could not read footer", e);
                        }
                    }
                }));
            }
            int previousPercent = 0;
            int n = 60;
            System.out.print("0% [");
            for (int j = 0; j < n; j++) {
                System.out.print(" ");
            }
            System.out.print("] 100%");
            for (int j = 0; j < n + 6; j++) {
                System.out.print('\b');
            }
            while (!footers.isEmpty()) {
                Future<ParquetMetadata> futureFooter = footers.removeFirst();
                if (!futureFooter.isDone()) {
                    footers.addLast(futureFooter);
                    continue;
                }
                ParquetMetadata footer = futureFooter.get();
                int currentPercent = (++i * n / statuses.size());
                while (currentPercent > previousPercent) {
                    System.out.print("*");
                    previousPercent++;
                }
                add(footer);
            }
            System.out.println("");
            long t1 = System.currentTimeMillis();
            System.out.println("read all footers in " + (t1 - t0) + " ms");
        } finally {
            threadPool.shutdownNow();
        }
    }
    Set<Entry<ColumnDescriptor, ColStats>> entries = stats.entrySet();
    long total = 0;
    long totalUnc = 0;
    for (Entry<ColumnDescriptor, ColStats> entry : entries) {
        ColStats colStats = entry.getValue();
        total += colStats.allStats.total;
        totalUnc += colStats.uncStats.total;
    }

    for (Entry<ColumnDescriptor, ColStats> entry : entries) {
        ColStats colStats = entry.getValue();
        System.out.println(
                entry.getKey() + " " + percent(colStats.allStats.total, total) + "% of all space " + colStats);
    }

    System.out.println("number of blocks: " + blockCount);
    System.out.println("total data size: " + humanReadable(total) + " (raw " + humanReadable(totalUnc) + ")");
    System.out.println("total record: " + humanReadable(recordCount));
    System.out.println("average block size: " + humanReadable(total / blockCount) + " (raw "
            + humanReadable(totalUnc / blockCount) + ")");
    System.out.println("average record count: " + humanReadable(recordCount / blockCount));
}

From source file:parquet.hadoop.TestParquetFileWriter.java

License:Apache License

@Test
public void testMetaDataFile() throws Exception {

    File testDir = new File("target/test/TestParquetFileWriter/testMetaDataFileDir").getAbsoluteFile();

    Path testDirPath = new Path(testDir.toURI());
    Configuration configuration = new Configuration();

    final FileSystem fs = testDirPath.getFileSystem(configuration);
    fs.delete(testDirPath, true);
    fs.mkdirs(testDirPath);

    MessageType schema = MessageTypeParser.parseMessageType(
            "message m { required group a {required binary b;} required group c { required int64 d; }}");
    createFile(configuration, new Path(testDirPath, "part0"), schema);
    createFile(configuration, new Path(testDirPath, "part1"), schema);
    createFile(configuration, new Path(testDirPath, "part2"), schema);

    FileStatus outputStatus = fs.getFileStatus(testDirPath);
    List<Footer> footers = ParquetFileReader.readAllFootersInParallel(configuration, outputStatus);
    validateFooters(footers);
    ParquetFileWriter.writeMetadataFile(configuration, testDirPath, footers);

    footers = ParquetFileReader.readFooters(configuration, outputStatus);
    validateFooters(footers);
    footers = ParquetFileReader.readFooters(configuration, fs.getFileStatus(new Path(testDirPath, "part0")));
    assertEquals(1, footers.size());

    final FileStatus metadataFile = fs
            .getFileStatus(new Path(testDirPath, ParquetFileWriter.PARQUET_METADATA_FILE));
    final List<Footer> metadata = ParquetFileReader.readSummaryFile(configuration, metadataFile);

    validateFooters(metadata);

    footers = ParquetFileReader.readAllFootersInParallelUsingSummaryFiles(configuration,
            Arrays.asList(fs.listStatus(testDirPath, new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    return !p.getName().startsWith("_");
                }
            })));
    validateFooters(footers);

    fs.delete(metadataFile.getPath(), false);

    footers = ParquetFileReader.readAllFootersInParallelUsingSummaryFiles(configuration,
            Arrays.asList(fs.listStatus(testDirPath)));
    validateFooters(footers);

}

From source file:ph.fingra.hadoop.mapred.common.CopyToLocalFile.java

License:Apache License

public void dirToFile(String srcdir, String dstfile) throws IOException {

    FileSystem fs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem local = FileSystem.getLocal(getConf());
    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete the destination local file if it already exists
    if (local.exists(dstPath)) {
        local.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = fs.listStatus(srcPath, resultFileFilter);

    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create local output stream
        FSDataOutputStream out = local.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = fs.open(listedPaths[i]);
            byte buffer[] = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }

    return;
}

From source file:ph.fingra.hadoop.mapred.common.CopyWithinHdfsFile.java

License:Apache License

public void dirToFile(String srcdir, String dstfile) throws IOException {

    FileSystem shfs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem thfs = FileSystem.get(URI.create(dstfile), getConf());

    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete the destination HDFS file if it already exists
    if (thfs.exists(dstPath)) {
        thfs.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = shfs.listStatus(srcPath, resultFileFilter);

    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create hdfs output stream
        FSDataOutputStream out = thfs.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = shfs.open(listedPaths[i]);
            byte buffer[] = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }

    return;
}

From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java

License:Apache License

public static int getDateMatchedFileCount(Path srcpath) throws IOException {

    int count = 0;
    Path parentPath = null;
    String date_ext = null;

    // directory path
    parentPath = srcpath.getParent();

    // date pattern
    Pattern p = Pattern.compile("([0-9]{4})\\-([0-9]{2})\\-([0-9]{2})");

    Matcher m = p.matcher(srcpath.getName());

    if (m.find()) {
        // suffix of the file name, e.g. "yyyy-MM-dd.txt"
        date_ext = srcpath.getName().substring(m.start()/*, m.end()*/);
    } else {
        // no date suffix in the source file name, so no files can match
        return 0;
    }

    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);

    // get matched file list
    final String suffix = date_ext;
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(suffix);
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(parentPath, resultFileFilter);

        if (status != null) {
            Path[] listedPaths = FileUtil.stat2Paths(status);

            if (listedPaths != null) {
                count = listedPaths.length;
            }
        }
    } catch (FileNotFoundException ignore) {
        // ignore: no matching files
    } catch (InvalidInputException ignore) {
        // ignore: Hadoop 1.x throws InvalidInputException rather than FileNotFoundException here
    }

    return count;
}

From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java

License:Apache License

public static boolean deleteNBackupFile(String srcdir, String srcfile, int maxcount, String runday,
        final String dbfnameprefix) throws IOException {

    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);

    Path targetPath = null;
    Path rootPath = new Path(srcdir);
    Path sourcePath = new Path(srcfile);
    String target_day = "";
    String target_file = "";
    boolean success = false;

    // if srcfile does not exist, skip the backup and report success
    if (hdfs.exists(sourcePath) == false) {
        return true;
    }

    // build the backup file name from yesterday's date
    target_day = DateTimeUtil.addDays(runday, -1, "yyyyMMdd");
    target_file = srcfile + "-" + target_day;
    //System.out.println("target_file - " + target_file);
    targetPath = new Path(target_file);

    // delete any existing backup with the same name, then rename the source file to the backup name
    if (hdfs.exists(new Path(target_file))) {
        hdfs.delete(targetPath, true);
    }
    success = hdfs.rename(sourcePath, targetPath);

    // get backup file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dbfnameprefix + "-");
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(rootPath, resultFileFilter);

        Path[] listedPaths = FileUtil.stat2Paths(status);

        // delete more than maximum number of backup files
        if (listedPaths.length > maxcount) {

            Comparator<Path> c = new Comparator<Path>() {
                public int compare(Path o1, Path o2) {
                    // reverse lexicographic order of the names, so the newest backups come first
                    return o2.getName().compareTo(o1.getName());
                }
            };

            Arrays.sort(listedPaths, c);

            for (int i = maxcount; i < listedPaths.length; i++) {
                Path path = listedPaths[i];
                hdfs.delete(path, true);
            }
        }
    } catch (FileNotFoundException ignore) {
        // ignore: no matching files
    } catch (InvalidInputException ignore) {
        // ignore: Hadoop 1.x throws InvalidInputException rather than FileNotFoundException here
    }

    return success;
}
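
A hedged example of calling the backup helper above; the directory, file names, retention count and run date are invented values for illustration, not taken from the project:

// keep at most 7 dated backups of a hypothetical status file
boolean ok = HdfsFileUtil.deleteNBackupFile("/fingra/db", "/fingra/db/app_status.db",
        7, "20140301", "app_status.db");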

From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java

License:Apache License

public static boolean deleteOriginFiles(FingraphConfig config, String year, String month, String day)
        throws IOException {

    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);

    String root_uri = config.getHadoop_user_path() + (config.getHadoop_user_path().endsWith("/") ? "" : "/")
            + config.getSetting().getHfs_input_path()
            + (config.getSetting().getHfs_input_path().endsWith("/") ? "" : "/");
    root_uri = root_uri.replaceAll("\\{yyyy\\}", year);
    root_uri = root_uri.replaceAll("\\{MM\\}", month);
    root_uri = root_uri.replaceAll("\\{dd\\}", day);
    String file_uri = config.getSetting().getOrigin_input_file();
    file_uri = file_uri.replaceAll("\\{yyyy\\}", year);
    file_uri = file_uri.replaceAll("\\{MM\\}", month);
    file_uri = file_uri.replaceAll("\\{dd\\}", day);
    file_uri = file_uri.replace("*", "[\\w]*");
    final String patt = "^" + file_uri + "$";
    //System.out.println(patt);

    Path rootPath = new Path(root_uri);
    boolean success = false;

    // get matched file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches(patt);
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(rootPath, resultFileFilter);

        if (status != null) {
            Path[] listedPaths = FileUtil.stat2Paths(status);

            if (listedPaths != null) {
                for (Path path : listedPaths) {
                    success = hdfs.delete(path, true);
                }
            }
        }
    } catch (FileNotFoundException ignore) {
        // ignore: no matching files
    } catch (InvalidInputException ignore) {
        // ignore: Hadoop 1.x throws InvalidInputException rather than FileNotFoundException here
    }

    return success;
}