Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of the getPath() method of org.apache.hadoop.fs.FileStatus.

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.ikanow.aleph2.core.shared.utils.DirUtils.java

License:Apache License

/**
 * @param allPaths/*from w w  w.j a va2 s . c o m*/
 * @param fileContext
 * @param start
 * @param subDirectoryName
 * @param includeMatched
 */
public static void findAllSubdirectories(List<Path> allPaths, FileContext fileContext, Path start,
        String subDirectoryName, boolean includeMatched) {
    try {
        logger.debug("findAllSubdirectories :" + start.toString());
        FileStatus[] statuss = fileContext.util().listStatus(start);
        for (int i = 0; i < statuss.length; i++) {
            FileStatus dir = statuss[i];
            logger.debug("FileStatus:" + statuss[i].getPath().toString());
            if (dir.isDirectory()) {
                if (dir.getPath().getName().contains(subDirectoryName)) {
                    logger.debug("findOneSubdirectory match:" + dir.getPath().getName());
                    if (includeMatched) {
                        allPaths.add(dir.getPath());
                    } else {
                        allPaths.add(dir.getPath().getParent());
                    }
                } else {
                    findAllSubdirectories(allPaths, fileContext, dir.getPath(), subDirectoryName,
                            includeMatched);
                }
            }
        }

    } catch (Exception e) {
        logger.error("findAllSubdirectories Caught Exception", e);
    }
}

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

/**
 * Reads the {@code part-*} text output files of a custom map/reduce job and converts every
 * line into a {@link BasicDBObject}.
 *
 * <p>Each line is split on the first tab character: the text before the tab is stored under
 * {@code "key"} and the remainder under {@code "value"}. A line without a tab is stored
 * under {@code "value"} only. Zero-length files are skipped.
 *
 * @param cmr    job whose output directory is resolved via {@code HadoopUtils.getPathForJob}
 * @param nLimit not read by this implementation
 * @param fields not read by this implementation
 * @return list with one object per line read, in file and line order; empty if no part
 *         files are found
 * @throws IOException if the file system cannot be accessed or a file cannot be read
 * @throws SAXException declared for callers; not thrown directly by the code in this method
 * @throws ParserConfigurationException declared for callers; not thrown directly by the
 *         code in this method
 */
public static BasicDBList getBsonFromTextFiles(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws IOException, SAXException, ParserConfigurationException {

    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] files = fs.globStatus(new Path(pathDir.toString() + "/part-*"));
    if (null == files) {
        // Defensive: treat "nothing matched" reported as null like an empty match.
        return dbl;
    }
    for (FileStatus file : files) {
        if (file.getLen() > 0) {
            FSDataInputStream in = fs.open(file.getPath());
            try {
                // Decode explicitly as UTF-8 rather than with the platform default charset.
                BufferedReader bin = new BufferedReader(new InputStreamReader(in, "UTF-8"));
                for (;;) {
                    String s = bin.readLine();
                    if (null == s)
                        break;

                    String[] keyValue = s.split("\t", 2);
                    BasicDBObject dbo = new BasicDBObject();
                    if (keyValue.length > 1) {
                        dbo.put("key", keyValue[0]);
                        dbo.put("value", keyValue[1]);
                    } else {
                        dbo.put("value", keyValue[0]);
                    }
                    dbl.add(dbo);
                }
            } finally {
                // Release the stream even when reading fails part-way through a file.
                in.close();
            }
        }
    }
    return dbl;
}

From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java

License:Apache License

/**
 * Scans the sub-directories of {@code toIndexPath} for files that can be indexed.
 *
 * <p>Only directories whose name matches {@code ALLOWED_INDEX_NAMES} are considered; others
 * are logged and skipped. Inside such a directory every non-directory entry whose name ends
 * with one of {@code ALLOWED_FILE_EXT} yields a {@code FileToIndex} built from the file name
 * without that extension, the directory name and the file path. Files with any other
 * extension are logged and skipped.
 *
 * @return the files found, in listing order
 * @throws RuntimeException wrapping any {@link IOException} raised while listing
 */
private List<FileToIndex> findNewFilesToIndex() {
    final List<FileToIndex> found = Lists.newArrayList();
    try {
        for (FileStatus indexDir : inputFS.listStatus(toIndexPath)) {
            if (indexDir.isDir()) {
                final Path indexPath = indexDir.getPath();
                final String indexName = indexPath.getName();
                if (indexName.matches(ALLOWED_INDEX_NAMES)) {
                    for (FileStatus entry : inputFS.listStatus(indexPath)) {
                        if (!entry.isDir()) {
                            final Path filePath = entry.getPath();
                            final String fullName = filePath.getName();

                            // First allowed extension that the file name ends with, if any.
                            String matchedExt = null;
                            for (String allowedExt : ALLOWED_FILE_EXT) {
                                if (fullName.endsWith(allowedExt)) {
                                    matchedExt = allowedExt;
                                    break;
                                }
                            }

                            if (matchedExt != null) {
                                final String baseName = fullName.substring(0,
                                        fullName.length() - matchedExt.length());
                                found.add(new FileToIndex(baseName, indexName, filePath));
                            } else {
                                log.info("Not one of supported extensions ("
                                        + StringUtils.join(ALLOWED_FILE_EXT, ", ") + ") file: " + filePath);
                            }
                        }
                    }
                } else {
                    log.info("Skipped directory " + indexPath + ". Index names should match regex "
                            + ALLOWED_INDEX_NAMES);
                }
            }
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
    return found;
}

From source file:com.indeed.iupload.core.filesystem.HDFSProxy.java

License:Apache License

/**
 * Lists the names of the non-directory entries found directly under {@code path}.
 *
 * @param path location to list
 * @return names (last path component) of the non-directory entries; an empty list when
 *         {@code path} does not exist
 * @throws IOException if the file system cannot be queried
 */
@Override
public List<String> listFiles(String path) throws IOException {
    final Path target = new Path(path);
    final List<String> fileNames = new ArrayList<String>();
    if (getFileSystem(path).exists(target)) {
        for (FileStatus entry : getFileSystem(path).listStatus(target)) {
            if (entry.isDirectory()) {
                continue;
            }
            fileNames.add(entry.getPath().getName());
        }
    }
    return fileNames;
}

From source file:com.indeed.iupload.core.filesystem.HDFSProxy.java

License:Apache License

/**
 * Lists the names of the directories found directly under {@code path}.
 *
 * @param path location to list
 * @return names (last path component) of the directory entries; an empty list when
 *         {@code path} does not exist
 * @throws IOException if the file system cannot be queried
 */
@Override
public List<String> listDirs(String path) throws IOException {
    final Path target = new Path(path);
    final List<String> dirNames = new ArrayList<String>();
    if (getFileSystem(path).exists(target)) {
        for (FileStatus entry : getFileSystem(path).listStatus(target)) {
            if (!entry.isDirectory()) {
                continue;
            }
            dirNames.add(entry.getPath().getName());
        }
    }
    return dirNames;
}

From source file:com.inmobi.conduit.AbstractService.java

License:Apache License

/**
 * Returns the child of {@code Dir} whose name is greatest in {@link String#compareTo}
 * order.
 *
 * @param fs  file system used for the listing
 * @param Dir directory whose children are compared
 * @return path of the child with the greatest name, or {@code null} when {@code Dir} does
 *         not exist, the listing is {@code null}, or it has no children
 * @throws Exception any failure of the listing other than {@link FileNotFoundException}
 */
private Path getLatestDir(FileSystem fs, Path Dir) throws Exception {
    FileStatus[] children = null;
    try {
        children = fs.listStatus(Dir);
    } catch (FileNotFoundException fe) {
        // A missing directory is treated like an empty one: children stays null.
    }

    if (children == null || children.length == 0) {
        return null;
    }

    FileStatus newest = children[0];
    for (int i = 1; i < children.length; i++) {
        final String candidateName = children[i].getPath().getName();
        if (candidateName.compareTo(newest.getPath().getName()) > 0) {
            newest = children[i];
        }
    }
    return newest.getPath();
}

From source file:com.inmobi.conduit.AbstractService.java

License:Apache License

/**
 * Lists the entries under {@code path} whose full path string contains {@code "part"}.
 *
 * <p>An {@link IOException} from the listing is logged and not rethrown; whatever was
 * collected up to that point (possibly nothing) is returned.
 *
 * @param path directory to list
 * @param fs   file system used for the listing
 * @return paths of the accepted entries, in listing order
 */
private List<Path> listPartFiles(Path path, FileSystem fs) {
    final PathFilter partFilter = new PathFilter() {
        public boolean accept(Path candidate) {
            return candidate.toString().indexOf("part") >= 0;
        }
    };

    final List<Path> partFiles = new LinkedList<Path>();
    try {
        final FileStatus[] listed = fs.listStatus(path, partFilter);
        for (int i = 0; i < listed.length; i++) {
            partFiles.add(listed[i].getPath());
        }
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
    }
    return partFiles;
}

From source file:com.inmobi.conduit.distcp.DistcpBaseService.java

License:Apache License

/**
 * Builds the DistCp input for every stream in {@code streamsToProcess}.
 *
 * <p>For each stream the method determines a starting directory, either the minute
 * directory following the stored checkpoint or, when no usable checkpoint exists, the one
 * returned by {@code getStartingDirectory}. It then walks forward one minute directory at a
 * time, adding the entries of each directory to the result. A directory is only consumed
 * while the directory after it already exists in the source file system.
 *
 * <p>Side effects: for every stream where at least one directory was consumed,
 * {@code checkPointPaths} and {@code lastProcessedFile} are updated with the last directory
 * added.
 *
 * @return map from final destination path (as returned by {@code getFinalDestinationPath})
 *         to the source {@link FileStatus} to copy; entries whose destination path is
 *         {@code null} are left out
 * @throws Exception propagated from the checkpoint provider, the file system calls and the
 *         helper methods used here
 */
protected Map<String, FileStatus> getDistCPInputFile() throws Exception {
    Map<String, FileStatus> result = new HashMap<String, FileStatus>();
    for (String stream : streamsToProcess) {
        int pathsAlreadyAdded = 0;
        LOG.info("Processing stream " + stream);
        byte[] value = provider.read(getCheckPointKey(stream));
        Path inputPath = new Path(getInputPath(), stream);
        Path lastCheckPointPath = null;
        Path nextPath = null;
        List<FileStatus> filesLastCopiedDir;
        if (value != null) {
            // NOTE(review): new String(byte[]) decodes with the platform default charset.
            String checkPointValue = new String(value);
            // Creating a Path from an empty string throws an exception, hence the
            // blank check before constructing it.
            if (!checkPointValue.trim().equals("")) {
                lastCheckPointPath = new Path(checkPointValue);
            }
            // Called even when lastCheckPointPath is still null (blank checkpoint); the
            // null check on its result below covers that case.
            lastCheckPointPath = fullyQualifyCheckPointWithReadURL(lastCheckPointPath, srcCluster);
            if (lastCheckPointPath == null || !getSrcFs().exists(lastCheckPointPath)) {
                LOG.warn("Invalid checkpoint found [" + lastCheckPointPath + "] for stream " + stream
                        + ";Ignoring it");
            } else {
                // Resume from the minute directory that follows the checkpointed one.
                Date lastDate = CalendarHelper.getDateFromStreamDir(inputPath, lastCheckPointPath);
                nextPath = CalendarHelper.getNextMinutePathFromDate(lastDate, inputPath);
            }

        }
        if (nextPath == null) {
            // No usable checkpoint: ask for a starting directory. filesLastCopiedDir is
            // passed in empty and read back afterwards, so the helper is expected to fill it.
            filesLastCopiedDir = new ArrayList<FileStatus>();
            LOG.info("Finding the starting directoryfor stream [" + stream + "]");
            nextPath = getStartingDirectory(stream, filesLastCopiedDir);
            if (nextPath == null) {
                // NOTE(review): the message says "returning", but the code moves on to the
                // next stream and keeps whatever is already in result.
                LOG.debug("No start directory found,returning the empty result");
                continue;
            }
            LOG.debug("Uncopied Files from directory last copied are "
                    + FileUtil.toStringOfFileStatus(filesLastCopiedDir));
            for (FileStatus fileStatus : filesLastCopiedDir) {
                String destnPath = getFinalDestinationPath(fileStatus);
                if (destnPath != null) {
                    LOG.info("Adding to input of Distcp.Move [" + fileStatus.getPath() + "] to " + destnPath);
                    result.put(destnPath, fileStatus);
                }
            }
        }
        LOG.info("Starting directory for stream [" + stream + "]" + " is [" + nextPath + "]");
        Date nextDate = CalendarHelper.getDateFromStreamDir(inputPath, nextPath);
        // Add nextPath only if the path after it (nextToNextPath) already exists, so that
        // the path being added to the DistCp input is not the current path.
        Path nextToNextPath = CalendarHelper.getNextMinutePathFromDate(nextDate, inputPath);
        Path lastPathAdded = null;
        FileStatus[] nextPathFileStatus;
        // NOTE(review): the counter starts at 0 and is compared with <=, so up to
        // numOfDirPerDistcpPerStream + 1 directories can be consumed; confirm this is intended.
        // NOTE(review): this loop reads the srcFs field directly while the checkpoint check
        // above goes through getSrcFs(); confirm both refer to the same file system.
        while (pathsAlreadyAdded <= numOfDirPerDistcpPerStream && srcFs.exists(nextToNextPath)
                && (nextPathFileStatus = FileUtil.listStatusAsPerHDFS(srcFs, nextPath)) != null) {
            if (nextPathFileStatus.length == 0) {
                // An empty directory is represented by the directory's own FileStatus.
                LOG.info(nextPath + " is an empty directory");
                FileStatus srcFileStatus = srcFs.getFileStatus(nextPath);
                String destnPath = getFinalDestinationPath(srcFileStatus);
                if (destnPath != null) {
                    LOG.info("Adding to input of Distcp.Move [" + nextPath + "] to " + destnPath);
                    result.put(destnPath, srcFileStatus);
                }
            } else {
                for (FileStatus fStatus : nextPathFileStatus) {
                    String destnPath = getFinalDestinationPath(fStatus);
                    if (destnPath != null) {
                        LOG.info("Adding to input of Distcp.Move [" + fStatus.getPath() + "] to " + destnPath);
                        result.put(destnPath, fStatus);
                    }
                }
            }
            // Advance the (nextPath, nextToNextPath) window by one minute.
            pathsAlreadyAdded++;
            lastPathAdded = nextPath;
            nextPath = nextToNextPath;
            nextDate = CalendarHelper.addAMinute(nextDate);
            nextToNextPath = CalendarHelper.getNextMinutePathFromDate(nextDate, inputPath);
        }
        if (lastPathAdded != null) {
            // Record the last consumed directory and its timestamp for this stream.
            checkPointPaths.put(stream, lastPathAdded);
            Date lastDateAdded = CalendarHelper.getDateFromStreamDir(inputPath, lastPathAdded);
            lastProcessedFile.put(stream, lastDateAdded.getTime());
        }

    }
    return result;
}

From source file:com.inmobi.conduit.distcp.DistcpBaseService.java

License:Apache License

/**
 * Recursively collects the leaves below {@code fileStatus} into {@code results}.
 *
 * <p>A non-directory is added as-is. A directory with no children is added itself. A
 * directory with children is not added; each child is processed recursively instead. A
 * directory whose listing is {@code null} contributes nothing.
 *
 * @param fs         file system used to list directories
 * @param fileStatus entry to start from
 * @param results    output list that collected entries are appended to
 * @throws IOException if a directory cannot be listed
 */
public static void createListing(FileSystem fs, FileStatus fileStatus, List<FileStatus> results)
        throws IOException {
    if (!fileStatus.isDir()) {
        LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]");
        results.add(fileStatus);
        return;
    }

    final FileStatus[] children = FileUtil.listStatusAsPerHDFS(fs, fileStatus.getPath());
    if (children == null) {
        // The listing can be null when the purger deleted the path while this method
        // was running; there is nothing to collect in that case.
        return;
    }

    if (children.length == 0) {
        results.add(fileStatus);
        LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]");
    } else {
        for (int i = 0; i < children.length; i++) {
            createListing(fs, children[i], results);
        }
    }
}

From source file:com.inmobi.conduit.distcp.MergedStreamService.java

License:Apache License

/**
 * Groups the files found under {@code tmpOut/<stream>} by stream, for every stream in
 * {@code streamsToProcess}.
 *
 * <p>A stream whose temporary directory does not exist, or whose listing is {@code null} or
 * empty, gets no entry in the returned map.
 *
 * @param tmpOut temporary output directory containing one sub-directory per stream
 * @return map from stream name to the paths listed under that stream's temporary directory
 * @throws Exception any listing failure other than {@link FileNotFoundException}
 */
private Map<String, List<Path>> prepareForCommit(Path tmpOut) throws Exception {
    Map<String, List<Path>> categoriesToCommit = new HashMap<String, List<Path>>();
    for (String stream : streamsToProcess) {
        Path tmpPathPerStream = new Path(tmpOut, stream);
        // Declared per iteration so that a failed listing cannot fall back to the
        // previous stream's files and commit them under this stream's name.
        FileStatus[] allFilesPerStream = null;
        try {
            allFilesPerStream = FileUtil.listStatusAsPerHDFS(getDestFs(), tmpPathPerStream);
        } catch (FileNotFoundException ignored) {
            // No temporary directory for this stream: nothing to commit for it.
        }

        if (allFilesPerStream == null || allFilesPerStream.length == 0) {
            continue;
        }

        List<Path> fileList = categoriesToCommit.get(stream);
        if (fileList == null) {
            fileList = new ArrayList<Path>();
            categoriesToCommit.put(stream, fileList);
        }
        for (FileStatus fileStatus : allFilesPerStream) {
            fileList.add(fileStatus.getPath());
        }
    }
    return categoriesToCommit;
}