Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileStatus getPath.

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java

License:Apache License

/**
 * Builds one {@code TestLocalStreamService} per source cluster named in the
 * given config, runs its job against an empty input path and checks the
 * counter files and file-system settings the job produced.
 *
 * @param configName name of the config handed to {@code ConduitConfigParser}
 * @param currentClusterName name of the cluster to use as the current cluster;
 *          may be {@code null}, in which case the services are created with a
 *          {@code null} current cluster
 * @throws Exception if config parsing, job execution or file-system access fails
 */
private void testClusterName(String configName, String currentClusterName) throws Exception {
    ConduitConfigParser parser = new ConduitConfigParser(configName);
    ConduitConfig config = parser.getConfig();
    Set<String> streamsToProcess = new HashSet<String>();
    streamsToProcess.addAll(config.getSourceStreams().keySet());
    Set<String> clustersToProcess = new HashSet<String>();
    Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
    Cluster currentCluster = null;
    // Collect every cluster that acts as a source for at least one stream.
    for (SourceStream sStream : config.getSourceStreams().values()) {
        for (String cluster : sStream.getSourceClusters()) {
            clustersToProcess.add(cluster);
        }
    }
    if (currentClusterName != null) {
        currentCluster = config.getClusters().get(currentClusterName);
    }
    for (String clusterName : clustersToProcess) {
        Cluster cluster = config.getClusters().get(clusterName);
        cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
        TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster,
                new NullCheckPointProvider(), streamsToProcess);
        services.add(service);
    }

    for (TestLocalStreamService service : services) {
        FileSystem fs = service.getFileSystem();
        service.preExecute();
        if (currentClusterName != null) {
            Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName);
        }
        // creating a job with empty input path
        Path tmpJobInputPath = new Path("/tmp/job/input/path");
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
        Job testJobConf = service.createJob(tmpJobInputPath, 1000);
        testJobConf.waitForCompletion(true);

        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        Path outputCounterPath = new Path(new Path(service.getCluster().getTmpPath(), service.getName()),
                "counters");
        FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus fileSt : statuses) {
            Scanner scanner = new Scanner(fs.open(fileSt.getPath()));
            try {
                while (scanner.hasNext()) {
                    String counterNameValue = null;
                    try {
                        counterNameValue = scanner.next();
                        String[] tmp = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                        Assert.assertEquals(4, tmp.length);
                        long numOfMsgs = Long.parseLong(tmp[3]);
                        numberOfCountersPerFile++;
                        sumOfCounterValues += numOfMsgs;
                    } catch (Exception e) {
                        LOG.error("Counters file has malformed line with counter name =" + counterNameValue
                                + "..skipping the line", e);
                    }
                }
            } finally {
                // Closing the scanner also closes the stream returned by fs.open,
                // so the file handle is released even if an assertion fails.
                scanner.close();
            }
        }
        // Should have 2 counters for each file
        Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile);
        // sum of all counter values should be equal to total number of messages
        Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues);

        Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY),
                service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        if (currentCluster == null) {
            Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                    testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));
        }
        // Remove everything the service wrote under the cluster root.
        service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
    }

}

From source file:com.inmobi.conduit.purge.DataPurgerService.java

License:Apache License

/**
 * Scans the cluster's trash root (date directories containing hour
 * directories) and adds every entry that is due for purging to
 * {@code streamsToPurge}.
 *
 * @throws Exception if listing the trash directories fails
 */
private void getTrashPathsToPurge() throws Exception {
    Path trashRoot = cluster.getTrashPath();
    LOG.debug("Looking for trashPaths in [" + trashRoot + "]");
    FileStatus[] dateDirs = getAllFilesInDir(trashRoot, fs);
    if (dateDirs == null || dateDirs.length == 0) {
        return;
    }

    for (FileStatus dateDir : dateDirs) {
        FileStatus[] hourDirs = getAllFilesInDir(dateDir.getPath(), fs);
        boolean hasHourDirs = hourDirs != null && hourDirs.length > 0;

        if (!hasHourDirs) {
            try {
                /*
                 * The date directory is empty. Measure the time elapsed between
                 * its last hour (the 23rd) and now, and purge the date directory
                 * only when that exceeds the trash path retention hours.
                 */
                Calendar lastHourOfDate = getDateFromTrashPath(dateDir.getPath().getName(), "23");
                if (isPurge(lastHourOfDate, getTrashPathRetentionInHours())) {
                    streamsToPurge.add(dateDir.getPath().makeQualified(fs));
                }
            } catch (NumberFormatException e) {
                // The directory name does not parse as a date: purge it.
                streamsToPurge.add(dateDir.getPath().makeQualified(fs));
            }
            continue;
        }

        for (FileStatus hourDir : hourDirs) {
            try {
                Calendar hourDate = getDateFromTrashPath(dateDir.getPath().getName(),
                        hourDir.getPath().getName());
                if (isPurge(hourDate, getTrashPathRetentionInHours())) {
                    streamsToPurge.add(hourDir.getPath().makeQualified(fs));
                }
            } catch (NumberFormatException e) {
                // The date/hour names do not parse: purge the hour directory.
                streamsToPurge.add(hourDir.getPath().makeQualified(fs));
            }
        }
    }
}

From source file:com.inmobi.conduit.purge.DataPurgerService.java

License:Apache License

/**
 * Lists the entries directly under {@code root} and maps each entry's name to
 * its path qualified against {@code fs}.
 *
 * @param root directory whose children are listed
 * @return map from entry name to qualified path; empty when the listing is null
 * @throws Exception if listing the directory fails
 */
private Map<String, Path> getStreamsInCluster(String root) throws Exception {
    Map<String, Path> streamsByName = new HashMap<String, Path>();
    LOG.debug("Find streams in [" + root + "]");
    FileStatus[] entries = getAllFilesInDir(new Path(root), fs);
    if (entries == null) {
        LOG.debug("No streams found in [" + root + "]");
        return streamsByName;
    }
    for (FileStatus entry : entries) {
        Path streamPath = entry.getPath();
        streamsByName.put(streamPath.getName(), streamPath.makeQualified(fs));
        LOG.debug("Purger working for stream [" + streamPath + "]");
    }
    return streamsByName;
}

From source file:com.inmobi.conduit.purge.DataPurgerService.java

License:Apache License

/**
 * Walks the year/month/day/hour directory tree under every stream root and
 * records what has to be purged.
 *
 * <p>An hour directory is purged when {@code isPurge} accepts its date against
 * the stream's retention period. A day, month or year directory that has no
 * children is purged unconditionally. Each purged path is added to
 * {@code streamsToPurge} and reported through {@code addPartitionToList}.
 *
 * @param streamPathMap stream name mapped to the stream's root path
 * @param isLocal selects {@code LOCAL_TABLE_PREFIX} rather than
 *          {@code TABLE_PREFIX} when building the table name
 * @throws Exception if listing any directory fails
 */
private void getStreamsPathToPurge(Map<String, Path> streamPathMap, boolean isLocal) throws Exception {
    for (Map.Entry<String, Path> entry : streamPathMap.entrySet()) {
        String streamName = entry.getKey();
        Path streamRootPath = entry.getValue();
        String tableName;
        if (isLocal) {
            tableName = LOCAL_TABLE_PREFIX + "_" + streamName;
        } else {
            tableName = TABLE_PREFIX + "_" + streamName;
        }
        LOG.debug(
                "Find Paths to purge for stream [" + streamName + "] streamRootPath [" + streamRootPath + "]");

        // For each stream, all years
        FileStatus[] years = getAllFilesInDir(streamRootPath, fs);
        if (years == null) {
            continue;
        }
        for (FileStatus year : years) {
            String yearVal = year.getPath().getName();

            // For each month
            FileStatus[] months = getAllFilesInDir(year.getPath(), fs);
            if (months == null || months.length == 0) {
                // No months found in year. Purge year.
                Path yearPath = year.getPath().makeQualified(fs);
                addPartitionToList(streamName, tableName, yearPath, yearVal);
                streamsToPurge.add(yearPath);
                continue;
            }
            for (FileStatus month : months) {
                String monthVal = month.getPath().getName();

                // For each day
                FileStatus[] days = getAllFilesInDir(month.getPath(), fs);
                if (days == null || days.length == 0) {
                    // No day found in month. Purge month.
                    Path monthPath = month.getPath().makeQualified(fs);
                    addPartitionToList(streamName, tableName, monthPath, yearVal, monthVal);
                    streamsToPurge.add(monthPath);
                    continue;
                }
                for (FileStatus day : days) {
                    String dayVal = day.getPath().getName();

                    // For each hour
                    FileStatus[] hours = getAllFilesInDir(day.getPath(), fs);
                    if (hours == null || hours.length == 0) {
                        // No hour found in day. Purge day.
                        Path dayPath = day.getPath().makeQualified(fs);
                        addPartitionToList(streamName, tableName, dayPath, yearVal, monthVal,
                                dayVal);
                        streamsToPurge.add(dayPath);
                        continue;
                    }
                    for (FileStatus hour : hours) {
                        LOG.debug("Working for hour [" + hour.getPath() + "]");

                        String hourVal = hour.getPath().getName();
                        Calendar streamDate = CalendarHelper.getDateHour(yearVal, monthVal,
                                dayVal, hourVal);
                        LOG.debug("Validate [" + streamDate.toString()
                                + "] against retentionHours [" + getRetentionPeriod(streamName)
                                + "]");
                        if (isPurge(streamDate, getRetentionPeriod(streamName))) {
                            LOG.debug("Adding stream to purge [" + hour.getPath() + "]");
                            Path hourPath = hour.getPath().makeQualified(fs);
                            addPartitionToList(streamName, tableName, hourPath, yearVal,
                                    monthVal, dayVal, hourVal);
                            streamsToPurge.add(hourPath);
                        }
                    } // each hour
                } // each day
            } // each month
        } // each year
    } // each stream
}

From source file:com.inmobi.conduit.utils.DatePathComparator.java

License:Apache License

@Override
public int compare(FileStatus fileStatus, FileStatus fileStatus1) {

    /*/* w w w . ja  va  2 s.c  o  m*/
    * Path eg:
    * <rootdir>/system/distcp_mirror_srcCluster_destCluster/conduit/streams
    * /<stream-Name>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07
    * -21_00000.gz
    *
    * in some cases paths can empty without files
    * eg:   /conduit/system/distcp_mirror_srcCluster_destCluster/conduit
    * /streams/<streamName>/2012/1/13/15/7/
    */

    Path streamDir = null;
    Path streamDir1 = null;
    Date streamDate1 = null;
    Date streamDate = null;

    if (fileStatus != null) {
        if (!fileStatus.isDir())
            streamDir = fileStatus.getPath().getParent();
        else
            streamDir = fileStatus.getPath();
        Path streamDirPrefix = streamDir.getParent().getParent().getParent().getParent().getParent();

        streamDate = CalendarHelper.getDateFromStreamDir(streamDirPrefix, streamDir);
    }

    if (fileStatus1 != null) {
        if (!fileStatus1.isDir())
            streamDir1 = fileStatus1.getPath().getParent();
        else
            streamDir1 = fileStatus1.getPath();

        Path streamDirPrefix1 = streamDir1.getParent().getParent().getParent().getParent().getParent();

        streamDate1 = CalendarHelper.getDateFromStreamDir(streamDirPrefix1, streamDir1);
    }

    if (streamDate != null && streamDate1 != null)
        return streamDate.compareTo(streamDate1);
    else
        return -1;

}

From source file:com.inmobi.databus.AbstractService.java

License:Apache License

/**
 * Returns the child of {@code Dir} whose name is greatest in
 * {@link String#compareTo(String)} order.
 *
 * @param fs file system used to list the directory
 * @param Dir directory whose children are examined
 * @return path of the child with the greatest name, or {@code null} when the
 *         listing is null or empty
 * @throws Exception if listing the directory fails
 */
private Path getLatestDir(FileSystem fs, Path Dir) throws Exception {
    FileStatus[] children = fs.listStatus(Dir);
    if (children == null || children.length == 0) {
        return null;
    }

    FileStatus latest = children[0];
    String latestName = latest.getPath().getName();
    for (int i = 1; i < children.length; i++) {
        String candidateName = children[i].getPath().getName();
        // Strictly greater only, so the first of equal names is kept.
        if (candidateName.compareTo(latestName) > 0) {
            latest = children[i];
            latestName = candidateName;
        }
    }
    return latest.getPath();
}

From source file:com.inmobi.databus.AbstractService.java

License:Apache License

/**
 * Calls the four-argument {@code publishMissingPaths} overload once for every
 * entry directly under {@code destDir}, passing the current time, and collects
 * the non-empty results keyed by entry name.
 *
 * @param fs file system used to list {@code destDir}
 * @param destDir directory whose children are processed
 * @return map from entry name to the paths returned for it; entries that
 *         produced an empty set are omitted
 * @throws Exception if listing or the per-entry call fails
 */
protected Map<String, Set<Path>> publishMissingPaths(FileSystem fs, String destDir) throws Exception {
    Map<String, Set<Path>> missingByName = new HashMap<String, Set<Path>>();
    FileStatus[] entries = fs.listStatus(new Path(destDir));
    LOG.info("Create All the Missing Paths in " + destDir);
    if (entries != null) {
        for (FileStatus entry : entries) {
            String entryName = entry.getPath().getName();
            Set<Path> published = publishMissingPaths(fs, destDir, System.currentTimeMillis(), entryName);
            if (!published.isEmpty()) {
                missingByName.put(entryName, published);
            }
        }
    }
    LOG.info("Done Creating All the Missing Paths in " + destDir);
    return missingByName;
}

From source file:com.inmobi.databus.distcp.MirrorStreamService.java

License:Apache License

/**
 * Builds the ordered mapping from every path found under the temporary distcp
 * output to the path it is committed to.
 *
 * <p>{@code tmpOut} looks like
 * {@code /databus/system/tmp/distcp_mirror_<srcCluster>_<destCluster>/}. After
 * distcp, paths inside it look like
 * {@code /databus/system/distcp_mirror_ua1_uj1/databus/streams/<streamName>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07-21_00000.gz}.
 *
 * @param tmpOut temporary output directory of the distcp run
 * @return insertion-ordered map from listed entry to its commit path; empty
 *         when the stream root listing is null
 * @throws Exception if listing the destination file system fails
 */
LinkedHashMap<FileStatus, Path> prepareForCommit(Path tmpOut) throws Exception {
    // tmpStreamRoot eg:
    // /databus/system/tmp/distcp_mirror_<srcCluster>_<destCluster>/databus/streams/
    String qualifiedTmpOut = tmpOut.makeQualified(getDestFs()).toString();
    Path tmpStreamRoot = new Path(
            qualifiedTmpOut + File.separator + getSrcCluster().getUnqaulifiedFinalDestDirRoot());
    LOG.debug("tmpStreamRoot [" + tmpStreamRoot + "]");

    // Multiple streams can get mirrored from the same cluster. Streams can be
    // processed in any order, but the order of paths within a stream has to
    // be retained.
    FileStatus[] streamRoots = getDestFs().listStatus(tmpStreamRoot);

    // LinkedHashMap retains the order in which commit paths are added.
    LinkedHashMap<FileStatus, Path> commitPaths = new LinkedHashMap<FileStatus, Path>();
    if (streamRoots == null) {
        return commitPaths;
    }

    for (FileStatus streamRoot : streamRoots) {
        // For each stream: list the paths in order of YYYY/mm/DD/HH/MM.
        Path streamRootPath = streamRoot.getPath();
        LOG.debug("StreamRoot [" + streamRootPath + "] streamName [" + streamRootPath.getName()
                + "]");
        List<FileStatus> listing = new ArrayList<FileStatus>();
        createListing(getDestFs(), streamRoot, listing);
        Collections.sort(listing, new DatePathComparator());
        LOG.debug("createListing size: [" + listing.size() + "]");
        createCommitPaths(commitPaths, listing);
    }
    return commitPaths;
}

From source file:com.inmobi.databus.distcp.MirrorStreamService.java

License:Apache License

private void createCommitPaths(LinkedHashMap<FileStatus, Path> commitPaths, List<FileStatus> streamPaths) {
    /*  Path eg in streamPaths -
     *  /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams
     *  /<streamName>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07
     *  -21_00000.gz/*from   w ww. j a  va  2 s. c o m*/
     *
     * or it could be an emptyDir like
     *  /* Path eg in streamPaths -
     *  /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams
     *  /<streamName>/2012/1/13/15/7/
     *
     */

    for (FileStatus fileStatus : streamPaths) {
        String fileName = null;

        Path prefixDir = null;
        if (fileStatus.isDir()) {
            //empty directory
            prefixDir = fileStatus.getPath();
        } else {
            fileName = fileStatus.getPath().getName();
            prefixDir = fileStatus.getPath().getParent();
        }

        Path min = prefixDir;
        Path hr = min.getParent();
        Path day = hr.getParent();
        Path month = day.getParent();
        Path year = month.getParent();
        Path streamName = year.getParent();

        String finalPath = getDestCluster().getFinalDestDirRoot() + File.separator + streamName.getName()
                + File.separator + year.getName() + File.separator + month.getName() + File.separator
                + day.getName() + File.separator + hr.getName() + File.separator + min.getName();

        if (fileName != null) {
            finalPath += File.separator + fileName;
        }

        commitPaths.put(fileStatus, new Path(finalPath));
        LOG.debug("Going to commit [" + fileStatus.getPath() + "] to [" + finalPath + "]");
    }

}

From source file:com.inmobi.databus.distcp.MirrorStreamService.java

License:Apache License

/**
 * Recursively collects every file and every empty directory found at or below
 * {@code fileStatus} into {@code results}.
 *
 * <p>A directory whose listing comes back {@code null} is logged and skipped
 * rather than dereferenced.
 *
 * @param fs file system used to list directories
 * @param fileStatus entry to start from
 * @param results list that receives the collected entries
 * @throws IOException if listing a directory fails
 */
void createListing(FileSystem fs, FileStatus fileStatus, List<FileStatus> results) throws IOException {
    if (!fileStatus.isDir()) {
        LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]");
        results.add(fileStatus);
        return;
    }

    FileStatus[] stats = fs.listStatus(fileStatus.getPath());
    if (stats == null) {
        // Some FileSystem versions return null instead of throwing when the
        // path cannot be listed; there is nothing to collect in that case.
        LOG.warn("createListing :: Unable to list [" + fileStatus.getPath() + "], skipping");
        return;
    }
    if (stats.length == 0) {
        // An empty directory is itself recorded in the results.
        results.add(fileStatus);
        LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]");
    }
    for (FileStatus stat : stats) {
        createListing(fs, stat, results);
    }
}