Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of the getPath() method of org.apache.hadoop.fs.FileStatus.

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.inmobi.databus.purge.DataPurgerService.java

License:Apache License

/**
 * Collects the trash directories that are due for purging.
 *
 * Lists the entries under the cluster's trash root, then the entries under each of those,
 * and derives a date from the two directory names via getDateFromTrashPath(...). A
 * second-level entry is added to streamsToPurge when isPurge(...) accepts that date against
 * getTrashPathRetentionInHours(), or when a NumberFormatException is thrown while
 * evaluating it.
 *
 * @throws Exception propagated from the helpers called here
 */
private void getTrashPathsToPurge() throws Exception {
    Path trashRoot = cluster.getTrashPath();
    LOG.debug("Looking for trashPaths in [" + trashRoot + "]");

    FileStatus[] dateDirs = getAllFilesInDir(trashRoot, fs);
    if (dateDirs == null) {
        return;
    }
    for (FileStatus dateDir : dateDirs) {
        FileStatus[] hourDirs = getAllFilesInDir(dateDir.getPath(), fs);
        if (hourDirs == null) {
            continue;
        }
        for (FileStatus hourDir : hourDirs) {
            Path hourPath = hourDir.getPath();
            try {
                Calendar trashDate = getDateFromTrashPath(dateDir.getPath().getName(), hourPath.getName());
                if (isPurge(trashDate, getTrashPathRetentionInHours())) {
                    streamsToPurge.add(hourPath.makeQualified(fs));
                }
            } catch (NumberFormatException e) {
                // Names that cannot be parsed into a date are purged as well.
                streamsToPurge.add(hourPath.makeQualified(fs));
            }
        }
    }
}

From source file:com.inmobi.databus.purge.DataPurgerService.java

License:Apache License

/**
 * Walks the directory tree of every stream and queues the paths that should be purged.
 *
 * The tree is listed four levels deep; each level's directory name is passed to
 * CalendarHelper.getDateHour(...) as year, month, day and hour respectively. An hour
 * directory is added to streamsToPurge when isPurge(...) accepts its date against
 * getRetentionPeriod(streamName). A year, month or day directory whose listing is null or
 * empty is added to streamsToPurge itself.
 *
 * @param streamPathMap stream name mapped to the root path of that stream
 * @throws Exception propagated from the helpers called here
 */
private void getStreamsPathToPurge(Map<String, Path> streamPathMap) throws Exception {
    // Typed for-each instead of a raw Iterator plus unchecked cast.
    for (Map.Entry<String, Path> entry : streamPathMap.entrySet()) {
        String streamName = entry.getKey();
        Path streamRootPath = entry.getValue();
        LOG.debug(
                "Find Paths to purge for stream [" + streamName + "] streamRootPath [" + streamRootPath + "]");

        // For each stream, all years
        FileStatus[] years = getAllFilesInDir(streamRootPath, fs);
        if (years == null) {
            continue;
        }
        for (FileStatus year : years) {
            // For each month
            FileStatus[] months = getAllFilesInDir(year.getPath(), fs);
            if (months == null || months.length == 0) {
                // No months found in year. Purge year.
                streamsToPurge.add(year.getPath().makeQualified(fs));
                continue;
            }
            for (FileStatus month : months) {
                // For each day
                FileStatus[] days = getAllFilesInDir(month.getPath(), fs);
                if (days == null || days.length == 0) {
                    // No day found in month. Purge month.
                    streamsToPurge.add(month.getPath().makeQualified(fs));
                    continue;
                }
                for (FileStatus day : days) {
                    // For each hour
                    FileStatus[] hours = getAllFilesInDir(day.getPath(), fs);
                    if (hours == null || hours.length == 0) {
                        // No hour found in day. Purge day.
                        streamsToPurge.add(day.getPath().makeQualified(fs));
                        continue;
                    }
                    for (FileStatus hour : hours) {
                        LOG.debug("Working for hour [" + hour.getPath() + "]");
                        Calendar streamDate = CalendarHelper.getDateHour(
                                year.getPath().getName(), month.getPath().getName(),
                                day.getPath().getName(), hour.getPath().getName());
                        LOG.debug("Validate [" + streamDate.toString()
                                + "] against retentionHours [" + getRetentionPeriod(streamName)
                                + "]");
                        if (isPurge(streamDate, getRetentionPeriod(streamName))) {
                            LOG.debug("Adding stream to purge [" + hour.getPath() + "]");
                            streamsToPurge.add(hour.getPath().makeQualified(fs));
                        }
                    }
                }
            }
        }
    }
}

From source file:com.inmobi.databus.readers.CollectorStreamReader.java

License:Apache License

/**
 * Creates the FileMap used for collector files.
 *
 * The returned map filters out paths ending in "_current" or "_stats", keys its entries by
 * CollectorFile in a TreeMap, and fills itself from the listing of streamDir.
 */
protected FileMap<CollectorFile> createFileMap() throws IOException {
    return new FileMap<CollectorFile>() {

        /** Accepts every path except those whose name ends with "_current" or "_stats". */
        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    String name = p.getName();
                    return !(name.endsWith("_current") || name.endsWith("_stats"));
                }
            };
        }

        /*
         * Adds the files listed under streamDir. When stopTime is set, a file whose
         * timestamp lies after stopTime is not added; stopListing() is called for it
         * instead.
         */
        @Override
        protected void buildList() throws IOException {
            if (!fsIsPathExists(streamDir)) {
                LOG.info("Collector directory does not exist");
                return;
            }
            FileStatus[] listed = fsListFileStatus(streamDir, pathFilter);
            if (listed == null || listed.length == 0) {
                LOG.info("No files in directory:" + streamDir);
                return;
            }
            if (stopTime != null) {
                for (FileStatus candidate : listed) {
                    Date fileTimeStamp = getDateFromCollectorFile(candidate.getPath().getName());
                    if (stopTime.before(fileTimeStamp)) {
                        stopListing();
                    } else {
                        addPath(candidate);
                    }
                }
                return;
            }
            // No stop time: every listed file is added.
            for (FileStatus candidate : listed) {
                addPath(candidate);
            }
        }

        @Override
        protected TreeMap<CollectorFile, FileStatus> createFilesMap() {
            return new TreeMap<CollectorFile, FileStatus>();
        }

        @Override
        protected CollectorFile getStreamFile(String fileName) {
            return CollectorFile.create(fileName);
        }

        @Override
        protected CollectorFile getStreamFile(FileStatus file) {
            String fileName = file.getPath().getName();
            return CollectorFile.create(fileName);
        }
    };
}

From source file:com.inmobi.databus.readers.CollectorStreamReader.java

License:Apache License

/**
 * Returns the CollectorFile obtained from getCollectorFile(...) for the name of the given
 * file's path.
 *
 * @param status the listed file
 */
protected CollectorFile getStreamFile(FileStatus status) {
    String fileName = status.getPath().getName();
    return getCollectorFile(fileName);
}

From source file:com.inmobi.databus.readers.DatabusStreamReader.java

License:Apache License

/**
 * Lists {@code dir} with the given filter and adds every non-directory entry to
 * {@code fmap}, recursing into each directory entry.
 *
 * @param dir        directory to list
 * @param pathFilter filter passed to every listing, including the recursive ones
 * @param fmap       map that receives the files found
 * @throws IOException propagated from the listing helper
 */
protected void doRecursiveListing(Path dir, PathFilter pathFilter, FileMap<T> fmap) throws IOException {
    FileStatus[] entries = fsListFileStatus(dir, pathFilter);
    if (entries == null || entries.length == 0) {
        LOG.debug("No files in directory:" + dir);
        return;
    }
    for (FileStatus entry : entries) {
        if (!entry.isDir()) {
            fmap.addPath(entry);
            continue;
        }
        doRecursiveListing(entry.getPath(), pathFilter, fmap);
    }
}

From source file:com.inmobi.databus.readers.DatabusStreamReader.java

License:Apache License

/**
 * Finds the timestamp of the earliest directory in the stream.
 *
 * Starting at streamDir, descends five levels; at each level it follows the entry whose
 * path compares lowest. The path reached is converted with getDateFromStreamDir(...).
 *
 * @param pathFilter filter applied to every listing
 * @return the date derived from the path reached, or null if any level's listing is null
 *         or empty
 * @throws IOException propagated from the listing helper
 */
public Date getTimestampFromStartOfStream(PathFilter pathFilter) throws IOException {
    Path currentDir = streamDir;
    FileStatus earliest = null;
    int level = 0;
    while (level < 5) {
        FileStatus[] children = fsListFileStatus(currentDir, pathFilter);
        if (children == null || children.length == 0) {
            return null;
        }
        // Pick the child with the lowest path at this level.
        earliest = children[0];
        for (int idx = 1; idx < children.length; idx++) {
            FileStatus candidate = children[idx];
            if (earliest.getPath().compareTo(candidate.getPath()) > 0) {
                earliest = candidate;
            }
        }
        currentDir = earliest.getPath();
        level++;
    }
    LOG.info("Starting dir in the stream " + earliest.getPath());
    return getDateFromStreamDir(streamDir, earliest.getPath());
}

From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java

License:Apache License

/**
 * This method does the required setup before moving to next file. First it
 * checks whether the both current file and next file belongs to same minute
 * or different minutes. If files exists on across minutes then it has to
 * check the next file is same as checkpointed file. If not same and checkpointed
 * file exists then sets the iterator to the checkpointed file.
 * @return false if it reads from the checkpointed file.
 * @throws InterruptedException /*from w  ww. ja  v a 2 s . c o  m*/
 */
@Override
public boolean prepareMoveToNext(FileStatus currentFile, FileStatus nextFile)
        throws IOException, InterruptedException {
    Calendar next = Calendar.getInstance();
    Date nextFileTimeStamp = getDateFromStreamDir(streamDir, nextFile.getPath().getParent());
    next.setTime(nextFileTimeStamp);

    boolean readFromCheckpoint = false;
    if (currentMin != next.get(Calendar.MINUTE)) {
        if (currentFile != null) {
            Date currentFileTimeStamp = getDateFromStreamDir(streamDir, currentFile.getPath().getParent());
            setDeltaCheckpoint(getNextMinuteTimeStamp(currentFileTimeStamp), nextFileTimeStamp);
            // set the line number as -1 as current file was read fully.
            deltaCheckpoint.put(currentMin, new PartitionCheckpoint(getStreamFile(currentFile), -1));
        }
        // move to next file
        currentMin = next.get(Calendar.MINUTE);
        readFromCheckpoint = moveToCheckpoint(nextFile);
    } else {
        this.currentFile = nextFile;
    }
    setIterator();
    return !readFromCheckpoint;
}

From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java

License:Apache License

/**
 * Selects the file to read for the minute held in currentMin, applying that minute's
 * checkpoint the first time it is consulted.
 *
 * The pChkpoints entry for currentMin is marked as processed on first use. If its
 * checkpoint carries a file name, reading resumes at the checkpointed line number, from
 * the checkpointed file when that differs from {@code fileToRead} and still exists. If the
 * checkpointed file does not exist, the file list is rebuilt and startFromNextHigher(...)
 * is called instead.
 *
 * NOTE(review): pChkpoints.get(currentMin) is dereferenced without a null check, so this
 * presumably relies on an entry existing for every minute - confirm.
 *
 * @param fileToRead the file the caller intends to read next
 * @return true if a checkpoint with a file name was applied during this call; false if the
 *         entry was already processed or its checkpoint has no file name
 */
private boolean moveToCheckpoint(FileStatus fileToRead) throws IOException, InterruptedException {
    boolean ret = false;
    CheckpointInfo cpi = pChkpoints.get(currentMin);
    if (!cpi.processed) {
        // Each minute's checkpoint is applied at most once.
        cpi.processed = true;
        PartitionCheckpoint partitionCheckpoint = cpi.pck;
        HadoopStreamFile sFile = (HadoopStreamFile) partitionCheckpoint.getStreamFile();
        if (sFile.getFileName() != null) {
            Path checkPointedFileName = new Path(streamDir, partitionCheckpoint.getFileName());
            // Set the iterator to the checkpointed file if there is a checkpoint.
            if (!fileToRead.getPath().equals(checkPointedFileName)) {
                if (fsIsPathExists(checkPointedFileName)) {
                    // Switch to the checkpointed file and resume at its recorded line.
                    fileToRead = fsGetFileStatus(checkPointedFileName);
                    currentLineNum = partitionCheckpoint.getLineNum();
                } else {
                    LOG.info("Checkpointed file " + partitionCheckpoint.getFileName() + " does not exist");
                    build(getDateFromStreamDir(streamDir, fileToRead.getPath()));
                    startFromNextHigher(sFile);
                    // Returns before this.currentFile is assigned below; presumably
                    // startFromNextHigher positions the reader itself - confirm.
                    return true;
                }
            } else {
                // Already on the checkpointed file: only the line number is restored.
                currentLineNum = partitionCheckpoint.getLineNum();
            }
            ret = true;
        }
    }
    this.currentFile = fileToRead;
    return ret;
}

From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java

License:Apache License

/**
 * Determines the line number from which the first file is to be read.
 *
 * @param firstFile the first file to be read
 * @return the line number stored in the checkpoint when a checkpoint is present for the
 *         file's minute and it refers to this very file; zero otherwise
 */
@Override
protected long getLineNumberForFirstFile(FileStatus firstFile) {
    int minute = getMinuteFromFile(firstFile);
    PartitionCheckpoint checkpoint = pChkpoints.get(Integer.valueOf(minute)).pck;
    if (checkpoint == null) {
        return 0;
    }
    Path checkpointedPath = new Path(streamDir, checkpoint.getFileName());
    // Only honour the checkpoint when it refers to the file being read.
    if (!checkpointedPath.equals(firstFile.getPath())) {
        return 0;
    }
    return checkpoint.getLineNum();
}

From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java

License:Apache License

/**
 * Returns the Calendar.MINUTE field of the date that getDateFromStreamDir(...) derives
 * from the parent directory of the given file.
 *
 * @param firstFile the file whose minute is wanted
 */
private int getMinuteFromFile(FileStatus firstFile) {
    Path parentDir = firstFile.getPath().getParent();
    Date dirTimeStamp = getDateFromStreamDir(streamDir, parentDir);
    Calendar calendar = Calendar.getInstance();
    calendar.setTime(dirTimeStamp);
    return calendar.get(Calendar.MINUTE);
}