List of usage examples for org.apache.hadoop.fs.FileStatus#getPath()
public Path getPath()
From source file:com.inmobi.databus.purge.DataPurgerService.java
License:Apache License
private void getTrashPathsToPurge() throws Exception { Path trashRoot = cluster.getTrashPath(); LOG.debug("Looking for trashPaths in [" + trashRoot + "]"); FileStatus[] trashDatePaths = getAllFilesInDir(trashRoot, fs); // For each trashpath if (trashDatePaths != null && trashDatePaths.length >= 1) { for (FileStatus trashPath : trashDatePaths) { FileStatus[] trashHourPaths = getAllFilesInDir(trashPath.getPath(), fs); if (trashHourPaths != null && trashHourPaths.length >= 1) { for (FileStatus trashHourPath : trashHourPaths) { try { Calendar trashPathDate = getDateFromTrashPath(trashPath.getPath().getName(), trashHourPath.getPath().getName()); if (isPurge(trashPathDate, getTrashPathRetentionInHours())) streamsToPurge.add(trashHourPath.getPath().makeQualified(fs)); } catch (NumberFormatException e) { streamsToPurge.add(trashHourPath.getPath().makeQualified(fs)); }//from w ww .j a v a 2s. co m } } } } }
From source file:com.inmobi.databus.purge.DataPurgerService.java
License:Apache License
private void getStreamsPathToPurge(Map<String, Path> streamPathMap) throws Exception { Set<Map.Entry<String, Path>> streamsToProcess = streamPathMap.entrySet(); Iterator it = streamsToProcess.iterator(); while (it.hasNext()) { Map.Entry<String, Path> entry = (Map.Entry<String, Path>) it.next(); String streamName = entry.getKey(); Path streamRootPath = entry.getValue(); LOG.debug(/*from w ww .j a v a2s . c o m*/ "Find Paths to purge for stream [" + streamName + "] streamRootPath [" + streamRootPath + "]"); // For each Stream, all years FileStatus[] years = getAllFilesInDir(streamRootPath, fs); if (years != null) { for (FileStatus year : years) { // For each month FileStatus[] months = getAllFilesInDir(year.getPath(), fs); if (months != null && months.length >= 1) { for (FileStatus month : months) { // For each day FileStatus[] days = getAllFilesInDir(month.getPath(), fs); if (days != null && days.length >= 1) { for (FileStatus day : days) { // For each day FileStatus[] hours = getAllFilesInDir(day.getPath(), fs); if (hours != null && hours.length >= 1) { for (FileStatus hour : hours) { LOG.debug("Working for hour [" + hour.getPath() + "]"); Calendar streamDate = CalendarHelper.getDateHour( year.getPath().getName(), month.getPath().getName(), day.getPath().getName(), hour.getPath().getName()); LOG.debug("Validate [" + streamDate.toString() + "] against retentionHours [" + getRetentionPeriod(streamName) + "]"); if (isPurge(streamDate, getRetentionPeriod(streamName))) { LOG.debug("Adding stream to purge [" + hour.getPath() + "]"); streamsToPurge.add(hour.getPath().makeQualified(fs)); } } } else { // No hour found in day. Purge day streamsToPurge.add(day.getPath().makeQualified(fs)); } } // each day } else { // No day found in month. Purge month streamsToPurge.add(month.getPath().makeQualified(fs)); } } // each month } else { // no months found in year. Purge Year. streamsToPurge.add(year.getPath().makeQualified(fs)); } } // each year } } // each stream }
From source file:com.inmobi.databus.readers.CollectorStreamReader.java
License:Apache License
/**
 * Creates the {@code FileMap} used for collector files.
 *
 * <p>The returned map filters out paths whose name ends with
 * {@code _current} or {@code _stats}, lists {@code streamDir} non-recursively,
 * and keys its entries by {@code CollectorFile} in a {@link TreeMap}.
 *
 * @return a new anonymous {@code FileMap<CollectorFile>}
 * @throws IOException declared by the original signature
 */
protected FileMap<CollectorFile> createFileMap() throws IOException {
  return new FileMap<CollectorFile>() {

    @Override
    protected PathFilter createPathFilter() {
      return new PathFilter() {
        @Override
        public boolean accept(Path p) {
          String name = p.getName();
          return !(name.endsWith("_current") || name.endsWith("_stats"));
        }
      };
    }

    /*
     * Adds the files listed in streamDir. When stopTime is set, a file whose
     * timestamp is after stopTime triggers stopListing() and is not added.
     */
    @Override
    protected void buildList() throws IOException {
      if (!fsIsPathExists(streamDir)) {
        LOG.info("Collector directory does not exist");
        return;
      }

      FileStatus[] listing = fsListFileStatus(streamDir, pathFilter);
      if (listing == null || listing.length == 0) {
        LOG.info("No files in directory:" + streamDir);
        return;
      }

      // Decided once up front, as the original picks one of two loops.
      final boolean bounded = stopTime != null;
      for (FileStatus file : listing) {
        if (bounded) {
          Date fileTimeStamp = getDateFromCollectorFile(file.getPath().getName());
          if (stopTime.before(fileTimeStamp)) {
            stopListing();
            continue;
          }
        }
        addPath(file);
      }
    }

    @Override
    protected TreeMap<CollectorFile, FileStatus> createFilesMap() {
      return new TreeMap<CollectorFile, FileStatus>();
    }

    @Override
    protected CollectorFile getStreamFile(String fileName) {
      return CollectorFile.create(fileName);
    }

    @Override
    protected CollectorFile getStreamFile(FileStatus file) {
      String fileName = file.getPath().getName();
      return CollectorFile.create(fileName);
    }
  };
}
From source file:com.inmobi.databus.readers.CollectorStreamReader.java
License:Apache License
/**
 * Resolves the {@code CollectorFile} for a listed file, using only the last
 * component of its path.
 *
 * @param status file status whose path name is passed to {@code getCollectorFile}
 * @return the result of {@code getCollectorFile} for that name
 */
protected CollectorFile getStreamFile(FileStatus status) {
  String fileName = status.getPath().getName();
  return getCollectorFile(fileName);
}
From source file:com.inmobi.databus.readers.DatabusStreamReader.java
License:Apache License
/**
 * Recursively lists {@code dir} and adds every non-directory entry to
 * {@code fmap}.
 *
 * @param dir directory to list
 * @param pathFilter filter handed to {@code fsListFileStatus} at every level
 * @param fmap map that receives each file found
 * @throws IOException propagated from the listing
 */
protected void doRecursiveListing(Path dir, PathFilter pathFilter, FileMap<T> fmap)
    throws IOException {
  FileStatus[] entries = fsListFileStatus(dir, pathFilter);
  if (entries == null || entries.length == 0) {
    LOG.debug("No files in directory:" + dir);
    return;
  }

  for (FileStatus entry : entries) {
    if (!entry.isDir()) {
      fmap.addPath(entry);
      continue;
    }
    // Descend into sub-directories with the same filter and target map.
    doRecursiveListing(entry.getPath(), pathFilter, fmap);
  }
}
From source file:com.inmobi.databus.readers.DatabusStreamReader.java
License:Apache License
/**
 * Finds the timestamp of the earliest entry in the stream by descending five
 * levels from {@code streamDir}, each time following the entry whose path
 * compares lowest.
 *
 * <p>NOTE(review): the fixed depth of 5 presumably matches the stream's
 * directory layout; confirm against the layout definition.
 *
 * @param pathFilter filter handed to {@code fsListFileStatus} at every level
 * @return the date derived from the lowest path found, or {@code null} if any
 *     level has a {@code null} or empty listing
 * @throws IOException propagated from the listing
 */
public Date getTimestampFromStartOfStream(PathFilter pathFilter) throws IOException {
  FileStatus earliest = null;
  Path dir = streamDir;

  for (int level = 0; level < 5; level++) {
    FileStatus[] entries = fsListFileStatus(dir, pathFilter);
    if (entries == null || entries.length == 0) {
      return null;
    }

    // Pick the entry with the smallest path; the first entry wins ties.
    earliest = entries[0];
    for (FileStatus candidate : entries) {
      if (earliest.getPath().compareTo(candidate.getPath()) > 0) {
        earliest = candidate;
      }
    }
    dir = earliest.getPath();
  }

  LOG.info("Starting dir in the stream " + earliest.getPath());
  return getDateFromStreamDir(streamDir, earliest.getPath());
}
From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java
License:Apache License
/** * This method does the required setup before moving to next file. First it * checks whether the both current file and next file belongs to same minute * or different minutes. If files exists on across minutes then it has to * check the next file is same as checkpointed file. If not same and checkpointed * file exists then sets the iterator to the checkpointed file. * @return false if it reads from the checkpointed file. * @throws InterruptedException /*from w ww. ja v a 2 s . c o m*/ */ @Override public boolean prepareMoveToNext(FileStatus currentFile, FileStatus nextFile) throws IOException, InterruptedException { Calendar next = Calendar.getInstance(); Date nextFileTimeStamp = getDateFromStreamDir(streamDir, nextFile.getPath().getParent()); next.setTime(nextFileTimeStamp); boolean readFromCheckpoint = false; if (currentMin != next.get(Calendar.MINUTE)) { if (currentFile != null) { Date currentFileTimeStamp = getDateFromStreamDir(streamDir, currentFile.getPath().getParent()); setDeltaCheckpoint(getNextMinuteTimeStamp(currentFileTimeStamp), nextFileTimeStamp); // set the line number as -1 as current file was read fully. deltaCheckpoint.put(currentMin, new PartitionCheckpoint(getStreamFile(currentFile), -1)); } // move to next file currentMin = next.get(Calendar.MINUTE); readFromCheckpoint = moveToCheckpoint(nextFile); } else { this.currentFile = nextFile; } setIterator(); return !readFromCheckpoint; }
From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java
License:Apache License
private boolean moveToCheckpoint(FileStatus fileToRead) throws IOException, InterruptedException { boolean ret = false; CheckpointInfo cpi = pChkpoints.get(currentMin); if (!cpi.processed) { cpi.processed = true;//from w ww .j a v a 2 s .c om PartitionCheckpoint partitionCheckpoint = cpi.pck; HadoopStreamFile sFile = (HadoopStreamFile) partitionCheckpoint.getStreamFile(); if (sFile.getFileName() != null) { Path checkPointedFileName = new Path(streamDir, partitionCheckpoint.getFileName()); //set iterator to checkpointed file if there is a checkpoint if (!fileToRead.getPath().equals(checkPointedFileName)) { if (fsIsPathExists(checkPointedFileName)) { fileToRead = fsGetFileStatus(checkPointedFileName); currentLineNum = partitionCheckpoint.getLineNum(); } else { LOG.info("Checkpointed file " + partitionCheckpoint.getFileName() + " does not exist"); build(getDateFromStreamDir(streamDir, fileToRead.getPath())); startFromNextHigher(sFile); return true; } } else { currentLineNum = partitionCheckpoint.getLineNum(); } ret = true; } } this.currentFile = fileToRead; return ret; }
From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java
License:Apache License
/** * @returns Zero if checkpoint is not present for that minute or * checkpoint file and current file were not same. * Line number from checkpoint//w w w. ja v a2s . com */ @Override protected long getLineNumberForFirstFile(FileStatus firstFile) { int minute = getMinuteFromFile(firstFile); PartitionCheckpoint partitionChkPoint = pChkpoints.get(Integer.valueOf(minute)).pck; if (partitionChkPoint != null) { Path checkPointedFileName = new Path(streamDir, partitionChkPoint.getFileName()); // check whether current file and checkpoint file are same if (checkPointedFileName.equals(firstFile.getPath())) { return partitionChkPoint.getLineNum(); } } return 0; }
From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java
License:Apache License
/**
 * Extracts the minute-of-hour for a file from the date encoded in its parent
 * directory.
 *
 * @param firstFile file whose parent directory is passed to
 *     {@code getDateFromStreamDir}
 * @return the {@link Calendar#MINUTE} field of that date in the default
 *     calendar
 */
private int getMinuteFromFile(FileStatus firstFile) {
  Path parentDir = firstFile.getPath().getParent();
  Calendar calendar = Calendar.getInstance();
  calendar.setTime(getDateFromStreamDir(streamDir, parentDir));
  return calendar.get(Calendar.MINUTE);
}