List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
From source file:com.inmobi.databus.files.FileMap.java
License:Apache License
public void addPath(FileStatus path) { T fileKey = getStreamFile(path);//from ww w . ja va 2 s . co m files.put(fileKey, path); LOG.info("Added path: " + path.getPath() + "timestamp [" + path.getModificationTime() + "]"); }
From source file:com.inmobi.databus.files.FileMap.java
License:Apache License
public boolean setIterator(FileStatus cfile) { if (cfile != null) { createIterator();//from ww w . jav a 2s .co m T file = getStreamFile(cfile); while (fileNameIterator.hasNext()) { StreamFile nextfile = fileNameIterator.next(); if (nextfile.equals(file)) { return true; } } LOG.info("Did not find file" + cfile.getPath()); } return false; }
From source file:com.inmobi.databus.files.HadoopStreamFile.java
License:Apache License
/**
 * Builds a {@code HadoopStreamFile} from a Hadoop file status.
 *
 * @param status status of the file to describe
 * @return a stream file made of the file's parent path, its name and its
 *         modification time
 */
public static HadoopStreamFile create(FileStatus status) {
    final long modifiedAt = status.getModificationTime();
    final String fileName = status.getPath().getName();
    return new HadoopStreamFile(status.getPath().getParent(), fileName, modifiedAt);
}
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
private Map<Path, Path> prepareForCommit(long commitTime, Map<FileStatus, String> fileListing) throws Exception { FileSystem fs = FileSystem.get(cluster.getHadoopConf()); // find final destination paths Map<Path, Path> mvPaths = new LinkedHashMap<Path, Path>(); FileStatus[] categories = fs.listStatus(tmpJobOutputPath); for (FileStatus categoryDir : categories) { String categoryName = categoryDir.getPath().getName(); Path destDir = new Path(cluster.getLocalDestDir(categoryName, commitTime)); FileStatus[] files = fs.listStatus(categoryDir.getPath()); for (FileStatus file : files) { Path destPath = new Path(destDir, file.getPath().getName()); LOG.debug("Moving [" + file.getPath() + "] to [" + destPath + "]"); mvPaths.put(file.getPath(), destPath); }//from w ww . j ava 2 s . co m publishMissingPaths(fs, cluster.getLocalFinalDestDirRoot(), commitTime, categoryName); } // find input files for consumer Map<Path, Path> consumerCommitPaths = new HashMap<Path, Path>(); for (Cluster clusterEntry : getConfig().getClusters().values()) { Set<String> destStreams = clusterEntry.getDestinationStreams().keySet(); boolean consumeCluster = false; for (String destStream : destStreams) { if (clusterEntry.getPrimaryDestinationStreams().contains(destStream) && cluster.getSourceStreams().contains(destStream)) { consumeCluster = true; } } if (consumeCluster) { Path tmpConsumerPath = new Path(tmpPath, clusterEntry.getName()); boolean isFileOpened = false; FSDataOutputStream out = null; try { for (Path destPath : mvPaths.values()) { String category = getCategoryFromDestPath(destPath); if (clusterEntry.getDestinationStreams().containsKey(category)) { if (!isFileOpened) { out = fs.create(tmpConsumerPath); isFileOpened = true; } out.writeBytes(destPath.toString()); LOG.debug("Adding [" + destPath + "] for consumer [" + clusterEntry.getName() + "] to commit Paths in [" + tmpConsumerPath + "]"); out.writeBytes("\n"); } } } finally { if (isFileOpened) { out.close(); Path finalConsumerPath = new 
Path(cluster.getConsumePath(clusterEntry), Long.toString(System.currentTimeMillis())); LOG.debug("Moving [" + tmpConsumerPath + "] to [ " + finalConsumerPath + "]"); consumerCommitPaths.put(tmpConsumerPath, finalConsumerPath); } } } } Map<Path, Path> commitPaths = new LinkedHashMap<Path, Path>(); commitPaths.putAll(mvPaths); commitPaths.putAll(consumerCommitPaths); return commitPaths; }
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
Map<Path, Path> populateTrashCommitPaths(Set<FileStatus> trashSet) { // find trash paths Map<Path, Path> trashPaths = new TreeMap<Path, Path>(); Path trash = cluster.getTrashPathWithDateHour(); Iterator<FileStatus> it = trashSet.iterator(); while (it.hasNext()) { FileStatus src = it.next(); Path target = null;/*ww w . ja va2 s .c o m*/ target = new Path(trash, src.getPath().getParent().getName() + "-" + src.getPath().getName()); LOG.debug("Trashing [" + src.getPath() + "] to [" + target + "]"); trashPaths.put(src.getPath(), target); } return trashPaths; }
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results, Set<FileStatus> trashSet, Map<String, FileStatus> checkpointPaths, long lastFileTimeout) throws IOException { FileStatus[] streams = fs.listStatus(fileStatus.getPath()); for (FileStatus stream : streams) { String streamName = stream.getPath().getName(); LOG.debug("createListing working on Stream [" + streamName + "]"); FileStatus[] collectors = fs.listStatus(stream.getPath()); for (FileStatus collector : collectors) { TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>(); // check point for this collector String collectorName = collector.getPath().getName(); String checkPointKey = streamName + collectorName; String checkPointValue = null; byte[] value = checkpointProvider.read(checkPointKey); if (value != null) checkPointValue = new String(value); LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]"); FileStatus[] files = fs.listStatus(collector.getPath(), new CollectorPathFilter()); if (files == null) { LOG.warn("No Files Found in the Collector " + collector.getPath() + " Skipping Directory"); continue; }/* w w w .j av a 2s.c o m*/ String currentFile = getCurrentFile(fs, files, lastFileTimeout); for (FileStatus file : files) { processFile(file, currentFile, checkPointValue, fs, results, collectorPaths); } populateTrash(collectorPaths, trashSet); populateCheckpointPathForCollector(checkpointPaths, collectorPaths, checkPointKey); } // all files in a collector } }
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
/**
 * Classifies a single collector file.
 *
 * <ul>
 *   <li>A file whose name is {@code null} or equals {@code currentFile}
 *       (ignoring case) is left alone.</li>
 *   <li>An empty file is deleted.</li>
 *   <li>Any other file is recorded in {@code collectorPaths}, and also in
 *       {@code results} when {@code aboveCheckpoint} accepts its name.</li>
 * </ul>
 *
 * @param file            file to classify
 * @param currentFile     name of the file treated as current; may be {@code null}
 * @param checkPointValue checkpoint value passed to {@code aboveCheckpoint}; may be {@code null}
 * @param fs              file system the file lives on
 * @param results         receives the file mapped to its job output directory
 * @param collectorPaths  receives the file keyed by its name
 * @throws IOException if deleting an empty file fails
 */
private void processFile(FileStatus file, String currentFile, String checkPointValue, FileSystem fs,
        Map<FileStatus, String> results, Map<String, FileStatus> collectorPaths) throws IOException {
    final String name = file.getPath().getName();
    if (name == null || name.equalsIgnoreCase(currentFile)) {
        return;
    }

    if (isEmptyFile(file, fs)) {
        LOG.info("Empty File [" + file.getPath() + "] found. " + "Deleting it");
        fs.delete(file.getPath(), false);
        return;
    }

    final Path qualified = file.getPath().makeQualified(fs);
    final String outputDir = getCategoryJobOutTmpPath(getCategoryFromSrcPath(qualified)).toString();
    if (aboveCheckpoint(checkPointValue, name)) {
        results.put(file, outputDir);
    }
    // Recorded regardless of the checkpoint comparison.
    collectorPaths.put(name, file);
}
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
private boolean isEmptyFile(FileStatus fileStatus, FileSystem fs) { boolean retVal = false; FSDataInputStream in = null;/*from w w w . j a v a 2 s.c o m*/ try { in = fs.open(fileStatus.getPath()); byte[] data = new byte[1]; //try reading 1 byte int bytesRead = in.read(data); if (bytesRead == 1) { //not empty file retVal = false; } else { //not able to read 1 bytes also then empty file retVal = true; } } catch (IOException e) { LOG.error("Unable to find if file is empty or not [" + fileStatus.getPath() + "]", e); } finally { if (in != null) { try { in.close(); } catch (IOException e1) { LOG.error("Error in closing file [" + fileStatus.getPath() + "]", e1); } } } return retVal; }
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
protected String getCurrentFile(FileSystem fs, FileStatus[] files, long lastFileTimeout) { //Proposed Algo :-> Sort files based on timestamp //if ((currentTimeStamp - last file's timestamp) > 5min || // if there are no files) // then null (implying process this file as non-current file) // else//w w w.j av a 2 s .c om // return last file as the current file class FileTimeStampComparator implements Comparator { public int compare(Object o, Object o1) { FileStatus file1 = (FileStatus) o; FileStatus file2 = (FileStatus) o1; long file1Time = file1.getModificationTime(); long file2Time = file2.getModificationTime(); if ((file1Time < file2Time)) return -1; else return 1; } } if (files == null || files.length == 0) return null; TreeSet<FileStatus> sortedFiles = new TreeSet<FileStatus>(new FileTimeStampComparator()); for (FileStatus file : files) { sortedFiles.add(file); } //get last file from set FileStatus lastFile = sortedFiles.last(); long currentTime = System.currentTimeMillis(); long lastFileTime = lastFile.getModificationTime(); if (currentTime - lastFileTime >= lastFileTimeout) { return null; } else return lastFile.getPath().getName(); }
From source file:com.inmobi.databus.local.LocalStreamServiceTest.java
License:Apache License
private void testCreateListing() { try {//from w w w. ja va2s . c o m Cluster cluster = ClusterTest.buildLocalCluster(); FileSystem fs = mock(FileSystem.class); createMockForFileSystem(fs, cluster); Map<FileStatus, String> results = new TreeMap<FileStatus, java.lang.String>(); Set<FileStatus> trashSet = new HashSet<FileStatus>(); Map<String, FileStatus> checkpointPaths = new HashMap<String, FileStatus>(); fs.delete(cluster.getDataDir(), true); FileStatus dataDir = new FileStatus(20, false, 3, 23823, 2438232, cluster.getDataDir()); fs.delete(new Path(cluster.getRootDir() + "/databus-checkpoint"), true); TestLocalStreamService service = new TestLocalStreamService(null, cluster, new FSCheckpointProvider(cluster.getRootDir() + "/databus-checkpoint")); service.createListing(fs, dataDir, results, trashSet, checkpointPaths); Set<String> tmpResults = new LinkedHashSet<String>(); // print the results for (FileStatus status : results.keySet()) { tmpResults.add(status.getPath().toString()); LOG.debug("Results [" + status.getPath().toString() + "]"); } // print the trash Iterator<FileStatus> it = trashSet.iterator(); Set<String> tmpTrashPaths = new LinkedHashSet<String>(); while (it.hasNext()) { FileStatus trashfile = it.next(); tmpTrashPaths.add(trashfile.getPath().toString()); LOG.debug("trash file [" + trashfile.getPath()); } Map<String, String> tmpCheckPointPaths = new TreeMap<String, String>(); // print checkPointPaths for (String key : checkpointPaths.keySet()) { tmpCheckPointPaths.put(key, checkpointPaths.get(key).getPath().getName()); LOG.debug("CheckPoint key [" + key + "] value [" + checkpointPaths.get(key).getPath().getName() + "]"); } validateExpectedOutput(tmpResults, tmpTrashPaths, tmpCheckPointPaths); fs.delete(new Path(cluster.getRootDir() + "/databus-checkpoint"), true); fs.delete(cluster.getDataDir(), true); fs.close(); } catch (Exception e) { LOG.debug("Error in running testCreateListing", e); assert false; } }