Usage examples for org.apache.hadoop.fs.FileStatus#getPath()
public Path getPath()
From source file:com.inmobi.conduit.distcp.tools.TestIntegration.java
License:Apache License
@Test public void testJobConters() { try {// w w w. j a v a2s . c o m Path listFile = new Path("target/tmp1/listing").makeQualified(fs); addEntries(listFile, "*"); createFileForAudit("/conduit/streams/test1/2013/10/10/10/10/file1.gz"); runTest(listFile, target, true); int numberOfCountersPerFile = 0; long sumOfCounterValues = 0; FileStatus[] statuses = fs.listStatus(counterOutputPath, new PathFilter() { public boolean accept(Path path) { return path.toString().contains("part"); } }); for (FileStatus status : statuses) { Scanner scanner = new Scanner(fs.open(status.getPath())); while (scanner.hasNext()) { String counterNameValue = null; try { counterNameValue = scanner.next(); String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER); Assert.assertEquals(4, tmp.length); Long numOfMsgs = Long.parseLong(tmp[3]); numberOfCountersPerFile++; sumOfCounterValues += numOfMsgs; } catch (Exception e) { LOG.error("Counters file has malformed line with counter name = " + counterNameValue + " ..skipping the line ", e); } } } // should have 2 conters per file Assert.assertEquals(2, numberOfCountersPerFile); // sum of all counter values should equal to total number of messages Assert.assertEquals(3, sumOfCounterValues); checkResult(target, 1); } catch (IOException e) { LOG.error("Exception encountered while testing distcp", e); Assert.fail("distcp failure"); } finally { TestDistCpUtils.delete(fs, root); } }
From source file:com.inmobi.conduit.distcp.tools.util.TestDistCpUtils.java
License:Apache License
/**
 * Walks the tree rooted at {@code targetBase} and asserts that every entry
 * found there also exists at the same relative location under
 * {@code sourceBase}.
 *
 * @param fs file system used for both the listing and the existence checks
 * @param targetBase root of the tree that is traversed
 * @param sourceBase root under which the matching entries must exist
 * @return always {@code true}; a mismatch surfaces as an assertion failure
 * @throws IOException if a file system call fails
 */
public static boolean checkIfFoldersAreInSync(FileSystem fs, String targetBase, String sourceBase)
    throws IOException {
  Stack<Path> pending = new Stack<Path>();
  pending.push(new Path(targetBase));

  while (!pending.isEmpty()) {
    Path current = pending.pop();
    if (fs.exists(current)) {
      FileStatus[] children = fs.listStatus(current);
      if (children != null && children.length != 0) {
        for (FileStatus child : children) {
          // Directories are queued so their contents get checked as well.
          if (child.isDir()) {
            pending.push(child.getPath());
          }
          Path expected = new Path(
              sourceBase + "/" + DistCpUtils.getRelativePath(new Path(targetBase), child.getPath()));
          Assert.assertTrue(fs.exists(expected));
        }
      }
    }
  }
  return true;
}
From source file:com.inmobi.conduit.local.CopyMapper.java
License:Apache License
@Override public void map(Text key, FileStatus value, Context context) throws IOException, InterruptedException { Path src = value.getPath(); String dest = key.toString(); String collector = src.getParent().getName(); String category = src.getParent().getParent().getName(); Map<Long, Long> received = null; if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) { received = new HashMap<Long, Long>(); }/*from ww w . j av a 2s . c o m*/ Configuration srcConf = new Configuration(); srcConf.set(FS_DEFAULT_NAME_KEY, context.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY)); FileSystem fs = FileSystem.get(srcConf); Path target = getTempPath(context, src, category, collector); if (FileUtil.gzip(src, target, srcConf, received)) { LOG.info("File " + src + " is empty hence returning without compressing"); return; } // move to final destination fs.mkdirs(new Path(dest).makeQualified(fs)); String destnFilename = collector + "-" + src.getName() + ".gz"; Path destPath = new Path(dest + File.separator + destnFilename); LOG.info("Renaming file " + target + " to " + destPath); fs.rename(target, destPath); if (received != null) { for (Entry<Long, Long> entry : received.entrySet()) { String counterNameValue = getCounterNameValue(category, destnFilename, entry.getKey(), entry.getValue()); context.write(NullWritable.get(), new Text(counterNameValue)); } } }
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
Map<Path, Path> prepareForCommit(long commitTime) throws Exception { FileSystem fs = FileSystem.get(srcCluster.getHadoopConf()); // find final destination paths Map<Path, Path> mvPaths = new LinkedHashMap<Path, Path>(); FileStatus[] categories;/*from ww w .java2 s .c o m*/ try { categories = fs.listStatus(tmpJobOutputPath); } catch (FileNotFoundException e) { categories = new FileStatus[0]; } for (FileStatus categoryDir : categories) { String categoryName = categoryDir.getPath().getName(); Path destDir = new Path(srcCluster.getLocalDestDir(categoryName, commitTime)); FileStatus[] files; try { files = fs.listStatus(categoryDir.getPath()); } catch (FileNotFoundException e) { files = new FileStatus[0]; } for (FileStatus file : files) { Path destPath = new Path(destDir, file.getPath().getName()); LOG.debug("Moving [" + file.getPath() + "] to [" + destPath + "]"); mvPaths.put(file.getPath(), destPath); } } publishMissingPaths(fs, srcCluster.getLocalFinalDestDirRoot(), commitTime, streamsToProcess); return mvPaths; }
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
Map<Path, Path> populateTrashCommitPaths(Set<FileStatus> trashSet) { // find trash paths Map<Path, Path> trashPaths = new TreeMap<Path, Path>(); Path trash = srcCluster.getTrashPathWithDateHour(); Iterator<FileStatus> it = trashSet.iterator(); while (it.hasNext()) { FileStatus src = it.next(); Path target = null;/* w w w.j av a 2 s .co m*/ target = new Path(trash, src.getPath().getParent().getName() + "-" + src.getPath().getName()); LOG.debug("Trashing [" + src.getPath() + "] to [" + target + "]"); trashPaths.put(src.getPath(), target); } return trashPaths; }
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results, Set<FileStatus> trashSet, Table<String, String, String> checkpointPaths) throws IOException { List<FileStatus> streamsFileStatus = new ArrayList<FileStatus>(); FileSystem srcFs = FileSystem.get(srcCluster.getHadoopConf()); for (String stream : streamsToProcess) { streamsFileStatus.add(srcFs.getFileStatus(new Path(srcCluster.getDataDir(), stream))); }/*from www. j a v a 2 s .c o m*/ for (FileStatus stream : streamsFileStatus) { String streamName = stream.getPath().getName(); LOG.debug("createListing working on Stream [" + streamName + "]"); FileStatus[] collectors; try { collectors = fs.listStatus(stream.getPath()); } catch (FileNotFoundException ex) { collectors = new FileStatus[0]; } long minOfLatestCollectorTimeStamp = -1; for (FileStatus collector : collectors) { TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>(); // check point for this collector String collectorName = collector.getPath().getName(); String checkPointKey = getCheckPointKey(this.getClass().getSimpleName(), streamName, collectorName); String checkPointValue = null; byte[] value = checkpointProvider.read(checkPointKey); if (value == null) { // In case checkpointKey with newer name format is absent,read old // checkpoint key String oldCheckPointKey = streamName + collectorName; value = checkpointProvider.read(oldCheckPointKey); } if (value != null) checkPointValue = new String(value); LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]"); FileStatus[] files = null; try { files = fs.listStatus(collector.getPath(), new CollectorPathFilter()); } catch (FileNotFoundException e) { } if (files == null) { LOG.warn("No Files Found in the Collector " + collector.getPath() + " Skipping Directory"); continue; } TreeSet<FileStatus> sortedFiles = new TreeSet<FileStatus>(new FileTimeStampComparator()); String currentFile = getCurrentFile(fs, files, 
sortedFiles); LOG.debug("last file " + currentFile + " in the collector directory " + collector.getPath()); Iterator<FileStatus> it = sortedFiles.iterator(); numberOfFilesProcessed = 0; long latestCollectorFileTimeStamp = -1; while (it.hasNext() && numberOfFilesProcessed < filesPerCollector) { FileStatus file = it.next(); LOG.debug("Processing " + file.getPath()); /* * fileTimeStamp value will be -1 for the files which are already processed */ long fileTimeStamp = processFile(file, currentFile, checkPointValue, fs, results, collectorPaths, streamName); if (fileTimeStamp > latestCollectorFileTimeStamp) { latestCollectorFileTimeStamp = fileTimeStamp; } } populateTrash(collectorPaths, trashSet); populateCheckpointPathForCollector(checkpointPaths, collectorPaths); if ((latestCollectorFileTimeStamp < minOfLatestCollectorTimeStamp || minOfLatestCollectorTimeStamp == -1) && latestCollectorFileTimeStamp != -1) { minOfLatestCollectorTimeStamp = latestCollectorFileTimeStamp; } } // all files in a collector if (minOfLatestCollectorTimeStamp != -1) { lastProcessedFile.put(streamName, minOfLatestCollectorTimeStamp); } else { LOG.warn("No new files in " + streamName + " stream"); } } }
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
/**
 * Decides what to do with a single collector file.
 *
 * <p>A file whose name equals {@code currentFile} (ignoring case) is skipped
 * unless {@code processLastFile} is set. Any other file is recorded in
 * {@code collectorPaths}; if it is also above the checkpoint it is added to
 * {@code results} with the stream's temporary job output directory, and
 * {@code numberOfFilesProcessed} is incremented when its length is non-zero.
 *
 * @return the timestamp derived from the file name when the file was added to
 *     {@code results}, otherwise -1
 * @throws IOException declared for callers; propagated from the helpers used
 */
private long processFile(FileStatus file, String currentFile, String checkPointValue,
    FileSystem fs, Map<FileStatus, String> results, Map<String, FileStatus> collectorPaths,
    String stream) throws IOException {
  String name = file.getPath().getName();
  boolean eligible = name != null && (processLastFile || !name.equalsIgnoreCase(currentFile));
  if (!eligible) {
    return -1;
  }

  long timestamp = -1;
  String destDir = getCategoryJobOutTmpPath(stream).toString();
  if (aboveCheckpoint(checkPointValue, name)) {
    results.put(file, destDir);
    timestamp = CalendarHelper.getDateFromCollectorFileName(name);
    // getLen() is consulted only to decide whether this counts as a data file.
    if (file.getLen() != 0) {
      numberOfFilesProcessed++;
    }
  }
  collectorPaths.put(name, file);
  return timestamp;
}
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
private boolean isEmptyFile(FileStatus fileStatus, FileSystem fs) { boolean retVal = false; FSDataInputStream in = null;// www . jav a2 s . co m try { in = fs.open(fileStatus.getPath()); byte[] data = new byte[1]; // try reading 1 byte int bytesRead = in.read(data); if (bytesRead == 1) { // not empty file retVal = false; } else { // not able to read 1 bytes also then empty file retVal = true; } } catch (IOException e) { LOG.error("Unable to find if file is empty or not [" + fileStatus.getPath() + "]", e); } finally { if (in != null) { try { in.close(); } catch (IOException e1) { LOG.error("Error in closing file [" + fileStatus.getPath() + "]", e1); } } } return retVal; }
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
protected String getCurrentFile(FileSystem fs, FileStatus[] files, TreeSet<FileStatus> sortedFiles) { // Proposed Algo :-> Sort files based on timestamp // if there are no files) // then null (implying process this file as non-current file) // else// w ww . java2 s. co m // return last file as the current file if (files == null || files.length == 0) return null; for (FileStatus file : files) { sortedFiles.add(file); } // get last file from set FileStatus lastFile = sortedFiles.last(); long diff = (System.currentTimeMillis() - lastFile.getModificationTime()) / MILLISECONDS_IN_MINUTE; if (diff > timeoutToProcessLastCollectorFile) { processLastFile = true; } else { processLastFile = false; } return lastFile.getPath().getName(); }
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
private void testCreateListing() { try {/* ww w.j a v a 2 s .c om*/ Cluster cluster = ClusterTest.buildLocalCluster(); FileSystem fs = mock(FileSystem.class); createMockForFileSystem(fs, cluster); Map<FileStatus, String> results = new TreeMap<FileStatus, java.lang.String>(); Set<FileStatus> trashSet = new HashSet<FileStatus>(); Table<String, String, String> checkpointPaths = HashBasedTable.create(); fs.delete(cluster.getDataDir(), true); FileStatus dataDir = new FileStatus(20, false, 3, 23823, 2438232, cluster.getDataDir()); fs.delete(new Path(cluster.getRootDir() + "/conduit-checkpoint"), true); Set<String> streamsToProcess = new HashSet<String>(); streamsToProcess.add("stream1"); streamsToProcess.add("stream2"); TestLocalStreamService service = new TestLocalStreamService(null, cluster, null, new FSCheckpointProvider(cluster.getRootDir() + "/conduit-checkpoint"), streamsToProcess); service.createListing(fs, dataDir, results, trashSet, checkpointPaths); Set<String> tmpResults = new LinkedHashSet<String>(); // print the results for (FileStatus status : results.keySet()) { tmpResults.add(status.getPath().toString()); LOG.debug("Results [" + status.getPath().toString() + "]"); } // print the trash Iterator<FileStatus> it = trashSet.iterator(); Set<String> tmpTrashPaths = new LinkedHashSet<String>(); while (it.hasNext()) { FileStatus trashfile = it.next(); tmpTrashPaths.add(trashfile.getPath().toString()); LOG.debug("trash file [" + trashfile.getPath()); } Map<String, String> tmpCheckPointPaths = new TreeMap<String, String>(); Set<String> streams = checkpointPaths.rowKeySet(); for (String streamName : streams) { Map<String, String> collectorCheckpointValueMap = checkpointPaths.row(streamName); for (String collector : collectorCheckpointValueMap.keySet()) { String checkpointKey = AbstractService.getCheckPointKey(service.getName(), streamName, collector); LOG.debug("Check Pointing Key [" + checkpointKey + "] with value [" + collectorCheckpointValueMap.get(collector) + 
"]"); tmpCheckPointPaths.put(checkpointKey, collectorCheckpointValueMap.get(collector)); } } validateExpectedOutput(tmpResults, tmpTrashPaths, tmpCheckPointPaths); fs.delete(new Path(cluster.getRootDir() + "/conduit-checkpoint"), true); fs.delete(cluster.getDataDir(), true); fs.close(); } catch (Exception e) { LOG.debug("Error in running testCreateListing", e); assert false; } }