List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
private void testClusterName(String configName, String currentClusterName) throws Exception { ConduitConfigParser parser = new ConduitConfigParser(configName); ConduitConfig config = parser.getConfig(); Set<String> streamsToProcess = new HashSet<String>(); streamsToProcess.addAll(config.getSourceStreams().keySet()); Set<String> clustersToProcess = new HashSet<String>(); Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>(); Cluster currentCluster = null;/*from www.ja va 2 s. c o m*/ for (SourceStream sStream : config.getSourceStreams().values()) { for (String cluster : sStream.getSourceClusters()) { clustersToProcess.add(cluster); } } if (currentClusterName != null) { currentCluster = config.getClusters().get(currentClusterName); } for (String clusterName : clustersToProcess) { Cluster cluster = config.getClusters().get(clusterName); cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker")); TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster, new NullCheckPointProvider(), streamsToProcess); services.add(service); } for (TestLocalStreamService service : services) { FileSystem fs = service.getFileSystem(); service.preExecute(); if (currentClusterName != null) Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName); // creating a job with empty input path Path tmpJobInputPath = new Path("/tmp/job/input/path"); Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>(); Set<FileStatus> trashSet = new HashSet<FileStatus>(); // checkpointKey, CheckPointPath Table<String, String, String> checkpointPaths = HashBasedTable.create(); service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths); Job testJobConf = service.createJob(tmpJobInputPath, 1000); testJobConf.waitForCompletion(true); int numberOfCountersPerFile = 0; long sumOfCounterValues = 0; Path outputCounterPath = new Path(new 
Path(service.getCluster().getTmpPath(), service.getName()), "counters"); FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() { public boolean accept(Path path) { return path.toString().contains("part"); } }); for (FileStatus fileSt : statuses) { Scanner scanner = new Scanner(fs.open(fileSt.getPath())); while (scanner.hasNext()) { String counterNameValue = null; try { counterNameValue = scanner.next(); String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER); Assert.assertEquals(4, tmp.length); Long numOfMsgs = Long.parseLong(tmp[3]); numberOfCountersPerFile++; sumOfCounterValues += numOfMsgs; } catch (Exception e) { LOG.error("Counters file has malformed line with counter name =" + counterNameValue + "..skipping the line", e); } } } // Should have 2 counters for each file Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile); // sum of all counter values should be equal to total number of messages Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues); Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY), service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY)); Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY), service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY)); if (currentCluster == null) Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY), testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY)); service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true); } }
From source file:com.inmobi.conduit.purge.DataPurgerService.java
License:Apache License
private void getTrashPathsToPurge() throws Exception { Path trashRoot = cluster.getTrashPath(); LOG.debug("Looking for trashPaths in [" + trashRoot + "]"); FileStatus[] trashDatePaths = getAllFilesInDir(trashRoot, fs); // For each trashpath if (trashDatePaths != null && trashDatePaths.length >= 1) { for (FileStatus trashPath : trashDatePaths) { FileStatus[] trashHourPaths = getAllFilesInDir(trashPath.getPath(), fs); if (trashHourPaths != null && trashHourPaths.length >= 1) { for (FileStatus trashHourPath : trashHourPaths) { try { Calendar trashPathHourDate = getDateFromTrashPath(trashPath.getPath().getName(), trashHourPath.getPath().getName()); if (isPurge(trashPathHourDate, getTrashPathRetentionInHours())) { streamsToPurge.add(trashHourPath.getPath().makeQualified(fs)); }// w w w.ja v a 2 s . co m } catch (NumberFormatException e) { streamsToPurge.add(trashHourPath.getPath().makeQualified(fs)); } } } else { try { /* * The date direcotry is empty. Check the time elapsed between the * last hour of the empty directory (23rd hour) and the current time * and add the date direcotry to the streamsToPurge only if the time * elapsed is greater than the trashPathRetiontionHours */ Calendar trashPathDate = getDateFromTrashPath(trashPath.getPath().getName(), "23"); if (isPurge(trashPathDate, getTrashPathRetentionInHours())) streamsToPurge.add(trashPath.getPath().makeQualified(fs)); } catch (NumberFormatException e) { streamsToPurge.add(trashPath.getPath().makeQualified(fs)); } } } } }
From source file:com.inmobi.conduit.purge.DataPurgerService.java
License:Apache License
/**
 * Lists the entries directly below {@code root} and maps each entry's name to its
 * qualified path.
 *
 * @param root directory to list
 * @return map from entry name to qualified path; empty when the listing is {@code null}
 *         or has no entries
 * @throws Exception if listing the directory fails
 */
private Map<String, Path> getStreamsInCluster(String root) throws Exception {
    Map<String, Path> streams = new HashMap<String, Path>();
    LOG.debug("Find streams in [" + root + "]");
    FileStatus[] entries = getAllFilesInDir(new Path(root), fs);
    if (entries == null) {
        LOG.debug("No streams found in [" + root + "]");
        return streams;
    }
    for (FileStatus entry : entries) {
        Path streamPath = entry.getPath();
        streams.put(streamPath.getName(), streamPath.makeQualified(fs));
        LOG.debug("Purger working for stream [" + streamPath + "]");
    }
    return streams;
}
From source file:com.inmobi.conduit.purge.DataPurgerService.java
License:Apache License
private void getStreamsPathToPurge(Map<String, Path> streamPathMap, boolean isLocal) throws Exception { Set<Map.Entry<String, Path>> streamsToProcess = streamPathMap.entrySet(); Iterator it = streamsToProcess.iterator(); while (it.hasNext()) { Map.Entry<String, Path> entry = (Map.Entry<String, Path>) it.next(); String streamName = entry.getKey(); Path streamRootPath = entry.getValue(); String tableName = null;/*from w w w . j a v a 2 s . c o m*/ if (isLocal) { tableName = LOCAL_TABLE_PREFIX + "_" + streamName; } else { tableName = TABLE_PREFIX + "_" + streamName; } LOG.debug( "Find Paths to purge for stream [" + streamName + "] streamRootPath [" + streamRootPath + "]"); // For each Stream, all years FileStatus[] years = getAllFilesInDir(streamRootPath, fs); if (years != null) { for (FileStatus year : years) { String yearVal = year.getPath().getName(); // For each month FileStatus[] months = getAllFilesInDir(year.getPath(), fs); if (months != null && months.length >= 1) { for (FileStatus month : months) { String monthVal = month.getPath().getName(); // For each day FileStatus[] days = getAllFilesInDir(month.getPath(), fs); if (days != null && days.length >= 1) { for (FileStatus day : days) { String dayVal = day.getPath().getName(); // For each day FileStatus[] hours = getAllFilesInDir(day.getPath(), fs); if (hours != null && hours.length >= 1) { for (FileStatus hour : hours) { LOG.debug("Working for hour [" + hour.getPath() + "]"); String hourVal = hour.getPath().getName(); Calendar streamDate = CalendarHelper.getDateHour(yearVal, monthVal, dayVal, hourVal); LOG.debug("Validate [" + streamDate.toString() + "] against retentionHours [" + getRetentionPeriod(streamName) + "]"); if (isPurge(streamDate, getRetentionPeriod(streamName))) { LOG.debug("Adding stream to purge [" + hour.getPath() + "]"); Path hourPath = hour.getPath().makeQualified(fs); addPartitionToList(streamName, tableName, hourPath, yearVal, monthVal, dayVal, hourVal); streamsToPurge.add(hourPath); } } } 
else { Path dayPath = day.getPath().makeQualified(fs); // No hour found in day. Purge day addPartitionToList(streamName, tableName, dayPath, yearVal, monthVal, dayVal); streamsToPurge.add(dayPath); } } // each day } else { // No day found in month. Purge month Path monthPath = month.getPath().makeQualified(fs); addPartitionToList(streamName, tableName, monthPath, yearVal, monthVal); streamsToPurge.add(monthPath); } } // each month } else { // no months found in year. Purge Year. Path yearPath = year.getPath().makeQualified(fs); addPartitionToList(streamName, tableName, yearPath, yearVal); streamsToPurge.add(year.getPath().makeQualified(fs)); } } // each year } } // each stream }
From source file:com.inmobi.conduit.utils.DatePathComparator.java
License:Apache License
@Override public int compare(FileStatus fileStatus, FileStatus fileStatus1) { /*/* w w w . ja va 2 s.c o m*/ * Path eg: * <rootdir>/system/distcp_mirror_srcCluster_destCluster/conduit/streams * /<stream-Name>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07 * -21_00000.gz * * in some cases paths can empty without files * eg: /conduit/system/distcp_mirror_srcCluster_destCluster/conduit * /streams/<streamName>/2012/1/13/15/7/ */ Path streamDir = null; Path streamDir1 = null; Date streamDate1 = null; Date streamDate = null; if (fileStatus != null) { if (!fileStatus.isDir()) streamDir = fileStatus.getPath().getParent(); else streamDir = fileStatus.getPath(); Path streamDirPrefix = streamDir.getParent().getParent().getParent().getParent().getParent(); streamDate = CalendarHelper.getDateFromStreamDir(streamDirPrefix, streamDir); } if (fileStatus1 != null) { if (!fileStatus1.isDir()) streamDir1 = fileStatus1.getPath().getParent(); else streamDir1 = fileStatus1.getPath(); Path streamDirPrefix1 = streamDir1.getParent().getParent().getParent().getParent().getParent(); streamDate1 = CalendarHelper.getDateFromStreamDir(streamDirPrefix1, streamDir1); } if (streamDate != null && streamDate1 != null) return streamDate.compareTo(streamDate1); else return -1; }
From source file:com.inmobi.databus.AbstractService.java
License:Apache License
private Path getLatestDir(FileSystem fs, Path Dir) throws Exception { FileStatus[] fileStatus = fs.listStatus(Dir); if (fileStatus != null && fileStatus.length > 0) { FileStatus latestfile = fileStatus[0]; for (FileStatus currentfile : fileStatus) { if (currentfile.getPath().getName().compareTo(latestfile.getPath().getName()) > 0) latestfile = currentfile; }//w w w .jav a 2 s .c o m return latestfile.getPath(); } return null; }
From source file:com.inmobi.databus.AbstractService.java
License:Apache License
protected Map<String, Set<Path>> publishMissingPaths(FileSystem fs, String destDir) throws Exception { Map<String, Set<Path>> missingDirectories = new HashMap<String, Set<Path>>(); Set<Path> missingdirsinstream = null; FileStatus[] fileStatus = fs.listStatus(new Path(destDir)); LOG.info("Create All the Missing Paths in " + destDir); if (fileStatus != null) { for (FileStatus file : fileStatus) { missingdirsinstream = publishMissingPaths(fs, destDir, System.currentTimeMillis(), file.getPath().getName()); if (missingdirsinstream.size() > 0) missingDirectories.put(file.getPath().getName(), missingdirsinstream); }// w w w .j a v a2s .c om } LOG.info("Done Creating All the Missing Paths in " + destDir); return missingDirectories; }
From source file:com.inmobi.databus.distcp.MirrorStreamService.java
License:Apache License
LinkedHashMap<FileStatus, Path> prepareForCommit(Path tmpOut) throws Exception { /*//from w w w.j a va2 s. c om * tmpOut would be like - * /databus/system/tmp/distcp_mirror_<srcCluster>_<destCluster>/ After * distcp paths inside tmpOut would be eg: * * /databus/system/distcp_mirror_ua1_uj1 * /databus/streams/<streamName>/2012/1/13/15/7/ * <hostname>-<streamName>-2012-01-16-07-21_00000.gz * * tmpStreamRoot eg: /databus/system/distcp_mirror_<srcCluster>_ * <destCluster>/databus/streams/ */ Path tmpStreamRoot = new Path(tmpOut.makeQualified(getDestFs()).toString() + File.separator + getSrcCluster().getUnqaulifiedFinalDestDirRoot()); LOG.debug("tmpStreamRoot [" + tmpStreamRoot + "]"); /* tmpStreamRoot eg - * /databus/system/tmp/distcp_mirror_<srcCluster>_<destCluster>/databus * /streams/ * * multiple streams can get mirrored from the same cluster * streams can get processed in any order but we have to retain order * of paths within a stream*/ FileStatus[] fileStatuses = getDestFs().listStatus(tmpStreamRoot); //Retain the order of commitPaths LinkedHashMap<FileStatus, Path> commitPaths = new LinkedHashMap<FileStatus, Path>(); if (fileStatuses != null) { for (FileStatus streamRoot : fileStatuses) { //for each stream : list the path in order of YYYY/mm/DD/HH/MM LOG.debug("StreamRoot [" + streamRoot.getPath() + "] streamName [" + streamRoot.getPath().getName() + "]"); List<FileStatus> streamPaths = new ArrayList<FileStatus>(); createListing(getDestFs(), streamRoot, streamPaths); Collections.sort(streamPaths, new DatePathComparator()); LOG.debug("createListing size: [" + streamPaths.size() + "]"); createCommitPaths(commitPaths, streamPaths); } } return commitPaths; }
From source file:com.inmobi.databus.distcp.MirrorStreamService.java
License:Apache License
private void createCommitPaths(LinkedHashMap<FileStatus, Path> commitPaths, List<FileStatus> streamPaths) { /* Path eg in streamPaths - * /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams * /<streamName>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07 * -21_00000.gz/*from w ww. j a va 2 s. c o m*/ * * or it could be an emptyDir like * /* Path eg in streamPaths - * /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams * /<streamName>/2012/1/13/15/7/ * */ for (FileStatus fileStatus : streamPaths) { String fileName = null; Path prefixDir = null; if (fileStatus.isDir()) { //empty directory prefixDir = fileStatus.getPath(); } else { fileName = fileStatus.getPath().getName(); prefixDir = fileStatus.getPath().getParent(); } Path min = prefixDir; Path hr = min.getParent(); Path day = hr.getParent(); Path month = day.getParent(); Path year = month.getParent(); Path streamName = year.getParent(); String finalPath = getDestCluster().getFinalDestDirRoot() + File.separator + streamName.getName() + File.separator + year.getName() + File.separator + month.getName() + File.separator + day.getName() + File.separator + hr.getName() + File.separator + min.getName(); if (fileName != null) { finalPath += File.separator + fileName; } commitPaths.put(fileStatus, new Path(finalPath)); LOG.debug("Going to commit [" + fileStatus.getPath() + "] to [" + finalPath + "]"); } }
From source file:com.inmobi.databus.distcp.MirrorStreamService.java
License:Apache License
void createListing(FileSystem fs, FileStatus fileStatus, List<FileStatus> results) throws IOException { if (fileStatus.isDir()) { FileStatus[] stats = fs.listStatus(fileStatus.getPath()); if (stats.length == 0) { results.add(fileStatus);// www. jav a2 s. c o m LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]"); } for (FileStatus stat : stats) { createListing(fs, stat, results); } } else { LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]"); results.add(fileStatus); } }