List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
From source file:com.ikanow.aleph2.core.shared.utils.DirUtils.java
License:Apache License
/** * @param allPaths/*from w w w.j a va2 s . c o m*/ * @param fileContext * @param start * @param subDirectoryName * @param includeMatched */ public static void findAllSubdirectories(List<Path> allPaths, FileContext fileContext, Path start, String subDirectoryName, boolean includeMatched) { try { logger.debug("findAllSubdirectories :" + start.toString()); FileStatus[] statuss = fileContext.util().listStatus(start); for (int i = 0; i < statuss.length; i++) { FileStatus dir = statuss[i]; logger.debug("FileStatus:" + statuss[i].getPath().toString()); if (dir.isDirectory()) { if (dir.getPath().getName().contains(subDirectoryName)) { logger.debug("findOneSubdirectory match:" + dir.getPath().getName()); if (includeMatched) { allPaths.add(dir.getPath()); } else { allPaths.add(dir.getPath().getParent()); } } else { findAllSubdirectories(allPaths, fileContext, dir.getPath(), subDirectoryName, includeMatched); } } } } catch (Exception e) { logger.error("findAllSubdirectories Caught Exception", e); } }
From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java
License:Open Source License
/**
 * Loads the text output of a custom map/reduce job into a list of BSON objects.
 *
 * <p>Every non-empty file matching {@code part-*} in the job's output directory is read line by
 * line. Each line is split at the first tab: with a tab present the object gets a {@code "key"}
 * and a {@code "value"} field, otherwise the whole line is stored under {@code "value"}.
 *
 * @param cmr the job whose output directory is resolved via {@code HadoopUtils.getPathForJob}
 * @param nLimit not read by this method
 *            (NOTE(review): presumably intended as a record cap - confirm with callers)
 * @param fields not read by this method
 * @return one BSON object per input line, in file/line order; empty if nothing was found
 * @throws IOException if the file system cannot be reached or a file cannot be read
 * @throws SAXException declared for the configuration lookup
 * @throws ParserConfigurationException declared for the configuration lookup
 */
public static BasicDBList getBsonFromTextFiles(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws IOException, SAXException, ParserConfigurationException {
    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);
    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] files = fs.globStatus(new Path(pathDir.toString() + "/part-*"));
    if (null == files) {
        // Some Hadoop versions report "nothing matched" as null instead of an empty array.
        return dbl;
    }

    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        FSDataInputStream in = fs.open(file.getPath());
        try {
            BufferedReader bin = new BufferedReader(new InputStreamReader(in));
            for (String s = bin.readLine(); null != s; s = bin.readLine()) {
                String[] keyValue = s.split("\t", 2);
                BasicDBObject dbo = new BasicDBObject();
                if (keyValue.length > 1) {
                    dbo.put("key", keyValue[0]);
                    dbo.put("value", keyValue[1]);
                } else {
                    dbo.put("value", keyValue[0]);
                }
                dbl.add(dbo);
            }
        } finally {
            // Release the stream even when reading fails part-way through a file.
            in.close();
        }
    }
    return dbl;
}
From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java
License:Apache License
/**
 * Scans {@code toIndexPath} for index directories and collects the uploadable files in each.
 *
 * <p>Only directories whose name matches {@code ALLOWED_INDEX_NAMES} are considered; within
 * them, only plain files ending in one of {@code ALLOWED_FILE_EXT} are returned. The first
 * matching extension is stripped from the file name handed to {@code FileToIndex}. Rejected
 * directories and files are reported at info level.
 *
 * @return the files found, in listing order
 */
private List<FileToIndex> findNewFilesToIndex() {
    final List<FileToIndex> found = Lists.newArrayList();
    try {
        for (FileStatus indexDir : inputFS.listStatus(toIndexPath)) {
            if (indexDir.isDir()) {
                final Path indexPath = indexDir.getPath();
                final String indexName = indexPath.getName();
                if (indexName.matches(ALLOWED_INDEX_NAMES)) {
                    for (FileStatus candidate : inputFS.listStatus(indexPath)) {
                        if (!candidate.isDir()) {
                            final Path filePath = candidate.getPath();
                            final String fullName = filePath.getName();

                            // Find the first allowed extension this file ends with, if any.
                            String matchedExt = null;
                            for (String allowedExt : ALLOWED_FILE_EXT) {
                                if (fullName.endsWith(allowedExt)) {
                                    matchedExt = allowedExt;
                                    break;
                                }
                            }

                            if (matchedExt == null) {
                                log.info("Not one of supported extensions (" + StringUtils.join(ALLOWED_FILE_EXT, ", ")
                                        + ") file: " + filePath);
                            } else {
                                final String baseName = fullName.substring(0, fullName.length() - matchedExt.length());
                                found.add(new FileToIndex(baseName, indexName, filePath));
                            }
                        }
                    }
                } else {
                    log.info("Skipped directory " + indexPath + ". Index names should match regex "
                            + ALLOWED_INDEX_NAMES);
                }
            }
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
    return found;
}
From source file:com.indeed.iupload.core.filesystem.HDFSProxy.java
License:Apache License
@Override public List<String> listFiles(String path) throws IOException { final Path pathObj = new Path(path); if (!getFileSystem(path).exists(pathObj)) { return Lists.newArrayList(); }//ww w .j a v a 2 s.c o m List<String> results = new ArrayList<String>(); for (FileStatus status : getFileSystem(path).listStatus(pathObj)) { if (!status.isDirectory()) { results.add(status.getPath().getName()); } } return results; }
From source file:com.indeed.iupload.core.filesystem.HDFSProxy.java
License:Apache License
@Override public List<String> listDirs(String path) throws IOException { final Path pathObj = new Path(path); if (!getFileSystem(path).exists(pathObj)) { return Lists.newArrayList(); }//from w ww .ja v a2 s . c o m List<String> results = new ArrayList<String>(); for (FileStatus status : getFileSystem(path).listStatus(pathObj)) { if (status.isDirectory()) { results.add(status.getPath().getName()); } } return results; }
From source file:com.inmobi.conduit.AbstractService.java
License:Apache License
private Path getLatestDir(FileSystem fs, Path Dir) throws Exception { FileStatus[] fileStatus;// www . j av a 2 s . co m try { fileStatus = fs.listStatus(Dir); } catch (FileNotFoundException fe) { fileStatus = null; } if (fileStatus != null && fileStatus.length > 0) { FileStatus latestfile = fileStatus[0]; for (FileStatus currentfile : fileStatus) { if (currentfile.getPath().getName().compareTo(latestfile.getPath().getName()) > 0) latestfile = currentfile; } return latestfile.getPath(); } return null; }
From source file:com.inmobi.conduit.AbstractService.java
License:Apache License
private List<Path> listPartFiles(Path path, FileSystem fs) { List<Path> matches = new LinkedList<Path>(); try {//from w w w .j av a 2 s . co m FileStatus[] statuses = fs.listStatus(path, new PathFilter() { public boolean accept(Path path) { return path.toString().contains("part"); } }); for (FileStatus status : statuses) { matches.add(status.getPath()); } } catch (IOException e) { LOG.error(e.getMessage(), e); } return matches; }
From source file:com.inmobi.conduit.distcp.DistcpBaseService.java
License:Apache License
protected Map<String, FileStatus> getDistCPInputFile() throws Exception { Map<String, FileStatus> result = new HashMap<String, FileStatus>(); for (String stream : streamsToProcess) { int pathsAlreadyAdded = 0; LOG.info("Processing stream " + stream); byte[] value = provider.read(getCheckPointKey(stream)); Path inputPath = new Path(getInputPath(), stream); Path lastCheckPointPath = null; Path nextPath = null;/* ww w. jav a 2s. com*/ List<FileStatus> filesLastCopiedDir; if (value != null) { String checkPointValue = new String(value); // creating a path object from empty string throws exception;hence // checking for it if (!checkPointValue.trim().equals("")) { lastCheckPointPath = new Path(checkPointValue); } lastCheckPointPath = fullyQualifyCheckPointWithReadURL(lastCheckPointPath, srcCluster); if (lastCheckPointPath == null || !getSrcFs().exists(lastCheckPointPath)) { LOG.warn("Invalid checkpoint found [" + lastCheckPointPath + "] for stream " + stream + ";Ignoring it"); } else { Date lastDate = CalendarHelper.getDateFromStreamDir(inputPath, lastCheckPointPath); nextPath = CalendarHelper.getNextMinutePathFromDate(lastDate, inputPath); } } if (nextPath == null) { filesLastCopiedDir = new ArrayList<FileStatus>(); LOG.info("Finding the starting directoryfor stream [" + stream + "]"); nextPath = getStartingDirectory(stream, filesLastCopiedDir); if (nextPath == null) { LOG.debug("No start directory found,returning the empty result"); continue; } LOG.debug("Uncopied Files from directory last copied are " + FileUtil.toStringOfFileStatus(filesLastCopiedDir)); for (FileStatus fileStatus : filesLastCopiedDir) { String destnPath = getFinalDestinationPath(fileStatus); if (destnPath != null) { LOG.info("Adding to input of Distcp.Move [" + fileStatus.getPath() + "] to " + destnPath); result.put(destnPath, fileStatus); } } } LOG.info("Starting directory for stream [" + stream + "]" + " is [" + nextPath + "]"); Date nextDate = CalendarHelper.getDateFromStreamDir(inputPath, 
nextPath); // if next to next path exist than only add the next path so that the path // being added to disctp input is not the current path Path nextToNextPath = CalendarHelper.getNextMinutePathFromDate(nextDate, inputPath); Path lastPathAdded = null; FileStatus[] nextPathFileStatus; while (pathsAlreadyAdded <= numOfDirPerDistcpPerStream && srcFs.exists(nextToNextPath) && (nextPathFileStatus = FileUtil.listStatusAsPerHDFS(srcFs, nextPath)) != null) { if (nextPathFileStatus.length == 0) { LOG.info(nextPath + " is an empty directory"); FileStatus srcFileStatus = srcFs.getFileStatus(nextPath); String destnPath = getFinalDestinationPath(srcFileStatus); if (destnPath != null) { LOG.info("Adding to input of Distcp.Move [" + nextPath + "] to " + destnPath); result.put(destnPath, srcFileStatus); } } else { for (FileStatus fStatus : nextPathFileStatus) { String destnPath = getFinalDestinationPath(fStatus); if (destnPath != null) { LOG.info("Adding to input of Distcp.Move [" + fStatus.getPath() + "] to " + destnPath); result.put(destnPath, fStatus); } } } pathsAlreadyAdded++; lastPathAdded = nextPath; nextPath = nextToNextPath; nextDate = CalendarHelper.addAMinute(nextDate); nextToNextPath = CalendarHelper.getNextMinutePathFromDate(nextDate, inputPath); } if (lastPathAdded != null) { checkPointPaths.put(stream, lastPathAdded); Date lastDateAdded = CalendarHelper.getDateFromStreamDir(inputPath, lastPathAdded); lastProcessedFile.put(stream, lastDateAdded.getTime()); } } return result; }
From source file:com.inmobi.conduit.distcp.DistcpBaseService.java
License:Apache License
public static void createListing(FileSystem fs, FileStatus fileStatus, List<FileStatus> results) throws IOException { if (fileStatus.isDir()) { FileStatus[] stats = FileUtil.listStatusAsPerHDFS(fs, fileStatus.getPath()); // stats can be null in case where purger deleted the path while this // method was called if (stats != null) { if (stats.length == 0) { results.add(fileStatus); LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]"); }/*from w w w .jav a 2 s . c om*/ for (FileStatus stat : stats) { createListing(fs, stat, results); } } } else { LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]"); results.add(fileStatus); } }
From source file:com.inmobi.conduit.distcp.MergedStreamService.java
License:Apache License
/**
 * Groups the files found under {@code tmpOut/<stream>} by stream, for every stream in
 * {@code streamsToProcess}.
 *
 * <p>Streams whose temporary directory is missing, cannot be listed (null listing) or is empty
 * get no entry in the result.
 *
 * @param tmpOut temporary output root containing one sub-directory per stream
 * @return map from stream name to the paths found in that stream's temporary directory
 * @throws Exception if listing fails for a reason other than a missing directory
 */
private Map<String, List<Path>> prepareForCommit(Path tmpOut) throws Exception {
    Map<String, List<Path>> categoriesToCommit = new HashMap<String, List<Path>>();
    for (String stream : streamsToProcess) {
        Path tmpPathPerStream = new Path(tmpOut, stream);
        // Scoped per iteration so that a missing directory for this stream cannot pick up the
        // previous stream's listing.
        FileStatus[] allFilesPerStream = null;
        try {
            allFilesPerStream = FileUtil.listStatusAsPerHDFS(getDestFs(), tmpPathPerStream);
        } catch (FileNotFoundException ignored) {
            // No temporary output for this stream; nothing to commit for it.
        }
        if (allFilesPerStream == null) {
            continue;
        }
        for (FileStatus fileStatus : allFilesPerStream) {
            List<Path> fileList = categoriesToCommit.get(stream);
            if (fileList == null) {
                fileList = new ArrayList<Path>();
                categoriesToCommit.put(stream, fileList);
            }
            fileList.add(fileStatus.getPath());
        }
    }
    return categoriesToCommit;
}