List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
From source file:com.flipkart.fdp.migration.distcp.codec.GenericHadoopCodec.java
License:Apache License
public List<FileTuple> getFileStatusRecursive(Path path, Collection<String> excludeList) throws IOException, AuthenticationException { List<FileTuple> response = new ArrayList<FileTuple>(); FileStatus file = fs.getFileStatus(path); //TODO excludeList to be checked if file (not folder) is mentioned in excludeList. if (file != null && file.isFile()) { response.add(new FileTuple(MirrorUtils.getSimplePath(file.getPath()), file.getLen(), file.getModificationTime())); return response; }/*from ww w .j a va2 s .c o m*/ FileStatus[] fstats = fs.listStatus(path); if (fstats != null && fstats.length > 0) { for (FileStatus fstat : fstats) { if (fstat.isDirectory() && !excludeList.contains(MirrorUtils.getSimplePath(fstat.getPath()))) { response.addAll(getFileStatusRecursive(fstat.getPath(), excludeList)); } else { //TODO excludeList to be checked if file (not folder) is mentioned in excludeList. response.add(new FileTuple(MirrorUtils.getSimplePath(fstat.getPath()), fstat.getLen(), fstat.getModificationTime())); } } } return response; }
From source file:com.flipkart.fdp.migration.distcp.state.HDFSStateManager.java
License:Apache License
public Map<String, TransferStatus> getPreviousTransferStatus() throws IOException { Map<String, TransferStatus> status = new HashMap<String, TransferStatus>(); FileStatus fstats[] = null;/* w w w .ja v a2 s. c om*/ try { fstats = fs.listStatus(batchBasePath); } catch (Exception e) { System.out.println("No Previous states found: " + e.getMessage()); } if (fstats == null || fstats.length <= 0) return status; List<FileStatus> fstatList = new ArrayList<FileStatus>(); for (FileStatus fstat : fstats) { if (fstat.isDirectory()) fstatList.add(fstat); } Collections.sort(fstatList, new Comparator<FileStatus>() { @Override public int compare(FileStatus o1, FileStatus o2) { return (int) (o2.getModificationTime() - o1.getModificationTime()); // decending order sort by timestamp } }); // ignore the current state folder as well. fstatList.remove(0); for (FileStatus fstat : fstatList) { System.out.println("Processing State History: " + fstat.getPath()); Path spath = new Path(fstat.getPath(), PREVIOUS_STATE_FILE_NAME); List<TransferStatus> stats = getAllStats(new Path(fstat.getPath(), REPORT_PATH)); mergeStates(status, stats); if (fs.exists(spath)) { stats = getAllStats(spath); mergeStates(status, stats); break; } } return status; }
From source file:com.flipkart.fdp.migration.distcp.state.HDFSStateManager.java
License:Apache License
private List<TransferStatus> getAllStats(Path path) throws IOException { Gson gson = new Gson(); List<TransferStatus> status = new ArrayList<TransferStatus>(); FileStatus fstats[] = null;//www . ja v a 2 s . c o m if (fs.isDirectory(path)) { fstats = fs.listStatus(path); } else { try { fstats = new FileStatus[1]; fstats[0] = fs.getFileStatus(path); } catch (Exception e) { return status; } } if (fstats == null || fstats.length <= 0) return status; for (FileStatus fstat : fstats) { if (fstat.isFile()) { try { BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fstat.getPath()))); String line = null; while (null != (line = reader.readLine())) { if (line.trim().length() <= 1) continue; try { TransferStatus tstat = gson.fromJson(line, TransferStatus.class); if (tstat != null) status.add(tstat); } catch (Exception ein) { System.out.println("Exception Reading from location: " + fstat.getPath() + ", Message: " + ein.getMessage()); } } reader.close(); } catch (Exception e) { System.out.println("Exception reading previous state: " + e.getMessage()); } } } return status; }
From source file:com.flipkart.fdp.migration.distcp.utils.FileCountDriver.java
License:Apache License
public List<String> getAllFilePath(Path filePath, FileSystem fs, String destBasePath) throws IOException { List<String> fileList = new ArrayList<String>(); List<String> inputPaths = new ArrayList<String>(); FileStatus[] fileStatus = fs.globStatus(filePath); for (FileStatus fileStat : fileStatus) { if (fileStat.isFile()) { fileList.add(trimExtension(fileStat.getPath().toUri().getPath(), destBasePath)); } else {//ww w. ja va 2 s.co m //System.out.println("Found a directory : " + fileStat.getPath().toUri().getPath()); inputPaths.add(fileStat.getPath().toUri().getPath()); } } System.out.println("InputPaths size : " + inputPaths.size()); if (inputPaths.size() > 0) { for (String path : inputPaths) { List<String> fstat = getFileStatusRecursive(new Path(path), fs, destBasePath); fileList.addAll(fstat); } } return fileList; }
From source file:com.flipkart.fdp.migration.distcp.utils.FileCountDriver.java
License:Apache License
/**
 * Walks {@code path} depth-first and returns one entry per non-directory found.
 *
 * <p>Each entry is the URI path of the file passed through {@code trimExtension}
 * together with {@code destBasePath}. When {@code path} is itself a file, the
 * result contains just that single entry.
 *
 * @param path         file or directory to walk
 * @param fs           file system to query
 * @param destBasePath forwarded unchanged to {@code trimExtension}
 * @return the collected entries, in listing order
 * @throws IOException if a file system call fails
 */
public List<String> getFileStatusRecursive(Path path, FileSystem fs, String destBasePath) throws IOException {

    final List<String> collected = new ArrayList<String>();

    final FileStatus root = fs.getFileStatus(path);
    if (root != null && root.isFile()) {
        collected.add(trimExtension(root.getPath().toUri().getPath(), destBasePath));
        return collected;
    }

    final FileStatus[] children = fs.listStatus(path);
    if (children == null) {
        return collected;
    }

    for (FileStatus child : children) {
        if (!child.isDirectory()) {
            collected.add(trimExtension(child.getPath().toUri().getPath(), destBasePath));
            continue;
        }
        collected.addAll(getFileStatusRecursive(child.getPath(), fs, destBasePath));
    }
    return collected;
}
From source file:com.flipkart.fdp.migration.distcp.utils.FileStatsDriver.java
License:Apache License
/**
 * Recursively lists the files under {@code filePath} whose modification time lies
 * within the inclusive window {@code [startTS, endTS]}.
 *
 * <p>Each result entry has the form {@code "<uri path>,<length>"}.
 *
 * @param filePath directory to scan
 * @return matching entries, in listing order
 * @throws IOException if listing a directory fails
 */
public List<String> getAllFilePath(Path filePath) throws IOException {

    final List<String> matches = new ArrayList<String>();

    for (FileStatus entry : fs.listStatus(filePath)) {
        if (entry.isDirectory()) {
            // Descend; sub-directory results keep their relative order.
            matches.addAll(getAllFilePath(entry.getPath()));
            continue;
        }

        final long modified = entry.getModificationTime();
        final boolean inWindow = modified >= startTS && modified <= endTS;
        if (inWindow) {
            matches.add(entry.getPath().toUri().getPath() + "," + entry.getLen());
        }
    }
    return matches;
}
From source file:com.flipkart.fdp.migration.distcp.utils.HistoricFileCleanUpDriver.java
License:Apache License
public List<String> getAllFilePath(Path filePath) throws IOException { List<String> fileList = new ArrayList<String>(); FileStatus[] fileStatus = fs.listStatus(filePath); for (FileStatus fileStat : fileStatus) { if (fileStat.isDirectory()) { if (fileStat.getModificationTime() >= startTS && fileStat.getModificationTime() <= endTS && filePath.toUri().getPath().toString() != rootpath) fileList.add(fileStat.getPath().toUri().getPath()); else//from w w w. j a va 2 s . co m fileList.addAll(getAllFilePath(fileStat.getPath())); } // } else { // if (fileStat.getModificationTime() >= startTS // && fileStat.getModificationTime() <= endTS) // fileList.add(fileStat.getPath().toUri().getPath()); // } } return fileList; }
From source file:com.flipkart.fdp.migration.distcp.utils.ZeroFileCleanUpDriver.java
License:Apache License
/**
 * Recursively finds all zero-length files under {@code filePath}.
 *
 * @param filePath directory to scan
 * @return URI paths of every file whose length is 0, in listing order
 * @throws IOException if listing a directory fails
 */
public List<String> getAllFilePath(Path filePath) throws IOException {

    final List<String> emptyFiles = new ArrayList<String>();

    for (FileStatus entry : fs.listStatus(filePath)) {
        if (entry.isDirectory()) {
            emptyFiles.addAll(getAllFilePath(entry.getPath()));
            continue;
        }
        if (entry.getLen() != 0) {
            continue;
        }
        emptyFiles.add(entry.getPath().toUri().getPath());
    }
    return emptyFiles;
}
From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java
License:Apache License
@Override protected List<FileStatus> listStatus(final JobContext job) throws IOException { final List<FileStatus> files = Lists.newArrayList(); for (FileStatus file : super.listStatus(job)) { files.addAll(handleFile(file, job)); }/* ww w . ja v a 2 s . c o m*/ LOG.debug("Initial file list: {} {}", files.size(), files); final Configuration configuration = job.getConfiguration(); for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) { final FileStatus fileStatus = iterator.next(); final Path file = fileStatus.getPath(); final FileSystem fs = file.getFileSystem(configuration); if (!SSTablePredicates.IS_SSTABLE.apply(file.toString())) { // Ignore non-sstable date files, always (for now) LOG.debug("Removing non-sstable file: {}", file); iterator.remove(); } else { // read the index file LOG.debug("Reading index file for sstable file: {}", file); final Path indexFile = SSTableFunctions.INDEX_FILE.apply(file); LOG.debug("Reading index file: {}", indexFile); final SSTableIndexIndex index = SSTableIndexIndex.readIndex(fs, indexFile); indexes.put(file, index); } } LOG.debug("Final file list: {} {}", files.size(), files); return files; }
From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java
License:Apache License
/** * If we have a directory recursively gather the files we care about for this job. * * @param file Root file/directory./*from www . j av a 2 s.co m*/ * @param job Job context. * @return All files we care about. * @throws IOException */ private Collection<FileStatus> handleFile(final FileStatus file, final JobContext job) throws IOException { final List<FileStatus> results = Lists.newArrayList(); if (file.isDir()) { final Path p = file.getPath(); LOG.debug("Expanding {}", p); final FileSystem fs = p.getFileSystem(job.getConfiguration()); final FileStatus[] children = fs.listStatus(p); for (FileStatus child : children) { results.addAll(handleFile(child, job)); } } else { results.add(file); } return results; }