List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
From source file:com.alexholmes.hdfsslurper.FileSystemManager.java
License:Apache License
/**
 * Finalizes a source file once its copy has succeeded. When {@code config.isRemove()} is set the
 * source is deleted; otherwise it is renamed into {@code config.getCompleteDir()} under its
 * original file name.
 *
 * @param fs status of the source file that was copied
 * @return the boolean returned by the delete or rename call on the source file system
 * @throws IOException if the source file system operation throws
 */
public boolean fileCopyComplete(FileStatus fs) throws IOException {
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();
    Path source = fs.getPath();
    String sourceText = source.toString();

    // Batch id: the path text starting one character past the last occurrence of the
    // delimiter (the whole path text when the delimiter does not occur).
    String batchId = sourceText.substring(sourceText.lastIndexOf(filenameBatchidDelimiter) + 1);

    if (config.isRemove()) {
        log.info("event#File copy successful, deleting source " + source + "$batchId#" + batchId);
        boolean deleted = config.getSrcFs().delete(source, false);
        if (!deleted) {
            log.info("event#File deletion unsuccessful" + "$batchId#" + batchId);
        }
        return deleted;
    }

    Path completedPath = new Path(config.getCompleteDir(), source.getName());
    log.info("event#File copy successful, moving source " + source + " to completed file "
            + completedPath + "$batchId#" + batchId);
    boolean moved = config.getSrcFs().rename(source, completedPath);
    if (!moved) {
        log.info("event#File move unsuccessful" + "$batchId#" + batchId);
    }
    return moved;
}
From source file:com.alexholmes.hdfsslurper.FileSystemManager.java
License:Apache License
/**
 * Renames the given source file into {@code config.getErrorDir()}, keeping its file name.
 *
 * @param fs status of the file to move
 * @return the boolean returned by the rename call on the source file system
 * @throws IOException if the rename throws
 * @throws InterruptedException declared for callers; nothing in this body throws it directly
 */
public boolean fileCopyError(FileStatus fs) throws IOException, InterruptedException {
    Path source = fs.getPath();
    Path errorPath = new Path(config.getErrorDir(), source.getName());
    log.info("event#Found file in work directory, moving " + source + " to error file " + errorPath);
    boolean moved = config.getSrcFs().rename(source, errorPath);
    return moved;
}
From source file:com.alexholmes.hdfsslurper.FileSystemManager.java
License:Apache License
public void moveWorkFilesToError() throws IOException, InterruptedException { for (FileStatus fs : config.getSrcFs().listStatus(config.getWorkDir())) { if (!fs.isDir()) { if (fs.getPath().getName().startsWith(".")) { log.debug("event#Ignoring hidden file '" + fs.getPath() + "'"); continue; }//from w ww. j a va 2 s. c om fileCopyError(fs); } } }
From source file:com.alexholmes.hdfsslurper.FileSystemManager.java
License:Apache License
/**
 * Builds a staging path under {@code config.getDestStagingDir()} whose name is a non-negative
 * integer derived from the source path text, the destination path text and a random value.
 *
 * @param srcFileStatus status of the source file being copied
 * @param destFile final destination of the copy
 * @return a path inside the staging directory named by the derived number
 */
public Path getStagingFile(FileStatus srcFileStatus, Path destFile) {
    int mixed = (srcFileStatus.getPath().toString() + destFile.toString()).hashCode()
            + new Random().nextInt();
    // Math.abs(Integer.MIN_VALUE) is still negative, which would produce a file name with a
    // leading '-'. Clearing the sign bit always yields a value in [0, Integer.MAX_VALUE].
    int hash = mixed & Integer.MAX_VALUE;
    return new Path(config.getDestStagingDir(), String.valueOf(hash));
}
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
private void process(FileStatus srcFileStatus) throws IOException, InterruptedException { Path stagingFile = null;/* w w w . ja va2 s.c om*/ FileSystem destFs = null; String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter(); try { FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig()); // run a script which can change the name of the file as well as // write out a new version of the file // if (config.getWorkScript() != null) { Path newSrcFile = stageSource(srcFileStatus); srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile); } Path srcFile = srcFileStatus.getPath(); // get the target HDFS file // Path destFile = getHdfsTargetPath(srcFileStatus); if (config.getCodec() != null) { String ext = config.getCodec().getDefaultExtension(); if (!destFile.getName().endsWith(ext)) { destFile = new Path(destFile.toString() + ext); } } destFs = destFile.getFileSystem(config.getConfig()); // get the staging HDFS file // stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile); String batchId = srcFile.toString().substring( srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length()); log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'" + "$batchId#" + batchId); // if the directory of the target file doesn't exist, attempt to // create it // Path destParentDir = destFile.getParent(); if (!destFs.exists(destParentDir)) { log.info("event#Attempting creation of target directory: " + destParentDir.toUri()); if (!destFs.mkdirs(destParentDir)) { throw new IOException("event#Failed to create target directory: " + destParentDir.toUri()); } } // if the staging directory doesn't exist, attempt to create it // Path destStagingParentDir = stagingFile.getParent(); if (!destFs.exists(destStagingParentDir)) { log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri()); if 
(!destFs.mkdirs(destStagingParentDir)) { throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri()); } } // copy the file // InputStream is = null; OutputStream os = null; CRC32 crc = new CRC32(); try { is = new BufferedInputStream(srcFs.open(srcFile)); if (config.isVerify()) { is = new CheckedInputStream(is, crc); } os = destFs.create(stagingFile); if (config.getCodec() != null) { os = config.getCodec().createOutputStream(os); } IOUtils.copyBytes(is, os, 4096, false); } finally { IOUtils.closeStream(is); IOUtils.closeStream(os); } long srcFileSize = srcFs.getFileStatus(srcFile).getLen(); long destFileSize = destFs.getFileStatus(stagingFile).getLen(); if (config.getCodec() == null && srcFileSize != destFileSize) { throw new IOException( "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize); } log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#" + batchId); if (config.isVerify()) { verify(stagingFile, crc.getValue()); } if (destFs.exists(destFile)) { destFs.delete(destFile, false); } log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'" + "$batchId#" + batchId); if (!destFs.rename(stagingFile, destFile)) { throw new IOException("event#Failed to rename file"); } if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) { Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX); if (destFs.exists(lzoIndexPath)) { log.info("event#Deleting index file as it already exists"); destFs.delete(lzoIndexPath, false); } indexer.index(destFile); } fileSystemManager.fileCopyComplete(srcFileStatus); } catch (Throwable t) { log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t); // delete the staging file if it still exists // try { if (destFs != null && destFs.exists(stagingFile)) { destFs.delete(stagingFile, false); } } catch (Throwable t2) { log.error("event#Failed to 
delete staging file " + stagingFile, t2); } fileSystemManager.fileCopyError(srcFileStatus); } }
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
private Path stageSource(FileStatus srcFile) throws IOException { String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter(); Path p = new Path(ScriptExecutor.getStdOutFromScript(config.getWorkScript(), srcFile.getPath().toString(), 60, TimeUnit.SECONDS, config.getFileNameBatchIdDelimiter())); String batchId = p.toString().substring(p.toString().lastIndexOf(filenameBatchidDelimiter) + 1, p.toString().length());// w w w . j a v a 2s . c om if (p.toUri().getScheme() == null) { throw new IOException( "event#Work path from script must be a URI with a scheme: '" + p + "'" + "$batchId#" + batchId); } log.info("event#Staging script returned new file '" + p + " for old " + srcFile.getPath() + "$batchId#" + batchId); return p; }
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
/**
 * Resolves the destination path for a source file. With a configured destination directory the
 * result is that directory plus the source file name (suffixed with the codec's default
 * extension when a codec is configured); without one the path comes from
 * {@code getDestPathFromScript}.
 *
 * @param srcFile status of the source file
 * @return the destination path
 * @throws IOException propagated from {@code getDestPathFromScript}
 */
private Path getHdfsTargetPath(FileStatus srcFile) throws IOException {
    if (config.getDestDir() == null) {
        return getDestPathFromScript(srcFile);
    }

    String targetName = srcFile.getPath().getName();
    if (config.getCodec() != null) {
        targetName = targetName + config.getCodec().getDefaultExtension();
    }
    return new Path(config.getDestDir(), targetName);
}
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
/**
 * Runs the configured destination script for a source file and returns the path it printed.
 *
 * @param srcFile status of the file handed to the script
 * @return the path built from the script's standard output
 * @throws IOException if the returned path has no URI scheme, or if running the script throws
 */
private Path getDestPathFromScript(FileStatus srcFile) throws IOException {
    // The script is given the source path text, a 60 second limit and the batch-id delimiter.
    Path p = new Path(ScriptExecutor.getStdOutFromScript(
            config.getScript(),
            srcFile.getPath().toString(),
            60,
            TimeUnit.SECONDS,
            config.getFileNameBatchIdDelimiter()));

    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();
    String destText = p.toString();
    String batchId = destText.substring(destText.lastIndexOf(filenameBatchidDelimiter) + 1);

    boolean missingScheme = p.toUri().getScheme() == null;
    if (missingScheme) {
        throw new IOException("event#Destination path from script must be a URI with a scheme: '" + p
                + "'" + "$batchId#" + batchId);
    }
    return p;
}
From source file:com.alibaba.jstorm.hdfs.blobstore.HdfsBlobStoreImpl.java
License:Apache License
protected Iterator<BlobStoreFile> listBlobStoreFiles(Path path) throws IOException { ArrayList<BlobStoreFile> ret = new ArrayList<BlobStoreFile>(); FileStatus[] files = _fs.listStatus(new Path[] { path }); if (files != null) { for (FileStatus sub : files) { try { ret.add(new HdfsBlobStoreFile(sub.getPath().getParent(), sub.getPath().getName(), _hadoopConf)); } catch (IllegalArgumentException e) { //Ignored the file did not match LOG.warn("Found an unexpected file in {} {}", path, sub.getPath().getName()); }/*from w w w.j av a 2 s. c o m*/ } } return ret.iterator(); }
From source file:com.alibaba.jstorm.hdfs.blobstore.HdfsBlobStoreImpl.java
License:Apache License
protected Iterator<String> listKeys(Path path) throws IOException { ArrayList<String> ret = new ArrayList<String>(); FileStatus[] files = _fs.listStatus(new Path[] { path }); if (files != null) { for (FileStatus sub : files) { try { ret.add(sub.getPath().getName().toString()); } catch (IllegalArgumentException e) { //Ignored the file did not match LOG.debug("Found an unexpected file in {} {}", path, sub.getPath().getName()); }//from w ww . j a va 2s . c om } } return ret.iterator(); }