Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.alexholmes.hdfsslurper.FileSystemManager.java

License:Apache License

/**
 * Finalizes a source file after a successful copy.
 *
 * <p>When {@code config.isRemove()} is true the source file is deleted
 * (non-recursively); otherwise it is renamed into the configured "complete"
 * directory under its original file name. The batch id that is appended to
 * every log message is the part of the source path string that starts one
 * character after the last occurrence of the configured file-name/batch-id
 * delimiter (the whole path string when the delimiter does not occur).
 *
 * @param fs status of the source file that was copied
 * @return the boolean result of the delete or rename call on the source file system
 * @throws IOException if the underlying file system call fails
 */
public boolean fileCopyComplete(FileStatus fs) throws IOException {
    final String delimiter = config.getFileNameBatchIdDelimiter();
    final Path source = fs.getPath();
    final String sourceText = source.toString();
    final String batchId = sourceText.substring(sourceText.lastIndexOf(delimiter) + 1);

    if (!config.isRemove()) {
        // Keep the source: park it in the "complete" directory under the same name.
        final Path completedPath = new Path(config.getCompleteDir(), source.getName());
        log.info("event#File copy successful, moving source " + source + " to completed file "
                + completedPath + "$batchId#" + batchId);
        final boolean moved = config.getSrcFs().rename(source, completedPath);
        if (!moved) {
            log.info("event#File move unsuccessful" + "$batchId#" + batchId);
        }
        return moved;
    }

    // Removal requested: delete the single source file.
    log.info("event#File copy successful, deleting source " + source + "$batchId#" + batchId);
    final boolean deleted = config.getSrcFs().delete(source, false);
    if (!deleted) {
        log.info("event#File deletion unsuccessful" + "$batchId#" + batchId);
    }
    return deleted;
}

From source file:com.alexholmes.hdfsslurper.FileSystemManager.java

License:Apache License

/**
 * Moves a source file into the configured error directory, keeping its file name.
 *
 * @param fs status of the file to move
 * @return the boolean result of the rename call on the source file system
 * @throws IOException if the rename call fails
 * @throws InterruptedException declared for callers; not thrown directly by this body
 */
public boolean fileCopyError(FileStatus fs) throws IOException, InterruptedException {
    final Path source = fs.getPath();
    final Path errorPath = new Path(config.getErrorDir(), source.getName());
    log.info("event#Found file in work directory, moving " + source + " to error file " + errorPath);
    return config.getSrcFs().rename(source, errorPath);
}

From source file:com.alexholmes.hdfsslurper.FileSystemManager.java

License:Apache License

/**
 * Sends every visible regular file found in the work directory to the error
 * directory via {@link #fileCopyError(FileStatus)}.
 *
 * <p>Directories are skipped, and so are files whose name starts with a dot.
 * The boolean result of each move is not inspected.
 *
 * @throws IOException if listing the work directory or moving a file fails
 * @throws InterruptedException propagated from {@code fileCopyError}
 */
public void moveWorkFilesToError() throws IOException, InterruptedException {
    final FileStatus[] workEntries = config.getSrcFs().listStatus(config.getWorkDir());
    for (FileStatus entry : workEntries) {
        if (entry.isDir()) {
            continue;
        }

        final Path entryPath = entry.getPath();
        if (entryPath.getName().startsWith(".")) {
            log.debug("event#Ignoring hidden file '" + entryPath + "'");
            continue;
        }

        fileCopyError(entry);
    }
}

From source file:com.alexholmes.hdfsslurper.FileSystemManager.java

License:Apache License

/**
 * Builds the path of a staging file for a source/destination pair.
 *
 * <p>The file name is a non-negative integer derived from the hash of the
 * concatenated source and destination path strings plus a fresh random
 * value, so repeated calls for the same pair yield different names.
 *
 * @param srcFileStatus status of the source file being copied
 * @param destFile final destination path of the copy
 * @return a path inside the configured destination staging directory
 */
public Path getStagingFile(FileStatus srcFileStatus, Path destFile) {
    int pairHash = (srcFileStatus.getPath().toString() + destFile.toString()).hashCode();
    // Clear the sign bit instead of using Math.abs: the int sum may overflow to
    // Integer.MIN_VALUE, for which Math.abs still returns a negative number.
    int hash = (pairHash + new Random().nextInt()) & Integer.MAX_VALUE;
    return new Path(config.getDestStagingDir(), String.valueOf(hash));
}

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

/**
 * Copies one source file to its destination by way of a staging file.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If a work script is configured, run it and continue with the file it returns.</li>
 *   <li>Resolve the destination path and, when a codec is configured, make sure
 *       it ends with the codec's default extension.</li>
 *   <li>Create the destination and staging parent directories when missing.</li>
 *   <li>Stream the source into the staging file (through the codec when set,
 *       accumulating a CRC32 of the source bytes when verification is on).</li>
 *   <li>Without a codec, require source and staging sizes to match; with
 *       verification on, call {@code verify} with the CRC.</li>
 *   <li>Delete any existing destination file, rename the staging file onto it,
 *       and optionally (re)build the LZOP index.</li>
 *   <li>Hand the source file to {@code fileSystemManager.fileCopyComplete}.</li>
 * </ol>
 *
 * <p>Any {@code Throwable} raised by these steps is logged, the staging file is
 * removed on a best-effort basis, and the source file is handed to
 * {@code fileSystemManager.fileCopyError}; the failure itself is not rethrown.
 *
 * @param srcFileStatus status of the source file to copy
 * @throws IOException if the configuration lookup before the guarded section or
 *         the error-path move fails
 * @throws InterruptedException propagated from {@code fileCopyError}
 */
private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {

    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        //
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        //
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        //
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);

        // batch id = everything one character past the last delimiter in the source path
        String batchId = srcFile.toString().substring(
                srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to
        // create it
        //
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // if the staging directory doesn't exist, attempt to create it
        //
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                // report the staging directory that actually failed, not the target directory
                throw new IOException(
                        "event#Failed to create staging directory: " + destStagingParentDir.toUri());
            }
        }

        // copy the file
        //
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                // checksum the raw source bytes as they are read
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);

            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }

            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        // sizes are only comparable when the output was not passed through a codec
        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#"
                + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        // an existing destination file is removed before the staging file is renamed onto it
        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);

    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists; stagingFile is still null
        // when the failure happened before a staging path was chosen, in which
        // case there is nothing to clean up
        //
        try {
            if (destFs != null && stagingFile != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }

}

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

/**
 * Runs the configured work script for a source file and returns the path the
 * script printed.
 *
 * <p>The script is invoked through {@code ScriptExecutor.getStdOutFromScript}
 * with the source path string, a limit of 60 seconds and the configured
 * file-name/batch-id delimiter. The returned path must carry a URI scheme.
 *
 * @param srcFile status of the original source file
 * @return the path produced by the work script
 * @throws IOException if the script's path has no URI scheme, or propagated
 *         from the script execution
 */
private Path stageSource(FileStatus srcFile) throws IOException {
    final String delimiter = config.getFileNameBatchIdDelimiter();
    final Path stagedPath = new Path(ScriptExecutor.getStdOutFromScript(config.getWorkScript(),
            srcFile.getPath().toString(), 60, TimeUnit.SECONDS, config.getFileNameBatchIdDelimiter()));

    // batch id = tail of the staged path string, one character past the last delimiter
    final String stagedText = stagedPath.toString();
    final String batchId = stagedText.substring(stagedText.lastIndexOf(delimiter) + 1);

    final boolean hasScheme = stagedPath.toUri().getScheme() != null;
    if (!hasScheme) {
        throw new IOException("event#Work path from script must be a URI with a scheme: '" + stagedPath + "'"
                + "$batchId#" + batchId);
    }

    log.info("event#Staging script returned new file '" + stagedPath + " for old " + srcFile.getPath()
            + "$batchId#" + batchId);
    return stagedPath;
}

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

/**
 * Determines the destination path for a source file.
 *
 * <p>When a destination directory is configured, the result is that directory
 * plus the source file name, with the codec's default extension appended when
 * a codec is configured. Without a destination directory the path is obtained
 * from {@code getDestPathFromScript}.
 *
 * @param srcFile status of the source file
 * @return the destination path for the copy
 * @throws IOException propagated from the script-based lookup
 */
private Path getHdfsTargetPath(FileStatus srcFile) throws IOException {
    if (config.getDestDir() == null) {
        return getDestPathFromScript(srcFile);
    }

    String targetName = srcFile.getPath().getName();
    if (config.getCodec() != null) {
        targetName = targetName + config.getCodec().getDefaultExtension();
    }
    return new Path(config.getDestDir(), targetName);
}

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

/**
 * Asks the configured destination script where a source file should be written.
 *
 * <p>The script is invoked through {@code ScriptExecutor.getStdOutFromScript}
 * with the source path string, a limit of 60 seconds and the configured
 * file-name/batch-id delimiter. The returned path must carry a URI scheme.
 *
 * @param srcFile status of the source file
 * @return the destination path produced by the script
 * @throws IOException if the script's path has no URI scheme, or propagated
 *         from the script execution
 */
private Path getDestPathFromScript(FileStatus srcFile) throws IOException {
    final Path scriptedDest = new Path(ScriptExecutor.getStdOutFromScript(config.getScript(),
            srcFile.getPath().toString(), 60, TimeUnit.SECONDS, config.getFileNameBatchIdDelimiter()));

    // batch id = tail of the destination path string, one character past the last delimiter
    final String delimiter = config.getFileNameBatchIdDelimiter();
    final String destText = scriptedDest.toString();
    final String batchId = destText.substring(destText.lastIndexOf(delimiter) + 1);

    final boolean hasScheme = scriptedDest.toUri().getScheme() != null;
    if (!hasScheme) {
        throw new IOException("event#Destination path from script must be a URI with a scheme: '" + scriptedDest
                + "'" + "$batchId#" + batchId);
    }
    return scriptedDest;
}

From source file:com.alibaba.jstorm.hdfs.blobstore.HdfsBlobStoreImpl.java

License:Apache License

/**
 * Lists the entries directly under {@code path} as blob store files.
 *
 * <p>Each listed entry is wrapped in an {@code HdfsBlobStoreFile}. Entries for
 * which that constructor throws {@code IllegalArgumentException} are logged at
 * warn level and left out of the result.
 *
 * @param path directory to list
 * @return an iterator over the accepted entries; empty when the listing is null
 * @throws IOException if listing the directory fails
 */
protected Iterator<BlobStoreFile> listBlobStoreFiles(Path path) throws IOException {
    final ArrayList<BlobStoreFile> blobFiles = new ArrayList<BlobStoreFile>();
    final FileStatus[] statuses = _fs.listStatus(new Path[] { path });
    if (statuses == null) {
        return blobFiles.iterator();
    }

    for (FileStatus status : statuses) {
        final Path entry = status.getPath();
        try {
            blobFiles.add(new HdfsBlobStoreFile(entry.getParent(), entry.getName(), _hadoopConf));
        } catch (IllegalArgumentException e) {
            // The entry was rejected as a blob store file; report it and move on.
            LOG.warn("Found an unexpected file in {} {}", path, entry.getName());
        }
    }
    return blobFiles.iterator();
}

From source file:com.alibaba.jstorm.hdfs.blobstore.HdfsBlobStoreImpl.java

License:Apache License

/**
 * Lists the names of the entries directly under {@code path}.
 *
 * <p>An {@code IllegalArgumentException} raised while collecting an entry's
 * name is logged at debug level and that entry is left out.
 *
 * @param path directory to list
 * @return an iterator over the entry names; empty when the listing is null
 * @throws IOException if listing the directory fails
 */
protected Iterator<String> listKeys(Path path) throws IOException {
    final ArrayList<String> keys = new ArrayList<String>();
    final FileStatus[] statuses = _fs.listStatus(new Path[] { path });
    if (statuses == null) {
        return keys.iterator();
    }

    for (FileStatus status : statuses) {
        final Path entry = status.getPath();
        try {
            keys.add(entry.getName());
        } catch (IllegalArgumentException e) {
            // The entry was rejected; report it and move on.
            LOG.debug("Found an unexpected file in {} {}", path, entry.getName());
        }
    }
    return keys.iterator();
}