Example usage for org.apache.hadoop.fs FileChecksum getAlgorithmName

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileChecksum getAlgorithmName.

Prototype

public abstract String getAlgorithmName();

Document

The checksum algorithm name
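
Before the per-project examples below, here is a minimal, self-contained sketch of the call. It assumes a reachable Hadoop filesystem configured via core-site.xml; the path /tmp/example.txt is hypothetical. Note that FileSystem#getFileChecksum may return null when the underlying filesystem does not provide checksums.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChecksumAlgorithmExample {
    public static void main(String[] args) throws Exception {
        // Connect to the default filesystem configured in core-site.xml
        FileSystem fs = FileSystem.get(new Configuration());
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        // getFileChecksum returns null on filesystems without checksum support
        FileChecksum checksum = fs.getFileChecksum(path);
        if (checksum != null) {
            System.out.println("Algorithm: " + checksum.getAlgorithmName());
            System.out.println("Length:    " + checksum.getLength());
        } else {
            System.out.println("No checksum available for " + path);
        }
        fs.close();
    }
}

On HDFS the returned name typically looks like "MD5-of-xMD5-of-yCRC32", encoding the bytes-per-CRC and CRCs-per-block settings, so two checksums are only meaningfully comparable when the algorithm names match.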

Usage

From source file:com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksums.java

License:Apache License

@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session)
        throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem fs = getFileSystem(context);
    if (fs == null) {
        getLog().error("Couldn't initialize HDFS");
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    String filesJSON = context.getProperty(FILES).evaluateAttributeExpressions(flowFile).getValue();
    String absolutePath = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
    Boolean failIfWrongChecksum = context.getProperty(FAIL_IF_INCORRECT_CHECKSUM)
            .evaluateAttributeExpressions(flowFile).asBoolean();
    Gson jsonParser = new Gson();
    File[] filesList;
    try {
        filesList = jsonParser.fromJson(filesJSON, File[].class);
        if (filesList == null) {
            filesList = new File[0];
        }

        for (File f : filesList) {
            String name = f.getName();
            Path filePath;
            if (absolutePath == null || absolutePath.isEmpty()) {
                filePath = new Path(name);
            } else {
                filePath = new Path(absolutePath, name);
            }
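            // fs.getFileChecksum may return null on filesystems without checksum
            // support; this processor assumes an HDFS checksum is available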
            FileChecksum computedChecksum = fs.getFileChecksum(filePath);
            String base64Checksum = Base64.getEncoder().encodeToString(computedChecksum.getBytes());
            f.setComputedChecksum(
                    new Checksum(base64Checksum.length(), base64Checksum, computedChecksum.getAlgorithmName()));
            if (failIfWrongChecksum && !Objects.equals(base64Checksum, f.getChecksum().getValue())) {
                getLog().error("Checksums don't match! File: " + filePath.toString() + " checksum provided: "
                        + f.getChecksum().getValue() + " checksum computed: " + base64Checksum);
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
    } catch (JsonSyntaxException e) {
        getLog().error("Files list attribute does not contain a proper JSON array");
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (FileNotFoundException e) {
        getLog().error("One of the provided files not found.\n" + e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (IOException e) {
        throw new ProcessException(e);
    }
    flowFile = session.putAttribute(flowFile, FILES.getName(), jsonParser.toJson(filesList));
    session.transfer(flowFile, REL_SUCCESS);
}

From source file:com.twitter.elephanttwin.indexing.AbstractBlockIndexingJob.java

License:Open Source License

/**
 * Create a FileIndexDescriptor describing which columns have been indexed
 * for the input file, and write it under the index directory.
 * @param inputFile the input file whose index is being described
 * @param fs the filesystem where the index files are stored
 * @throws IOException
 */

protected void createIndexDescriptors(FileStatus inputFile, FileSystem fs) throws IOException {
    Path indexFilePath = new Path(getIndex() + inputFile.getPath().toUri().getRawPath());

    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(inputFile.getPath().toString());
    fid.setDocType(getExpectedDocType());
    LOG.info("getting checksum from:" + inputFile.getPath());
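    // fs.getFileChecksum may return null (e.g., on filesystems without
    // checksum support), hence the null check below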
    FileChecksum cksum = fs.getFileChecksum(inputFile.getPath());
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = null;
    if (cksum != null)
        fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(cksum.getAlgorithmName(),
                ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());

    Path idxPath = new Path(indexFilePath + "/" + BlockIndexedFileInputFormat.INDEXMETAFILENAME);
    FSDataOutputStream os = fs.create(idxPath, true);
    @SuppressWarnings("unchecked")
    ThriftWritable<FileIndexDescriptor> writable = (ThriftWritable<FileIndexDescriptor>) ThriftWritable
            .newInstance(fid.getClass());
    writable.set(fid);
    writable.write(os);
    os.close();
}

From source file:com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingJob.java

License:Apache License

/**
 * Populates a FileIndexDescriptor with common fields such as source path, checksum, etc.
 * @param path the file whose index is being described
 * @return the populated FileIndexDescriptor
 * @throws IOException
 */
protected FileIndexDescriptor buildFileIndexDescriptor(Path path) throws IOException {
    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(path.toString());
    fid.setDocType(getExpectedDocType());
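    // Note: unlike the previous example, this code assumes getFileChecksum
    // returns non-null; it would throw a NullPointerException on filesystems
    // without checksum support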
    FileChecksum cksum = path.getFileSystem(getConf()).getFileChecksum(path);
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(
            cksum.getAlgorithmName(), ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());
    return fid;
}