Example usage for org.apache.hadoop.fs FileChecksum getAlgorithmName

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileChecksum getAlgorithmName.

Prototype

public abstract String getAlgorithmName();

Document

The checksum algorithm name
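
Before the per-project examples below, here is a minimal, self-contained sketch of the call. It assumes a reachable Hadoop filesystem configured via core-site.xml; the path /tmp/example.txt is hypothetical. Note that FileSystem#getFileChecksum may return null when the underlying filesystem does not provide checksums.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChecksumAlgorithmExample {
    public static void main(String[] args) throws Exception {
        // Connect to the default filesystem configured in core-site.xml
        FileSystem fs = FileSystem.get(new Configuration());
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        // getFileChecksum returns null on filesystems without checksum support
        FileChecksum checksum = fs.getFileChecksum(path);
        if (checksum != null) {
            System.out.println("Algorithm: " + checksum.getAlgorithmName());
            System.out.println("Length:    " + checksum.getLength());
        } else {
            System.out.println("No checksum available for " + path);
        }
        fs.close();
    }
}

On HDFS the returned name typically looks like "MD5-of-xMD5-of-yCRC32", encoding the bytes-per-CRC and CRCs-per-block settings, so two checksums are only meaningfully comparable when the algorithm names match.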

Usage

From source file:com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksums.java

License:Apache License

@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session)
        throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem fs = getFileSystem(context);
    if (fs == null) {
        getLog().error("Couldn't initialize HDFS");
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    String filesJSON = context.getProperty(FILES).evaluateAttributeExpressions(flowFile).getValue();
    String absolutePath = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
    Boolean failIfWrongChecksum = context.getProperty(FAIL_IF_INCORRECT_CHECKSUM)
            .evaluateAttributeExpressions(flowFile).asBoolean();
    Gson jsonParser = new Gson();
    File[] filesList;
    try {
        filesList = jsonParser.fromJson(filesJSON, File[].class);
        if (filesList == null) {
            filesList = new File[0];
        }

        for (File f : filesList) {
            String name = f.getName();
            Path filePath;
            if (absolutePath == null || absolutePath.isEmpty()) {
                filePath = new Path(name);
            } else {
                filePath = new Path(absolutePath, name);
            }
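            // fs.getFileChecksum may return null on filesystems without checksum
            // support; this processor assumes an HDFS checksum is available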
            FileChecksum computedChecksum = fs.getFileChecksum(filePath);
            String base64Checksum = Base64.getEncoder().encodeToString(computedChecksum.getBytes());
            f.setComputedChecksum(
                    new Checksum(base64Checksum.length(), base64Checksum, computedChecksum.getAlgorithmName()));
            if (failIfWrongChecksum && !Objects.equals(base64Checksum, f.getChecksum().getValue())) {
                getLog().error("Checksums don't match! File: " + filePath.toString() + " checksum provided: "
                        + f.getChecksum().getValue() + " checksum computed: " + base64Checksum);
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
    } catch (JsonSyntaxException e) {
        getLog().error("Files list attribute does not contain a proper JSON array");
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (FileNotFoundException e) {
        getLog().error("One of the provided files not found.\n" + e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (IOException e) {
        throw new ProcessException(e);
    }
    flowFile = session.putAttribute(flowFile, FILES.getName(), jsonParser.toJson(filesList));
    session.transfer(flowFile, REL_SUCCESS);
}

From source file:com.twitter.elephanttwin.indexing.AbstractBlockIndexingJob.java

License:Open Source License

/**
 * Create a FileIndexDescriptor describing which columns have been indexed
 * for the input file, and write it under the index directory.
 * @param inputFile the input file whose index is being described
 * @param fs the filesystem where the index files are stored
 * @throws IOException
 */

protected void createIndexDescriptors(FileStatus inputFile, FileSystem fs) throws IOException {
    Path indexFilePath = new Path(getIndex() + inputFile.getPath().toUri().getRawPath());

    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(inputFile.getPath().toString());
    fid.setDocType(getExpectedDocType());
    LOG.info("getting checksum from:" + inputFile.getPath());
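    // fs.getFileChecksum may return null (e.g., on filesystems without
    // checksum support), hence the null check below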
    FileChecksum cksum = fs.getFileChecksum(inputFile.getPath());
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = null;
    if (cksum != null)
        fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(cksum.getAlgorithmName(),
                ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());

    Path idxPath = new Path(indexFilePath + "/" + BlockIndexedFileInputFormat.INDEXMETAFILENAME);
    FSDataOutputStream os = fs.create(idxPath, true);
    @SuppressWarnings("unchecked")
    ThriftWritable<FileIndexDescriptor> writable = (ThriftWritable<FileIndexDescriptor>) ThriftWritable
            .newInstance(fid.getClass());
    writable.set(fid);
    writable.write(os);
    os.close();
}

From source file:com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingJob.java

License:Apache License

/**
 * Populates a FileIndexDescriptor with common fields such as source path, checksum, etc.
 * @param path the file whose index is being described
 * @return the populated FileIndexDescriptor
 * @throws IOException
 */
protected FileIndexDescriptor buildFileIndexDescriptor(Path path) throws IOException {
    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(path.toString());
    fid.setDocType(getExpectedDocType());
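    // Note: unlike the previous example, this code assumes getFileChecksum
    // returns non-null; it would throw a NullPointerException on filesystems
    // without checksum support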
    FileChecksum cksum = path.getFileSystem(getConf()).getFileChecksum(path);
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(
            cksum.getAlgorithmName(), ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());
    return fid;
}