Example usage for org.apache.hadoop.fs FileChecksum getBytes

List of usage examples for org.apache.hadoop.fs FileChecksum getBytes

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileChecksum getBytes.

Prototype

public abstract byte[] getBytes();

Source Link

Document

The value of the checksum in bytes

Usage

From source file:com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksums.java

License:Apache License

/**
 * Computes the file-system checksum of every file listed in the FILES property and records it,
 * Base64-encoded, on the corresponding list entry.
 *
 * <p>The flow file is routed to REL_FAILURE when the file system cannot be initialized, when the
 * file list is not valid JSON, when a listed file is not found, when the file system returns no
 * checksum for a file, or when checksum verification is enabled and a computed checksum differs
 * from the provided one. Otherwise the FILES attribute is rewritten with the updated list and the
 * flow file is routed to REL_SUCCESS.
 *
 * @param context the process context used to read the processor properties
 * @param session the process session supplying and receiving the flow file
 * @throws ProcessException if an I/O error other than a missing file occurs while computing a checksum
 */
@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session)
        throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem fs = getFileSystem(context);
    if (fs == null) {
        getLog().error("Couldn't initialize HDFS");
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final String filesJSON = context.getProperty(FILES).evaluateAttributeExpressions(flowFile).getValue();
    final String absolutePath = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
    // Boolean.TRUE.equals avoids an unboxing NullPointerException should asBoolean() yield null
    // (presumably when the property has no value - verify against the property descriptor).
    final boolean failIfWrongChecksum = Boolean.TRUE.equals(context.getProperty(FAIL_IF_INCORRECT_CHECKSUM)
            .evaluateAttributeExpressions(flowFile).asBoolean());
    final Gson jsonParser = new Gson();
    File[] filesList;
    try {
        filesList = jsonParser.fromJson(filesJSON, File[].class);
        if (filesList == null) {
            filesList = new File[0];
        }

        for (File f : filesList) {
            final String name = f.getName();
            final Path filePath;
            if (absolutePath == null || absolutePath.isEmpty()) {
                filePath = new Path(name);
            } else {
                filePath = new Path(absolutePath, name);
            }

            // FileSystem.getFileChecksum may return null when the file system implements no
            // checksum algorithm; fail explicitly instead of hitting a NullPointerException.
            final FileChecksum computedChecksum = fs.getFileChecksum(filePath);
            if (computedChecksum == null) {
                getLog().error("File system did not provide a checksum for file: " + filePath.toString());
                session.transfer(flowFile, REL_FAILURE);
                return;
            }

            final String b64Checksum = Base64.getEncoder().encodeToString(computedChecksum.getBytes());
            f.setComputedChecksum(
                    new Checksum(b64Checksum.length(), b64Checksum, computedChecksum.getAlgorithmName()));

            if (failIfWrongChecksum) {
                // A missing provided checksum is treated as a mismatch rather than dereferenced.
                final String providedChecksum = f.getChecksum() == null ? null : f.getChecksum().getValue();
                if (!Objects.equals(b64Checksum, providedChecksum)) {
                    getLog().error("Checksums don't match! File: " + filePath.toString() + " checksum provided: "
                            + providedChecksum + " checksum computed: " + b64Checksum);
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
            }
        }
    } catch (JsonSyntaxException e) {
        getLog().error("Files list attribute does not contain a proper JSON array");
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (FileNotFoundException e) {
        getLog().error("One of the provided files not found.\n" + e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (IOException e) {
        throw new ProcessException(e);
    }
    flowFile = session.putAttribute(flowFile, FILES.getName(), jsonParser.toJson(filesList));
    session.transfer(flowFile, REL_SUCCESS);
}

From source file:com.twitter.elephanttwin.indexing.AbstractBlockIndexingJob.java

License:Open Source License

/**
 * Create a FileIndexDescriptor to describe what columns have been indexed
 * @param path/*from  w w  w  .j a v  a 2  s  . c  o  m*/
 *          the path to the directory where index files are stored for the
 *          input file
 * @return FileIndexDescriptor
 * @throws IOException
 */

protected void createIndexDescriptors(FileStatus inputFile, FileSystem fs) throws IOException {
    Path indexFilePath = new Path(getIndex() + inputFile.getPath().toUri().getRawPath());

    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(inputFile.getPath().toString());
    fid.setDocType(getExpectedDocType());
    LOG.info("getting checksum from:" + inputFile.getPath());
    FileChecksum cksum = fs.getFileChecksum(inputFile.getPath());
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = null;
    if (cksum != null)
        fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(cksum.getAlgorithmName(),
                ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());

    Path idxPath = new Path(indexFilePath + "/" + BlockIndexedFileInputFormat.INDEXMETAFILENAME);
    FSDataOutputStream os = fs.create(idxPath, true);
    @SuppressWarnings("unchecked")
    ThriftWritable<FileIndexDescriptor> writable = (ThriftWritable<FileIndexDescriptor>) ThriftWritable
            .newInstance(fid.getClass());
    writable.set(fid);
    writable.write(os);
    os.close();
}

From source file:com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingJob.java

License:Apache License

/**
 * Populates a FileIndexDescriptor with the common fields: source path, document type,
 * checksum, indexed fields, index type and index version.
 *
 * <p>The descriptor's checksum is left {@code null} when the file system returns no checksum
 * for the path.
 *
 * @param path the file the descriptor describes; its file system is resolved via {@code getConf()}
 * @return the populated descriptor
 * @throws IOException if the file system or the file checksum cannot be obtained
 */
protected FileIndexDescriptor buildFileIndexDescriptor(Path path) throws IOException {
    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(path.toString());
    fid.setDocType(getExpectedDocType());
    FileChecksum cksum = path.getFileSystem(getConf()).getFileChecksum(path);
    // getFileChecksum may return null when the file system implements no checksum algorithm;
    // guard against it instead of failing with a NullPointerException.
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = null;
    if (cksum != null) {
        fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(
                cksum.getAlgorithmName(), ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    }
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());
    return fid;
}

From source file:fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop.ReadsMapperHadoopModule.java

License:LGPL

/**
 * Compute the checksum of a ZIP file or use the HDFS checksum if available.
 * @param file the zip input file/*from  w w  w.j  av a 2 s.c  o  m*/
 * @param conf The Hadoop configuration
 * @return the checksum as a string
 * @throws IOException if an error occurs while creating the checksum
 */
static String computeZipCheckSum(final DataFile file, final Configuration conf) throws IOException {

    final Path path = new Path(file.getSource());

    FileSystem fs = FileSystem.get(path.toUri(), conf);
    final FileChecksum checksum = fs.getFileChecksum(path);

    // If exists use checksum provided by the file system
    if (checksum != null) {
        return new BigInteger(1, checksum.getBytes()).toString(16);
    }

    // Fallback solution
    return computeZipCheckSum(file.open());
}