Example usage for org.apache.hadoop.util DataChecksum newDataChecksum

List of usage examples for org.apache.hadoop.util DataChecksum newDataChecksum

Introduction

On this page you can find example usages of org.apache.hadoop.util DataChecksum newDataChecksum.

Prototype

public static DataChecksum newDataChecksum(byte bytes[], int offset) 

Document

Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
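
All of the usage examples below call the overloads that take a checksum type and a bytes-per-checksum value. For the header-parsing prototype documented above, here is a minimal sketch (not taken from any of the sources below, and assuming DataChecksum.Type.CRC32C is available in your Hadoop version); it uses getHeader() to produce the HEADER_LEN-byte header that newDataChecksum(byte[], int) parses back:

import org.apache.hadoop.util.DataChecksum;

public class DataChecksumHeaderDemo {
    public static void main(String[] args) {
        DataChecksum original = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);

        // getHeader() serializes the checksum type and bytes-per-checksum
        // into a DataChecksum.HEADER_LEN byte array.
        byte[] header = original.getHeader();

        // newDataChecksum(byte[], int) parses that header back into an
        // equivalent DataChecksum instance.
        DataChecksum restored = DataChecksum.newDataChecksum(header, 0);

        System.out.println(restored.getChecksumType());     // CRC32C
        System.out.println(restored.getBytesPerChecksum()); // 512
    }
}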

Usage

From source file:backup.datanode.DataNodeRestoreProcessor.java

License:Apache License

public void restoreBlock(ExtendedBlock extendedBlock) throws Exception {
    if (!_backupStore.hasBlock(extendedBlock)) {
        LOG.error("Can not restore block, not in block store {}", extendedBlock);
        return;
    }
    FsDatasetSpi<?> fsDataset = _datanode.getFSDataset();
    org.apache.hadoop.hdfs.protocol.ExtendedBlock heb = BackupUtil.toHadoop(extendedBlock);
    if (fsDataset.isValidBlock(heb)) {
        LOG.info("Block already restored {}", extendedBlock);
        return;
    }
    try {
        _restoreInProgress.incrementAndGet();
        LOG.info("Restoring block {}", extendedBlock);
        boolean allowLazyPersist = true;
        // org.apache.hadoop.fs.StorageType storageType =
        // org.apache.hadoop.fs.StorageType.DEFAULT;
        org.apache.hadoop.hdfs.StorageType storageType = org.apache.hadoop.hdfs.StorageType.DEFAULT;
        ReplicaHandler replicaHandler = fsDataset.createRbw(storageType, heb, allowLazyPersist);
        ReplicaInPipelineInterface pipelineInterface = replicaHandler.getReplica();
        boolean isCreate = true;
        DataChecksum requestedChecksum = DataChecksum.newDataChecksum(_checksumType, _bytesPerChecksum);
        int bytesCopied = 0;
        try (ReplicaOutputStreams streams = pipelineInterface.createStreams(isCreate, requestedChecksum)) {
            try (OutputStream checksumOut = streams.getChecksumOut()) {
                try (InputStream metaData = _backupStore.getMetaDataInputStream(extendedBlock)) {
                    LOG.info("Restoring meta data for block {}", extendedBlock);
                    IOUtils.copy(trackThroughPut(metaData), checksumOut);
                }
            }
            try (OutputStream dataOut = streams.getDataOut()) {
                try (InputStream data = _backupStore.getDataInputStream(extendedBlock)) {
                    LOG.info("Restoring data for block {}", extendedBlock);
                    bytesCopied = IOUtils.copy(trackThroughPut(data), dataOut);
                }
            }
        }
        pipelineInterface.setNumBytes(bytesCopied);
        LOG.info("Finalizing restored block {}", extendedBlock);
        fsDataset.finalizeBlock(heb);

        // datanode.notifyNamenodeReceivedBlock(extendedBlock, "",
        // pipelineInterface.getStorageUuid();
        _datanode.notifyNamenodeReceivedBlock(heb, "", pipelineInterface.getStorageUuid(),
                pipelineInterface.isOnTransientStorage());
    } catch (ReplicaAlreadyExistsException e) {
        LOG.info("Restoring block already exists {}", extendedBlock);
    } finally {
        _restoreInProgress.decrementAndGet();
    }
}

From source file:com.mellanox.r4h.DFSOutputStream.java

License:Apache License

/**
 * @return the object for computing checksum.
 *         The type is NULL if checksum is not computed.
 */
private static DataChecksum getChecksum4Compute(DataChecksum checksum, HdfsFileStatus stat) {
    if (isLazyPersist(stat) && stat.getReplication() == 1) {
        // do not compute checksum for writing to single replica to memory
        return DataChecksum.newDataChecksum(Type.NULL, checksum.getBytesPerChecksum());
    }
    return checksum;
}
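
A note on the Type.NULL branch above: a NULL-type DataChecksum acts as a no-op, which lets the writer keep a single code path whether or not checksums are computed. A minimal sketch of that assumed behavior (not part of the source above):

import org.apache.hadoop.util.DataChecksum;

public class NullChecksumDemo {
    public static void main(String[] args) {
        // Assumed behavior: the NULL type produces no checksum bytes,
        // so its checksum size is zero.
        DataChecksum nullSum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
        System.out.println(nullSum.getChecksumSize()); // expected: 0
    }
}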

From source file:com.tripadvisor.hadoop.ExternalHDFSChecksumGenerator.java

License:Apache License

/**
 *
 * This is the function that calculates the hdfs-style checksum for a local file in the same way that
 * hdfs does it in a parallel fashion on all of the blocks in hdfs.
 *
 * @param strPath
 * @param bytesPerCRC
 * @param lBlockSize
 * @return
 * @throws IOException
 */
public MD5MD5CRC32FileChecksum getLocalFilesystemHDFSStyleChecksum(String strPath, int bytesPerCRC,
        long lBlockSize) throws IOException {
    long lFileSize = 0;
    int iBlockCount = 0;
    DataOutputBuffer md5outDataBuffer = new DataOutputBuffer();
    DataChecksum chksm = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, 512);
    InputStream in = null;
    MD5MD5CRC32FileChecksum returnChecksum = null;
    long crc_per_block = lBlockSize / bytesPerCRC;

    java.io.File file = new java.io.File(strPath);

    // FileStatus f_stats = srcFs.getFileStatus( srcPath );
    lFileSize = file.length();

    iBlockCount = (int) Math.ceil((double) lFileSize / (double) lBlockSize);

    // System.out.println( "Debug > getLen == " + f_stats.getLen() +
    // " bytes" );
    // System.out.println( "Debug > iBlockCount == " + iBlockCount );

    if (file.isDirectory()) {
        throw new IOException("Cannot compute local hdfs hash, " + strPath + " is a directory! ");
    }

    try {
        in = new FileInputStream(file);
        long lTotalBytesRead = 0;

        for (int x = 0; x < iBlockCount; x++) {

            ByteArrayOutputStream ar_CRC_Bytes = new ByteArrayOutputStream();

            byte crc[] = new byte[4];
            byte buf[] = new byte[512];

            try {

                int bytesRead = 0;

                while ((bytesRead = in.read(buf)) > 0) {

                    lTotalBytesRead += bytesRead;

                    chksm.reset();
                    chksm.update(buf, 0, bytesRead);
                    chksm.writeValue(crc, 0, true);
                    ar_CRC_Bytes.write(crc);

                    if (lTotalBytesRead >= (x + 1) * lBlockSize) {

                        break;
                    }

                } // while

                DataInputStream inputStream = new DataInputStream(
                        new ByteArrayInputStream(ar_CRC_Bytes.toByteArray()));

                // this actually computes one ---- run on the server
                // (DataXceiver) side
                final MD5Hash md5_dataxceiver = MD5Hash.digest(inputStream);
                md5_dataxceiver.write(md5outDataBuffer);

            } catch (IOException e) {

                e.printStackTrace();

            } catch (Exception e) {

                e.printStackTrace();

            }

        } // for

        // this is in 0.19.0 style with the extra padding bug
        final MD5Hash md5_of_md5 = MD5Hash.digest(md5outDataBuffer.getData());
        returnChecksum = new MD5MD5CRC32FileChecksum(bytesPerCRC, crc_per_block, md5_of_md5);

    } catch (IOException e) {
        e.printStackTrace();
    } catch (Exception e) {

        e.printStackTrace();

    } finally {
        if (in != null) {
            in.close();
        }
    } // try

    return returnChecksum;

}
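
A hypothetical follow-up (the class and method names below are illustrative, not from the source above): the MD5MD5CRC32FileChecksum returned here is intended to be comparable with what HDFS itself reports via FileSystem.getFileChecksum, for example when validating that a local file and its HDFS copy hold the same data. Note that the bytesPerCRC and block size passed in must match the values the HDFS file was written with, since they are part of the checksum's algorithm name.

import java.io.IOException;

import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChecksumCompare {
    // Returns true when the locally computed HDFS-style checksum matches
    // the checksum HDFS reports for the remote copy of the same data.
    static boolean matchesRemote(FileSystem fs, Path remotePath, ExternalHDFSChecksumGenerator generator,
            String localPath, int bytesPerCRC, long blockSize) throws IOException {
        FileChecksum remote = fs.getFileChecksum(remotePath);
        FileChecksum local = generator.getLocalFilesystemHDFSStyleChecksum(localPath, bytesPerCRC, blockSize);
        return remote != null && remote.equals(local);
    }
}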

From source file:org.apache.tez.engine.common.sort.impl.IFileInputStream.java

License:Apache License

/**
 * Create a checksum input stream that reads
 * @param in The input stream to be verified for checksum.
 * @param len The length of the input stream including checksum bytes.
 */
public IFileInputStream(InputStream in, long len, Configuration conf) {
    this.in = in;
    this.inFd = getFileDescriptorIfAvail(in);
    sum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, Integer.MAX_VALUE);
    checksumSize = sum.getChecksumSize();
    buffer = new byte[4096];
    offset = 0;
    length = len;
    dataLength = length - checksumSize;

    conf = (conf != null) ? conf : new Configuration();
    readahead = conf.getBoolean(TezJobConfig.TEZ_ENGINE_IFILE_READAHEAD,
            TezJobConfig.DEFAULT_TEZ_ENGINE_IFILE_READAHEAD);
    readaheadLength = conf.getInt(TezJobConfig.TEZ_ENGINE_IFILE_READAHEAD_BYTES,
            TezJobConfig.DEFAULT_TEZ_ENGINE_IFILE_READAHEAD_BYTES);

    doReadahead();
}

From source file:org.apache.tez.engine.common.sort.impl.IFileOutputStream.java

License:Apache License

/**
 * Create a checksum output stream that writes
 * the bytes to the given stream.
 * @param out
 */
public IFileOutputStream(OutputStream out) {
    super(out);
    sum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, Integer.MAX_VALUE);
    barray = new byte[sum.getChecksumSize()];
    buffer = new byte[4096];
    offset = 0;
}

From source file:org.apache.tez.runtime.library.common.sort.impl.IFileInputStream.java

License:Apache License

/**
 * Create a checksum input stream that reads
 * @param in The input stream to be verified for checksum.
 * @param len The length of the input stream including checksum bytes.
 * @param readAhead Whether to attempt readAhead for this stream
 * @param readAheadLength Number of bytes to readAhead if it is enabled
 */
public IFileInputStream(InputStream in, long len, boolean readAhead, int readAheadLength) {
    this.in = in;
    sum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, Integer.MAX_VALUE);
    checksumSize = sum.getChecksumSize();
    buffer = new byte[4096];
    offset = 0;
    length = len;
    dataLength = length - checksumSize;

    readahead = readAhead;
    readaheadLength = readAheadLength;

    if (readahead) {
        this.inFd = getFileDescriptorIfAvail(in);
        doReadahead();
    } else {
        this.inFd = null;
    }
}
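
A note on the Integer.MAX_VALUE argument in the three IFile examples above: using Integer.MAX_VALUE as the bytes-per-checksum appears to make the whole IFile stream a single checksum chunk, so IFileOutputStream appends exactly one CRC32 value (sum.getChecksumSize() bytes) at the end of the stream and IFileInputStream verifies it, which is why dataLength is simply length - checksumSize.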