Example usage for com.google.common.hash Hashing murmur3_32

Introduction

On this page you can find example usage for com.google.common.hash Hashing murmur3_32.

Prototype

public static HashFunction murmur3_32() 

Document

Returns a hash function implementing the 32-bit murmur3 algorithm, x86 variant (little-endian variant), using a seed value of zero. Reference: http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp
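
Before the project examples below, here is a minimal self-contained sketch of calling the function directly. The class name Murmur32Example and the sample input are illustrative, not taken from any of the projects listed:

import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.nio.charset.StandardCharsets;

public class Murmur32Example {
    public static void main(String[] args) {
        HashFunction hf = Hashing.murmur3_32();
        // pin the charset so the hash is stable across platforms
        HashCode hc = hf.hashString("hello", StandardCharsets.UTF_8);
        System.out.println(hc.asInt());     // the 32-bit hash as an int
        System.out.println(hc.padToLong()); // zero-extended to a long
    }
}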

Usage

From source file: org.apache.kylin.engine.mr.steps.fdc2.FactDistinctHiveColumnsMapper2.java

@Override
protected void setup(Context context) throws IOException {
    super.setup(context);
    keyBuffer = ByteBuffer.allocate(4096);
    collectStatistics = Boolean
            .parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
    if (collectStatistics) {
        samplingPercentage = Integer
                .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
        cuboidScheduler = new CuboidScheduler(cubeDesc);
        nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;

        List<Long> cuboidIdList = Lists.newArrayList();
        List<Integer[]> allCuboidsBitSetList = Lists.newArrayList();
        addCuboidBitSet(baseCuboidId, allCuboidsBitSetList, cuboidIdList);

        allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[cuboidIdList.size()][]);
        cuboidIds = cuboidIdList.toArray(new Long[cuboidIdList.size()]);

        allCuboidsHLL = new HyperLogLogPlusCounter[cuboidIds.length];
        for (int i = 0; i < cuboidIds.length; i++) {
            allCuboidsHLL[i] = new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
        }

        hf = Hashing.murmur3_32();
        row_hashcodes = new ByteArray[nRowKey];
        for (int i = 0; i < nRowKey; i++) {
            row_hashcodes[i] = new ByteArray();
        }

        TblColRef partitionColRef = cubeDesc.getModel().getPartitionDesc().getPartitionDateColumnRef();
        if (partitionColRef != null) {
            partitionColumnIndex = intermediateTableDesc.getColumnIndex(partitionColRef);
        }

        // check whether we need to fetch the partition column values
        if (partitionColumnIndex < 0) {
            // if the partition col is not on the cube, no need
            needFetchPartitionCol = false;
        } else {
            for (int x : dictionaryColumnIndex) {
                if (x == partitionColumnIndex) {
                    // if the partition col already has a dictionary built, no need
                    needFetchPartitionCol = false;
                    break;
                }
            }
        }

    }
}

From source file: org.apache.kylin.engine.mr.steps.FactDistinctHiveColumnsMapper.java

@Override
protected void setup(Context context) throws IOException {
    super.setup(context);
    tmpbuf = ByteBuffer.allocate(4096);
    collectStatistics = Boolean
            .parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
    if (collectStatistics) {
        samplingPercentage = Integer
                .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
        cuboidScheduler = new CuboidScheduler(cubeDesc);
        nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;

        List<Long> cuboidIdList = Lists.newArrayList();
        List<Integer[]> allCuboidsBitSetList = Lists.newArrayList();
        addCuboidBitSet(baseCuboidId, allCuboidsBitSetList, cuboidIdList);

        allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[cuboidIdList.size()][]);
        cuboidIds = cuboidIdList.toArray(new Long[cuboidIdList.size()]);

        allCuboidsHLL = new HLLCounter[cuboidIds.length];
        for (int i = 0; i < cuboidIds.length; i++) {
            allCuboidsHLL[i] = new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
        }

        hf = Hashing.murmur3_32();
        row_hashcodes = new ByteArray[nRowKey];
        for (int i = 0; i < nRowKey; i++) {
            row_hashcodes[i] = new ByteArray();
        }

        TblColRef partitionColRef = cubeDesc.getModel().getPartitionDesc().getPartitionDateColumnRef();
        if (partitionColRef != null) {
            partitionColumnIndex = intermediateTableDesc.getColumnIndex(partitionColRef);
        }

        // check whether we need to fetch the partition column values
        if (partitionColumnIndex < 0) {
            // if the partition col is not on the cube, no need
            needFetchPartitionCol = false;
        } else {
            needFetchPartitionCol = true;
        }
    }
}

From source file: org.apache.jackrabbit.oak.plugins.segment.standby.codec.ReplyDecoder.java

private Segment decodeSegment(ByteBuf in, int len, byte type) {
    long msb = in.readLong();
    long lsb = in.readLong();
    long hash = in.readLong();

    // #readBytes throws a 'REPLAY' exception if there are not enough bytes
    // available for reading
    ByteBuf data = in.readBytes(len - 25);
    byte[] segment;
    if (data.hasArray()) {
        segment = data.array();
    } else {
        segment = new byte[len - 25];
        // copy from the buffer just filled above, not from 'in' again
        data.readBytes(segment);
    }

    Hasher hasher = Hashing.murmur3_32().newHasher();
    long check = hasher.putBytes(segment).hash().padToLong();
    if (hash == check) {
        SegmentId id = new SegmentId(store.getTracker(), msb, lsb);
        Segment s = new Segment(store.getTracker(), id, ByteBuffer.wrap(segment));
        log.debug("received segment with id {} and size {}", id, s.size());
        return s;
    }
    log.debug("received corrupted segment {}, ignoring", new UUID(msb, lsb));
    return null;
}
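
The decoder above verifies a 32-bit murmur3 checksum that has been zero-extended to a long, presumably matching what the sender wrote. A minimal standalone sketch of that round trip, with a made-up payload:

import com.google.common.hash.Hashing;
import java.nio.charset.StandardCharsets;

public class ChecksumSketch {
    public static void main(String[] args) {
        byte[] payload = "segment-bytes".getBytes(StandardCharsets.UTF_8);
        // writer side: 32-bit murmur3, padded to a long for the wire format
        long hash = Hashing.murmur3_32().newHasher().putBytes(payload).hash().padToLong();
        // reader side: recompute over the received bytes and compare, as decodeSegment does
        long check = Hashing.murmur3_32().newHasher().putBytes(payload).hash().padToLong();
        System.out.println(hash == check); // true for an uncorrupted payload
    }
}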

From source file: org.apache.kylin.engine.mr.steps.CalculateStatsFromBaseCuboidMapper.java

@Override
protected void doSetup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    HadoopUtil.setCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment cubeSegment = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID));

    baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
    nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;

    String cuboidModeName = conf.get(BatchConstants.CFG_CUBOID_MODE);
    Set<Long> cuboidIdSet = cube.getCuboidsByMode(cuboidModeName);

    cuboidIds = cuboidIdSet.toArray(new Long[cuboidIdSet.size()]);
    allCuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey);

    samplingPercentage = Integer
            .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));

    allCuboidsHLL = new HLLCounter[cuboidIds.length];
    for (int i = 0; i < cuboidIds.length; i++) {
        allCuboidsHLL[i] = new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
    }

    //for KYLIN-2518 backward compatibility
    if (KylinVersion.isBefore200(cubeDesc.getVersion())) {
        isUsePutRowKeyToHllNewAlgorithm = false;
        hf = Hashing.murmur3_32();
        logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", cubeDesc.getVersion());
    } else {
        isUsePutRowKeyToHllNewAlgorithm = true;
        rowHashCodesLong = new long[nRowKey];
        hf = Hashing.murmur3_128();
        logger.info(
                "Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518",
                cubeDesc.getVersion());
    }

    rowKeyDecoder = new RowKeyDecoder(cubeSegment);
}

From source file: org.apache.fluo.recipes.core.combine.CombineQueueImpl.java

@Override
public void addAll(TransactionBase tx, Map<K, V> updates) {
    Preconditions.checkState(numBuckets > 0, "Not initialized");

    Set<String> buckets = new HashSet<>();

    BytesBuilder rowBuilder = Bytes.builder();
    rowBuilder.append(updatePrefix);
    int prefixLength = rowBuilder.getLength();

    byte[] startTs = encSeq(tx.getStartTimestamp());

    for (Entry<K, V> entry : updates.entrySet()) {
        byte[] k = serializer.serialize(entry.getKey());
        int hash = Hashing.murmur3_32().hashBytes(k).asInt();
        String bucketId = genBucketId(Math.abs(hash % numBuckets), numBuckets);

        // reset to the common row prefix
        rowBuilder.setLength(prefixLength);

        Bytes row = rowBuilder.append(bucketId).append(':').append(k).append(startTs).toBytes();
        Bytes val = Bytes.of(serializer.serialize(entry.getValue()));

        // TODO set if not exists would be comforting here.... but
        // collisions on bucketId+key+uuid should never occur
        tx.set(row, UPDATE_COL, val);

        buckets.add(bucketId);
    }

    for (String bucketId : buckets) {
        rowBuilder.setLength(prefixLength);
        rowBuilder.append(bucketId).append(':');

        Bytes row = rowBuilder.toBytes();

        tx.setWeakNotification(row, notifyColumn);
    }
}

From source file: org.lightjason.agentspeak.action.builtin.crypto.CHash.java

/**
 * runs hashing function with difference between Google Guava hashing and Java default digest
 *
 * @param p_context execution context
 * @param p_algorithm algorithm name
 * @param p_data byte data representation
 * @return hash value
 */
private static String hash(@Nonnull final IContext p_context, @Nonnull final String p_algorithm,
        @Nonnull final byte[] p_data) {
    switch (p_algorithm.trim().toLowerCase(Locale.ROOT)) {
    case "adler-32":
        return Hashing.adler32().newHasher().putBytes(p_data).hash().toString();

    case "crc-32":
        return Hashing.crc32().newHasher().putBytes(p_data).hash().toString();

    case "crc-32c":
        return Hashing.crc32c().newHasher().putBytes(p_data).hash().toString();

    case "murmur3-32":
        return Hashing.murmur3_32().newHasher().putBytes(p_data).hash().toString();

    case "murmur3-128":
        return Hashing.murmur3_128().newHasher().putBytes(p_data).hash().toString();

    case "siphash-2-4":
        return Hashing.sipHash24().newHasher().putBytes(p_data).hash().toString();

    default:
        try {
            return BaseEncoding.base16().encode(MessageDigest.getInstance(p_algorithm).digest(p_data))
                    .toLowerCase(Locale.ROOT);
        } catch (final NoSuchAlgorithmException l_exception) {
            throw new CRuntimeException(l_exception, p_context);
        }
    }
}

From source file: org.lightjason.agentspeak.action.buildin.crypto.CHash.java

/**
 * runs hashing function with difference between Google Guava hashing and Java default digest
 *
 * @param p_algorithm algorithm name
 * @param p_data byte data representation
 * @return hash value
 *
 * @throws NoSuchAlgorithmException on unknown hashing algorithm
 */
private String hash(final String p_algorithm, final byte[] p_data) throws NoSuchAlgorithmException {
    switch (p_algorithm.trim().toLowerCase(Locale.ROOT)) {
    case "adler-32":
        return Hashing.adler32().newHasher().putBytes(p_data).hash().toString();

    case "crc-32":
        return Hashing.crc32().newHasher().putBytes(p_data).hash().toString();

    case "crc-32c":
        return Hashing.crc32c().newHasher().putBytes(p_data).hash().toString();

    case "murmur3-32":
        return Hashing.murmur3_32().newHasher().putBytes(p_data).hash().toString();

    case "murmur3-128":
        return Hashing.murmur3_128().newHasher().putBytes(p_data).hash().toString();

    case "siphash-2-4":
        return Hashing.sipHash24().newHasher().putBytes(p_data).hash().toString();

    default:
        return String.format("%032x", new BigInteger(1, MessageDigest.getInstance(p_algorithm).digest(p_data)));
    }
}

From source file: cc.gospy.core.entity.Task.java

@Override
public int hashCode() {
    return Hashing.murmur3_32().newHasher().putObject(this, DIGEST).hash().hashCode();
}
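
Hasher.putObject requires a Funnel that writes the object's identity fields into the sink; the DIGEST constant in the example above is presumably such a funnel. A minimal self-contained sketch, assuming a hypothetical url field (the real Task and DIGEST in cc.gospy may differ):

import com.google.common.hash.Funnel;
import com.google.common.hash.Hashing;
import com.google.common.hash.PrimitiveSink;
import java.nio.charset.StandardCharsets;

public class Task {
    private final String url;

    public Task(String url) {
        this.url = url;
    }

    // hypothetical funnel: feeds only the fields that define task identity
    private static final Funnel<Task> DIGEST = new Funnel<Task>() {
        @Override
        public void funnel(Task task, PrimitiveSink into) {
            into.putString(task.url, StandardCharsets.UTF_8);
        }
    };

    @Override
    public int hashCode() {
        return Hashing.murmur3_32().newHasher().putObject(this, DIGEST).hash().hashCode();
    }
}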

From source file: org.apache.jackrabbit.oak.plugins.segment.standby.codec.ReplyDecoder.java

private IdArrayBasedBlob decodeBlob(ByteBuf in, int length, byte type) {
    int inIdLen = in.readInt();
    byte[] bid = new byte[inIdLen];
    in.readBytes(bid);
    String id = new String(bid, Charset.forName("UTF-8"));

    long hash = in.readLong();
    // #readBytes throws a 'REPLAY' exception if there are not enough bytes
    // available for reading
    ByteBuf data = in.readBytes(length);
    byte[] blob;
    if (data.hasArray()) {
        blob = data.array();
    } else {
        blob = new byte[length];
        data.readBytes(blob);
    }

    Hasher hasher = Hashing.murmur3_32().newHasher();
    long check = hasher.putBytes(blob).hash().padToLong();
    if (hash == check) {
        log.debug("received blob with id {} and size {}", id, blob.length);
        return new IdArrayBasedBlob(blob, id);
    }
    log.debug("received corrupted binary {}, ignoring", id);
    return null;
}

From source file: com.jeffplaisance.util.fingertree.list.IndexedList.java

@Override
public int hashCode() {
    final Hasher hasher = Hashing.murmur3_32().newHasher();
    for (T t : this) {
        hasher.putInt(t.hashCode());
    }
    return hasher.hash().asInt();
}