Example usage for com.google.common.hash Hashing murmur3_128

List of usage examples for com.google.common.hash Hashing murmur3_128

Introduction

On this page you can find example usage for com.google.common.hash Hashing murmur3_128.

Prototype

public static HashFunction murmur3_128(int seed) 

Source Link

Document

Returns a hash function implementing the <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), using the given seed value.

Usage

From source file:edu.umd.marbl.mhap.utils.Utils.java

/**
 * Computes {@code numWords} seeded murmur3_128 hash values for every k-mer
 * (substring of length {@code kmerSize}) of the given sequence.
 *
 * @param seq      input sequence; must be at least {@code kmerSize} characters long
 * @param kmerSize length of each k-mer window
 * @param numWords number of independent 64-bit hash values per k-mer
 * @param seed     seed passed to {@code Hashing.murmur3_128}, making results reproducible
 * @return array of shape {@code [seq.length() - kmerSize + 1][numWords]} of hash values
 */
public final static long[][] computeKmerHashesExact(final String seq, final int kmerSize, final int numWords,
        final int seed) {
    final HashFunction hashFunc = Hashing.murmur3_128(seed);

    final int numKmers = seq.length() - kmerSize + 1;
    final long[][] result = new long[numKmers][numWords];

    for (int pos = 0; pos < numKmers; pos++) {
        final String kmer = seq.substring(pos, pos + kmerSize);

        // Mix the word index into the hasher so each word yields an
        // independent 64-bit value for the same k-mer.
        for (int word = 0; word < numWords; word++) {
            result[pos][word] = hashFunc.newHasher().putUnencodedChars(kmer).putInt(word).hash().asLong();
        }
    }

    return result;
}

From source file:org.apache.beam.sdk.io.synthetic.SyntheticOptions.java

/**
 * Returns the seeded murmur3_128 hash function for these options, creating
 * and caching it on first use. The backing field is transient, so it can be
 * null after deserialization and is rebuilt lazily here.
 */
public HashFunction hashFunction() {
    HashFunction fn = hashFunction;
    if (fn == null) {
        fn = Hashing.murmur3_128(seed);
        this.hashFunction = fn;
    }
    return fn;
}

From source file:edu.umd.marbl.mhap.utils.Utils.java

/**
 * Computes a seeded murmur3_128 hash for each k-mer of the sequence, but only
 * for k-mers that contain no ambiguous base ({@code "N"}) and whose YGS hash
 * is present in the global valid-k-mer set (see
 * {@code MhapMain.getValidKmersHashes()}). Positions that fail either filter
 * are marked with the sentinel value {@code Long.MAX_VALUE}.
 *
 * @param seq      input sequence; must be at least {@code kmerSize} characters long
 * @param kmerSize length of each k-mer window
 * @param seed     seed passed to {@code Hashing.murmur3_128}
 * @return array of length {@code seq.length() - kmerSize + 1} of hash values or sentinels
 */
public final static long[] computeSequenceHashesLongValidKmers(final String seq, final int kmerSize,
        final int seed) {
    final HashFunction hashFunc = Hashing.murmur3_128(seed);

    final int numKmers = seq.length() - kmerSize + 1;
    final long[] result = new long[numKmers];

    for (int pos = 0; pos < numKmers; pos++) {
        final String kmer = seq.substring(pos, pos + kmerSize);

        // Default to the sentinel; overwritten only if the k-mer passes both filters.
        result[pos] = Long.MAX_VALUE;

        if (!kmer.contains("N")) {
            final long hashYGS = computeHashYGS(kmer);

            // Only hash k-mers recorded in the global valid-k-mer bit set.
            if (MhapMain.getValidKmersHashes().fastGet(hashYGS)) {
                result[pos] = hashFunc.newHasher().putUnencodedChars(kmer).hash().asLong();
            }
        }
    }

    return result;
}

From source file:edu.umd.marbl.mhap.utils.Utils.java

/**
 * Computes a seeded murmur3_128 hash for every k-mer (substring of length
 * {@code kmerSize}) of the given sequence.
 *
 * @param seq      input sequence; must be at least {@code kmerSize} characters long
 * @param kmerSize length of each k-mer window
 * @param seed     seed passed to {@code Hashing.murmur3_128}
 * @return array of length {@code seq.length() - kmerSize + 1} of 64-bit hash values
 */
public final static long[] computeSequenceHashesLong(final String seq, final int kmerSize, final int seed) {
    final HashFunction hashFunc = Hashing.murmur3_128(seed);

    final int numKmers = seq.length() - kmerSize + 1;
    final long[] result = new long[numKmers];

    for (int pos = 0; pos < numKmers; pos++) {
        result[pos] = hashFunc.newHasher()
                .putUnencodedChars(seq.substring(pos, pos + kmerSize))
                .hash()
                .asLong();
    }

    return result;
}

From source file:org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.java

/**
 * Returns a map with a hash for each {@link StreamNode} of the {@link
 * StreamGraph}. The hash is used as the {@link JobVertexID} in order to
 * identify nodes across job submissions if they didn't change.
 *
 * <p>The complete {@link StreamGraph} is traversed. The hash is either
 * computed from the transformation's user-specified id (see
 * {@link StreamTransformation#getUid()}) or generated in a deterministic way.
 *
 * <p>The generated hash is deterministic with respect to:
 * <ul>/*ww  w  .j  a v a2 s. co m*/
 * <li>node-local properties (like parallelism, UDF, node ID),
 * <li>chained output nodes, and
 * <li>input nodes hashes
 * </ul>
 *
 * @return A map from {@link StreamNode#id} to hash as 16-byte array.
 */
private Map<Integer, byte[]> traverseStreamGraphAndGenerateHashes() {
    // The hash function used to generate the hash
    final HashFunction hashFunction = Hashing.murmur3_128(0);
    final Map<Integer, byte[]> hashes = new HashMap<>();

    Set<Integer> visited = new HashSet<>();
    Queue<StreamNode> remaining = new ArrayDeque<>();

    // We need to make the source order deterministic. The source IDs are
    // not returned in the same order, which means that submitting the same
    // program twice might result in different traversal, which breaks the
    // deterministic hash assignment.
    List<Integer> sources = new ArrayList<>();
    for (Integer sourceNodeId : streamGraph.getSourceIDs()) {
        sources.add(sourceNodeId);
    }
    Collections.sort(sources);

    //
    // Traverse the graph in a breadth-first manner. Keep in mind that
    // the graph is not a tree and multiple paths to nodes can exist.
    //

    // Start with source nodes
    for (Integer sourceNodeId : sources) {
        remaining.add(streamGraph.getStreamNode(sourceNodeId));
        visited.add(sourceNodeId);
    }

    StreamNode currentNode;
    while ((currentNode = remaining.poll()) != null) {
        // Generate the hash code. Because multiple path exist to each
        // node, we might not have all required inputs available to
        // generate the hash code.
        if (generateNodeHash(currentNode, hashFunction, hashes)) {
            // Add the child nodes
            for (StreamEdge outEdge : currentNode.getOutEdges()) {
                StreamNode child = outEdge.getTargetVertex();

                if (!visited.contains(child.getId())) {
                    remaining.add(child);
                    visited.add(child.getId());
                }
            }
        } else {
            // We will revisit this later.
            visited.remove(currentNode.getId());
        }
    }

    return hashes;
}