Example usage for com.google.common.hash Hashing murmur3_32

Introduction

On this page you can find example usages of com.google.common.hash Hashing murmur3_32.

Prototype

public static HashFunction murmur3_32()

Source Link

Document

Returns a hash function implementing the <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant), using a seed value of zero.

Usage

From source file:org.apache.kylin.engine.mr.steps.FactDistinctColumnsMapper.java

@Override
protected void setup(Context context) throws IOException {
    super.setup(context);
    tmpbuf = ByteBuffer.allocate(4096);
    collectStatistics = Boolean/*w  ww.  j  a va  2  s  . c  o m*/
            .parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
    if (collectStatistics) {
        samplingPercentage = Integer
                .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
        cuboidScheduler = new CuboidScheduler(cubeDesc);
        nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;

        List<Long> cuboidIdList = Lists.newArrayList();
        List<Integer[]> allCuboidsBitSetList = Lists.newArrayList();
        addCuboidBitSet(baseCuboidId, allCuboidsBitSetList, cuboidIdList);

        allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[cuboidIdList.size()][]);
        cuboidIds = cuboidIdList.toArray(new Long[cuboidIdList.size()]);

        allCuboidsHLL = new HLLCounter[cuboidIds.length];
        for (int i = 0; i < cuboidIds.length; i++) {
            allCuboidsHLL[i] = new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision(),
                    RegisterType.DENSE);
        }

        TblColRef partitionColRef = cubeDesc.getModel().getPartitionDesc().getPartitionDateColumnRef();
        if (partitionColRef != null) {
            partitionColumnIndex = intermediateTableDesc.getColumnIndex(partitionColRef);
        }

        // check whether need fetch the partition col values
        if (partitionColumnIndex < 0) {
            // if partition col not on cube, no need
            needFetchPartitionCol = false;
        } else {
            needFetchPartitionCol = true;
        }
        //for KYLIN-2518 backward compatibility
        if (KylinVersion.isBefore200(cubeDesc.getVersion())) {
            isUsePutRowKeyToHllNewAlgorithm = false;
            row_hashcodes = new ByteArray[nRowKey];
            for (int i = 0; i < nRowKey; i++) {
                row_hashcodes[i] = new ByteArray();
            }
            hf = Hashing.murmur3_32();
            logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.",
                    cubeDesc.getVersion());
        } else {
            isUsePutRowKeyToHllNewAlgorithm = true;
            rowHashCodesLong = new long[nRowKey];
            hf = Hashing.murmur3_128();
            logger.info(
                    "Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518",
                    cubeDesc.getVersion());
        }
    }

}

From source file:diskCacheV111.srm.SrmService.java

@Override
public void setCellAddress(CellAddressCore address) {
    this.address = address;
    this.id = Hashing.murmur3_32().hashString(address.toString(), US_ASCII).toString();
}

From source file:com.facebook.presto.tpch.GeneratingTpchDataFileLoader.java

@Override
public File getDataFile(TpchTableHandle tableHandle, TpchColumnHandle columnHandle,
        BlocksFileEncoding encoding) {/*from w  w  w.  j a  va 2  s.c  o  m*/
    checkNotNull(tableHandle, "tableHandle is null");
    checkNotNull(columnHandle, "columnHandle is null");
    checkNotNull(encoding, "encoding is null");

    String tableName = tableHandle.getTableName();
    ExecutorService executor = Executors.newCachedThreadPool(Threads.daemonThreadsNamed("tpch-generate-%s"));
    try {
        String hash = ByteStreams
                .hash(ByteStreams.slice(tableInputSupplierFactory.getInputSupplier(tableName), 0, 1024 * 1024),
                        Hashing.murmur3_32())
                .toString();

        File cachedFile = new File(new File(cacheDirectory, tableName + "-" + hash),
                "new-" + createFileName(columnHandle, encoding));
        if (cachedFile.exists()) {
            return cachedFile;
        }

        Files.createParentDirs(cachedFile);

        InputSupplier<InputStream> inputSupplier = tableInputSupplierFactory.getInputSupplier(tableName);

        ColumnMetadata columnMetadata = new TpchMetadata().getColumnMetadata(tableHandle, columnHandle);

        DelimitedRecordSet records = new DelimitedRecordSet(newReaderSupplier(inputSupplier, UTF_8),
                Splitter.on("|"), columnMetadata);

        Session session = new Session("user", "source", "catalog", "schema", "address", "agent");
        OperatorContext operatorContext = new TaskContext(new TaskId("query", "stage", "task"), executor,
                session).addPipelineContext(true, true).addDriverContext().addOperatorContext(0,
                        "tpch-generate");

        RecordProjectOperator source = new RecordProjectOperator(operatorContext, records);

        ColumnFileHandle columnFileHandle = ColumnFileHandle.builder(0)
                .addColumn(columnHandle, cachedFile, encoding).build();

        while (!source.isFinished()) {
            Page page = source.getOutput();
            if (page != null) {
                columnFileHandle.append(page);
            }
        }
        columnFileHandle.commit();

        return cachedFile;
    } catch (IOException e) {
        throw Throwables.propagate(e);
    } finally {
        executor.shutdownNow();
    }
}

From source file:org.apache.fluo.recipes.core.map.CollisionFreeMap.java

/**
 * This method will retrieve the current value for key and any outstanding updates and combine
 * them using the configured {@link Combiner}. The result from the combiner is returned.
 *//* w w  w.  ja va  2  s. c o m*/
public V get(SnapshotBase tx, K key) {

    byte[] k = serializer.serialize(key);

    int hash = Hashing.murmur3_32().hashBytes(k).asInt();
    String bucketId = genBucketId(Math.abs(hash % numBuckets), numBuckets);

    BytesBuilder rowBuilder = Bytes.builder();
    rowBuilder.append(updatePrefix).append(bucketId).append(':').append(k);

    Iterator<RowColumnValue> iter = tx.scanner().over(Span.prefix(rowBuilder.toBytes())).build().iterator();

    Iterator<V> ui;

    if (iter.hasNext()) {
        ui = Iterators.transform(iter, rcv -> deserVal(rcv.getValue()));
    } else {
        ui = Collections.<V>emptyList().iterator();
    }

    rowBuilder.setLength(0);
    rowBuilder.append(dataPrefix).append(bucketId).append(':').append(k);

    Bytes dataRow = rowBuilder.toBytes();

    Bytes cv = tx.get(dataRow, DATA_COLUMN);

    if (!ui.hasNext()) {
        if (cv == null) {
            return null;
        } else {
            return deserVal(cv);
        }
    }

    return combiner.combine(key, concat(ui, cv)).orElse(null);
}

From source file:org.springside.modules.security.utils.Digests.java

/**
 * murmur32/*from  w  w  w. j  av a2 s.  c o m*/
 */
public static int murmur32(String input) {
    return Hashing.murmur3_32().hashString(input, Charsets.UTF_8).asInt();
}

From source file:org.apache.rya.indexing.pcj.fluo.app.util.BindingHashShardingFunction.java

private static String genHash(Bytes row) {
    int hash = Hashing.murmur3_32().hashBytes(row.toArray()).asInt();
    hash = hash & 0x7fffffff;// ww w . j  a v a2 s  .c o m
    // base 36 gives a lot more bins in 4 bytes than hex, but it is still human readable which is
    // nice for debugging.
    String hashString = Strings.padStart(Integer.toString(hash, Character.MAX_RADIX), HASH_LEN, '0');
    hashString = hashString.substring(hashString.length() - HASH_LEN);
    return hashString;
}

From source file:com.qubole.rubix.spi.CacheConfig.java

public static String getLocalDirFor(String remotePath, Configuration conf) {
    int numDisks = numDisks(conf);
    int numBuckets = 100 * numDisks;
    HashFunction hf = Hashing.murmur3_32();
    HashCode hc = hf.hashString(remotePath, Charsets.UTF_8);
    int bucket = Math.abs(hc.asInt()) % numBuckets;
    int dirNum = (bucket / numDisks) % numDisks;

    String dirname = getDirPath(conf, dirNum) + CacheConfig.fileCacheDirSuffixConf;
    return dirname;
}

From source file:com.jeffplaisance.util.fingertree.rope.Rope.java

@Override
public int hashCode() {
    final Hasher hasher = Hashing.murmur3_32().newHasher();
    for (String str : data) {
        hasher.putString(str);/*w  ww .ja  v a 2 s  . com*/
    }
    return hasher.hash().asInt();
}

From source file:org.apache.mahout.freqtermsets.PFPGrowth.java

public static void loadEarlierFHashMaps(JobContext context, Parameters params, long intervalStart,
        OpenIntObjectHashMap<String> idStringMapOut, OpenObjectIntHashMap<String> stringIdMapOut)
        throws IOException {
    // I resist the urge to cache this list because I don't know what exactly would happen
    // when the job is run in hadoop where every job has its own JVM.. will static
    // fields somehow leak? Can I be sure that the static WeakHashMap used as a cache is mine?
    // FINALLY.. the list would be loaded only twice, once for mapper, and once for reducer

    OpenObjectLongHashMap<String> prevFLists = PFPGrowth.readOlderCachedFLists(context.getConfiguration(),
            intervalStart, TimeWeightFunction.getDefault(params));

    LinkedList<String> terms = Lists.newLinkedList();
    prevFLists.keysSortedByValue(terms);
    Iterator<String> termsIter = terms.descendingIterator();
    while (termsIter.hasNext()) {

        String t = termsIter.next();
        int id = Hashing.murmur3_32().hashString(t, Charset.forName("UTF-8")).asInt();
        int c = 0;
        while (idStringMapOut.containsKey(id)) {
            // Best effort
            if (c < t.length()) {
                id = Hashing.murmur3_32((int) t.charAt(c++)).hashString(t, Charset.forName("UTF-8")).asInt();
            } else {
                ++id;//  w  ww.  j a v a2  s.  c o m
            }
        }

        idStringMapOut.put(id, t);
        stringIdMapOut.put(t, id);
    }
}

From source file:com.dcits.govsbu.southernbase.baseproject2.modules.utils.Digests.java

/**
 * murmur32
 */
public static int murmur32(byte[] input) {
    return Hashing.murmur3_32().hashBytes(input).asInt();
}

Example usage for com.google.common.hash Hashing murmur3_32

Introduction

Prototype

Document

Usage