List of usage examples for com.google.common.hash.Hashing.murmur3_32()
public static HashFunction murmur3_32()
From source file:org.apache.kylin.engine.mr.steps.FactDistinctColumnsMapper.java
@Override protected void setup(Context context) throws IOException { super.setup(context); tmpbuf = ByteBuffer.allocate(4096); collectStatistics = Boolean/*w ww. j a va 2 s . c o m*/ .parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED)); if (collectStatistics) { samplingPercentage = Integer .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT)); cuboidScheduler = new CuboidScheduler(cubeDesc); nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length; List<Long> cuboidIdList = Lists.newArrayList(); List<Integer[]> allCuboidsBitSetList = Lists.newArrayList(); addCuboidBitSet(baseCuboidId, allCuboidsBitSetList, cuboidIdList); allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[cuboidIdList.size()][]); cuboidIds = cuboidIdList.toArray(new Long[cuboidIdList.size()]); allCuboidsHLL = new HLLCounter[cuboidIds.length]; for (int i = 0; i < cuboidIds.length; i++) { allCuboidsHLL[i] = new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision(), RegisterType.DENSE); } TblColRef partitionColRef = cubeDesc.getModel().getPartitionDesc().getPartitionDateColumnRef(); if (partitionColRef != null) { partitionColumnIndex = intermediateTableDesc.getColumnIndex(partitionColRef); } // check whether need fetch the partition col values if (partitionColumnIndex < 0) { // if partition col not on cube, no need needFetchPartitionCol = false; } else { needFetchPartitionCol = true; } //for KYLIN-2518 backward compatibility if (KylinVersion.isBefore200(cubeDesc.getVersion())) { isUsePutRowKeyToHllNewAlgorithm = false; row_hashcodes = new ByteArray[nRowKey]; for (int i = 0; i < nRowKey; i++) { row_hashcodes[i] = new ByteArray(); } hf = Hashing.murmur3_32(); logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", cubeDesc.getVersion()); } else { isUsePutRowKeyToHllNewAlgorithm = true; rowHashCodesLong = new long[nRowKey]; hf = Hashing.murmur3_128(); logger.info( "Found KylinVersion : {}. 
Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518", cubeDesc.getVersion()); } } }
From source file:diskCacheV111.srm.SrmService.java
/**
 * Records the cell address and derives this service's id from it.
 *
 * <p>The id is the string form of the murmur3_32 hash of {@code address.toString()},
 * hashed using the US-ASCII charset.
 *
 * @param address the cell address assigned to this service
 */
@Override
public void setCellAddress(CellAddressCore address) {
    this.address = address;
    String addressText = address.toString();
    this.id = Hashing.murmur3_32()
            .hashString(addressText, US_ASCII)
            .toString();
}
From source file:com.facebook.presto.tpch.GeneratingTpchDataFileLoader.java
/**
 * Returns the column data file for the given table column and encoding, generating it on
 * first use.
 *
 * <p>The cache directory name combines the table name with a murmur3_32 hash of the first
 * 1 MiB of the table's input. If the target file already exists it is returned as is;
 * otherwise the table input is read as '|'-delimited records and every page produced for the
 * column is appended to a new column file, which is then committed.
 *
 * @param tableHandle table to read; must not be null
 * @param columnHandle column to materialize; must not be null
 * @param encoding block encoding for the generated file; must not be null
 * @return the cached (possibly freshly generated) column file
 */
@Override
public File getDataFile(TpchTableHandle tableHandle, TpchColumnHandle columnHandle,
        BlocksFileEncoding encoding) {
    checkNotNull(tableHandle, "tableHandle is null");
    checkNotNull(columnHandle, "columnHandle is null");
    checkNotNull(encoding, "encoding is null");

    String tableName = tableHandle.getTableName();
    ExecutorService generatorPool = Executors
            .newCachedThreadPool(Threads.daemonThreadsNamed("tpch-generate-%s"));
    try {
        // Fingerprint the head of the table input so the cache directory name follows the data.
        String fingerprint = ByteStreams
                .hash(ByteStreams.slice(tableInputSupplierFactory.getInputSupplier(tableName), 0, 1024 * 1024),
                        Hashing.murmur3_32())
                .toString();

        File tableCacheDir = new File(cacheDirectory, tableName + "-" + fingerprint);
        File cachedFile = new File(tableCacheDir, "new-" + createFileName(columnHandle, encoding));
        if (cachedFile.exists()) {
            return cachedFile;
        }
        Files.createParentDirs(cachedFile);

        InputSupplier<InputStream> tableInput = tableInputSupplierFactory.getInputSupplier(tableName);
        ColumnMetadata columnMetadata = new TpchMetadata().getColumnMetadata(tableHandle, columnHandle);
        DelimitedRecordSet recordSet = new DelimitedRecordSet(newReaderSupplier(tableInput, UTF_8),
                Splitter.on("|"), columnMetadata);

        Session session = new Session("user", "source", "catalog", "schema", "address", "agent");
        TaskContext taskContext = new TaskContext(new TaskId("query", "stage", "task"), generatorPool,
                session);
        OperatorContext operatorContext = taskContext
                .addPipelineContext(true, true)
                .addDriverContext()
                .addOperatorContext(0, "tpch-generate");

        RecordProjectOperator source = new RecordProjectOperator(operatorContext, recordSet);
        ColumnFileHandle columnFileHandle = ColumnFileHandle.builder(0)
                .addColumn(columnHandle, cachedFile, encoding)
                .build();

        // Drain the operator; a null page means nothing was produced on this call.
        while (!source.isFinished()) {
            Page page = source.getOutput();
            if (page == null) {
                continue;
            }
            columnFileHandle.append(page);
        }
        columnFileHandle.commit();
        return cachedFile;
    } catch (IOException e) {
        throw Throwables.propagate(e);
    } finally {
        generatorPool.shutdownNow();
    }
}
From source file:org.apache.fluo.recipes.core.map.CollisionFreeMap.java
/**
 * Retrieves the current value for {@code key} together with any outstanding updates and merges
 * them with the configured {@link Combiner}.
 *
 * @param tx snapshot used to scan the update rows and read the data row
 * @param key key to look up
 * @return the stored value when there are no outstanding updates (or {@code null} if none is
 *         stored); otherwise the combiner's result, or {@code null} if the combiner yields
 *         nothing
 */
public V get(SnapshotBase tx, K key) {
    byte[] serializedKey = serializer.serialize(key);
    int keyHash = Hashing.murmur3_32().hashBytes(serializedKey).asInt();
    String bucketId = genBucketId(Math.abs(keyHash % numBuckets), numBuckets);

    // Scan every pending update stored under <updatePrefix><bucket>:<key>.
    BytesBuilder rowBuilder = Bytes.builder();
    rowBuilder.append(updatePrefix).append(bucketId).append(':').append(serializedKey);
    Span updateSpan = Span.prefix(rowBuilder.toBytes());
    Iterator<RowColumnValue> rawUpdates = tx.scanner().over(updateSpan).build().iterator();

    Iterator<V> updates;
    if (!rawUpdates.hasNext()) {
        updates = Collections.<V>emptyList().iterator();
    } else {
        updates = Iterators.transform(rawUpdates, rcv -> deserVal(rcv.getValue()));
    }

    // Reuse the builder for the data row <dataPrefix><bucket>:<key>.
    rowBuilder.setLength(0);
    rowBuilder.append(dataPrefix).append(bucketId).append(':').append(serializedKey);
    Bytes dataRow = rowBuilder.toBytes();
    Bytes currentValue = tx.get(dataRow, DATA_COLUMN);

    if (updates.hasNext()) {
        return combiner.combine(key, concat(updates, currentValue)).orElse(null);
    }
    return currentValue == null ? null : deserVal(currentValue);
}
From source file:org.springside.modules.security.utils.Digests.java
/** * murmur32/*from w w w. j av a2 s. c o m*/ */ public static int murmur32(String input) { return Hashing.murmur3_32().hashString(input, Charsets.UTF_8).asInt(); }
From source file:org.apache.rya.indexing.pcj.fluo.app.util.BindingHashShardingFunction.java
private static String genHash(Bytes row) { int hash = Hashing.murmur3_32().hashBytes(row.toArray()).asInt(); hash = hash & 0x7fffffff;// ww w . j a v a2 s .c o m // base 36 gives a lot more bins in 4 bytes than hex, but it is still human readable which is // nice for debugging. String hashString = Strings.padStart(Integer.toString(hash, Character.MAX_RADIX), HASH_LEN, '0'); hashString = hashString.substring(hashString.length() - HASH_LEN); return hashString; }
From source file:com.qubole.rubix.spi.CacheConfig.java
/**
 * Picks the local cache directory for a remote path.
 *
 * <p>The path's murmur3_32 hash (over its UTF-8 bytes) is reduced to one of
 * {@code 100 * numDisks} buckets, and the bucket is then mapped to a disk index in
 * {@code [0, numDisks)}.
 *
 * @param remotePath remote path whose local directory is wanted
 * @param conf configuration used to look up the disk count and directory paths
 * @return the directory path for the chosen disk with the file-cache suffix appended
 */
public static String getLocalDirFor(String remotePath, Configuration conf) {
    int numDisks = numDisks(conf);
    int numBuckets = 100 * numDisks;
    HashFunction hf = Hashing.murmur3_32();
    HashCode hc = hf.hashString(remotePath, Charsets.UTF_8);
    // Take the remainder BEFORE abs: Math.abs(Integer.MIN_VALUE) is still negative, which
    // would yield a negative bucket and directory index. The remainder is always strictly
    // inside (-numBuckets, numBuckets), so abs is safe here, and the result is the same as
    // the previous formula for every other hash value.
    int bucket = Math.abs(hc.asInt() % numBuckets);
    int dirNum = (bucket / numDisks) % numDisks;
    return getDirPath(conf, dirNum) + CacheConfig.fileCacheDirSuffixConf;
}
From source file:com.jeffplaisance.util.fingertree.rope.Rope.java
/**
 * Computes the hash code by feeding every string in {@code data}, in iteration order, into a
 * single murmur3_32 hasher.
 *
 * @return the resulting 32-bit hash
 */
@Override
public int hashCode() {
    final Hasher accumulator = Hashing.murmur3_32().newHasher();
    for (final String chunk : data) {
        accumulator.putString(chunk);
    }
    final int digest = accumulator.hash().asInt();
    return digest;
}
From source file:org.apache.mahout.freqtermsets.PFPGrowth.java
public static void loadEarlierFHashMaps(JobContext context, Parameters params, long intervalStart, OpenIntObjectHashMap<String> idStringMapOut, OpenObjectIntHashMap<String> stringIdMapOut) throws IOException { // I resist the urge to cache this list because I don't know what exactly would happen // when the job is run in hadoop where every job has its own JVM.. will static // fields somehow leak? Can I be sure that the static WeakHashMap used as a cache is mine? // FINALLY.. the list would be loaded only twice, once for mapper, and once for reducer OpenObjectLongHashMap<String> prevFLists = PFPGrowth.readOlderCachedFLists(context.getConfiguration(), intervalStart, TimeWeightFunction.getDefault(params)); LinkedList<String> terms = Lists.newLinkedList(); prevFLists.keysSortedByValue(terms); Iterator<String> termsIter = terms.descendingIterator(); while (termsIter.hasNext()) { String t = termsIter.next(); int id = Hashing.murmur3_32().hashString(t, Charset.forName("UTF-8")).asInt(); int c = 0; while (idStringMapOut.containsKey(id)) { // Best effort if (c < t.length()) { id = Hashing.murmur3_32((int) t.charAt(c++)).hashString(t, Charset.forName("UTF-8")).asInt(); } else { ++id;// w ww. j a v a2 s. c o m } } idStringMapOut.put(id, t); stringIdMapOut.put(t, id); } }
From source file:com.dcits.govsbu.southernbase.baseproject2.modules.utils.Digests.java
/**
 * Computes the murmur3 32-bit hash of a byte array.
 *
 * @param input bytes to hash
 * @return the hash as an {@code int}
 */
public static int murmur32(byte[] input) {
    final int digest = Hashing.murmur3_32()
            .hashBytes(input)
            .asInt();
    return digest;
}