List of usage examples for org.apache.mahout.common.RandomUtils.getRandom()
public static RandomWrapper getRandom()
From source file:GavaFactorizer.java
License:Apache License
protected void prepareTraining() throws TasteException { Random random = RandomUtils.getRandom(); userVectors = new double[dataModel.getNumUsers()][numFeatures]; itemVectors = new double[dataModel.getNumItems()][numFeatures]; double globalAverage = getAveragePreference(); for (int userIndex = 0; userIndex < userVectors.length; userIndex++) { userVectors[userIndex][0] = globalAverage; userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias for (int feature = featureOffset; feature < numFeatures; feature++) { userVectors[userIndex][feature] = random.nextGaussian() * randomNoise; }/*from w w w . j a v a2 s. co m*/ } for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) { itemVectors[itemIndex][0] = 1; // corresponding user feature contains global average itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias for (int feature = featureOffset; feature < numFeatures; feature++) { itemVectors[itemIndex][feature] = random.nextGaussian() * randomNoise; } } cachePreferences(); shufflePreferences(); }
From source file:GavaFactorizer.java
License:Apache License
/**
 * Randomly reorders the cached preferences in place with a Durstenfeld shuffle:
 * walking from the last position down to position 1, each position is swapped
 * with a randomly chosen position at or before it.
 */
protected void shufflePreferences() {
    Random rng = RandomUtils.getRandom();
    int position = cachedUserIDs.length - 1;
    while (position > 0) {
        // nextInt's bound is exclusive, so +1 lets a position be swapped with itself
        int partner = rng.nextInt(position + 1);
        swapCachedPreferences(position, partner);
        position--;
    }
}
From source file:com.clearspring.analytics.stream.quantile.GroupTreeTest.java
License:Apache License
@Test public void testRandomRebalance() { Random gen = RandomUtils.getRandom(); GroupTree x = new GroupTree(); List<Double> y = Lists.newArrayList(); for (int i = 0; i < 1000; i++) { double v = gen.nextDouble(); x.add(new TDigest.Group(v)); y.add(v);//from ww w . j a va 2 s. co m x.checkBalance(); } Collections.sort(y); Iterator<Double> i = y.iterator(); for (TDigest.Group group : x) { assertEquals(i.next(), group.mean(), 0.0); } for (int j = 0; j < 100; j++) { double v = y.get(gen.nextInt(y.size())); y.remove(v); x.remove(x.floor(new TDigest.Group(v))); } Collections.sort(y); i = y.iterator(); for (TDigest.Group group : x) { assertEquals(i.next(), group.mean(), 0.0); } for (int j = 0; j < y.size(); j++) { double v = y.get(j); y.set(j, v + 10); TDigest.Group g = x.floor(new TDigest.Group(v)); x.remove(g); x.checkBalance(); g.add(g.mean() + 20, 1); x.add(g); x.checkBalance(); } i = y.iterator(); for (TDigest.Group group : x) { assertEquals(i.next(), group.mean(), 0.0); } }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testUniform() { Random gen = RandomUtils.getRandom(); for (int i = 0; i < repeats(); i++) { runTest(new Uniform(0, 1, gen), 100, new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }, "uniform", true, gen); }//from ww w. j av a 2 s. c o m }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testGamma() { // this Gamma distribution is very heavily skewed. The 0.1%-ile is 6.07e-30 while // the median is 0.006 and the 99.9th %-ile is 33.6 while the mean is 1. // this severe skew means that we have to have positional accuracy that // varies by over 11 orders of magnitude. Random gen = RandomUtils.getRandom(); for (int i = 0; i < repeats(); i++) { runTest(new Gamma(0.1, 0.1, gen), 100, // new double[]{6.0730483624079e-30, 6.0730483624079e-20, 6.0730483627432e-10, 5.9339110446023e-03, // 2.6615455373884e+00, 1.5884778179295e+01, 3.3636770117188e+01}, new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }, "gamma", true, gen); }/* w ww . ja v a 2 s . c om*/ }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testNarrowNormal() { // this mixture of a uniform and normal distribution has a very narrow peak which is centered // near the median. Our system should be scale invariant and work well regardless. final Random gen = RandomUtils.getRandom(); AbstractContinousDistribution mix = new AbstractContinousDistribution() { AbstractContinousDistribution normal = new Normal(0, 1e-5, gen); AbstractContinousDistribution uniform = new Uniform(-1, 1, gen); @Override/*from w w w . j a v a 2s.c o m*/ public double nextDouble() { double x; if (gen.nextDouble() < 0.5) { x = uniform.nextDouble(); } else { x = normal.nextDouble(); } return x; } }; for (int i = 0; i < repeats(); i++) { runTest(mix, 100, new double[] { 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999 }, "mixture", false, gen); } }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testRepeatedValues() { final Random gen = RandomUtils.getRandom(); // 5% of samples will be 0 or 1.0. 10% for each of the values 0.1 through 0.9 AbstractContinousDistribution mix = new AbstractContinousDistribution() { @Override// w ww . j a v a2 s. co m public double nextDouble() { return Math.rint(gen.nextDouble() * 10) / 10.0; } }; TDigest dist = new TDigest((double) 1000, gen); long t0 = System.nanoTime(); List<Double> data = Lists.newArrayList(); for (int i1 = 0; i1 < 100000; i1++) { double x = mix.nextDouble(); data.add(x); dist.add(x); } System.out.printf("# %fus per point\n", (System.nanoTime() - t0) * 1e-3 / 100000); System.out.printf("# %d centroids\n", dist.centroidCount()); // I would be happier with 5x compression, but repeated values make things kind of weird assertTrue("Summary is too large", dist.centroidCount() < 10 * (double) 1000); // all quantiles should round to nearest actual value for (int i = 0; i < 10; i++) { double z = i / 10.0; // we skip over troublesome points that are nearly halfway between for (double delta : new double[] { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 }) { double q = z + delta; double cdf = dist.cdf(q); // we also relax the tolerances for repeated values assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf), z + 0.05, cdf, 0.005); double estimate = dist.quantile(q); assertEquals(String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf, estimate), Math.rint(q * 10) / 10.0, estimate, 0.001); } } }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testSequentialPoints() { Random gen = RandomUtils.getRandom(); for (int i = 0; i < repeats(); i++) { runTest(new AbstractContinousDistribution() { double base = 0; @Override//from w w w. j ava2 s. c om public double nextDouble() { base += Math.PI * 1e-5; return base; } }, 100, new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }, "sequential", true, gen); } }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testSerialization() { Random gen = RandomUtils.getRandom(); TDigest dist = new TDigest(100, gen); for (int i = 0; i < 100000; i++) { double x = gen.nextDouble(); dist.add(x);//from w ww .j a v a2 s . c o m } dist.compress(); ByteBuffer buf = ByteBuffer.allocate(20000); dist.asBytes(buf); assertTrue(buf.position() < 11000); assertEquals(buf.position(), dist.byteSize()); buf.clear(); dist.asSmallBytes(buf); assertTrue(buf.position() < 6000); assertEquals(buf.position(), dist.smallByteSize()); System.out.printf("# big %d bytes\n", buf.position()); buf.flip(); TDigest dist2 = TDigest.fromBytes(buf); assertEquals(dist.centroidCount(), dist2.centroidCount()); assertEquals(dist.compression(), dist2.compression(), 0); assertEquals(dist.size(), dist2.size()); for (double q = 0; q < 1; q += 0.01) { assertEquals(dist.quantile(q), dist2.quantile(q), 1e-8); } Iterator<? extends TDigest.Group> ix = dist2.centroids().iterator(); for (TDigest.Group group : dist.centroids()) { assertTrue(ix.hasNext()); assertEquals(group.count(), ix.next().count()); } assertFalse(ix.hasNext()); buf.flip(); dist.asSmallBytes(buf); assertTrue(buf.position() < 6000); System.out.printf("# small %d bytes\n", buf.position()); buf.flip(); dist2 = TDigest.fromBytes(buf); assertEquals(dist.centroidCount(), dist2.centroidCount()); assertEquals(dist.compression(), dist2.compression(), 0); assertEquals(dist.size(), dist2.size()); for (double q = 0; q < 1; q += 0.01) { assertEquals(dist.quantile(q), dist2.quantile(q), 1e-6); } ix = dist2.centroids().iterator(); for (TDigest.Group group : dist.centroids()) { assertTrue(ix.hasNext()); assertEquals(group.count(), ix.next().count()); } assertFalse(ix.hasNext()); }
From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java
License:Apache License
@Test public void testIntEncoding() { Random gen = RandomUtils.getRandom(); ByteBuffer buf = ByteBuffer.allocate(10000); List<Integer> ref = Lists.newArrayList(); for (int i = 0; i < 3000; i++) { int n = gen.nextInt(); n = n >>> (i / 100);//from ww w. j a va 2 s . co m ref.add(n); TDigest.encode(buf, n); } buf.flip(); for (int i = 0; i < 3000; i++) { int n = TDigest.decode(buf); assertEquals(String.format("%d:", i), ref.get(i).intValue(), n); } }