Example usage for org.apache.mahout.common RandomUtils getRandom

List of usage examples for org.apache.mahout.common RandomUtils getRandom

Introduction

On this page you can find example usages of org.apache.mahout.common.RandomUtils.getRandom().

Prototype

public static RandomWrapper getRandom() 

Source Link

Usage

From source file:GavaFactorizer.java

License:Apache License

/**
 * Allocates the user and item factor matrices, fills them with their starting values,
 * and then caches and shuffles the preferences.
 *
 * <p>Each row has {@code numFeatures} columns. Column 0 pairs the global average preference
 * (user side) with a constant 1 (item side); {@code USER_BIAS_INDEX} and
 * {@code ITEM_BIAS_INDEX} pair a bias slot, initialised to 0, with a constant 1 on the
 * opposite side. Columns from {@code featureOffset} onwards receive Gaussian noise scaled by
 * {@code randomNoise}.
 *
 * @throws TasteException propagated from the data model or helper calls made here
 */
protected void prepareTraining() throws TasteException {
    Random rng = RandomUtils.getRandom();
    userVectors = new double[dataModel.getNumUsers()][numFeatures];
    itemVectors = new double[dataModel.getNumItems()][numFeatures];

    double averagePreference = getAveragePreference();

    // User side: all user rows are drawn before any item row, so the random sequence is
    // consumed in the same order as a plain index loop would.
    for (double[] userRow : userVectors) {
        userRow[0] = averagePreference;
        userRow[USER_BIAS_INDEX] = 0; // slot that will hold the user bias
        userRow[ITEM_BIAS_INDEX] = 1; // the matching item column carries the item bias
        for (int f = featureOffset; f < numFeatures; f++) {
            userRow[f] = rng.nextGaussian() * randomNoise;
        }
    }

    // Item side mirrors the user side with the constant/bias roles swapped.
    for (double[] itemRow : itemVectors) {
        itemRow[0] = 1; // the matching user column carries the global average
        itemRow[USER_BIAS_INDEX] = 1; // the matching user column carries the user bias
        itemRow[ITEM_BIAS_INDEX] = 0; // slot that will hold the item bias
        for (int f = featureOffset; f < numFeatures; f++) {
            itemRow[f] = rng.nextGaussian() * randomNoise;
        }
    }

    cachePreferences();
    shufflePreferences();
}

From source file:GavaFactorizer.java

License:Apache License

/**
 * Randomly permutes the cached preferences in place using a Durstenfeld shuffle: walking from
 * the last position down to position 1, each position is swapped with a uniformly chosen
 * position at or below it.
 */
protected void shufflePreferences() {
    Random rng = RandomUtils.getRandom();
    int position = cachedUserIDs.length - 1;
    while (position > 0) {
        // nextInt(position + 1) yields an index in [0, position], so an element may stay put.
        int partner = rng.nextInt(position + 1);
        swapCachedPreferences(position, partner);
        position--;
    }
}

From source file:com.clearspring.analytics.stream.quantile.GroupTreeTest.java

License:Apache License

/**
 * Drives a {@code GroupTree} through random insertions, random removals and in-place updates,
 * checking after each phase that iterating the tree yields group means in the same order as a
 * sorted reference list.
 */
@Test
public void testRandomRebalance() {
    Random rng = RandomUtils.getRandom();
    GroupTree tree = new GroupTree();
    List<Double> reference = Lists.newArrayList();

    // Phase 1: insert 1000 random points, verifying balance after every insertion.
    int inserted = 0;
    while (inserted < 1000) {
        double sample = rng.nextDouble();
        tree.add(new TDigest.Group(sample));
        reference.add(sample);
        tree.checkBalance();
        inserted++;
    }

    Collections.sort(reference);

    Iterator<Double> expected = reference.iterator();
    for (TDigest.Group node : tree) {
        assertEquals(expected.next(), node.mean(), 0.0);
    }

    // Phase 2: remove 100 randomly chosen points from both structures.
    for (int removed = 0; removed < 100; removed++) {
        double victim = reference.get(rng.nextInt(reference.size()));
        // A primitive double argument boxes to Double, so this removes by value, not by index.
        reference.remove(victim);
        tree.remove(tree.floor(new TDigest.Group(victim)));
    }

    Collections.sort(reference);
    expected = reference.iterator();
    for (TDigest.Group node : tree) {
        assertEquals(expected.next(), node.mean(), 0.0);
    }

    // Phase 3: shift every reference value by 10 and apply the matching change to its group.
    for (int idx = 0; idx < reference.size(); idx++) {
        double before = reference.get(idx);
        reference.set(idx, before + 10);
        TDigest.Group moved = tree.floor(new TDigest.Group(before));
        tree.remove(moved);
        tree.checkBalance();
        // NOTE(review): presumably adding one point at mean + 20 to a single-point group moves
        // its mean by 10 -- confirm against TDigest.Group.add.
        moved.add(moved.mean() + 20, 1);
        tree.add(moved);
        tree.checkBalance();
    }

    // The list is not re-sorted here; every element received the same offset.
    expected = reference.iterator();
    for (TDigest.Group node : tree) {
        assertEquals(expected.next(), node.mean(), 0.0);
    }
}

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Runs the shared {@code runTest} check {@code repeats()} times against a
 * {@code Uniform(0, 1)} distribution, using one random source for both the distribution and
 * the test run.
 */
@Test
public void testUniform() {
    Random rng = RandomUtils.getRandom();
    int round = 0;
    while (round < repeats()) {
        // A fresh distribution and quantile array are built for every round.
        Uniform uniform = new Uniform(0, 1, rng);
        double[] quantiles = { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 };
        runTest(uniform, 100, quantiles, "uniform", true, rng);
        round++;
    }
}

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Runs the shared {@code runTest} check {@code repeats()} times against a
 * {@code Gamma(0.1, 0.1)} distribution.
 *
 * <p>This Gamma distribution is very heavily skewed: the 0.1%-ile is 6.07e-30, the median is
 * 0.006 and the 99.9th %-ile is 33.6, while the mean is 1. Because of that skew the positional
 * accuracy has to vary by over 11 orders of magnitude.
 *
 * <p>For reference, an earlier (disabled) argument listed the distribution's values at the
 * tested quantiles: 6.0730483624079e-30, 6.0730483624079e-20, 6.0730483627432e-10,
 * 5.9339110446023e-03, 2.6615455373884e+00, 1.5884778179295e+01, 3.3636770117188e+01.
 */
@Test
public void testGamma() {
    Random rng = RandomUtils.getRandom();
    int round = 0;
    while (round < repeats()) {
        // A fresh distribution and quantile array are built for every round.
        Gamma skewed = new Gamma(0.1, 0.1, rng);
        double[] quantiles = { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 };
        runTest(skewed, 100, quantiles, "gamma", true, rng);
        round++;
    }
}

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Runs the shared {@code runTest} check {@code repeats()} times against an even mixture of a
 * {@code Uniform(-1, 1)} and a {@code Normal(0, 1e-5)} distribution.
 *
 * <p>The mixture has a very narrow peak centered near the median; the digest is expected to be
 * scale invariant and to work well regardless.
 */
@Test
public void testNarrowNormal() {
    final Random rng = RandomUtils.getRandom();

    AbstractContinousDistribution mixture = new AbstractContinousDistribution() {
        // Both components draw from the same random source as the coin flip below.
        AbstractContinousDistribution narrow = new Normal(0, 1e-5, rng);
        AbstractContinousDistribution wide = new Uniform(-1, 1, rng);

        @Override
        public double nextDouble() {
            // Fair coin: below 0.5 samples the uniform component, otherwise the normal one.
            return rng.nextDouble() < 0.5 ? wide.nextDouble() : narrow.nextDouble();
        }
    };

    int round = 0;
    while (round < repeats()) {
        double[] quantiles = { 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999 };
        runTest(mixture, 100, quantiles, "mixture", false, rng);
        round++;
    }
}

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Feeds 100000 samples drawn from only eleven distinct values (0.0, 0.1, ..., 1.0) into a
 * {@code TDigest} and checks the summary size, the CDF and the quantile estimates.
 */
@Test
public void testRepeatedValues() {
    final Random rng = RandomUtils.getRandom();
    final double compression = 1000;
    final int sampleCount = 100000;

    // 5% of samples will be 0 or 1.0.  10% for each of the values 0.1 through 0.9
    AbstractContinousDistribution rounded = new AbstractContinousDistribution() {
        @Override
        public double nextDouble() {
            return Math.rint(rng.nextDouble() * 10) / 10.0;
        }
    };

    TDigest digest = new TDigest(compression, rng);
    long startNanos = System.nanoTime();
    List<Double> samples = Lists.newArrayList();
    for (int n = 0; n < sampleCount; n++) {
        double sample = rounded.nextDouble();
        samples.add(sample);
        digest.add(sample);
    }

    System.out.printf("# %fus per point\n", (System.nanoTime() - startNanos) * 1e-3 / sampleCount);
    System.out.printf("# %d centroids\n", digest.centroidCount());

    // I would be happier with 5x compression, but repeated values make things kind of weird
    assertTrue("Summary is too large", digest.centroidCount() < 10 * compression);

    // Offsets from each grid value; troublesome points nearly halfway between values are skipped.
    double[] offsets = { 0.01, 0.02, 0.03, 0.07, 0.08, 0.09 };

    // all quantiles should round to nearest actual value
    for (int step = 0; step < 10; step++) {
        double z = step / 10.0;
        for (double offset : offsets) {
            double q = z + offset;
            double cdf = digest.cdf(q);
            // tolerances are relaxed for repeated values
            String cdfMessage = String.format("z=%.1f, q = %.3f, cdf = %.3f", z, q, cdf);
            assertEquals(cdfMessage, z + 0.05, cdf, 0.005);

            double estimate = digest.quantile(q);
            String quantileMessage = String.format("z=%.1f, q = %.3f, cdf = %.3f, estimate = %.3f", z, q, cdf,
                    estimate);
            assertEquals(quantileMessage, Math.rint(q * 10) / 10.0, estimate, 0.001);
        }
    }
}

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Runs the shared {@code runTest} check {@code repeats()} times against a deterministic,
 * strictly increasing sequence of points spaced {@code Math.PI * 1e-5} apart.
 */
@Test
public void testSequentialPoints() {
    Random rng = RandomUtils.getRandom();
    int round = 0;
    while (round < repeats()) {
        // A new generator per round restarts the sequence from zero.
        AbstractContinousDistribution ramp = new AbstractContinousDistribution() {
            double current = 0;

            @Override
            public double nextDouble() {
                current += Math.PI * 1e-5;
                return current;
            }
        };
        double[] quantiles = { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 };
        runTest(ramp, 100, quantiles, "sequential", true, rng);
        round++;
    }
}

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Serializes a compressed {@code TDigest} in both of its byte encodings and checks that each
 * one round-trips through {@code TDigest.fromBytes}.
 *
 * <p>The "big" encoding ({@code asBytes}) is size-checked, reported and decoded first, with a
 * quantile tolerance of 1e-8. The buffer is then cleared and the "small" encoding
 * ({@code asSmallBytes}) goes through the same steps with a tolerance of 1e-6.
 *
 * <p>Previously the buffer was cleared and overwritten with the small encoding before the
 * first decode, so the "# big" line reported the small size and the big encoding was never
 * decoded at all.
 *
 * <p>NOTE(review): this assumes {@code TDigest.fromBytes} accepts the output of
 * {@code asBytes} as well as {@code asSmallBytes} -- confirm against the TDigest source.
 */
@Test
public void testSerialization() {
    Random gen = RandomUtils.getRandom();
    TDigest dist = new TDigest(100, gen);
    for (int i = 0; i < 100000; i++) {
        dist.add(gen.nextDouble());
    }
    dist.compress();

    ByteBuffer buf = ByteBuffer.allocate(20000);

    // Big encoding: check and report its size while the buffer still holds it.
    dist.asBytes(buf);
    assertTrue(buf.position() < 11000);
    assertEquals(buf.position(), dist.byteSize());
    System.out.printf("# big %d bytes\n", buf.position());

    buf.flip();
    assertDigestsMatch(dist, TDigest.fromBytes(buf), 1e-8);

    // Small encoding: clear() restores the full capacity for writing, so the write does not
    // depend on how many bytes the previous decode happened to consume.
    buf.clear();
    dist.asSmallBytes(buf);
    assertTrue(buf.position() < 6000);
    assertEquals(buf.position(), dist.smallByteSize());
    System.out.printf("# small %d bytes\n", buf.position());

    buf.flip();
    assertDigestsMatch(dist, TDigest.fromBytes(buf), 1e-6);
}

/**
 * Asserts that a deserialized digest agrees with the original: same centroid count,
 * compression and size, quantiles within {@code quantileTolerance} on a 0.01 grid over
 * [0, 1), and identical per-centroid counts in iteration order.
 *
 * @param expected          the digest that was serialized
 * @param actual            the digest read back from the buffer
 * @param quantileTolerance maximum allowed absolute difference between quantile estimates
 */
private static void assertDigestsMatch(TDigest expected, TDigest actual, double quantileTolerance) {
    assertEquals(expected.centroidCount(), actual.centroidCount());
    assertEquals(expected.compression(), actual.compression(), 0);
    assertEquals(expected.size(), actual.size());

    for (double q = 0; q < 1; q += 0.01) {
        assertEquals(expected.quantile(q), actual.quantile(q), quantileTolerance);
    }

    Iterator<? extends TDigest.Group> restored = actual.centroids().iterator();
    for (TDigest.Group group : expected.centroids()) {
        assertTrue(restored.hasNext());
        assertEquals(group.count(), restored.next().count());
    }
    assertFalse(restored.hasNext());
}

From source file:com.clearspring.analytics.stream.quantile.TDigestTest.java

License:Apache License

/**
 * Encodes 3000 random integers with {@code TDigest.encode} into a {@link ByteBuffer} and
 * checks that {@code TDigest.decode} returns them in the same order.
 *
 * <p>Each random value is unsigned-shifted right by {@code i / 100} bits (0 to 29), so the
 * inputs span a wide range of magnitudes.
 */
@Test
public void testIntEncoding() {
    final int count = 3000;
    Random rng = RandomUtils.getRandom();
    ByteBuffer buffer = ByteBuffer.allocate(10000);
    List<Integer> expected = Lists.newArrayList();

    for (int i = 0; i < count; i++) {
        int value = rng.nextInt() >>> (i / 100);
        expected.add(value);
        TDigest.encode(buffer, value);
    }

    // Switch the buffer from writing to reading.
    buffer.flip();

    for (int i = 0; i < count; i++) {
        int decoded = TDigest.decode(buffer);
        assertEquals(String.format("%d:", i), expected.get(i).intValue(), decoded);
    }
}