List of usage examples for the org.apache.commons.math3.distribution.ZipfDistribution constructor
public ZipfDistribution(final int numberOfElements, final double exponent)
From source file:it.stilo.g.util.WeightedZIPFianGenerator.java
public static void generate(WeightedDirectedGraph g, double exponent, int degree, int runner) { long time = System.currentTimeMillis(); final CountDownLatch latch = new CountDownLatch(runner); ZipfDistribution dist = new ZipfDistribution(g.size, exponent); Thread[] workers = new Thread[runner]; for (int i = 0; i < runner; i++) { workers[i] = new Thread(new WeightedZIPFianGenerator(g, latch, dist, degree, i, runner)); workers[i].setName("" + i); workers[i].start();/*w w w . j a va 2s. co m*/ } try { latch.await(); } catch (InterruptedException e) { logger.debug(e); } logger.info(WeightedZIPFianGenerator.class.getName() + "\t" + ((System.currentTimeMillis() - time) / 1000d) + "s"); }
From source file:com.qwazr.search.bench.test.Merging.MergingTest.java
@BeforeClass public static void setup() { executorService = Executors.newCachedThreadPool(); // Let's build a dictionary of terms dictionary = new String[100_000]; for (int i = 0; i < dictionary.length; i++) dictionary[i] = RandomUtils.alphanumeric(RandomUtils.nextInt(TERM_MIN_SIZE, TERM_MAX_SIZE)); distribution = new ZipfDistribution(dictionary.length, 1.07f); }
From source file:main.java.db.tpcc.TpccDatabase.java
/**
 * Estimates the initial tuple count of every TPC-C table and registers the
 * tables in this database object.
 *
 * Table ids are 1-based and follow the fixed order of {@code table_names}, so
 * estimates that reference another table (e.g. CUSTOMER reads DISTRICT's count)
 * rely on that table appearing earlier in the order.
 *
 * @param wrl workload descriptor: table count, table types and dependency schema
 */
private void estimateTableSize(Workload wrl) {
    Global.LOGGER.info("-----------------------------------------------------------------------------");
    Global.LOGGER.info("Estimating initial data tuple counts for '" + this.getDb_name() + "' database ...");

    // Fixed TPC-C table order; index (tbl_id - 1) selects the name.
    String[] table_names = { TpccConstants.TBL_WAREHOUSE, TpccConstants.TBL_ITEM, TpccConstants.TBL_DISTRICT,
            TpccConstants.TBL_STOCK, TpccConstants.TBL_CUSTOMER, TpccConstants.TBL_HISTORY,
            TpccConstants.TBL_ORDERS, TpccConstants.TBL_NEW_ORDER, TpccConstants.TBL_ORDER_LINE };

    int dependent = 0;

    for (int tbl_id = 1; tbl_id <= wrl.tbl; tbl_id++) {
        Table tbl = new Table(tbl_id, wrl.tbl_types.get(tbl_id), table_names[tbl_id - 1]);
        this.getDb_tbl_name_id_map().put(tbl.getTbl_name(), tbl.getTbl_id());

        // Set TPCC table ranks (Zipf samplers) for the Warehouse and Item tables only.
        if (tbl.getTbl_name().equals(TpccConstants.TBL_WAREHOUSE)) {
            tbl.zipfDistribution = new ZipfDistribution(TpccConstants.NUM_WAREHOUSES, TpccConstants.ZIPF_EXP);
            tbl.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns);
        } else if (tbl.getTbl_name().equals(TpccConstants.TBL_ITEM)) {
            tbl.zipfDistribution = new ZipfDistribution(
                    ((int) (TpccConstants.NUM_ITEMS * WorkloadConstants.SCALE_FACTOR)),
                    TpccConstants.ZIPF_EXP);
            tbl.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns);
        }

        this.getDb_tables().put(tbl_id, tbl);

        // Determine the number of data rows to be populated for each individual table.
        switch (tbl.getTbl_name()) {
        case TpccConstants.TBL_WAREHOUSE:
            tbl.setTbl_init_tuples(TpccConstants.NUM_WAREHOUSES);
            break;
        case TpccConstants.TBL_ITEM:
            tbl.setTbl_init_tuples((int) (TpccConstants.NUM_ITEMS * WorkloadConstants.SCALE_FACTOR));
            break;
        case TpccConstants.TBL_DISTRICT:
            tbl.setTbl_init_tuples(
                    (int) (TpccConstants.DISTRICTS_PER_WAREHOUSE * TpccConstants.NUM_WAREHOUSES));
            break;
        case TpccConstants.TBL_STOCK:
            tbl.setTbl_init_tuples((int) (TpccConstants.STOCKS_PER_WAREHOUSE * TpccConstants.NUM_WAREHOUSES
                    * WorkloadConstants.SCALE_FACTOR));
            break;
        case TpccConstants.TBL_CUSTOMER:
            // Derived from the District table's row count (District is built earlier).
            tbl.setTbl_init_tuples((int) (TpccConstants.CUSTOMERS_PER_DISTRICT * this
                    .getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_DISTRICT)).getTbl_init_tuples()
                    * WorkloadConstants.SCALE_FACTOR));
            break;
        case TpccConstants.TBL_HISTORY:
            // One history row per Customer row (1+).
            tbl.setTbl_init_tuples(this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_CUSTOMER))
                    .getTbl_init_tuples());
            break;
        case TpccConstants.TBL_ORDERS:
            // One order per Customer row (1+).
            tbl.setTbl_init_tuples(
                    (int) (this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_CUSTOMER))
                            .getTbl_init_tuples()));
            break;
        case TpccConstants.TBL_NEW_ORDER:
            // The last ~1/3 of the Orders range: 10^(base-1) - 10^(base-2) rows,
            // where base = Utility.getBase(orders row count).
            tbl.setTbl_init_tuples((int) (Math.pow(10.0d,
                    (Utility.getBase(this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_ORDERS))
                            .getTbl_init_tuples()) - 1))
                    - Math.pow(10.0d, (Utility.getBase(
                            this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_ORDERS))
                                    .getTbl_init_tuples()) - 2))));
            break;
        case TpccConstants.TBL_ORDER_LINE:
            // Related to Orders and Stock (10 most popular values from the Stock table).
            // NOTE(review): hard-coded id 6 is HISTORY in the fixed order above, and the
            // commented-out original resolved the id by name — confirm 6 is intended:
            //tbl.setTbl_init_tuples(this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_HISTORY)).getTbl_init_tuples() * TpccConstants.NUM_MOST_POPULAR_ITEM_FROM_STOCK);
            tbl.setTbl_init_tuples(this.getTable(6).getTbl_init_tuples() * 10);
            break;
        }

        Global.LOGGER.info(tbl.getTbl_name() + ": " + tbl.getTbl_init_tuples());

        // Record foreign-key dependencies for non-root tables (type != 0).
        if (tbl.getTbl_type() != 0) {
            for (int i = 1; i <= wrl.tbl; i++) {
                dependent = wrl.schema.get(tbl.getTbl_id()).get(i - 1); // ArrayList index starts from 0
                if (dependent == 1)
                    tbl.getTbl_foreign_tables().add(i);
            }
        }
    }
}
From source file:main.java.db.twitter.TwitterDatabase.java
private void loadUsers() { Table tbl = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_USER)); tbl.zipfDistribution = new ZipfDistribution(this.num_users, TwitterConstants.ZIPF_EXP); tbl.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns); int pk = 0;/*from ww w . j a v a2 s.com*/ for (pk = 1; pk <= this.num_users; pk++) { ++Global.global_tupleSeq; this.insertTuple(tbl.getTbl_id(), pk); } tbl.setTbl_last_entry(pk); Global.LOGGER.info(tbl.getTbl_tuples().size() + " tuples have successfully inserted into '" + tbl.getTbl_name() + "' table."); }
From source file:main.java.db.twitter.TwitterDatabase.java
/** * The number of Tweets is fixed to num_tweets * We simply select using the distribution who issued the Tweet *//*from w w w . j a v a 2 s.c om*/ private void loadTweets() { Table tbl = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_TWEETS)); tbl.zipfDistribution = new ZipfDistribution(this.num_tweets, TwitterConstants.ZIPF_EXP); tbl.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns); // Foreign Table Table ftbl = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_USER)); ScrambledZipfianGenerator szGen = new ScrambledZipfianGenerator(this.num_users); int pk = 0, fk = 0; for (pk = 1; pk <= this.num_tweets; pk++) { fk = szGen.nextInt() + 1; //fk = ftbl.zipfDistribution.sample(); ++Global.global_tupleSeq; Tuple tpl = this.insertTuple(tbl.getTbl_id(), pk); // Populating foreign table relationships if (tpl.getTuple_fk().containsKey(ftbl.getTbl_id())) { tpl.getTuple_fk().get(ftbl.getTbl_id()).add(fk); } else { Set<Integer> fkList = new HashSet<Integer>(); fkList.add(fk); tpl.getTuple_fk().put(ftbl.getTbl_id(), fkList); } // Populating into index tbl.insertSecondaryIdx(fk, pk); } tbl.setTbl_last_entry(pk); Global.LOGGER.info(tbl.getTbl_tuples().size() + " tuples have successfully inserted into '" + tbl.getTbl_name() + "' table."); }
From source file:io.druid.benchmark.datagen.BenchmarkColumnValueGenerator.java
private void initDistribution() { BenchmarkColumnSchema.ValueDistribution distributionType = schema.getDistributionType(); ValueType type = schema.getType();/*from ww w .jav a 2 s . c om*/ List<Object> enumeratedValues = schema.getEnumeratedValues(); List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities(); List<Pair<Object, Double>> probabilities = new ArrayList<>(); switch (distributionType) { case SEQUENTIAL: // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(), schema.getEnumeratedValues()); break; case UNIFORM: distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble()); break; case DISCRETE_UNIFORM: if (enumeratedValues == null) { enumeratedValues = new ArrayList<>(); for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) { Object val = convertType(i, type); enumeratedValues.add(val); } } // give them all equal probability, the library will normalize probabilities to sum to 1.0 for (int i = 0; i < enumeratedValues.size(); i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), 0.1)); } distribution = new EnumeratedTreeDistribution<>(probabilities); break; case NORMAL: distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation()); break; case ROUNDED_NORMAL: NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation()); distribution = new RealRoundingDistribution(normalDist); break; case ZIPF: int cardinality; if (enumeratedValues == null) { Integer startInt = schema.getStartInt(); cardinality = schema.getEndInt() - startInt; ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent()); for (int i = 0; i < cardinality; i++) { probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i))); } } else { cardinality = enumeratedValues.size(); ZipfDistribution zipf = 
new ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent()); for (int i = 0; i < cardinality; i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i))); } } distribution = new EnumeratedTreeDistribution<>(probabilities); break; case ENUMERATED: for (int i = 0; i < enumeratedValues.size(); i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i))); } distribution = new EnumeratedTreeDistribution<>(probabilities); break; default: throw new UnsupportedOperationException("Unknown distribution type: " + distributionType); } if (distribution instanceof AbstractIntegerDistribution) { ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed); } else if (distribution instanceof AbstractRealDistribution) { ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed); } else if (distribution instanceof EnumeratedDistribution) { ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed); } }
From source file:main.java.db.twitter.TwitterDatabase.java
/** * For each User (Follower) we select how many users he is following (followees List) * then select users to fill up that list. * Selecting is based on the distribution. * NOTE: We are using two different distribution to avoid correlation: * ZipfianGenerator (describes the followed most) * ScrambledZipfianGenerator (describes the heavy tweeters) *///from w ww.j a va 2 s. c o m private void loadFollowData() { Table tbl_followers = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_FOLLOWERS)); Table tbl_follows = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_FOLLOWS)); // Foreign Tables ArrayList<Integer> ftblList_followers = new ArrayList<Integer>(tbl_followers.getTbl_foreign_tables()); Table ftbl_followers = this.getTable(ftblList_followers.get(0)); ArrayList<Integer> ftblList_follows = new ArrayList<Integer>(tbl_follows.getTbl_foreign_tables()); Table ftbl_follows = this.getTable(ftblList_follows.get(0)); // Distributions tbl_followers.zipfDistribution = new ZipfDistribution(this.num_users, TwitterConstants.ZIPF_EXP); tbl_followers.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns); tbl_follows.zipfDistribution = new ZipfDistribution(this.num_follows, TwitterConstants.ZIPF_EXP); tbl_follows.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns); ZipfianGenerator zipfFollowee = new ZipfianGenerator(this.num_users, TwitterConstants.ZIPF_EXP); ZipfianGenerator zipfFollows = new ZipfianGenerator(this.num_follows, TwitterConstants.ZIPF_EXP); Set<Integer> allFollowees = new HashSet<Integer>(); Set<Integer> followees = new HashSet<Integer>(); int follower = 0, followee = 0; for (follower = 1; follower <= this.num_users; follower++) { // Adding a new 'Follower' ++Global.global_tupleSeq; Tuple follower_tpl = this.insertTuple(tbl_follows.getTbl_id(), follower); followees.clear(); int time = zipfFollows.nextInt() + 1; //time = tbl_follows.zipfDistribution.sample(); // At least this follower will follow 1 user if (time 
== 0) time = 1; for (int f = 0; f < time;) { followee = zipfFollowee.nextInt() + 1; //followee = tbl_followers.zipfDistribution.sample(); if (follower != followee && !followees.contains(followee)) { followees.add(followee); // Adding a new 'Followee' if not exists Tuple followee_tpl; if (!allFollowees.contains(followee)) { ++Global.global_tupleSeq; followee_tpl = this.insertTuple(tbl_followers.getTbl_id(), followee); } else { followee_tpl = tbl_followers.getTupleByPk(followee); } // Follows Table ('follower' follows 'followee' || f2 follows f1 ) // Populating foreign table relationships if (follower_tpl.getTuple_fk().containsKey(ftbl_follows.getTbl_id())) { follower_tpl.getTuple_fk().get(ftbl_follows.getTbl_id()).add(followee); } else { Set<Integer> followeeSet = new HashSet<Integer>(); followeeSet.add(followee); follower_tpl.getTuple_fk().put(ftbl_follows.getTbl_id(), followeeSet); } // Insert into index tbl_follows.insertSecondaryIdx(follower, followee); // Followers Table ('followee' is followed by 'follower' || f1 is followed by f2) // Populating foreign table relationships if (followee_tpl.getTuple_fk().containsKey(ftbl_followers.getTbl_id())) { followee_tpl.getTuple_fk().get(ftbl_followers.getTbl_id()).add(follower); } else { Set<Integer> followerSet = new HashSet<Integer>(); followerSet.add(follower); followee_tpl.getTuple_fk().put(ftbl_followers.getTbl_id(), followerSet); } // Insert into index tbl_followers.insertSecondaryIdx(followee, follower); ++f; } //end-if() } //end-for() } //end-for() tbl_followers.setTbl_last_entry(followee); tbl_follows.setTbl_last_entry(follower); Global.LOGGER.info(tbl_followers.getTbl_tuples().size() + " tuples have successfully inserted into '" + tbl_followers.getTbl_name() + "' table."); Global.LOGGER.info(tbl_follows.getTbl_tuples().size() + " tuples have successfully inserted into '" + tbl_follows.getTbl_name() + "' table."); }
From source file:org.apache.beam.sdk.io.synthetic.SyntheticBoundedIOTest.java
@Before public void setUp() { testSourceOptions.splitPointFrequencyRecords = 1; testSourceOptions.numRecords = 10;//from w ww .j av a 2s .c o m testSourceOptions.keySizeBytes = 10; testSourceOptions.valueSizeBytes = 20; testSourceOptions.numHotKeys = 3; testSourceOptions.hotKeyFraction = 0.3; testSourceOptions.setSeed(123456); testSourceOptions.bundleSizeDistribution = fromIntegerDistribution(new ZipfDistribution(100, 2.5)); testSourceOptions.forceNumInitialBundles = null; }
From source file:org.apache.druid.benchmark.datagen.BenchmarkColumnValueGenerator.java
private void initDistribution() { BenchmarkColumnSchema.ValueDistribution distributionType = schema.getDistributionType(); ValueType type = schema.getType();//w w w.j a v a2 s .c o m List<Object> enumeratedValues = schema.getEnumeratedValues(); List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities(); List<Pair<Object, Double>> probabilities = new ArrayList<>(); switch (distributionType) { case SEQUENTIAL: // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(), schema.getEnumeratedValues()); break; case UNIFORM: distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble()); break; case DISCRETE_UNIFORM: if (enumeratedValues == null) { enumeratedValues = new ArrayList<>(); for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) { Object val = convertType(i, type); enumeratedValues.add(val); } } // give them all equal probability, the library will normalize probabilities to sum to 1.0 for (Object enumeratedValue : enumeratedValues) { probabilities.add(new Pair<>(enumeratedValue, 0.1)); } distribution = new EnumeratedTreeDistribution<>(probabilities); break; case NORMAL: distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation()); break; case ROUNDED_NORMAL: NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation()); distribution = new RealRoundingDistribution(normalDist); break; case ZIPF: int cardinality; if (enumeratedValues == null) { Integer startInt = schema.getStartInt(); cardinality = schema.getEndInt() - startInt; ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent()); for (int i = 0; i < cardinality; i++) { probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i))); } } else { cardinality = enumeratedValues.size(); ZipfDistribution zipf = new 
ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent()); for (int i = 0; i < cardinality; i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i))); } } distribution = new EnumeratedTreeDistribution<>(probabilities); break; case ENUMERATED: for (int i = 0; i < enumeratedValues.size(); i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i))); } distribution = new EnumeratedTreeDistribution<>(probabilities); break; default: throw new UnsupportedOperationException("Unknown distribution type: " + distributionType); } if (distribution instanceof AbstractIntegerDistribution) { ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed); } else if (distribution instanceof AbstractRealDistribution) { ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed); } else { ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed); } }
From source file:org.apache.kylin.measure.topn.TopNCounterTest.java
protected String prepareTestDate() throws IOException { String[] allKeys = new String[KEY_SPACE]; for (int i = 0; i < KEY_SPACE; i++) { allKeys[i] = RandomStringUtils.randomAlphabetic(10); }// w ww . j a v a 2 s. co m outputMsg("Start to create test random data..."); long startTime = System.currentTimeMillis(); ZipfDistribution zipf = new ZipfDistribution(KEY_SPACE, 0.5); int keyIndex; File tempFile = File.createTempFile("ZipfDistribution", ".txt"); if (tempFile.exists()) FileUtils.forceDelete(tempFile); FileWriter fw = new FileWriter(tempFile); try { for (int i = 0; i < TOTAL_RECORDS; i++) { keyIndex = zipf.sample() - 1; fw.write(allKeys[keyIndex]); fw.write('\n'); } } finally { if (fw != null) fw.close(); } outputMsg("Create test data takes : " + (System.currentTimeMillis() - startTime) / 1000 + " seconds."); outputMsg("Test data in : " + tempFile.getAbsolutePath()); return tempFile.getAbsolutePath(); }