List of usage examples for the org.apache.commons.math3.distribution.ZipfDistribution constructor
public ZipfDistribution(final int numberOfElements, final double exponent)
From source file:it.stilo.g.util.WeightedZIPFianGenerator.java
public static void generate(WeightedDirectedGraph g, double exponent, int degree, int runner) { long time = System.currentTimeMillis(); final CountDownLatch latch = new CountDownLatch(runner); ZipfDistribution dist = new ZipfDistribution(g.size, exponent); Thread[] workers = new Thread[runner]; for (int i = 0; i < runner; i++) { workers[i] = new Thread(new WeightedZIPFianGenerator(g, latch, dist, degree, i, runner)); workers[i].setName("" + i); workers[i].start();/*w w w . j a va 2s. co m*/ } try { latch.await(); } catch (InterruptedException e) { logger.debug(e); } logger.info(WeightedZIPFianGenerator.class.getName() + "\t" + ((System.currentTimeMillis() - time) / 1000d) + "s"); }
From source file:com.qwazr.search.bench.test.Merging.MergingTest.java
@BeforeClass public static void setup() { executorService = Executors.newCachedThreadPool(); // Let's build a dictionary of terms dictionary = new String[100_000]; for (int i = 0; i < dictionary.length; i++) dictionary[i] = RandomUtils.alphanumeric(RandomUtils.nextInt(TERM_MIN_SIZE, TERM_MAX_SIZE)); distribution = new ZipfDistribution(dictionary.length, 1.07f); }
From source file:main.java.db.tpcc.TpccDatabase.java
/**
 * Estimates the initial tuple count of every TPC-C table and registers the
 * tables in this database object.
 *
 * Table ids are 1-based and follow the fixed order of {@code table_names}, so
 * estimates that reference another table (e.g. CUSTOMER reads DISTRICT's count)
 * rely on that table appearing earlier in the order.
 *
 * @param wrl workload descriptor: table count, table types and dependency schema
 */
private void estimateTableSize(Workload wrl) {
    Global.LOGGER.info("-----------------------------------------------------------------------------");
    Global.LOGGER.info("Estimating initial data tuple counts for '" + this.getDb_name() + "' database ...");

    // Fixed TPC-C table order; index (tbl_id - 1) selects the name.
    String[] table_names = { TpccConstants.TBL_WAREHOUSE, TpccConstants.TBL_ITEM, TpccConstants.TBL_DISTRICT,
            TpccConstants.TBL_STOCK, TpccConstants.TBL_CUSTOMER, TpccConstants.TBL_HISTORY,
            TpccConstants.TBL_ORDERS, TpccConstants.TBL_NEW_ORDER, TpccConstants.TBL_ORDER_LINE };

    int dependent = 0;

    for (int tbl_id = 1; tbl_id <= wrl.tbl; tbl_id++) {
        Table tbl = new Table(tbl_id, wrl.tbl_types.get(tbl_id), table_names[tbl_id - 1]);
        this.getDb_tbl_name_id_map().put(tbl.getTbl_name(), tbl.getTbl_id());

        // Set TPCC table ranks (Zipf samplers) for the Warehouse and Item tables only.
        if (tbl.getTbl_name().equals(TpccConstants.TBL_WAREHOUSE)) {
            tbl.zipfDistribution = new ZipfDistribution(TpccConstants.NUM_WAREHOUSES, TpccConstants.ZIPF_EXP);
            tbl.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns);
        } else if (tbl.getTbl_name().equals(TpccConstants.TBL_ITEM)) {
            tbl.zipfDistribution = new ZipfDistribution(
                    ((int) (TpccConstants.NUM_ITEMS * WorkloadConstants.SCALE_FACTOR)),
                    TpccConstants.ZIPF_EXP);
            tbl.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns);
        }

        this.getDb_tables().put(tbl_id, tbl);

        // Determine the number of data rows to be populated for each individual table.
        switch (tbl.getTbl_name()) {
        case TpccConstants.TBL_WAREHOUSE:
            tbl.setTbl_init_tuples(TpccConstants.NUM_WAREHOUSES);
            break;
        case TpccConstants.TBL_ITEM:
            tbl.setTbl_init_tuples((int) (TpccConstants.NUM_ITEMS * WorkloadConstants.SCALE_FACTOR));
            break;
        case TpccConstants.TBL_DISTRICT:
            tbl.setTbl_init_tuples(
                    (int) (TpccConstants.DISTRICTS_PER_WAREHOUSE * TpccConstants.NUM_WAREHOUSES));
            break;
        case TpccConstants.TBL_STOCK:
            tbl.setTbl_init_tuples((int) (TpccConstants.STOCKS_PER_WAREHOUSE * TpccConstants.NUM_WAREHOUSES
                    * WorkloadConstants.SCALE_FACTOR));
            break;
        case TpccConstants.TBL_CUSTOMER:
            // Derived from the District table's row count (District is built earlier).
            tbl.setTbl_init_tuples((int) (TpccConstants.CUSTOMERS_PER_DISTRICT * this
                    .getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_DISTRICT)).getTbl_init_tuples()
                    * WorkloadConstants.SCALE_FACTOR));
            break;
        case TpccConstants.TBL_HISTORY:
            // One history row per Customer row (1+).
            tbl.setTbl_init_tuples(this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_CUSTOMER))
                    .getTbl_init_tuples());
            break;
        case TpccConstants.TBL_ORDERS:
            // One order per Customer row (1+).
            tbl.setTbl_init_tuples(
                    (int) (this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_CUSTOMER))
                            .getTbl_init_tuples()));
            break;
        case TpccConstants.TBL_NEW_ORDER:
            // The last ~1/3 of the Orders range: 10^(base-1) - 10^(base-2) rows,
            // where base = Utility.getBase(orders row count).
            tbl.setTbl_init_tuples((int) (Math.pow(10.0d,
                    (Utility.getBase(this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_ORDERS))
                            .getTbl_init_tuples()) - 1))
                    - Math.pow(10.0d, (Utility.getBase(
                            this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_ORDERS))
                                    .getTbl_init_tuples()) - 2))));
            break;
        case TpccConstants.TBL_ORDER_LINE:
            // Related to Orders and Stock (10 most popular values from the Stock table).
            // NOTE(review): hard-coded id 6 is HISTORY in the fixed order above, and the
            // commented-out original resolved the id by name — confirm 6 is intended:
            //tbl.setTbl_init_tuples(this.getTable(this.getDb_tbl_name_id_map().get(TpccConstants.TBL_HISTORY)).getTbl_init_tuples() * TpccConstants.NUM_MOST_POPULAR_ITEM_FROM_STOCK);
            tbl.setTbl_init_tuples(this.getTable(6).getTbl_init_tuples() * 10);
            break;
        }

        Global.LOGGER.info(tbl.getTbl_name() + ": " + tbl.getTbl_init_tuples());

        // Record foreign-key dependencies for non-root tables (type != 0).
        if (tbl.getTbl_type() != 0) {
            for (int i = 1; i <= wrl.tbl; i++) {
                dependent = wrl.schema.get(tbl.getTbl_id()).get(i - 1); // ArrayList index starts from 0
                if (dependent == 1)
                    tbl.getTbl_foreign_tables().add(i);
            }
        }
    }
}
From source file:main.java.db.twitter.TwitterDatabase.java
private void loadUsers() { Table tbl = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_USER)); tbl.zipfDistribution = new ZipfDistribution(this.num_users, TwitterConstants.ZIPF_EXP); tbl.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns); int pk = 0;/*from ww w . j a v a2 s.com*/ for (pk = 1; pk <= this.num_users; pk++) { ++Global.global_tupleSeq; this.insertTuple(tbl.getTbl_id(), pk); } tbl.setTbl_last_entry(pk); Global.LOGGER.info(tbl.getTbl_tuples().size() + " tuples have successfully inserted into '" + tbl.getTbl_name() + "' table."); }
From source file:main.java.db.twitter.TwitterDatabase.java
/** * The number of Tweets is fixed to num_tweets * We simply select using the distribution who issued the Tweet *//*from w w w . j a v a 2 s.c om*/ private void loadTweets() { Table tbl = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_TWEETS)); tbl.zipfDistribution = new ZipfDistribution(this.num_tweets, TwitterConstants.ZIPF_EXP); tbl.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns); // Foreign Table Table ftbl = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_USER)); ScrambledZipfianGenerator szGen = new ScrambledZipfianGenerator(this.num_users); int pk = 0, fk = 0; for (pk = 1; pk <= this.num_tweets; pk++) { fk = szGen.nextInt() + 1; //fk = ftbl.zipfDistribution.sample(); ++Global.global_tupleSeq; Tuple tpl = this.insertTuple(tbl.getTbl_id(), pk); // Populating foreign table relationships if (tpl.getTuple_fk().containsKey(ftbl.getTbl_id())) { tpl.getTuple_fk().get(ftbl.getTbl_id()).add(fk); } else { Set<Integer> fkList = new HashSet<Integer>(); fkList.add(fk); tpl.getTuple_fk().put(ftbl.getTbl_id(), fkList); } // Populating into index tbl.insertSecondaryIdx(fk, pk); } tbl.setTbl_last_entry(pk); Global.LOGGER.info(tbl.getTbl_tuples().size() + " tuples have successfully inserted into '" + tbl.getTbl_name() + "' table."); }
From source file:io.druid.benchmark.datagen.BenchmarkColumnValueGenerator.java
private void initDistribution() { BenchmarkColumnSchema.ValueDistribution distributionType = schema.getDistributionType(); ValueType type = schema.getType();/*from ww w .jav a 2 s . c om*/ List<Object> enumeratedValues = schema.getEnumeratedValues(); List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities(); List<Pair<Object, Double>> probabilities = new ArrayList<>(); switch (distributionType) { case SEQUENTIAL: // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(), schema.getEnumeratedValues()); break; case UNIFORM: distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble()); break; case DISCRETE_UNIFORM: if (enumeratedValues == null) { enumeratedValues = new ArrayList<>(); for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) { Object val = convertType(i, type); enumeratedValues.add(val); } } // give them all equal probability, the library will normalize probabilities to sum to 1.0 for (int i = 0; i < enumeratedValues.size(); i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), 0.1)); } distribution = new EnumeratedTreeDistribution<>(probabilities); break; case NORMAL: distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation()); break; case ROUNDED_NORMAL: NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation()); distribution = new RealRoundingDistribution(normalDist); break; case ZIPF: int cardinality; if (enumeratedValues == null) { Integer startInt = schema.getStartInt(); cardinality = schema.getEndInt() - startInt; ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent()); for (int i = 0; i < cardinality; i++) { probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i))); } } else { cardinality = enumeratedValues.size(); ZipfDistribution zipf = 
new ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent()); for (int i = 0; i < cardinality; i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i))); } } distribution = new EnumeratedTreeDistribution<>(probabilities); break; case ENUMERATED: for (int i = 0; i < enumeratedValues.size(); i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i))); } distribution = new EnumeratedTreeDistribution<>(probabilities); break; default: throw new UnsupportedOperationException("Unknown distribution type: " + distributionType); } if (distribution instanceof AbstractIntegerDistribution) { ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed); } else if (distribution instanceof AbstractRealDistribution) { ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed); } else if (distribution instanceof EnumeratedDistribution) { ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed); } }
From source file:main.java.db.twitter.TwitterDatabase.java
/** * For each User (Follower) we select how many users he is following (followees List) * then select users to fill up that list. * Selecting is based on the distribution. * NOTE: We are using two different distribution to avoid correlation: * ZipfianGenerator (describes the followed most) * ScrambledZipfianGenerator (describes the heavy tweeters) *///from w ww.j a va 2 s. c o m private void loadFollowData() { Table tbl_followers = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_FOLLOWERS)); Table tbl_follows = this.getTable(this.getDb_tbl_name_id_map().get(TwitterConstants.TBL_FOLLOWS)); // Foreign Tables ArrayList<Integer> ftblList_followers = new ArrayList<Integer>(tbl_followers.getTbl_foreign_tables()); Table ftbl_followers = this.getTable(ftblList_followers.get(0)); ArrayList<Integer> ftblList_follows = new ArrayList<Integer>(tbl_follows.getTbl_foreign_tables()); Table ftbl_follows = this.getTable(ftblList_follows.get(0)); // Distributions tbl_followers.zipfDistribution = new ZipfDistribution(this.num_users, TwitterConstants.ZIPF_EXP); tbl_followers.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns); tbl_follows.zipfDistribution = new ZipfDistribution(this.num_follows, TwitterConstants.ZIPF_EXP); tbl_follows.zipfDistribution.reseedRandomGenerator(Global.repeatedRuns); ZipfianGenerator zipfFollowee = new ZipfianGenerator(this.num_users, TwitterConstants.ZIPF_EXP); ZipfianGenerator zipfFollows = new ZipfianGenerator(this.num_follows, TwitterConstants.ZIPF_EXP); Set<Integer> allFollowees = new HashSet<Integer>(); Set<Integer> followees = new HashSet<Integer>(); int follower = 0, followee = 0; for (follower = 1; follower <= this.num_users; follower++) { // Adding a new 'Follower' ++Global.global_tupleSeq; Tuple follower_tpl = this.insertTuple(tbl_follows.getTbl_id(), follower); followees.clear(); int time = zipfFollows.nextInt() + 1; //time = tbl_follows.zipfDistribution.sample(); // At least this follower will follow 1 user if (time 
== 0) time = 1; for (int f = 0; f < time;) { followee = zipfFollowee.nextInt() + 1; //followee = tbl_followers.zipfDistribution.sample(); if (follower != followee && !followees.contains(followee)) { followees.add(followee); // Adding a new 'Followee' if not exists Tuple followee_tpl; if (!allFollowees.contains(followee)) { ++Global.global_tupleSeq; followee_tpl = this.insertTuple(tbl_followers.getTbl_id(), followee); } else { followee_tpl = tbl_followers.getTupleByPk(followee); } // Follows Table ('follower' follows 'followee' || f2 follows f1 ) // Populating foreign table relationships if (follower_tpl.getTuple_fk().containsKey(ftbl_follows.getTbl_id())) { follower_tpl.getTuple_fk().get(ftbl_follows.getTbl_id()).add(followee); } else { Set<Integer> followeeSet = new HashSet<Integer>(); followeeSet.add(followee); follower_tpl.getTuple_fk().put(ftbl_follows.getTbl_id(), followeeSet); } // Insert into index tbl_follows.insertSecondaryIdx(follower, followee); // Followers Table ('followee' is followed by 'follower' || f1 is followed by f2) // Populating foreign table relationships if (followee_tpl.getTuple_fk().containsKey(ftbl_followers.getTbl_id())) { followee_tpl.getTuple_fk().get(ftbl_followers.getTbl_id()).add(follower); } else { Set<Integer> followerSet = new HashSet<Integer>(); followerSet.add(follower); followee_tpl.getTuple_fk().put(ftbl_followers.getTbl_id(), followerSet); } // Insert into index tbl_followers.insertSecondaryIdx(followee, follower); ++f; } //end-if() } //end-for() } //end-for() tbl_followers.setTbl_last_entry(followee); tbl_follows.setTbl_last_entry(follower); Global.LOGGER.info(tbl_followers.getTbl_tuples().size() + " tuples have successfully inserted into '" + tbl_followers.getTbl_name() + "' table."); Global.LOGGER.info(tbl_follows.getTbl_tuples().size() + " tuples have successfully inserted into '" + tbl_follows.getTbl_name() + "' table."); }
From source file:org.apache.beam.sdk.io.synthetic.SyntheticBoundedIOTest.java
@Before public void setUp() { testSourceOptions.splitPointFrequencyRecords = 1; testSourceOptions.numRecords = 10;//from w ww .j av a 2s .c o m testSourceOptions.keySizeBytes = 10; testSourceOptions.valueSizeBytes = 20; testSourceOptions.numHotKeys = 3; testSourceOptions.hotKeyFraction = 0.3; testSourceOptions.setSeed(123456); testSourceOptions.bundleSizeDistribution = fromIntegerDistribution(new ZipfDistribution(100, 2.5)); testSourceOptions.forceNumInitialBundles = null; }
From source file:org.apache.druid.benchmark.datagen.BenchmarkColumnValueGenerator.java
private void initDistribution() { BenchmarkColumnSchema.ValueDistribution distributionType = schema.getDistributionType(); ValueType type = schema.getType();//w w w.j a v a2 s .c o m List<Object> enumeratedValues = schema.getEnumeratedValues(); List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities(); List<Pair<Object, Double>> probabilities = new ArrayList<>(); switch (distributionType) { case SEQUENTIAL: // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(), schema.getEnumeratedValues()); break; case UNIFORM: distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble()); break; case DISCRETE_UNIFORM: if (enumeratedValues == null) { enumeratedValues = new ArrayList<>(); for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) { Object val = convertType(i, type); enumeratedValues.add(val); } } // give them all equal probability, the library will normalize probabilities to sum to 1.0 for (Object enumeratedValue : enumeratedValues) { probabilities.add(new Pair<>(enumeratedValue, 0.1)); } distribution = new EnumeratedTreeDistribution<>(probabilities); break; case NORMAL: distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation()); break; case ROUNDED_NORMAL: NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation()); distribution = new RealRoundingDistribution(normalDist); break; case ZIPF: int cardinality; if (enumeratedValues == null) { Integer startInt = schema.getStartInt(); cardinality = schema.getEndInt() - startInt; ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent()); for (int i = 0; i < cardinality; i++) { probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i))); } } else { cardinality = enumeratedValues.size(); ZipfDistribution zipf = new 
ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent()); for (int i = 0; i < cardinality; i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i))); } } distribution = new EnumeratedTreeDistribution<>(probabilities); break; case ENUMERATED: for (int i = 0; i < enumeratedValues.size(); i++) { probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i))); } distribution = new EnumeratedTreeDistribution<>(probabilities); break; default: throw new UnsupportedOperationException("Unknown distribution type: " + distributionType); } if (distribution instanceof AbstractIntegerDistribution) { ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed); } else if (distribution instanceof AbstractRealDistribution) { ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed); } else { ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed); } }
From source file:org.apache.kylin.measure.topn.TopNCounterTest.java
protected String prepareTestDate() throws IOException { String[] allKeys = new String[KEY_SPACE]; for (int i = 0; i < KEY_SPACE; i++) { allKeys[i] = RandomStringUtils.randomAlphabetic(10); }// w ww . j a v a 2 s. co m outputMsg("Start to create test random data..."); long startTime = System.currentTimeMillis(); ZipfDistribution zipf = new ZipfDistribution(KEY_SPACE, 0.5); int keyIndex; File tempFile = File.createTempFile("ZipfDistribution", ".txt"); if (tempFile.exists()) FileUtils.forceDelete(tempFile); FileWriter fw = new FileWriter(tempFile); try { for (int i = 0; i < TOTAL_RECORDS; i++) { keyIndex = zipf.sample() - 1; fw.write(allKeys[keyIndex]); fw.write('\n'); } } finally { if (fw != null) fw.close(); } outputMsg("Create test data takes : " + (System.currentTimeMillis() - startTime) / 1000 + " seconds."); outputMsg("Test data in : " + tempFile.getAbsolutePath()); return tempFile.getAbsolutePath(); }