org.trnltk.apps.experiments.TurkishCollatorPerformanceTest.java Source code

Java tutorial

Introduction

Here is the source code for org.trnltk.apps.experiments.TurkishCollatorPerformanceTest.java, a JUnit test that times sorting a large word list with and without a Turkish Collator.

Source

/*
 * Copyright  2013  Ali Ok (aliokATapacheDOTorg)
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.trnltk.apps.experiments;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.time.StopWatch;
import org.junit.Ignore;
import org.junit.Test;
import org.trnltk.util.Constants;

import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Benchmark that times {@link Collections#sort(List)} on a large list of words, first with the
 * natural {@link String} ordering and then with a Turkish {@link Collator} of strength
 * {@link Collator#PRIMARY}.
 * <p>
 * Try with -Xmx7000m -Xms5000m
 *
 * @deprecated Collator is too slow!
 * On a pretty powerful machine, sorting 35M words took:
 * <ul>
 * <li>0:00:03.429 without the collator</li>
 * <li>0:02:29.257 with the collator with the strength PRIMARY</li>
 * </ul>
 */
@Deprecated
public class TurkishCollatorPerformanceTest {

    /** Number of times the sample word list is appended to the list that gets sorted. */
    private static final int REPETITIONS = 1 * 1000 * 1000;

    /** A progress line is printed once every this many repetitions. */
    private static final int PROGRESS_STEP = REPETITIONS / 10;

    /** Splits the sample text into non-empty, trimmed lines. */
    private static final Splitter LINE_SPLITTER = Splitter.on("\n").trimResults().omitEmptyStrings();

    /** Splits one line into non-empty, trimmed, space-separated tokens. */
    private static final Splitter TOKEN_SPLITTER = Splitter.on(" ").trimResults().omitEmptyStrings();

    /**
     * Sample data, one {@code "<word> <count>"} entry per line. Only the first token of each
     * entry is used; the second token is ignored.
     * <p>
     * NOTE(review): the entry {@code " 1746\n"} has no word token once trimmed, so
     * {@link #parseWords(String)} rejects it with an {@link IllegalArgumentException}. Several
     * other entries (e.g. {@code "Trkiye'de"}) also look like they lost non-ASCII characters --
     * presumably an encoding problem; restore this literal from the original corpus.
     */
    private static final String SAMPLE_TEXT = "aklamada 2735\n" + "gn 2678\n" + "ekilde 2405\n" + "zere 2325\n" + "tm 2285\n"
            + "10 2252\n" + "ylnda 2223\n" + "gn 2151\n" + "\". 2140\n" + "5 2107\n" + "4 2018\n"
            + "... 1940\n" + "Trkiye'de 1933\n" + "ynelik 1925\n" + "zerinde 1910\n" + "zel 1906\n"
            + "yksek 1812\n" + " 1746\n" + "kt 1732\n" + "yle 1727\n" + "u 1693\n"
            + "kan 1653\n" + "; 1608\n" + "gvenlik 1602\n" + "6 1570\n" + "srasnda 1569\n"
            + "sz 1554\n" + "srail 1539\n" + "aklad 1528\n" + "wn 1505\n" + "qn 1505\n"
            + "xn 1505\n" + "zn 1505\n" + "n 1505\n" + "Dileri 1489\n";

    /**
     * Sorts two freshly built copies of the big word list -- one with the natural ordering, one
     * with the Turkish collator -- and prints the elapsed time of each sort to standard output.
     * Building the lists is not part of the measured time.
     */
    @Test
    @Ignore
    public void testCollatorPerformance() {
        final Collator collator = Collator.getInstance(Constants.TURKISH_LOCALE);
        collator.setStrength(Collator.PRIMARY);

        List<String> biggerList = getList();

        System.out.println("Entry count : " + biggerList.size());

        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        Collections.sort(biggerList);
        stopWatch.stop();
        System.out.println("w/o collator, it took " + stopWatch.toString());

        // Rebuild the list so the second sort starts from the same unsorted input as the first.
        biggerList = getList();
        stopWatch.reset();
        stopWatch.start();
        Collections.sort(biggerList, collator);
        stopWatch.stop();
        System.out.println("w/ collator, it took " + stopWatch.toString());

    }

    /**
     * Builds the list to sort by appending the words of {@link #SAMPLE_TEXT}
     * {@link #REPETITIONS} times, printing a progress line every {@link #PROGRESS_STEP}
     * repetitions.
     *
     * @return a new, unsorted, mutable list
     * @throws IllegalArgumentException if an entry of the sample text is malformed
     */
    private List<String> getList() {
        final List<String> wordList = parseWords(SAMPLE_TEXT);

        // The final size is known up front; presizing avoids repeatedly growing and copying
        // the backing array while tens of millions of elements are appended.
        final List<String> biggerList = new ArrayList<String>(REPETITIONS * wordList.size());
        for (int i = 0; i < REPETITIONS; i++) {
            biggerList.addAll(wordList);
            if (i % PROGRESS_STEP == 0) {
                System.out.println("Adding " + i + ". list to the bigger list");
            }
        }
        return biggerList;
    }

    /**
     * Extracts the first token of every non-empty line of {@code text}.
     *
     * @param text entries separated by {@code '\n'}, each made of exactly two space-separated tokens
     * @return the first token of each entry, in input order
     * @throws IllegalArgumentException if a line does not consist of exactly two tokens
     */
    private static List<String> parseWords(String text) {
        final List<String> words = new ArrayList<String>();
        for (String line : LINE_SPLITTER.split(text)) {
            final List<String> tokens = Lists.newArrayList(TOKEN_SPLITTER.split(line));
            // The message is a String.format pattern, so the data line must be passed as an
            // argument; using the line itself as the pattern would misbehave on a '%'.
            Validate.isTrue(tokens.size() == 2, "Malformed entry, expected '<word> <count>': %s", line);
            words.add(tokens.get(0));
        }
        return words;
    }

}