Java tutorial
/* * Copyright (C) 2014-2015 The National Library of Finland * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *********************************************************************************/ package fi.nationallibrary.ndl.solrvoikko2; import java.io.IOException; import java.io.StringReader; import java.util.LinkedList; import java.util.List; import java.util.Map.Entry; import java.util.concurrent.ConcurrentMap; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.analysis.core.StopFilterFactory; import org.puimula.libvoikko.Voikko; import com.googlecode.concurrentlinkedhashmap.ConcurrentLinkedHashMap; import fi.nationallibrary.ndl.solrvoikko2.VoikkoFilter.CompoundToken; import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; /** * Unit tests for Voikko * * @author ere.maijala@helsinki.fi * */ public class VoikkoTest extends TestCase { /** * Create the test case * * @param testName name of the test case */ public VoikkoTest(String testName) { super(testName); } /** * @return the suite of tests being tested */ public static Test suite() { return new TestSuite(VoikkoTest.class); } /** * Tests for Voikko */ public void testVoikko() throws IOException { LinkedList<Entry<String, String>> tests = new LinkedList<Entry<String, String>>(); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("kyminsanomat", "kyminsanoma [1:0:12],kymi [0:0:12],sanoma [1:0:12]")); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("taidemaalaus", "taidemaalaus [1:0:12],taide [0:0:12],maalaus [1:0:12]")); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("lopputarkastuspytkirja", "lopputarkastuspytkirja [1:0:24],loppu [0:0:24],tarkastus [1:0:24],pyt [1:0:24],kirja [1:0:24]")); tests.add( new java.util.AbstractMap.SimpleEntry<String, String>("totalgibberish", "totalgibberish [1:0:14]")); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("moottorisaha", "moottorisaha [1:0:12],moottori [0:0:12],saha [1:0:12]")); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("hyvinvointiasiantuntijajrjestelmss", "hyvinvointiasiantuntijajrjestelm [1:0:37],hyvinvointi [0:0:37],asia [1:0:37],tuntija [1:0:37],jrjestelm [1:0:37]")); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("kahdeksankulmainen", "kahdeksankulmainen [1:0:18],kahdeksan [0:0:18],kahdeksa [0:0:18],kulmainen [1:0:18]")); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("perinteinen puutarhakaluste", "perinteinen [1:0:11],puutarhakaluste [1:12:27],puu [0:12:27],tarha [1:12:27],kaluste [1:12:27]")); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("nuorisoty", "nuorisoty [1:0:10],nuoriso [0:0:10],ty [1:0:10]")); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("nuorisotyttmyys", "nuorisotyttmyys [1:0:17],nuoriso [0:0:17],tyttmyys [1:0:17]")); tests.add(new java.util.AbstractMap.SimpleEntry<String, String>("", "")); for (int i = 0; i < tests.size(); i++) { Entry<String, String> entry = tests.get(i); assertEquals("Testing '" + entry.getKey() + "'", entry.getValue(), getVoikkoWords(entry.getKey())); } } /** * Execute Voikko analysis and return results in a string * * @param term String to analyze * * @return Comma-separated list of results * @throws IOException */ final protected String getVoikkoWords(String term) throws IOException { ConcurrentMap<String, List<CompoundToken>> cache = new ConcurrentLinkedHashMap.Builder<String, List<CompoundToken>>() .maximumWeightedCapacity(100).build(); Tokenizer tokenizer = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY); tokenizer.setReader(new StringReader(term)); tokenizer.reset(); Voikko voikko = new Voikko("fi-x-morphoid"); VoikkoFilter voikkoFilter = new VoikkoFilter(tokenizer, voikko, true, VoikkoFilter.DEFAULT_MIN_WORD_SIZE, VoikkoFilter.DEFAULT_MIN_SUBWORD_SIZE, VoikkoFilter.DEFAULT_MAX_SUBWORD_SIZE, true, cache, 0); String results = ""; //voikkoFilter.reset(); while (voikkoFilter.incrementToken()) { if (!results.isEmpty()) { results += ","; } results += voikkoFilter.termAtt.toString() + " [" + voikkoFilter.posIncAtt.getPositionIncrement() + ":" + voikkoFilter.offsetAtt.startOffset() + ":" + voikkoFilter.offsetAtt.endOffset() + "]"; } voikkoFilter.close(); return results; } }