List of usage examples for the org.apache.lucene.analysis.hunspell.HunspellStemFilter constructor
public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup)
From source file:org.elasticsearch.analysis.hunspell.TestStemming.java
License:Apache License
/**
 * Runs the stemming expectations stored in {@code /stemming-data/<language>.txt} against a
 * {@link HunspellStemFilter} built from the {@code .dic}/{@code .aff} resources of the same
 * language.
 *
 * <p>Each line of the data file may carry a trailing {@code #} comment, which is stripped.
 * Lines that are empty after trimming are skipped. Every remaining line must consist of exactly
 * two whitespace-separated elements: the input word and a comma-separated list of expected
 * stems. Each pair is handed to {@code compareStems} together with the current line number.
 *
 * @throws RuntimeException if a data line does not contain exactly two elements
 * @throws Exception        if reading the resources or building the dictionary fails
 */
public void test() throws Exception {
    // try-with-resources guarantees the reader (and the analyzer below) are released even
    // when an expectation fails or a malformed line aborts the loop.
    try (LineNumberReader reader = new LineNumberReader(IOUtils.getDecodingReader(
            getClass().getResourceAsStream("/stemming-data/" + language + ".txt"),
            StandardCharsets.UTF_8))) {

        // These streams are stored in variables declared outside this method and are not
        // closed here.
        // NOTE(review): presumably closed by the test's teardown - confirm.
        dictionaryStream = getClass().getResourceAsStream("/" + language + "/" + language + ".dic");
        affixStream = getClass().getResourceAsStream("/" + language + "/" + language + ".aff");
        final Dictionary dictionary = new Dictionary(affixStream, dictionaryStream);

        try (Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String field) {
                // KEYWORD tokenizer: the whole input is emitted as a single token.
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
                HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, false);
                return new TokenStreamComponents(tokenizer, filter);
            }
        }) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Drop everything from the first '#' onwards (inline comment).
                int comment = line.indexOf('#');
                if (comment >= 0) {
                    line = line.substring(0, comment);
                }
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }

                String[] elements = line.split("\\s+");
                if (elements.length != 2) {
                    throw new RuntimeException(
                            "Illegal number of elements in line: " + reader.getLineNumber());
                }

                String input = elements[0];
                String[] outputs = elements[1].split(",");
                compareStems(analyzer, input, outputs, reader.getLineNumber());
            }
        }
    }
}
From source file:org.elasticsearch.index.analysis.HunspellStemFilterFactory.java
License:Apache License
/**
 * Wraps the given token stream in a {@link HunspellStemFilter} configured with this
 * factory's {@code dictionary} and {@code dedup} setting.
 *
 * @param tokenStream the stream to wrap
 * @return a new {@code HunspellStemFilter} reading from {@code tokenStream}
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream stemmed = new HunspellStemFilter(tokenStream, dictionary, dedup);
    return stemmed;
}