List of usage examples for the org.apache.lucene.analysis.cz.CzechAnalyzer constructor CzechAnalyzer
public CzechAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable)
From source file: org.elasticsearch.analysis.common.CzechAnalyzerProvider.java
License: Apache License
CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new CzechAnalyzer(Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);/*w ww.java2 s. c om*/
}
From source file: org.omegat.tokenizer.LuceneCzechTokenizer.java
License: Open Source License
/**
 * Produces a token stream over {@code strOrig}.
 *
 * <p>When {@code stemsAllowed} is false the text is fed straight into a
 * {@link StandardTokenizer}. Otherwise a {@link CzechAnalyzer} is created and its
 * {@code tokenStream} for the empty field name {@code ""} is returned; that analyzer
 * receives {@code CzechAnalyzer.CZECH_STOP_WORDS} if {@code stopWordsAllowed} is true
 * and {@code EMPTY_STRING_LIST} if it is false.
 *
 * @param strOrig          the text to tokenize
 * @param stemsAllowed     selects the {@code CzechAnalyzer} path (true) or the
 *                         plain {@code StandardTokenizer} path (false)
 * @param stopWordsAllowed selects which stop-word array is handed to the analyzer;
 *                         has no effect when {@code stemsAllowed} is false
 * @return the resulting token stream
 */
@Override
protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed,
        final boolean stopWordsAllowed) {
    // Non-stemming path: no analyzer involved, just the standard tokenizer.
    if (!stemsAllowed) {
        return new StandardTokenizer(getBehavior(), new StringReader(strOrig));
    }

    final String[] stopWords;
    if (stopWordsAllowed) {
        stopWords = CzechAnalyzer.CZECH_STOP_WORDS;
    } else {
        stopWords = EMPTY_STRING_LIST;
    }

    final CzechAnalyzer czechAnalyzer = new CzechAnalyzer(getBehavior(), stopWords);
    return czechAnalyzer.tokenStream("", new StringReader(strOrig));
}