List of usage examples for org.apache.lucene.analysis.ro.RomanianAnalyzer#getDefaultStopSet()
public static CharArraySet getDefaultStopSet()
From source file:com.stratio.cassandra.lucene.schema.analysis.SnowballAnalyzerBuilder.java
License:Apache License
/**
 * Returns the default stopwords set used by the Lucene language analyzer for the specified language.
 *
 * <p>The language name is matched case-sensitively against the names listed below. Any other
 * non-null value yields {@link CharArraySet#EMPTY_SET}. A {@code null} language causes a
 * {@link NullPointerException}, because the lookup is a {@code switch} on the string.
 *
 * @param language The language for which the stopwords are. The supported languages are English, French, Spanish,
 *                 Portuguese, Italian, Romanian, German, Dutch, Swedish, Norwegian, Danish, Russian, Finnish,
 *                 Irish, Hungarian, Turkish, Armenian, Basque and Catalan.
 * @return The default stopwords set of the matching Lucene analyzer, or an empty set if the language
 *         is not one of the supported names.
 */
private static CharArraySet getDefaultStopwords(String language) {
    switch (language) {
        case "English":
            return EnglishAnalyzer.getDefaultStopSet();
        case "French":
            return FrenchAnalyzer.getDefaultStopSet();
        case "Spanish":
            return SpanishAnalyzer.getDefaultStopSet();
        case "Portuguese":
            return PortugueseAnalyzer.getDefaultStopSet();
        case "Italian":
            return ItalianAnalyzer.getDefaultStopSet();
        case "Romanian":
            return RomanianAnalyzer.getDefaultStopSet();
        case "German":
            return GermanAnalyzer.getDefaultStopSet();
        case "Dutch":
            return DutchAnalyzer.getDefaultStopSet();
        case "Swedish":
            return SwedishAnalyzer.getDefaultStopSet();
        case "Norwegian":
            return NorwegianAnalyzer.getDefaultStopSet();
        case "Danish":
            return DanishAnalyzer.getDefaultStopSet();
        case "Russian":
            return RussianAnalyzer.getDefaultStopSet();
        case "Finnish":
            return FinnishAnalyzer.getDefaultStopSet();
        case "Irish":
            return IrishAnalyzer.getDefaultStopSet();
        case "Hungarian":
            return HungarianAnalyzer.getDefaultStopSet();
        case "Turkish":
            // Previously returned the Spanish stop set by mistake. Fully qualified so that
            // no additional import line is required.
            return org.apache.lucene.analysis.tr.TurkishAnalyzer.getDefaultStopSet();
        case "Armenian":
            // Previously returned the Spanish stop set by mistake. Fully qualified so that
            // no additional import line is required.
            return org.apache.lucene.analysis.hy.ArmenianAnalyzer.getDefaultStopSet();
        case "Basque":
            return BasqueAnalyzer.getDefaultStopSet();
        case "Catalan":
            return CatalanAnalyzer.getDefaultStopSet();
        default:
            return CharArraySet.EMPTY_SET;
    }
}
From source file:indexer.CustomAnalyzer.java
/**
 * Wraps the given stream in two stop filters applied one after the other.
 *
 * <p>The inner filter uses {@link RomanianAnalyzer#getDefaultStopSet()}; the outer filter uses a
 * stop set built from the words returned by {@code getStopwords()}.
 *
 * @param tokenStream the stream to filter
 * @return the stream wrapped in both stop filters
 * @throws IOException declared by the signature; presumably raised by {@code getStopwords()} —
 *         confirm against that method
 */
private TokenStream filterStopWords(TokenStream tokenStream) throws IOException {
    // First pass: Lucene's built-in Romanian stop words.
    final TokenStream withoutDefaultStopWords =
            new StopFilter(tokenStream, RomanianAnalyzer.getDefaultStopSet());

    // Second pass: the additional stop words supplied by getStopwords().
    final CharArraySet additionalStopSet = StopFilter.makeStopSet(getStopwords());
    return new StopFilter(withoutDefaultStopWords, additionalStopSet);
}
From source file:my_code.MyRomanianAnalyzer.java
License:Apache License
/**
 * Creates the analyzer, passing the stop set returned by
 * {@link RomanianAnalyzer#getDefaultStopSet()} to the superclass constructor.
 */
public MyRomanianAnalyzer() { super(RomanianAnalyzer.getDefaultStopSet()); }
From source file:org.elasticsearch.analysis.common.RomanianAnalyzerProvider.java
License:Apache License
RomanianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new RomanianAnalyzer(
Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);/*from ww w .ja va 2s.c o m*/
}
From source file:org.elasticsearch.index.analysis.RomanianAnalyzerProvider.java
License:Apache License
@Inject public RomanianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); analyzer = new RomanianAnalyzer(version, Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet(), version), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version)); }
From source file:org.omegat.tokenizer.LuceneRomanianTokenizer.java
License:Open Source License
/**
 * Produces a token stream for the given text.
 *
 * <p>When stemming is not allowed, the text is tokenized with a plain {@link StandardTokenizer}
 * and {@code stopWordsAllowed} is ignored. When stemming is allowed, a {@link RomanianAnalyzer}
 * is used; it is given the default Romanian stop set if {@code stopWordsAllowed} is true, and an
 * empty set otherwise.
 *
 * @param strOrig          the text to tokenize
 * @param stemsAllowed     whether to use the Romanian analyzer rather than the plain tokenizer
 * @param stopWordsAllowed whether the analyzer is given the default Romanian stop set
 * @return a token stream over {@code strOrig}
 */
@Override
protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed,
        final boolean stopWordsAllowed) {
    if (!stemsAllowed) {
        return new StandardTokenizer(getBehavior(), new StringReader(strOrig));
    }

    final Set<?> stopWords;
    if (stopWordsAllowed) {
        stopWords = RomanianAnalyzer.getDefaultStopSet();
    } else {
        stopWords = Collections.EMPTY_SET;
    }

    final RomanianAnalyzer romanianAnalyzer = new RomanianAnalyzer(getBehavior(), stopWords);
    return romanianAnalyzer.tokenStream("", new StringReader(strOrig));
}