Usage examples for the method org.apache.lucene.analysis.CharArraySet.unmodifiableSet
public static CharArraySet unmodifiableSet(CharArraySet set)
From source file:RomanianAnalyzer.java
License:Apache License
/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link KeywordMarkerFilter} before
 * stemming.
 *
 * @param matchVersion lucene compatibility version
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public RomanianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // Store the result of CharArraySet.copy wrapped by CharArraySet.unmodifiableSet.
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}
From source file:com.asimihsan.handytrowel.nlp.StopwordAnnotator.java
License:Open Source License
/**
 * Builds a stop word set from a comma-separated list.
 *
 * @param luceneVersion version value handed to the {@link CharArraySet} constructor
 * @param stopwordList stop words separated by {@code ","}; entries are added exactly as
 *        split, without trimming
 * @param ignoreCase flag handed to the {@link CharArraySet} constructor
 * @return the populated set, wrapped by {@link CharArraySet#unmodifiableSet}
 */
public static CharArraySet getStopWordList(Version luceneVersion, String stopwordList, boolean ignoreCase) {
    final String[] words = stopwordList.split(",");
    final CharArraySet result = new CharArraySet(luceneVersion, words.length, ignoreCase);
    for (int i = 0; i < words.length; i++) {
        result.add(words[i]);
    }
    return CharArraySet.unmodifiableSet(result);
}
From source file:com.mozilla.grouperfish.lucene.analysis.en.EnglishAnalyzer.java
License:Apache License
/**
 * Builds an analyzer with the given stop words. If a non-empty stem
 * exclusion set is provided this analyzer will add a
 * {@link KeywordMarkerFilter} before stemming.
 *
 * @param matchVersion
 *            lucene compatibility version
 * @param stopwords
 *            a stopword set
 * @param stem
 *            flag stored in the {@code stem} field (presumably toggles stemming;
 *            confirm against where the field is read)
 * @param stemExclusionSet
 *            a set of terms not to be stemmed
 */
public EnglishAnalyzer(Version matchVersion, Set<?> stopwords, boolean stem, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stem = stem;
    // Store the result of CharArraySet.copy wrapped by CharArraySet.unmodifiableSet.
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}
From source file:com.mozilla.grouperfish.lucene.analysis.en.NGramEnglishAnalyzer.java
License:Apache License
public NGramEnglishAnalyzer(Version matchVersion, Set<?> stopwords, boolean stem, boolean outputUnigrams, int minNGram, int maxNGram, Set<?> stemExclusionSet) { super(matchVersion, stopwords); this.stem = stem; this.outputUnigrams = outputUnigrams; this.minNGram = minNGram; this.maxNGram = maxNGram; this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet)); }
From source file:crawler.util.analyzer.TwitterAnalyzer.java
License:Apache License
/**
 * Creates the analyzer: adds every element of {@code stopWords} to {@code stopSet}, then
 * stores the result of {@link CharArraySet#unmodifiableSet} for that set in
 * {@code stopWordList}. Both source fields are declared outside this snippet.
 */
public TwitterAnalyzer() { stopSet.addAll(stopWords); stopWordList = CharArraySet.unmodifiableSet(stopSet); }
From source file:edu.mit.ll.vizlincdb.document.FoldingSpanishAnalyzer.java
License:Apache License
/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link KeywordMarkerFilter} before
 * stemming.
 *
 * @param matchVersion lucene compatibility version
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public FoldingSpanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // Store the result of CharArraySet.copy wrapped by CharArraySet.unmodifiableSet.
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}
From source file:org.apache.jena.query.text.filter.SelectiveFoldingFilter.java
License:Apache License
public SelectiveFoldingFilter(TokenStream input, CharArraySet whitelisted) { super(input); Objects.requireNonNull(whitelisted, "You must provide the list of whiltelisted characters."); this.whitelisted = CharArraySet.unmodifiableSet(CharArraySet.copy(whitelisted)); }
From source file:org.codelibs.elasticsearch.index.analysis.SnowballAnalyzer.java
License:Apache License
/** Builds the named analyzer with the given stop words. */ public SnowballAnalyzer(String name, CharArraySet stopWords) { this(name);/* w w w. j ava 2 s .co m*/ stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords)); }
From source file:org.elasticsearch.analysis.common.SnowballAnalyzer.java
License:Apache License
/** Builds the named analyzer with the given stop words. */ SnowballAnalyzer(String name, CharArraySet stopWords) { this(name); stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords)); }
From source file:reaction.news.index.MyPortugueseAnalyzer.java
License:Apache License
/** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is * provided this analyzer will add a {@link KeywordMarkerFilter} before * stemming.//from w w w . j a va2 s.c o m * * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ public MyPortugueseAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) { super(matchVersion, stopwords); this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet)); }