Example usage for org.apache.lucene.analysis CharArraySet unmodifiableSet

List of usage examples for org.apache.lucene.analysis CharArraySet unmodifiableSet

Introduction

This page lists example usages of org.apache.lucene.analysis.CharArraySet.unmodifiableSet.

Prototype

public static CharArraySet unmodifiableSet(CharArraySet set) 

Source Link

Document

Returns an unmodifiable CharArraySet.

Usage

From source file:RomanianAnalyzer.java

License:Apache License

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link KeywordMarkerFilter} before
 * stemming.
 *
 * <p>The exclusion set is passed through {@code CharArraySet.copy} and the result is
 * stored wrapped by {@code CharArraySet.unmodifiableSet}.
 *
 * @param matchVersion lucene compatibility version
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public RomanianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}

From source file:com.asimihsan.handytrowel.nlp.StopwordAnnotator.java

License:Open Source License

/**
 * Parses a comma-separated stop word list into an unmodifiable {@link CharArraySet}.
 *
 * <p>Each term is trimmed of surrounding whitespace and blank entries are skipped, so
 * {@code "a, an,,the"} yields the three terms {@code a}, {@code an} and {@code the}.
 * Without trimming, a term such as {@code " an"} would be stored with its leading
 * space and could never match a token.
 *
 * @param luceneVersion lucene compatibility version handed to the set constructor
 * @param stopwordList comma-separated stop words; must not be {@code null}
 * @param ignoreCase handed to the {@link CharArraySet} constructor as its ignore-case flag
 * @return an unmodifiable set holding the parsed terms
 */
public static CharArraySet getStopWordList(Version luceneVersion, String stopwordList, boolean ignoreCase) {
    String[] terms = stopwordList.split(",");
    // terms.length is only an initial capacity hint; skipped blanks make the set smaller.
    CharArraySet stopwordSet = new CharArraySet(luceneVersion, terms.length, ignoreCase);
    for (String term : terms) {
        String trimmed = term.trim();
        if (!trimmed.isEmpty()) {
            stopwordSet.add(trimmed);
        }
    }
    return CharArraySet.unmodifiableSet(stopwordSet);
}

From source file:com.mozilla.grouperfish.lucene.analysis.en.EnglishAnalyzer.java

License:Apache License

/**
 * Builds an analyzer with the given stop words. If a non-empty stem
 * exclusion set is provided this analyzer will add a
 * {@link KeywordMarkerFilter} before stemming.
 *
 * <p>The exclusion set is passed through {@code CharArraySet.copy} and the
 * result is stored wrapped by {@code CharArraySet.unmodifiableSet}.
 *
 * @param matchVersion
 *            lucene compatibility version
 * @param stopwords
 *            a stopword set
 * @param stem
 *            flag stored on this analyzer; presumably enables stemming
 *            (confirm against the code that reads it)
 * @param stemExclusionSet
 *            a set of terms not to be stemmed
 */
public EnglishAnalyzer(Version matchVersion, Set<?> stopwords, boolean stem, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stem = stem;
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}

From source file:com.mozilla.grouperfish.lucene.analysis.en.NGramEnglishAnalyzer.java

License:Apache License

public NGramEnglishAnalyzer(Version matchVersion, Set<?> stopwords, boolean stem, boolean outputUnigrams,
        int minNGram, int maxNGram, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stem = stem;
    this.outputUnigrams = outputUnigrams;
    this.minNGram = minNGram;
    this.maxNGram = maxNGram;
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}

From source file:crawler.util.analyzer.TwitterAnalyzer.java

License:Apache License

/**
 * Creates the analyzer and prepares its stop word list.
 *
 * NOTE(review): stopSet, stopWords and stopWordList are declared outside this snippet;
 * their types and whether they are static or per-instance are not visible here.
 */
public TwitterAnalyzer() {
    // Merge every entry of stopWords into stopSet.
    stopSet.addAll(stopWords);
    // Publish stopSet through an unmodifiable wrapper. No copy is made here, so
    // presumably later changes to stopSet would show through the wrapper - confirm.
    stopWordList = CharArraySet.unmodifiableSet(stopSet);
}

From source file:edu.mit.ll.vizlincdb.document.FoldingSpanishAnalyzer.java

License:Apache License

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link KeywordMarkerFilter} before
 * stemming.
 *
 * <p>The exclusion set is passed through {@code CharArraySet.copy} and the result is
 * stored wrapped by {@code CharArraySet.unmodifiableSet}.
 *
 * @param matchVersion lucene compatibility version
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public FoldingSpanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}

From source file:org.apache.jena.query.text.filter.SelectiveFoldingFilter.java

License:Apache License

/**
 * Creates a filter over {@code input} that stores a copy of the whitelist wrapped as
 * an unmodifiable set.
 *
 * @param input token stream handed to the superclass constructor
 * @param whitelisted whitelisted characters; must not be {@code null}
 * @throws NullPointerException if {@code whitelisted} is {@code null}
 */
public SelectiveFoldingFilter(TokenStream input, CharArraySet whitelisted) {
    super(input);
    Objects.requireNonNull(whitelisted, "You must provide the list of whitelisted characters.");
    this.whitelisted = CharArraySet.unmodifiableSet(CharArraySet.copy(whitelisted));
}

From source file:org.codelibs.elasticsearch.index.analysis.SnowballAnalyzer.java

License:Apache License

/** Builds the named analyzer with the given stop words. */
public SnowballAnalyzer(String name, CharArraySet stopWords) {
    this(name);/*  w  w  w. j ava 2  s .co  m*/
    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords));
}

From source file:org.elasticsearch.analysis.common.SnowballAnalyzer.java

License:Apache License

/** Builds the named analyzer with the given stop words. */
SnowballAnalyzer(String name, CharArraySet stopWords) {
    this(name);
    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords));
}

From source file:reaction.news.index.MyPortugueseAnalyzer.java

License:Apache License

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link KeywordMarkerFilter} before
 * stemming.//from  w  w  w  . j  a  va2 s.c o m
 * 
 * @param matchVersion lucene compatibility version
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public MyPortugueseAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}