Example usage for org.apache.lucene.analysis StopFilter makeStopSet

List of usage examples for org.apache.lucene.analysis StopFilter makeStopSet

Introduction

On this page you can find example usages of org.apache.lucene.analysis StopFilter makeStopSet.

Prototype

public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) 

Source Link

Document

Creates a stopword set from the given stopword list.

Usage

From source file:brazilianStemmer.BrazilianAnalyzer.java

License:Apache License

/**
 * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
 *///w  w w.  j  av a2s.co  m
public BrazilianAnalyzer() {
    /*
     * aparentemente tem algum bug com o metodo 
     * StopFilter.makeStopSet na versao 1.9.1 do Lucene
     * 
     * O metodo makeStopSet estah implementado nesta classe.
     * 
     */
    //stopWords = StopFilter.makeStopSet(BRAZILIAN_STOP_WORDS);
    stopWords = StopFilter.makeStopSet(BRAZILIAN_STOP_WORDS, false);
    init();
}

From source file:brazilianStemmer.BrazilianAnalyzer.java

License:Apache License

/**
 * Populates the English stop word set from {@code StandardAnalyzer.STOP_WORDS}.
 */
private void init() {
    // Historical note (translated from Portuguese): StopFilter.makeStopSet
    // apparently had a bug in Lucene 1.9.1, and a makeStopSet replacement was
    // said to be implemented in this class. The call below nevertheless goes
    // to StopFilter's two-argument overload.
    final boolean ignoreCase = false;
    this.englishStopWords = StopFilter.makeStopSet(StandardAnalyzer.STOP_WORDS, ignoreCase);

    // Disabled experiments ("By Fabricio"), kept here for reference only:
    //   englishStopWords += "back", "part", "about", "against", "abuse", "download"
    //   stopWords        += "teor", "conteudo"
}

From source file:brazilianStemmer.BrazilianAnalyzer.java

License:Apache License

/**
 * Builds an analyzer with the given stop words.
 *
 * @param stopwords the words to treat as stop words; passed to
 *                  {@code StopFilter.makeStopSet} with {@code ignoreCase}
 *                  set to {@code false}
 */
public BrazilianAnalyzer(String[] stopwords) {

    /*
     * Historical note (translated from Portuguese): StopFilter.makeStopSet
     * apparently had a bug in Lucene 1.9.1, and a makeStopSet replacement
     * was said to be implemented in this class. The call below nevertheless
     * goes to StopFilter's two-argument overload.
     *
     * This comment previously contained a nested block-comment marker that
     * ended it on its first line, leaving the remaining lines as
     * uncompilable text; it is now a single well-formed block comment.
     */
    stopWords = StopFilter.makeStopSet(stopwords, false);
    init();
}

From source file:brazilianStemmer.BrazilianAnalyzer.java

License:Apache License

/**
 * Builds the stem exclusion table from an array of strings.
 *
 * @param exclusionlist the words used to build the exclusion set
 */
public void setStemExclusionTable(String[] exclusionlist) {
    // Historical note (translated from Portuguese): StopFilter.makeStopSet
    // apparently had a bug in Lucene 1.9.1, and a makeStopSet replacement was
    // said to be implemented in this class. The call below nevertheless goes
    // to StopFilter's two-argument overload.
    final boolean ignoreCase = false;
    this.exclusionTable = StopFilter.makeStopSet(exclusionlist, ignoreCase);
}

From source file:com.bizosys.hsearch.inpipe.util.StopwordRefresh.java

License:Apache License

/**
 * Builds the stop word set from the supplied list.
 *
 * @param allStopWords the stop words to include; may be {@code null}
 * @return the set produced by {@code StopFilter.makeStopSet}, or {@code null}
 *         (after logging a warning) when {@code allStopWords} is {@code null}
 * @throws SystemFault wrapping any exception raised while building the set
 */
@SuppressWarnings("unchecked")
private Set<String> buildStopwords(List<String> allStopWords) throws SystemFault {
    if (allStopWords == null) {
        InpipeLog.l.warn(" FilterStopWords: No stop words.");
        return null;
    }

    try {
        // The result is held as a raw Set first and then narrowed with an
        // unchecked cast, mirroring how the value is exposed to callers.
        final Set rawStopSet = StopFilter.makeStopSet(LuceneConstants.version, allStopWords);
        final Set<String> typedStopSet = (Set<String>) rawStopSet;
        if (InpipeLog.l.isInfoEnabled()) {
            InpipeLog.l.info(" StopwordManager: stopWords.size - " + typedStopSet.size());
        }
        return typedStopSet;
    } catch (Exception failure) {
        throw new SystemFault(failure);
    }
}

From source file:com.github.pmerienne.trident.ml.preprocessing.EnglishTokenizer.java

License:Apache License

/**
 * Creates the token stream for the given text.
 *
 * <p>Uses the configured stop words when present, otherwise
 * {@code EnglishAnalyzer.getDefaultStopSet()}. When n-grams are enabled the
 * stream is wrapped in a {@code ShingleFilter}.
 *
 * @param text the text to tokenize
 * @return the analyzer's token stream, wrapped in a {@code ShingleFilter}
 *         when {@code nGram} is set
 */
protected TokenStream createTokenStream(String text) {
    final Set<?> effectiveStopWords;
    if (this.stopWords != null) {
        effectiveStopWords = StopFilter.makeStopSet(LUCENE_VERSION, this.stopWords);
    } else {
        effectiveStopWords = EnglishAnalyzer.getDefaultStopSet();
    }

    final Analyzer analyzer =
            new EnglishSpecialAnalyzer(LUCENE_VERSION, effectiveStopWords, this.stemExclusionsSet);
    final TokenStream baseStream = analyzer.tokenStream(null, new StringReader(text));

    return this.nGram
            ? new ShingleFilter(baseStream, this.minNGram, this.maxNGram)
            : baseStream;
}

From source file:com.ikon.analysis.SpanishAnalyzer.java

License:Open Source License

/**
 * Creates an analyzer whose stop table is built from
 * {@code SPANISH_STOP_WORDS}.
 */
public SpanishAnalyzer() {
    this.stopTable = StopFilter.makeStopSet(Config.LUCENE_VERSION, SPANISH_STOP_WORDS);
}

From source file:com.ikon.analysis.SpanishAnalyzer.java

License:Open Source License

/**
 * Creates an analyzer whose stop table is built from the given words.
 *
 * @param stopWords the words to use as stop words
 */
public SpanishAnalyzer(String[] stopWords) {
    this.stopTable = StopFilter.makeStopSet(Config.LUCENE_VERSION, stopWords);
}

From source file:jp.mwsoft.cjkanalyzers.CJKAnalyzerBase.java

License:Apache License

/**
 * Builds an analyzer which removes words in the provided array.
 *
 * @param matchVersion
 *            version value forwarded both to {@code StopFilter.makeStopSet}
 *            and to the superclass constructor
 * @param stopWords
 *            stop word array
 * @deprecated use {@link #CJKAnalyzer(Version, Set)} instead
 */
@Deprecated
public CJKAnalyzerBase(Version matchVersion, String... stopWords) {
    // NOTE(review): the @deprecated link names CJKAnalyzer, but this class is
    // CJKAnalyzerBase -- confirm the intended replacement constructor.
    // Converts the varargs array into a stop set and delegates to the superclass.
    super(matchVersion, StopFilter.makeStopSet(matchVersion, stopWords));
}

From source file:net.sf.jtmt.summarizers.SummaryAnalyzer.java

License:Apache License

/**
 * Instantiates a new summary analyzer./*from   w  ww. j  a va 2 s.c o  m*/
 *
 * @throws IOException Signals that an I/O exception has occurred.
 */
public SummaryAnalyzer() throws IOException {
    String[] stopwords = filterComments(StringUtils.split(FileUtils.readFileToString(
            new File(getClass().getResource("/resources/jtmt/stopwords.txt").getFile()), "UTF-8")));
    this.stopset = StopFilter.makeStopSet(stopwords, true);
}