List of usage examples for the makeStopSet method of org.apache.lucene.analysis.StopFilter
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase)
From source file:brazilianStemmer.BrazilianAnalyzer.java
License:Apache License
/** * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}). *///w w w. j av a2s.co m public BrazilianAnalyzer() { /* * aparentemente tem algum bug com o metodo * StopFilter.makeStopSet na versao 1.9.1 do Lucene * * O metodo makeStopSet estah implementado nesta classe. * */ //stopWords = StopFilter.makeStopSet(BRAZILIAN_STOP_WORDS); stopWords = StopFilter.makeStopSet(BRAZILIAN_STOP_WORDS, false); init(); }
From source file:brazilianStemmer.BrazilianAnalyzer.java
License:Apache License
private void init() { /*//ww w.j ava 2 s . co m * aparentemente tem algum bug com o metodo * StopFilter.makeStopSet na versao 1.9.1 do Lucene * * O metodo makeStopSet estah implementado nesta classe. * */ //englishStopWords = StopFilter.makeStopSet(StandardAnalyzer.STOP_WORDS); englishStopWords = StopFilter.makeStopSet(StandardAnalyzer.STOP_WORDS, false); // By Fabricio //englishStopWords.add("back"); //englishStopWords.add("part"); //englishStopWords.add("about"); //englishStopWords.add("against"); //englishStopWords.add("abuse"); //englishStopWords.add("download"); /*englishStopWords.add(""); englishStopWords.add(""); englishStopWords.add(""); englishStopWords.add(""); englishStopWords.add(""); */ // By Fabricio //stopWords.add("teor"); //stopWords.add("conteudo"); }
From source file:brazilianStemmer.BrazilianAnalyzer.java
License:Apache License
/**
 * Creates an analyzer that uses the given stop words.
 *
 * @param stopwords the words to build the stop set from
 */
public BrazilianAnalyzer(String[] stopwords) {
    // Original author's note (translated from Portuguese): "apparently there is
    // some bug with the StopFilter.makeStopSet method in Lucene 1.9.1; the
    // makeStopSet method is implemented in this class."
    // The earlier single-argument call was replaced by the two-argument
    // overload used below, with false passed as the second argument.
    this.stopWords = StopFilter.makeStopSet(stopwords, false);
    init();
}
From source file:brazilianStemmer.BrazilianAnalyzer.java
License:Apache License
/** * Builds an exclusionlist from an array of Strings. *///from w ww .ja va 2 s . co m public void setStemExclusionTable(String[] exclusionlist) { /* * aparentemente tem algum bug com o metodo * StopFilter.makeStopSet na versao 1.9.1 do Lucene * * O metodo makeStopSet estah implementado nesta classe. * */ //exclusionTable = StopFilter.makeStopSet(exclusionlist); exclusionTable = StopFilter.makeStopSet(exclusionlist, false); }
From source file:com.bizosys.hsearch.inpipe.util.StopwordRefresh.java
License:Apache License
/**
 * Builds a fresh stop word set from the supplied list.
 *
 * @param allStopWords the stop words to load; may be {@code null}
 * @return the set produced by {@code StopFilter.makeStopSet}, or {@code null}
 *         (after logging a warning) when {@code allStopWords} is {@code null}
 * @throws SystemFault wrapping any exception raised while building the set
 */
@SuppressWarnings("unchecked")
private Set<String> buildStopwords(List<String> allStopWords) throws SystemFault {
    if (allStopWords == null) {
        InpipeLog.l.warn(" FilterStopWords: No stop words.");
        return null;
    }

    try {
        Set<?> stopSet = StopFilter.makeStopSet(LuceneConstants.version, allStopWords);
        boolean infoEnabled = InpipeLog.l.isInfoEnabled();
        if (infoEnabled) {
            InpipeLog.l.info(" StopwordManager: stopWords.size - " + stopSet.size());
        }
        // Unchecked narrowing, as in the original; covered by the annotation above.
        return (Set<String>) stopSet;
    } catch (Exception failure) {
        throw new SystemFault(failure);
    }
}
From source file:com.github.pmerienne.trident.ml.preprocessing.EnglishTokenizer.java
License:Apache License
/**
 * Creates the token stream for the given text.
 *
 * <p>Uses the configured stop words when present, otherwise the default set
 * from {@code EnglishAnalyzer}. When n-grams are enabled the stream is wrapped
 * in a {@code ShingleFilter} bounded by {@code minNGram} and {@code maxNGram}.
 *
 * @param text the text to tokenize
 * @return the resulting token stream
 */
protected TokenStream createTokenStream(String text) {
    final Set<?> effectiveStopWords;
    if (this.stopWords != null) {
        effectiveStopWords = StopFilter.makeStopSet(LUCENE_VERSION, stopWords);
    } else {
        effectiveStopWords = EnglishAnalyzer.getDefaultStopSet();
    }

    Analyzer analyzer = new EnglishSpecialAnalyzer(LUCENE_VERSION, effectiveStopWords, this.stemExclusionsSet);
    TokenStream baseStream = analyzer.tokenStream(null, new StringReader(text));

    return this.nGram
            ? new ShingleFilter(baseStream, this.minNGram, this.maxNGram)
            : baseStream;
}
From source file:com.ikon.analysis.SpanishAnalyzer.java
License:Open Source License
/**
 * Creates an analyzer whose stop table is built from
 * {@code SPANISH_STOP_WORDS}.
 */
public SpanishAnalyzer() {
    this.stopTable = StopFilter.makeStopSet(Config.LUCENE_VERSION, SPANISH_STOP_WORDS);
}
From source file:com.ikon.analysis.SpanishAnalyzer.java
License:Open Source License
/**
 * Creates an analyzer whose stop table is built from the given words.
 *
 * @param stopWords the words to build the stop table from
 */
public SpanishAnalyzer(String[] stopWords) {
    this.stopTable = StopFilter.makeStopSet(Config.LUCENE_VERSION, stopWords);
}
From source file:jp.mwsoft.cjkanalyzers.CJKAnalyzerBase.java
License:Apache License
/**
 * Builds an analyzer which removes words in the provided array.
 *
 * @param matchVersion
 *            version value passed both to the superclass constructor and to
 *            {@code StopFilter.makeStopSet}
 * @param stopWords
 *            stop word array
 * @deprecated use {@link #CJKAnalyzer(Version, Set)} instead
 */
@Deprecated
public CJKAnalyzerBase(Version matchVersion, String... stopWords) {
    // NOTE(review): the @deprecated link above names CJKAnalyzer, but this class
    // is CJKAnalyzerBase; confirm which constructor is the intended replacement.
    super(matchVersion, StopFilter.makeStopSet(matchVersion, stopWords));
}
From source file:net.sf.jtmt.summarizers.SummaryAnalyzer.java
License:Apache License
/** * Instantiates a new summary analyzer./*from w ww. j a va 2 s.c o m*/ * * @throws IOException Signals that an I/O exception has occurred. */ public SummaryAnalyzer() throws IOException { String[] stopwords = filterComments(StringUtils.split(FileUtils.readFileToString( new File(getClass().getResource("/resources/jtmt/stopwords.txt").getFile()), "UTF-8"))); this.stopset = StopFilter.makeStopSet(stopwords, true); }