List of usage examples for the method org.apache.lucene.analysis.es.SpanishAnalyzer#getDefaultStopSet()
public static CharArraySet getDefaultStopSet()
From source file:com.stratio.cassandra.lucene.schema.analysis.SnowballAnalyzerBuilder.java
License:Apache License
/** * Returns the default stopwords set used by Lucene language analyzer for the specified language. * * @param language The language for which the stopwords are. The supported languages are English, French, Spanish, * Portuguese, Italian, Romanian, German, Dutch, Swedish, Norwegian, Danish, Russian, Finnish, * Irish, Hungarian, Turkish, Armenian, Basque and Catalan. * @return The default stopwords set used by Lucene language analyzers. *///from w w w . ja v a 2 s . c o m private static CharArraySet getDefaultStopwords(String language) { switch (language) { case "English": return EnglishAnalyzer.getDefaultStopSet(); case "French": return FrenchAnalyzer.getDefaultStopSet(); case "Spanish": return SpanishAnalyzer.getDefaultStopSet(); case "Portuguese": return PortugueseAnalyzer.getDefaultStopSet(); case "Italian": return ItalianAnalyzer.getDefaultStopSet(); case "Romanian": return RomanianAnalyzer.getDefaultStopSet(); case "German": return GermanAnalyzer.getDefaultStopSet(); case "Dutch": return DutchAnalyzer.getDefaultStopSet(); case "Swedish": return SwedishAnalyzer.getDefaultStopSet(); case "Norwegian": return NorwegianAnalyzer.getDefaultStopSet(); case "Danish": return DanishAnalyzer.getDefaultStopSet(); case "Russian": return RussianAnalyzer.getDefaultStopSet(); case "Finnish": return FinnishAnalyzer.getDefaultStopSet(); case "Irish": return IrishAnalyzer.getDefaultStopSet(); case "Hungarian": return HungarianAnalyzer.getDefaultStopSet(); case "Turkish": return SpanishAnalyzer.getDefaultStopSet(); case "Armenian": return SpanishAnalyzer.getDefaultStopSet(); case "Basque": return BasqueAnalyzer.getDefaultStopSet(); case "Catalan": return CatalanAnalyzer.getDefaultStopSet(); default: return CharArraySet.EMPTY_SET; } }
From source file:com.stratio.cassandra.lucene.schema.analysis.StandardStopwordsTest.java
License:Apache License
/**
 * Verifies that the Spanish entry of {@code StandardStopwords} yields the same stop word set as
 * {@code SpanishAnalyzer.getDefaultStopSet()}.
 */
@Test
public void testGetSpanishPreBuiltAnalyzer() {
    CharArraySet actual = StandardStopwords.SPANISH.get();
    CharArraySet expected = SpanishAnalyzer.getDefaultStopSet();
    assertEquals("Expected another stopwords", expected, actual);
}
From source file:ie.cmrc.smtx.lucene.analysis.EuropeanAnalyzer.java
License:Apache License
/** * Gets the stop words set for the provided language * @param language Two-letter code of a language * @return {@code CharArraySet} containing the stop words of the provided language. * If the provided language is not supported,then the Lucene standard stop words set * if returned./* w ww. j a va 2 s .c o m*/ */ protected CharArraySet getStopWordsSet(String language) { String lang = language; if (lang != null) lang = lang.trim().toLowerCase(); CharArraySet charArraySet = cache.get(lang); if (charArraySet == null) { if (SUPPORTED_LANGUAGES.contains(lang)) { if (lang.equals(LANG_EN)) { charArraySet = EnglishAnalyzer.getDefaultStopSet(); } else if (lang.equals(LANG_FR)) { charArraySet = FrenchAnalyzer.getDefaultStopSet(); } else if (lang.equals(LANG_ES)) { charArraySet = SpanishAnalyzer.getDefaultStopSet(); } else if (lang.equals(LANG_PT)) { charArraySet = PortugueseAnalyzer.getDefaultStopSet(); } else if (lang.equals(LANG_IT)) { charArraySet = ItalianAnalyzer.getDefaultStopSet(); } else if (lang.equals(LANG_DE)) { charArraySet = GermanAnalyzer.getDefaultStopSet(); } else if (lang.equals(LANG_NO)) { charArraySet = NorwegianAnalyzer.getDefaultStopSet(); } } else { charArraySet = StandardAnalyzer.STOP_WORDS_SET; } cache.put(lang, charArraySet); } return charArraySet; }
From source file:it.unipd.dei.ims.lucene.clef.AnalyzerFactory.java
License:Apache License
/**
 * Creates the stop word set to use for the given language.
 *
 * <ul>
 *   <li>{@code stopsetType} "CUSTOM" (case-insensitive): reads one stop word per line from the file at
 *       {@code stopsetPath}; each line is trimmed and logged at debug level.</li>
 *   <li>{@code stopsetType} "DEFAULT" (case-insensitive): returns the default stop set of the Lucene
 *       analyzer for {@code language} (one of bg, de, es, fa, fi, fr, hu, it, nl, pt, ru, sv).</li>
 *   <li>Any other type: returns {@code CharArraySet.EMPTY_SET}.</li>
 * </ul>
 *
 * @param language    two-letter language code, only consulted for the "DEFAULT" type
 * @param stopsetType "CUSTOM" or "DEFAULT"
 * @param stopsetPath path of the stop word file, only consulted for the "CUSTOM" type
 * @return the stop word set
 * @throws Exception                     if the custom stop word file cannot be found (the original
 *                                       {@code FileNotFoundException} is attached as the cause)
 * @throws UnsupportedOperationException if the "DEFAULT" type is requested for an unsupported language
 */
public static CharArraySet createStopset(String language, String stopsetType, String stopsetPath)
        throws Exception {

    CharArraySet stopset = CharArraySet.EMPTY_SET;

    if (stopsetType.equalsIgnoreCase("CUSTOM")) {
        stopset = new CharArraySet(0, true);
        // try-with-resources closes the scanner (and the underlying file) even if reading fails
        try (Scanner sc = new Scanner(new File(stopsetPath))) {
            logger.debug("STOPLIST:");
            while (sc.hasNextLine()) {
                String stopword = sc.nextLine().trim();
                logger.debug("=> " + stopword);
                stopset.add(stopword);
            }
            logger.debug("");
        } catch (FileNotFoundException e) {
            // Keep the original exception as the cause instead of printing it and discarding it
            throw new Exception("FileNotFoundException when loading stopset", e);
        }
    } else if (stopsetType.equalsIgnoreCase("DEFAULT")) {
        switch (language) {
            case "bg":
                stopset = BulgarianAnalyzer.getDefaultStopSet();
                break;
            case "de":
                stopset = GermanAnalyzer.getDefaultStopSet();
                break;
            case "es":
                stopset = SpanishAnalyzer.getDefaultStopSet();
                break;
            case "fa":
                stopset = PersianAnalyzer.getDefaultStopSet();
                break;
            case "fi":
                stopset = FinnishAnalyzer.getDefaultStopSet();
                break;
            case "fr":
                stopset = FrenchAnalyzer.getDefaultStopSet();
                break;
            case "hu":
                stopset = HungarianAnalyzer.getDefaultStopSet();
                break;
            case "it":
                stopset = ItalianAnalyzer.getDefaultStopSet();
                break;
            case "nl":
                stopset = DutchAnalyzer.getDefaultStopSet();
                break;
            case "pt":
                stopset = PortugueseAnalyzer.getDefaultStopSet();
                break;
            case "ru":
                stopset = RussianAnalyzer.getDefaultStopSet();
                break;
            case "sv":
                stopset = SwedishAnalyzer.getDefaultStopSet();
                break;
            default:
                throw new UnsupportedOperationException("Language not supported yet");
        }
    }

    return stopset;
}
From source file:org.efaps.admin.index.Index.java
License:Apache License
/**
 * Gets the analyzer.
 *
 * <p>If the system configuration defines {@code KernelSettings.INDEXANALYZERPROVCLASS}, that class is
 * loaded through the {@code EFapsClassLoader}, instantiated and asked for the analyzer. Otherwise a
 * {@code StandardAnalyzer} using the Spanish default stop words is returned.</p>
 *
 * @return the analyzer
 * @throws EFapsException on error
 */
public static Analyzer getAnalyzer() throws EFapsException {
    final IAnalyzerProvider provider;
    if (!EFapsSystemConfiguration.get().containsAttributeValue(KernelSettings.INDEXANALYZERPROVCLASS)) {
        // No provider configured: fall back to a built-in provider
        provider = new IAnalyzerProvider() {
            @Override
            public Analyzer getAnalyzer() {
                return new StandardAnalyzer(SpanishAnalyzer.getDefaultStopSet());
            }
        };
    } else {
        final String clazzname = EFapsSystemConfiguration.get()
                .getAttributeValue(KernelSettings.INDEXANALYZERPROVCLASS);
        try {
            final Class<?> clazz = Class.forName(clazzname, false, EFapsClassLoader.getInstance());
            provider = (IAnalyzerProvider) clazz.newInstance();
        } catch (final ClassNotFoundException | InstantiationException | IllegalAccessException e) {
            throw new EFapsException(Index.class, "Could not instanciate IAnalyzerProvider", e);
        }
    }
    return provider.getAnalyzer();
}
From source file:org.efaps.esjp.admin.index.AnalyzerProvider_Base.java
License:Apache License
/** * Gets the analyzer./*from w w w.j a va 2s . c o m*/ * * @param _companyId the _company id * @param _language the _language * @return the analyzer */ public Analyzer getAnalyzer(final Long _companyId, final String _language) { final StandardAnalyzer ret; switch (_language) { case "de": ret = new StandardAnalyzer(GermanAnalyzer.getDefaultStopSet()); break; case "es": ret = new StandardAnalyzer(SpanishAnalyzer.getDefaultStopSet()); break; case "en": default: ret = new StandardAnalyzer(EnglishAnalyzer.getDefaultStopSet()); break; } return ret; }
From source file:org.elasticsearch.analysis.common.SpanishAnalyzerProvider.java
License:Apache License
SpanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new SpanishAnalyzer(Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);/*w w w.j av a2 s . c o m*/
}
From source file:org.elasticsearch.index.analysis.SpanishAnalyzerProvider.java
License:Apache License
@Inject public SpanishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); analyzer = new SpanishAnalyzer(version, Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet(), version), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version)); }
From source file:org.omegat.tokenizer.LuceneSpanishTokenizer.java
License:Open Source License
/**
 * Builds the token stream for the given text.
 *
 * <p>When stemming is not allowed, a plain {@code StandardTokenizer} is returned. Otherwise a
 * {@code SpanishAnalyzer} token stream is returned, using the Spanish default stop words when
 * {@code stopWordsAllowed} is set and an empty stop set when it is not.</p>
 *
 * @param strOrig          the text to tokenize
 * @param stemsAllowed     whether the Spanish analyzer (stemming) should be used
 * @param stopWordsAllowed whether the default Spanish stop words are passed to the analyzer
 * @return the token stream over {@code strOrig}
 */
@Override
protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed,
        final boolean stopWordsAllowed) {
    if (!stemsAllowed) {
        return new StandardTokenizer(getBehavior(), new StringReader(strOrig));
    }

    final Set<?> stopWords;
    if (stopWordsAllowed) {
        stopWords = SpanishAnalyzer.getDefaultStopSet();
    } else {
        stopWords = Collections.EMPTY_SET;
    }
    return new SpanishAnalyzer(getBehavior(), stopWords).tokenStream("", new StringReader(strOrig));
}